├── .DS_Store ├── .gitignore ├── Course_5_emotion_vgg_finetune ├── FVGG_Emo.py ├── test_fvgg_emo.txt ├── train_fvgg_emo.txt └── training_instruction.txt ├── README.md ├── WORKSPACE ├── course_10_dqn.md ├── course_1_tf_basic_operation.py ├── course_1_tf_lr.ipynb ├── course_1_tf_lr.py ├── course_2_tf_nn.ipynb ├── course_2_tf_nn.py ├── course_3_tf_mnist_cnn.ipynb ├── course_3_tf_mnist_cnn.py ├── course_6_obj_detection.txt ├── course_7_lstm_learn_shakespeare.ipynb ├── course_7_seq2seq.py ├── course_7_shakespeare_gen.py ├── course_8_image2txt ├── BUILD ├── configuration.py ├── data │ ├── build_mscoco_data.py │ └── download_and_preprocess_mscoco.sh ├── evaluate.py ├── inference_utils │ ├── BUILD │ ├── caption_generator.py │ ├── caption_generator_test.py │ ├── inference_wrapper_base.py │ └── vocabulary.py ├── inference_wrapper.py ├── ops │ ├── BUILD │ ├── image_embedding.py │ ├── image_embedding_test.py │ ├── image_processing.py │ └── inputs.py ├── readme.md ├── run_inference.py ├── show_and_tell_model.py ├── show_and_tell_model_test.py └── train.py ├── course_9_pix2pix_file.md ├── course_example_vgg ├── .gitignore ├── course_4_vgg16_test.py ├── couse_4_vgg16_test.ipynb ├── imagenet1000_clsid_to_human.py ├── imagenet_class_list.txt ├── np_plot.py ├── np_plot.pyc ├── test_data │ ├── dog.png │ ├── puzzle.jpeg │ └── tiger.jpeg ├── utils.py ├── utils.pyc ├── vgg-model-download-link ├── vgg-model-download-link.txt ├── vgg16.py └── vgg16_test.py ├── example_autoencoder_recon.ipynb ├── g3doc ├── COCO_val2014_000000224477.jpg ├── example_captions.jpg └── show_and_tell_architecture.png ├── index.html ├── libs ├── __init__.py ├── activations.py ├── batch_norm.py ├── connections.py ├── dataset_utils.py ├── datasets.py └── utils.py ├── mnist ├── mnist.pkl.gz ├── t10k-images-idx3-ubyte.gz ├── t10k-labels-idx1-ubyte.gz ├── train-images-idx3-ubyte.gz └── train-labels-idx1-ubyte.gz └── tf_1_try.ipynb /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bazel-bin 2 | /bazel-ci_build-cache 3 | /bazel-genfiles 4 | /bazel-out 5 | /bazel-im2txt 6 | /bazel-testlogs 7 | /bazel-tf 8 | -------------------------------------------------------------------------------- /Course_5_emotion_vgg_finetune/FVGG_Emo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Retraining (Finetuning) Example with vgg.tflearn. Using weights from VGG model to retrain 3 | network for a new task (your own dataset).All weights are restored except 4 | last layer (softmax) that will be retrained to match the new task (finetuning). 
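Concretely, in the network defined below the convolutional blocks conv1-conv4 are frozen with trainable=False, conv5 and fc6 keep training from the pretrained weights, and fc7/fc8 are re-initialised (restore=False) because their shapes no longer match the original VGG16.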
5 | 
6 | DATASET for this experiment can be obtained at: https://pan.baidu.com/s/1kUEnhP1
7 | 
8 | Edited by Wei Li for VGG fine-tuning
9 | 
10 | '''
11 | import tflearn
12 | from tflearn.data_preprocessing import ImagePreprocessing
13 | import os
14 | 
15 | 
16 | def vgg16(input, num_class):
17 | 
18 |     # trainable=False keeps the pretrained weights of these layers frozen during training
19 |     x = tflearn.conv_2d(input, 64, 3, activation='relu', scope='conv1_1',trainable=False)
20 |     x = tflearn.conv_2d(x, 64, 3, activation='relu', scope='conv1_2',trainable=False)
21 |     x = tflearn.max_pool_2d(x, 2, strides=2, name='maxpool1')
22 | 
23 |     x = tflearn.conv_2d(x, 128, 3, activation='relu', scope='conv2_1',trainable=False)
24 |     x = tflearn.conv_2d(x, 128, 3, activation='relu', scope='conv2_2',trainable=False)
25 |     x = tflearn.max_pool_2d(x, 2, strides=2, name='maxpool2')
26 | 
27 |     x = tflearn.conv_2d(x, 256, 3, activation='relu', scope='conv3_1',trainable=False)
28 |     x = tflearn.conv_2d(x, 256, 3, activation='relu', scope='conv3_2',trainable=False)
29 |     x = tflearn.conv_2d(x, 256, 3, activation='relu', scope='conv3_3',trainable=False)
30 |     x = tflearn.max_pool_2d(x, 2, strides=2, name='maxpool3')
31 | 
32 |     x = tflearn.conv_2d(x, 512, 3, activation='relu', scope='conv4_1',trainable=False)
33 |     x = tflearn.conv_2d(x, 512, 3, activation='relu', scope='conv4_2',trainable=False)
34 |     x = tflearn.conv_2d(x, 512, 3, activation='relu', scope='conv4_3',trainable=False)
35 |     x = tflearn.max_pool_2d(x, 2, strides=2, name='maxpool4')
36 | 
37 |     x = tflearn.conv_2d(x, 512, 3, activation='relu', scope='conv5_1')
38 |     x = tflearn.conv_2d(x, 512, 3, activation='relu', scope='conv5_2')
39 |     x = tflearn.conv_2d(x, 512, 3, activation='relu', scope='conv5_3')
40 |     x = tflearn.max_pool_2d(x, 2, strides=2, name='maxpool5')
41 | 
42 |     x = tflearn.fully_connected(x, 4096, activation='relu', scope='fc6')
43 |     x = tflearn.dropout(x, 0.5, name='dropout1')
44 |     # fc7 is reduced to 2048 units: fewer parameters, which is enough for this task
45 |     x = tflearn.fully_connected(x, 2048, activation='relu', scope='fc7',restore=False)
46 |     x = tflearn.dropout(x, 0.5, name='dropout2')
47 | 
48 |     x = tflearn.fully_connected(x, num_class, activation='softmax', scope='fc8',
49 |                                 restore=False)
50 | 
51 |     return x
52 | 
53 | 
54 | # data_dir = "webemo_tr/"
55 | model_path = "."
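# A note on the list file loaded below (illustrative; it assumes the standard
# tflearn image_preloader 'file' mode): each line of train_fvgg_emo.txt holds an
# image path and an integer class id separated by a space, for example
#     webemo_tr/angry/img_0001.jpg 0
#     webemo_tr/happy/img_0002.jpg 3
# The paths above are made up; gen_files_list.py (not included in this listing)
# is assumed to walk the dataset folder and emit lines in this format.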
56 | # the training list file, generated by gen_files_list.py
57 | files_list = "./train_fvgg_emo.txt"
58 | 
59 | from tflearn.data_utils import image_preloader
60 | 
61 | X, Y = image_preloader(files_list, image_shape=(224, 224), mode='file',
62 |                        categorical_labels=True, normalize=False,
63 |                        files_extension=['.jpg', '.png'], filter_channel=True)
64 | # or use mode='folder'
65 | # X, Y = image_preloader(data_dir, image_shape=(224, 224), mode='folder',
66 | #                        categorical_labels=True, normalize=True,
67 | #                        files_extension=['.jpg', '.png'], filter_channel=True)
68 | # print X.shape
69 | num_classes = 7  # number of classes in your dataset
70 | 
71 | # VGG preprocessing
72 | img_prep = ImagePreprocessing()
73 | img_prep.add_featurewise_zero_center(mean=[123.68, 116.779, 103.939],
74 |                                      per_channel=True)
75 | # VGG Network
76 | x = tflearn.input_data(shape=[None, 224, 224, 3], name='input',
77 |                        data_preprocessing=img_prep)
78 | softmax = vgg16(x, num_classes)
79 | regression = tflearn.regression(softmax, optimizer='adam',
80 |                                 loss='categorical_crossentropy',
81 |                                 learning_rate=0.0001, restore=False)
82 | 
83 | model = tflearn.DNN(regression, checkpoint_path='vgg-finetuning',
84 |                     max_checkpoints=3, tensorboard_verbose=2,
85 |                     tensorboard_dir="./logs")
86 | 
87 | model_file = os.path.join(model_path, "vgg16.tflearn")
88 | model.load(model_file, weights_only=True)
89 | 
90 | # Start fine-tuning
91 | model.fit(X, Y, n_epoch=20, validation_set=0.1, shuffle=True,
92 |           show_metric=True, batch_size=64, snapshot_epoch=False,
93 |           snapshot_step=200, run_id='vgg-finetuning')
94 | 
95 | model.save('ChinaHadoop_vgg_finetune_emo.tfmodel')
96 | ## to check that the trained model predicts sensibly, call
97 | # model.predict(img_array) and inspect the class probabilities
98 | 
99 | 
--------------------------------------------------------------------------------
/Course_5_emotion_vgg_finetune/training_instruction.txt:
--------------------------------------------------------------------------------
1 | 
2 | 0. install tflearn: pip install git+https://github.com/tflearn/tflearn.git
3 | 1. download the training data and unzip it in this folder; the dataset is at https://drive.google.com/open?id=0B3ANX1iL124qbmxOc2cyQzhvUFE
4 |    if you cannot access Google, you can download the dataset from the Baidu cloud drive: https://pan.baidu.com/s/1kUEnhP1
5 | 2. download the pretrained tflearn VGG model from https://www.dropbox.com/s/9li9mi4105jf45v/vgg16.tflearn?dl=0
6 | 3. go through the code and make sure all the paths are correct
7 | 
8 | run and wait...
9 | 
10 | 
11 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | This is the code collection for the online deep learning course at ChinaHadoop.cn.
3 | 
4 | The code has been tested in an Ubuntu + CUDA + cuDNN environment; if you have problems, you can
5 | 
6 | contact the lecturer Wei Li via Weibo or the course QQ group.
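A quick way to verify the Ubuntu + CUDA + cuDNN setup before running the course scripts (a minimal sketch, assuming the TensorFlow 1.x API used throughout this repository):

    import tensorflow as tf
    from tensorflow.python.client import device_lib

    # a working CUDA/cuDNN install shows a GPU entry in this device list
    print(device_lib.list_local_devices())
    # True if this TensorFlow build was compiled with CUDA support
    print(tf.test.is_built_with_cuda())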
7 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "im2txt") 2 | -------------------------------------------------------------------------------- /course_10_dqn.md: -------------------------------------------------------------------------------- 1 | https://github.com/wiibrew/dqn 2 | -------------------------------------------------------------------------------- /course_1_tf_basic_operation.py: -------------------------------------------------------------------------------- 1 | ''' 2 | basic tf operation examples, 3 | 1. write a tf function use tf.xxxx 4 | 2. feed data to tf.placeholder and set data to tf.Variable 5 | 3.run... 6 | ''' 7 | 8 | # 9 | import tensorflow as tf 10 | 11 | # direct sum with constand value 12 | a = tf.constant(2) 13 | b = tf.constant(3) 14 | c=a+b 15 | d=a*b 16 | 17 | sess=tf.Session() 18 | print sess.run(c) 19 | print sess.run(d) 20 | 21 | # 22 | a = tf.placeholder(tf.int16) 23 | b = tf.placeholder(tf.int16) 24 | 25 | # 26 | add = tf.add(a, b) 27 | mul = tf.multiply(a, b) 28 | print sess.run(add, feed_dict={a: 2, b: 3}) 29 | print sess.run(mul, feed_dict={a: 2, b: 3}) 30 | 31 | 32 | 33 | # 34 | matrix1 = tf.constant([[3., 3.]]) 35 | matrix2 = tf.constant([[2.],[2.]]) 36 | product = tf.matmul(matrix2, matrix1) 37 | print sess.run(product) 38 | 39 | #here you should also be able to use tf.placeholder 40 | mat1=tf.Variable(tf.random_normal([3,2])) 41 | mat2=tf.Variable(tf.random_normal([2,3])) 42 | product=tf.matmul(mat1,mat2) 43 | 44 | m1=[[1,3],[2,1],[0,5]] 45 | m2=[[3,2,1],[1,2,3]] 46 | 47 | print sess.run(product,feed_dict={mat1:m1,mat2:m2}) -------------------------------------------------------------------------------- /course_1_tf_lr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "'''\n", 12 | "linear regression experiment, hope you can know:\n", 13 | "1. how to design the learning model\n", 14 | "2. optimize the model\n", 15 | "3. 
dealing with the dataset\n", 16 | "\n", 17 | "Original Author: Aymeric Damien\n", 18 | "Edited by Wei Li for ChinaHadoop Deep learning course\n", 19 | "Project: https://github.com/aymericdamien/TensorFlow-Examples/\n", 20 | "'''\n", 21 | "\n", 22 | "\n", 23 | "import tensorflow as tf\n", 24 | "import numpy\n", 25 | "rng = numpy.random\n", 26 | "\n", 27 | "# model params\n", 28 | "learning_rate = 0.02\n", 29 | "training_epochs = 3000\n", 30 | "display_step=50\n", 31 | "# \n", 32 | "train_X = numpy.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167,\n", 33 | " 7.042,10.791,5.313,7.997,5.654,9.27,3.1])\n", 34 | "train_Y = numpy.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221,\n", 35 | " 2.827,3.465,1.65,2.904,2.42,2.94,1.3])\n", 36 | "n_samples = train_X.shape[0]\n", 37 | "\n", 38 | "# tf Graph Input\n", 39 | "X = tf.placeholder(\"float\")\n", 40 | "Y = tf.placeholder(\"float\")\n", 41 | "\n", 42 | "# Set model weights\n", 43 | "W = tf.Variable(rng.randn(), name=\"weight\")\n", 44 | "b = tf.Variable(rng.randn(), name=\"bias\")\n", 45 | "\n", 46 | "# Construct a linear model\n", 47 | "pred = tf.add(tf.multiply(X, W), b)\n", 48 | "\n", 49 | "# Mean squared error\n", 50 | "cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)\n", 51 | "# Gradient descent\n", 52 | "optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)\n", 53 | "\n", 54 | "# Initializing the variables\n", 55 | "init = tf.global_variables_initializer()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 9, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "('Epoch:', '0050', 'cost=', '0.178423569', 'W=', 0.42291793, 'b=', -0.4734658)\n", 70 | "('Epoch:', '0100', 'cost=', '0.156202286', 'W=', 0.40251526, 'b=', -0.32475927)\n", 71 | "('Epoch:', '0150', 'cost=', '0.138855815', 'W=', 0.38448787, 'b=', -0.19336548)\n", 72 | "('Epoch:', '0200', 'cost=', '0.125314981', 'W=', 0.36855927, 'b=', -0.077268951)\n", 73 | "('Epoch:', '0250', 'cost=', '0.114744954', 'W=', 0.35448512, 'b=', 0.025311502)\n", 74 | "('Epoch:', '0300', 'cost=', '0.106494129', 'W=', 0.34204948, 'b=', 0.11594942)\n", 75 | "('Epoch:', '0350', 'cost=', '0.100053802', 'W=', 0.3310616, 'b=', 0.19603507)\n", 76 | "('Epoch:', '0400', 'cost=', '0.095026731', 'W=', 0.32135299, 'b=', 0.26679745)\n", 77 | "('Epoch:', '0450', 'cost=', '0.091103002', 'W=', 0.31277463, 'b=', 0.3293213)\n", 78 | "('Epoch:', '0500', 'cost=', '0.088040523', 'W=', 0.30519509, 'b=', 0.38456526)\n", 79 | "('Epoch:', '0550', 'cost=', '0.085650302', 'W=', 0.29849792, 'b=', 0.43337804)\n", 80 | "('Epoch:', '0600', 'cost=', '0.083784848', 'W=', 0.29258049, 'b=', 0.47650799)\n", 81 | "('Epoch:', '0650', 'cost=', '0.082329050', 'W=', 0.28735185, 'b=', 0.51461679)\n", 82 | "('Epoch:', '0700', 'cost=', '0.081192940', 'W=', 0.28273201, 'b=', 0.54828918)\n", 83 | "('Epoch:', '0750', 'cost=', '0.080306433', 'W=', 0.27865005, 'b=', 0.57804072)\n", 84 | "('Epoch:', '0800', 'cost=', '0.079614699', 'W=', 0.27504328, 'b=', 0.60432887)\n", 85 | "('Epoch:', '0850', 'cost=', '0.079074971', 'W=', 0.27185646, 'b=', 0.6275565)\n", 86 | "('Epoch:', '0900', 'cost=', '0.078653932', 'W=', 0.26904064, 'b=', 0.64807951)\n", 87 | "('Epoch:', '0950', 'cost=', '0.078325450', 'W=', 0.26655263, 'b=', 0.66621393)\n", 88 | "('Epoch:', '1000', 'cost=', '0.078069247', 'W=', 0.26435426, 'b=', 0.68223649)\n", 89 | "('Epoch:', '1050', 'cost=', '0.077869445', 'W=', 0.26241186, 'b=', 
0.69639373)\n", 90 | "('Epoch:', '1100', 'cost=', '0.077713616', 'W=', 0.26069549, 'b=', 0.70890343)\n", 91 | "('Epoch:', '1150', 'cost=', '0.077592134', 'W=', 0.25917912, 'b=', 0.71995574)\n", 92 | "('Epoch:', '1200', 'cost=', '0.077497423', 'W=', 0.2578392, 'b=', 0.72972184)\n", 93 | "('Epoch:', '1250', 'cost=', '0.077423617', 'W=', 0.25665545, 'b=', 0.73834991)\n", 94 | "('Epoch:', '1300', 'cost=', '0.077366099', 'W=', 0.2556093, 'b=', 0.74597466)\n", 95 | "('Epoch:', '1350', 'cost=', '0.077321291', 'W=', 0.25468507, 'b=', 0.75271124)\n", 96 | "('Epoch:', '1400', 'cost=', '0.077286400', 'W=', 0.25386831, 'b=', 0.75866407)\n", 97 | "('Epoch:', '1450', 'cost=', '0.077259235', 'W=', 0.25314665, 'b=', 0.76392406)\n", 98 | "('Epoch:', '1500', 'cost=', '0.077238098', 'W=', 0.252509, 'b=', 0.76857102)\n", 99 | "('Epoch:', '1550', 'cost=', '0.077221632', 'W=', 0.25194564, 'b=', 0.77267736)\n", 100 | "('Epoch:', '1600', 'cost=', '0.077208854', 'W=', 0.25144795, 'b=', 0.77630514)\n", 101 | "('Epoch:', '1650', 'cost=', '0.077198923', 'W=', 0.25100803, 'b=', 0.77951139)\n", 102 | "('Epoch:', '1700', 'cost=', '0.077191189', 'W=', 0.25061971, 'b=', 0.78234196)\n", 103 | "('Epoch:', '1750', 'cost=', '0.077185199', 'W=', 0.25027612, 'b=', 0.78484607)\n", 104 | "('Epoch:', '1800', 'cost=', '0.077180564', 'W=', 0.24997255, 'b=', 0.78705853)\n", 105 | "('Epoch:', '1850', 'cost=', '0.077176966', 'W=', 0.2497045, 'b=', 0.78901207)\n", 106 | "('Epoch:', '1900', 'cost=', '0.077174187', 'W=', 0.24946776, 'b=', 0.79073763)\n", 107 | "('Epoch:', '1950', 'cost=', '0.077172041', 'W=', 0.24925858, 'b=', 0.79226238)\n", 108 | "('Epoch:', '2000', 'cost=', '0.077170387', 'W=', 0.24907368, 'b=', 0.7936098)\n", 109 | "('Epoch:', '2050', 'cost=', '0.077169113', 'W=', 0.24891038, 'b=', 0.79480028)\n", 110 | "('Epoch:', '2100', 'cost=', '0.077168114', 'W=', 0.24876596, 'b=', 0.79585338)\n", 111 | "('Epoch:', '2150', 'cost=', '0.077167362', 'W=', 0.24863829, 'b=', 0.79678357)\n", 112 | "('Epoch:', '2200', 'cost=', '0.077166796', 'W=', 0.24852541, 'b=', 0.79760629)\n", 113 | "('Epoch:', '2250', 'cost=', '0.077166334', 'W=', 0.24842578, 'b=', 0.79833227)\n", 114 | "('Epoch:', '2300', 'cost=', '0.077165999', 'W=', 0.2483376, 'b=', 0.79897529)\n", 115 | "('Epoch:', '2350', 'cost=', '0.077165760', 'W=', 0.24825987, 'b=', 0.79954147)\n", 116 | "('Epoch:', '2400', 'cost=', '0.077165581', 'W=', 0.24819092, 'b=', 0.80004394)\n", 117 | "('Epoch:', '2450', 'cost=', '0.077165432', 'W=', 0.24813022, 'b=', 0.80048668)\n", 118 | "('Epoch:', '2500', 'cost=', '0.077165321', 'W=', 0.24807698, 'b=', 0.80087441)\n", 119 | "('Epoch:', '2550', 'cost=', '0.077165253', 'W=', 0.24802969, 'b=', 0.80121905)\n", 120 | "('Epoch:', '2600', 'cost=', '0.077165186', 'W=', 0.24798796, 'b=', 0.80152339)\n", 121 | "('Epoch:', '2650', 'cost=', '0.077165157', 'W=', 0.2479513, 'b=', 0.8017906)\n", 122 | "('Epoch:', '2700', 'cost=', '0.077165119', 'W=', 0.24791868, 'b=', 0.80202842)\n", 123 | "('Epoch:', '2750', 'cost=', '0.077165097', 'W=', 0.24789007, 'b=', 0.80223686)\n", 124 | "('Epoch:', '2800', 'cost=', '0.077165097', 'W=', 0.24786451, 'b=', 0.80242288)\n", 125 | "('Epoch:', '2850', 'cost=', '0.077165082', 'W=', 0.24784194, 'b=', 0.80258781)\n", 126 | "('Epoch:', '2900', 'cost=', '0.077165082', 'W=', 0.24782193, 'b=', 0.80273348)\n", 127 | "('Epoch:', '2950', 'cost=', '0.077165082', 'W=', 0.24780463, 'b=', 0.80285954)\n", 128 | "('Epoch:', '3000', 'cost=', '0.077165082', 'W=', 0.24778947, 'b=', 0.80296975)\n", 129 | "('Training cost=', 0.077165082, 'W=', 
0.24778947, 'b=', 0.80296975, '\\n')\n", 130 | "Tssting...\n", 131 | "('Test LOSS=', 0.079976395)\n", 132 | "('Final Loss:', 0.0028113127)\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "\n", 138 | "# Launch the graph\n", 139 | "with tf.Session() as sess:\n", 140 | " sess.run(init)\n", 141 | "\n", 142 | " # Fit all training data\n", 143 | " for epoch in range(training_epochs):\n", 144 | " for (x, y) in zip(train_X, train_Y):\n", 145 | " sess.run(optimizer, feed_dict={X: x, Y: y})\n", 146 | "\n", 147 | " # Display logs per epoch step\n", 148 | " if (epoch+1) % display_step == 0:\n", 149 | " c = sess.run(cost, feed_dict={X: train_X, Y:train_Y})\n", 150 | " print(\"Epoch:\", '%04d' % (epoch+1), \"cost=\", \"{:.9f}\".format(c), \\\n", 151 | " \"W=\", sess.run(W), \"b=\", sess.run(b))\n", 152 | "\n", 153 | "\n", 154 | " training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})\n", 155 | " print(\"Training cost=\", training_cost, \"W=\", sess.run(W), \"b=\", sess.run(b), '\\n')\n", 156 | "\n", 157 | " \n", 158 | "\n", 159 | " # the testing data\n", 160 | " test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1])\n", 161 | " test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03])\n", 162 | "\n", 163 | " print(\"Tssting...\")\n", 164 | " testing_cost = sess.run(\n", 165 | " tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]),\n", 166 | " feed_dict={X: test_X, Y: test_Y}) # same function as cost above\n", 167 | " print(\"Test LOSS=\", testing_cost)\n", 168 | " print(\"Final Loss:\", abs(\n", 169 | " training_cost - testing_cost))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 2", 185 | "language": "python", 186 | "name": "python2" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 2 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython2", 198 | "version": "2.7.12" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /course_1_tf_lr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | linear regression experiment, hope you can know: 3 | 1. how to design the learning model 4 | 2. optimize the model 5 | 3. 
dealing with the dataset 6 | 7 | Original Author: Aymeric Damien 8 | Edited by Wei Li for ChinaHadoop Deep learning course 9 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 10 | ''' 11 | 12 | 13 | import tensorflow as tf 14 | import numpy 15 | rng = numpy.random 16 | 17 | # model params 18 | learning_rate = 0.02 19 | training_epochs = 3000 20 | display_step=50 21 | # 22 | train_X = numpy.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167, 23 | 7.042,10.791,5.313,7.997,5.654,9.27,3.1]) 24 | train_Y = numpy.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221, 25 | 2.827,3.465,1.65,2.904,2.42,2.94,1.3]) 26 | n_samples = train_X.shape[0] 27 | 28 | # tf Graph Input 29 | X = tf.placeholder("float") 30 | Y = tf.placeholder("float") 31 | 32 | # Set model weights 33 | W = tf.Variable(rng.randn(), name="weight") 34 | b = tf.Variable(rng.randn(), name="bias") 35 | 36 | # Construct a linear model 37 | pred = tf.add(tf.multiply(X, W), b) 38 | 39 | # Mean squared error 40 | cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples) 41 | # Gradient descent 42 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 43 | 44 | # Initializing the variables 45 | init = tf.global_variables_initializer() 46 | 47 | # Launch the graph 48 | with tf.Session() as sess: 49 | sess.run(init) 50 | 51 | # Fit all training data 52 | for epoch in range(training_epochs): 53 | for (x, y) in zip(train_X, train_Y): 54 | sess.run(optimizer, feed_dict={X: x, Y: y}) 55 | 56 | # Display logs per epoch step 57 | if (epoch+1) % display_step == 0: 58 | c = sess.run(cost, feed_dict={X: train_X, Y:train_Y}) 59 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \ 60 | "W=", sess.run(W), "b=", sess.run(b)) 61 | 62 | 63 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 64 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 65 | 66 | 67 | 68 | # the testing data 69 | test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1]) 70 | test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03]) 71 | 72 | print("Tssting...") 73 | testing_cost = sess.run( 74 | tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]), 75 | feed_dict={X: test_X, Y: test_Y}) # same function as cost above 76 | print("Test LOSS=", testing_cost) 77 | print("Final Loss:", abs( 78 | training_cost - testing_cost)) 79 | 80 | -------------------------------------------------------------------------------- /course_2_tf_nn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "Extracting ./mnist/train-images-idx3-ubyte.gz\n", 17 | "Extracting ./mnist/train-labels-idx1-ubyte.gz\n", 18 | "Extracting ./mnist/t10k-images-idx3-ubyte.gz\n", 19 | "Extracting ./mnist/t10k-labels-idx1-ubyte.gz\n", 20 | "('Epoch:', '0001', 'cost=', '215.548141965')\n", 21 | "('Epoch:', '0002', 'cost=', '54.977557694')\n", 22 | "('Epoch:', '0003', 'cost=', '33.899888993')\n", 23 | "('Epoch:', '0004', 'cost=', '23.234023376')\n", 24 | "('Epoch:', '0005', 'cost=', '16.552313167')\n", 25 | "('Epoch:', '0006', 'cost=', '12.184614655')\n", 26 | "('Epoch:', '0007', 'cost=', '8.918999288')\n", 27 | "('Epoch:', '0008', 'cost=', '6.555203167')\n", 28 | "('Epoch:', 
'0009', 'cost=', '4.864825427')\n", 29 | "('Epoch:', '0010', 'cost=', '3.541727996')\n", 30 | "('Epoch:', '0011', 'cost=', '2.601980731')\n", 31 | "('Epoch:', '0012', 'cost=', '2.013708151')\n", 32 | "('Epoch:', '0013', 'cost=', '1.447752024')\n", 33 | "('Epoch:', '0014', 'cost=', '1.284220558')\n", 34 | "('Epoch:', '0015', 'cost=', '1.063494972')\n", 35 | "('Epoch:', '0016', 'cost=', '1.089214503')\n", 36 | "('Epoch:', '0017', 'cost=', '0.819465103')\n", 37 | "('Epoch:', '0018', 'cost=', '0.826465986')\n", 38 | "('Epoch:', '0019', 'cost=', '0.756363073')\n", 39 | "('Epoch:', '0020', 'cost=', '0.756904836')\n", 40 | "('Epoch:', '0021', 'cost=', '0.772401051')\n", 41 | "('Epoch:', '0022', 'cost=', '0.591537078')\n", 42 | "('Epoch:', '0023', 'cost=', '0.518754110')\n", 43 | "('Epoch:', '0024', 'cost=', '0.653424654')\n", 44 | "('Epoch:', '0025', 'cost=', '0.639180361')\n", 45 | "('Epoch:', '0026', 'cost=', '0.418257485')\n", 46 | "('Epoch:', '0027', 'cost=', '0.434976982')\n", 47 | "('Epoch:', '0028', 'cost=', '0.606400410')\n", 48 | "('Epoch:', '0029', 'cost=', '0.475488307')\n", 49 | "('Epoch:', '0030', 'cost=', '0.458589170')\n", 50 | "Optimization Finished!\n", 51 | "('Accuracy:', 0.96039999)\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "#get the mnist data \n", 57 | "# wget http://deeplearning.net/data/mnist/mnist.pkl.gz\n", 58 | "\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "from tensorflow.examples.tutorials.mnist import input_data\n", 63 | "mnist = input_data.read_data_sets(\"./mnist/\", one_hot=True)\n", 64 | "\n", 65 | "import tensorflow as tf\n", 66 | "\n", 67 | "# Parameters\n", 68 | "learning_rate = 0.001\n", 69 | "training_epochs = 30\n", 70 | "batch_size = 100\n", 71 | "display_step = 1\n", 72 | "\n", 73 | "# Network Parameters\n", 74 | "n_hidden_1 = 256 # 1st layer number of features\n", 75 | "n_hidden_2 = 512 # 2nd layer number of features\n", 76 | "n_input = 784 # MNIST data input (img shape: 28*28)\n", 77 | "n_classes = 10 # MNIST total classes (0-9 digits)\n", 78 | "\n", 79 | "# tf Graph input\n", 80 | "x = tf.placeholder(\"float\", [None, n_input])\n", 81 | "y = tf.placeholder(\"float\", [None, n_classes])\n", 82 | "\n", 83 | "\n", 84 | "# Create model\n", 85 | "def multilayer_perceptron(x, weights, biases):\n", 86 | " # Hidden layer with RELU activation\n", 87 | " layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])\n", 88 | " layer_1 = tf.nn.relu(layer_1)\n", 89 | " # Hidden layer with RELU activation\n", 90 | " layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])\n", 91 | " layer_2 = tf.nn.relu(layer_2)\n", 92 | "\n", 93 | " # layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])\n", 94 | " # layer_3 = tf.nn.relu(layer_3)\n", 95 | "\n", 96 | "\n", 97 | "\n", 98 | " #we can add dropout layer\n", 99 | " # drop_out = tf.nn.dropout(layer_2, 0.75)\n", 100 | "\n", 101 | "\n", 102 | "\n", 103 | " # Output layer with linear activation\n", 104 | " out_layer = tf.matmul(layer_2, weights['out']) + biases['out']\n", 105 | " return out_layer\n", 106 | "\n", 107 | "# Store layers weight & biases\n", 108 | "weights = {\n", 109 | " #you can change \n", 110 | " 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),\n", 111 | " 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),\n", 112 | " #'h3': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),\n", 113 | " 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))\n", 114 | "}\n", 115 | "biases = {\n", 116 | " 'b1': tf.Variable(tf.random_normal([n_hidden_1])),\n", 117 | " 
'b2': tf.Variable(tf.random_normal([n_hidden_2])),\n", 118 | " #'b3': tf.Variable(tf.random_normal([n_hidden_2])),\n", 119 | " 'out': tf.Variable(tf.random_normal([n_classes]))\n", 120 | "}\n", 121 | "\n", 122 | "# Construct model\n", 123 | "pred = multilayer_perceptron(x, weights, biases)\n", 124 | "\n", 125 | "# Define loss and optimizer\n", 126 | "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))\n", 127 | "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)\n", 128 | "\n", 129 | "# Initializing the variables\n", 130 | "init = tf.global_variables_initializer()\n", 131 | "\n", 132 | "# Launch the graph\n", 133 | "with tf.Session() as sess:\n", 134 | " sess.run(init)\n", 135 | "\n", 136 | " # Training cycle\n", 137 | " for epoch in range(training_epochs):\n", 138 | " avg_cost = 0.\n", 139 | " total_batch = int(mnist.train.num_examples/batch_size)\n", 140 | " # Loop over all batches\n", 141 | " for i in range(total_batch):\n", 142 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 143 | " # Run optimization op (backprop) and cost op (to get loss value)\n", 144 | " _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,\n", 145 | " y: batch_y})\n", 146 | " # Compute average loss\n", 147 | " avg_cost += c / total_batch\n", 148 | " # Display logs per epoch step\n", 149 | " if epoch % display_step == 0:\n", 150 | " print(\"Epoch:\", '%04d' % (epoch+1), \"cost=\", \\\n", 151 | " \"{:.9f}\".format(avg_cost))\n", 152 | " print(\"Optimization Finished!\")\n", 153 | "\n", 154 | " # Test model\n", 155 | " correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))\n", 156 | " # Calculate accuracy\n", 157 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n", 158 | " print(\"Accuracy:\", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))\n" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": true, 166 | "deletable": true, 167 | "editable": true 168 | }, 169 | "outputs": [], 170 | "source": [] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 2", 176 | "language": "python", 177 | "name": "python2" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 2 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython2", 189 | "version": "2.7.12" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | -------------------------------------------------------------------------------- /course_2_tf_nn.py: -------------------------------------------------------------------------------- 1 | #get the mnist data 2 | # wget http://deeplearning.net/data/mnist/mnist.pkl.gz 3 | 4 | 5 | 6 | 7 | from tensorflow.examples.tutorials.mnist import input_data 8 | mnist = input_data.read_data_sets("./mnist/", one_hot=True) 9 | 10 | import tensorflow as tf 11 | 12 | # Parameters 13 | learning_rate = 0.001 14 | training_epochs = 30 15 | batch_size = 100 16 | display_step = 1 17 | 18 | # Network Parameters 19 | n_hidden_1 = 256 # 1st layer number of features 20 | n_hidden_2 = 512 # 2nd layer number of features 21 | n_input = 784 # MNIST data input (img shape: 28*28) 22 | n_classes = 10 # MNIST total classes (0-9 digits) 23 | 24 | # tf Graph input 25 | x = tf.placeholder("float", [None, n_input]) 26 | y = tf.placeholder("float", [None, 
n_classes]) 27 | 28 | 29 | # Create model 30 | def multilayer_perceptron(x, weights, biases): 31 | # Hidden layer with RELU activation 32 | layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 33 | layer_1 = tf.nn.relu(layer_1) 34 | # Hidden layer with RELU activation 35 | layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 36 | layer_2 = tf.nn.relu(layer_2) 37 | 38 | # layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']) 39 | # layer_3 = tf.nn.relu(layer_3) 40 | 41 | 42 | 43 | #we can add dropout layer 44 | # drop_out = tf.nn.dropout(layer_2, 0.75) 45 | 46 | 47 | 48 | # Output layer with linear activation 49 | out_layer = tf.matmul(layer_2, weights['out']) + biases['out'] 50 | return out_layer 51 | 52 | # Store layers weight & biases 53 | weights = { 54 | #you can change 55 | 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])), 56 | 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), 57 | #'h3': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), 58 | 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes])) 59 | } 60 | biases = { 61 | 'b1': tf.Variable(tf.random_normal([n_hidden_1])), 62 | 'b2': tf.Variable(tf.random_normal([n_hidden_2])), 63 | #'b3': tf.Variable(tf.random_normal([n_hidden_2])), 64 | 'out': tf.Variable(tf.random_normal([n_classes])) 65 | } 66 | 67 | # Construct model 68 | pred = multilayer_perceptron(x, weights, biases) 69 | 70 | # Define loss and optimizer 71 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) 72 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 73 | 74 | # Initializing the variables 75 | init = tf.global_variables_initializer() 76 | 77 | # Launch the graph 78 | with tf.Session() as sess: 79 | sess.run(init) 80 | 81 | # Training cycle 82 | for epoch in range(training_epochs): 83 | avg_cost = 0. 
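        # With the standard TF MNIST split (55,000 training images) and batch_size = 100,
        # the inner loop below runs 550 mini-batches per epoch; each batch's cross-entropy
        # c is accumulated and divided by total_batch, so avg_cost is the mean batch loss
        # reported for the epoch.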
84 | total_batch = int(mnist.train.num_examples/batch_size) 85 | # Loop over all batches 86 | for i in range(total_batch): 87 | batch_x, batch_y = mnist.train.next_batch(batch_size) 88 | # Run optimization op (backprop) and cost op (to get loss value) 89 | _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, 90 | y: batch_y}) 91 | # Compute average loss 92 | avg_cost += c / total_batch 93 | # Display logs per epoch step 94 | if epoch % display_step == 0: 95 | print("Epoch:", '%04d' % (epoch+1), "cost=", \ 96 | "{:.9f}".format(avg_cost)) 97 | print("Optimization Finished!") 98 | 99 | # Test model 100 | correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 101 | # Calculate accuracy 102 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) 103 | print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})) 104 | -------------------------------------------------------------------------------- /course_3_tf_mnist_cnn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "Extracting ./train-images-idx3-ubyte.gz\n", 17 | "Extracting ./train-labels-idx1-ubyte.gz\n", 18 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 19 | "Extracting ./t10k-labels-idx1-ubyte.gz\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from tensorflow.examples.tutorials.mnist import input_data\n", 25 | "mnist = input_data.read_data_sets(\".\", one_hot=True)\n", 26 | "\n", 27 | "import tensorflow as tf\n", 28 | "\n", 29 | "# Parameters\n", 30 | "learning_rate = 0.001\n", 31 | "training_epochs = 30\n", 32 | "batch_size = 100\n", 33 | "display_step = 1\n", 34 | "\n", 35 | "# Network Parameters\n", 36 | "n_input = 784 # MNIST data input (img shape: 28*28)\n", 37 | "n_classes = 10 # MNIST total classes (0-9 digits)\n", 38 | "\n", 39 | "# tf Graph input\n", 40 | "x = tf.placeholder(\"float\", [None, n_input])\n", 41 | "y = tf.placeholder(\"float\", [None, n_classes])" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 12, 47 | "metadata": { 48 | "collapsed": true, 49 | "deletable": true, 50 | "editable": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "#pre-define the \n", 55 | "def conv2d(x, W):\n", 56 | " return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')\n", 57 | "\n", 58 | "def max_pool_2x2(x):\n", 59 | " return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],\n", 60 | " strides=[1, 2, 2, 1], padding='SAME')" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 13, 66 | "metadata": { 67 | "collapsed": true, 68 | "deletable": true, 69 | "editable": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "def multilayer_perceptron(x, weights, biases):\n", 74 | " #now, we want to change this to a CNN network\n", 75 | "\n", 76 | " #first reshape the data to 4-D\n", 77 | "\n", 78 | " x_image = tf.reshape(x, [-1,28,28,1])\n", 79 | "\n", 80 | " #then apply cnn layers\n", 81 | "\n", 82 | " h_conv1 = tf.nn.relu(conv2d(x_image, weights['conv1']) + biases['conv_b1'])\n", 83 | " h_pool1 = max_pool_2x2(h_conv1)\n", 84 | "\n", 85 | " h_conv2 = tf.nn.relu(conv2d(h_pool1, weights['conv2']) + biases['conv_b2'])\n", 86 | " h_pool2 = max_pool_2x2(h_conv2)\n", 87 | "\n", 88 | " h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])\n", 89 | " h_fc1 = 
tf.nn.relu(tf.matmul(h_pool2_flat, weights['fc1']) + biases['fc1_b'])\n", 90 | "\n", 91 | "\n", 92 | " # Output layer with linear activation\n", 93 | " out_layer = tf.matmul(h_fc1, weights['out']) + biases['out_b']\n", 94 | " return out_layer" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 14, 100 | "metadata": { 101 | "collapsed": true 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "# Store layers weight & biases\n", 106 | "weights = {\n", 107 | " 'conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),\n", 108 | " 'conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),\n", 109 | " 'fc1' : tf.Variable(tf.random_normal([7*7*64,256])),\n", 110 | " 'out': tf.Variable(tf.random_normal([256,n_classes]))\n", 111 | "}\n", 112 | "biases = {\n", 113 | " 'conv_b1': tf.Variable(tf.random_normal([32])),\n", 114 | " 'conv_b2': tf.Variable(tf.random_normal([64])),\n", 115 | " 'fc1_b': tf.Variable(tf.random_normal([256])),\n", 116 | " 'out_b': tf.Variable(tf.random_normal([n_classes]))\n", 117 | "}\n", 118 | "\n", 119 | "# Construct model\n", 120 | "pred = multilayer_perceptron(x, weights, biases)\n", 121 | "\n", 122 | "# Define loss and optimizer\n", 123 | "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))\n", 124 | "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)\n", 125 | "\n", 126 | "# Initializing the variables\n", 127 | "init = tf.global_variables_initializer()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 15, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "('Epoch:', '0001', 'cost=', '2005.953651756')\n", 142 | "('Epoch:', '0002', 'cost=', '361.200756125')\n", 143 | "('Epoch:', '0003', 'cost=', '222.655593089')\n", 144 | "('Epoch:', '0004', 'cost=', '154.397716973')\n", 145 | "('Epoch:', '0005', 'cost=', '108.289408546')\n", 146 | "('Epoch:', '0006', 'cost=', '83.728486200')\n", 147 | "('Epoch:', '0007', 'cost=', '63.813128544')\n", 148 | "('Epoch:', '0008', 'cost=', '52.091127872')\n", 149 | "('Epoch:', '0009', 'cost=', '38.352929364')\n", 150 | "('Epoch:', '0010', 'cost=', '30.455494692')\n", 151 | "('Epoch:', '0011', 'cost=', '25.972187011')\n", 152 | "('Epoch:', '0012', 'cost=', '20.754565103')\n", 153 | "('Epoch:', '0013', 'cost=', '18.515140012')\n", 154 | "('Epoch:', '0014', 'cost=', '14.170893429')\n", 155 | "('Epoch:', '0015', 'cost=', '13.025495452')\n", 156 | "('Epoch:', '0016', 'cost=', '11.380087092')\n", 157 | "('Epoch:', '0017', 'cost=', '12.045677507')\n", 158 | "('Epoch:', '0018', 'cost=', '9.095552578')\n", 159 | "('Epoch:', '0019', 'cost=', '8.405252479')\n", 160 | "('Epoch:', '0020', 'cost=', '7.802369204')\n", 161 | "('Epoch:', '0021', 'cost=', '8.664561321')\n", 162 | "('Epoch:', '0022', 'cost=', '6.413273589')\n", 163 | "('Epoch:', '0023', 'cost=', '7.001173552')\n", 164 | "('Epoch:', '0024', 'cost=', '3.928643572')\n", 165 | "('Epoch:', '0025', 'cost=', '6.000280571')\n", 166 | "('Epoch:', '0026', 'cost=', '3.947065584')\n", 167 | "('Epoch:', '0027', 'cost=', '5.913655243')\n", 168 | "('Epoch:', '0028', 'cost=', '4.686071558')\n", 169 | "('Epoch:', '0029', 'cost=', '3.783876064')\n", 170 | "('Epoch:', '0030', 'cost=', '3.133972832')\n", 171 | "Optimization Finished!\n", 172 | "('Accuracy:', 0.98420006)\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "\n", 178 | "# Launch the graph\n", 179 | "with tf.Session() as sess:\n", 180 | " 
sess.run(init)\n", 181 | "\n", 182 | " # Training cycle\n", 183 | " for epoch in range(training_epochs):\n", 184 | " avg_cost = 0.\n", 185 | " total_batch = int(mnist.train.num_examples/batch_size)\n", 186 | " # Loop over all batches\n", 187 | " for i in range(total_batch):\n", 188 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 189 | " # Run optimization op (backprop) and cost op (to get loss value)\n", 190 | " _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,\n", 191 | " y: batch_y})\n", 192 | " # Compute average loss\n", 193 | " avg_cost += c / total_batch\n", 194 | " # Display logs per epoch step\n", 195 | " if epoch % display_step == 0:\n", 196 | " print(\"Epoch:\", '%04d' % (epoch+1), \"cost=\", \\\n", 197 | " \"{:.9f}\".format(avg_cost))\n", 198 | " print(\"Optimization Finished!\")\n", 199 | "\n", 200 | " # Test model\n", 201 | " correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))\n", 202 | " # Calculate accuracy\n", 203 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n", 204 | " print(\"Accuracy:\", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": true 212 | }, 213 | "outputs": [], 214 | "source": [] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python 2", 220 | "language": "python", 221 | "name": "python2" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 2 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython2", 233 | "version": "2.7.12" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /course_3_tf_mnist_cnn.py: -------------------------------------------------------------------------------- 1 | #get the mnist data 2 | # wget http://deeplearning.net/data/mnist/mnist.pkl.gz 3 | 4 | 5 | 6 | 7 | from tensorflow.examples.tutorials.mnist import input_data 8 | mnist = input_data.read_data_sets(".", one_hot=True) 9 | 10 | import tensorflow as tf 11 | 12 | # Parameters 13 | learning_rate = 0.001 14 | training_epochs = 30 15 | batch_size = 100 16 | display_step = 1 17 | 18 | # Network Parameters 19 | n_input = 784 # MNIST data input (img shape: 28*28) 20 | n_classes = 10 # MNIST total classes (0-9 digits) 21 | 22 | # tf Graph input 23 | x = tf.placeholder("float", [None, n_input]) 24 | y = tf.placeholder("float", [None, n_classes]) 25 | 26 | #pre-define the 27 | def conv2d(x, W): 28 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 29 | 30 | def max_pool_2x2(x): 31 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 32 | strides=[1, 2, 2, 1], padding='SAME') 33 | 34 | 35 | # Create model 36 | def multilayer_perceptron(x, weights, biases): 37 | #now, we want to change this to a CNN network 38 | 39 | #first reshape the data to 4-D 40 | 41 | x_image = tf.reshape(x, [-1,28,28,1]) 42 | 43 | #then apply cnn layers 44 | 45 | h_conv1 = tf.nn.relu(conv2d(x_image, weights['conv1']) + biases['conv_b1']) 46 | h_pool1 = max_pool_2x2(h_conv1) 47 | 48 | h_conv2 = tf.nn.relu(conv2d(h_pool1, weights['conv2']) + biases['conv_b2']) 49 | h_pool2 = max_pool_2x2(h_conv2) 50 | 51 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64]) 52 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, weights['fc1']) + 
biases['fc1_b']) 53 | 54 | 55 | # Output layer with linear activation 56 | out_layer = tf.matmul(h_fc1, weights['out']) + biases['out_b'] 57 | return out_layer 58 | 59 | # Store layers weight & biases 60 | weights = { 61 | 'conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])), 62 | 'conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 63 | 'fc1' : tf.Variable(tf.random_normal([7*7*64,256])), 64 | 'out': tf.Variable(tf.random_normal([256,n_classes])) 65 | } 66 | biases = { 67 | 'conv_b1': tf.Variable(tf.random_normal([32])), 68 | 'conv_b2': tf.Variable(tf.random_normal([64])), 69 | 'fc1_b': tf.Variable(tf.random_normal([256])), 70 | 'out_b': tf.Variable(tf.random_normal([n_classes])) 71 | } 72 | 73 | # Construct model 74 | pred = multilayer_perceptron(x, weights, biases) 75 | 76 | # Define loss and optimizer 77 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) 78 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 79 | 80 | # Initializing the variables 81 | init = tf.global_variables_initializer() 82 | 83 | # Launch the graph 84 | with tf.Session() as sess: 85 | sess.run(init) 86 | 87 | # Training cycle 88 | for epoch in range(training_epochs): 89 | avg_cost = 0. 90 | total_batch = int(mnist.train.num_examples/batch_size) 91 | # Loop over all batches 92 | for i in range(total_batch): 93 | batch_x, batch_y = mnist.train.next_batch(batch_size) 94 | # Run optimization op (backprop) and cost op (to get loss value) 95 | _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, 96 | y: batch_y}) 97 | # Compute average loss 98 | avg_cost += c / total_batch 99 | # Display logs per epoch step 100 | if epoch % display_step == 0: 101 | print("Epoch:", '%04d' % (epoch+1), "cost=", \ 102 | "{:.9f}".format(avg_cost)) 103 | print("Optimization Finished!") 104 | 105 | # Test model 106 | correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 107 | # Calculate accuracy 108 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) 109 | print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})) -------------------------------------------------------------------------------- /course_6_obj_detection.txt: -------------------------------------------------------------------------------- 1 | YOLO example: 2 | https://github.com/wiibrew/YOLO_tensorflow 3 | 4 | 5 | Faster RCNN examples: 6 | https://github.com/wiibrew/Faster-RCNN_TF 7 | model desfine and train: 8 | https://github.com/wiibrew/Faster-RCNN_TF/blob/master/lib/networks/VGGnet_train.py 9 | details of layer(loss and RPN): 10 | https://github.com/wiibrew/Faster-RCNN_TF/blob/master/lib/networks/network.py 11 | -------------------------------------------------------------------------------- /course_7_lstm_learn_shakespeare.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false, 8 | "scrolled": true 9 | }, 10 | "outputs": [ 11 | { 12 | "name": "stdout", 13 | "output_type": "stream", 14 | "text": [ 15 | "hdf5 is not supported on this machine (please install/reinstall h5py for optimal experience)\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "from __future__ import absolute_import, division, print_function\n", 21 | "\n", 22 | "import os\n", 23 | "import pickle\n", 24 | "from six.moves import urllib\n", 25 | "\n", 26 | "import tflearn\n", 27 | "from tflearn.data_utils import *\n", 28 | "\n", 29 | "path = 
\"shakespeare_input.txt\"\n", 30 | "char_idx_file = 'char_idx.pickle'\n", 31 | "\n", 32 | "if not os.path.isfile(path):\n", 33 | " urllib.request.urlretrieve(\"https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt\", path)\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Loading previous char_idx\n", 48 | "Vectorizing text...\n", 49 | "Text total length: 4,573,338\n", 50 | "Distinct chars : 67\n", 51 | "Total sequences : 1,524,438\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "maxlen = 25\n", 57 | "\n", 58 | "char_idx = None\n", 59 | "if os.path.isfile(char_idx_file):\n", 60 | " print('Loading previous char_idx')\n", 61 | " char_idx = pickle.load(open(char_idx_file, 'rb'))\n", 62 | "\n", 63 | "X, Y, char_idx = \\\n", 64 | " textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3,\n", 65 | " pre_defined_char_idx=char_idx)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "pickle.dump(char_idx, open(char_idx_file,'wb'))\n", 77 | "\n", 78 | "g = tflearn.input_data([None, maxlen, len(char_idx)])\n", 79 | "g = tflearn.lstm(g, 512, return_seq=True)\n", 80 | "g = tflearn.dropout(g, 0.5)\n", 81 | "g = tflearn.lstm(g, 512, return_seq=True)\n", 82 | "g = tflearn.dropout(g, 0.5)\n", 83 | "g = tflearn.lstm(g, 512)\n", 84 | "g = tflearn.dropout(g, 0.5)\n", 85 | "g = tflearn.fully_connected(g, len(char_idx), activation='softmax')\n", 86 | "g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',\n", 87 | " learning_rate=0.001)\n", 88 | "\n", 89 | "m = tflearn.SequenceGenerator(g, dictionary=char_idx,\n", 90 | " seq_maxlen=maxlen,\n", 91 | " clip_gradients=5.0,\n", 92 | " checkpoint_path='model_shakespeare')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Training Step: 107189 | total loss: \u001b[1m\u001b[32m1.34488\u001b[0m\u001b[0m | time: 561.687s\n", 107 | "| Adam | epoch: 010 | loss: 1.34488 -- iter: 1371904/1371994\n", 108 | "Training Step: 107190 | total loss: \u001b[1m\u001b[32m1.35806\u001b[0m\u001b[0m | time: 600.688s\n", 109 | "| Adam | epoch: 010 | loss: 1.35806 | val_loss: 1.28005 -- iter: 1371994/1371994\n", 110 | "--\n", 111 | "INFO:tensorflow:/home/wei/Documents/DeepLearningCourseCodes/model_shakespeare-107190 is not in all_model_checkpoint_paths. 
Manually adding it.\n", 112 | "WARNING:tensorflow:Error encountered when serializing layer_tensor/LSTM.\n", 113 | "Type is unsupported, or the types of the items don't match field type in CollectionDef.\n", 114 | "'list' object has no attribute 'name'\n", 115 | "WARNING:tensorflow:Error encountered when serializing layer_tensor/Dropout.\n", 116 | "Type is unsupported, or the types of the items don't match field type in CollectionDef.\n", 117 | "'list' object has no attribute 'name'\n", 118 | "WARNING:tensorflow:Error encountered when serializing layer_tensor/LSTM_1.\n", 119 | "Type is unsupported, or the types of the items don't match field type in CollectionDef.\n", 120 | "'list' object has no attribute 'name'\n", 121 | "-- TESTING...\n", 122 | "-- Test with temperature of 1.0 --\n", 123 | "ou see'st with peril I have content, which reason'd let me clear?\n", 124 | "\n", 125 | "GORENIL:\n", 126 | "And what you have stop this occasion is better blame?\n", 127 | "\n", 128 | "PAROLLES:\n", 129 | "Why, with the enument in question not, peace, my father knight.\n", 130 | "I'll so know the night, being done,\n", 131 | "And villany, my doom, is the commanded tarteries.\n", 132 | "\n", 133 | "WARWICK:\n", 134 | "My son is thy place and quickly.\n", 135 | "How now, where wates, let Choefight and walks be bones;\n", 136 | "Our flock, if he have partled there than we\n", 137 | "enteral Mancanimone.\n", 138 | "\n", 139 | "PAGE:\n", 140 | "Carsing which not to seek it,\n", 141 | "Not yet worth and sow, beards of himself.\n", 142 | "\n", 143 | "POINS:\n", 144 | "All his cities, thou wilt there?\n", 145 | "\n", 146 | "DUGLET:\n", 147 | "My man for, goes but yet I do\n", 148 | "loathe, and\n", 149 | "-- Test with temperature of 0.5 --\n", 150 | "ou see'st with peril I have been to keep me to him.\n", 151 | "\n", 152 | "DON ADRIANO DE ARMADO:\n", 153 | "There is no fair course and honest sins\n", 154 | "And be the villain of the care of she is these death,\n", 155 | "And the speedy prince were so soon of the house.\n", 156 | "\n", 157 | "SIR TOBY BELCH:\n", 158 | "Thou shalt stand do the love of her soul,\n", 159 | "The last she will come a man to the rest,\n", 160 | "They so service and enemy.\n", 161 | "\n", 162 | "MARCIUS:\n", 163 | "I hope the name of France?\n", 164 | "\n", 165 | "FALSTAFF:\n", 166 | "Why, what is the father may have the large of a stand.\n", 167 | "\n", 168 | "DEMETRIUS:\n", 169 | "And I would not be my soul to him to him: and I have the love\n", 170 | "And be it in his fight.\n", 171 | "\n", 172 | "ALBANY:\n", 173 | "What was this to the heart;\n", 174 | "The night of the most prince and my most\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "for i in range(10):\n", 180 | " seed = random_sequence_from_textfile(path, maxlen)\n", 181 | " m.fit(X, Y, validation_set=0.1, batch_size=128,\n", 182 | " n_epoch=1, run_id='shakespeare')\n", 183 | " print(\"-- TESTING...\")\n", 184 | " print(\"-- Test with temperature of 1.0 --\")\n", 185 | " print(m.generate(600, temperature=1.0, seq_seed=seed))\n", 186 | " print(\"-- Test with temperature of 0.5 --\")\n", 187 | " print(m.generate(600, temperature=0.5, seq_seed=seed))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "outputs": [], 197 | "source": [] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python 2", 203 | "language": "python", 204 | "name": "python2" 205 | }, 206 | "language_info": { 207 | 
"codemirror_mode": { 208 | "name": "ipython", 209 | "version": 2 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython2", 216 | "version": "2.7.12" 217 | } 218 | }, 219 | "nbformat": 4, 220 | "nbformat_minor": 2 221 | } 222 | -------------------------------------------------------------------------------- /course_7_shakespeare_gen.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import pickle 5 | from six.moves import urllib 6 | 7 | import tflearn 8 | from tflearn.data_utils import * 9 | 10 | path = "shakespeare_input.txt" 11 | char_idx_file = 'char_idx.pickle' 12 | 13 | if not os.path.isfile(path): 14 | urllib.request.urlretrieve("https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt", path) 15 | 16 | maxlen = 25 17 | 18 | char_idx = None 19 | if os.path.isfile(char_idx_file): 20 | print('Loading previous char_idx') 21 | char_idx = pickle.load(open(char_idx_file, 'rb')) 22 | 23 | X, Y, char_idx = \ 24 | textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3, 25 | pre_defined_char_idx=char_idx) 26 | 27 | pickle.dump(char_idx, open(char_idx_file,'wb')) 28 | 29 | g = tflearn.input_data([None, maxlen, len(char_idx)]) 30 | g = tflearn.lstm(g, 512, return_seq=True) 31 | g = tflearn.dropout(g, 0.5) 32 | g = tflearn.lstm(g, 512, return_seq=True) 33 | g = tflearn.dropout(g, 0.5) 34 | g = tflearn.lstm(g, 512) 35 | g = tflearn.dropout(g, 0.5) 36 | g = tflearn.fully_connected(g, len(char_idx), activation='softmax') 37 | g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy', 38 | learning_rate=0.001) 39 | 40 | m = tflearn.SequenceGenerator(g, dictionary=char_idx, 41 | seq_maxlen=maxlen, 42 | clip_gradients=5.0, 43 | checkpoint_path='model_shakespeare') 44 | 45 | for i in range(50): 46 | seed = random_sequence_from_textfile(path, maxlen) 47 | m.fit(X, Y, validation_set=0.1, batch_size=128, 48 | n_epoch=1, run_id='shakespeare') 49 | print("-- TESTING...") 50 | print("-- Test with temperature of 1.0 --") 51 | print(m.generate(600, temperature=1.0, seq_seed=seed)) 52 | print("-- Test with temperature of 0.5 --") 53 | print(m.generate(600, temperature=0.5, seq_seed=seed)) 54 | -------------------------------------------------------------------------------- /course_8_image2txt/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = [":internal"]) 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | exports_files(["LICENSE"]) 6 | 7 | package_group( 8 | name = "internal", 9 | packages = [ 10 | "//im2txt/...", 11 | ], 12 | ) 13 | 14 | py_binary( 15 | name = "build_mscoco_data", 16 | srcs = [ 17 | "data/build_mscoco_data.py", 18 | ], 19 | ) 20 | 21 | sh_binary( 22 | name = "download_and_preprocess_mscoco", 23 | srcs = ["data/download_and_preprocess_mscoco.sh"], 24 | data = [ 25 | ":build_mscoco_data", 26 | ], 27 | ) 28 | 29 | py_library( 30 | name = "configuration", 31 | srcs = ["configuration.py"], 32 | srcs_version = "PY2AND3", 33 | ) 34 | 35 | py_library( 36 | name = "show_and_tell_model", 37 | srcs = ["show_and_tell_model.py"], 38 | srcs_version = "PY2AND3", 39 | deps = [ 40 | "//im2txt/ops:image_embedding", 41 | "//im2txt/ops:image_processing", 42 | "//im2txt/ops:inputs", 43 | ], 44 | ) 45 | 46 | py_test( 47 | name = 
"show_and_tell_model_test", 48 | size = "large", 49 | srcs = ["show_and_tell_model_test.py"], 50 | deps = [ 51 | ":configuration", 52 | ":show_and_tell_model", 53 | ], 54 | ) 55 | 56 | py_library( 57 | name = "inference_wrapper", 58 | srcs = ["inference_wrapper.py"], 59 | srcs_version = "PY2AND3", 60 | deps = [ 61 | ":show_and_tell_model", 62 | "//im2txt/inference_utils:inference_wrapper_base", 63 | ], 64 | ) 65 | 66 | py_binary( 67 | name = "train", 68 | srcs = ["train.py"], 69 | srcs_version = "PY2AND3", 70 | deps = [ 71 | ":configuration", 72 | ":show_and_tell_model", 73 | ], 74 | ) 75 | 76 | py_binary( 77 | name = "evaluate", 78 | srcs = ["evaluate.py"], 79 | srcs_version = "PY2AND3", 80 | deps = [ 81 | ":configuration", 82 | ":show_and_tell_model", 83 | ], 84 | ) 85 | 86 | py_binary( 87 | name = "run_inference", 88 | srcs = ["run_inference.py"], 89 | srcs_version = "PY2AND3", 90 | deps = [ 91 | ":configuration", 92 | ":inference_wrapper", 93 | "//im2txt/inference_utils:caption_generator", 94 | "//im2txt/inference_utils:vocabulary", 95 | ], 96 | ) 97 | -------------------------------------------------------------------------------- /course_8_image2txt/configuration.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Image-to-text model and training configurations.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | class ModelConfig(object): 24 | """Wrapper class for model hyperparameters.""" 25 | 26 | def __init__(self): 27 | """Sets the default model hyperparameters.""" 28 | # File pattern of sharded TFRecord file containing SequenceExample protos. 29 | # Must be provided in training and evaluation modes. 30 | self.input_file_pattern = None 31 | 32 | # Image format ("jpeg" or "png"). 33 | self.image_format = "jpeg" 34 | 35 | # Approximate number of values per input shard. Used to ensure sufficient 36 | # mixing between shards in training. 37 | self.values_per_input_shard = 2300 38 | # Minimum number of shards to keep in the input queue. 39 | self.input_queue_capacity_factor = 2 40 | # Number of threads for prefetching SequenceExample protos. 41 | self.num_input_reader_threads = 1 42 | 43 | # Name of the SequenceExample context feature containing image data. 44 | self.image_feature_name = "image/data" 45 | # Name of the SequenceExample feature list containing integer captions. 46 | self.caption_feature_name = "image/caption_ids" 47 | 48 | # Number of unique words in the vocab (plus 1, for ). 49 | # The default value is larger than the expected actual vocab size to allow 50 | # for differences between tokenizer versions used in preprocessing. 
There is 51 | # no harm in using a value greater than the actual vocab size, but using a 52 | # value less than the actual vocab size will result in an error. 53 | self.vocab_size = 12000 54 | 55 | # Number of threads for image preprocessing. Should be a multiple of 2. 56 | self.num_preprocess_threads = 4 57 | 58 | # Batch size. 59 | self.batch_size = 32 60 | 61 | # File containing an Inception v3 checkpoint to initialize the variables 62 | # of the Inception model. Must be provided when starting training for the 63 | # first time. 64 | self.inception_checkpoint_file = None 65 | 66 | # Dimensions of Inception v3 input images. 67 | self.image_height = 299 68 | self.image_width = 299 69 | 70 | # Scale used to initialize model variables. 71 | self.initializer_scale = 0.08 72 | 73 | # LSTM input and output dimensionality, respectively. 74 | self.embedding_size = 512 75 | self.num_lstm_units = 512 76 | 77 | # If < 1.0, the dropout keep probability applied to LSTM variables. 78 | self.lstm_dropout_keep_prob = 0.7 79 | 80 | 81 | class TrainingConfig(object): 82 | """Wrapper class for training hyperparameters.""" 83 | 84 | def __init__(self): 85 | """Sets the default training hyperparameters.""" 86 | # Number of examples per epoch of training data. 87 | self.num_examples_per_epoch = 586363 88 | 89 | # Optimizer for training the model. 90 | self.optimizer = "SGD" 91 | 92 | # Learning rate for the initial phase of training. 93 | self.initial_learning_rate = 2.0 94 | self.learning_rate_decay_factor = 0.5 95 | self.num_epochs_per_decay = 8.0 96 | 97 | # Learning rate when fine tuning the Inception v3 parameters. 98 | self.train_inception_learning_rate = 0.0005 99 | 100 | # If not None, clip gradients to this value. 101 | self.clip_gradients = 5.0 102 | 103 | # How many model checkpoints to keep. 104 | self.max_checkpoints_to_keep = 5 105 | -------------------------------------------------------------------------------- /course_8_image2txt/data/download_and_preprocess_mscoco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess the MSCOCO data set. 18 | # 19 | # The outputs of this script are sharded TFRecord files containing serialized 20 | # SequenceExample protocol buffers. See build_mscoco_data.py for details of how 21 | # the SequenceExample protocol buffers are constructed. 22 | # 23 | # usage: 24 | # ./download_and_preprocess_mscoco.sh 25 | set -e 26 | 27 | if [ -z "$1" ]; then 28 | echo "usage download_and_preproces_mscoco.sh [data dir]" 29 | exit 30 | fi 31 | 32 | if [ "$(uname)" == "Darwin" ]; then 33 | UNZIP="tar -xf" 34 | else 35 | UNZIP="unzip -nq" 36 | fi 37 | 38 | # Create the output directories. 
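# "${1%/}" strips a single trailing slash from the data-directory argument, so
# e.g. "/data/mscoco/" and "/data/mscoco" (illustrative paths) resolve to the
# same OUTPUT_DIR. Raw zips are downloaded and unpacked under
# "${OUTPUT_DIR}/raw-data", while the final TFRecords and word_counts.txt are
# written to OUTPUT_DIR itself.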
39 | OUTPUT_DIR="${1%/}" 40 | SCRATCH_DIR="${OUTPUT_DIR}/raw-data" 41 | mkdir -p "${OUTPUT_DIR}" 42 | mkdir -p "${SCRATCH_DIR}" 43 | CURRENT_DIR=$(pwd) 44 | WORK_DIR="$0.runfiles/im2txt/im2txt" 45 | 46 | # Helper function to download and unpack a .zip file. 47 | function download_and_unzip() { 48 | local BASE_URL=${1} 49 | local FILENAME=${2} 50 | 51 | if [ ! -f ${FILENAME} ]; then 52 | echo "Downloading ${FILENAME} to $(pwd)" 53 | wget -nd -c "${BASE_URL}/${FILENAME}" 54 | else 55 | echo "Skipping download of ${FILENAME}" 56 | fi 57 | echo "Unzipping ${FILENAME}" 58 | ${UNZIP} ${FILENAME} 59 | } 60 | 61 | cd ${SCRATCH_DIR} 62 | 63 | # Download the images. 64 | BASE_IMAGE_URL="http://msvocds.blob.core.windows.net/coco2014" 65 | 66 | TRAIN_IMAGE_FILE="train2014.zip" 67 | download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE} 68 | TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2014" 69 | 70 | VAL_IMAGE_FILE="val2014.zip" 71 | download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE} 72 | VAL_IMAGE_DIR="${SCRATCH_DIR}/val2014" 73 | 74 | # Download the captions. 75 | BASE_CAPTIONS_URL="http://msvocds.blob.core.windows.net/annotations-1-0-3" 76 | CAPTIONS_FILE="captions_train-val2014.zip" 77 | download_and_unzip ${BASE_CAPTIONS_URL} ${CAPTIONS_FILE} 78 | TRAIN_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2014.json" 79 | VAL_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2014.json" 80 | 81 | # Build TFRecords of the image data. 82 | cd "${CURRENT_DIR}" 83 | BUILD_SCRIPT="${WORK_DIR}/build_mscoco_data" 84 | "${BUILD_SCRIPT}" \ 85 | --train_image_dir="${TRAIN_IMAGE_DIR}" \ 86 | --val_image_dir="${VAL_IMAGE_DIR}" \ 87 | --train_captions_file="${TRAIN_CAPTIONS_FILE}" \ 88 | --val_captions_file="${VAL_CAPTIONS_FILE}" \ 89 | --output_dir="${OUTPUT_DIR}" \ 90 | --word_counts_output_file="${OUTPUT_DIR}/word_counts.txt" \ 91 | -------------------------------------------------------------------------------- /course_8_image2txt/evaluate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Evaluate the model. 17 | 18 | This script should be run concurrently with training so that summaries show up 19 | in TensorBoard. 
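
An illustrative invocation (this assumes the im2txt package is importable and
a plain Python run rather than the Bazel "evaluate" py_binary defined in
BUILD; every path and shard pattern below is a placeholder):

  python evaluate.py --input_file_pattern="${DATA_DIR}/val-?????-of-00004" --checkpoint_dir="${MODEL_DIR}/train" --eval_dir="${MODEL_DIR}/eval"

The script then loops forever: every --eval_interval_secs seconds (600 by
default) it reloads the newest checkpoint from --checkpoint_dir, computes
perplexity-per-word over --num_eval_examples examples, and writes the result
as a summary to --eval_dir so it shows up alongside the training curves.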
20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import math 27 | import os.path 28 | import time 29 | 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | 34 | from im2txt import configuration 35 | from im2txt import show_and_tell_model 36 | 37 | FLAGS = tf.flags.FLAGS 38 | 39 | tf.flags.DEFINE_string("input_file_pattern", "", 40 | "File pattern of sharded TFRecord input files.") 41 | tf.flags.DEFINE_string("checkpoint_dir", "", 42 | "Directory containing model checkpoints.") 43 | tf.flags.DEFINE_string("eval_dir", "", "Directory to write event logs.") 44 | 45 | tf.flags.DEFINE_integer("eval_interval_secs", 600, 46 | "Interval between evaluation runs.") 47 | tf.flags.DEFINE_integer("num_eval_examples", 10132, 48 | "Number of examples for evaluation.") 49 | 50 | tf.flags.DEFINE_integer("min_global_step", 5000, 51 | "Minimum global step to run evaluation.") 52 | 53 | tf.logging.set_verbosity(tf.logging.INFO) 54 | 55 | 56 | def evaluate_model(sess, model, global_step, summary_writer, summary_op): 57 | """Computes perplexity-per-word over the evaluation dataset. 58 | 59 | Summaries and perplexity-per-word are written out to the eval directory. 60 | 61 | Args: 62 | sess: Session object. 63 | model: Instance of ShowAndTellModel; the model to evaluate. 64 | global_step: Integer; global step of the model checkpoint. 65 | summary_writer: Instance of FileWriter. 66 | summary_op: Op for generating model summaries. 67 | """ 68 | # Log model summaries on a single batch. 69 | summary_str = sess.run(summary_op) 70 | summary_writer.add_summary(summary_str, global_step) 71 | 72 | # Compute perplexity over the entire dataset. 73 | num_eval_batches = int( 74 | math.ceil(FLAGS.num_eval_examples / model.config.batch_size)) 75 | 76 | start_time = time.time() 77 | sum_losses = 0. 78 | sum_weights = 0. 79 | for i in xrange(num_eval_batches): 80 | cross_entropy_losses, weights = sess.run([ 81 | model.target_cross_entropy_losses, 82 | model.target_cross_entropy_loss_weights 83 | ]) 84 | sum_losses += np.sum(cross_entropy_losses * weights) 85 | sum_weights += np.sum(weights) 86 | if not i % 100: 87 | tf.logging.info("Computed losses for %d of %d batches.", i + 1, 88 | num_eval_batches) 89 | eval_time = time.time() - start_time 90 | 91 | perplexity = math.exp(sum_losses / sum_weights) 92 | tf.logging.info("Perplexity = %f (%.2g sec)", perplexity, eval_time) 93 | 94 | # Log perplexity to the FileWriter. 95 | summary = tf.Summary() 96 | value = summary.value.add() 97 | value.simple_value = perplexity 98 | value.tag = "Perplexity" 99 | summary_writer.add_summary(summary, global_step) 100 | 101 | # Write the Events file to the eval directory. 102 | summary_writer.flush() 103 | tf.logging.info("Finished processing evaluation at global step %d.", 104 | global_step) 105 | 106 | 107 | def run_once(model, saver, summary_writer, summary_op): 108 | """Evaluates the latest model checkpoint. 109 | 110 | Args: 111 | model: Instance of ShowAndTellModel; the model to evaluate. 112 | saver: Instance of tf.train.Saver for restoring model Variables. 113 | summary_writer: Instance of FileWriter. 114 | summary_op: Op for generating model summaries. 115 | """ 116 | model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) 117 | if not model_path: 118 | tf.logging.info("Skipping evaluation. 
No checkpoint found in: %s", 119 | FLAGS.checkpoint_dir) 120 | return 121 | 122 | with tf.Session() as sess: 123 | # Load model from checkpoint. 124 | tf.logging.info("Loading model from checkpoint: %s", model_path) 125 | saver.restore(sess, model_path) 126 | global_step = tf.train.global_step(sess, model.global_step.name) 127 | tf.logging.info("Successfully loaded %s at global step = %d.", 128 | os.path.basename(model_path), global_step) 129 | if global_step < FLAGS.min_global_step: 130 | tf.logging.info("Skipping evaluation. Global step = %d < %d", global_step, 131 | FLAGS.min_global_step) 132 | return 133 | 134 | # Start the queue runners. 135 | coord = tf.train.Coordinator() 136 | threads = tf.train.start_queue_runners(coord=coord) 137 | 138 | # Run evaluation on the latest checkpoint. 139 | try: 140 | evaluate_model( 141 | sess=sess, 142 | model=model, 143 | global_step=global_step, 144 | summary_writer=summary_writer, 145 | summary_op=summary_op) 146 | except Exception, e: # pylint: disable=broad-except 147 | tf.logging.error("Evaluation failed.") 148 | coord.request_stop(e) 149 | 150 | coord.request_stop() 151 | coord.join(threads, stop_grace_period_secs=10) 152 | 153 | 154 | def run(): 155 | """Runs evaluation in a loop, and logs summaries to TensorBoard.""" 156 | # Create the evaluation directory if it doesn't exist. 157 | eval_dir = FLAGS.eval_dir 158 | if not tf.gfile.IsDirectory(eval_dir): 159 | tf.logging.info("Creating eval directory: %s", eval_dir) 160 | tf.gfile.MakeDirs(eval_dir) 161 | 162 | g = tf.Graph() 163 | with g.as_default(): 164 | # Build the model for evaluation. 165 | model_config = configuration.ModelConfig() 166 | model_config.input_file_pattern = FLAGS.input_file_pattern 167 | model = show_and_tell_model.ShowAndTellModel(model_config, mode="eval") 168 | model.build() 169 | 170 | # Create the Saver to restore model Variables. 171 | saver = tf.train.Saver() 172 | 173 | # Create the summary operation and the summary writer. 174 | summary_op = tf.summary.merge_all() 175 | summary_writer = tf.summary.FileWriter(eval_dir) 176 | 177 | g.finalize() 178 | 179 | # Run a new evaluation run every eval_interval_secs. 
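  # Each pass records a start time, evaluates the latest checkpoint via
  # run_once(), and then sleeps for whatever remains of eval_interval_secs,
  # so successive evaluations stay roughly eval_interval_secs apart even if a
  # single evaluation takes several minutes.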
180 | while True: 181 | start = time.time() 182 | tf.logging.info("Starting evaluation at " + time.strftime( 183 | "%Y-%m-%d-%H:%M:%S", time.localtime())) 184 | run_once(model, saver, summary_writer, summary_op) 185 | time_to_next_eval = start + FLAGS.eval_interval_secs - time.time() 186 | if time_to_next_eval > 0: 187 | time.sleep(time_to_next_eval) 188 | 189 | 190 | def main(unused_argv): 191 | assert FLAGS.input_file_pattern, "--input_file_pattern is required" 192 | assert FLAGS.checkpoint_dir, "--checkpoint_dir is required" 193 | assert FLAGS.eval_dir, "--eval_dir is required" 194 | run() 195 | 196 | 197 | if __name__ == "__main__": 198 | tf.app.run() 199 | -------------------------------------------------------------------------------- /course_8_image2txt/inference_utils/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//im2txt:internal"]) 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | exports_files(["LICENSE"]) 6 | 7 | py_library( 8 | name = "inference_wrapper_base", 9 | srcs = ["inference_wrapper_base.py"], 10 | srcs_version = "PY2AND3", 11 | ) 12 | 13 | py_library( 14 | name = "vocabulary", 15 | srcs = ["vocabulary.py"], 16 | srcs_version = "PY2AND3", 17 | ) 18 | 19 | py_library( 20 | name = "caption_generator", 21 | srcs = ["caption_generator.py"], 22 | srcs_version = "PY2AND3", 23 | ) 24 | 25 | py_test( 26 | name = "caption_generator_test", 27 | srcs = ["caption_generator_test.py"], 28 | deps = [ 29 | ":caption_generator", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /course_8_image2txt/inference_utils/caption_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Class for generating captions from an image-to-text model.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import heapq 22 | import math 23 | 24 | 25 | import numpy as np 26 | 27 | 28 | class Caption(object): 29 | """Represents a complete or partial caption.""" 30 | 31 | def __init__(self, sentence, state, logprob, score, metadata=None): 32 | """Initializes the Caption. 33 | 34 | Args: 35 | sentence: List of word ids in the caption. 36 | state: Model state after generating the previous word. 37 | logprob: Log-probability of the caption. 38 | score: Score of the caption. 39 | metadata: Optional metadata associated with the partial sentence. If not 40 | None, a list of strings with the same length as 'sentence'. 
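
    A tiny illustration (the ids, state value and scores here are made up):

      cap = Caption(sentence=[0, 7, 1], state=lstm_state, logprob=-2.3,
                    score=-2.3, metadata=["", "a", ""])

    In beam search below, score is the accumulated logprob, optionally
    normalized by the caption length (see length_normalization_factor in
    CaptionGenerator).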
41 | """ 42 | self.sentence = sentence 43 | self.state = state 44 | self.logprob = logprob 45 | self.score = score 46 | self.metadata = metadata 47 | 48 | def __cmp__(self, other): 49 | """Compares Captions by score.""" 50 | assert isinstance(other, Caption) 51 | if self.score == other.score: 52 | return 0 53 | elif self.score < other.score: 54 | return -1 55 | else: 56 | return 1 57 | 58 | # For Python 3 compatibility (__cmp__ is deprecated). 59 | def __lt__(self, other): 60 | assert isinstance(other, Caption) 61 | return self.score < other.score 62 | 63 | # Also for Python 3 compatibility. 64 | def __eq__(self, other): 65 | assert isinstance(other, Caption) 66 | return self.score == other.score 67 | 68 | 69 | class TopN(object): 70 | """Maintains the top n elements of an incrementally provided set.""" 71 | 72 | def __init__(self, n): 73 | self._n = n 74 | self._data = [] 75 | 76 | def size(self): 77 | assert self._data is not None 78 | return len(self._data) 79 | 80 | def push(self, x): 81 | """Pushes a new element.""" 82 | assert self._data is not None 83 | if len(self._data) < self._n: 84 | heapq.heappush(self._data, x) 85 | else: 86 | heapq.heappushpop(self._data, x) 87 | 88 | def extract(self, sort=False): 89 | """Extracts all elements from the TopN. This is a destructive operation. 90 | 91 | The only method that can be called immediately after extract() is reset(). 92 | 93 | Args: 94 | sort: Whether to return the elements in descending sorted order. 95 | 96 | Returns: 97 | A list of data; the top n elements provided to the set. 98 | """ 99 | assert self._data is not None 100 | data = self._data 101 | self._data = None 102 | if sort: 103 | data.sort(reverse=True) 104 | return data 105 | 106 | def reset(self): 107 | """Returns the TopN to an empty state.""" 108 | self._data = [] 109 | 110 | 111 | class CaptionGenerator(object): 112 | """Class to generate captions from an image-to-text model.""" 113 | 114 | def __init__(self, 115 | model, 116 | vocab, 117 | beam_size=3, 118 | max_caption_length=20, 119 | length_normalization_factor=0.0): 120 | """Initializes the generator. 121 | 122 | Args: 123 | model: Object encapsulating a trained image-to-text model. Must have 124 | methods feed_image() and inference_step(). For example, an instance of 125 | InferenceWrapperBase. 126 | vocab: A Vocabulary object. 127 | beam_size: Beam size to use when generating captions. 128 | max_caption_length: The maximum caption length before stopping the search. 129 | length_normalization_factor: If != 0, a number x such that captions are 130 | scored by logprob/length^x, rather than logprob. This changes the 131 | relative scores of captions depending on their lengths. For example, if 132 | x > 0 then longer captions will be favored. 133 | """ 134 | self.vocab = vocab 135 | self.model = model 136 | 137 | self.beam_size = beam_size 138 | self.max_caption_length = max_caption_length 139 | self.length_normalization_factor = length_normalization_factor 140 | 141 | def beam_search(self, sess, encoded_image): 142 | """Runs beam search caption generation on a single image. 143 | 144 | Args: 145 | sess: TensorFlow Session object. 146 | encoded_image: An encoded image string. 147 | 148 | Returns: 149 | A list of Caption sorted by descending score. 150 | """ 151 | # Feed in the image to get the initial state. 
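    # Overview of the search below: partial_captions is a TopN heap of the
    # best beam_size in-progress sentences, and complete_captions collects
    # candidates that have produced vocab.end_id. Each step extends every
    # partial caption with its beam_size most probable next words and keeps
    # only the highest-scoring extensions.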
152 | initial_state = self.model.feed_image(sess, encoded_image) 153 | 154 | initial_beam = Caption( 155 | sentence=[self.vocab.start_id], 156 | state=initial_state[0], 157 | logprob=0.0, 158 | score=0.0, 159 | metadata=[""]) 160 | partial_captions = TopN(self.beam_size) 161 | partial_captions.push(initial_beam) 162 | complete_captions = TopN(self.beam_size) 163 | 164 | # Run beam search. 165 | for _ in range(self.max_caption_length - 1): 166 | partial_captions_list = partial_captions.extract() 167 | partial_captions.reset() 168 | input_feed = np.array([c.sentence[-1] for c in partial_captions_list]) 169 | state_feed = np.array([c.state for c in partial_captions_list]) 170 | 171 | softmax, new_states, metadata = self.model.inference_step(sess, 172 | input_feed, 173 | state_feed) 174 | 175 | for i, partial_caption in enumerate(partial_captions_list): 176 | word_probabilities = softmax[i] 177 | state = new_states[i] 178 | # For this partial caption, get the beam_size most probable next words. 179 | words_and_probs = list(enumerate(word_probabilities)) 180 | words_and_probs.sort(key=lambda x: -x[1]) 181 | words_and_probs = words_and_probs[0:self.beam_size] 182 | # Each next word gives a new partial caption. 183 | for w, p in words_and_probs: 184 | if p < 1e-12: 185 | continue # Avoid log(0). 186 | sentence = partial_caption.sentence + [w] 187 | logprob = partial_caption.logprob + math.log(p) 188 | score = logprob 189 | if metadata: 190 | metadata_list = partial_caption.metadata + [metadata[i]] 191 | else: 192 | metadata_list = None 193 | if w == self.vocab.end_id: 194 | if self.length_normalization_factor > 0: 195 | score /= len(sentence)**self.length_normalization_factor 196 | beam = Caption(sentence, state, logprob, score, metadata_list) 197 | complete_captions.push(beam) 198 | else: 199 | beam = Caption(sentence, state, logprob, score, metadata_list) 200 | partial_captions.push(beam) 201 | if partial_captions.size() == 0: 202 | # We have run out of partial candidates; happens when beam_size = 1. 203 | break 204 | 205 | # If we have no complete captions then fall back to the partial captions. 206 | # But never output a mixture of complete and partial captions because a 207 | # partial caption could have a higher score than all the complete captions. 208 | if not complete_captions.size(): 209 | complete_captions = partial_captions 210 | 211 | return complete_captions.extract(sort=True) 212 | -------------------------------------------------------------------------------- /course_8_image2txt/inference_utils/caption_generator_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Unit tests for CaptionGenerator.""" 16 | 17 | import math 18 | 19 | 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | from im2txt.inference_utils import caption_generator 25 | 26 | 27 | class FakeVocab(object): 28 | """Fake Vocabulary for testing purposes.""" 29 | 30 | def __init__(self): 31 | self.start_id = 0 # Word id denoting sentence start. 32 | self.end_id = 1 # Word id denoting sentence end. 33 | 34 | 35 | class FakeModel(object): 36 | """Fake model for testing purposes.""" 37 | 38 | def __init__(self): 39 | # Number of words in the vocab. 40 | self._vocab_size = 12 41 | 42 | # Dimensionality of the nominal model state. 43 | self._state_size = 1 44 | 45 | # Map of previous word to the probability distribution of the next word. 46 | self._probabilities = { 47 | 0: {1: 0.1, 48 | 2: 0.2, 49 | 3: 0.3, 50 | 4: 0.4}, 51 | 2: {5: 0.1, 52 | 6: 0.9}, 53 | 3: {1: 0.1, 54 | 7: 0.4, 55 | 8: 0.5}, 56 | 4: {1: 0.3, 57 | 9: 0.3, 58 | 10: 0.4}, 59 | 5: {1: 1.0}, 60 | 6: {1: 1.0}, 61 | 7: {1: 1.0}, 62 | 8: {1: 1.0}, 63 | 9: {1: 0.5, 64 | 11: 0.5}, 65 | 10: {1: 1.0}, 66 | 11: {1: 1.0}, 67 | } 68 | 69 | # pylint: disable=unused-argument 70 | 71 | def feed_image(self, sess, encoded_image): 72 | # Return a nominal model state. 73 | return np.zeros([1, self._state_size]) 74 | 75 | def inference_step(self, sess, input_feed, state_feed): 76 | # Compute the matrix of softmax distributions for the next batch of words. 77 | batch_size = input_feed.shape[0] 78 | softmax_output = np.zeros([batch_size, self._vocab_size]) 79 | for batch_index, word_id in enumerate(input_feed): 80 | for next_word, probability in self._probabilities[word_id].items(): 81 | softmax_output[batch_index, next_word] = probability 82 | 83 | # Nominal state and metadata. 84 | new_state = np.zeros([batch_size, self._state_size]) 85 | metadata = None 86 | 87 | return softmax_output, new_state, metadata 88 | 89 | # pylint: enable=unused-argument 90 | 91 | 92 | class CaptionGeneratorTest(tf.test.TestCase): 93 | 94 | def _assertExpectedCaptions(self, 95 | expected_captions, 96 | beam_size=3, 97 | max_caption_length=20, 98 | length_normalization_factor=0): 99 | """Tests that beam search generates the expected captions. 100 | 101 | Args: 102 | expected_captions: A sequence of pairs (sentence, probability), where 103 | sentence is a list of integer ids and probability is a float in [0, 1]. 104 | beam_size: Parameter passed to beam_search(). 105 | max_caption_length: Parameter passed to beam_search(). 106 | length_normalization_factor: Parameter passed to beam_search(). 107 | """ 108 | expected_sentences = [c[0] for c in expected_captions] 109 | expected_probabilities = [c[1] for c in expected_captions] 110 | 111 | # Generate captions. 112 | generator = caption_generator.CaptionGenerator( 113 | model=FakeModel(), 114 | vocab=FakeVocab(), 115 | beam_size=beam_size, 116 | max_caption_length=max_caption_length, 117 | length_normalization_factor=length_normalization_factor) 118 | actual_captions = generator.beam_search(sess=None, encoded_image=None) 119 | 120 | actual_sentences = [c.sentence for c in actual_captions] 121 | actual_probabilities = [math.exp(c.logprob) for c in actual_captions] 122 | 123 | self.assertEqual(expected_sentences, actual_sentences) 124 | self.assertAllClose(expected_probabilities, actual_probabilities) 125 | 126 | def testBeamSize(self): 127 | # Beam size = 1. 
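    # With beam_size=1 the search greedily follows the single most probable
    # next word in FakeModel's transition table: 0 -> 4 (p=0.4), then
    # 4 -> 10 (p=0.4), then 10 -> 1 (p=1.0, the end id), giving the caption
    # [0, 4, 10, 1] with probability 0.4 * 0.4 * 1.0 = 0.16.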
128 | expected = [([0, 4, 10, 1], 0.16)] 129 | self._assertExpectedCaptions(expected, beam_size=1) 130 | 131 | # Beam size = 2. 132 | expected = [([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15)] 133 | self._assertExpectedCaptions(expected, beam_size=2) 134 | 135 | # Beam size = 3. 136 | expected = [ 137 | ([0, 2, 6, 1], 0.18), ([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15) 138 | ] 139 | self._assertExpectedCaptions(expected, beam_size=3) 140 | 141 | def testMaxLength(self): 142 | # Max length = 1. 143 | expected = [([0], 1.0)] 144 | self._assertExpectedCaptions(expected, max_caption_length=1) 145 | 146 | # Max length = 2. 147 | # There are no complete sentences, so partial sentences are returned. 148 | expected = [([0, 4], 0.4), ([0, 3], 0.3), ([0, 2], 0.2)] 149 | self._assertExpectedCaptions(expected, max_caption_length=2) 150 | 151 | # Max length = 3. 152 | # There is at least one complete sentence, so only complete sentences are 153 | # returned. 154 | expected = [([0, 4, 1], 0.12), ([0, 3, 1], 0.03)] 155 | self._assertExpectedCaptions(expected, max_caption_length=3) 156 | 157 | # Max length = 4. 158 | expected = [ 159 | ([0, 2, 6, 1], 0.18), ([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15) 160 | ] 161 | self._assertExpectedCaptions(expected, max_caption_length=4) 162 | 163 | def testLengthNormalization(self): 164 | # Length normalization factor = 3. 165 | # The longest caption is returned first, despite having low probability, 166 | # because it has the highest log(probability)/length**3. 167 | expected = [ 168 | ([0, 4, 9, 11, 1], 0.06), 169 | ([0, 2, 6, 1], 0.18), 170 | ([0, 4, 10, 1], 0.16), 171 | ([0, 3, 8, 1], 0.15), 172 | ] 173 | self._assertExpectedCaptions( 174 | expected, beam_size=4, length_normalization_factor=3) 175 | 176 | 177 | if __name__ == '__main__': 178 | tf.test.main() 179 | -------------------------------------------------------------------------------- /course_8_image2txt/inference_utils/inference_wrapper_base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Base wrapper class for performing inference with an image-to-text model. 16 | 17 | Subclasses must implement the following methods: 18 | 19 | build_model(): 20 | Builds the model for inference and returns the model object. 21 | 22 | feed_image(): 23 | Takes an encoded image and returns the initial model state, where "state" 24 | is a numpy array whose specifics are defined by the subclass, e.g. 25 | concatenated LSTM state. It's assumed that feed_image() will be called 26 | precisely once at the start of inference for each image. Subclasses may 27 | compute and/or save per-image internal context in this method. 28 | 29 | inference_step(): 30 | Takes a batch of inputs and states at a single time-step. 
Returns the 31 | softmax output corresponding to the inputs, and the new states of the batch. 32 | Optionally also returns metadata about the current inference step, e.g. a 33 | serialized numpy array containing activations from a particular model layer. 34 | 35 | Client usage: 36 | 1. Build the model inference graph via build_graph_from_config() or 37 | build_graph_from_proto(). 38 | 2. Call the resulting restore_fn to load the model checkpoint. 39 | 3. For each image in a batch of images: 40 | a) Call feed_image() once to get the initial state. 41 | b) For each step of caption generation, call inference_step(). 42 | """ 43 | 44 | from __future__ import absolute_import 45 | from __future__ import division 46 | from __future__ import print_function 47 | 48 | import os.path 49 | 50 | 51 | import tensorflow as tf 52 | 53 | # pylint: disable=unused-argument 54 | 55 | 56 | class InferenceWrapperBase(object): 57 | """Base wrapper class for performing inference with an image-to-text model.""" 58 | 59 | def __init__(self): 60 | pass 61 | 62 | def build_model(self, model_config): 63 | """Builds the model for inference. 64 | 65 | Args: 66 | model_config: Object containing configuration for building the model. 67 | 68 | Returns: 69 | model: The model object. 70 | """ 71 | tf.logging.fatal("Please implement build_model in subclass") 72 | 73 | def _create_restore_fn(self, checkpoint_path, saver): 74 | """Creates a function that restores a model from checkpoint. 75 | 76 | Args: 77 | checkpoint_path: Checkpoint file or a directory containing a checkpoint 78 | file. 79 | saver: Saver for restoring variables from the checkpoint file. 80 | 81 | Returns: 82 | restore_fn: A function such that restore_fn(sess) loads model variables 83 | from the checkpoint file. 84 | 85 | Raises: 86 | ValueError: If checkpoint_path does not refer to a checkpoint file or a 87 | directory containing a checkpoint file. 88 | """ 89 | if tf.gfile.IsDirectory(checkpoint_path): 90 | checkpoint_path = tf.train.latest_checkpoint(checkpoint_path) 91 | if not checkpoint_path: 92 | raise ValueError("No checkpoint file found in: %s" % checkpoint_path) 93 | 94 | def _restore_fn(sess): 95 | tf.logging.info("Loading model from checkpoint: %s", checkpoint_path) 96 | saver.restore(sess, checkpoint_path) 97 | tf.logging.info("Successfully loaded checkpoint: %s", 98 | os.path.basename(checkpoint_path)) 99 | 100 | return _restore_fn 101 | 102 | def build_graph_from_config(self, model_config, checkpoint_path): 103 | """Builds the inference graph from a configuration object. 104 | 105 | Args: 106 | model_config: Object containing configuration for building the model. 107 | checkpoint_path: Checkpoint file or a directory containing a checkpoint 108 | file. 109 | 110 | Returns: 111 | restore_fn: A function such that restore_fn(sess) loads model variables 112 | from the checkpoint file. 113 | """ 114 | tf.logging.info("Building model.") 115 | self.build_model(model_config) 116 | saver = tf.train.Saver() 117 | 118 | return self._create_restore_fn(checkpoint_path, saver) 119 | 120 | def build_graph_from_proto(self, graph_def_file, saver_def_file, 121 | checkpoint_path): 122 | """Builds the inference graph from serialized GraphDef and SaverDef protos. 123 | 124 | Args: 125 | graph_def_file: File containing a serialized GraphDef proto. 126 | saver_def_file: File containing a serialized SaverDef proto. 127 | checkpoint_path: Checkpoint file or a directory containing a checkpoint 128 | file. 
129 | 130 | Returns: 131 | restore_fn: A function such that restore_fn(sess) loads model variables 132 | from the checkpoint file. 133 | """ 134 | # Load the Graph. 135 | tf.logging.info("Loading GraphDef from file: %s", graph_def_file) 136 | graph_def = tf.GraphDef() 137 | with tf.gfile.FastGFile(graph_def_file, "rb") as f: 138 | graph_def.ParseFromString(f.read()) 139 | tf.import_graph_def(graph_def, name="") 140 | 141 | # Load the Saver. 142 | tf.logging.info("Loading SaverDef from file: %s", saver_def_file) 143 | saver_def = tf.train.SaverDef() 144 | with tf.gfile.FastGFile(saver_def_file, "rb") as f: 145 | saver_def.ParseFromString(f.read()) 146 | saver = tf.train.Saver(saver_def=saver_def) 147 | 148 | return self._create_restore_fn(checkpoint_path, saver) 149 | 150 | def feed_image(self, sess, encoded_image): 151 | """Feeds an image and returns the initial model state. 152 | 153 | See comments at the top of file. 154 | 155 | Args: 156 | sess: TensorFlow Session object. 157 | encoded_image: An encoded image string. 158 | 159 | Returns: 160 | state: A numpy array of shape [1, state_size]. 161 | """ 162 | tf.logging.fatal("Please implement feed_image in subclass") 163 | 164 | def inference_step(self, sess, input_feed, state_feed): 165 | """Runs one step of inference. 166 | 167 | Args: 168 | sess: TensorFlow Session object. 169 | input_feed: A numpy array of shape [batch_size]. 170 | state_feed: A numpy array of shape [batch_size, state_size]. 171 | 172 | Returns: 173 | softmax_output: A numpy array of shape [batch_size, vocab_size]. 174 | new_state: A numpy array of shape [batch_size, state_size]. 175 | metadata: Optional. If not None, a string containing metadata about the 176 | current inference step (e.g. serialized numpy array containing 177 | activations from a particular model layer.). 178 | """ 179 | tf.logging.fatal("Please implement inference_step in subclass") 180 | 181 | # pylint: enable=unused-argument 182 | -------------------------------------------------------------------------------- /course_8_image2txt/inference_utils/vocabulary.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Vocabulary class for an image-to-text model.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | 24 | 25 | class Vocabulary(object): 26 | """Vocabulary class for an image-to-text model.""" 27 | 28 | def __init__(self, 29 | vocab_file, 30 | start_word="", 31 | end_word="", 32 | unk_word=""): 33 | """Initializes the vocabulary. 
34 | 35 | Args: 36 | vocab_file: File containing the vocabulary, where the words are the first 37 | whitespace-separated token on each line (other tokens are ignored) and 38 | the word ids are the corresponding line numbers. 39 | start_word: Special word denoting sentence start. 40 | end_word: Special word denoting sentence end. 41 | unk_word: Special word denoting unknown words. 42 | """ 43 | if not tf.gfile.Exists(vocab_file): 44 | tf.logging.fatal("Vocab file %s not found.", vocab_file) 45 | tf.logging.info("Initializing vocabulary from file: %s", vocab_file) 46 | 47 | with tf.gfile.GFile(vocab_file, mode="r") as f: 48 | reverse_vocab = list(f.readlines()) 49 | reverse_vocab = [line.split()[0] for line in reverse_vocab] 50 | assert start_word in reverse_vocab 51 | assert end_word in reverse_vocab 52 | if unk_word not in reverse_vocab: 53 | reverse_vocab.append(unk_word) 54 | vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) 55 | 56 | tf.logging.info("Created vocabulary with %d words" % len(vocab)) 57 | 58 | self.vocab = vocab # vocab[word] = id 59 | self.reverse_vocab = reverse_vocab # reverse_vocab[id] = word 60 | 61 | # Save special word ids. 62 | self.start_id = vocab[start_word] 63 | self.end_id = vocab[end_word] 64 | self.unk_id = vocab[unk_word] 65 | 66 | def word_to_id(self, word): 67 | """Returns the integer word id of a word string.""" 68 | if word in self.vocab: 69 | return self.vocab[word] 70 | else: 71 | return self.unk_id 72 | 73 | def id_to_word(self, word_id): 74 | """Returns the word string of an integer word id.""" 75 | if word_id >= len(self.reverse_vocab): 76 | return self.reverse_vocab[self.unk_id] 77 | else: 78 | return self.reverse_vocab[word_id] 79 | -------------------------------------------------------------------------------- /course_8_image2txt/inference_wrapper.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Model wrapper class for performing inference with a ShowAndTellModel.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | 24 | from im2txt import show_and_tell_model 25 | from im2txt.inference_utils import inference_wrapper_base 26 | 27 | 28 | class InferenceWrapper(inference_wrapper_base.InferenceWrapperBase): 29 | """Model wrapper class for performing inference with a ShowAndTellModel.""" 30 | 31 | def __init__(self): 32 | super(InferenceWrapper, self).__init__() 33 | 34 | def build_model(self, model_config): 35 | model = show_and_tell_model.ShowAndTellModel(model_config, mode="inference") 36 | model.build() 37 | return model 38 | 39 | def feed_image(self, sess, encoded_image): 40 | initial_state = sess.run(fetches="lstm/initial_state:0", 41 | feed_dict={"image_feed:0": encoded_image}) 42 | return initial_state 43 | 44 | def inference_step(self, sess, input_feed, state_feed): 45 | softmax_output, state_output = sess.run( 46 | fetches=["softmax:0", "lstm/state:0"], 47 | feed_dict={ 48 | "input_feed:0": input_feed, 49 | "lstm/state_feed:0": state_feed, 50 | }) 51 | return softmax_output, state_output, None 52 | -------------------------------------------------------------------------------- /course_8_image2txt/ops/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//im2txt:internal"]) 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | exports_files(["LICENSE"]) 6 | 7 | py_library( 8 | name = "image_processing", 9 | srcs = ["image_processing.py"], 10 | srcs_version = "PY2AND3", 11 | ) 12 | 13 | py_library( 14 | name = "image_embedding", 15 | srcs = ["image_embedding.py"], 16 | srcs_version = "PY2AND3", 17 | ) 18 | 19 | py_test( 20 | name = "image_embedding_test", 21 | size = "small", 22 | srcs = ["image_embedding_test.py"], 23 | deps = [ 24 | ":image_embedding", 25 | ], 26 | ) 27 | 28 | py_library( 29 | name = "inputs", 30 | srcs = ["inputs.py"], 31 | srcs_version = "PY2AND3", 32 | ) 33 | -------------------------------------------------------------------------------- /course_8_image2txt/ops/image_embedding.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Image embedding ops.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base 26 | 27 | slim = tf.contrib.slim 28 | 29 | 30 | def inception_v3(images, 31 | trainable=True, 32 | is_training=True, 33 | weight_decay=0.00004, 34 | stddev=0.1, 35 | dropout_keep_prob=0.8, 36 | use_batch_norm=True, 37 | batch_norm_params=None, 38 | add_summaries=True, 39 | scope="InceptionV3"): 40 | """Builds an Inception V3 subgraph for image embeddings. 41 | 42 | Args: 43 | images: A float32 Tensor of shape [batch, height, width, channels]. 44 | trainable: Whether the inception submodel should be trainable or not. 45 | is_training: Boolean indicating training mode or not. 46 | weight_decay: Coefficient for weight regularization. 47 | stddev: The standard deviation of the trunctated normal weight initializer. 48 | dropout_keep_prob: Dropout keep probability. 49 | use_batch_norm: Whether to use batch normalization. 50 | batch_norm_params: Parameters for batch normalization. See 51 | tf.contrib.layers.batch_norm for details. 52 | add_summaries: Whether to add activation summaries. 53 | scope: Optional Variable scope. 54 | 55 | Returns: 56 | end_points: A dictionary of activations from inception_v3 layers. 57 | """ 58 | # Only consider the inception model to be in training mode if it's trainable. 59 | is_inception_model_training = trainable and is_training 60 | 61 | if use_batch_norm: 62 | # Default parameters for batch normalization. 63 | if not batch_norm_params: 64 | batch_norm_params = { 65 | "is_training": is_inception_model_training, 66 | "trainable": trainable, 67 | # Decay for the moving averages. 68 | "decay": 0.9997, 69 | # Epsilon to prevent 0s in variance. 70 | "epsilon": 0.001, 71 | # Collection containing the moving mean and moving variance. 72 | "variables_collections": { 73 | "beta": None, 74 | "gamma": None, 75 | "moving_mean": ["moving_vars"], 76 | "moving_variance": ["moving_vars"], 77 | } 78 | } 79 | else: 80 | batch_norm_params = None 81 | 82 | if trainable: 83 | weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) 84 | else: 85 | weights_regularizer = None 86 | 87 | with tf.variable_scope(scope, "InceptionV3", [images]) as scope: 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=weights_regularizer, 91 | trainable=trainable): 92 | with slim.arg_scope( 93 | [slim.conv2d], 94 | weights_initializer=tf.truncated_normal_initializer(stddev=stddev), 95 | activation_fn=tf.nn.relu, 96 | normalizer_fn=slim.batch_norm, 97 | normalizer_params=batch_norm_params): 98 | net, end_points = inception_v3_base(images, scope=scope) 99 | with tf.variable_scope("logits"): 100 | shape = net.get_shape() 101 | net = slim.avg_pool2d(net, shape[1:3], padding="VALID", scope="pool") 102 | net = slim.dropout( 103 | net, 104 | keep_prob=dropout_keep_prob, 105 | is_training=is_inception_model_training, 106 | scope="dropout") 107 | net = slim.flatten(net, scope="flatten") 108 | 109 | # Add summaries. 
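  # When add_summaries is set, one activation summary is emitted per Inception
  # end point (e.g. "Mixed_7c"), which makes it easy to spot dead or saturated
  # layers in TensorBoard while the captioning model trains.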
110 | if add_summaries: 111 | for v in end_points.values(): 112 | tf.contrib.layers.summaries.summarize_activation(v) 113 | 114 | return net 115 | -------------------------------------------------------------------------------- /course_8_image2txt/ops/image_embedding_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for tensorflow_models.im2txt.ops.image_embedding.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from im2txt.ops import image_embedding 26 | 27 | 28 | class InceptionV3Test(tf.test.TestCase): 29 | 30 | def setUp(self): 31 | super(InceptionV3Test, self).setUp() 32 | 33 | batch_size = 4 34 | height = 299 35 | width = 299 36 | num_channels = 3 37 | self._images = tf.placeholder(tf.float32, 38 | [batch_size, height, width, num_channels]) 39 | self._batch_size = batch_size 40 | 41 | def _countInceptionParameters(self): 42 | """Counts the number of parameters in the inception model at top scope.""" 43 | counter = {} 44 | for v in tf.global_variables(): 45 | name_tokens = v.op.name.split("/") 46 | if name_tokens[0] == "InceptionV3": 47 | name = "InceptionV3/" + name_tokens[1] 48 | num_params = v.get_shape().num_elements() 49 | assert num_params 50 | counter[name] = counter.get(name, 0) + num_params 51 | return counter 52 | 53 | def _verifyParameterCounts(self): 54 | """Verifies the number of parameters in the inception model.""" 55 | param_counts = self._countInceptionParameters() 56 | expected_param_counts = { 57 | "InceptionV3/Conv2d_1a_3x3": 960, 58 | "InceptionV3/Conv2d_2a_3x3": 9312, 59 | "InceptionV3/Conv2d_2b_3x3": 18624, 60 | "InceptionV3/Conv2d_3b_1x1": 5360, 61 | "InceptionV3/Conv2d_4a_3x3": 138816, 62 | "InceptionV3/Mixed_5b": 256368, 63 | "InceptionV3/Mixed_5c": 277968, 64 | "InceptionV3/Mixed_5d": 285648, 65 | "InceptionV3/Mixed_6a": 1153920, 66 | "InceptionV3/Mixed_6b": 1298944, 67 | "InceptionV3/Mixed_6c": 1692736, 68 | "InceptionV3/Mixed_6d": 1692736, 69 | "InceptionV3/Mixed_6e": 2143872, 70 | "InceptionV3/Mixed_7a": 1699584, 71 | "InceptionV3/Mixed_7b": 5047872, 72 | "InceptionV3/Mixed_7c": 6080064, 73 | } 74 | self.assertDictEqual(expected_param_counts, param_counts) 75 | 76 | def _assertCollectionSize(self, expected_size, collection): 77 | actual_size = len(tf.get_collection(collection)) 78 | if expected_size != actual_size: 79 | self.fail("Found %d items in collection %s (expected %d)." 
% 80 | (actual_size, collection, expected_size)) 81 | 82 | def testTrainableTrueIsTrainingTrue(self): 83 | embeddings = image_embedding.inception_v3( 84 | self._images, trainable=True, is_training=True) 85 | self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) 86 | 87 | self._verifyParameterCounts() 88 | self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) 89 | self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES) 90 | self._assertCollectionSize(188, tf.GraphKeys.UPDATE_OPS) 91 | self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES) 92 | self._assertCollectionSize(0, tf.GraphKeys.LOSSES) 93 | self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) 94 | 95 | def testTrainableTrueIsTrainingFalse(self): 96 | embeddings = image_embedding.inception_v3( 97 | self._images, trainable=True, is_training=False) 98 | self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) 99 | 100 | self._verifyParameterCounts() 101 | self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) 102 | self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES) 103 | self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) 104 | self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES) 105 | self._assertCollectionSize(0, tf.GraphKeys.LOSSES) 106 | self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) 107 | 108 | def testTrainableFalseIsTrainingTrue(self): 109 | embeddings = image_embedding.inception_v3( 110 | self._images, trainable=False, is_training=True) 111 | self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) 112 | 113 | self._verifyParameterCounts() 114 | self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) 115 | self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES) 116 | self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) 117 | self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES) 118 | self._assertCollectionSize(0, tf.GraphKeys.LOSSES) 119 | self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) 120 | 121 | def testTrainableFalseIsTrainingFalse(self): 122 | embeddings = image_embedding.inception_v3( 123 | self._images, trainable=False, is_training=False) 124 | self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) 125 | 126 | self._verifyParameterCounts() 127 | self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) 128 | self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES) 129 | self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) 130 | self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES) 131 | self._assertCollectionSize(0, tf.GraphKeys.LOSSES) 132 | self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) 133 | 134 | 135 | if __name__ == "__main__": 136 | tf.test.main() 137 | -------------------------------------------------------------------------------- /course_8_image2txt/ops/image_processing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Helper functions for image preprocessing.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | 26 | def distort_image(image, thread_id): 27 | """Perform random distortions on an image. 28 | 29 | Args: 30 | image: A float32 Tensor of shape [height, width, 3] with values in [0, 1). 31 | thread_id: Preprocessing thread id used to select the ordering of color 32 | distortions. There should be a multiple of 2 preprocessing threads. 33 | 34 | Returns: 35 | distorted_image: A float32 Tensor of shape [height, width, 3] with values in 36 | [0, 1]. 37 | """ 38 | # Randomly flip horizontally. 39 | with tf.name_scope("flip_horizontal", values=[image]): 40 | image = tf.image.random_flip_left_right(image) 41 | 42 | # Randomly distort the colors based on thread id. 43 | color_ordering = thread_id % 2 44 | with tf.name_scope("distort_color", values=[image]): 45 | if color_ordering == 0: 46 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 47 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 48 | image = tf.image.random_hue(image, max_delta=0.032) 49 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 50 | elif color_ordering == 1: 51 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 52 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 53 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 54 | image = tf.image.random_hue(image, max_delta=0.032) 55 | 56 | # The random_* ops do not necessarily clamp. 57 | image = tf.clip_by_value(image, 0.0, 1.0) 58 | 59 | return image 60 | 61 | 62 | def process_image(encoded_image, 63 | is_training, 64 | height, 65 | width, 66 | resize_height=346, 67 | resize_width=346, 68 | thread_id=0, 69 | image_format="jpeg"): 70 | """Decode an image, resize and apply random distortions. 71 | 72 | In training, images are distorted slightly differently depending on thread_id. 73 | 74 | Args: 75 | encoded_image: String Tensor containing the image. 76 | is_training: Boolean; whether preprocessing for training or eval. 77 | height: Height of the output image. 78 | width: Width of the output image. 79 | resize_height: If > 0, resize height before crop to final dimensions. 80 | resize_width: If > 0, resize width before crop to final dimensions. 81 | thread_id: Preprocessing thread id used to select the ordering of color 82 | distortions. There should be a multiple of 2 preprocessing threads. 83 | image_format: "jpeg" or "png". 84 | 85 | Returns: 86 | A float32 Tensor of shape [height, width, 3] with values in [-1, 1]. 87 | 88 | Raises: 89 | ValueError: If image_format is invalid. 90 | """ 91 | # Helper function to log an image summary to the visualizer. Summaries are 92 | # only logged in thread 0. 
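  # Gating on thread_id below means only preprocessing thread 0 writes these
  # image summaries; otherwise every one of the parallel preprocessing threads
  # would log its own copy of the original/resized/final images.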
93 | def image_summary(name, image): 94 | if not thread_id: 95 | tf.summary.image(name, tf.expand_dims(image, 0)) 96 | 97 | # Decode image into a float32 Tensor of shape [?, ?, 3] with values in [0, 1). 98 | with tf.name_scope("decode", values=[encoded_image]): 99 | if image_format == "jpeg": 100 | image = tf.image.decode_jpeg(encoded_image, channels=3) 101 | elif image_format == "png": 102 | image = tf.image.decode_png(encoded_image, channels=3) 103 | else: 104 | raise ValueError("Invalid image format: %s" % image_format) 105 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 106 | image_summary("original_image", image) 107 | 108 | # Resize image. 109 | assert (resize_height > 0) == (resize_width > 0) 110 | if resize_height: 111 | image = tf.image.resize_images(image, 112 | size=[resize_height, resize_width], 113 | method=tf.image.ResizeMethod.BILINEAR) 114 | 115 | # Crop to final dimensions. 116 | if is_training: 117 | image = tf.random_crop(image, [height, width, 3]) 118 | else: 119 | # Central crop, assuming resize_height > height, resize_width > width. 120 | image = tf.image.resize_image_with_crop_or_pad(image, height, width) 121 | 122 | image_summary("resized_image", image) 123 | 124 | # Randomly distort the image. 125 | if is_training: 126 | image = distort_image(image, thread_id) 127 | 128 | image_summary("final_image", image) 129 | 130 | # Rescale to [-1,1] instead of [0, 1] 131 | image = tf.subtract(image, 0.5) 132 | image = tf.multiply(image, 2.0) 133 | return image 134 | -------------------------------------------------------------------------------- /course_8_image2txt/ops/inputs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Input ops.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | 26 | def parse_sequence_example(serialized, image_feature, caption_feature): 27 | """Parses a tensorflow.SequenceExample into an image and caption. 28 | 29 | Args: 30 | serialized: A scalar string Tensor; a single serialized SequenceExample. 31 | image_feature: Name of SequenceExample context feature containing image 32 | data. 33 | caption_feature: Name of SequenceExample feature list containing integer 34 | captions. 35 | 36 | Returns: 37 | encoded_image: A scalar string Tensor containing a JPEG encoded image. 38 | caption: A 1-D uint64 Tensor with dynamically specified length. 
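
  With the default feature names from configuration.ModelConfig this is
  typically called as (shown for illustration only):

    encoded_image, caption = parse_sequence_example(
        serialized,
        image_feature="image/data",
        caption_feature="image/caption_ids")

  where "image/data" holds the encoded image bytes and "image/caption_ids"
  holds the integer word ids written by the MSCOCO preprocessing step.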
39 | """ 40 | context, sequence = tf.parse_single_sequence_example( 41 | serialized, 42 | context_features={ 43 | image_feature: tf.FixedLenFeature([], dtype=tf.string) 44 | }, 45 | sequence_features={ 46 | caption_feature: tf.FixedLenSequenceFeature([], dtype=tf.int64), 47 | }) 48 | 49 | encoded_image = context[image_feature] 50 | caption = sequence[caption_feature] 51 | return encoded_image, caption 52 | 53 | 54 | def prefetch_input_data(reader, 55 | file_pattern, 56 | is_training, 57 | batch_size, 58 | values_per_shard, 59 | input_queue_capacity_factor=16, 60 | num_reader_threads=1, 61 | shard_queue_name="filename_queue", 62 | value_queue_name="input_queue"): 63 | """Prefetches string values from disk into an input queue. 64 | 65 | In training the capacity of the queue is important because a larger queue 66 | means better mixing of training examples between shards. The minimum number of 67 | values kept in the queue is values_per_shard * input_queue_capacity_factor, 68 | where input_queue_memory factor should be chosen to trade-off better mixing 69 | with memory usage. 70 | 71 | Args: 72 | reader: Instance of tf.ReaderBase. 73 | file_pattern: Comma-separated list of file patterns (e.g. 74 | /tmp/train_data-?????-of-00100). 75 | is_training: Boolean; whether prefetching for training or eval. 76 | batch_size: Model batch size used to determine queue capacity. 77 | values_per_shard: Approximate number of values per shard. 78 | input_queue_capacity_factor: Minimum number of values to keep in the queue 79 | in multiples of values_per_shard. See comments above. 80 | num_reader_threads: Number of reader threads to fill the queue. 81 | shard_queue_name: Name for the shards filename queue. 82 | value_queue_name: Name for the values input queue. 83 | 84 | Returns: 85 | A Queue containing prefetched string values. 86 | """ 87 | data_files = [] 88 | for pattern in file_pattern.split(","): 89 | data_files.extend(tf.gfile.Glob(pattern)) 90 | if not data_files: 91 | tf.logging.fatal("Found no input files matching %s", file_pattern) 92 | else: 93 | tf.logging.info("Prefetching values from %d files matching %s", 94 | len(data_files), file_pattern) 95 | 96 | if is_training: 97 | filename_queue = tf.train.string_input_producer( 98 | data_files, shuffle=True, capacity=16, name=shard_queue_name) 99 | min_queue_examples = values_per_shard * input_queue_capacity_factor 100 | capacity = min_queue_examples + 100 * batch_size 101 | values_queue = tf.RandomShuffleQueue( 102 | capacity=capacity, 103 | min_after_dequeue=min_queue_examples, 104 | dtypes=[tf.string], 105 | name="random_" + value_queue_name) 106 | else: 107 | filename_queue = tf.train.string_input_producer( 108 | data_files, shuffle=False, capacity=1, name=shard_queue_name) 109 | capacity = values_per_shard + 3 * batch_size 110 | values_queue = tf.FIFOQueue( 111 | capacity=capacity, dtypes=[tf.string], name="fifo_" + value_queue_name) 112 | 113 | enqueue_ops = [] 114 | for _ in range(num_reader_threads): 115 | _, value = reader.read(filename_queue) 116 | enqueue_ops.append(values_queue.enqueue([value])) 117 | tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner( 118 | values_queue, enqueue_ops)) 119 | tf.summary.scalar( 120 | "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity), 121 | tf.cast(values_queue.size(), tf.float32) * (1. 
/ capacity)) 122 | 123 | return values_queue 124 | 125 | 126 | def batch_with_dynamic_pad(images_and_captions, 127 | batch_size, 128 | queue_capacity, 129 | add_summaries=True): 130 | """Batches input images and captions. 131 | 132 | This function splits the caption into an input sequence and a target sequence, 133 | where the target sequence is the input sequence right-shifted by 1. Input and 134 | target sequences are batched and padded up to the maximum length of sequences 135 | in the batch. A mask is created to distinguish real words from padding words. 136 | 137 | Example: 138 | Actual captions in the batch ('-' denotes padded character): 139 | [ 140 | [ 1 2 5 4 5 ], 141 | [ 1 2 3 4 - ], 142 | [ 1 2 3 - - ], 143 | ] 144 | 145 | input_seqs: 146 | [ 147 | [ 1 2 3 4 ], 148 | [ 1 2 3 - ], 149 | [ 1 2 - - ], 150 | ] 151 | 152 | target_seqs: 153 | [ 154 | [ 2 3 4 5 ], 155 | [ 2 3 4 - ], 156 | [ 2 3 - - ], 157 | ] 158 | 159 | mask: 160 | [ 161 | [ 1 1 1 1 ], 162 | [ 1 1 1 0 ], 163 | [ 1 1 0 0 ], 164 | ] 165 | 166 | Args: 167 | images_and_captions: A list of pairs [image, caption], where image is a 168 | Tensor of shape [height, width, channels] and caption is a 1-D Tensor of 169 | any length. Each pair will be processed and added to the queue in a 170 | separate thread. 171 | batch_size: Batch size. 172 | queue_capacity: Queue capacity. 173 | add_summaries: If true, add caption length summaries. 174 | 175 | Returns: 176 | images: A Tensor of shape [batch_size, height, width, channels]. 177 | input_seqs: An int32 Tensor of shape [batch_size, padded_length]. 178 | target_seqs: An int32 Tensor of shape [batch_size, padded_length]. 179 | mask: An int32 0/1 Tensor of shape [batch_size, padded_length]. 180 | """ 181 | enqueue_list = [] 182 | for image, caption in images_and_captions: 183 | caption_length = tf.shape(caption)[0] 184 | input_length = tf.expand_dims(tf.subtract(caption_length, 1), 0) 185 | 186 | input_seq = tf.slice(caption, [0], input_length) 187 | target_seq = tf.slice(caption, [1], input_length) 188 | indicator = tf.ones(input_length, dtype=tf.int32) 189 | enqueue_list.append([image, input_seq, target_seq, indicator]) 190 | 191 | images, input_seqs, target_seqs, mask = tf.train.batch_join( 192 | enqueue_list, 193 | batch_size=batch_size, 194 | capacity=queue_capacity, 195 | dynamic_pad=True, 196 | name="batch_and_pad") 197 | 198 | if add_summaries: 199 | lengths = tf.add(tf.reduce_sum(mask, 1), 1) 200 | tf.summary.scalar("caption_length/batch_min", tf.reduce_min(lengths)) 201 | tf.summary.scalar("caption_length/batch_max", tf.reduce_max(lengths)) 202 | tf.summary.scalar("caption_length/batch_mean", tf.reduce_mean(lengths)) 203 | 204 | return images, input_seqs, target_seqs, mask 205 | -------------------------------------------------------------------------------- /course_8_image2txt/readme.md: -------------------------------------------------------------------------------- 1 | # Show and Tell: A Neural Image Caption Generator 2 | 3 | A TensorFlow implementation of the image-to-text model described in the paper: 4 | 5 | "Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning 6 | Challenge." 7 | 8 | Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan. 
9 | 10 | *IEEE transactions on pattern analysis and machine intelligence (2016).* 11 | 12 | Full text available at: http://arxiv.org/abs/1609.06647 13 | 14 | ## Contact 15 | ***Author:*** Chris Shallue 16 | 17 | ***Pull requests and issues:*** @cshallue 18 | 19 | ## Contents 20 | * [Model Overview](#model-overview) 21 | * [Introduction](#introduction) 22 | * [Architecture](#architecture) 23 | * [Getting Started](#getting-started) 24 | * [A Note on Hardware and Training Time](#a-note-on-hardware-and-training-time) 25 | * [Install Required Packages](#install-required-packages) 26 | * [Prepare the Training Data](#prepare-the-training-data) 27 | * [Download the Inception v3 Checkpoint](#download-the-inception-v3-checkpoint) 28 | * [Training a Model](#training-a-model) 29 | * [Initial Training](#initial-training) 30 | * [Fine Tune the Inception v3 Model](#fine-tune-the-inception-v3-model) 31 | * [Generating Captions](#generating-captions) 32 | 33 | ## Model Overview 34 | 35 | ### Introduction 36 | 37 | The *Show and Tell* model is a deep neural network that learns how to describe 38 | the content of images. For example: 39 | 40 | ![Example captions](../g3doc/example_captions.jpg) 41 | 42 | ### Architecture 43 | 44 | The *Show and Tell* model is an example of an *encoder-decoder* neural network. 45 | It works by first "encoding" an image into a fixed-length vector representation, 46 | and then "decoding" the representation into a natural language description. 47 | 48 | The image encoder is a deep convolutional neural network. This type of 49 | network is widely used for image tasks and is currently state-of-the-art for 50 | object recognition and detection. Our particular choice of network is the 51 | [*Inception v3*](http://arxiv.org/abs/1512.00567) image recognition model 52 | pretrained on the 53 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) image 54 | classification dataset. 55 | 56 | The decoder is a long short-term memory (LSTM) network. This type of network is 57 | commonly used for sequence modeling tasks such as language modeling and machine 58 | translation. In the *Show and Tell* model, the LSTM network is trained as a 59 | language model conditioned on the image encoding. 60 | 61 | Words in the captions are represented with an embedding model. Each word in the 62 | vocabulary is associated with a fixed-length vector representation that is 63 | learned during training. 64 | 65 | The following diagram illustrates the model architecture. 66 | 67 | ![Show and Tell Architecture](../g3doc/show_and_tell_architecture.png) 68 | 69 | In this diagram, \{*s*0, *s*1, ..., *s**N*-1\} 70 | are the words of the caption and \{*w**e**s*0, 71 | *w**e**s*1, ..., *w**e**s**N*-1\} 72 | are their corresponding word embedding vectors. The outputs \{*p*1, 73 | *p*2, ..., *p**N*\} of the LSTM are probability 74 | distributions generated by the model for the next word in the sentence. The 75 | terms \{log *p*1(*s*1), 76 | log *p*2(*s*2), ..., 77 | log *p**N*(*s**N*)\} are the log-likelihoods of the 78 | correct word at each step; the negated sum of these terms is the minimization 79 | objective of the model. 80 | 81 | During the first phase of training the parameters of the *Inception v3* model 82 | are kept fixed: it is simply a static image encoder function. A single trainable 83 | layer is added on top of the *Inception v3* model to transform the image 84 | embedding into the word embedding vector space. 
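In code, the minimization objective described above boils down to a negative log-likelihood summed over the caption words. The following is a minimal NumPy sketch with made-up probabilities, not the model's actual TensorFlow loss implementation:

```python
import numpy as np

# Hypothetical probabilities p_t(s_t) that the decoder assigns to the correct
# caption words s_1..s_N at each step (illustrative values only).
correct_word_probs = np.array([0.21, 0.08, 0.35, 0.12])

# The training objective is the negated sum of the log-likelihoods above.
caption_loss = -np.sum(np.log(correct_word_probs))
print(caption_loss)  # about 7.26
```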
The model is trained with 85 | respect to the parameters of the word embeddings, the parameters of the layer on 86 | top of *Inception v3* and the parameters of the LSTM. In the second phase of 87 | training, all parameters - including the parameters of *Inception v3* - are 88 | trained to jointly fine-tune the image encoder and the LSTM. 89 | 90 | Given a trained model and an image we use *beam search* to generate captions for 91 | that image. Captions are generated word-by-word, where at each step *t* we use 92 | the set of sentences already generated with length *t* - 1 to generate a new set 93 | of sentences with length *t*. We keep only the top *k* candidates at each step, 94 | where the hyperparameter *k* is called the *beam size*. We have found the best 95 | performance with *k* = 3. 96 | 97 | ## Getting Started 98 | 99 | ### A Note on Hardware and Training Time 100 | 101 | The time required to train the *Show and Tell* model depends on your specific 102 | hardware and computational capacity. In this guide we assume you will be running 103 | training on a single machine with a GPU. In our experience on an NVIDIA Tesla 104 | K20m GPU the initial training phase takes 1-2 weeks. The second training phase 105 | may take several additional weeks to achieve peak performance (but you can stop 106 | this phase early and still get reasonable results). 107 | 108 | It is possible to achieve a speed-up by implementing distributed training across 109 | a cluster of machines with GPUs, but that is not covered in this guide. 110 | 111 | Whilst it is possible to run this code on a CPU, beware that this may be 112 | approximately 10 times slower. 113 | 114 | ### Install Required Packages 115 | First ensure that you have installed the following required packages: 116 | 117 | * **Bazel** ([instructions](http://bazel.io/docs/install.html)) 118 | * **TensorFlow** 1.0 or greater ([instructions](https://www.tensorflow.org/install/)) 119 | * **NumPy** ([instructions](http://www.scipy.org/install.html)) 120 | * **Natural Language Toolkit (NLTK)**: 121 | * First install NLTK ([instructions](http://www.nltk.org/install.html)) 122 | * Then install the NLTK data ([instructions](http://www.nltk.org/data.html)) 123 | 124 | ### Prepare the Training Data 125 | 126 | To train the model you will need to provide training data in native TFRecord 127 | format. The TFRecord format consists of a set of sharded files containing 128 | serialized `tf.SequenceExample` protocol buffers. Each `tf.SequenceExample` 129 | proto contains an image (JPEG format), a caption and metadata such as the image 130 | id. 131 | 132 | Each caption is a list of words. During preprocessing, a dictionary is created 133 | that assigns each word in the vocabulary to an integer-valued id. Each caption 134 | is encoded as a list of integer word ids in the `tf.SequenceExample` protos. 135 | 136 | We have provided a script to download and preprocess the [MSCOCO](http://mscoco.org/) image captioning data set into this format. Downloading 137 | and preprocessing the data may take several hours depending on your network and 138 | computer speed. Please be patient. 139 | 140 | Before running the script, ensure that your hard disk has at least 150GB of 141 | available space for storing the downloaded and processed data. 142 | 143 | ```shell 144 | # Location to save the MSCOCO data. 145 | MSCOCO_DIR="${HOME}/im2txt/data/mscoco" 146 | 147 | # Build the preprocessing script. 
148 | bazel build im2txt/download_and_preprocess_mscoco 149 | 150 | # Run the preprocessing script. 151 | bazel-bin/im2txt/download_and_preprocess_mscoco "${MSCOCO_DIR}" 152 | ``` 153 | 154 | The final line of the output should read: 155 | 156 | ``` 157 | 2016-09-01 16:47:47.296630: Finished processing all 20267 image-caption pairs in data set 'test'. 158 | ``` 159 | 160 | When the script finishes you will find 256 training, 4 validation and 8 testing 161 | files in `DATA_DIR`. The files will match the patterns `train-?????-of-00256`, 162 | `val-?????-of-00004` and `test-?????-of-00008`, respectively. 163 | 164 | ### Download the Inception v3 Checkpoint 165 | 166 | The *Show and Tell* model requires a pretrained *Inception v3* checkpoint file 167 | to initialize the parameters of its image encoder submodel. 168 | 169 | This checkpoint file is provided by the 170 | [TensorFlow-Slim image classification library](https://github.com/tensorflow/models/tree/master/slim#tensorflow-slim-image-classification-library) 171 | which provides a suite of pre-trained image classification models. You can read 172 | more about the models provided by the library 173 | [here](https://github.com/tensorflow/models/tree/master/slim#pre-trained-models). 174 | 175 | 176 | Run the following commands to download the *Inception v3* checkpoint. 177 | 178 | ```shell 179 | # Location to save the Inception v3 checkpoint. 180 | INCEPTION_DIR="${HOME}/im2txt/data" 181 | mkdir -p ${INCEPTION_DIR} 182 | 183 | wget "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz" 184 | tar -xvf "inception_v3_2016_08_28.tar.gz" -C ${INCEPTION_DIR} 185 | rm "inception_v3_2016_08_28.tar.gz" 186 | ``` 187 | 188 | Note that the *Inception v3* checkpoint will only be used for initializing the 189 | parameters of the *Show and Tell* model. Once the *Show and Tell* model starts 190 | training it will save its own checkpoint files containing the values of all its 191 | parameters (including copies of the *Inception v3* parameters). If training is 192 | stopped and restarted, the parameter values will be restored from the latest 193 | *Show and Tell* checkpoint and the *Inception v3* checkpoint will be ignored. In 194 | other words, the *Inception v3* checkpoint is only used in the 0-th global step 195 | (initialization) of training the *Show and Tell* model. 196 | 197 | ## Training a Model 198 | 199 | ### Initial Training 200 | 201 | Run the training script. 202 | 203 | ```shell 204 | # Directory containing preprocessed MSCOCO data. 205 | MSCOCO_DIR="${HOME}/im2txt/data/mscoco" 206 | 207 | # Inception v3 checkpoint file. 208 | INCEPTION_CHECKPOINT="${HOME}/im2txt/data/inception_v3.ckpt" 209 | 210 | # Directory to save the model. 211 | MODEL_DIR="${HOME}/im2txt/model" 212 | 213 | # Build the model. 214 | bazel build -c opt im2txt/... 215 | 216 | # Run the training script. 217 | bazel-bin/im2txt/train \ 218 | --input_file_pattern="${MSCOCO_DIR}/train-?????-of-00256" \ 219 | --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \ 220 | --train_dir="${MODEL_DIR}/train" \ 221 | --train_inception=false \ 222 | --number_of_steps=1000000 223 | ``` 224 | 225 | Run the evaluation script in a separate process. This will log evaluation 226 | metrics to TensorBoard which allows training progress to be monitored in 227 | real-time. 228 | 229 | Note that you may run out of memory if you run the evaluation script on the same 230 | GPU as the training script. 
You can run the command 231 | `export CUDA_VISIBLE_DEVICES=""` to force the evaluation script to run on CPU. 232 | If evaluation runs too slowly on CPU, you can decrease the value of 233 | `--num_eval_examples`. 234 | 235 | ```shell 236 | MSCOCO_DIR="${HOME}/im2txt/data/mscoco" 237 | MODEL_DIR="${HOME}/im2txt/model" 238 | 239 | # Ignore GPU devices (only necessary if your GPU is currently memory 240 | # constrained, for example, by running the training script). 241 | export CUDA_VISIBLE_DEVICES="" 242 | 243 | # Run the evaluation script. This will run in a loop, periodically loading the 244 | # latest model checkpoint file and computing evaluation metrics. 245 | bazel-bin/im2txt/evaluate \ 246 | --input_file_pattern="${MSCOCO_DIR}/val-?????-of-00004" \ 247 | --checkpoint_dir="${MODEL_DIR}/train" \ 248 | --eval_dir="${MODEL_DIR}/eval" 249 | ``` 250 | 251 | Run a TensorBoard server in a separate process for real-time monitoring of 252 | training progress and evaluation metrics. 253 | 254 | ```shell 255 | MODEL_DIR="${HOME}/im2txt/model" 256 | 257 | # Run a TensorBoard server. 258 | tensorboard --logdir="${MODEL_DIR}" 259 | ``` 260 | 261 | ### Fine Tune the Inception v3 Model 262 | 263 | Your model will already be able to generate reasonable captions after the first 264 | phase of training. Try it out! (See [Generating Captions](#generating-captions)). 265 | 266 | You can further improve the performance of the model by running a 267 | second training phase to jointly fine-tune the parameters of the *Inception v3* 268 | image submodel and the LSTM. 269 | 270 | ```shell 271 | # Restart the training script with --train_inception=true. 272 | bazel-bin/im2txt/train \ 273 | --input_file_pattern="${MSCOCO_DIR}/train-?????-of-00256" \ 274 | --train_dir="${MODEL_DIR}/train" \ 275 | --train_inception=true \ 276 | --number_of_steps=3000000 # Additional 2M steps (assuming 1M in initial training). 277 | ``` 278 | 279 | Note that training will proceed much slower now, and the model will continue to 280 | improve by a small amount for a long time. We have found that it will improve 281 | slowly for an additional 2-2.5 million steps before it begins to overfit. This 282 | may take several weeks on a single GPU. If you don't care about absolutely 283 | optimal performance then feel free to halt training sooner by stopping the 284 | training script or passing a smaller value to the flag `--number_of_steps`. Your 285 | model will still work reasonably well. 286 | 287 | ## Generating Captions 288 | 289 | Your trained *Show and Tell* model can generate captions for any JPEG image! The 290 | following command line will generate captions for an image from the test set. 291 | 292 | ```shell 293 | # Path to checkpoint file or a directory containing checkpoint files. Passing 294 | # a directory will only work if there is also a file named 'checkpoint' which 295 | # lists the available checkpoints in the directory. It will not work if you 296 | # point to a directory with just a copy of a model checkpoint: in that case, 297 | # you will need to pass the checkpoint path explicitly. 298 | CHECKPOINT_PATH="${HOME}/im2txt/model/train" 299 | 300 | # Vocabulary file generated by the preprocessing script. 301 | VOCAB_FILE="${HOME}/im2txt/data/mscoco/word_counts.txt" 302 | 303 | # JPEG image file to caption. 304 | IMAGE_FILE="${HOME}/im2txt/data/mscoco/raw-data/val2014/COCO_val2014_000000224477.jpg" 305 | 306 | # Build the inference binary. 
307 | bazel build -c opt im2txt/run_inference 308 | 309 | # Ignore GPU devices (only necessary if your GPU is currently memory 310 | # constrained, for example, by running the training script). 311 | export CUDA_VISIBLE_DEVICES="" 312 | 313 | # Run inference to generate captions. 314 | bazel-bin/im2txt/run_inference \ 315 | --checkpoint_path=${CHECKPOINT_PATH} \ 316 | --vocab_file=${VOCAB_FILE} \ 317 | --input_files=${IMAGE_FILE} 318 | ``` 319 | 320 | Example output: 321 | 322 | ```shell 323 | Captions for image COCO_val2014_000000224477.jpg: 324 | 0) a man riding a wave on top of a surfboard . (p=0.040413) 325 | 1) a person riding a surf board on a wave (p=0.017452) 326 | 2) a man riding a wave on a surfboard in the ocean . (p=0.005743) 327 | ``` 328 | 329 | Note: you may get different results. Some variation between different models is 330 | expected. 331 | 332 | Here is the image: 333 | 334 | ![Surfer](../g3doc/COCO_val2014_000000224477.jpg) 335 | -------------------------------------------------------------------------------- /course_8_image2txt/run_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Generate captions for images using default beam search parameters.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import math 22 | import os 23 | 24 | 25 | import tensorflow as tf 26 | 27 | from im2txt import configuration 28 | from im2txt import inference_wrapper 29 | from im2txt.inference_utils import caption_generator 30 | from im2txt.inference_utils import vocabulary 31 | 32 | FLAGS = tf.flags.FLAGS 33 | 34 | tf.flags.DEFINE_string("checkpoint_path", "", 35 | "Model checkpoint file or directory containing a " 36 | "model checkpoint file.") 37 | tf.flags.DEFINE_string("vocab_file", "", "Text file containing the vocabulary.") 38 | tf.flags.DEFINE_string("input_files", "", 39 | "File pattern or comma-separated list of file patterns " 40 | "of image files.") 41 | 42 | tf.logging.set_verbosity(tf.logging.INFO) 43 | 44 | 45 | def main(_): 46 | # Build the inference graph. 47 | g = tf.Graph() 48 | with g.as_default(): 49 | model = inference_wrapper.InferenceWrapper() 50 | restore_fn = model.build_graph_from_config(configuration.ModelConfig(), 51 | FLAGS.checkpoint_path) 52 | g.finalize() 53 | 54 | # Create the vocabulary. 55 | vocab = vocabulary.Vocabulary(FLAGS.vocab_file) 56 | 57 | filenames = [] 58 | for file_pattern in FLAGS.input_files.split(","): 59 | filenames.extend(tf.gfile.Glob(file_pattern)) 60 | tf.logging.info("Running caption generation on %d files matching %s", 61 | len(filenames), FLAGS.input_files) 62 | 63 | with tf.Session(graph=g) as sess: 64 | # Load the model from checkpoint. 
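# restore_fn was returned by build_graph_from_config above; it loads the trained
# weights from --checkpoint_path (a checkpoint file, or the latest checkpoint
# listed in that directory) into this session.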
65 | restore_fn(sess) 66 | 67 | # Prepare the caption generator. Here we are implicitly using the default 68 | # beam search parameters. See caption_generator.py for a description of the 69 | # available beam search parameters. 70 | generator = caption_generator.CaptionGenerator(model, vocab) 71 | 72 | for filename in filenames: 73 | with tf.gfile.GFile(filename, "r") as f: 74 | image = f.read() 75 | captions = generator.beam_search(sess, image) 76 | print("Captions for image %s:" % os.path.basename(filename)) 77 | for i, caption in enumerate(captions): 78 | # Ignore begin and end words. 79 | sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]] 80 | sentence = " ".join(sentence) 81 | print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob))) 82 | 83 | 84 | if __name__ == "__main__": 85 | tf.app.run() 86 | -------------------------------------------------------------------------------- /course_8_image2txt/show_and_tell_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Image-to-text implementation based on http://arxiv.org/abs/1411.4555. 17 | 18 | "Show and Tell: A Neural Image Caption Generator" 19 | Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | 27 | import tensorflow as tf 28 | 29 | from im2txt.ops import image_embedding 30 | from im2txt.ops import image_processing 31 | from im2txt.ops import inputs as input_ops 32 | 33 | 34 | class ShowAndTellModel(object): 35 | """Image-to-text implementation based on http://arxiv.org/abs/1411.4555. 36 | 37 | "Show and Tell: A Neural Image Caption Generator" 38 | Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan 39 | """ 40 | 41 | def __init__(self, config, mode, train_inception=False): 42 | """Basic setup. 43 | 44 | Args: 45 | config: Object containing configuration parameters. 46 | mode: "train", "eval" or "inference". 47 | train_inception: Whether the inception submodel variables are trainable. 48 | """ 49 | assert mode in ["train", "eval", "inference"] 50 | self.config = config 51 | self.mode = mode 52 | self.train_inception = train_inception 53 | 54 | # Reader for the input data. 55 | self.reader = tf.TFRecordReader() 56 | 57 | # To match the "Show and Tell" paper we initialize all variables with a 58 | # random uniform initializer. 59 | self.initializer = tf.random_uniform_initializer( 60 | minval=-self.config.initializer_scale, 61 | maxval=self.config.initializer_scale) 62 | 63 | # A float32 Tensor with shape [batch_size, height, width, channels]. 64 | self.images = None 65 | 66 | # An int32 Tensor with shape [batch_size, padded_length]. 
67 | self.input_seqs = None 68 | 69 | # An int32 Tensor with shape [batch_size, padded_length]. 70 | self.target_seqs = None 71 | 72 | # An int32 0/1 Tensor with shape [batch_size, padded_length]. 73 | self.input_mask = None 74 | 75 | # A float32 Tensor with shape [batch_size, embedding_size]. 76 | self.image_embeddings = None 77 | 78 | # A float32 Tensor with shape [batch_size, padded_length, embedding_size]. 79 | self.seq_embeddings = None 80 | 81 | # A float32 scalar Tensor; the total loss for the trainer to optimize. 82 | self.total_loss = None 83 | 84 | # A float32 Tensor with shape [batch_size * padded_length]. 85 | self.target_cross_entropy_losses = None 86 | 87 | # A float32 Tensor with shape [batch_size * padded_length]. 88 | self.target_cross_entropy_loss_weights = None 89 | 90 | # Collection of variables from the inception submodel. 91 | self.inception_variables = [] 92 | 93 | # Function to restore the inception submodel from checkpoint. 94 | self.init_fn = None 95 | 96 | # Global step Tensor. 97 | self.global_step = None 98 | 99 | def is_training(self): 100 | """Returns true if the model is built for training mode.""" 101 | return self.mode == "train" 102 | 103 | def process_image(self, encoded_image, thread_id=0): 104 | """Decodes and processes an image string. 105 | 106 | Args: 107 | encoded_image: A scalar string Tensor; the encoded image. 108 | thread_id: Preprocessing thread id used to select the ordering of color 109 | distortions. 110 | 111 | Returns: 112 | A float32 Tensor of shape [height, width, 3]; the processed image. 113 | """ 114 | return image_processing.process_image(encoded_image, 115 | is_training=self.is_training(), 116 | height=self.config.image_height, 117 | width=self.config.image_width, 118 | thread_id=thread_id, 119 | image_format=self.config.image_format) 120 | 121 | def build_inputs(self): 122 | """Input prefetching, preprocessing and batching. 123 | 124 | Outputs: 125 | self.images 126 | self.input_seqs 127 | self.target_seqs (training and eval only) 128 | self.input_mask (training and eval only) 129 | """ 130 | if self.mode == "inference": 131 | # In inference mode, images and inputs are fed via placeholders. 132 | image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed") 133 | input_feed = tf.placeholder(dtype=tf.int64, 134 | shape=[None], # batch_size 135 | name="input_feed") 136 | 137 | # Process image and insert batch dimensions. 138 | images = tf.expand_dims(self.process_image(image_feed), 0) 139 | input_seqs = tf.expand_dims(input_feed, 1) 140 | 141 | # No target sequences or input mask in inference mode. 142 | target_seqs = None 143 | input_mask = None 144 | else: 145 | # Prefetch serialized SequenceExample protos. 146 | input_queue = input_ops.prefetch_input_data( 147 | self.reader, 148 | self.config.input_file_pattern, 149 | is_training=self.is_training(), 150 | batch_size=self.config.batch_size, 151 | values_per_shard=self.config.values_per_input_shard, 152 | input_queue_capacity_factor=self.config.input_queue_capacity_factor, 153 | num_reader_threads=self.config.num_input_reader_threads) 154 | 155 | # Image processing and random distortion. Split across multiple threads 156 | # with each thread applying a slightly different distortion. 
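# distort_image alternates between two color-distortion orderings based on
# thread_id % 2, so an even number of preprocessing threads gives both
# orderings equal coverage.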
157 | assert self.config.num_preprocess_threads % 2 == 0 158 | images_and_captions = [] 159 | for thread_id in range(self.config.num_preprocess_threads): 160 | serialized_sequence_example = input_queue.dequeue() 161 | encoded_image, caption = input_ops.parse_sequence_example( 162 | serialized_sequence_example, 163 | image_feature=self.config.image_feature_name, 164 | caption_feature=self.config.caption_feature_name) 165 | image = self.process_image(encoded_image, thread_id=thread_id) 166 | images_and_captions.append([image, caption]) 167 | 168 | # Batch inputs. 169 | queue_capacity = (2 * self.config.num_preprocess_threads * 170 | self.config.batch_size) 171 | images, input_seqs, target_seqs, input_mask = ( 172 | input_ops.batch_with_dynamic_pad(images_and_captions, 173 | batch_size=self.config.batch_size, 174 | queue_capacity=queue_capacity)) 175 | 176 | self.images = images 177 | self.input_seqs = input_seqs 178 | self.target_seqs = target_seqs 179 | self.input_mask = input_mask 180 | 181 | def build_image_embeddings(self): 182 | """Builds the image model subgraph and generates image embeddings. 183 | 184 | Inputs: 185 | self.images 186 | 187 | Outputs: 188 | self.image_embeddings 189 | """ 190 | inception_output = image_embedding.inception_v3( 191 | self.images, 192 | trainable=self.train_inception, 193 | is_training=self.is_training()) 194 | self.inception_variables = tf.get_collection( 195 | tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3") 196 | 197 | # Map inception output into embedding space. 198 | with tf.variable_scope("image_embedding") as scope: 199 | image_embeddings = tf.contrib.layers.fully_connected( 200 | inputs=inception_output, 201 | num_outputs=self.config.embedding_size, 202 | activation_fn=None, 203 | weights_initializer=self.initializer, 204 | biases_initializer=None, 205 | scope=scope) 206 | 207 | # Save the embedding size in the graph. 208 | tf.constant(self.config.embedding_size, name="embedding_size") 209 | 210 | self.image_embeddings = image_embeddings 211 | 212 | def build_seq_embeddings(self): 213 | """Builds the input sequence embeddings. 214 | 215 | Inputs: 216 | self.input_seqs 217 | 218 | Outputs: 219 | self.seq_embeddings 220 | """ 221 | with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"): 222 | embedding_map = tf.get_variable( 223 | name="map", 224 | shape=[self.config.vocab_size, self.config.embedding_size], 225 | initializer=self.initializer) 226 | seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.input_seqs) 227 | 228 | self.seq_embeddings = seq_embeddings 229 | 230 | def build_model(self): 231 | """Builds the model. 232 | 233 | Inputs: 234 | self.image_embeddings 235 | self.seq_embeddings 236 | self.target_seqs (training and eval only) 237 | self.input_mask (training and eval only) 238 | 239 | Outputs: 240 | self.total_loss (training and eval only) 241 | self.target_cross_entropy_losses (training and eval only) 242 | self.target_cross_entropy_loss_weights (training and eval only) 243 | """ 244 | # This LSTM cell has biases and outputs tanh(new_c) * sigmoid(o), but the 245 | # modified LSTM in the "Show and Tell" paper has no biases and outputs 246 | # new_c * sigmoid(o). 
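# state_is_tuple=True keeps the LSTM state as a (c, h) pair of
# [batch, num_lstm_units] tensors; inference mode below concatenates the pair
# along axis 1 into a single [batch, 2 * num_lstm_units] vector for feeding.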
247 | lstm_cell = tf.contrib.rnn.BasicLSTMCell( 248 | num_units=self.config.num_lstm_units, state_is_tuple=True) 249 | if self.mode == "train": 250 | lstm_cell = tf.contrib.rnn.DropoutWrapper( 251 | lstm_cell, 252 | input_keep_prob=self.config.lstm_dropout_keep_prob, 253 | output_keep_prob=self.config.lstm_dropout_keep_prob) 254 | 255 | with tf.variable_scope("lstm", initializer=self.initializer) as lstm_scope: 256 | # Feed the image embeddings to set the initial LSTM state. 257 | zero_state = lstm_cell.zero_state( 258 | batch_size=self.image_embeddings.get_shape()[0], dtype=tf.float32) 259 | _, initial_state = lstm_cell(self.image_embeddings, zero_state) 260 | 261 | # Allow the LSTM variables to be reused. 262 | lstm_scope.reuse_variables() 263 | 264 | if self.mode == "inference": 265 | # In inference mode, use concatenated states for convenient feeding and 266 | # fetching. 267 | tf.concat(axis=1, values=initial_state, name="initial_state") 268 | 269 | # Placeholder for feeding a batch of concatenated states. 270 | state_feed = tf.placeholder(dtype=tf.float32, 271 | shape=[None, sum(lstm_cell.state_size)], 272 | name="state_feed") 273 | state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1) 274 | 275 | # Run a single LSTM step. 276 | lstm_outputs, state_tuple = lstm_cell( 277 | inputs=tf.squeeze(self.seq_embeddings, axis=[1]), 278 | state=state_tuple) 279 | 280 | # Concatentate the resulting state. 281 | tf.concat(axis=1, values=state_tuple, name="state") 282 | else: 283 | # Run the batch of sequence embeddings through the LSTM. 284 | sequence_length = tf.reduce_sum(self.input_mask, 1) 285 | lstm_outputs, _ = tf.nn.dynamic_rnn(cell=lstm_cell, 286 | inputs=self.seq_embeddings, 287 | sequence_length=sequence_length, 288 | initial_state=initial_state, 289 | dtype=tf.float32, 290 | scope=lstm_scope) 291 | 292 | # Stack batches vertically. 293 | lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size]) 294 | 295 | with tf.variable_scope("logits") as logits_scope: 296 | logits = tf.contrib.layers.fully_connected( 297 | inputs=lstm_outputs, 298 | num_outputs=self.config.vocab_size, 299 | activation_fn=None, 300 | weights_initializer=self.initializer, 301 | scope=logits_scope) 302 | 303 | if self.mode == "inference": 304 | tf.nn.softmax(logits, name="softmax") 305 | else: 306 | targets = tf.reshape(self.target_seqs, [-1]) 307 | weights = tf.to_float(tf.reshape(self.input_mask, [-1])) 308 | 309 | # Compute losses. 310 | losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, 311 | logits=logits) 312 | batch_loss = tf.div(tf.reduce_sum(tf.multiply(losses, weights)), 313 | tf.reduce_sum(weights), 314 | name="batch_loss") 315 | tf.losses.add_loss(batch_loss) 316 | total_loss = tf.losses.get_total_loss() 317 | 318 | # Add summaries. 319 | tf.summary.scalar("losses/batch_loss", batch_loss) 320 | tf.summary.scalar("losses/total_loss", total_loss) 321 | for var in tf.trainable_variables(): 322 | tf.summary.histogram("parameters/" + var.op.name, var) 323 | 324 | self.total_loss = total_loss 325 | self.target_cross_entropy_losses = losses # Used in evaluation. 326 | self.target_cross_entropy_loss_weights = weights # Used in evaluation. 327 | 328 | def setup_inception_initializer(self): 329 | """Sets up the function to restore inception variables from checkpoint.""" 330 | if self.mode != "inference": 331 | # Restore inception variables only. 
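# Building the Saver from inception_variables only means restore_fn touches
# just the InceptionV3 weights; every other variable keeps its random
# initialization until training updates it.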
332 | saver = tf.train.Saver(self.inception_variables) 333 | 334 | def restore_fn(sess): 335 | tf.logging.info("Restoring Inception variables from checkpoint file %s", 336 | self.config.inception_checkpoint_file) 337 | saver.restore(sess, self.config.inception_checkpoint_file) 338 | 339 | self.init_fn = restore_fn 340 | 341 | def setup_global_step(self): 342 | """Sets up the global step Tensor.""" 343 | global_step = tf.Variable( 344 | initial_value=0, 345 | name="global_step", 346 | trainable=False, 347 | collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES]) 348 | 349 | self.global_step = global_step 350 | 351 | def build(self): 352 | """Creates all ops for training and evaluation.""" 353 | self.build_inputs() 354 | self.build_image_embeddings() 355 | self.build_seq_embeddings() 356 | self.build_model() 357 | self.setup_inception_initializer() 358 | self.setup_global_step() 359 | -------------------------------------------------------------------------------- /course_8_image2txt/show_and_tell_model_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for tensorflow_models.im2txt.show_and_tell_model.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import numpy as np 24 | import tensorflow as tf 25 | 26 | from im2txt import configuration 27 | from im2txt import show_and_tell_model 28 | 29 | 30 | class ShowAndTellModel(show_and_tell_model.ShowAndTellModel): 31 | """Subclass of ShowAndTellModel without the disk I/O.""" 32 | 33 | def build_inputs(self): 34 | if self.mode == "inference": 35 | # Inference mode doesn't read from disk, so defer to parent. 36 | return super(ShowAndTellModel, self).build_inputs() 37 | else: 38 | # Replace disk I/O with random Tensors. 
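# The random tensors below mimic what the real input pipeline produces:
# images already scaled to [-1, 1] and int64 word-id sequences of a fixed
# dummy length (15).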
39 | self.images = tf.random_uniform( 40 | shape=[self.config.batch_size, self.config.image_height, 41 | self.config.image_width, 3], 42 | minval=-1, 43 | maxval=1) 44 | self.input_seqs = tf.random_uniform( 45 | [self.config.batch_size, 15], 46 | minval=0, 47 | maxval=self.config.vocab_size, 48 | dtype=tf.int64) 49 | self.target_seqs = tf.random_uniform( 50 | [self.config.batch_size, 15], 51 | minval=0, 52 | maxval=self.config.vocab_size, 53 | dtype=tf.int64) 54 | self.input_mask = tf.ones_like(self.input_seqs) 55 | 56 | 57 | class ShowAndTellModelTest(tf.test.TestCase): 58 | 59 | def setUp(self): 60 | super(ShowAndTellModelTest, self).setUp() 61 | self._model_config = configuration.ModelConfig() 62 | 63 | def _countModelParameters(self): 64 | """Counts the number of parameters in the model at top level scope.""" 65 | counter = {} 66 | for v in tf.global_variables(): 67 | name = v.op.name.split("/")[0] 68 | num_params = v.get_shape().num_elements() 69 | assert num_params 70 | counter[name] = counter.get(name, 0) + num_params 71 | return counter 72 | 73 | def _checkModelParameters(self): 74 | """Verifies the number of parameters in the model.""" 75 | param_counts = self._countModelParameters() 76 | expected_param_counts = { 77 | "InceptionV3": 21802784, 78 | # inception_output_size * embedding_size 79 | "image_embedding": 1048576, 80 | # vocab_size * embedding_size 81 | "seq_embedding": 6144000, 82 | # (embedding_size + num_lstm_units + 1) * 4 * num_lstm_units 83 | "lstm": 2099200, 84 | # (num_lstm_units + 1) * vocab_size 85 | "logits": 6156000, 86 | "global_step": 1, 87 | } 88 | self.assertDictEqual(expected_param_counts, param_counts) 89 | 90 | def _checkOutputs(self, expected_shapes, feed_dict=None): 91 | """Verifies that the model produces expected outputs. 92 | 93 | Args: 94 | expected_shapes: A dict mapping Tensor or Tensor name to expected output 95 | shape. 96 | feed_dict: Values of Tensors to feed into Session.run(). 97 | """ 98 | fetches = expected_shapes.keys() 99 | 100 | with self.test_session() as sess: 101 | sess.run(tf.global_variables_initializer()) 102 | outputs = sess.run(fetches, feed_dict) 103 | 104 | for index, output in enumerate(outputs): 105 | tensor = fetches[index] 106 | expected = expected_shapes[tensor] 107 | actual = output.shape 108 | if expected != actual: 109 | self.fail("Tensor %s has shape %s (expected %s)." 
% 110 | (tensor, actual, expected)) 111 | 112 | def testBuildForTraining(self): 113 | model = ShowAndTellModel(self._model_config, mode="train") 114 | model.build() 115 | 116 | self._checkModelParameters() 117 | 118 | expected_shapes = { 119 | # [batch_size, image_height, image_width, 3] 120 | model.images: (32, 299, 299, 3), 121 | # [batch_size, sequence_length] 122 | model.input_seqs: (32, 15), 123 | # [batch_size, sequence_length] 124 | model.target_seqs: (32, 15), 125 | # [batch_size, sequence_length] 126 | model.input_mask: (32, 15), 127 | # [batch_size, embedding_size] 128 | model.image_embeddings: (32, 512), 129 | # [batch_size, sequence_length, embedding_size] 130 | model.seq_embeddings: (32, 15, 512), 131 | # Scalar 132 | model.total_loss: (), 133 | # [batch_size * sequence_length] 134 | model.target_cross_entropy_losses: (480,), 135 | # [batch_size * sequence_length] 136 | model.target_cross_entropy_loss_weights: (480,), 137 | } 138 | self._checkOutputs(expected_shapes) 139 | 140 | def testBuildForEval(self): 141 | model = ShowAndTellModel(self._model_config, mode="eval") 142 | model.build() 143 | 144 | self._checkModelParameters() 145 | 146 | expected_shapes = { 147 | # [batch_size, image_height, image_width, 3] 148 | model.images: (32, 299, 299, 3), 149 | # [batch_size, sequence_length] 150 | model.input_seqs: (32, 15), 151 | # [batch_size, sequence_length] 152 | model.target_seqs: (32, 15), 153 | # [batch_size, sequence_length] 154 | model.input_mask: (32, 15), 155 | # [batch_size, embedding_size] 156 | model.image_embeddings: (32, 512), 157 | # [batch_size, sequence_length, embedding_size] 158 | model.seq_embeddings: (32, 15, 512), 159 | # Scalar 160 | model.total_loss: (), 161 | # [batch_size * sequence_length] 162 | model.target_cross_entropy_losses: (480,), 163 | # [batch_size * sequence_length] 164 | model.target_cross_entropy_loss_weights: (480,), 165 | } 166 | self._checkOutputs(expected_shapes) 167 | 168 | def testBuildForInference(self): 169 | model = ShowAndTellModel(self._model_config, mode="inference") 170 | model.build() 171 | 172 | self._checkModelParameters() 173 | 174 | # Test feeding an image to get the initial LSTM state. 175 | images_feed = np.random.rand(1, 299, 299, 3) 176 | feed_dict = {model.images: images_feed} 177 | expected_shapes = { 178 | # [batch_size, embedding_size] 179 | model.image_embeddings: (1, 512), 180 | # [batch_size, 2 * num_lstm_units] 181 | "lstm/initial_state:0": (1, 1024), 182 | } 183 | self._checkOutputs(expected_shapes, feed_dict) 184 | 185 | # Test feeding a batch of inputs and LSTM states to get softmax output and 186 | # LSTM states. 187 | input_feed = np.random.randint(0, 10, size=3) 188 | state_feed = np.random.rand(3, 1024) 189 | feed_dict = {"input_feed:0": input_feed, "lstm/state_feed:0": state_feed} 190 | expected_shapes = { 191 | # [batch_size, 2 * num_lstm_units] 192 | "lstm/state:0": (3, 1024), 193 | # [batch_size, vocab_size] 194 | "softmax:0": (3, 12000), 195 | } 196 | self._checkOutputs(expected_shapes, feed_dict) 197 | 198 | 199 | if __name__ == "__main__": 200 | tf.test.main() 201 | -------------------------------------------------------------------------------- /course_8_image2txt/train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Train the model.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | 24 | from im2txt import configuration 25 | from im2txt import show_and_tell_model 26 | 27 | FLAGS = tf.app.flags.FLAGS 28 | 29 | tf.flags.DEFINE_string("input_file_pattern", "", 30 | "File pattern of sharded TFRecord input files.") 31 | tf.flags.DEFINE_string("inception_checkpoint_file", "", 32 | "Path to a pretrained inception_v3 model.") 33 | tf.flags.DEFINE_string("train_dir", "", 34 | "Directory for saving and loading model checkpoints.") 35 | tf.flags.DEFINE_boolean("train_inception", False, 36 | "Whether to train inception submodel variables.") 37 | tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.") 38 | tf.flags.DEFINE_integer("log_every_n_steps", 1, 39 | "Frequency at which loss and global step are logged.") 40 | 41 | tf.logging.set_verbosity(tf.logging.INFO) 42 | 43 | 44 | def main(unused_argv): 45 | assert FLAGS.input_file_pattern, "--input_file_pattern is required" 46 | assert FLAGS.train_dir, "--train_dir is required" 47 | 48 | model_config = configuration.ModelConfig() 49 | model_config.input_file_pattern = FLAGS.input_file_pattern 50 | model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file 51 | training_config = configuration.TrainingConfig() 52 | 53 | # Create training directory. 54 | train_dir = FLAGS.train_dir 55 | if not tf.gfile.IsDirectory(train_dir): 56 | tf.logging.info("Creating training directory: %s", train_dir) 57 | tf.gfile.MakeDirs(train_dir) 58 | 59 | # Build the TensorFlow graph. 60 | g = tf.Graph() 61 | with g.as_default(): 62 | # Build the model. 63 | model = show_and_tell_model.ShowAndTellModel( 64 | model_config, mode="train", train_inception=FLAGS.train_inception) 65 | model.build() 66 | 67 | # Set up the learning rate. 68 | learning_rate_decay_fn = None 69 | if FLAGS.train_inception: 70 | learning_rate = tf.constant(training_config.train_inception_learning_rate) 71 | else: 72 | learning_rate = tf.constant(training_config.initial_learning_rate) 73 | if training_config.learning_rate_decay_factor > 0: 74 | num_batches_per_epoch = (training_config.num_examples_per_epoch / 75 | model_config.batch_size) 76 | decay_steps = int(num_batches_per_epoch * 77 | training_config.num_epochs_per_decay) 78 | 79 | def _learning_rate_decay_fn(learning_rate, global_step): 80 | return tf.train.exponential_decay( 81 | learning_rate, 82 | global_step, 83 | decay_steps=decay_steps, 84 | decay_rate=training_config.learning_rate_decay_factor, 85 | staircase=True) 86 | 87 | learning_rate_decay_fn = _learning_rate_decay_fn 88 | 89 | # Set up the training ops. 
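# optimize_loss wires together the optimizer named in training_config,
# gradient clipping, and the learning-rate decay schedule defined above (when
# one is set), and increments the global step on each training iteration.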
90 | train_op = tf.contrib.layers.optimize_loss( 91 | loss=model.total_loss, 92 | global_step=model.global_step, 93 | learning_rate=learning_rate, 94 | optimizer=training_config.optimizer, 95 | clip_gradients=training_config.clip_gradients, 96 | learning_rate_decay_fn=learning_rate_decay_fn) 97 | 98 | # Set up the Saver for saving and restoring model checkpoints. 99 | saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep) 100 | 101 | # Run training. 102 | tf.contrib.slim.learning.train( 103 | train_op, 104 | train_dir, 105 | log_every_n_steps=FLAGS.log_every_n_steps, 106 | graph=g, 107 | global_step=model.global_step, 108 | number_of_steps=FLAGS.number_of_steps, 109 | init_fn=model.init_fn, 110 | saver=saver) 111 | 112 | 113 | if __name__ == "__main__": 114 | tf.app.run() 115 | -------------------------------------------------------------------------------- /course_9_pix2pix_file.md: -------------------------------------------------------------------------------- 1 | https://github.com/wiibrew/pix2pix-tensorflow-1 2 | -------------------------------------------------------------------------------- /course_example_vgg/.gitignore: -------------------------------------------------------------------------------- 1 | vgg16.npy 2 | -------------------------------------------------------------------------------- /course_example_vgg/course_4_vgg16_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | # import matplotlib.pyplot as plt 4 | import matplotlib.image as mpimg 5 | import skimage 6 | import vgg16 7 | import utils 8 | 9 | 10 | img1 = utils.load_image("./test_data/dog.png") 11 | 12 | print img1.shape 13 | 14 | 15 | batch = img1.reshape((1, 224, 224, 3)) 16 | 17 | #plot the image 18 | 19 | # imgshow1=plt.imshow(img1) 20 | 21 | # with tf.Session(config=tf.ConfigProto(gpu_options=(tf.GPUOptions(per_process_gpu_memory_fraction=0.7)))) as sess: 22 | with tf.device('/cpu:0'): 23 | with tf.Session() as sess: 24 | images = tf.placeholder("float", [1, 224, 224, 3]) 25 | feed_dict = {images: batch} 26 | 27 | vgg = vgg16.Vgg16() 28 | with tf.name_scope("content_vgg"): 29 | vgg.build(images) 30 | 31 | prob = sess.run(vgg.prob, feed_dict=feed_dict) 32 | top5 = np.argsort(prob[0])[-1:-6:-1] 33 | for n, label in enumerate(top5): 34 | print label 35 | pool1 = sess.run(vgg.pool1, feed_dict=feed_dict) 36 | print pool1.shape 37 | conv3_3=sess.run(vgg.conv3_3, feed_dict=feed_dict) 38 | print conv3_3.shape 39 | #now let's plot the model filters 40 | vgg = vgg16.Vgg16() 41 | 42 | #get the saved parameter dict keys 43 | print vgg.data_dict.keys() 44 | 45 | #show the first conv layer 46 | filter_conv1=vgg.get_conv_filter("conv1_1") 47 | print 'filter_conv1', filter_conv1.shape 48 | 49 | tf.Print(filter_conv1[:,:,:,:5],[filter_conv1[:,:,:,:5]]) 50 | 51 | filter_conv3=vgg.get_conv_filter("conv3_3") 52 | print 'filter_conv3', filter_conv3.shape 53 | 54 | tf.Print(filter_conv3[:,:,:3,:5],[filter_conv3[:,:,:3,:5]]) -------------------------------------------------------------------------------- /course_example_vgg/np_plot.py: -------------------------------------------------------------------------------- 1 | #the script is for geneate a figure for deep learning model 2 | #datablob and filter parameter visualization 3 | import numpy as np 4 | import skimage 5 | 6 | def plot_array(arr4d,size,stride=5): 7 | # 8 | _,h,w,d=arr4d.shape 9 | N_blocks=size/(h+stride) 10 | step=h+stride 11 | 
Im_arr=np.zeros((size,size)) 12 | cnt=0 13 | for i in range(N_blocks): 14 | for j in range(N_blocks): 15 | Im_arr[i*step:(i+1)*step,j*step:(j+1)*step]=arr4d[0,:,:,cnt] 16 | cnt+=1 17 | return Im_arr 18 | def plot_filter(arr4d,filter_num=6, stride=1): 19 | h=3 20 | N_blocks=filter_num 21 | step=4 22 | Im_arr=np.zeros((filter_num*step,filter_num*step)) 23 | cnt=0 24 | for i in range(N_blocks): 25 | for j in range(N_blocks): 26 | Im_arr[i*step:(i+1)*step,j*step:(j+1)*step]=arr4d[:,:,0,cnt] 27 | cnt+=1 28 | # optional: upsample the small mosaic for display, e.g. Im_arr = skimage.transform.resize(Im_arr, (200, 200)) after importing skimage.transform 29 | return Im_arr -------------------------------------------------------------------------------- /course_example_vgg/np_plot.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/course_example_vgg/np_plot.pyc -------------------------------------------------------------------------------- /course_example_vgg/test_data/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/course_example_vgg/test_data/dog.png -------------------------------------------------------------------------------- /course_example_vgg/test_data/puzzle.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/course_example_vgg/test_data/puzzle.jpeg -------------------------------------------------------------------------------- /course_example_vgg/test_data/tiger.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/course_example_vgg/test_data/tiger.jpeg -------------------------------------------------------------------------------- /course_example_vgg/utils.py: -------------------------------------------------------------------------------- 1 | import skimage 2 | import skimage.io 3 | import skimage.transform 4 | import numpy as np 5 | 6 | 7 | # synset = [l.strip() for l in open('synset.txt').readlines()] 8 | 9 | 10 | # returns image of shape [224, 224, 3] 11 | # [height, width, depth] 12 | def load_image(path): 13 | # load image 14 | img = skimage.io.imread(path) 15 | img = img / 255.0 16 | assert (0 <= img).all() and (img <= 1.0).all() 17 | # print "Original Image Shape: ", img.shape 18 | # we crop image from center 19 | short_edge = min(img.shape[:2]) 20 | yy = int((img.shape[0] - short_edge) / 2) 21 | xx = int((img.shape[1] - short_edge) / 2) 22 | crop_img = img[yy: yy + short_edge, xx: xx + short_edge] 23 | # resize to 224, 224 24 | resized_img = skimage.transform.resize(crop_img, (224, 224)) 25 | return resized_img 26 | 27 | 28 | # returns the top1 string 29 | def print_prob(prob, file_path): 30 | synset = [l.strip() for l in open(file_path).readlines()] 31 | 32 | # print prob 33 | pred = np.argsort(prob)[::-1] 34 | 35 | # Get top1 label 36 | top1 = synset[pred[0]] 37 | print(("Top1: ", top1, prob[pred[0]])) 38 | # Get top5 label 39 | top5 = [(synset[pred[i]], prob[pred[i]]) for i in range(5)] 40 | print(("Top5: ", top5)) 41 | return top1 42 | 43 | 44 | def load_image2(path, height=None, width=None): 45 | # load image 46 | img = skimage.io.imread(path) 47 | img = img / 255.0 48 | if height is not None and width is not
None: 49 | ny = height 50 | nx = width 51 | elif height is not None: 52 | ny = height 53 | nx = img.shape[1] * ny / img.shape[0] 54 | elif width is not None: 55 | nx = width 56 | ny = img.shape[0] * nx / img.shape[1] 57 | else: 58 | ny = img.shape[0] 59 | nx = img.shape[1] 60 | return skimage.transform.resize(img, (ny, nx)) 61 | 62 | 63 | def test(): 64 | img = skimage.io.imread("./test_data/starry_night.jpg") 65 | ny = 300 66 | nx = img.shape[1] * ny / img.shape[0] 67 | img = skimage.transform.resize(img, (ny, nx)) 68 | skimage.io.imsave("./test_data/test/output.jpg", img) 69 | 70 | 71 | if __name__ == "__main__": 72 | test() 73 | -------------------------------------------------------------------------------- /course_example_vgg/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/course_example_vgg/utils.pyc -------------------------------------------------------------------------------- /course_example_vgg/vgg-model-download-link: -------------------------------------------------------------------------------- 1 | vgg16 model download link: 2 | https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM 3 | vgg19 model download link: -------------------------------------------------------------------------------- /course_example_vgg/vgg-model-download-link.txt: -------------------------------------------------------------------------------- 1 | vgg16 model download link: 2 | https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM 3 | vgg19 model download link: 4 | https://mega.nz/#!xZ8glS6J!MAnE91ND_WyfZ_8mvkuSa2YcA7q-1ehfSm-Q1fxOvvs -------------------------------------------------------------------------------- /course_example_vgg/vgg16.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | 8 | VGG_MEAN = [103.939, 116.779, 123.68] 9 | 10 | 11 | class Vgg16: 12 | def __init__(self, vgg16_npy_path=None): 13 | if vgg16_npy_path is None: 14 | path = inspect.getfile(Vgg16) 15 | path = os.path.abspath(os.path.join(path, os.pardir)) 16 | path = os.path.join(path, "vgg16.npy") 17 | vgg16_npy_path = path 18 | print(path) 19 | 20 | self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item() 21 | print("npy file loaded") 22 | 23 | def build(self, rgb): 24 | """ 25 | load variable from npy to build the VGG 26 | :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1] 27 | """ 28 | 29 | start_time = time.time() 30 | print("build model started") 31 | rgb_scaled = rgb * 255.0 32 | 33 | # Convert RGB to BGR 34 | red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled) 35 | assert red.get_shape().as_list()[1:] == [224, 224, 1] 36 | assert green.get_shape().as_list()[1:] == [224, 224, 1] 37 | assert blue.get_shape().as_list()[1:] == [224, 224, 1] 38 | bgr = tf.concat(axis=3, values=[ 39 | blue - VGG_MEAN[0], 40 | green - VGG_MEAN[1], 41 | red - VGG_MEAN[2], 42 | ]) 43 | assert bgr.get_shape().as_list()[1:] == [224, 224, 3] 44 | 45 | self.conv1_1 = self.conv_layer(bgr, "conv1_1") 46 | self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2") 47 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 48 | 49 | self.conv2_1 = self.conv_layer(self.pool1, "conv2_1") 50 | self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2") 51 | self.pool2 = 
self.max_pool(self.conv2_2, 'pool2') 52 | 53 | self.conv3_1 = self.conv_layer(self.pool2, "conv3_1") 54 | self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2") 55 | self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3") 56 | self.pool3 = self.max_pool(self.conv3_3, 'pool3') 57 | 58 | self.conv4_1 = self.conv_layer(self.pool3, "conv4_1") 59 | self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2") 60 | self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3") 61 | self.pool4 = self.max_pool(self.conv4_3, 'pool4') 62 | 63 | self.conv5_1 = self.conv_layer(self.pool4, "conv5_1") 64 | self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2") 65 | self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3") 66 | self.pool5 = self.max_pool(self.conv5_3, 'pool5') 67 | 68 | self.fc6 = self.fc_layer(self.pool5, "fc6") 69 | assert self.fc6.get_shape().as_list()[1:] == [4096] 70 | self.relu6 = tf.nn.relu(self.fc6) 71 | 72 | self.fc7 = self.fc_layer(self.relu6, "fc7") 73 | self.relu7 = tf.nn.relu(self.fc7) 74 | 75 | self.fc8 = self.fc_layer(self.relu7, "fc8") 76 | 77 | self.prob = tf.nn.softmax(self.fc8, name="prob") 78 | 79 | self.data_dict = None 80 | print(("build model finished: %ds" % (time.time() - start_time))) 81 | 82 | def avg_pool(self, bottom, name): 83 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 84 | 85 | def max_pool(self, bottom, name): 86 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 87 | 88 | def conv_layer(self, bottom, name): 89 | with tf.variable_scope(name): 90 | filt = self.get_conv_filter(name) 91 | 92 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 93 | 94 | conv_biases = self.get_bias(name) 95 | bias = tf.nn.bias_add(conv, conv_biases) 96 | 97 | relu = tf.nn.relu(bias) 98 | return relu 99 | 100 | def fc_layer(self, bottom, name): 101 | with tf.variable_scope(name): 102 | shape = bottom.get_shape().as_list() 103 | dim = 1 104 | for d in shape[1:]: 105 | dim *= d 106 | x = tf.reshape(bottom, [-1, dim]) 107 | 108 | weights = self.get_fc_weight(name) 109 | biases = self.get_bias(name) 110 | 111 | # Fully connected layer. Note that the '+' operation automatically 112 | # broadcasts the biases. 
113 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 114 | 115 | return fc 116 | 117 | def get_conv_filter(self, name): 118 | return tf.constant(self.data_dict[name][0], name="filter") 119 | 120 | def get_bias(self, name): 121 | return tf.constant(self.data_dict[name][1], name="biases") 122 | 123 | def get_fc_weight(self, name): 124 | return tf.constant(self.data_dict[name][0], name="weights") -------------------------------------------------------------------------------- /course_example_vgg/vgg16_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | import vgg16 5 | import utils 6 | from imagenet1000_clsid_to_human import labels 7 | 8 | img1 = utils.load_image("./test_data/tiger.jpeg") 9 | img2 = utils.load_image("./test_data/puzzle.jpeg") 10 | 11 | batch1 = img1.reshape((1, 224, 224, 3)) 12 | batch2 = img2.reshape((1, 224, 224, 3)) 13 | 14 | batch = np.concatenate((batch1, batch2), 0) 15 | 16 | def percent(v): 17 | return '%.2f%%' % (v * 100) 18 | 19 | # with tf.Session(config=tf.ConfigProto(gpu_options=(tf.GPUOptions(per_process_gpu_memory_fraction=0.7)))) as sess: 20 | with tf.device('/cpu:0'): 21 | with tf.Session() as sess: 22 | images = tf.placeholder("float", [2, 224, 224, 3]) 23 | feed_dict = {images: batch} 24 | 25 | vgg = vgg16.Vgg16() 26 | with tf.name_scope("content_vgg"): 27 | vgg.build(images) 28 | 29 | prob = sess.run(vgg.prob, feed_dict=feed_dict) 30 | for i, p in enumerate(prob): 31 | v = sess.run(tf.nn.top_k(p, 5)) 32 | print('-'*4) 33 | for j, k in enumerate(v.indices): 34 | print(labels[k], ':', percent(v.values[j])) 35 | -------------------------------------------------------------------------------- /g3doc/COCO_val2014_000000224477.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/g3doc/COCO_val2014_000000224477.jpg -------------------------------------------------------------------------------- /g3doc/example_captions.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/g3doc/example_captions.jpg -------------------------------------------------------------------------------- /g3doc/show_and_tell_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/g3doc/show_and_tell_architecture.png -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/activations.py: -------------------------------------------------------------------------------- 1 | """Activations for TensorFlow. 2 | Parag K. Mital, Jan 2016.""" 3 | import tensorflow as tf 4 | 5 | 6 | def lrelu(x, leak=0.2, name="lrelu"): 7 | """Leaky rectifier. 8 | 9 | Parameters 10 | ---------- 11 | x : Tensor 12 | The tensor to apply the nonlinearity to. 13 | leak : float, optional 14 | Leakage parameter. 
15 | name : str, optional 16 | Variable scope to use. 17 | 18 | Returns 19 | ------- 20 | x : Tensor 21 | Output of the nonlinearity. 22 | """ 23 | with tf.variable_scope(name): 24 | f1 = 0.5 * (1 + leak) 25 | f2 = 0.5 * (1 - leak) 26 | return f1 * x + f2 * abs(x) 27 | -------------------------------------------------------------------------------- /libs/batch_norm.py: -------------------------------------------------------------------------------- 1 | """Batch Normalization for TensorFlow. 2 | Parag K. Mital, Jan 2016. 3 | """ 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def batch_norm(x, phase_train, scope='bn', affine=True): 9 | """ 10 | Batch normalization on convolutional maps. 11 | 12 | from: https://stackoverflow.com/questions/33949786/how-could-i- 13 | use-batch-normalization-in-tensorflow 14 | 15 | Only modified to infer shape from input tensor x. 16 | 17 | Parameters 18 | ---------- 19 | x 20 | Tensor, 4D BHWD input maps 21 | phase_train 22 | boolean tf.Variable, true indicates training phase 23 | scope 24 | string, variable scope 25 | affine 26 | whether to affine-transform outputs 27 | 28 | Return 29 | ------ 30 | normed 31 | batch-normalized maps 32 | """ 33 | with tf.variable_scope(scope): 34 | shape = x.get_shape().as_list() 35 | 36 | beta = tf.Variable(tf.constant(0.0, shape=[shape[-1]]), 37 | name='beta', trainable=True) 38 | gamma = tf.Variable(tf.constant(1.0, shape=[shape[-1]]), 39 | name='gamma', trainable=affine) 40 | 41 | batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments') 42 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 43 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 44 | 45 | def mean_var_with_update(): 46 | """Summary 47 | 48 | Returns 49 | ------- 50 | name : TYPE 51 | Description 52 | """ 53 | ema_apply_op = ema.apply([batch_mean, batch_var]) 54 | with tf.control_dependencies([ema_apply_op]): 55 | return tf.identity(batch_mean), tf.identity(batch_var) 56 | mean, var = tf.cond(phase_train, 57 | mean_var_with_update, 58 | lambda: (ema_mean, ema_var)) 59 | 60 | normed = tf.nn.batch_norm_with_global_normalization( 61 | x, mean, var, beta, gamma, 1e-3, affine) 62 | return normed 63 | -------------------------------------------------------------------------------- /libs/connections.py: -------------------------------------------------------------------------------- 1 | """APL 2.0 code from github.com/pkmital/tensorflow_tutorials w/ permission 2 | from Parag K. Mital. 3 | """ 4 | import math 5 | import tensorflow as tf 6 | 7 | 8 | def batch_norm(x, phase_train, scope='bn', affine=True): 9 | """ 10 | Batch normalization on convolutional maps. 11 | from: https://stackoverflow.com/questions/33949786/how-could-i- 12 | use-batch-normalization-in-tensorflow 13 | Only modified to infer shape from input tensor x. 
14 | Parameters 15 | ---------- 16 | x 17 | Tensor, 4D BHWD input maps 18 | phase_train 19 | boolean tf.Variable, true indicates training phase 20 | scope 21 | string, variable scope 22 | affine 23 | whether to affine-transform outputs 24 | Return 25 | ------ 26 | normed 27 | batch-normalized maps 28 | """ 29 | with tf.variable_scope(scope): 30 | og_shape = x.get_shape().as_list() 31 | if len(og_shape) == 2: 32 | x = tf.reshape(x, [-1, 1, 1, og_shape[1]]) 33 | shape = x.get_shape().as_list() 34 | beta = tf.Variable(tf.constant(0.0, shape=[shape[-1]]), 35 | name='beta', trainable=True) 36 | gamma = tf.Variable(tf.constant(1.0, shape=[shape[-1]]), 37 | name='gamma', trainable=affine) 38 | 39 | batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments') 40 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 41 | ema_apply_op = ema.apply([batch_mean, batch_var]) 42 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 43 | 44 | def mean_var_with_update(): 45 | """Summary 46 | Returns 47 | ------- 48 | name : TYPE 49 | Description 50 | """ 51 | with tf.control_dependencies([ema_apply_op]): 52 | return tf.identity(batch_mean), tf.identity(batch_var) 53 | mean, var = tf.cond(phase_train, 54 | mean_var_with_update, 55 | lambda: (ema_mean, ema_var)) 56 | 57 | normed = tf.nn.batch_norm_with_global_normalization( 58 | x, mean, var, beta, gamma, 1e-3, affine) 59 | if len(og_shape) == 2: 60 | normed = tf.reshape(normed, [-1, og_shape[-1]]) 61 | return normed 62 | 63 | 64 | def lrelu(x, leak=0.2, name="lrelu"): 65 | """Leaky rectifier. 66 | Parameters 67 | ---------- 68 | x : Tensor 69 | The tensor to apply the nonlinearity to. 70 | leak : float, optional 71 | Leakage parameter. 72 | name : str, optional 73 | Variable scope to use. 74 | Returns 75 | ------- 76 | x : Tensor 77 | Output of the nonlinearity. 78 | """ 79 | with tf.variable_scope(name): 80 | f1 = 0.5 * (1 + leak) 81 | f2 = 0.5 * (1 - leak) 82 | return f1 * x + f2 * abs(x) 83 | 84 | 85 | def linear(x, n_units, scope=None, stddev=0.02, 86 | activation=lambda x: x): 87 | """Fully-connected network. 88 | Parameters 89 | ---------- 90 | x : Tensor 91 | Input tensor to the network. 92 | n_units : int 93 | Number of units to connect to. 94 | scope : str, optional 95 | Variable scope to use. 96 | stddev : float, optional 97 | Initialization's standard deviation. 98 | activation : arguments, optional 99 | Function which applies a nonlinearity 100 | Returns 101 | ------- 102 | x : Tensor 103 | Fully-connected output. 104 | """ 105 | shape = x.get_shape().as_list() 106 | 107 | with tf.variable_scope(scope or "Linear"): 108 | matrix = tf.get_variable("Matrix", [shape[1], n_units], tf.float32, 109 | tf.random_normal_initializer(stddev=stddev)) 110 | return activation(tf.matmul(x, matrix)) 111 | 112 | 113 | def conv2d(x, n_filters, 114 | k_h=5, k_w=5, 115 | stride_h=2, stride_w=2, 116 | stddev=0.02, 117 | activation=None, 118 | bias=True, 119 | padding='SAME', 120 | name="Conv2D"): 121 | """2D Convolution with options for kernel size, stride, and init deviation. 122 | 123 | Parameters 124 | ---------- 125 | x : Tensor 126 | Input tensor to convolve. 127 | n_filters : int 128 | Number of filters to apply. 129 | k_h : int, optional 130 | Kernel height. 131 | k_w : int, optional 132 | Kernel width. 133 | stride_h : int, optional 134 | Stride in rows. 135 | stride_w : int, optional 136 | Stride in cols. 137 | stddev : float, optional 138 | Initialization's standard deviation. 
139 | activation : arguments, optional 140 | Function which applies a nonlinearity 141 | padding : str, optional 142 | 'SAME' or 'VALID' 143 | name : str, optional 144 | Variable scope to use. 145 | 146 | Returns 147 | ------- 148 | x : Tensor 149 | Convolved input. 150 | """ 151 | with tf.variable_scope(name): 152 | w = tf.get_variable( 153 | 'w', [k_h, k_w, x.get_shape()[-1], n_filters], 154 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 155 | conv = tf.nn.conv2d( 156 | x, w, strides=[1, stride_h, stride_w, 1], padding=padding) 157 | if bias: 158 | b = tf.get_variable( 159 | 'b', [n_filters], 160 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 161 | conv = tf.nn.bias_add(conv, b) 162 | if activation: 163 | conv = activation(conv) 164 | return conv 165 | -------------------------------------------------------------------------------- /libs/dataset_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | 5 | 6 | def cifar10_download(dst='cifar10'): 7 | from six.moves import urllib 8 | import tarfile 9 | if not os.path.exists(dst): 10 | os.makedirs(dst) 11 | path = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 12 | filepath, _ = urllib.request.urlretrieve(path, './') 13 | tarfile.open(filepath, 'r:gz').extractall(dst) 14 | 15 | 16 | def cifar10_load(dst='cifar10'): 17 | if not os.path.exists(dst): 18 | cifar10_download(dst) 19 | Xs = None 20 | ys = None 21 | for f in range(1, 6): 22 | cf = pickle.load(open( 23 | '%s/data_batch_%d' % (dst, f), 'rb'), 24 | encoding='LATIN') 25 | if Xs is not None: 26 | Xs = np.r_[Xs, cf['data']] 27 | ys = np.r_[ys, np.array(cf['labels'])] 28 | else: 29 | Xs = cf['data'] 30 | ys = cf['labels'] 31 | return Xs, ys 32 | 33 | 34 | def dense_to_one_hot(labels, n_classes=2): 35 | """Convert class labels from scalars to one-hot vectors.""" 36 | labels = np.array(labels) 37 | n_labels = labels.shape[0] 38 | index_offset = np.arange(n_labels) * n_classes 39 | labels_one_hot = np.zeros((n_labels, n_classes), dtype=np.float32) 40 | labels_one_hot.flat[index_offset + labels.ravel()] = 1 41 | return labels_one_hot 42 | 43 | 44 | class DatasetSplit(object): 45 | def __init__(self, images, labels): 46 | self.images = np.array(images).astype(np.float32) 47 | self.labels = np.array(labels).astype(np.int32) 48 | self.n_labels = len(np.unique(labels)) 49 | self.num_examples = len(self.images) 50 | 51 | def next_batch(self, batch_size=100): 52 | # Shuffle each epoch 53 | current_permutation = np.random.permutation(range(len(self.images))) 54 | epoch_images = self.images[current_permutation, ...] 
55 | epoch_labels = dense_to_one_hot( 56 | self.labels[current_permutation, ...], self.n_labels) 57 | 58 | # Then iterate over the epoch 59 | self.current_batch_idx = 0 60 | while self.current_batch_idx < len(self.images): 61 | end_idx = min( 62 | self.current_batch_idx + batch_size, len(self.images)) 63 | this_batch = { 64 | 'images': epoch_images[self.current_batch_idx:end_idx], 65 | 'labels': epoch_labels[self.current_batch_idx:end_idx] 66 | } 67 | self.current_batch_idx += batch_size 68 | yield this_batch['images'], this_batch['labels'] 69 | 70 | 71 | class Dataset(object): 72 | def __init__(self, Xs, ys, split=[0.8, 0.1, 0.1]): 73 | 74 | self.all_idxs = [] 75 | self.all_labels = [] 76 | self.all_inputs = [] 77 | self.train_idxs = [] 78 | self.valid_idxs = [] 79 | self.test_idxs = [] 80 | self.n_labels = 0 81 | self.split = split 82 | 83 | # Now mix all the labels that are currently stored as blocks 84 | self.all_inputs = Xs 85 | self.all_labels = ys 86 | n_idxs = len(self.all_inputs) 87 | idxs = range(n_idxs) 88 | rand_idxs = np.random.permutation(idxs) 89 | self.all_inputs = self.all_inputs[rand_idxs, ...] 90 | self.all_labels = self.all_labels[rand_idxs, ...] 91 | 92 | # Get splits 93 | self.train_idxs = idxs[:round(split[0] * n_idxs)] 94 | self.valid_idxs = idxs[len(self.train_idxs): 95 | len(self.train_idxs) + round(split[1] * n_idxs)] 96 | self.test_idxs = idxs[len(self.valid_idxs): 97 | len(self.valid_idxs) + round(split[2] * n_idxs)] 98 | 99 | @property 100 | def train(self): 101 | inputs = self.all_inputs[self.train_idxs, ...] 102 | labels = self.all_labels[self.train_idxs, ...] 103 | return DatasetSplit(inputs, labels) 104 | 105 | @property 106 | def valid(self): 107 | inputs = self.all_inputs[self.valid_idxs, ...] 108 | labels = self.all_labels[self.valid_idxs, ...] 109 | return DatasetSplit(inputs, labels) 110 | 111 | @property 112 | def test(self): 113 | inputs = self.all_inputs[self.test_idxs, ...] 114 | labels = self.all_labels[self.test_idxs, ...] 115 | return DatasetSplit(inputs, labels) 116 | 117 | def mean(self): 118 | return np.mean(self.all_inputs, axis=0) 119 | 120 | def std(self): 121 | return np.std(self.all_inputs, axis=0) 122 | -------------------------------------------------------------------------------- /libs/datasets.py: -------------------------------------------------------------------------------- 1 | """Loading datasets. 2 | 3 | Parag K. Mital, Jan. 2016 4 | """ 5 | import tensorflow.examples.tutorials.mnist.input_data as input_data 6 | from .dataset_utils import * 7 | 8 | 9 | def MNIST(one_hot=True): 10 | """Returns the MNIST dataset. 11 | 12 | Returns 13 | ------- 14 | mnist : DataSet 15 | DataSet object w/ convenienve props for accessing 16 | train/validation/test sets and batches. 17 | """ 18 | return input_data.read_data_sets('MNIST_data/', one_hot=one_hot) 19 | 20 | 21 | def CIFAR10(): 22 | # plt.imshow(np.transpose(np.reshape(cifar.train.images[10], (3, 32, 32)), [1, 2, 0])) 23 | Xs, ys = cifar10_load() 24 | return Dataset(Xs, ys) 25 | -------------------------------------------------------------------------------- /libs/utils.py: -------------------------------------------------------------------------------- 1 | """Some useful utilities when dealing with neural nets w/ tensorflow. 2 | 3 | Parag K. Mital, Jan. 2016 4 | """ 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | def montage_batch(images): 10 | """Draws all filters (n_input * n_output filters) as a 11 | montage image separated by 1 pixel borders. 
12 | 13 | Parameters 14 | ---------- 15 | batch : numpy.ndarray 16 | Input array to create montage of. 17 | 18 | Returns 19 | ------- 20 | m : numpy.ndarray 21 | Montage image. 22 | """ 23 | img_h = images.shape[1] 24 | img_w = images.shape[2] 25 | n_plots = int(np.ceil(np.sqrt(images.shape[0]))) 26 | m = np.ones( 27 | (images.shape[1] * n_plots + n_plots + 1, 28 | images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5 29 | 30 | for i in range(n_plots): 31 | for j in range(n_plots): 32 | this_filter = i * n_plots + j 33 | if this_filter < images.shape[0]: 34 | this_img = images[this_filter, ...] 35 | m[1 + i + i * img_h:1 + i + (i + 1) * img_h, 36 | 1 + j + j * img_w:1 + j + (j + 1) * img_w, :] = this_img 37 | return m 38 | 39 | 40 | # %% 41 | def montage(W): 42 | """Draws all filters (n_input * n_output filters) as a 43 | montage image separated by 1 pixel borders. 44 | 45 | Parameters 46 | ---------- 47 | W : numpy.ndarray 48 | Input array to create montage of. 49 | 50 | Returns 51 | ------- 52 | m : numpy.ndarray 53 | Montage image. 54 | """ 55 | W = np.reshape(W, [W.shape[0], W.shape[1], 1, W.shape[2] * W.shape[3]]) 56 | n_plots = int(np.ceil(np.sqrt(W.shape[-1]))) 57 | m = np.ones( 58 | (W.shape[0] * n_plots + n_plots + 1, 59 | W.shape[1] * n_plots + n_plots + 1)) * 0.5 60 | for i in range(n_plots): 61 | for j in range(n_plots): 62 | this_filter = i * n_plots + j 63 | if this_filter < W.shape[-1]: 64 | m[1 + i + i * W.shape[0]:1 + i + (i + 1) * W.shape[0], 65 | 1 + j + j * W.shape[1]:1 + j + (j + 1) * W.shape[1]] = ( 66 | np.squeeze(W[:, :, :, this_filter])) 67 | return m 68 | 69 | 70 | 71 | 72 | # %% 73 | def corrupt(x): 74 | """Take an input tensor and add uniform masking. 75 | 76 | Parameters 77 | ---------- 78 | x : Tensor/Placeholder 79 | Input to corrupt. 80 | 81 | Returns 82 | ------- 83 | x_corrupted : Tensor 84 | 50 pct of values corrupted. 85 | """ 86 | return tf.multiply(x, tf.cast(tf.random_uniform(shape=tf.shape(x), 87 | minval=0, 88 | maxval=2, 89 | dtype=tf.int32), tf.float32)) 90 | 91 | 92 | # %% 93 | def weight_variable(shape): 94 | '''Helper function to create a weight variable initialized with 95 | a normal distribution 96 | 97 | Parameters 98 | ---------- 99 | shape : list 100 | Size of weight variable 101 | ''' 102 | initial = tf.random_normal(shape, mean=0.0, stddev=0.01) 103 | return tf.Variable(initial) 104 | 105 | 106 | # %% 107 | def bias_variable(shape): 108 | '''Helper function to create a bias variable initialized with 109 | a constant value. 
110 | 111 | Parameters 112 | ---------- 113 | shape : list 114 | Size of weight variable 115 | ''' 116 | initial = tf.random_normal(shape, mean=0.0, stddev=0.01) 117 | return tf.Variable(initial) 118 | -------------------------------------------------------------------------------- /mnist/mnist.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/mnist/mnist.pkl.gz -------------------------------------------------------------------------------- /mnist/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/mnist/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /mnist/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/mnist/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /mnist/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/mnist/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /mnist/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wiibrew/DeepLearningCourseCodes/6b20c12415893f270b30c3cba640732c090b49ba/mnist/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /tf_1_try.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#\n", 12 | "import tensorflow as tf" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 4, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "5\n", 27 | "6\n" 28 | ] 29 | } 30 | ], 31 | "source": [ 32 | "# direct sum with constand value\n", 33 | "a = tf.constant(2)\n", 34 | "b = tf.constant(3)\n", 35 | "c=a+b\n", 36 | "d=a*b\n", 37 | "\n", 38 | "sess=tf.Session()\n", 39 | "print sess.run(c)\n", 40 | "print sess.run(d)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 5, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "5\n", 55 | "6\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "# \n", 61 | "a = tf.placeholder(tf.int16)\n", 62 | "b = tf.placeholder(tf.int16)\n", 63 | "\n", 64 | "# \n", 65 | "add = tf.add(a, b)\n", 66 | "mul = tf.multiply(a, b)\n", 67 | "print sess.run(add, feed_dict={a: 2, b: 3})\n", 68 | "print sess.run(mul, feed_dict={a: 2, b: 3})" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "[[ 6. 
6.]\n", 83 | " [ 6. 6.]]\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#\n", 89 | "matrix1 = tf.constant([[3., 3.]])\n", 90 | "matrix2 = tf.constant([[2.],[2.]])\n", 91 | "product = tf.matmul(matrix2, matrix1)\n", 92 | "print sess.run(product)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "[[ 6. 8. 10.]\n", 107 | " [ 7. 6. 5.]\n", 108 | " [ 5. 10. 15.]]\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "mat1=tf.Variable(tf.random_normal([3,2]))\n", 114 | "mat2=tf.Variable(tf.random_normal([2,3]))\n", 115 | "product=tf.matmul(mat1,mat2)\n", 116 | "\n", 117 | "m1=[[1,3],[2,1],[0,5]]\n", 118 | "m2=[[3,2,1],[1,2,3]]\n", 119 | "\n", 120 | "print sess.run(product,feed_dict={mat1:m1,mat2:m2})" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 2", 127 | "language": "python", 128 | "name": "python2" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 2 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython2", 140 | "version": "2.7.12" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 2 145 | } 146 | --------------------------------------------------------------------------------
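A closing usage note: the helpers in libs/connections.py (conv2d, batch_norm, lrelu, linear) appear above only as definitions. The following is a minimal sketch of how they compose into a single conv block under the TF 1.x API used throughout this repo; the input size, filter count, and layer names are illustrative assumptions, not taken from any course script.

import tensorflow as tf

from libs.connections import conv2d, batch_norm, lrelu, linear

# Placeholders: a batch of 32x32 RGB images and the usual train/eval switch.
x = tf.placeholder(tf.float32, [None, 32, 32, 3], name='x')
phase_train = tf.placeholder(tf.bool, name='phase_train')

# conv (stride 2 by default) -> batch norm -> leaky ReLU -> linear read-out.
h = conv2d(x, n_filters=16, k_h=3, k_w=3, name='conv1')   # -> [None, 16, 16, 16]
h = batch_norm(h, phase_train, scope='bn1')
h = lrelu(h, name='lrelu1')
h = tf.reshape(h, [-1, 16 * 16 * 16])                     # flatten for the linear layer
logits = linear(h, n_units=10, scope='readout')

At run time, phase_train would be fed True during training (so batch_norm updates its moving averages through the EMA op) and False for evaluation.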
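Similarly, libs/dataset_utils.py defines Dataset and DatasetSplit, but no course file above drives them directly. Here is a minimal, self-contained sketch using small random arrays in place of CIFAR-10 (so the download helper is not needed); the array shapes, class count, and batch size are illustrative assumptions.

import numpy as np

from libs.dataset_utils import Dataset

# Stand-in data: 100 tiny 8x8 RGB "images" over 4 classes.
Xs = np.random.rand(100, 8, 8, 3).astype(np.float32)
ys = np.tile(np.arange(4), 25)

# Shuffles internally and splits 80/10/10 into train/valid/test.
ds = Dataset(Xs, ys, split=[0.8, 0.1, 0.1])
print('train/valid/test sizes:',
      ds.train.num_examples, ds.valid.num_examples, ds.test.num_examples)

# One epoch of shuffled mini-batches; labels come back one-hot encoded.
for batch_images, batch_labels in ds.train.next_batch(batch_size=16):
    print(batch_images.shape, batch_labels.shape)   # (16, 8, 8, 3) (16, 4)

Note that each access to ds.train builds a fresh DatasetSplit, so it is worth assigning it to a variable before looping over several epochs.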