├── faus_frontpage.png ├── tfd ├── data_paths.py ├── model.py ├── tfd_single_checkpoint_evaluator.py ├── tfd_checkpoint_checker.py ├── cnn │ └── train.py ├── cnn_d │ └── train.py ├── cnn_a │ └── train.py ├── cnn_ad │ └── train.py └── README.md ├── ck_plus ├── data_paths.py ├── data_fold_loader.py ├── model.py ├── ck_plus_single_checkpoint_evaluator.py ├── ck_plus_checkpoint_checker.py ├── cnn │ └── train.py ├── cnn_d │ └── train.py ├── cnn_a │ └── train.py ├── cnn_ad │ └── train.py └── README.md ├── ck_plus_six_class ├── data_paths.py ├── data_fold_loader.py ├── model.py ├── README.md ├── ck_plus_single_checkpoint_evaluator.py ├── cnn_ad │ └── train.py └── ck_plus_checkpoint_checker.py ├── .gitignore ├── LICENSE ├── README.md └── data_scripts ├── README.md ├── make_tfd_dataset.py └── make_ck_plus_dataset.py /faus_frontpage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ifp-uiuc/do-neural-networks-learn-faus-iccvw-2015/HEAD/faus_frontpage.png -------------------------------------------------------------------------------- /tfd/data_paths.py: -------------------------------------------------------------------------------- 1 | tfd_data_path = '/experiments/do-neural-networks-learn-faus-iccvw-2015/\ 2 | data_scripts/TFD_HERE/' 3 | -------------------------------------------------------------------------------- /ck_plus/data_paths.py: -------------------------------------------------------------------------------- 1 | ck_plus_data_path = '/experiments/do-neural-networks-learn-faus-iccvw-2015/\ 2 | data_scripts/CK_PLUS_HERE/npy_files/' 3 | -------------------------------------------------------------------------------- /ck_plus_six_class/data_paths.py: -------------------------------------------------------------------------------- 1 | ck_plus_data_path = '/experiments/do-neural-networks-learn-faus-iccvw-2015/\ 2 | data_scripts/CK_PLUS_HERE/npy_files/' 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binary Files 2 | *.npy 3 | *.pkl 4 | *.pickle 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # Other 62 | pid* 63 | log* 64 | *.txt 65 | *.jpeg 66 | *.png 67 | -------------------------------------------------------------------------------- /ck_plus/data_fold_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | 4 | 5 | def load_folds(dataset_path, fold_nums): 6 | X = numpy.load(os.path.join(dataset_path, 'X.npy')) 7 | y = numpy.load(os.path.join(dataset_path, 'y.npy')) 8 | folds = numpy.load(os.path.join(dataset_path, 'folds.npy')) 9 | 10 | #assert fold <= folds.max(), \ 11 | # 'Fold number exceeds available number of folds. Please try again.' 12 | 13 | X_all = [] 14 | y_all = [] 15 | for fold in fold_nums: 16 | mask = (folds == fold) 17 | 18 | print 'Fold %d' % fold 19 | X_fold = X[mask, :, :, :] 20 | y_fold = y[mask] 21 | print X_fold.shape, y_fold.shape 22 | 23 | X_all.append(X_fold) 24 | y_all.append(y_fold) 25 | 26 | X_all = numpy.concatenate(X_all, axis=0) 27 | y_all = numpy.concatenate(y_all, axis=0) 28 | print 'X_all shape: ', X_all.shape 29 | print 'y_all shape: ', y_all.shape 30 | 31 | return X_all, y_all 32 | 33 | 34 | def load_fold_assignment(test_fold): 35 | val_fold = (test_fold + 1) % 10 36 | train_fold = list(set(range(0, 10)) - set([test_fold]) - set([val_fold])) 37 | 38 | print 'Test_fold: ', test_fold 39 | print 'Val_fold: ', val_fold 40 | print 'Train_folds: ', train_fold 41 | 42 | return train_fold, val_fold, test_fold 43 | -------------------------------------------------------------------------------- /ck_plus_six_class/data_fold_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | 4 | 5 | def load_folds(dataset_path, fold_nums): 6 | X = numpy.load(os.path.join(dataset_path, 'X.npy')) 7 | y = numpy.load(os.path.join(dataset_path, 'y.npy')) 8 | folds = numpy.load(os.path.join(dataset_path, 'folds.npy')) 9 | 10 | #assert fold <= folds.max(), \ 11 | # 'Fold number exceeds available number of folds. Please try again.' 12 | 13 | X_all = [] 14 | y_all = [] 15 | for fold in fold_nums: 16 | mask = (folds == fold) 17 | 18 | print 'Fold %d' % fold 19 | X_fold = X[mask, :, :, :] 20 | y_fold = y[mask] 21 | print X_fold.shape, y_fold.shape 22 | 23 | X_all.append(X_fold) 24 | y_all.append(y_fold) 25 | 26 | X_all = numpy.concatenate(X_all, axis=0) 27 | y_all = numpy.concatenate(y_all, axis=0) 28 | print 'X_all shape: ', X_all.shape 29 | print 'y_all shape: ', y_all.shape 30 | 31 | return X_all, y_all 32 | 33 | 34 | def load_fold_assignment(test_fold): 35 | val_fold = (test_fold + 1) % 10 36 | train_fold = list(set(range(0, 10)) - set([test_fold]) - set([val_fold])) 37 | 38 | print 'Test_fold: ', test_fold 39 | print 'Val_fold: ', val_fold 40 | print 'Train_folds: ', train_fold 41 | 42 | return train_fold, val_fold, test_fold 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Image Formation and Processing at UIUC 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of do-neural-networks-learn-faus-iccvw-2015 nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /tfd/model.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from anna.layers import layers 4 | import anna.models 5 | 6 | 7 | class SupervisedModel(anna.models.SupervisedModel): 8 | batch = 64 9 | input = layers.Input2DLayer(batch, 1, 96, 96) 10 | 11 | k = float(numpy.random.rand()*1+0.2) 12 | print '## k = %.3f' % k 13 | winit1 = k/numpy.sqrt(5*5*1) 14 | winit2 = k/numpy.sqrt(5*5*64) 15 | winit3 = k/numpy.sqrt(5*5*128) 16 | 17 | def trec(x): 18 | return x*(x > 0.0) 19 | 20 | nonlinearity = trec 21 | 22 | conv1 = layers.Conv2DLayer( 23 | input, 24 | n_features=64, 25 | filter_size=5, 26 | weights_std=winit1, 27 | pad=(2, 2)) 28 | relu1 = layers.NonlinearityLayer( 29 | input=conv1, 30 | nonlinearity=nonlinearity) 31 | pool1 = layers.Pool2DLayer( 32 | input=relu1, 33 | filter_size=2, 34 | stride=(2, 2)) 35 | conv2 = layers.Conv2DLayer( 36 | input=pool1, 37 | n_features=128, 38 | filter_size=5, 39 | weights_std=winit2, 40 | pad=(2, 2)) 41 | relu2 = layers.NonlinearityLayer( 42 | input=conv2, 43 | nonlinearity=nonlinearity) 44 | pool2 = layers.Pool2DLayer( 45 | input=relu2, 46 | filter_size=2, 47 | stride=(2, 2)) 48 | conv3 = layers.Conv2DLayer( 49 | input=pool2, 50 | n_features=256, 51 | filter_size=5, 52 | weights_std=winit3, 53 | pad=(2, 2)) 54 | relu3 = layers.NonlinearityLayer( 55 | input=conv3, 56 | nonlinearity=nonlinearity) 57 | pool3 = layers.Pool2DLayer( 58 | input=relu3, 59 | filter_size=12, 60 | stride=(12, 12)) 61 | 62 | winitD1 = k/numpy.sqrt(numpy.prod(pool3.get_output_shape())) 63 | winitD2 = k/numpy.sqrt(300) 64 | 65 | fc4 = layers.DenseLayer( 66 | input_layer=pool3, 67 | n_outputs=300, 68 | weights_std=winitD1, 69 | init_bias_value=1.0, 70 | nonlinearity=layers.rectify, 71 | dropout=0.0) 72 | output = layers.DenseLayer( 73 | input_layer=fc4, 74 | n_outputs=7, 75 | weights_std=winitD2, 76 | 
init_bias_value=0.0, 77 | nonlinearity=layers.softmax) 78 | -------------------------------------------------------------------------------- /ck_plus/model.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from anna.layers import layers 4 | import anna.models 5 | 6 | 7 | class SupervisedModel(anna.models.SupervisedModel): 8 | batch = 64 9 | input = layers.Input2DLayer(batch, 1, 96, 96) 10 | 11 | k = float(numpy.random.rand()*1+0.2) 12 | print '## k = %.3f' % k 13 | winit1 = k/numpy.sqrt(5*5*1) 14 | winit2 = k/numpy.sqrt(5*5*64) 15 | winit3 = k/numpy.sqrt(5*5*128) 16 | 17 | def trec(x): 18 | return x*(x > 0.0) 19 | 20 | nonlinearity = trec 21 | 22 | conv1 = layers.Conv2DLayer( 23 | input, 24 | n_features=64, 25 | filter_size=5, 26 | weights_std=winit1, 27 | pad=(2, 2)) 28 | relu1 = layers.NonlinearityLayer( 29 | input=conv1, 30 | nonlinearity=nonlinearity) 31 | pool1 = layers.Pool2DLayer( 32 | input=relu1, 33 | filter_size=2, 34 | stride=(2, 2)) 35 | conv2 = layers.Conv2DLayer( 36 | input=pool1, 37 | n_features=128, 38 | filter_size=5, 39 | weights_std=winit2, 40 | pad=(2, 2)) 41 | relu2 = layers.NonlinearityLayer( 42 | input=conv2, 43 | nonlinearity=nonlinearity) 44 | pool2 = layers.Pool2DLayer( 45 | input=relu2, 46 | filter_size=2, 47 | stride=(2, 2)) 48 | conv3 = layers.Conv2DLayer( 49 | input=pool2, 50 | n_features=256, 51 | filter_size=5, 52 | weights_std=winit3, 53 | pad=(2, 2)) 54 | relu3 = layers.NonlinearityLayer( 55 | input=conv3, 56 | nonlinearity=nonlinearity) 57 | pool3 = layers.Pool2DLayer( 58 | input=relu3, 59 | filter_size=12, 60 | stride=(12, 12)) 61 | 62 | winitD1 = k/numpy.sqrt(numpy.prod(pool3.get_output_shape())) 63 | winitD2 = k/numpy.sqrt(300) 64 | 65 | fc4 = layers.DenseLayer( 66 | input_layer=pool3, 67 | n_outputs=300, 68 | weights_std=winitD1, 69 | init_bias_value=1.0, 70 | nonlinearity=layers.rectify, 71 | dropout=0.0) 72 | output = layers.DenseLayer( 73 | input_layer=fc4, 74 | n_outputs=8, 75 | weights_std=winitD2, 76 | init_bias_value=0.0, 77 | nonlinearity=layers.softmax) 78 | -------------------------------------------------------------------------------- /ck_plus_six_class/model.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from anna.layers import layers 4 | import anna.models 5 | 6 | 7 | class SupervisedModel(anna.models.SupervisedModel): 8 | batch = 64 9 | input = layers.Input2DLayer(batch, 1, 96, 96) 10 | 11 | k = float(numpy.random.rand()*1+0.2) 12 | print '## k = %.3f' % k 13 | winit1 = k/numpy.sqrt(5*5*1) 14 | winit2 = k/numpy.sqrt(5*5*64) 15 | winit3 = k/numpy.sqrt(5*5*128) 16 | 17 | def trec(x): 18 | return x*(x > 0.0) 19 | 20 | nonlinearity = trec 21 | 22 | conv1 = layers.Conv2DLayer( 23 | input, 24 | n_features=64, 25 | filter_size=5, 26 | weights_std=winit1, 27 | pad=(2, 2)) 28 | relu1 = layers.NonlinearityLayer( 29 | input=conv1, 30 | nonlinearity=nonlinearity) 31 | pool1 = layers.Pool2DLayer( 32 | input=relu1, 33 | filter_size=2, 34 | stride=(2, 2)) 35 | conv2 = layers.Conv2DLayer( 36 | input=pool1, 37 | n_features=128, 38 | filter_size=5, 39 | weights_std=winit2, 40 | pad=(2, 2)) 41 | relu2 = layers.NonlinearityLayer( 42 | input=conv2, 43 | nonlinearity=nonlinearity) 44 | pool2 = layers.Pool2DLayer( 45 | input=relu2, 46 | filter_size=2, 47 | stride=(2, 2)) 48 | conv3 = layers.Conv2DLayer( 49 | input=pool2, 50 | n_features=256, 51 | filter_size=5, 52 | weights_std=winit3, 53 | pad=(2, 2)) 54 | relu3 = 
layers.NonlinearityLayer( 55 | input=conv3, 56 | nonlinearity=nonlinearity) 57 | pool3 = layers.Pool2DLayer( 58 | input=relu3, 59 | filter_size=12, 60 | stride=(12, 12)) 61 | 62 | winitD1 = k/numpy.sqrt(numpy.prod(pool3.get_output_shape())) 63 | winitD2 = k/numpy.sqrt(300) 64 | 65 | fc4 = layers.DenseLayer( 66 | input_layer=pool3, 67 | n_outputs=300, 68 | weights_std=winitD1, 69 | init_bias_value=1.0, 70 | nonlinearity=layers.rectify, 71 | dropout=0.0) 72 | output = layers.DenseLayer( 73 | input_layer=fc4, 74 | n_outputs=6, 75 | weights_std=winitD2, 76 | init_bias_value=0.0, 77 | nonlinearity=layers.softmax) 78 | -------------------------------------------------------------------------------- /tfd/tfd_single_checkpoint_evaluator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy 7 | 8 | from anna.datasets.supervised_data_loader import SupervisedDataLoader 9 | from anna import util 10 | 11 | import data_paths 12 | from model import SupervisedModel 13 | 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser( 17 | prog='tfd_single_checkpoint_evaluator', 18 | description='Script to evaluate single checkpoint on TFD.') 19 | parser.add_argument("-s", "--split", default='0', 20 | help='Training split of TFD to use. (0-4)') 21 | parser.add_argument("--which_set", choices=['train', 'val', 'test'], 22 | help='Which dataset to use (train, val, test)') 23 | parser.add_argument("checkpoint_file", 24 | help='Path to single model checkpoint (.pkl) file.') 25 | args = parser.parse_args() 26 | 27 | checkpoint_file = args.checkpoint_file 28 | fold = int(args.split) 29 | dataset_path = os.path.join(data_paths.tfd_data_path, 'npy_files/TFD_96/split_'+str(fold)) 30 | 31 | if args.which_set == 'train': 32 | set_num = 0 33 | elif args.which_set == 'val': 34 | set_num = 1 35 | else: 36 | set_num = 2 37 | 38 | print 'Checkpoint: %s' % checkpoint_file 39 | print 'Evaluating on split %d' % fold 40 | print 'Using %s set\n' % args.which_set 41 | 42 | # Load model 43 | model = SupervisedModel('evaluation', './') 44 | 45 | # Load dataset 46 | supervised_data_loader = SupervisedDataLoader(dataset_path) 47 | data_container = supervised_data_loader.load(set_num) 48 | data_container.X = numpy.float32(data_container.X) 49 | data_container.X /= 255.0 50 | data_container.X *= 2.0 51 | print data_container.X.shape 52 | 53 | # Construct evaluator 54 | preprocessor = [util.Normer3(filter_size=5, num_channels=1)] 55 | 56 | evaluator = util.Evaluator(model, data_container, 57 | checkpoint_file, preprocessor) 58 | 59 | # For the inputted checkpoint, compute the overall accuracy 60 | accuracies = [] 61 | print 'Checkpoint: %s' % os.path.split(checkpoint_file)[1] 62 | evaluator.set_checkpoint(checkpoint_file) 63 | accuracy = evaluator.run() 64 | print 'Accuracy: %f\n' % accuracy 65 | accuracies.append(accuracy) 66 | -------------------------------------------------------------------------------- /tfd/tfd_checkpoint_checker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy 7 | 8 | from anna.datasets.supervised_data_loader import SupervisedDataLoader 9 | from anna import util 10 | 11 | import data_paths 12 | from model import SupervisedModel 13 | 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser( 17 | prog='tfd_plus_checkpoint_checker', 18 | 
description='Script to select best performing checkpoint on TFD.') 19 | parser.add_argument("-s", "--split", default='0', 20 | help='Training split of TFD to use. (0-4)') 21 | parser.add_argument("checkpoint_dir", 22 | help='Folder containing all .pkl checkpoint files.') 23 | args = parser.parse_args() 24 | 25 | checkpoint_dir = args.checkpoint_dir 26 | fold = int(args.split) 27 | dataset_path = os.path.join(data_paths.tfd_data_path, 'npy_files/TFD_96/split_'+str(fold)) 28 | 29 | print 'Checkpoint directory: %s' % checkpoint_dir 30 | print 'Testing on split %d\n' % fold 31 | 32 | # Load model 33 | model = SupervisedModel('evaluation', './') 34 | 35 | # Load data 36 | supervised_data_loader = SupervisedDataLoader(dataset_path) 37 | val_data_container = supervised_data_loader.load(1) 38 | val_data_container.X = numpy.float32(val_data_container.X) 39 | val_data_container.X /= 255.0 40 | val_data_container.X *= 2.0 41 | 42 | # Construct evaluator 43 | preprocessor = [util.Normer3(filter_size=5, num_channels=1)] 44 | 45 | checkpoint_file_list = sorted( 46 | glob.glob(os.path.join(checkpoint_dir, '*.pkl'))) 47 | evaluator = util.Evaluator(model, val_data_container, 48 | checkpoint_file_list[0], preprocessor) 49 | 50 | # For each checkpoint, compute the overall val accuracy 51 | accuracies = [] 52 | for checkpoint in checkpoint_file_list: 53 | print 'Checkpoint: %s' % os.path.split(checkpoint)[1] 54 | evaluator.set_checkpoint(checkpoint) 55 | accuracy = evaluator.run() 56 | print 'Accuracy: %f\n' % accuracy 57 | accuracies.append(accuracy) 58 | 59 | # Find checkpoint that produced the highest accuracy 60 | max_accuracy = numpy.max(accuracies) 61 | max_index = numpy.argmax(accuracies) 62 | max_checkpoint = checkpoint_file_list[max_index] 63 | print 'Max Checkpoint: %s' % max_checkpoint 64 | print 'Max Accuracy: %f' % max_accuracy 65 | -------------------------------------------------------------------------------- /ck_plus/ck_plus_single_checkpoint_evaluator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy 7 | 8 | from anna.datasets.supervised_data_loader import SupervisedDataContainer 9 | from anna import util 10 | 11 | import data_fold_loader 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser( 18 | prog='ck_plus_single_checkpoint_evaluator', 19 | description='Script to evaluate the performance of a checkpoint \ 20 | on CK+.') 21 | parser.add_argument("-s", "--split", default='0', 22 | help='Testing split of CK+ to use. 
(0-9)') 23 | parser.add_argument("checkpoint_file", 24 | help='Path to a single model checkpoint (.pkl file).') 25 | args = parser.parse_args() 26 | 27 | checkpoint_file = args.checkpoint_file 28 | test_split = int(args.split) 29 | dataset_path = data_paths.ck_plus_data_path 30 | 31 | print 'Checkpoint: %s' % checkpoint_file 32 | print 'Testing on split %d\n' % test_split 33 | 34 | # Load model 35 | model = SupervisedModel('evaluation', './') 36 | 37 | # Load dataset 38 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 39 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 40 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 41 | print X_val.shape, y_val.shape 42 | print X_test.shape, y_test.shape 43 | 44 | X_val = numpy.float32(X_val) 45 | X_val /= 255.0 46 | X_val *= 2.0 47 | 48 | X_test = numpy.float32(X_test) 49 | X_test /= 255.0 50 | X_test *= 2.0 51 | 52 | val_data_container = SupervisedDataContainer(X_val, y_val) 53 | test_data_container = SupervisedDataContainer(X_test, y_test) 54 | 55 | # Construct evaluator 56 | preprocessor = [util.Normer3(filter_size=5, num_channels=1)] 57 | 58 | val_evaluator = util.Evaluator(model, val_data_container, 59 | checkpoint_file, preprocessor) 60 | 61 | test_evaluator = util.Evaluator(model, test_data_container, 62 | checkpoint_file, preprocessor) 63 | 64 | # For the inputted checkpoint, compute the overall val accuracy 65 | print 'Checkpoint: %s' % os.path.split(checkpoint_file)[1] 66 | val_evaluator.set_checkpoint(checkpoint_file) 67 | val_accuracy = val_evaluator.run() 68 | print 'Val Accuracy: %f\n' % val_accuracy 69 | 70 | # For the inputted checkpoint, compute the overall test accuracy 71 | print 'Checkpoint: %s' % os.path.split(checkpoint_file)[1] 72 | test_evaluator.set_checkpoint(checkpoint_file) 73 | test_accuracy = test_evaluator.run() 74 | print 'Test Accuracy: %f\n' % test_accuracy 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Do Deep Neural Networks Learn Facial Action Units When Doing Expression Recognition? 2 | This repository contains all of the experiment files for the paper "Do Deep Neural Networks Learn Facial Action Units When Doing Expression Recognition?", available here: http://arxiv.org/abs/1510.02969 3 | 4 | ![faus_frontpage](./faus_frontpage.png) 5 | 6 | ## Abstract 7 | Despite being the appearance-based classifier of choice in recent years, relatively few works have examined how much convolutional neural networks (CNNs) can improve performance on accepted expression recognition benchmarks and, more importantly, examine what it is they actually learn. In this work, not only do we show that CNNs can achieve strong performance, but we also introduce an approach to decipher which portions of the face influence the CNN's predictions. First, we train a zero-bias CNN on facial expression data and achieve, to our knowledge, state-of-the-art performance on two expression recognition benchmarks: the extended Cohn-Kanade (CK+) dataset and the Toronto Face Dataset (TFD). We then qualitatively analyze the network by visualizing the spatial patterns that maximally excite different neurons in the convolutional layers and show how they resemble Facial Action Units (FAUs).
Finally, we use the FAU labels provided in the CK+ dataset to verify that the FAUs observed in our filter visualizations indeed align with the subject's facial movements. 8 | 9 | ### Bibtex 10 | ``` 11 | @article{khorrami2015deep, 12 | title={Do Deep Neural Networks Learn Facial Action Units When Doing 13 | Expression Recognition?}, 14 | author={Khorrami, Pooya and Paine, Tom Le and Huang, Thomas S}, 15 | journal={arXiv preprint arXiv:1510.02969}, 16 | year={2015} 17 | } 18 | ``` 19 | 20 | ## About the repo 21 | 22 | The experiments are broken up by dataset: 23 | 24 | + ck_plus 25 | + ck_plus_six_class 26 | + tfd 27 | 28 | The difference between ``ck_plus`` and ``ck_plus_six_class`` is the ``ck_plus_six_class`` folder evaluates our model's performance on the six basic emotions (anger, disgust, fear, happy, sad, surprise) while the ``ck_plus`` folder 29 | contains the basic six along with neutral and contempt. 30 | 31 | The ``README.md`` file in each folder will provide more information on how 32 | to run and evaluate the experiments. 33 | 34 | ## Requisite Libraries 35 | 36 | In order to run our experiments, you will need the following software: 37 | + Python 2.7 (preferably the [anaconda][anaconda] distribution) 38 | + [numpy] - a standard numerical computing library for python 39 | + [anna] - our neural network library, which itself depends on [theano] and [pylearn2] 40 | + [OpenCV] - common open source computer vision library (needed for face detection) 41 | + [cudnn] - a GPU-accelerated library of primitives for deep neural networks 42 | 43 | 44 | [anaconda]:https://www.continuum.io/why-anaconda 45 | [numpy]:http://www.numpy.org/ 46 | [anna]:https://github.com/ifp-uiuc/anna 47 | [theano]:http://deeplearning.net/software/theano/ 48 | [pylearn2]:http://deeplearning.net/software/pylearn2/ 49 | [OpenCV]:http://opencv.org/ 50 | [cudnn]:https://developer.nvidia.com/cudnn 51 | 52 | -------------------------------------------------------------------------------- /tfd/cnn/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | from anna.datasets.supervised_data_loader import SupervisedDataLoader 11 | 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | parser = argparse.ArgumentParser(prog='train_cnn', 17 | description='Script to train convolutional\ 18 | network from random initialization.') 19 | parser.add_argument("-s", "--split", default='0', 20 | help='Training split of TFD to use. 
(0-4)') 21 | args = parser.parse_args() 22 | 23 | print('Start') 24 | train_split = int(args.split) 25 | if train_split < 0 or train_split > 4: 26 | raise Exception("Training Split must be in range 0-4.") 27 | print('Using TFD training split: {}'.format(train_split)) 28 | 29 | pid = os.getpid() 30 | print('PID: {}'.format(pid)) 31 | f = open('pid_'+str(train_split), 'wb') 32 | f.write(str(pid)+'\n') 33 | f.close() 34 | 35 | # Load model 36 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 37 | monitor = util.Monitor(model, 38 | checkpoint_directory='checkpoints_'+str(train_split), 39 | save_steps=1000) 40 | 41 | # Loading TFD dataset 42 | print('Loading Data') 43 | supervised_data_loader = SupervisedDataLoader( 44 | os.path.join(data_paths.tfd_data_path, 'npy_files/TFD_96/split_'+str(train_split))) 45 | train_data_container = supervised_data_loader.load(0) 46 | val_data_container = supervised_data_loader.load(1) 47 | test_data_container = supervised_data_loader.load(2) 48 | 49 | X_train = train_data_container.X 50 | y_train = train_data_container.y 51 | X_val = val_data_container.X 52 | y_val = val_data_container.y 53 | X_test = test_data_container.X 54 | y_test = test_data_container.y 55 | 56 | X_train = numpy.float32(X_train) 57 | X_train /= 255.0 58 | X_train *= 2.0 59 | 60 | X_val = numpy.float32(X_val) 61 | X_val /= 255.0 62 | X_val *= 2.0 63 | 64 | X_test = numpy.float32(X_test) 65 | X_test /= 255.0 66 | X_test *= 2.0 67 | 68 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 69 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 70 | train_iterator = train_dataset.iterator( 71 | mode='random_uniform', batch_size=64, num_batches=31000) 72 | val_iterator = val_dataset.iterator( 73 | mode='random_uniform', batch_size=64, num_batches=31000) 74 | 75 | # Create object to local contrast normalize a batch. 76 | # Note: Every batch must be normalized before use. 77 | normer = util.Normer3(filter_size=5, num_channels=1) 78 | module_list = [normer] 79 | preprocessor = util.Preprocessor(module_list) 80 | 81 | print('Training Model') 82 | for x_batch, y_batch in train_iterator: 83 | x_batch = preprocessor.run(x_batch) 84 | monitor.start() 85 | log_prob, accuracy = model.train(x_batch, y_batch) 86 | monitor.stop(1-accuracy) 87 | 88 | if monitor.test: 89 | monitor.start() 90 | x_val_batch, y_val_batch = val_iterator.next() 91 | x_val_batch = preprocessor.run(x_val_batch) 92 | val_accuracy = model.eval(x_val_batch, y_val_batch) 93 | monitor.stop_test(1-val_accuracy) 94 | -------------------------------------------------------------------------------- /ck_plus/ck_plus_checkpoint_checker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy 7 | 8 | from anna.datasets.supervised_data_loader import SupervisedDataContainer 9 | from anna import util 10 | 11 | import data_fold_loader 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser( 18 | prog='ck_plus_checkpoint_checker', 19 | description='Script to select best performing checkpoint on CK+.') 20 | parser.add_argument("-s", "--split", default='0', 21 | help='Testing split of CK+ to use. 
(0-9)') 22 | parser.add_argument("checkpoint_dir", 23 | help='Folder containing all .pkl checkpoint files.') 24 | args = parser.parse_args() 25 | 26 | checkpoint_dir = args.checkpoint_dir 27 | test_split = int(args.split) 28 | dataset_path = data_paths.ck_plus_data_path 29 | 30 | print 'Checkpoint directory: %s' % checkpoint_dir 31 | print 'Testing on split %d\n' % test_split 32 | 33 | # Load model 34 | model = SupervisedModel('evaluation', './') 35 | 36 | # Load data 37 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 38 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 39 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 40 | print X_val.shape, y_val.shape 41 | print X_test.shape, y_test.shape 42 | 43 | X_val = numpy.float32(X_val) 44 | X_val /= 255.0 45 | X_val *= 2.0 46 | 47 | X_test = numpy.float32(X_test) 48 | X_test /= 255.0 49 | X_test *= 2.0 50 | 51 | val_data_container = SupervisedDataContainer(X_val, y_val) 52 | test_data_container = SupervisedDataContainer(X_test, y_test) 53 | 54 | # Construct evaluator 55 | preprocessor = [util.Normer3(filter_size=5, num_channels=1)] 56 | 57 | checkpoint_file_list = sorted( 58 | glob.glob(os.path.join(checkpoint_dir, '*.pkl'))) 59 | val_evaluator = util.Evaluator(model, val_data_container, 60 | checkpoint_file_list[0], preprocessor) 61 | test_evaluator = util.Evaluator(model, test_data_container, 62 | checkpoint_file_list[0], preprocessor) 63 | 64 | # For each checkpoint, compute the overall val accuracy 65 | val_accuracies = [] 66 | for checkpoint in checkpoint_file_list: 67 | print 'Checkpoint: %s' % os.path.split(checkpoint)[1] 68 | val_evaluator.set_checkpoint(checkpoint) 69 | val_accuracy = val_evaluator.run() 70 | print 'Val Accuracy: %f\n' % val_accuracy 71 | val_accuracies.append(val_accuracy) 72 | 73 | # Find checkpoint that produced the highest val accuracy 74 | max_val_accuracy = numpy.max(val_accuracies) 75 | max_index = numpy.argmax(val_accuracies) 76 | max_checkpoint = checkpoint_file_list[max_index] 77 | print 'Max Checkpoint: %s' % max_checkpoint 78 | print 'Max Val Accuracy: %f' % max_val_accuracy 79 | 80 | # Compute test accuracy of chosen checkpoint 81 | test_evaluator.set_checkpoint(max_checkpoint) 82 | test_accuracy = test_evaluator.run() 83 | print 'Test Accuracy: %f' % test_accuracy 84 | -------------------------------------------------------------------------------- /tfd/cnn_d/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | from anna.datasets.supervised_data_loader import SupervisedDataLoader 11 | 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | parser = argparse.ArgumentParser(prog='train_cnn_with_dropout', 17 | description='Script to train convolutional\ 18 | network from random initialization with \ 19 | dropout.') 20 | parser.add_argument("-s", "--split", default='0', help='Training split of TFD \ 21 | to use. 
(0-4)') 22 | args = parser.parse_args() 23 | 24 | print('Start') 25 | train_split = int(args.split) 26 | if train_split < 0 or train_split > 4: 27 | raise Exception("Training Split must be in range 0-4.") 28 | print('Using TFD training split: {}'.format(train_split)) 29 | 30 | pid = os.getpid() 31 | print('PID: {}'.format(pid)) 32 | f = open('pid_'+str(train_split), 'wb') 33 | f.write(str(pid)+'\n') 34 | f.close() 35 | 36 | # Load model 37 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 38 | monitor = util.Monitor(model, 39 | checkpoint_directory='checkpoints_'+str(train_split), 40 | save_steps=1000) 41 | 42 | # Add dropout flag to fully-connected layer 43 | model.fc4.dropout = 0.5 44 | model._compile() 45 | 46 | # Loading TFD dataset 47 | print('Loading Data') 48 | supervised_data_loader = SupervisedDataLoader( 49 | os.path.join(data_paths.tfd_data_path, 'npy_files/TFD_96/split_'+str(train_split))) 50 | train_data_container = supervised_data_loader.load(0) 51 | val_data_container = supervised_data_loader.load(1) 52 | test_data_container = supervised_data_loader.load(2) 53 | 54 | X_train = train_data_container.X 55 | y_train = train_data_container.y 56 | X_val = val_data_container.X 57 | y_val = val_data_container.y 58 | X_test = test_data_container.X 59 | y_test = test_data_container.y 60 | 61 | X_train = numpy.float32(X_train) 62 | X_train /= 255.0 63 | X_train *= 2.0 64 | 65 | X_val = numpy.float32(X_val) 66 | X_val /= 255.0 67 | X_val *= 2.0 68 | 69 | X_test = numpy.float32(X_test) 70 | X_test /= 255.0 71 | X_test *= 2.0 72 | 73 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 74 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 75 | train_iterator = train_dataset.iterator( 76 | mode='random_uniform', batch_size=64, num_batches=31000) 77 | val_iterator = val_dataset.iterator( 78 | mode='random_uniform', batch_size=64, num_batches=31000) 79 | 80 | # Create object to local contrast normalize a batch. 81 | # Note: Every batch must be normalized before use. 82 | normer = util.Normer3(filter_size=5, num_channels=1) 83 | module_list = [normer] 84 | preprocessor = util.Preprocessor(module_list) 85 | 86 | print('Training Model') 87 | for x_batch, y_batch in train_iterator: 88 | x_batch = preprocessor.run(x_batch) 89 | monitor.start() 90 | log_prob, accuracy = model.train(x_batch, y_batch) 91 | monitor.stop(1-accuracy) 92 | 93 | if monitor.test: 94 | monitor.start() 95 | x_val_batch, y_val_batch = val_iterator.next() 96 | x_val_batch = preprocessor.run(x_val_batch) 97 | val_accuracy = model.eval(x_val_batch, y_val_batch) 98 | monitor.stop_test(1-val_accuracy) 99 | -------------------------------------------------------------------------------- /tfd/cnn_a/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | from anna.datasets.supervised_data_loader import SupervisedDataLoader 11 | 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | parser = argparse.ArgumentParser(prog='train_cnn_with_data_augmentation', 17 | description='Script to train convolutional\ 18 | network from random initialization with \ 19 | data augmentation.') 20 | parser.add_argument("-s", "--split", default='0', help='Training split of TFD \ 21 | to use. 
(0-4)') 22 | args = parser.parse_args() 23 | 24 | print('Start') 25 | train_split = int(args.split) 26 | if train_split < 0 or train_split > 4: 27 | raise Exception("Training Split must be in range 0-4.") 28 | print('Using TFD training split: {}'.format(train_split)) 29 | 30 | pid = os.getpid() 31 | print('PID: {}'.format(pid)) 32 | f = open('pid_'+str(train_split), 'wb') 33 | f.write(str(pid)+'\n') 34 | f.close() 35 | 36 | # Load model 37 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 38 | monitor = util.Monitor(model, 39 | checkpoint_directory='checkpoints_'+str(train_split), 40 | save_steps=1000) 41 | 42 | # Loading TFD dataset 43 | print('Loading Data') 44 | supervised_data_loader = SupervisedDataLoader( 45 | os.path.join(data_paths.tfd_data_path, 'npy_files/TFD_96/split_'+str(train_split))) 46 | train_data_container = supervised_data_loader.load(0) 47 | val_data_container = supervised_data_loader.load(1) 48 | test_data_container = supervised_data_loader.load(2) 49 | 50 | X_train = train_data_container.X 51 | y_train = train_data_container.y 52 | X_val = val_data_container.X 53 | y_val = val_data_container.y 54 | X_test = test_data_container.X 55 | y_test = test_data_container.y 56 | 57 | X_train = numpy.float32(X_train) 58 | X_train /= 255.0 59 | X_train *= 2.0 60 | 61 | X_val = numpy.float32(X_val) 62 | X_val /= 255.0 63 | X_val *= 2.0 64 | 65 | X_test = numpy.float32(X_test) 66 | X_test /= 255.0 67 | X_test *= 2.0 68 | 69 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 70 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 71 | train_iterator = train_dataset.iterator( 72 | mode='random_uniform', batch_size=64, num_batches=31000) 73 | val_iterator = val_dataset.iterator( 74 | mode='random_uniform', batch_size=64, num_batches=31000) 75 | 76 | # Do data augmentation (crops, flips, rotations, scales, intensity) 77 | data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), 78 | flip=True, gray_on=True) 79 | normer = util.Normer3(filter_size=5, num_channels=1) 80 | module_list_train = [data_augmenter, normer] 81 | module_list_val = [normer] 82 | preprocessor_train = util.Preprocessor(module_list_train) 83 | preprocessor_val = util.Preprocessor(module_list_val) 84 | 85 | print('Training Model') 86 | for x_batch, y_batch in train_iterator: 87 | x_batch = preprocessor_train.run(x_batch) 88 | monitor.start() 89 | log_prob, accuracy = model.train(x_batch, y_batch) 90 | monitor.stop(1-accuracy) 91 | 92 | if monitor.test: 93 | monitor.start() 94 | x_val_batch, y_val_batch = val_iterator.next() 95 | x_val_batch = preprocessor_val.run(x_val_batch) 96 | val_accuracy = model.eval(x_val_batch, y_val_batch) 97 | monitor.stop_test(1-val_accuracy) 98 | -------------------------------------------------------------------------------- /ck_plus_six_class/README.md: -------------------------------------------------------------------------------- 1 | # Running the CK+ experiments (six classes) 2 | 3 | # Contents 4 | + [Introduction](#introduction) 5 | + [Folder contents](#folder-contents) 6 | + [Running experiments](#running-experiments) 7 | 8 | # Introduction 9 | 10 | This folder contains some of the code used to obtain our results on the [CK+][CK+] dataset. However, instead of training and evaluating on all eight expression classes, this model only deals with the **six** basic emotions: (anger, disgust, fear, happy, sad, and surprise). Our experiment uses both data augmentation and dropout as forms of regularization. 
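For reference, both regularizers are enabled the same way as in the other `cnn_ad` training scripts included in this repository (e.g. `ck_plus/cnn_ad/train.py`). The sketch below mirrors that pattern; the exact script in this folder is assumed to follow it and may differ in minor details:

``` python
# Sketch of how the cnn_ad training scripts turn on both regularizers
# (mirrors ck_plus/cnn_ad/train.py; assumed to match the script in this folder).
from anna import util
from model import SupervisedModel

model = SupervisedModel('experiment', './', learning_rate=1e-2)

# Dropout: set the flag on the fully-connected layer, then recompile the model.
model.fc4.dropout = 0.5
model._compile()

# Data augmentation: random crops/flips applied to every training batch,
# followed by local contrast normalization.
data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), flip=True, gray_on=True)
normer = util.Normer3(filter_size=5, num_channels=1)
preprocessor_train = util.Preprocessor([data_augmenter, normer])
```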
11 | 12 | We will first describe the contents of this folder, and then walk you through 13 | how to run the experiments. 14 | 15 | ## Folder contents 16 | The folder contains: 17 | ``` shell 18 | /cnn_ad/train.py 19 | ck_plus_checkpoint_checker.py 20 | ck_plus_single_checkpoint_evaluator.py 21 | data_paths.py 22 | model.py 23 | ``` 24 | 25 | #### `train.py` 26 | Our train.py trains the CNN model specified in the ``model.py`` file. It also outputs a 27 | directory of model checkpoint files, and a log of the training process. 28 | 29 | #### `ck_plus_single_checkpoint_evaluator.py` 30 | This file contains a script that outputs the performance of a single saved checkpoint. 31 | 32 | #### `ck_plus_checkpoint_checker.py` 33 | This file contains a script that examines all the checkpoints created by a 34 | single experiment, and chooses the best one. 35 | 36 | #### `data_paths.py` 37 | This file contains a single variable (``ck_plus_data_path``) which indicates the path to load the CK+ ``.npy`` files 38 | created when running the ``make_ck_plus_dataset.py`` file. 39 | 40 | #### `model.py` 41 | This file contains the CNN model that is loaded by the ``train.py`` file. 42 | 43 | 44 | # Running experiments 45 | 46 | ## CNN Training 47 | 48 | You are now ready to train the CNN. 49 | 50 | You can train the CNN with the following command: 51 | ``` shell 52 | # Snippet: cnn training - six class 53 | $ THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' \ 54 | python -u train.py --split 0 \ 55 | >& log0.txt & 56 | ``` 57 | 58 | Since the [CK+][CK+] dataset is typically broken into splits and their results averaged, 59 | the `--split` option indicates which of the 10 splits (0-9) will be used for testing. For example, 60 | the command above will train the network on splits 2-9, monitor validation accuracy on split 1, and evaluate the results on split 0. The code 61 | will save the `.pkl` file containing the network parameters to a directory called `./checkpoints_0/` 62 | which will denote the split used. 63 | 64 | ### How to evaluate a model's performance 65 | 66 | After you have trained a split to completion, you can find the best performing 67 | checkpoint by running the checkpoint evaluator found in 68 | `ck_plus_checkpoint_checker.py`. Simply run the following command: 69 | 70 | ``` shell 71 | # Snippet: cnn checkpoint evaluation 72 | $ THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' \ 73 | python -u ck_plus_checkpoint_checker.py --split 0 ./cnn_ad/checkpoints_0/ \ 74 | >& cnn_ad_best_performance_split_0.txt & 75 | ``` 76 | 77 | With this command, `ck_plus_checkpoint_checker.py` will iterate over the list of 78 | checkpoints found in `./cnn_ad/checkpoints_0/` and compute the accuracy on 79 | the validation set. It will then select the checkpoint that yielded the highest 80 | validation accuracy and report that checkpoint's accuracy on the test set. The command also writes all of the results to a text file called 81 | `cnn_ad_best_performance_split_0.txt`. Please remember that the `--split` 82 | argument indicates which split is being used for test/evaluation.
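For concreteness, the train/validation/test folds implied by a given `--split` value come from `data_fold_loader.load_fold_assignment` (included in this folder and shown earlier in this repository dump). A small illustrative example:

``` python
# Example of the fold assignment behind --split 0
# (uses load_fold_assignment from data_fold_loader.py, shown above).
import data_fold_loader

train_folds, val_fold, test_fold = data_fold_loader.load_fold_assignment(test_fold=0)
print train_folds   # [2, 3, 4, 5, 6, 7, 8, 9] -- all remaining folds
print val_fold      # 1
print test_fold     # 0
```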
83 | 84 | 85 | [CK+]:http://www.pitt.edu/~emotion/ck-spread.htm 86 | -------------------------------------------------------------------------------- /tfd/cnn_ad/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | from anna.datasets.supervised_data_loader import SupervisedDataLoader 11 | 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | print 'Start' 17 | parser = argparse.ArgumentParser(prog='train_cnn_with_dropout_\ 18 | data_augmentation', 19 | description='Script to train convolutional\ 20 | network from random initialization with \ 21 | dropout and data augmentation.') 22 | parser.add_argument("-s", "--split", default='0', help='Training split of TFD \ 23 | to use. (0-4)') 24 | args = parser.parse_args() 25 | 26 | train_split = int(args.split) 27 | if train_split < 0 or train_split > 4: 28 | raise Exception("Training Split must be in range 0-4.") 29 | print('Using TFD training split: {}'.format(train_split)) 30 | 31 | pid = os.getpid() 32 | print('PID: {}'.format(pid)) 33 | f = open('pid_'+str(train_split), 'wb') 34 | f.write(str(pid)+'\n') 35 | f.close() 36 | 37 | # Load model 38 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 39 | monitor = util.Monitor(model, 40 | checkpoint_directory='checkpoints_'+str(train_split), 41 | save_steps=1000) 42 | 43 | # Add dropout to fully-connected layer 44 | model.fc4.dropout = 0.5 45 | model._compile() 46 | 47 | # Loading TFD dataset 48 | print('Loading Data') 49 | supervised_data_loader = SupervisedDataLoader( 50 | os.path.join(data_paths.tfd_data_path, 'npy_files/TFD_96/split_'+str(train_split))) 51 | train_data_container = supervised_data_loader.load(0) 52 | val_data_container = supervised_data_loader.load(1) 53 | test_data_container = supervised_data_loader.load(2) 54 | 55 | X_train = train_data_container.X 56 | y_train = train_data_container.y 57 | X_val = val_data_container.X 58 | y_val = val_data_container.y 59 | X_test = test_data_container.X 60 | y_test = test_data_container.y 61 | 62 | X_train = numpy.float32(X_train) 63 | X_train /= 255.0 64 | X_train *= 2.0 65 | 66 | X_val = numpy.float32(X_val) 67 | X_val /= 255.0 68 | X_val *= 2.0 69 | 70 | X_test = numpy.float32(X_test) 71 | X_test /= 255.0 72 | X_test *= 2.0 73 | 74 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 75 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 76 | train_iterator = train_dataset.iterator( 77 | mode='random_uniform', batch_size=64, num_batches=31000) 78 | val_iterator = val_dataset.iterator( 79 | mode='random_uniform', batch_size=64, num_batches=31000) 80 | 81 | # Do data augmentation (crops, flips, rotations, scales, intensity) 82 | data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), 83 | flip=True, gray_on=True) 84 | normer = util.Normer3(filter_size=5, num_channels=1) 85 | module_list_train = [data_augmenter, normer] 86 | module_list_val = [normer] 87 | preprocessor_train = util.Preprocessor(module_list_train) 88 | preprocessor_val = util.Preprocessor(module_list_val) 89 | 90 | print('Training Model') 91 | for x_batch, y_batch in train_iterator: 92 | x_batch = preprocessor_train.run(x_batch) 93 | monitor.start() 94 | log_prob, accuracy = model.train(x_batch, y_batch) 95 | monitor.stop(1-accuracy) 96 | 97 | if monitor.test: 98 | monitor.start() 99 | 
x_val_batch, y_val_batch = val_iterator.next() 100 | x_val_batch = preprocessor_val.run(x_val_batch) 101 | val_accuracy = model.eval(x_val_batch, y_val_batch) 102 | monitor.stop_test(1-val_accuracy) 103 | -------------------------------------------------------------------------------- /ck_plus/cnn/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | from anna.datasets.supervised_data_loader import SupervisedDataLoaderCrossVal 11 | 12 | import data_fold_loader 13 | import data_paths 14 | from model import SupervisedModel 15 | 16 | 17 | parser = argparse.ArgumentParser(prog='train_cnn', 18 | description='Script to train convolutional \ 19 | neural network from random initialization.') 20 | parser.add_argument("-s", "--split", default='0', help='Testing split of CK+ \ 21 | to use. (0-9)') 22 | parser.add_argument("--checkpoint_dir", default='./', help='Location to save \ 23 | model checkpoint files.') 24 | args = parser.parse_args() 25 | 26 | print('Start') 27 | test_split = int(args.split) 28 | if test_split < 0 or test_split > 9: 29 | raise Exception("Testing Split must be in range 0-9.") 30 | print('Using CK+ testing split: {}'.format(test_split)) 31 | 32 | checkpoint_dir = os.path.join(args.checkpoint_dir, 'checkpoints_'+str(test_split)) 33 | print 'Checkpoint dir: ', checkpoint_dir 34 | 35 | pid = os.getpid() 36 | print('PID: {}'.format(pid)) 37 | f = open('pid_'+str(test_split), 'wb') 38 | f.write(str(pid)+'\n') 39 | f.close() 40 | 41 | # Load model 42 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 43 | monitor = util.Monitor(model, 44 | checkpoint_directory=checkpoint_dir, 45 | save_steps=1000) 46 | 47 | # Loading CK+ dataset 48 | print('Loading Data') 49 | #supervised_data_loader = SupervisedDataLoaderCrossVal( 50 | # data_paths.ck_plus_data_path) 51 | #train_data_container = supervised_data_loader.load('train', train_split) 52 | #test_data_container = supervised_data_loader.load('test', train_split) 53 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 54 | X_train, y_train = data_fold_loader.load_folds(data_paths.ck_plus_data_path, train_folds) 55 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 56 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 57 | 58 | print X_train.shape, y_train.shape 59 | print X_val.shape, y_val.shape 60 | print X_test.shape, y_test.shape 61 | 62 | X_train = numpy.float32(X_train) 63 | X_train /= 255.0 64 | X_train *= 2.0 65 | 66 | X_val = numpy.float32(X_val) 67 | X_val /= 255.0 68 | X_val *= 2.0 69 | 70 | X_test = numpy.float32(X_test) 71 | X_test /= 255.0 72 | X_test *= 2.0 73 | 74 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 75 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 76 | train_iterator = train_dataset.iterator( 77 | mode='random_uniform', batch_size=64, num_batches=31000) 78 | val_iterator = val_dataset.iterator( 79 | mode='random_uniform', batch_size=64, num_batches=31000) 80 | 81 | # Create object to local contrast normalize a batch. 82 | # Note: Every batch must be normalized before use. 
83 | normer = util.Normer3(filter_size=5, num_channels=1) 84 | module_list = [normer] 85 | preprocessor = util.Preprocessor(module_list) 86 | 87 | print('Training Model') 88 | for x_batch, y_batch in train_iterator: 89 | x_batch = preprocessor.run(x_batch) 90 | monitor.start() 91 | log_prob, accuracy = model.train(x_batch, y_batch) 92 | monitor.stop(1-accuracy) 93 | 94 | if monitor.test: 95 | monitor.start() 96 | x_val_batch, y_val_batch = val_iterator.next() 97 | x_val_batch = preprocessor.run(x_val_batch) 98 | val_accuracy = model.eval(x_val_batch, y_val_batch) 99 | monitor.stop_test(1-val_accuracy) 100 | -------------------------------------------------------------------------------- /ck_plus/cnn_d/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | #from anna.datasets.supervised_data_loader import SupervisedDataLoaderCrossVal 11 | 12 | import data_fold_loader 13 | import data_paths 14 | from model import SupervisedModel 15 | 16 | 17 | parser = argparse.ArgumentParser(prog='train_cnn_with_dropout', 18 | description='Script to train convolutional \ 19 | neural network from random initialization \ 20 | with dropout.') 21 | parser.add_argument("-s", "--split", default='0', help='Testing split of CK+ \ 22 | to use. (0-9)') 23 | parser.add_argument("--checkpoint_dir", default='./', help='Location to save \ 24 | model checkpoint files.') 25 | args = parser.parse_args() 26 | 27 | print('Start') 28 | test_split = int(args.split) 29 | if test_split < 0 or test_split > 9: 30 | raise Exception("Testing Split must be in range 0-9.") 31 | print('Using CK+ testing split: {}'.format(test_split)) 32 | 33 | checkpoint_dir = os.path.join(args.checkpoint_dir, 'checkpoints_'+str(test_split)) 34 | print 'Checkpoint dir: ', checkpoint_dir 35 | 36 | pid = os.getpid() 37 | print('PID: {}'.format(pid)) 38 | f = open('pid_'+str(test_split), 'wb') 39 | f.write(str(pid)+'\n') 40 | f.close() 41 | 42 | # Load model 43 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 44 | monitor = util.Monitor(model, 45 | checkpoint_directory=checkpoint_dir, 46 | save_steps=1000) 47 | 48 | # Add dropout to fully-connected layer 49 | model.fc4.dropout = 0.5 50 | model._compile() 51 | 52 | # Loading CK+ dataset 53 | print('Loading Data') 54 | #supervised_data_loader = SupervisedDataLoaderCrossVal( 55 | # data_paths.ck_plus_data_path) 56 | #train_data_container = supervised_data_loader.load('train', train_split) 57 | #test_data_container = supervised_data_loader.load('test', train_split) 58 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 59 | X_train, y_train = data_fold_loader.load_folds(data_paths.ck_plus_data_path, train_folds) 60 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 61 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 62 | 63 | print X_train.shape, y_train.shape 64 | print X_val.shape, y_val.shape 65 | print X_test.shape, y_test.shape 66 | 67 | X_train = numpy.float32(X_train) 68 | X_train /= 255.0 69 | X_train *= 2.0 70 | 71 | X_val = numpy.float32(X_val) 72 | X_val /= 255.0 73 | X_val *= 2.0 74 | 75 | X_test = numpy.float32(X_test) 76 | X_test /= 255.0 77 | X_test *= 2.0 78 | 79 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 80 | val_dataset = 
supervised_dataset.SupervisedDataset(X_val, y_val) 81 | train_iterator = train_dataset.iterator( 82 | mode='random_uniform', batch_size=64, num_batches=31000) 83 | val_iterator = val_dataset.iterator( 84 | mode='random_uniform', batch_size=64, num_batches=31000) 85 | 86 | # Create object to local contrast normalize a batch. 87 | # Note: Every batch must be normalized before use. 88 | normer = util.Normer3(filter_size=5, num_channels=1) 89 | module_list = [normer] 90 | preprocessor = util.Preprocessor(module_list) 91 | 92 | print('Training Model') 93 | for x_batch, y_batch in train_iterator: 94 | x_batch = preprocessor.run(x_batch) 95 | monitor.start() 96 | log_prob, accuracy = model.train(x_batch, y_batch) 97 | monitor.stop(1-accuracy) # monitor takes error instead of accuracy 98 | 99 | if monitor.test: 100 | monitor.start() 101 | x_val_batch, y_val_batch = val_iterator.next() 102 | x_val_batch = preprocessor.run(x_val_batch) 103 | val_accuracy = model.eval(x_val_batch, y_val_batch) 104 | monitor.stop_test(1-val_accuracy) 105 | -------------------------------------------------------------------------------- /ck_plus/cnn_a/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | #from anna.datasets.supervised_data_loader import SupervisedDataLoaderCrossVal 11 | 12 | import data_fold_loader 13 | import data_paths 14 | from model import SupervisedModel 15 | 16 | 17 | parser = argparse.ArgumentParser(prog='train_cnn_with_data_augmentation', 18 | description='Script to train convolutional \ 19 | neural network from random initialization \ 20 | with data augmentation.') 21 | parser.add_argument("-s", "--split", default='0', help='Testing split of CK+ \ 22 | to use. 
(0-9)') 23 | parser.add_argument("--checkpoint_dir", default='./', help='Location to save \ 24 | model checkpoint files.') 25 | args = parser.parse_args() 26 | 27 | print('Start') 28 | test_split = int(args.split) 29 | if test_split < 0 or test_split > 9: 30 | raise Exception("Testing Split must be in range 0-9.") 31 | print('Using CK+ testing split: {}'.format(test_split)) 32 | 33 | checkpoint_dir = os.path.join(args.checkpoint_dir, 'checkpoints_'+str(test_split)) 34 | print 'Checkpoint dir: ', checkpoint_dir 35 | 36 | pid = os.getpid() 37 | print('PID: {}'.format(pid)) 38 | f = open('pid_'+str(test_split), 'wb') 39 | f.write(str(pid)+'\n') 40 | f.close() 41 | 42 | # Load model 43 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 44 | monitor = util.Monitor(model, 45 | checkpoint_directory=checkpoint_dir, 46 | save_steps=1000) 47 | 48 | # Loading CK+ dataset 49 | print('Loading Data') 50 | #supervised_data_loader = SupervisedDataLoaderCrossVal( 51 | # data_paths.ck_plus_data_path) 52 | #train_data_container = supervised_data_loader.load('train', train_split) 53 | #test_data_container = supervised_data_loader.load('test', train_split) 54 | 55 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 56 | X_train, y_train = data_fold_loader.load_folds(data_paths.ck_plus_data_path, train_folds) 57 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 58 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 59 | 60 | print X_train.shape, y_train.shape 61 | print X_val.shape, y_val.shape 62 | print X_test.shape, y_test.shape 63 | 64 | X_train = numpy.float32(X_train) 65 | X_train /= 255.0 66 | X_train *= 2.0 67 | 68 | X_val = numpy.float32(X_val) 69 | X_val /= 255.0 70 | X_val *= 2.0 71 | 72 | X_test = numpy.float32(X_test) 73 | X_test /= 255.0 74 | X_test *= 2.0 75 | 76 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 77 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 78 | train_iterator = train_dataset.iterator( 79 | mode='random_uniform', batch_size=64, num_batches=31000) 80 | val_iterator = val_dataset.iterator( 81 | mode='random_uniform', batch_size=64, num_batches=31000) 82 | 83 | # Do data augmentation (crops, flips, rotations, scales, intensity) 84 | data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), 85 | flip=True, gray_on=True) 86 | normer = util.Normer3(filter_size=5, num_channels=1) 87 | module_list_train = [data_augmenter, normer] 88 | module_list_val = [normer] 89 | preprocessor_train = util.Preprocessor(module_list_train) 90 | preprocessor_val = util.Preprocessor(module_list_val) 91 | 92 | print('Training Model') 93 | for x_batch, y_batch in train_iterator: 94 | x_batch = preprocessor_train.run(x_batch) 95 | monitor.start() 96 | log_prob, accuracy = model.train(x_batch, y_batch) 97 | monitor.stop(1-accuracy) 98 | 99 | if monitor.test: 100 | monitor.start() 101 | x_val_batch, y_val_batch = val_iterator.next() 102 | x_val_batch = preprocessor_val.run(x_val_batch) 103 | val_accuracy = model.eval(x_val_batch, y_val_batch) 104 | monitor.stop_test(1-val_accuracy) 105 | -------------------------------------------------------------------------------- /ck_plus/cnn_ad/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import 
supervised_dataset 10 | #from anna.datasets.supervised_data_loader import SupervisedDataLoaderCrossVal 11 | 12 | import data_fold_loader 13 | import data_paths 14 | from model import SupervisedModel 15 | 16 | 17 | parser = argparse.ArgumentParser(prog='train_cnn_with_dropout_\ 18 | data_augmentation', 19 | description='Script to train convolutional \ 20 | network from random initialization with \ 21 | dropout and data augmentation.') 22 | parser.add_argument("-s", "--split", default='0', help='Testing split of CK+ \ 23 | to use. (0-9)') 24 | parser.add_argument("--checkpoint_dir", default='./', help='Location to save \ 25 | model checkpoint files.') 26 | args = parser.parse_args() 27 | 28 | 29 | print('Start') 30 | test_split = int(args.split) 31 | if test_split < 0 or test_split > 9: 32 | raise Exception("Testing Split must be in range 0-9.") 33 | print('Using CK+ testing split: {}'.format(test_split)) 34 | 35 | checkpoint_dir = os.path.join(args.checkpoint_dir, 'checkpoints_'+str(test_split)) 36 | print 'Checkpoint dir: ', checkpoint_dir 37 | 38 | pid = os.getpid() 39 | print('PID: {}'.format(pid)) 40 | f = open('pid_'+str(test_split), 'wb') 41 | f.write(str(pid)+'\n') 42 | f.close() 43 | 44 | # Load model 45 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 46 | monitor = util.Monitor(model, 47 | checkpoint_directory=checkpoint_dir, 48 | save_steps=1000) 49 | 50 | # Add dropout to fully-connected layer 51 | model.fc4.dropout = 0.5 52 | model._compile() 53 | 54 | # Loading CK+ dataset 55 | print('Loading Data') 56 | #supervised_data_loader = SupervisedDataLoaderCrossVal( 57 | # data_paths.ck_plus_data_path) 58 | #train_data_container = supervised_data_loader.load('train', train_split) 59 | #test_data_container = supervised_data_loader.load('test', train_split) 60 | 61 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 62 | X_train, y_train = data_fold_loader.load_folds(data_paths.ck_plus_data_path, train_folds) 63 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 64 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 65 | 66 | X_train = numpy.float32(X_train) 67 | X_train /= 255.0 68 | X_train *= 2.0 69 | 70 | X_val = numpy.float32(X_val) 71 | X_val /= 255.0 72 | X_val *= 2.0 73 | 74 | X_test = numpy.float32(X_test) 75 | X_test /= 255.0 76 | X_test *= 2.0 77 | 78 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 79 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 80 | train_iterator = train_dataset.iterator( 81 | mode='random_uniform', batch_size=64, num_batches=31000) 82 | val_iterator = val_dataset.iterator( 83 | mode='random_uniform', batch_size=64, num_batches=31000) 84 | 85 | 86 | # Do data augmentation (crops, flips, rotations, scales, intensity) 87 | data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), 88 | flip=True, gray_on=True) 89 | normer = util.Normer3(filter_size=5, num_channels=1) 90 | module_list_train = [data_augmenter, normer] 91 | module_list_val = [normer] 92 | preprocessor_train = util.Preprocessor(module_list_train) 93 | preprocessor_val = util.Preprocessor(module_list_val) 94 | 95 | print('Training Model') 96 | for x_batch, y_batch in train_iterator: 97 | x_batch = preprocessor_train.run(x_batch) 98 | monitor.start() 99 | log_prob, accuracy = model.train(x_batch, y_batch) 100 | monitor.stop(1-accuracy) 101 | 102 | if monitor.test: 103 | monitor.start() 104 | x_val_batch, y_val_batch = 
val_iterator.next() 105 | x_val_batch = preprocessor_val.run(x_val_batch) 106 | val_accuracy = model.eval(x_val_batch, y_val_batch) 107 | monitor.stop_test(1-val_accuracy) 108 | -------------------------------------------------------------------------------- /tfd/README.md: -------------------------------------------------------------------------------- 1 | # Running the TFD experiments 2 | 3 | # Contents 4 | + [Introduction](#introduction) 5 | + [Folder contents](#folder-contents) 6 | + [Running experiments](#running-experiments) 7 | 8 | # Introduction 9 | 10 | This folder contains the code used to obtain our results on the TFD dataset. Our experiments incorporate 11 | two particular types of regularization: 12 | 13 | + a = Data Augmentation 14 | + d = Dropout 15 | 16 | We will first describe the contents of this folder, and then walk you through 17 | how to run the experiments. 18 | 19 | ## Folder contents 20 | The folder contains: 21 | ``` shell 22 | /cnn/train.py 23 | /cnn_a/train.py 24 | /cnn_d/train.py 25 | /cnn_ad/train.py 26 | tfd_checkpoint_checker.py 27 | tfd_single_checkpoint_evaluator.py 28 | data_paths.py 29 | model.py 30 | ``` 31 | 32 | #### `train.py` 33 | As you can see, there are several `train.py` files. Each one trains a cnn model with or without regularization. 34 | Basically the `train.py` files do all the heavy lifting of running the individual experiments. They output a 35 | directory of model checkpoint files, and a log of the training process. 36 | 37 | #### `tfd_single_checkpoint_evaluator.py` 38 | This file contains a script that outputs the performance of a single saved checkpoint. 39 | 40 | #### `tfd_checkpoint_checker.py` 41 | This file contains a script that examines all the checkpoints created by a 42 | single experiment, and chooses the best one based on overall accuracy on the validation set. 43 | 44 | #### `data_paths.py` 45 | This file contains a single variable (``tfd_data_path``) which indicates the path to load the TFD ``.npy`` files 46 | created when running the ``make_tfd_dataset.py`` file. 47 | 48 | #### `model.py` 49 | This file contains the CNN model used by the ``train.py`` files in each of our experiments. 50 | 51 | 52 | # Running experiments 53 | 54 | ## CNN Training 55 | 56 | You are now ready to train one of the four CNNs. The four folders starting with `cnn_` 57 | each contain a `train.py` file which will train the cnn subject to the 58 | regularizations described in the folder's suffix. 59 | 60 | ### How to train a regular CNN 61 | 62 | For example, `cnn` will train a cnn from a random initialization with no additional regularization. 63 | 64 | You can train the cnn with the following command: 65 | ``` shell 66 | # Snippet: cnn training 67 | $ THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' \ 68 | python -u train.py --split 0 \ 69 | >& log0.txt & 70 | ``` 71 | 72 | Since the TFD dataset is typically broken into splits and their results averaged, 73 | the `--split` option indicates which of the 5 splits to use (0-4) when training. The code 74 | will save the `.pkl` file containing the network parameters to a directory called `./checkpoints_0/` 75 | which will denote the split used. 76 | 77 | 78 | ### How to evaluate a model's performance 79 | 80 | After you have trained a split to completion, you can find the best performing 81 | checkpoint by running the checkpoint evaluator found in 82 | `tfd_checkpoint_checker.py`. We will use the model trained in `cnn` as an 83 | example.
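Conceptually, the checker just walks over every saved `.pkl` checkpoint in the directory you point it at, scores each one on the split's validation set, and keeps the best one. The snippet below is only a simplified sketch of that idea, not the actual script; `evaluate_on_validation` is a hypothetical placeholder for the model/evaluator calls the real code makes.

```python
# Simplified sketch of checkpoint selection; not the actual checker script.
# `evaluate_on_validation` is a hypothetical callable that loads a checkpoint
# into the model and returns its validation accuracy.
import glob
import os

import numpy


def pick_best_checkpoint(checkpoint_dir, evaluate_on_validation):
    checkpoint_files = sorted(glob.glob(os.path.join(checkpoint_dir, '*.pkl')))
    val_accuracies = [evaluate_on_validation(f) for f in checkpoint_files]
    best_index = numpy.argmax(val_accuracies)
    return checkpoint_files[best_index], val_accuracies[best_index]
```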
Simply run the following command: 84 | 85 | ``` shell 86 | # Snippet: cnn checkpoint evaluation 87 | $ THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' \ 88 | python -u tfd_checkpoint_checker.py --split 0 ./cnn/checkpoints_0/ \ 89 | >& cnn_best_performance_split_0.txt & 90 | ``` 91 | 92 | With this command, `tfd_checkpoint_checker.py` will iterate over the list of 93 | checkpoints found in `./cnn/checkpoints_0/` and compute the accuracy on 94 | the split's validation set. It will then select the checkpoint that yielded the highest 95 | accuracy. The command also writes all of the results to a text file called 96 | `cnn_best_performance_split_0.txt`. 97 | 98 | ### How to train the rest of the cnns 99 | 100 | Now, if you want to train a network with specific regularizations active, 101 | the process is very simple. 102 | 103 | 1. Using the legend above, create a suffix string (S) that corresponds to the 104 | regularizations you wish to impose. 105 | 2. Go to the `./cnn_S/` folder. 106 | 3. Run the `train.py` file as shown in the CNN Training section. 107 | 108 | -------------------------------------------------------------------------------- /ck_plus/README.md: -------------------------------------------------------------------------------- 1 | # Running the CK+ experiments 2 | 3 | # Contents 4 | + [Introduction](#introduction) 5 | + [Folder contents](#folder-contents) 6 | + [Running experiments](#running-experiments) 7 | 8 | # Introduction 9 | 10 | This folder contains the code used to obtain our results on the [CK+][CK+] dataset. Our experiments incorporate 11 | two particular types of regularization: 12 | 13 | + a = Data Augmentation 14 | + d = Dropout 15 | 16 | We will first describe the contents of this folder, and then walk you through 17 | how to run the experiments. 18 | 19 | ## Folder contents 20 | The folder contains: 21 | ``` shell 22 | /cnn/train.py 23 | /cnn_a/train.py 24 | /cnn_d/train.py 25 | /cnn_ad/train.py 26 | ck_plus_checkpoint_checker.py 27 | ck_plus_single_checkpoint_evaluator.py 28 | data_paths.py 29 | model.py 30 | ``` 31 | 32 | #### `train.py` 33 | As you can see, there are several `train.py` files. Each one trains a cnn model with or without regularization. 34 | Basically the `train.py` files do all the heavy lifting of running the individual experiments. They output a 35 | directory of model checkpoint files, and a log of the training process. 36 | 37 | #### `ck_plus_single_checkpoint_evaluator.py` 38 | This file contains a script that outputs the performance of a single saved checkpoint. 39 | 40 | #### `ck_plus_checkpoint_checker.py` 41 | This file contains a script that examines all the checkpoints created by a 42 | single experiment, and chooses the best one. 43 | 44 | #### `data_paths.py` 45 | This file contains a single variable (``ck_plus_data_path``) which indicates the path to load the CK+ ``.npy`` files 46 | created when running the ``make_ck_plus_dataset.py`` file. 47 | 48 | #### `model.py` 49 | This file contains the CNN model used by the ``train.py`` files in each of our experiments. 50 | 51 | 52 | # Running experiments 53 | 54 | ## CNN Training 55 | 56 | You are now ready to train one of the four CNNs. The four folders starting with `cnn_` 57 | each contain a `train.py` file which will train the cnn subject to the 58 | regularizations described in the folder's suffix. 59 | 60 | ### How to train a regular CNN 61 | 62 | For example, `cnn` will train a cnn from a random initialization with no additional regularization.
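For orientation, the regularized variants differ from the plain `cnn` script by only a few lines in their `train.py`. The lines below are excerpted from the `cnn_ad` variant in this folder (an excerpt for illustration, not a standalone script):

```python
# Dropout variants (cnn_d, cnn_ad): add dropout to the fully-connected layer
# before compiling the model.
model.fc4.dropout = 0.5
model._compile()

# Data-augmentation variants (cnn_a, cnn_ad): prepend random crops/flips to the
# per-batch preprocessing pipeline used on training batches.
data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), flip=True, gray_on=True)
normer = util.Normer3(filter_size=5, num_channels=1)
preprocessor_train = util.Preprocessor([data_augmenter, normer])
```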
63 | 64 | You can train the cnn with the following command: 65 | ``` shell 66 | # Snippet: cnn training 67 | $ THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' \ 68 | python -u train.py --split 0 \ 69 | >& log0.txt & 70 | ``` 71 | 72 | Since the [CK+][CK+] dataset is typically broken into splits and their results averaged, 73 | the `--split` option indicates which of the 10 splits (0-9) will be used for testing. For example, 74 | the command above will train a net on folds 1-9 and evaluate the results on fold 0. The code 75 | will save the `.pkl` file containing the network parameters to a directory called `./checkpoints_0/` 76 | which will denote the split used. 77 | 78 | 79 | ### How to evaluate a model's performance 80 | 81 | After you have trained a split to completion, you can find the best performing 82 | checkpoint by running the checkpoint evaluator found in 83 | `ck_plus_checkpoint_checker.py`. We will use the model trained in `cnn` as an 84 | example. Simply run the following command: 85 | 86 | ``` shell 87 | # Snippet: cnn checkpoint evaluation 88 | $ THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' \ 89 | python -u ck_plus_checkpoint_checker.py --split 0 ./cnn/checkpoints_0/ \ 90 | >& cnn_best_performance_split_0.txt & 91 | ``` 92 | 93 | With this command, `ck_plus_checkpoint_checker.py` will iterate over the list of 94 | checkpoints found in `./cnn/checkpoints_0/` and compute the accuracy on 95 | the test set. It will then select the checkpoint that yielded the highest 96 | accuracy. The command also writes all of the results to a text file called 97 | `cnn_best_performance_split_0.txt`. Please remember, the `--split` argument 98 | indicates which fold is being used as the test/evaluation set. 99 | 100 | ### How to train the rest of the cnns 101 | 102 | Now, if you want to train a network with specific regularizations active, 103 | the process is very simple. 104 | 105 | 1. Using the legend above, create a suffix string (S) that corresponds to the 106 | regularizations you wish to impose. 107 | 2. Go to the `./cnn_S/` folder. 108 | 3. Run the `train.py` file as shown in the CNN Training section. 109 | 110 | 111 | [CK+]:http://www.pitt.edu/~emotion/ck-spread.htm 112 | -------------------------------------------------------------------------------- /data_scripts/README.md: -------------------------------------------------------------------------------- 1 | ## Dataset Pre-processing 2 | This folder contains code that will help you pre-process and save the [extended Cohn-Kanade (CK+)][CK+] and 3 | the Toronto Face Database (TFD) in ``.npy`` files to be used by our CNN models. 4 | 5 | > :pushpin: **Note:** We assume that you have already downloaded copies of the CK+ and TFD datasets 6 | on your machine. This code does not download the datasets from the distributors. It only pre-processes 7 | an already downloaded copy. 8 | 9 | ## Extended Cohn-Kanade Dataset (CK+) 10 | 11 | If you have downloaded the CK+ dataset properly, the folder should contain the following files: 12 | ```shell 13 | CK_Agreement_Form.pdf 14 | extended-cohn-kanade-images.zip 15 | Consent-for-publication.doc 16 | CVPR2010_CK.pdf 17 | Emotion_labels.zip 18 | FACS_labels.zip 19 | ReadMeCohnKanadeDatabase_website.txt 20 | ``` 21 | 22 | First, unzip the ``extended-cohn-kanade-images.zip`` and the ``Emotion_labels.zip`` files. Make sure 23 | that the folders that hold the extracted data are named: ``cohn-kanade-images/`` and ``Emotion_labels/`` 24 | respectively.
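If you want to confirm the layout before running the conversion, a quick check like the one below is enough (the root path here is only an example; use wherever you unzipped the dataset):

```python
# Sanity check that the unzipped CK+ folders use the expected names.
# '/data/CK_PLUS' is only an example path, not a requirement of the scripts.
import os

ck_plus_root = '/data/CK_PLUS'
for folder in ('cohn-kanade-images', 'Emotion_labels'):
    path = os.path.join(ck_plus_root, folder)
    print('{0}: {1}'.format(path, 'found' if os.path.isdir(path) else 'MISSING'))
```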
25 | 26 | The ``make_ck_plus_dataset.py`` file will help you convert the images in the ``cohn-kanade-images`` folder and the labels in the ``Emotion_labels`` folder to ``.npy`` files. The script requires two arguments. They are: 27 | 28 | 1. **Input path:** path of CK+ dataset folder 29 | 2. **Output/save path:** path to save the ``.npy`` files (default: ``./CK_PLUS_HERE/``) 30 | 31 | To run the script, simply type the following: 32 | ```shell 33 | python make_ck_plus_dataset.py --input_path /path/to/ck+/dataset --save_path /path/to/save/npy/files 34 | ``` 35 | 36 | For example, suppose the CK+ dataset was in ``/data/CK_PLUS/`` and you wanted to save 37 | the ```.npy``` files to ``./CK_PLUS_HERE/``, then you would use the following command: 38 | ```shell 39 | python make_ck_plus_dataset.py --input_path /data/CK_PLUS/ --save_path ./CK_PLUS_HERE/ 40 | ``` 41 | 42 | The code will copy the CK+ dataset to ``./CK_PLUS_HERE/``, which should now contain a ``npy_files/`` folder. 43 | The ``npy_files/`` folder should contain the following: 44 | ```shell 45 | npy_files/ 46 | folds.npy 47 | subjs.npy 48 | X.npy 49 | y.npy 50 | ``` 51 | 52 | The images and the labels are stored in ``X.npy`` and ``y.npy`` respectively, while ``subjs.npy`` denotes 53 | the subject id of each person in the images and ``folds.npy`` says which image belongs to which fold (0-9). 54 | 55 | Now, all you need to do is open the ``data_paths.py`` files in the ``ck_plus`` and ``ck_plus_six_class`` directories of this repository, and set the path variable in each to the **absolute path** of the ``./CK_PLUS_HERE/npy_files/`` folder. 56 | 57 | Congratulations! You are now ready to run our CNNs on the CK+ dataset! 58 | 59 | 60 | ## Toronto Face Database (TFD) 61 | 62 | Once you have downloaded the TFD dataset, you should have a folder containing the following ``.mat`` files: 63 | 64 | ```shell 65 | TFD_48x48.mat 66 | TFD_96x96.mat 67 | TFD_info.mat 68 | ``` 69 | 70 | The ``make_tfd_dataset.py`` file will help you convert the ``.mat`` files in the Toronto Face Database (TFD) 71 | to ``.npy`` files. The script requires two arguments. They are: 72 | 73 | 1. **Input path:** path of folder that contains the ``.mat`` files 74 | 2. **Output/save path:** path to save the ``.npy`` files (default: ``./TFD_HERE/``) 75 | 76 | To run the script, simply type the following: 77 | ```shell 78 | python make_tfd_dataset.py --input_path /path/to/mat/files --save_path /path/to/save/npy/files 79 | ``` 80 | 81 | For example, suppose the ``.mat`` files were stored in ``/data/TFD/`` and you wanted to save 82 | the ```.npy``` files to ``./TFD_HERE/``, then you would use the following command: 83 | ```shell 84 | python make_tfd_dataset.py --input_path /data/TFD/ --save_path ./TFD_HERE/ 85 | ``` 86 | 87 | The ``TFD_HERE`` folder should now contain the following folders: 88 | ```shell 89 | npy_files 90 | TFD_48 91 | split_0 92 | split_1 93 | split_2 94 | split_3 95 | split_4 96 | unlabeled 97 | TFD_96 98 | split_0 99 | split_1 100 | split_2 101 | split_3 102 | split_4 103 | unlabeled 104 | ``` 105 | 106 | Now, all you need to do is open the ``data_paths.py`` file in the ``tfd`` directory of this repository, 107 | and set it to the **absolute path** of the ``TFD_HERE`` folder. 108 | 109 | Congratulations! You are now ready to run our CNNs on the TFD dataset!
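As a final sanity check, you can load the generated ``.npy`` files directly and look at their shapes. The sketch below assumes the default save locations used in the examples above; adjust the paths if you saved the files elsewhere.

```python
# Inspect the pre-processed arrays produced by the two dataset scripts.
# The directories below match the default save paths used in this README.
import os

import numpy

ck_plus_dir = './CK_PLUS_HERE/npy_files'
for name in ('X.npy', 'y.npy', 'subjs.npy', 'folds.npy'):
    arr = numpy.load(os.path.join(ck_plus_dir, name))
    print('{0}: shape {1}, dtype {2}'.format(name, arr.shape, arr.dtype))

tfd_split_dir = './TFD_HERE/npy_files/TFD_96/split_0'
for name in ('X.npy', 'y.npy', 'folds.npy'):
    arr = numpy.load(os.path.join(tfd_split_dir, name))
    print('{0}: shape {1}, dtype {2}'.format(name, arr.shape, arr.dtype))
```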
110 | 111 | [CK+]:http://www.pitt.edu/~emotion/ck-spread.htm 112 | -------------------------------------------------------------------------------- /data_scripts/make_tfd_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy 5 | from scipy.io import loadmat 6 | 7 | 8 | def load_data(TFD_dict): 9 | X = TFD_dict['images'] 10 | folds = TFD_dict['folds'] 11 | y_expression = TFD_dict['labs_ex'] 12 | y_id = TFD_dict['labs_id'] 13 | 14 | X = X[:, numpy.newaxis, :, :] 15 | 16 | # Remove samples without expression label 17 | y_expression = y_expression[y_expression != -1] 18 | y_expression -= 1 19 | # print y_expression.shape, y_expression.min(), y_expression.max() 20 | 21 | # Remove samples without id label 22 | y_id = y_id[y_id != -1] 23 | y_id = reindex_labels(y_id) 24 | # print y_id.shape, y_id.min(), y_id.max(), len(numpy.unique(y_id)) 25 | 26 | return X, y_expression, y_id, folds 27 | 28 | 29 | def reindex_labels(y): 30 | unique_classes = numpy.unique(y) 31 | num_classes = len(unique_classes) 32 | num_samples = len(y) 33 | 34 | y_reindex = numpy.zeros(num_samples) 35 | for i, c in enumerate(unique_classes): 36 | y_reindex[i] = numpy.where(unique_classes == y[i])[0] 37 | 38 | return y_reindex 39 | 40 | 41 | def make_dirs(path): 42 | if not os.path.exists(path): 43 | os.makedirs(path) 44 | 45 | 46 | def make_dir_structure(path): 47 | make_dirs(os.path.join(path, 'TFD_48')) 48 | make_dirs(os.path.join(path, 'TFD_96')) 49 | 50 | 51 | def save_out_unlabeled_data_to_npy_file(save_path, X): 52 | save_path = os.path.join(save_path, 'unlabeled') 53 | make_dirs(save_path) 54 | numpy.save(os.path.join(save_path, 'X.npy'), X) 55 | 56 | 57 | def save_out_labeled_data_to_npy_files(save_path, X, y, folds): 58 | num_folds = folds.shape[1] 59 | for i in range(num_folds): 60 | inds = folds[:, i] != 0 61 | X_save = X[inds, :, :, :] 62 | fold_save = (folds[inds, i]-1) 63 | 64 | split_path = os.path.join(save_path, 'split_'+str(i)) 65 | make_dirs(split_path) 66 | numpy.save(os.path.join(split_path, 'X.npy'), X_save) 67 | numpy.save(os.path.join(split_path, 'y.npy'), y) 68 | numpy.save(os.path.join(split_path, 'folds.npy'), fold_save) 69 | 70 | 71 | if __name__ == "__main__": 72 | parser = argparse.ArgumentParser( 73 | prog='make_tfd_dataset', 74 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 75 | description='Script to load and split the Toronto Face Dataset (TFD).') 76 | parser.add_argument('-ip', '--input_path', dest='input_path', 77 | help='Path specifying location of TFD (.mat) files.') 78 | parser.add_argument('-sp', '--save_path', dest='save_path', 79 | default='./TFD_HERE', 80 | help='Path specifying where to save \ 81 | the dataset (.npy) files.') 82 | args = parser.parse_args() 83 | 84 | print('\n================================================================') 85 | print(' TFD Dataset Manager ') 86 | print('================================================================\n') 87 | 88 | input_path = args.input_path 89 | save_path = os.path.join(args.save_path, 'npy_files') 90 | 91 | make_dir_structure(save_path) 92 | save_path_48 = os.path.join(save_path, 'TFD_48') 93 | save_path_96 = os.path.join(save_path, 'TFD_96') 94 | 95 | # Load 48x48 and 96x96 image data 96 | print 'Loading the TFD dataset' 97 | TFD_48_dict = loadmat(os.path.join(input_path, 'TFD_48x48.mat')) 98 | TFD_96_dict = loadmat(os.path.join(input_path, 'TFD_96x96.mat')) 99 | TFD_info = loadmat(os.path.join(input_path, 'TFD_info.mat')) 
100 | [X_48, y_expression_48, y_id_48, folds_48] = load_data(TFD_48_dict) 101 | [X_96, y_expression_96, y_id_96, folds_96] = load_data(TFD_96_dict) 102 | # print X_48.shape, X_96.shape 103 | 104 | # Extracting unlabeled data 105 | X_u_48 = X_48[folds_48[:, 0] == 0, :, :, :] 106 | X_u_96 = X_96[folds_96[:, 0] == 0, :, :, :] 107 | # print X_u_48.shape, X_u_96.shape 108 | 109 | # Save out the 98,058 unlabeled faces to disk 110 | print '\nSaving Unlabeled 48x48 images' 111 | save_out_unlabeled_data_to_npy_file(save_path_48, X_u_48) 112 | print 'Saving Unlabeled 96x96 images' 113 | save_out_unlabeled_data_to_npy_file(save_path_96, X_u_96) 114 | 115 | # Save each fold of the labeled data individually 116 | # Each split folder contains: 117 | # X - 4178 images 118 | # y - 4178 expression labels ranging from 0-6 119 | # folds - 4178 fold labels (0-train, 1-dev, 2-test) 120 | print '\nSaving Labeled Splits of 48x48 images' 121 | save_out_labeled_data_to_npy_files(save_path_48, X_48, 122 | y_expression_48, folds_48) 123 | print 'Saving Labeled Splits of 96x96 images' 124 | save_out_labeled_data_to_npy_files(save_path_96, X_96, 125 | y_expression_96, folds_96) 126 | 127 | print '\nSuccessfully pre-processed the Toronto Face Dataset!' 128 | -------------------------------------------------------------------------------- /ck_plus_six_class/ck_plus_single_checkpoint_evaluator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy 7 | 8 | from anna.datasets.supervised_data_loader import SupervisedDataContainer 9 | from anna import util 10 | 11 | import data_fold_loader 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | def reindex_labels(y): 17 | label_mapping = {1: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5} 18 | for label in label_mapping.keys(): 19 | y[y == label] = label_mapping[label] 20 | 21 | return y 22 | 23 | 24 | def add_padding(X, y): 25 | padding_shape = (64-X.shape[0], X.shape[1], 26 | X.shape[2], X.shape[3]) 27 | data_padding = numpy.zeros(padding_shape, dtype=X.dtype) 28 | label_padding = numpy.zeros(padding_shape[0], dtype=y.dtype) 29 | X_pad = numpy.concatenate((X, data_padding), axis=0) 30 | y_pad = numpy.concatenate((y, label_padding), axis=0) 31 | return X_pad, y_pad 32 | 33 | if __name__ == "__main__": 34 | parser = argparse.ArgumentParser( 35 | prog='ck_plus_single_checkpoint_evaluator', 36 | description='Script to evaluate the performance of a checkpoint \ 37 | on CK+ (six classes).') 38 | parser.add_argument("-s", "--split", default='0', 39 | help='Testing split of CK+ to use. 
(0-9)') 40 | parser.add_argument("checkpoint_file", 41 | help='Path to a single model checkpoint (.pkl file).') 42 | args = parser.parse_args() 43 | 44 | checkpoint_file = args.checkpoint_file 45 | test_split = int(args.split) 46 | dataset_path = data_paths.ck_plus_data_path 47 | 48 | print 'Checkpoint: %s' % checkpoint_file 49 | print 'Testing on split %d\n' % test_split 50 | 51 | # Load model 52 | model = SupervisedModel('evaluation', './') 53 | 54 | # Load dataset 55 | #supervised_data_loader = SupervisedDataLoaderCrossVal(dataset_path) 56 | #test_data_container = supervised_data_loader.load(mode='test', fold=fold) 57 | #test_data_container.X = numpy.float32(test_data_container.X) 58 | #test_data_container.X /= 255.0 59 | #test_data_container.X *= 2.0 60 | 61 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 62 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 63 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 64 | print 'Val Data: ', X_val.shape, y_val.shape 65 | print 'Test Data: ', X_test.shape, y_test.shape 66 | 67 | X_val = numpy.float32(X_val) 68 | X_val /= 255.0 69 | X_val *= 2.0 70 | 71 | X_test = numpy.float32(X_test) 72 | X_test /= 255.0 73 | X_test *= 2.0 74 | 75 | val_data_container = SupervisedDataContainer(X_val, y_val) 76 | test_data_container = SupervisedDataContainer(X_test, y_test) 77 | 78 | 79 | # Remove samples with neutral and contempt labels 80 | val_mask = numpy.logical_and(y_val != 0, y_val != 2) 81 | X_val = X_val[val_mask, :, :, :] 82 | y_val = y_val[val_mask] 83 | y_val = reindex_labels(y_val) 84 | num_val_samples = len(y_val) 85 | 86 | mask_test = numpy.logical_and(y_test != 0, y_test != 2) 87 | X_test = X_test[mask_test, :, :, :] 88 | y_test = y_test[mask_test] 89 | y_test = reindex_labels(y_test) 90 | num_test_samples = len(y_test) 91 | 92 | print 'Reduced Val Data: ', X_val.shape, y_val.shape 93 | print 'Reduced Test Data: ', X_test.shape, y_test.shape 94 | 95 | if test_split == 9: 96 | X_test, y_test = add_padding(X_test, y_test) 97 | elif test_split == 8: 98 | X_val, y_val = add_padding(X_val, y_val) 99 | 100 | val_data_container = SupervisedDataContainer(X_val, y_val) 101 | test_data_container = SupervisedDataContainer(X_test, y_test) 102 | 103 | # Construct evaluator 104 | preprocessor = [util.Normer3(filter_size=5, num_channels=1)] 105 | 106 | val_evaluator = util.Evaluator(model, val_data_container, 107 | checkpoint_file, preprocessor) 108 | test_evaluator = util.Evaluator(model, test_data_container, 109 | checkpoint_file, preprocessor) 110 | 111 | # For the inputted checkpoint, compute the overall test accuracy 112 | #accuracies = [] 113 | print 'Checkpoint: %s' % os.path.split(checkpoint_file)[1] 114 | val_evaluator.set_checkpoint(checkpoint_file) 115 | 116 | if test_split != 8: 117 | val_accuracy = val_evaluator.run() 118 | else: 119 | val_predictions = val_evaluator._get_predictions() 120 | val_predictions = val_predictions[0:num_val_samples] 121 | val_true_labels = val_data_container.y[0:num_val_samples] 122 | 123 | val_accuracy = 100.0 * (1.0 * numpy.sum( 124 | val_predictions == val_true_labels) / len(val_true_labels)) 125 | 126 | print 'Val Accuracy: %f\n' % val_accuracy 127 | 128 | 129 | print 'Checkpoint: %s' % os.path.split(checkpoint_file)[1] 130 | test_evaluator.set_checkpoint(checkpoint_file) 131 | 132 | if test_split != 9: 133 | test_accuracy = test_evaluator.run() 134 | else: 135 | test_predictions = 
test_evaluator._get_predictions() 136 | test_predictions = test_predictions[0:num_test_samples] 137 | test_true_labels = test_data_container.y[0:num_test_samples] 138 | 139 | test_accuracy = 100.0 * (1.0 * numpy.sum( 140 | test_predictions == test_true_labels) / len(test_true_labels)) 141 | 142 | print 'Test Accuracy: %f\n' % test_accuracy 143 | 144 | -------------------------------------------------------------------------------- /ck_plus_six_class/cnn_ad/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append('..') 5 | 6 | import numpy 7 | 8 | from anna import util 9 | from anna.datasets import supervised_dataset 10 | #from anna.datasets.supervised_data_loader import SupervisedDataLoaderCrossVal 11 | 12 | import data_fold_loader 13 | import data_paths 14 | from model import SupervisedModel 15 | 16 | 17 | def reindex_labels(y): 18 | label_mapping = {1: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5} 19 | for label in label_mapping.keys(): 20 | y[y == label] = label_mapping[label] 21 | 22 | return y 23 | 24 | 25 | def add_padding(X, y): 26 | padding_shape = (64-X.shape[0], X.shape[1], 27 | X.shape[2], X.shape[3]) 28 | data_padding = numpy.zeros(padding_shape, dtype=X.dtype) 29 | label_padding = numpy.zeros(padding_shape[0], dtype=y.dtype) 30 | X_pad = numpy.concatenate((X, data_padding), axis=0) 31 | y_pad = numpy.concatenate((y, label_padding), axis=0) 32 | return X_pad, y_pad 33 | 34 | 35 | parser = argparse.ArgumentParser(prog='train_cnn_with_dropout_\ 36 | data_augmentation_six_class', 37 | description='Script to train convolutional \ 38 | network from random initialization with \ 39 | dropout and data augmentation \ 40 | on six classes.') 41 | parser.add_argument("-s", "--split", default='0', help='Testing split of CK+ \ 42 | to use. 
(0-9)') 43 | parser.add_argument("--checkpoint_dir", default='./', help='Location to save \ 44 | model checkpoint files.') 45 | args = parser.parse_args() 46 | 47 | 48 | print('Start') 49 | test_split = int(args.split) 50 | if test_split < 0 or test_split > 9: 51 | raise Exception("Testing Split must be in range 0-9.") 52 | print('Using CK+ testing split: {}'.format(test_split)) 53 | 54 | checkpoint_dir = os.path.join(args.checkpoint_dir, 'checkpoints_'+str(test_split)) 55 | print 'Checkpoint dir: ', checkpoint_dir 56 | 57 | pid = os.getpid() 58 | print('PID: {}'.format(pid)) 59 | f = open('pid_'+str(test_split), 'wb') 60 | f.write(str(pid)+'\n') 61 | f.close() 62 | 63 | # Load model 64 | model = SupervisedModel('experiment', './', learning_rate=1e-2) 65 | monitor = util.Monitor(model, 66 | checkpoint_directory=checkpoint_dir, 67 | save_steps=1000) 68 | 69 | # Add dropout to fully-connected layer 70 | model.fc4.dropout = 0.5 71 | model._compile() 72 | 73 | # Loading CK+ dataset 74 | print('Loading Data') 75 | #supervised_data_loader = SupervisedDataLoaderCrossVal( 76 | # '/data/Expr_Recog/CK+_condensed/npy_files/') 77 | #train_data_container = supervised_data_loader.load('train', train_split) 78 | #test_data_container = supervised_data_loader.load('test', train_split) 79 | 80 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 81 | X_train, y_train = data_fold_loader.load_folds(data_paths.ck_plus_data_path, train_folds) 82 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 83 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 84 | 85 | train_mask = numpy.logical_and(y_train != 0, y_train != 2) 86 | X_train = X_train[train_mask, :, :, :] 87 | y_train = y_train[train_mask] 88 | y_train = reindex_labels(y_train) 89 | 90 | val_mask = numpy.logical_and(y_val != 0, y_val != 2) 91 | X_val = X_val[val_mask, :, :, :] 92 | y_val = y_val[val_mask] 93 | y_val = reindex_labels(y_val) 94 | 95 | print 'Data train: ', X_train.shape, y_train.shape 96 | print 'Data val: ', X_val.shape, y_val.shape 97 | 98 | #test_mask = numpy.logical_and(y_test != 0, y_test != 2) 99 | #X_test = X_test[test_mask, :, :, :] 100 | #y_test = y_test[test_mask] 101 | #y_test = reindex_labels(y_test) 102 | 103 | #print '' 104 | #print 'Train Data: ', X_train.shape, y_train.shape 105 | #print 'Val Data: ', X_val.shape, y_val.shape 106 | #print 'Test Data: ', X_test.shape, y_test.shape 107 | 108 | if test_split == 8: 109 | # Val dataset only has 60 images (< 64 images = batch_size) 110 | X_val, y_val = add_padding(X_val, y_val) 111 | 112 | X_train = numpy.float32(X_train) 113 | X_train /= 255.0 114 | X_train *= 2.0 115 | 116 | X_val = numpy.float32(X_val) 117 | X_val /= 255.0 118 | X_val *= 2.0 119 | 120 | train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train) 121 | val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val) 122 | train_iterator = train_dataset.iterator( 123 | mode='random_uniform', batch_size=64, num_batches=31000) 124 | val_iterator = val_dataset.iterator( 125 | mode='random_uniform', batch_size=64, num_batches=31000) 126 | 127 | # Do data augmentation (crops, flips, rotations, scales, intensity) 128 | data_augmenter = util.DataAugmenter2(crop_shape=(96, 96), 129 | flip=True, gray_on=True) 130 | normer = util.Normer3(filter_size=5, num_channels=1) 131 | module_list_train = [data_augmenter, normer] 132 | module_list_val = [normer] 133 | preprocessor_train = 
util.Preprocessor(module_list_train) 134 | preprocessor_val = util.Preprocessor(module_list_val) 135 | 136 | print('Training Model') 137 | for x_batch, y_batch in train_iterator: 138 | x_batch = preprocessor_train.run(x_batch) 139 | monitor.start() 140 | log_prob, accuracy = model.train(x_batch, y_batch) 141 | monitor.stop(1-accuracy) 142 | 143 | if monitor.test: 144 | monitor.start() 145 | x_val_batch, y_val_batch = val_iterator.next() 146 | x_val_batch = preprocessor_val.run(x_val_batch) 147 | val_accuracy = model.eval(x_val_batch, y_val_batch) 148 | monitor.stop_test(1-val_accuracy) 149 | -------------------------------------------------------------------------------- /ck_plus_six_class/ck_plus_checkpoint_checker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy 7 | 8 | from anna.datasets.supervised_data_loader import SupervisedDataContainer 9 | from anna import util 10 | 11 | import data_fold_loader 12 | import data_paths 13 | from model import SupervisedModel 14 | 15 | 16 | def reindex_labels(y): 17 | label_mapping = {1: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5} 18 | for label in label_mapping.keys(): 19 | y[y == label] = label_mapping[label] 20 | 21 | return y 22 | 23 | 24 | def add_padding(X, y): 25 | padding_shape = (64-X.shape[0], X.shape[1], 26 | X.shape[2], X.shape[3]) 27 | data_padding = numpy.zeros(padding_shape, dtype=X.dtype) 28 | label_padding = numpy.zeros(padding_shape[0], dtype=y.dtype) 29 | X_pad = numpy.concatenate((X, data_padding), axis=0) 30 | y_pad = numpy.concatenate((y, label_padding), axis=0) 31 | return X_pad, y_pad 32 | 33 | if __name__ == "__main__": 34 | parser = argparse.ArgumentParser( 35 | prog='ck_plus_checkpoint_checker', 36 | description='Script to select best performing checkpoint \ 37 | on CK+ (six classes).') 38 | parser.add_argument("-s", "--split", default='0', 39 | help='Testing split of CK+ to use. 
(0-9)') 40 | parser.add_argument("checkpoint_dir", 41 | help='Folder containing all .pkl checkpoint files.') 42 | args = parser.parse_args() 43 | 44 | checkpoint_dir = args.checkpoint_dir 45 | test_split = int(args.split) 46 | dataset_path = data_paths.ck_plus_data_path 47 | 48 | print 'Checkpoint directory: %s' % checkpoint_dir 49 | print 'Testing on split %d\n' % test_split 50 | 51 | # Load model 52 | model = SupervisedModel('evaluation', './') 53 | 54 | # Load dataset 55 | #supervised_data_loader = SupervisedDataLoaderCrossVal(dataset_path) 56 | #test_data_container = supervised_data_loader.load(mode='test', fold=fold) 57 | #test_data_container.X = numpy.float32(test_data_container.X) 58 | #test_data_container.X /= 255.0 59 | #test_data_container.X *= 2.0 60 | 61 | train_folds, val_fold, _ = data_fold_loader.load_fold_assignment(test_fold=test_split) 62 | X_val, y_val = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [val_fold]) 63 | X_test, y_test = data_fold_loader.load_folds(data_paths.ck_plus_data_path, [test_split]) 64 | print 'Val Data: ', X_val.shape, y_val.shape 65 | print 'Test Data: ', X_test.shape, y_test.shape 66 | 67 | X_val = numpy.float32(X_val) 68 | X_val /= 255.0 69 | X_val *= 2.0 70 | 71 | X_test = numpy.float32(X_test) 72 | X_test /= 255.0 73 | X_test *= 2.0 74 | 75 | val_data_container = SupervisedDataContainer(X_val, y_val) 76 | test_data_container = SupervisedDataContainer(X_test, y_test) 77 | 78 | 79 | # Remove samples with neutral and contempt labels 80 | val_mask = numpy.logical_and(y_val != 0, y_val != 2) 81 | X_val = X_val[val_mask, :, :, :] 82 | y_val = y_val[val_mask] 83 | y_val = reindex_labels(y_val) 84 | num_val_samples = len(y_val) 85 | 86 | test_mask = numpy.logical_and(y_test != 0, y_test != 2) 87 | X_test = X_test[test_mask, :, :, :] 88 | y_test = y_test[test_mask] 89 | y_test = reindex_labels(y_test) 90 | num_test_samples = len(y_test) 91 | 92 | print 'Reduced Val Data: ', X_val.shape, y_val.shape 93 | print 'Reduced Test Data: ', X_test.shape, y_test.shape 94 | 95 | if test_split == 9: 96 | X_test, y_test = add_padding(X_test, y_test) 97 | elif test_split == 8: 98 | X_val, y_val = add_padding(X_val, y_val) 99 | 100 | val_data_container = SupervisedDataContainer(X_val, y_val) 101 | test_data_container = SupervisedDataContainer(X_test, y_test) 102 | 103 | # Construct evaluator 104 | preprocessor = [util.Normer3(filter_size=5, num_channels=1)] 105 | 106 | checkpoint_file_list = sorted( 107 | glob.glob(os.path.join(checkpoint_dir, '*.pkl'))) 108 | val_evaluator = util.Evaluator(model, val_data_container, 109 | checkpoint_file_list[0], preprocessor) 110 | test_evaluator = util.Evaluator(model, test_data_container, 111 | checkpoint_file_list[0], preprocessor) 112 | 113 | # For each checkpoint, compute the overall val accuracy 114 | val_accuracies = [] 115 | for checkpoint in checkpoint_file_list: 116 | print 'Checkpoint: %s' % os.path.split(checkpoint)[1] 117 | val_evaluator.set_checkpoint(checkpoint) 118 | 119 | if test_split != 8: 120 | val_accuracy = val_evaluator.run() 121 | else: 122 | val_predictions = val_evaluator._get_predictions() 123 | val_predictions = val_predictions[0:num_val_samples] 124 | val_true_labels = val_data_container.y[0:num_val_samples] 125 | 126 | val_accuracy = 100.0 * (1.0 * numpy.sum( 127 | val_predictions == val_true_labels) / len(val_true_labels)) 128 | 129 | print 'Val Accuracy: %f\n' % val_accuracy 130 | val_accuracies.append(val_accuracy) 131 | 132 | # Find checkpoint that produced the highest accuracy 133 | 
max_val_accuracy = numpy.max(val_accuracies) 134 | max_index = numpy.argmax(val_accuracies) 135 | max_checkpoint = checkpoint_file_list[max_index] 136 | print 'Max Checkpoint: %s' % max_checkpoint 137 | print 'Max Val Accuracy: %f' % max_val_accuracy 138 | 139 | # Compute test accuracy of chosen checkpoint 140 | test_evaluator.set_checkpoint(max_checkpoint) 141 | 142 | if test_split != 9: 143 | test_accuracy = test_evaluator.run() 144 | else: 145 | test_predictions = test_evaluator._get_predictions() 146 | test_predictions = test_predictions[0:num_test_samples] 147 | test_true_labels = test_data_container.y[0:num_test_samples] 148 | 149 | test_accuracy = 100.0 * (1.0 * numpy.sum( 150 | test_predictions == test_true_labels) / len(test_true_labels)) 151 | 152 | print 'Test Accuracy: %f\n' % test_accuracy 153 | 154 | -------------------------------------------------------------------------------- /data_scripts/make_ck_plus_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import shutil 5 | import sys 6 | import time 7 | 8 | import cv2 9 | import numpy 10 | import skimage.color 11 | import skimage.io 12 | import skimage.transform 13 | 14 | 15 | class CKPlusCondenser(object): 16 | def __init__(self, original_dataset_path, condensed_dataset_path): 17 | if os.path.exists(condensed_dataset_path): 18 | print 'Condensed Dataset detected.' 19 | print 'Removing it.' 20 | shutil.rmtree(condensed_dataset_path) 21 | print 'Copying original dataset to new condensed dataset path.' 22 | shutil.copytree(original_dataset_path, condensed_dataset_path) 23 | 24 | self.image_path = os.path.join(condensed_dataset_path, 25 | 'cohn-kanade-images') 26 | self.label_path = os.path.join(condensed_dataset_path, 27 | 'Emotion_labels') 28 | 29 | def run(self): 30 | print '\nCondensing CK+ Dataset: ' 31 | self.condense_dataset() 32 | print '\nCondensed CK+ Dataset Statistics: ' 33 | self.compute_dataset_statistics() 34 | 35 | def condense_dataset(self): 36 | # Get list of folders with no label file 37 | no_label_list = self.find_empty_folders(self.label_path) 38 | print '%d empty sequences to be removed.' % len(no_label_list) 39 | 40 | # Remove image sequence if label folder exists but is empty 41 | print '\nRemoving image sequence folders that have no label.' 42 | self.remove_image_sequences(self.image_path, 43 | self.label_path, 44 | no_label_list) 45 | 46 | # Remove empty folders in label directory 47 | print '\nRemoving empty label folders.' 48 | self.remove_folders_in_list(no_label_list) 49 | 50 | # Keep only the first and last three images in each sequence 51 | print '\nKeeping only the first and ' \ 52 | 'last three images in each sequence.' 
53 | self.reduce_all_image_sequences(self.image_path) 54 | 55 | def find_empty_folders(self, label_path): 56 | folder_list = [] 57 | for dirpath, dirs, files in os.walk(label_path): 58 | if not dirs and not files: 59 | folder_list.append(dirpath) 60 | return sorted(folder_list) 61 | 62 | def remove_image_sequences(self, image_path, label_path, no_label_list): 63 | mismatched_image_paths = self.find_image_label_mismatch(image_path, 64 | label_path) 65 | self.remove_folders_in_list(mismatched_image_paths) 66 | 67 | # Gather folder extensions that have no label file 68 | folder_extension_list = [] 69 | for folder_path in no_label_list: 70 | path_split_list = folder_path.split(os.sep) 71 | folder_extension = os.path.join(path_split_list[-2], 72 | path_split_list[-1]) 73 | folder_extension_list.append(folder_extension) 74 | 75 | # Prepend the image_path to get the image sequence location 76 | image_sequence_path_list = [os.path.join(image_path, ext) for ext 77 | in folder_extension_list] 78 | 79 | # Remove image sequences in list 80 | self.remove_folders_in_list(image_sequence_path_list) 81 | 82 | def find_image_label_mismatch(self, image_path, label_path): 83 | mismatched_image_paths = [] 84 | image_subj_list = sorted(os.listdir(image_path)) 85 | 86 | for subj in image_subj_list: 87 | seq_list = sorted(os.listdir(os.path.join(image_path, subj))) 88 | for seq in seq_list: 89 | if seq == '.DS_Store': 90 | os.remove(os.path.join(image_path, subj, seq)) 91 | continue 92 | 93 | seq_label_path = os.path.join(label_path, subj, seq) 94 | if not os.path.exists(seq_label_path): 95 | seq_path = os.path.join(image_path, subj, seq) 96 | mismatched_image_paths.append(seq_path) 97 | 98 | print 'There are %d mismatched files.' % len(mismatched_image_paths) 99 | return mismatched_image_paths 100 | 101 | def remove_folders_in_list(self, folder_list): 102 | # 103 | # Helper function to remove folders listed in folder_list 104 | # 105 | for i, folder_path in enumerate(folder_list): 106 | # print '%d: Removing --- %s' % (i, folder_path) 107 | if os.path.exists(folder_path): 108 | shutil.rmtree(folder_path) 109 | else: 110 | print 'Folder %s does not exist' % (folder_path) 111 | time.sleep(0.1) 112 | 113 | parent_path, ext = os.path.split(folder_path) 114 | # Check if parent folder is empty 115 | if os.listdir(parent_path) == []: 116 | # If so, remove it 117 | # print 'Parent dir %s is empty' % parent_path 118 | shutil.rmtree(parent_path) 119 | elif os.listdir(parent_path) == ['.DS_Store']: 120 | # print '.DS_Store file is present. Removing...' 121 | os.remove(os.path.join(parent_path, '.DS_Store')) 122 | # print 'Now parent dir %s is empty' % parent_path 123 | shutil.rmtree(parent_path) 124 | 125 | def reduce_all_image_sequences(self, image_path): 126 | subj_folder_list = sorted(os.listdir(image_path)) 127 | for subj_folder in subj_folder_list: 128 | subj_path = os.path.join(image_path, subj_folder) 129 | print 'Processing: ', subj_path 130 | seq_folder_list = sorted(os.listdir(subj_path)) 131 | for seq_folder in seq_folder_list: 132 | seq_path = os.path.join(subj_path, seq_folder) 133 | if not os.path.isdir(seq_path): 134 | continue 135 | 136 | self.reduce_single_sequence(seq_path) 137 | 138 | def reduce_single_sequence(self, path): 139 | file_list = sorted(os.listdir(path)) 140 | for f in file_list: 141 | if f == '.DS_Store': 142 | # print 'Found it!' 143 | os.remove(os.path.join(path, f)) 144 | 145 | if len(file_list) < 4: 146 | print 'Folder contains < 4 files. No reduction needed.' 
147 | return 148 | 149 | remove_list = file_list[1:-3] 150 | for remove_file in remove_list: 151 | # print 'Remove ', remove_file 152 | os.remove(os.path.join(path, remove_file)) 153 | 154 | def count_num_sequences(self, path): 155 | subj_folder_list = sorted(os.listdir(path)) 156 | num_subj_total = len(subj_folder_list) 157 | 158 | num_seq_per_subj = [] 159 | num_files_per_subj = [] 160 | for folder in subj_folder_list: 161 | seq_list = os.listdir(os.path.join(path, folder)) 162 | seq_list = [s for s in seq_list if s != '.DS_Store'] 163 | num_sequences = len(seq_list) 164 | num_seq_per_subj.append(num_sequences) 165 | for seq in seq_list: 166 | num_files = len(os.listdir(os.path.join(path, folder, seq))) 167 | num_files_per_subj.append(num_files) 168 | 169 | num_seq_total = numpy.sum(num_seq_per_subj) 170 | return num_subj_total, num_seq_total 171 | 172 | def compute_dataset_statistics(self): 173 | num_subjects, num_sequences = self.count_num_sequences(self.image_path) 174 | print 'Total Number of Image Sequences: %d' % num_sequences 175 | 176 | _, num_label_sequences = self.count_num_sequences(self.label_path) 177 | print 'Total Number of Label Sequences: %d' % num_label_sequences 178 | 179 | # Number of sequences that have corresponding labels in Emotion_labels 180 | glob_label_path = os.path.join(self.label_path, '*/*/*.txt') 181 | num_label_files = len(glob.glob(glob_label_path)) 182 | print 'Number of sequences with correponding label ' \ 183 | '.txt file: %d' % num_label_files 184 | 185 | print 'Total Number of Subjects: %d' % num_subjects 186 | 187 | glob_image_path = os.path.join(self.image_path, '*/*/*.png') 188 | num_images_total = len(glob.glob(glob_image_path)) 189 | print 'Number of image files: %d' % num_images_total 190 | 191 | 192 | class CKPlusFaceCropper(object): 193 | def __init__(self, input_path): 194 | print '\nDetecting and Cropping Faces' 195 | self.input_path = input_path 196 | self.image_path = os.path.join(input_path, 'cohn-kanade-images') 197 | 198 | def run(self): 199 | self.crop_and_align_all_faces(self.image_path) 200 | 201 | def write_list_to_file(self, file_path, item_list): 202 | f = open(file_path, 'wb') 203 | for item in item_list: 204 | f.write(item+'\n') 205 | f.close() 206 | 207 | def crop_and_align_all_faces(self, path): 208 | output_img_size = (96, 96) 209 | missed_faces = [] 210 | 211 | all_image_paths = sorted(glob.glob(os.path.join(path, '*/*/*.png'))) 212 | # print all_image_paths[0:20] 213 | 214 | for image_file_path in all_image_paths: 215 | # print 'Detecting Face: %s' % image_file_path 216 | I, success_flag = self.process_single_image( 217 | image_file_path, 218 | output_img_size) 219 | if not success_flag: 220 | missed_faces.append(image_file_path) 221 | I = numpy.squeeze(I, axis=2) 222 | skimage.io.imsave(os.path.join(image_file_path), I) 223 | 224 | print 'Missed Faces: ', sorted(missed_faces) 225 | missed_faces_file_path = os.path.join(self.input_path, 226 | 'missed_faces.txt') 227 | self.write_list_to_file(missed_faces_file_path, missed_faces) 228 | 229 | def process_single_image(self, image_file_path, output_img_size): 230 | # Read in the image 231 | I = skimage.io.imread(image_file_path) 232 | 233 | # If image was in color: 234 | if len(I.shape) == 3: 235 | I = skimage.color.rgb2gray(I) 236 | I *= 255 237 | I = I.astype('uint8') 238 | 239 | if len(I.shape) != 3: 240 | I = I[:, :, numpy.newaxis] 241 | 242 | # Detect face and crop it out 243 | I_crop, success_flag = self.detect_crop_face(I) 244 | #print I_crop.dtype, I_crop.min(), 
I_crop.max() 245 | 246 | # If face was successfully detected. 247 | # Align face in 96x96 image 248 | if success_flag: 249 | I_out = I_crop 250 | I_out = numpy.uint8(skimage.transform.resize(I_out, (96, 96), preserve_range=True)) 251 | #print I_out.dtype, I_out.min(), I_out.max() 252 | else: 253 | I_out = I_crop 254 | 255 | return I_out, success_flag 256 | 257 | def detect_crop_face(self, I): 258 | success_flag = False 259 | face_detector = FaceDetector(scale_factor=1.3, min_neighbors=5, 260 | min_size_scalar=0.5, max_size_scalar=0.8) 261 | faces = face_detector.detect_faces(I) 262 | 263 | # If face was not detected: 264 | if len(faces) == 0: 265 | # Try with more lenient conditions 266 | face_detector = FaceDetector(scale_factor=1.3, 267 | min_neighbors=3, 268 | min_size_scalar=0.5, 269 | max_size_scalar=0.8) 270 | faces = face_detector.detect_faces(I) 271 | if len(faces) == 0: 272 | print 'Missed the face!' 273 | return I, success_flag 274 | 275 | success_flag = True 276 | I_crop = face_detector.crop_face_out(I, faces[0]) 277 | return I_crop, success_flag 278 | 279 | 280 | class CKPlusNumpyFileGenerator(object): 281 | def __init__(self, save_path): 282 | self.save_path = os.path.join(save_path, 'npy_files') 283 | if not os.path.exists(self.save_path): 284 | os.makedirs(self.save_path) 285 | 286 | self.image_path = os.path.join(save_path, 'cohn-kanade-images') 287 | self.label_path = os.path.join(save_path, 'Emotion_labels') 288 | 289 | def run(self): 290 | print '\nSaving CK+ images and labels to .npy files.' 291 | 292 | # Get number of images 293 | glob_image_path = os.path.join(self.image_path, '*/*/*.png') 294 | num_samples = len(glob.glob(glob_image_path)) 295 | 296 | X, y, subjs = self.make_data_label_mats(self.image_path, 297 | self.label_path, 298 | num_samples) 299 | folds = self.make_folds(subjs) 300 | 301 | self.save_out_data(self.save_path, X, y, subjs, folds) 302 | 303 | def make_data_label_mats(self, all_images_path, 304 | all_labels_path, num_samples): 305 | # Initialize the data of interest 306 | image_shape = (96, 96, 1) 307 | X = numpy.zeros((num_samples, image_shape[2], 308 | image_shape[0], image_shape[1]), dtype='uint8') 309 | y = numpy.zeros((num_samples), dtype='int32') 310 | all_subjs = numpy.zeros((num_samples), dtype='int32') 311 | 312 | total_sample_count = 0 313 | subj_list = sorted(os.listdir(all_images_path)) 314 | 315 | # For each subject folder: 316 | for i, subj in enumerate(subj_list): 317 | print 'Subject: %d - %s' % (i, subj) 318 | 319 | # For each individual sequence in the subject folder: 320 | seq_path = os.path.join(all_images_path, subj) 321 | seq_list = sorted(os.listdir(seq_path)) 322 | for j, seq in enumerate(seq_list): 323 | # Get the images of the sequence and the emotion label 324 | images = self.read_images(all_images_path, subj, seq, 325 | image_shape) 326 | label = self.read_label(all_labels_path, subj, seq) 327 | label_vec = numpy.array([0, label, label, label]) 328 | 329 | index_slice = slice(total_sample_count, 330 | total_sample_count+len(images)) 331 | X[index_slice] = images 332 | y[index_slice] = label_vec 333 | all_subjs[index_slice] = i 334 | total_sample_count += len(images) 335 | 336 | return X, y, all_subjs 337 | 338 | def read_images(self, all_images_path, subj, seq, image_shape): 339 | image_file_path = os.path.join(all_images_path, subj, seq) 340 | image_files = sorted(os.listdir(image_file_path)) 341 | num_images = len(image_files) 342 | 343 | images = numpy.zeros((num_images, image_shape[2], 344 | image_shape[0], 
image_shape[1])) 345 | for i, image_file in enumerate(image_files): 346 | # print image_file 347 | I = skimage.io.imread(os.path.join(image_file_path, image_file)) 348 | I = I[:, :, numpy.newaxis] 349 | images[i, :, :, :] = I.transpose(2, 0, 1) 350 | 351 | return images 352 | 353 | def read_label(self, all_labels_path, subj, seq): 354 | label_file_path = os.path.join(all_labels_path, subj, seq) 355 | label_file = os.listdir(label_file_path)[0] 356 | f = open(os.path.join(label_file_path, label_file)) 357 | label = f.read() 358 | f.close() 359 | # print label 360 | label = int(float(label)) 361 | 362 | return label 363 | 364 | def make_folds(self, subjs, num_folds=10): 365 | print '\nMaking the folds.' 366 | folds = numpy.zeros((subjs.shape), dtype='int32') 367 | num_subj = len(numpy.unique(subjs)) 368 | 369 | for i in range(num_folds): 370 | subjs_in_fold = numpy.arange(i, num_subj, 10) 371 | print 'Subjs in fold %d: %s' % (i, subjs_in_fold) 372 | 373 | indices = numpy.hstack( 374 | [numpy.where(subjs == j)[0] for j in subjs_in_fold]) 375 | folds[indices] = i 376 | 377 | print 'Number of samples/fold: %s' % numpy.histogram(folds, 378 | bins=10)[0] 379 | 380 | return folds 381 | 382 | def save_out_data(self, path, X, y, subjs, folds): 383 | if not os.path.exists(path): 384 | os.makedirs(path) 385 | 386 | numpy.save(os.path.join(path, 'X.npy'), X) 387 | numpy.save(os.path.join(path, 'y.npy'), y) 388 | numpy.save(os.path.join(path, 'subjs.npy'), subjs) 389 | numpy.save(os.path.join(path, 'folds.npy'), folds) 390 | 391 | 392 | class FaceDetector(object): 393 | def __init__(self, scale_factor=1.3, min_neighbors=5, 394 | min_size_scalar=0.25, max_size_scalar=0.75): 395 | module_path = os.path.dirname(__file__) 396 | classifier_path = os.path.join(module_path, 397 | 'haarcascade_frontalface_default.xml') 398 | self.detector = cv2.CascadeClassifier(classifier_path) 399 | if self.detector.empty(): 400 | raise Exception('Classifier xml file was not found.') 401 | self.scale_factor = scale_factor 402 | self.min_neighbors = min_neighbors 403 | self.min_size_scalar = min_size_scalar 404 | self.max_size_scalar = max_size_scalar 405 | # print self.detector 406 | 407 | def detect_faces(self, I): 408 | height, width, num_channels = I.shape 409 | min_dim = numpy.min([height, width]) 410 | min_size = (int(min_dim*self.min_size_scalar), 411 | int(min_dim*self.min_size_scalar)) 412 | max_size = (int(min_dim*self.max_size_scalar), 413 | int(min_dim*self.max_size_scalar)) 414 | 415 | faces = self.detector.detectMultiScale(I, self.scale_factor, 416 | self.min_neighbors, 0, 417 | min_size, 418 | max_size) 419 | return faces 420 | 421 | def crop_face_out(self, I, loc): 422 | (x, y, w, h) = loc 423 | I_crop = I[y:y+h, x:x+w, :] 424 | return I_crop 425 | 426 | if __name__ == "__main__": 427 | parser = argparse.ArgumentParser( 428 | prog='make_ck_plus_dataset', 429 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 430 | description='Script to load, process, and split the Extended ' 431 | 'Cohn-Kanade (CK+) Dataset.') 432 | parser.add_argument('-ip', '--input_path', dest='input_path', 433 | help='Path specifying location of downloaded ' 434 | 'CK+ files.') 435 | parser.add_argument('-sp', '--save_path', dest='save_path', 436 | default='./CK_PLUS_HERE', 437 | help='Path specifying where to save \ 438 | the pre-processed dataset and the \ 439 | output (.npy) files.') 440 | args = parser.parse_args() 441 | 442 | print('\n================================================================') 443 | print(' Extended 
Cohn-Kanade Dataset Manager ') 444 | print('================================================================\n') 445 | 446 | input_path = args.input_path 447 | save_path = args.save_path 448 | 449 | # Condense CK+ dataset 450 | condenser = CKPlusCondenser(input_path, save_path) 451 | condenser.run() 452 | 453 | # Detect and crop faces 454 | face_cropper = CKPlusFaceCropper(save_path) 455 | face_cropper.run() 456 | 457 | # Save out CK+ .npy files 458 | numpy_file_generator = CKPlusNumpyFileGenerator(save_path) 459 | numpy_file_generator.run() 460 | 461 | print '\nSuccessfully pre-processed the Extended Cohn-Kanade Dataset!' 462 | --------------------------------------------------------------------------------