├── .gitignore ├── COPYING ├── README.md ├── data ├── categories.txt ├── object_categories.txt ├── train.txt └── val.txt ├── evaluation ├── demo.val.pred.txt ├── demo_eval_cls.m └── eval_cls.m ├── model ├── matconvnet │ ├── README.m │ ├── categoryIDX.mat │ ├── img1.jpg │ ├── img2.jpg │ ├── run_miniplacesCNN.m │ └── sample_refNet_initial.m └── tensorflow │ ├── DataLoader.py │ ├── README.md │ ├── alexnet_bn_train.py │ ├── alexnet_train.py │ └── prepro_data.py ├── teaser.jpg └── util ├── VOCreadxml.m └── VOCxml2struct.m /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | # data 4 | *.tar.gz 5 | data/images 6 | data/objects 7 | *.h5 8 | 9 | # tf checkpoints 10 | checkpoint 11 | *.data-* 12 | *.index 13 | *.meta 14 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Places2 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | Neither name of copyright holders nor the names of its contributors 16 | may be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 21 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 23 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 26 | OTHER DEALINGS IN THE SOFTWARE. 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MiniPlaces Challenge: Image Classification Benchmark for the Education of Deep Learning in Computer Vision 2 | ![teaser](teaser.jpg) 3 | 4 | ## Introduction 5 | 6 | We introduce the image classification benchmark, MiniPlaces Challenge, as an educational tool for practitioners of deep learning in computer vision. Through improving the baseline models and building new deep neural networks for image classification, participants in the challenge are expected to gain a deeper understanding of deep neural networks and hands-on experience tuning models. 7 | 8 | The goal of this challenge is to identify the scene category depicted in a photograph. The data for this task comes from the [Places2 dataset](http://places2.csail.mit.edu/), which contains 10+ million images belonging to 400+ unique scene categories. The challenge data will be a subset of the full Places2 dataset, coming from 100 scene categories and consisting of 100,000 images for training, 10,000 images for validation, and 10,000 images for testing.
The images will be resized to 128x128 to make the data more manageable. Furthermore, while the end goal is scene recognition, a subset of the data will contain object labels that might be helpful for building better models. 9 | 10 | The MiniPlaces challenge was first initiated by [Bolei Zhou](http://people.csail.mit.edu/bzhou/) and [Aditya Khosla](http://people.csail.mit.edu/khosla/) in the Fall 2015 [6.819/6.869 Advances in Computer Vision course](http://6.869.csail.mit.edu/fa15/index.html) at MIT, and has since been further developed by subsequent TAs of that course. 11 | 12 | The following is the documentation of the MiniPlaces Challenge development kit. 13 | ``` 14 | Table of contents: 15 | 1. Overview of challenge dataset 16 | 2. Challenge details 17 | 2.1 Images and annotations 18 | 2.2 Submission format 19 | 2.3 Evaluation routines 20 | 3. Baselines 21 | 4. Reference 22 | ``` 23 | 24 | ## 1. Overview of challenge dataset 25 | 26 | The link for downloading the image data is [here](https://places.csail.mit.edu/miniplaces/data/data.tar.gz). [Backup link at Google Drive](https://drive.google.com/file/d/16GYHdSWS3iMYwMPv5FpeDZN2rH7PR0F2/view?usp=sharing). 27 | 28 | There are three types of image data for this competition, all coming from the larger Places2 dataset: training data (TRAINING), validation data (VALIDATION), and test data (TEST). There is no overlap in the three splits of data. All three splits contain images belonging to 100 scene categories. 29 | 30 | ``` 31 | Number of images 32 | Dataset TRAIN VALIDATION TEST 33 | ------------------------------------------------ 34 | MiniPlaces 100,000 10,000 10,000 35 | ``` 36 | 37 | Every image in the training, validation and test sets has a single image-level label specifying its scene category. 38 | 39 | Challenge database statistics are as follows: 40 | 41 | ``` 42 | Training: 100,000 images, with 1000 images per category 43 | Validation: 10,000 images, with 100 images per category 44 | Test: 10,000 images, with 100 images per category 45 | ``` 46 | 47 | The 3 sets of images (training, validation and test) are available as a single tar archive. All images are in JPEG format. For the challenge, images have been resized to 128x128 to make the data manageable for students. 48 | 49 | ## 2. Challenge details 50 | 51 | The 100 scene categories used in the challenge dataset are part of the larger [Places2 dataset](http://places2.csail.mit.edu). Mappings from class names to ids are available in [data/categories.txt](data/categories.txt), where each line contains the scene category name followed by its id (an integer between 0 and 99). 52 | 53 | ### Training data 54 | 55 | Each image belongs to a particular scene category. After untarring the above file, the directory structure should look similar to the following: 56 | 57 | ``` 58 | train/a/abbey/00000000.jpg 59 | train/a/abbey/00000001.jpg 60 | ... 61 | train/y/yard/00000999.jpg 62 | train/y/yard/00001000.jpg 63 | ``` 64 | 65 | Each leaf folder contains the images of one scene category. The complete list of training images and their ground truth scene category id labels is available in [data/train.txt](data/train.txt). All images are in JPEG format. 66 | 67 | ### Validation data 68 | 69 | There are a total of 10,000 validation images. They are named as: 70 | 71 | 72 | ``` 73 | val/00000001.jpg 74 | val/00000002.jpg 75 | ... 76 | val/00009999.jpg 77 | val/00010000.jpg 78 | ``` 79 | 80 | There are 100 validation images for each scene category.
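Both [data/train.txt](data/train.txt) and the validation ground truth file described below pair each image path with its integer scene label, one entry per line. As a minimal illustration (mirroring the parsing done in `model/tensorflow/DataLoader.py`), such a list file can be loaded in Python as follows:

```python
# Minimal sketch: parse a MiniPlaces list file ("<image path> <label>" per line).
def read_list(list_path):
    paths, labels = [], []
    with open(list_path) as f:
        for line in f:
            path, label = line.rstrip().split(' ')
            paths.append(path)
            labels.append(int(label))
    return paths, labels

train_paths, train_labels = read_list('data/train.txt')
```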
81 | 82 | The classification ground truth for the validation images is in [data/val.txt](data/val.txt), where each line contains one image filename and its corresponding scene category label (from 0 to 99). 83 | 84 | ### Test data 85 | 86 | There are a total of 10,000 test images. The test files are named as: 87 | 88 | ``` 89 | test/00000001.jpg 90 | test/00000002.jpg 91 | ... 92 | test/00009999.jpg 93 | test/00010000.jpg 94 | ``` 95 | 96 | There are 100 test images for each scene category. Their ground truth annotations will not be released. 97 | 98 | ### Object annotations 99 | 100 | For a subset of the images (3502 train images and 371 validation images), we provide annotations of the common objects contained within each image. You may use these annotations in any way you see fit to enhance the performance of your algorithm on the scene recognition task. You are not required to use them -- the goal here is to provide additional avenues for exploration. You will not be judged on the quality of your object recognition. 101 | 102 | The specific object annotations available are the bounding boxes and polygons for 175 different object categories in the 3502 train and 371 validation images. The list of object categories is provided in [data/object_categories.txt](data/object_categories.txt). 103 | 104 | The annotations are provided in the 'objects' folder in the compressed file containing the image data. The images that have these annotations will have corresponding XML files in the objects folder. For example, the image `images/train/a/abbey/00000001.jpg` will have its object annotations located at `objects/train/a/abbey/00000001.xml`. As mentioned above, only a subset of the images are annotated with objects, so not all image files will have object annotations. 105 | 106 | To facilitate reading of object annotations, we provide the following MATLAB function in the development kit: [util/VOCreadxml.m](util/VOCreadxml.m). This function will read the XML file and convert it to a MATLAB structure containing the following fields: 107 | 108 | ``` 109 | - filename: name of the file 110 | - folder: scene class name 111 | - class: scene category id 112 | - objects: a structure containing the bounding box information and polygon points delineating the objects. Either of these annotations may be helpful depending on how you intend to use them 113 | ``` 114 | 115 | ### Submission format 116 | 117 | The submission of results on test data will consist of a text file with one line per image, in alphabetical order of the image file names, i.e. from test/00000001.jpg to test/00010000.jpg. Each line contains the image filename followed by exactly 5 predicted scene labels, sorted by confidence in descending order. 118 | 119 | The format is as follows: 120 | ``` 121 | <image filename> <label(1)> <label(2)> <label(3)> <label(4)> <label(5)> 122 | ``` 123 | The predicted labels are the scene categories (integers between 0 and 99). The number of labels per line must be exactly 5; fewer or more will lead to an evaluation error. The filename is the same as mentioned above, e.g., 'test/00000001.jpg' and so on.
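For reference, here is a minimal Python sketch for writing a submission file in this format. The `top5` array holding your model's predictions is a hypothetical input, not part of the devkit:

```python
import numpy as np

def write_submission(top5, out_path='test.pred.txt'):
    # top5: (10000, 5) array of predicted category ids (0-99),
    # one row per test image, ordered by descending confidence.
    with open(out_path, 'w') as f:
        for i, row in enumerate(top5):
            name = 'test/%08d.jpg' % (i + 1)  # test/00000001.jpg ... test/00010000.jpg
            f.write(name + ' ' + ' '.join(str(int(p)) for p in row) + '\n')

write_submission(np.zeros((10000, 5), dtype=int))  # placeholder predictions
```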
124 | 125 | An example submission file for the validation data is at [evaluation/demo.val.pred.txt](evaluation/demo.val.pred.txt). 126 | 127 | ### Evaluation routines 128 | 129 | The Matlab routine for evaluating the submission is [evaluation/eval_cls.m](evaluation/eval_cls.m). 130 | 131 | To see an example of using the routines, start Matlab 132 | in the 'evaluation/' folder and type 133 | `demo_eval_cls;` 134 | 135 | and you will see something similar to the following output: 136 | 137 | ``` 138 | MINI PLACES SCENE CLASSIFICATION CHALLENGE 139 | pred_file: demo.val.pred.txt 140 | ground_truth_file: ../data/val.txt 141 | guesses vs cls error 142 | 1.0000 0.9895 143 | 2.0000 0.9791 144 | 3.0000 0.9696 145 | 4.0000 0.9602 146 | 5.0000 0.9525 147 | ``` 148 | 149 | In this demo, we take the top i (i = 1...5) predictions (and ignore the rest) from your result file and plot the error as a function of the number of guesses. 150 | 151 | Only the error with 5 guesses will be used to determine the winner. The demo.val.pred.txt used here is a synthetic result. 152 | 153 | ## 3. Baseline models 154 | 155 | * The baseline models trained in [MatConvNet](http://www.vlfeat.org/matconvnet/) are available in [model/matconvnet](model/matconvnet). 156 | * Training code for baseline AlexNet models in TensorFlow is available in [model/tensorflow](model/tensorflow). 157 | * PyTorch model (TODO). 158 | 159 | ## 4. Reference 160 | Link: [Places2 Database](http://places2.csail.mit.edu), [Places1 Database](http://places.csail.mit.edu) 161 | 162 | Please cite the following [places journal paper](http://places2.csail.mit.edu/PAMI_places.pdf) if you use the data or pretrained CNN models. 163 | 164 | ``` 165 | @article{zhou2017places, 166 | title={Places: A 10 million Image Database for Scene Recognition}, 167 | author={Zhou, Bolei and Lapedriza, Agata and Khosla, Aditya and Oliva, Aude and Torralba, Antonio}, 168 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 169 | year={2017}, 170 | publisher={IEEE} 171 | } 172 | ``` 173 | 174 | Please contact Bolei Zhou (zhoubolei@gmail.com) if you have questions or comments about the MiniPlaces challenge. If you are a lecturer and in need of the ground truth of the test set, please reach out to Bolei too. If you are an MIT student taking 6.819/6.869: Advances in Computer Vision, please contact the [teaching staff](http://6.869.csail.mit.edu/) for any course-related problems.
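As a final note on the evaluation routines above: for participants working outside MATLAB, the following is a rough Python equivalent of the top-k classification error computed by `evaluation/eval_cls.m` (a sketch for sanity-checking only, not an official devkit routine):

```python
import numpy as np

def eval_cls(pred_file, ground_truth_file, max_guesses=5):
    # Map each predicted filename to its first max_guesses labels.
    preds = {}
    with open(pred_file) as f:
        for line in f:
            parts = line.split()
            preds[parts[0]] = [int(p) for p in parts[1:1 + max_guesses]]
    # Align predictions with the ground-truth file order.
    with open(ground_truth_file) as f:
        gt = [line.split() for line in f]
    labels = np.array([int(label) for _, label in gt])
    pred_mat = np.array([preds[name] for name, _ in gt])
    # Top-n error: fraction of images whose label is not among the first n guesses.
    return [1.0 - (pred_mat[:, :n] == labels[:, None]).any(axis=1).mean()
            for n in range(1, max_guesses + 1)]

print(eval_cls('evaluation/demo.val.pred.txt', 'data/val.txt'))
```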
175 | -------------------------------------------------------------------------------- /data/categories.txt: -------------------------------------------------------------------------------- 1 | /a/abbey 0 2 | /a/airport_terminal 1 3 | /a/amphitheater 2 4 | /a/amusement_park 3 5 | /a/aquarium 4 6 | /a/aqueduct 5 7 | /a/art_gallery 6 8 | /a/assembly_line 7 9 | /a/auditorium 8 10 | /b/badlands 9 11 | /b/bakery/shop 10 12 | /b/ballroom 11 13 | /b/bamboo_forest 12 14 | /b/banquet_hall 13 15 | /b/bar 14 16 | /b/baseball_field 15 17 | /b/bathroom 16 18 | /b/beauty_salon 17 19 | /b/bedroom 18 20 | /b/boat_deck 19 21 | /b/bookstore 20 22 | /b/botanical_garden 21 23 | /b/bowling_alley 22 24 | /b/boxing_ring 23 25 | /b/bridge 24 26 | /b/bus_interior 25 27 | /b/butchers_shop 26 28 | /c/campsite 27 29 | /c/candy_store 28 30 | /c/canyon 29 31 | /c/cemetery 30 32 | /c/chalet 31 33 | /c/church/outdoor 32 34 | /c/classroom 33 35 | /c/clothing_store 34 36 | /c/coast 35 37 | /c/cockpit 36 38 | /c/coffee_shop 37 39 | /c/conference_room 38 40 | /c/construction_site 39 41 | /c/corn_field 40 42 | /c/corridor 41 43 | /c/courtyard 42 44 | /d/dam 43 45 | /d/desert/sand 44 46 | /d/dining_room 45 47 | /d/driveway 46 48 | /f/fire_station 47 49 | /f/food_court 48 50 | /f/fountain 49 51 | /g/gas_station 50 52 | /g/golf_course 51 53 | /h/harbor 52 54 | /h/highway 53 55 | /h/hospital_room 54 56 | /h/hot_spring 55 57 | /i/ice_skating_rink/outdoor 56 58 | /i/iceberg 57 59 | /k/kindergarden_classroom 58 60 | /k/kitchen 59 61 | /l/laundromat 60 62 | /l/lighthouse 61 63 | /l/living_room 62 64 | /l/lobby 63 65 | /l/locker_room 64 66 | /m/market/outdoor 65 67 | /m/martial_arts_gym 66 68 | /m/monastery/outdoor 67 69 | /m/mountain 68 70 | /m/museum/indoor 69 71 | /o/office 70 72 | /p/palace 71 73 | /p/parking_lot 72 74 | /p/phone_booth 73 75 | /p/playground 74 76 | /r/racecourse 75 77 | /r/railroad_track 76 78 | /r/rainforest 77 79 | /r/restaurant 78 80 | /r/river 79 81 | /r/rock_arch 80 82 | /r/runway 81 83 | /s/shed 82 84 | /s/shower 83 85 | /s/ski_slope 84 86 | /s/skyscraper 85 87 | /s/slum 86 88 | /s/stadium/football 87 89 | /s/stage/indoor 88 90 | /s/staircase 89 91 | /s/subway_station/platform 90 92 | /s/supermarket 91 93 | /s/swamp 92 94 | /s/swimming_pool/outdoor 93 95 | /t/temple/east_asia 94 96 | /t/track/outdoor 95 97 | /t/trench 96 98 | /v/valley 97 99 | /v/volcano 98 100 | /y/yard 99 101 | -------------------------------------------------------------------------------- /data/object_categories.txt: -------------------------------------------------------------------------------- 1 | arcade_machine 0 2 | armchair 1 3 | awning 2 4 | bag 3 5 | balcony 4 6 | ball 5 7 | barrel 6 8 | basket 7 9 | beam 8 10 | bed 9 11 | bench 10 12 | billiard_table 11 13 | board 12 14 | boat 13 15 | book 14 16 | bookcase 15 17 | books 16 18 | bottle 17 19 | bottles 18 20 | bowl 19 21 | box 20 22 | boxes 21 23 | bucket 22 24 | building 23 25 | buildings 24 26 | bulletin_board 25 27 | bus 26 28 | bushes 27 29 | cabinet 28 30 | cabinets 29 31 | can 30 32 | candle 31 33 | car 32 34 | cars 33 35 | ceiling 34 36 | ceiling_fan 35 37 | ceiling_lamp 36 38 | chair 37 39 | chandelier 38 40 | clock 39 41 | clothes 40 42 | coffee_table 41 43 | column 42 44 | counter 43 45 | countertop 44 46 | cup 45 47 | curtain 46 48 | cushion 47 49 | deck_chair 48 50 | desk 49 51 | desk_lamp 50 52 | dishwasher 51 53 | door 52 54 | door_frame 53 55 | double_door 54 56 | drawer 55 57 | extractor_hood 56 58 | faucet 57 59 | fence 58 60 | field 59 61 | flag 60 62 | floor 61 
63 | floor_lamp 62 64 | flowers 63 65 | fluorescent_tube 64 66 | gate 65 67 | glass 66 68 | grass 67 69 | grille 68 70 | ground 69 71 | handrail 70 72 | hat 71 73 | hedge 72 74 | hill 73 75 | house 74 76 | jar 75 77 | keyboard 76 78 | land 77 79 | magazine 78 80 | magazines 79 81 | microwave 80 82 | mirror 81 83 | mountain 82 84 | mug 83 85 | napkin 84 86 | night_table 85 87 | ottoman 86 88 | outlet 87 89 | oven 88 90 | painting 89 91 | palm_tree 90 92 | pane 91 93 | paper 92 94 | path 93 95 | people 94 96 | person 95 97 | person_sitting 96 98 | person_standing 97 99 | person_walking 98 100 | picture 99 101 | pillow 100 102 | pipe 101 103 | plant 102 104 | plant_pot 103 105 | plants 104 106 | plate 105 107 | pole 106 108 | poster 107 109 | pot 108 110 | purse 109 111 | railing 110 112 | refrigerator 111 113 | river_water 112 114 | road 113 115 | rock 114 116 | rocks 115 117 | rocky_mountain 116 118 | rug 117 119 | sand_beach 118 120 | sconce 119 121 | screen 120 122 | sculpture 121 123 | sea_water 122 124 | seat 123 125 | seats 124 126 | shelf 125 127 | shelves 126 128 | shoe 127 129 | shoes 128 130 | shop_window 129 131 | showcase 130 132 | shutter 131 133 | side_table 132 134 | sidewalk 133 135 | sign 134 136 | sink 135 137 | sky 136 138 | skyscraper 137 139 | sneaker 138 140 | snowy_mountain 139 141 | sofa 140 142 | spotlight 141 143 | staircase 142 144 | stand 143 145 | statue 144 146 | step 145 147 | steps 146 148 | stone 147 149 | stones 148 150 | stool 149 151 | stove 150 152 | streetlight 151 153 | switch 152 154 | swivel_chair 153 155 | table 154 156 | telephone 155 157 | television 156 158 | text 157 159 | toilet 158 160 | towel 159 161 | toy 160 162 | tray 161 163 | tree 162 164 | tree_trunk 163 165 | trees 164 166 | truck 165 167 | umbrella 166 168 | van 167 169 | vase 168 170 | wall 169 171 | wardrobe 170 172 | washbasin 171 173 | water 172 174 | window 173 175 | worktop 174 176 | -------------------------------------------------------------------------------- /evaluation/demo_eval_cls.m: -------------------------------------------------------------------------------- 1 | %this script demos the usage of evaluation routines 2 | % the result file 'demo.val.pred.txt' on validation data is evaluated 3 | % against the ground truth 4 | 5 | fprintf('MINI PLACES SCENE CLASSIFICATION CHALLENGE\n'); 6 | 7 | pred_file='demo.val.pred.txt'; 8 | ground_truth_file='../data/val.txt'; 9 | num_predictions_per_image=5; 10 | 11 | fprintf('pred_file: %s\n', pred_file); 12 | fprintf('ground_truth_file: %s\n', ground_truth_file); 13 | 14 | error_cls = eval_cls(pred_file,ground_truth_file,1:num_predictions_per_image); 15 | 16 | disp('# guesses vs cls error'); 17 | disp([(1:num_predictions_per_image)',error_cls']); 18 | 19 | 20 | -------------------------------------------------------------------------------- /evaluation/eval_cls.m: -------------------------------------------------------------------------------- 1 | function [cls_error] = eval_cls(predict_file, gtruth_file, num_pred_per_image) 2 | 3 | preds = cell(1, 5); 4 | [predlist, preds{1}, preds{2}, preds{3}, preds{4}, preds{5}] = textread(predict_file, '%s %d %d %d %d %d'); 5 | preds = cell2mat(preds); 6 | 7 | [filelist, labels] = textread(gtruth_file, '%s %d'); 8 | 9 | assert(length(filelist)==length(predlist), 'number of predictions does not match number of ground truth'); 10 | 11 | filemap = containers.Map(predlist, 1:length(predlist)); 12 | pred_idx = cell2mat(filemap.values(filelist)); 13 | 14 | 
assert(length(pred_idx)==length(unique(pred_idx)), 'predictions are not unique'); 15 | 16 | preds = preds(pred_idx, :); 17 | 18 | for i=1:length(num_pred_per_image) 19 | n = num_pred_per_image(i); 20 | cls_error(i) = 1 - mean(any(preds(:, 1:n)==repmat(labels, [1 n]), 2)); 21 | end 22 | -------------------------------------------------------------------------------- /model/matconvnet/README.m: -------------------------------------------------------------------------------- 1 | % Here we provide the reference network (refNet1) trained on mini-Places. 2 | % You can check out run_miniplacesCNN.m to see how the network is loaded 3 | % and used to predict the scene category of an image. Please install 4 | % MatConvNet properly first, following these instructions: 5 | % http://www.vlfeat.org/matconvnet/quick/ 6 | 7 | % refNet1 has a top-1 accuracy of 0.355 and a top-5 accuracy of 0.649 on the 8 | % validation set of mini-Places. 9 | 10 | % We also provide the sample code sample_refNet_initial.m to show how 11 | % refNet1 is initialized before training. You can adapt it to your model 12 | % training. 13 | 14 | % I highly recommend going through the two examples of training models on MNIST 15 | % and CIFAR included in MatConvNet before you train your own network on 16 | % miniplaces. 17 | 18 | % Bolei Zhou, Sep.30, 2015. -------------------------------------------------------------------------------- /model/matconvnet/categoryIDX.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSAILVision/miniplaces/b180bffabd727428fe798cbfe01bf19a2e9b19e2/model/matconvnet/categoryIDX.mat -------------------------------------------------------------------------------- /model/matconvnet/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSAILVision/miniplaces/b180bffabd727428fe798cbfe01bf19a2e9b19e2/model/matconvnet/img1.jpg -------------------------------------------------------------------------------- /model/matconvnet/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSAILVision/miniplaces/b180bffabd727428fe798cbfe01bf19a2e9b19e2/model/matconvnet/img2.jpg -------------------------------------------------------------------------------- /model/matconvnet/run_miniplacesCNN.m: -------------------------------------------------------------------------------- 1 | % this baseline model is implemented in matconvnet, 2 | % please install MatConvNet first at http://www.vlfeat.org/matconvnet/ 3 | 4 | % load pre-trained model 5 | 6 | load('categoryIDX.mat'); 7 | path_model = 'refNet1-epoch-60.mat'; 8 | if ~exist(path_model, 'file') 9 | system(['wget miniplaces.csail.mit.edu/model/' path_model]) 10 | end 11 | load([path_model]) ; 12 | 13 | % load and preprocess an image 14 | im = imread('img2.jpg') ; 15 | im_resize = imresize(im, net.normalization.imageSize(1:2)) ; 16 | im_ = single(im_resize) ; 17 | for i=1:3 18 | im_(:,:,i) = im_(:,:,i)-net.normalization.averageImage(i); 19 | end 20 | 21 | % change the last layer of CNN from softmaxloss to softmax 22 | net.layers{1,end}.type = 'softmax'; 23 | net.layers{1,end}.name = 'prob'; 24 | 25 | % run the CNN 26 | res = vl_simplenn(net, im_) ; 27 | 28 | scores = squeeze(gather(res(end).x)) ; 29 | [score_sort, idx_sort] = sort(scores,'descend') ; 30 | figure, imagesc(im_resize) ; 31 | for i=1:5 32 | disp(sprintf('%s (%d), score %.3f', categoryIDX{idx_sort(i),1},
idx_sort(i), score_sort(i))); 33 | end 34 | -------------------------------------------------------------------------------- /model/matconvnet/sample_refNet_initial.m: -------------------------------------------------------------------------------- 1 | function [net] = sample_refNet_initial(varargin) 2 | % sample code for initializing the refNet1 for mini-places challenge 3 | % adapted from matconvnet-1.0-beta14/matconvnet-1.0-beta14/examples/cnn_imagenet_init.m 4 | 5 | opts.scale = 1 ; 6 | opts.initBias = 0.1 ; 7 | opts.weightDecay = 1 ; 8 | opts.weightInitMethod = 'gaussian' ; 9 | opts.model = 'refNet1' ; 10 | opts.batchNormalization = false ; 11 | opts = vl_argparse(opts, varargin) ; 12 | 13 | % Define layers 14 | net.normalization.imageSize = [126, 126, 3] ; 15 | switch opts.model 16 | case 'refNet1' 17 | net = refNet1(net, opts) ; 18 | otherwise 19 | error('Unknown model ''%s''', opts.model) ; 20 | end 21 | 22 | 23 | switch lower(opts.weightInitMethod) 24 | case {'xavier', 'xavierimproved'} 25 | net.layers{end}.weights{1} = net.layers{end}.weights{1} / 10 ; 26 | end 27 | net.layers{end+1} = struct('type', 'softmaxloss', 'name', 'loss') ; 28 | 29 | net.normalization.border = 128 - net.normalization.imageSize(1:2) ; 30 | net.normalization.interpolation = 'bicubic' ; 31 | net.normalization.averageImage = [] ; 32 | net.normalization.keepAspect = true ; 33 | 34 | 35 | 36 | end 37 | 38 | 39 | % -------------------------------------------------------------------- 40 | function net = add_block(net, opts, id, h, w, in, out, stride, pad, init_bias) 41 | % -------------------------------------------------------------------- 42 | info = vl_simplenn_display(net) ; 43 | fc = (h == info.dataSize(1,end) && w == info.dataSize(2,end)) ; 44 | if fc 45 | name = 'fc' ; 46 | else 47 | name = 'conv' ; 48 | end 49 | net.layers{end+1} = struct('type', 'conv', 'name', sprintf('%s%s', name, id), ... 50 | 'weights', {{init_weight(opts, h, w, in, out, 'single'), zeros(out, 1, 'single')}}, ... 51 | 'stride', stride, ... 52 | 'pad', pad, ... 53 | 'learningRate', [1 2], ... 54 | 'weightDecay', [opts.weightDecay 0]) ; 55 | if opts.batchNormalization 56 | net.layers{end+1} = struct('type', 'bnorm', 'name', sprintf('bn%d',id), ... 57 | 'weights', {{ones(out, 1, 'single'), zeros(out, 1, 'single')}}, ... 58 | 'learningRate', [2 1], ... 59 | 'weightDecay', [0 0]) ; 60 | end 61 | net.layers{end+1} = struct('type', 'relu', 'name', sprintf('relu%s',id)) ; 62 | end 63 | % ------------------------------------------------------------------------- 64 | function weights = init_weight(opts, h, w, in, out, type) 65 | % ------------------------------------------------------------------------- 66 | % See K. He, X. Zhang, S. Ren, and J. Sun. Delving deep into 67 | % rectifiers: Surpassing human-level performance on imagenet 68 | % classification. CoRR, (arXiv:1502.01852v1), 2015. 
69 | 70 | switch lower(opts.weightInitMethod) 71 | case 'gaussian' 72 | sc = 0.01/opts.scale ; 73 | weights = randn(h, w, in, out, type)*sc; 74 | case 'xavier' 75 | sc = sqrt(3/(h*w*in)) ; 76 | weights = (rand(h, w, in, out, type)*2 - 1)*sc ; 77 | case 'xavierimproved' 78 | sc = sqrt(2/(h*w*out)) ; 79 | weights = randn(h, w, in, out, type)*sc ; 80 | otherwise 81 | error('Unknown weight initialization method''%s''', opts.weightInitMethod) ; 82 | end 83 | 84 | end 85 | % -------------------------------------------------------------------- 86 | function net = add_norm(net, opts, id) 87 | % -------------------------------------------------------------------- 88 | if ~opts.batchNormalization 89 | net.layers{end+1} = struct('type', 'normalize', ... 90 | 'name', sprintf('norm%s', id), ... 91 | 'param', [5 1 0.0001/5 0.75]) ; 92 | end 93 | end 94 | % -------------------------------------------------------------------- 95 | function net = add_dropout(net, opts, id) 96 | % -------------------------------------------------------------------- 97 | if ~opts.batchNormalization 98 | net.layers{end+1} = struct('type', 'dropout', ... 99 | 'name', sprintf('dropout%s', id), ... 100 | 'rate', 0.5) ; 101 | end 102 | 103 | end 104 | % -------------------------------------------------------------------- 105 | function net = refNet1(net, opts) 106 | % 3 convnet + 1 FC + 1 softmax 107 | % -------------------------------------------------------------------- 108 | %% add_block(net, opts, id, h, w, in, out, stride, pad, init_bias) 109 | 110 | net.layers = {} ; 111 | 112 | net = add_block(net, opts, '1', 8, 8, 3, 64, 2, 0) ; 113 | net = add_norm(net, opts, '1') ; 114 | net.layers{end+1} = struct('type', 'pool', 'name', 'pool1', ... 115 | 'method', 'max', ... 116 | 'pool', [3 3], ... 117 | 'stride', 2, ... 118 | 'pad', 0) ; 119 | 120 | 121 | net = add_block(net, opts, '2', 5, 5, 32, 96, 1, 2) ; 122 | net = add_norm(net, opts, '2') ; 123 | net.layers{end+1} = struct('type', 'pool', 'name', 'pool2', ... 124 | 'method', 'max', ... 125 | 'pool', [3 3], ... 126 | 'stride', 2, ... 127 | 'pad', 0) ; 128 | 129 | net = add_block(net, opts, '3', 3, 3, 96, 128, 1, 1) ; 130 | net.layers{end+1} = struct('type', 'pool', 'name', 'pool5', ... 131 | 'method', 'max', ... 132 | 'pool', [3 3], ... 133 | 'stride', 2, ... 134 | 'pad', 0) ; 135 | 136 | net = add_block(net, opts, '4', 6, 6, 128, 512, 1, 0) ; 137 | net = add_dropout(net, opts, '4') ; 138 | 139 | net = add_block(net, opts, '5', 1, 1, 512, 100, 1, 0) ; 140 | net.layers(end) = [] ; 141 | if opts.batchNormalization, net.layers(end) = [] ; end 142 | end 143 | 144 | -------------------------------------------------------------------------------- /model/tensorflow/DataLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import scipy.misc 4 | import h5py 5 | np.random.seed(123) 6 | 7 | # loading data from .h5 8 | class DataLoaderH5(object): 9 | def __init__(self, **kwargs): 10 | self.load_size = int(kwargs['load_size']) 11 | self.fine_size = int(kwargs['fine_size']) 12 | self.data_mean = np.array(kwargs['data_mean']) 13 | self.randomize = kwargs['randomize'] 14 | 15 | # read data info from lists 16 | f = h5py.File(kwargs['data_h5'], "r") 17 | self.im_set = np.array(f['images']) 18 | self.lab_set = np.array(f['labels']) 19 | 20 | self.num = self.im_set.shape[0] 21 | assert self.im_set.shape[0]==self.lab_set.shape[0], '#images and #labels do not match!' 
22 | assert self.im_set.shape[1]==self.load_size, 'Image size error!' 23 | assert self.im_set.shape[2]==self.load_size, 'Image size error!' 24 | print('# Images found:', self.num) 25 | 26 | self.shuffle() 27 | self._idx = 0 28 | 29 | def next_batch(self, batch_size): 30 | labels_batch = np.zeros(batch_size) 31 | images_batch = np.zeros((batch_size, self.fine_size, self.fine_size, 3)) 32 | 33 | for i in range(batch_size): 34 | image = self.im_set[self._idx] 35 | image = image.astype(np.float32)/255. - self.data_mean 36 | if self.randomize: 37 | flip = np.random.random_integers(0, 1) 38 | if flip>0: 39 | image = image[:,::-1,:] 40 | offset_h = np.random.random_integers(0, self.load_size-self.fine_size) 41 | offset_w = np.random.random_integers(0, self.load_size-self.fine_size) 42 | else: 43 | offset_h = (self.load_size-self.fine_size)//2 44 | offset_w = (self.load_size-self.fine_size)//2 45 | 46 | images_batch[i, ...] = image[offset_h:offset_h+self.fine_size, offset_w:offset_w+self.fine_size, :] 47 | labels_batch[i, ...] = self.lab_set[self._idx] 48 | 49 | self._idx += 1 50 | if self._idx == self.num: 51 | self._idx = 0 52 | if self.randomize: 53 | self.shuffle() 54 | 55 | return images_batch, labels_batch 56 | 57 | def size(self): 58 | return self.num 59 | 60 | def reset(self): 61 | self._idx = 0 62 | 63 | def shuffle(self): 64 | perm = np.random.permutation(self.num) 65 | self.im_set = self.im_set[perm] 66 | self.lab_set = self.lab_set[perm] 67 | 68 | # Loading data from disk 69 | class DataLoaderDisk(object): 70 | def __init__(self, **kwargs): 71 | 72 | self.load_size = int(kwargs['load_size']) 73 | self.fine_size = int(kwargs['fine_size']) 74 | self.data_mean = np.array(kwargs['data_mean']) 75 | self.randomize = kwargs['randomize'] 76 | self.data_root = os.path.join(kwargs['data_root']) 77 | 78 | # read data info from lists 79 | self.list_im = [] 80 | self.list_lab = [] 81 | with open(kwargs['data_list'], 'r') as f: 82 | for line in f: 83 | path, lab =line.rstrip().split(' ') 84 | self.list_im.append(os.path.join(self.data_root, path)) 85 | self.list_lab.append(int(lab)) 86 | self.list_im = np.array(self.list_im, np.object) 87 | self.list_lab = np.array(self.list_lab, np.int64) 88 | self.num = self.list_im.shape[0] 89 | print('# Images found:', self.num) 90 | 91 | # permutation 92 | perm = np.random.permutation(self.num) 93 | self.list_im[:, ...] = self.list_im[perm, ...] 94 | self.list_lab[:] = self.list_lab[perm, ...] 95 | 96 | self._idx = 0 97 | 98 | def next_batch(self, batch_size): 99 | images_batch = np.zeros((batch_size, self.fine_size, self.fine_size, 3)) 100 | labels_batch = np.zeros(batch_size) 101 | for i in range(batch_size): 102 | image = scipy.misc.imread(self.list_im[self._idx]) 103 | image = scipy.misc.imresize(image, (self.load_size, self.load_size)) 104 | image = image.astype(np.float32)/255. 105 | image = image - self.data_mean 106 | if self.randomize: 107 | flip = np.random.random_integers(0, 1) 108 | if flip>0: 109 | image = image[:,::-1,:] 110 | offset_h = np.random.random_integers(0, self.load_size-self.fine_size) 111 | offset_w = np.random.random_integers(0, self.load_size-self.fine_size) 112 | else: 113 | offset_h = (self.load_size-self.fine_size)//2 114 | offset_w = (self.load_size-self.fine_size)//2 115 | 116 | images_batch[i, ...] = image[offset_h:offset_h+self.fine_size, offset_w:offset_w+self.fine_size, :] 117 | labels_batch[i, ...] 
= self.list_lab[self._idx] 118 | 119 | self._idx += 1 120 | if self._idx == self.num: 121 | self._idx = 0 122 | 123 | return images_batch, labels_batch 124 | 125 | def size(self): 126 | return self.num 127 | 128 | def reset(self): 129 | self._idx = 0 130 | -------------------------------------------------------------------------------- /model/tensorflow/README.md: -------------------------------------------------------------------------------- 1 | # Sample training code for the MiniPlaces challenge 2 | 3 | ## Setup 4 | 5 | Please download the [MiniPlaces dataset](http://miniplaces.csail.mit.edu/data/data.tar.gz) and untar the data. 6 | 7 | tar -xvf data.tar.gz 8 | 9 | You will need to modify the data paths accordingly in `alexnet_train.py` and `alexnet_bn_train.py`. 10 | 11 | Then install the following `pip` dependencies: 12 | 13 | pip install h5py 14 | pip install pillow 15 | pip install scipy 16 | 17 | ## Getting started 18 | 19 | To run the AlexNet training script: 20 | 21 | python alexnet_train.py 22 | 23 | To run AlexNet with batch normalization: 24 | 25 | python alexnet_bn_train.py 26 | 27 | ## Advanced 28 | 29 | For faster data loading when training on a cluster, preprocess the data into .h5 files and uncomment the relevant lines in the code to use the h5 data loader instead of the disk data loader. You will need to modify the data paths accordingly in `prepro_data.py`. 30 | 31 | python prepro_data.py 32 | 33 | ## Acknowledgement 34 | Thanks to [Hang Zhao](http://www.mit.edu/~hangzhao/) for developing this baseline training code. 35 | -------------------------------------------------------------------------------- /model/tensorflow/alexnet_bn_train.py: -------------------------------------------------------------------------------- 1 | import os, datetime 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.contrib.layers.python.layers import batch_norm 5 | from DataLoader import * 6 | 7 | # Dataset Parameters 8 | batch_size = 256 9 | load_size = 256 10 | fine_size = 224 11 | c = 3 12 | data_mean = np.asarray([0.45834960097,0.44674252445,0.41352266842]) 13 | 14 | # Training Parameters 15 | learning_rate = 0.001 16 | dropout = 0.5 # Dropout, probability to keep units 17 | training_iters = 50000 18 | step_display = 50 19 | step_save = 10000 20 | path_save = 'alexnet_bn' 21 | start_from = '' 22 | 23 | def batch_norm_layer(x, train_phase, scope_bn): 24 | return batch_norm(x, decay=0.9, center=True, scale=True, 25 | updates_collections=None, 26 | is_training=train_phase, 27 | reuse=None, 28 | trainable=True, 29 | scope=scope_bn) 30 | 31 | def alexnet(x, keep_dropout, train_phase): 32 | weights = { 33 | 'wc1': tf.Variable(tf.random_normal([11, 11, 3, 96], stddev=np.sqrt(2./(11*11*3)))), 34 | 'wc2': tf.Variable(tf.random_normal([5, 5, 96, 256], stddev=np.sqrt(2./(5*5*96)))), 35 | 'wc3': tf.Variable(tf.random_normal([3, 3, 256, 384], stddev=np.sqrt(2./(3*3*256)))), 36 | 'wc4': tf.Variable(tf.random_normal([3, 3, 384, 256], stddev=np.sqrt(2./(3*3*384)))), 37 | 'wc5': tf.Variable(tf.random_normal([3, 3, 256, 256], stddev=np.sqrt(2./(3*3*256)))), 38 | 39 | 'wf6': tf.Variable(tf.random_normal([7*7*256, 4096], stddev=np.sqrt(2./(7*7*256)))), 40 | 'wf7': tf.Variable(tf.random_normal([4096, 4096], stddev=np.sqrt(2./4096))), 41 | 'wo': tf.Variable(tf.random_normal([4096, 100], stddev=np.sqrt(2./4096))) 42 | } 43 | 44 | biases = { 45 | 'bo': tf.Variable(tf.ones(100)) 46 | } 47 | 48 | # Conv + ReLU + Pool, 224->55->27 49 | conv1 = tf.nn.conv2d(x, weights['wc1'], strides=[1, 4, 4, 1], padding='SAME') 50 | conv1
= batch_norm_layer(conv1, train_phase, 'bn1') 51 | conv1 = tf.nn.relu(conv1) 52 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') 53 | 54 | # Conv + ReLU + Pool, 27-> 13 55 | conv2 = tf.nn.conv2d(pool1, weights['wc2'], strides=[1, 1, 1, 1], padding='SAME') 56 | conv2 = batch_norm_layer(conv2, train_phase, 'bn2') 57 | conv2 = tf.nn.relu(conv2) 58 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') 59 | 60 | # Conv + ReLU, 13-> 13 61 | conv3 = tf.nn.conv2d(pool2, weights['wc3'], strides=[1, 1, 1, 1], padding='SAME') 62 | conv3 = batch_norm_layer(conv3, train_phase, 'bn3') 63 | conv3 = tf.nn.relu(conv3) 64 | 65 | # Conv + ReLU, 13-> 13 66 | conv4 = tf.nn.conv2d(conv3, weights['wc4'], strides=[1, 1, 1, 1], padding='SAME') 67 | conv4 = batch_norm_layer(conv4, train_phase, 'bn4') 68 | conv4 = tf.nn.relu(conv4) 69 | 70 | # Conv + ReLU + Pool, 13->6 71 | conv5 = tf.nn.conv2d(conv4, weights['wc5'], strides=[1, 1, 1, 1], padding='SAME') 72 | conv5 = batch_norm_layer(conv5, train_phase, 'bn5') 73 | conv5 = tf.nn.relu(conv5) 74 | pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') 75 | 76 | # FC + ReLU + Dropout 77 | fc6 = tf.reshape(pool5, [-1, weights['wf6'].get_shape().as_list()[0]]) 78 | fc6 = tf.matmul(fc6, weights['wf6']) 79 | fc6 = batch_norm_layer(fc6, train_phase, 'bn6') 80 | fc6 = tf.nn.relu(fc6) 81 | fc6 = tf.nn.dropout(fc6, keep_dropout) 82 | 83 | # FC + ReLU + Dropout 84 | fc7 = tf.matmul(fc6, weights['wf7']) 85 | fc7 = batch_norm_layer(fc7, train_phase, 'bn7') 86 | fc7 = tf.nn.relu(fc7) 87 | fc7 = tf.nn.dropout(fc7, keep_dropout) 88 | 89 | # Output FC 90 | out = tf.add(tf.matmul(fc7, weights['wo']), biases['bo']) 91 | 92 | return out 93 | 94 | # Construct dataloader 95 | opt_data_train = { 96 | #'data_h5': 'miniplaces_256_train.h5', 97 | 'data_root': '../../data/images/', # MODIFY PATH ACCORDINGLY 98 | 'data_list': '../../data/train.txt', # MODIFY PATH ACCORDINGLY 99 | 'load_size': load_size, 100 | 'fine_size': fine_size, 101 | 'data_mean': data_mean, 102 | 'randomize': True 103 | } 104 | opt_data_val = { 105 | #'data_h5': 'miniplaces_256_val.h5', 106 | 'data_root': '../../data/images/', # MODIFY PATH ACCORDINGLY 107 | 'data_list': '../../data/val.txt', # MODIFY PATH ACCORDINGLY 108 | 'load_size': load_size, 109 | 'fine_size': fine_size, 110 | 'data_mean': data_mean, 111 | 'randomize': False 112 | } 113 | 114 | loader_train = DataLoaderDisk(**opt_data_train) 115 | loader_val = DataLoaderDisk(**opt_data_val) 116 | #loader_train = DataLoaderH5(**opt_data_train) 117 | #loader_val = DataLoaderH5(**opt_data_val) 118 | 119 | # tf Graph input 120 | x = tf.placeholder(tf.float32, [None, fine_size, fine_size, c]) 121 | y = tf.placeholder(tf.int64, None) 122 | keep_dropout = tf.placeholder(tf.float32) 123 | train_phase = tf.placeholder(tf.bool) 124 | 125 | # Construct model 126 | logits = alexnet(x, keep_dropout, train_phase) 127 | 128 | # Define loss and optimizer 129 | loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)) 130 | train_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) 131 | 132 | # Evaluate model 133 | accuracy1 = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)) 134 | accuracy5 = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y, 5), tf.float32)) 135 | 136 | # define initialization 137 | init = tf.global_variables_initializer() 138 | 139 | # define saver 140 | saver = tf.train.Saver() 
141 | 142 | # define summary writer 143 | #writer = tf.train.SummaryWriter('.', graph=tf.get_default_graph()) 144 | 145 | # Launch the graph 146 | with tf.Session() as sess: 147 | # Initialization 148 | if len(start_from)>1: 149 | saver.restore(sess, start_from) 150 | else: 151 | sess.run(init) 152 | 153 | step = 0 154 | 155 | while step < training_iters: 156 | # Load a batch of training data 157 | images_batch, labels_batch = loader_train.next_batch(batch_size) 158 | 159 | if step % step_display == 0: 160 | print('[%s]:' %(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) 161 | 162 | # Calculate batch loss and accuracy on training set 163 | l, acc1, acc5 = sess.run([loss, accuracy1, accuracy5], feed_dict={x: images_batch, y: labels_batch, keep_dropout: 1., train_phase: False}) 164 | print("-Iter " + str(step) + ", Training Loss= " + \ 165 | "{:.6f}".format(l) + ", Accuracy Top1 = " + \ 166 | "{:.4f}".format(acc1) + ", Top5 = " + \ 167 | "{:.4f}".format(acc5)) 168 | 169 | # Calculate batch loss and accuracy on validation set 170 | images_batch_val, labels_batch_val = loader_val.next_batch(batch_size) 171 | l, acc1, acc5 = sess.run([loss, accuracy1, accuracy5], feed_dict={x: images_batch_val, y: labels_batch_val, keep_dropout: 1., train_phase: False}) 172 | print("-Iter " + str(step) + ", Validation Loss= " + \ 173 | "{:.6f}".format(l) + ", Accuracy Top1 = " + \ 174 | "{:.4f}".format(acc1) + ", Top5 = " + \ 175 | "{:.4f}".format(acc5)) 176 | 177 | # Run optimization op (backprop) 178 | sess.run(train_optimizer, feed_dict={x: images_batch, y: labels_batch, keep_dropout: dropout, train_phase: True}) 179 | 180 | step += 1 181 | 182 | # Save model 183 | if step % step_save == 0: 184 | saver.save(sess, path_save, global_step=step) 185 | print("Model saved at Iter %d !" %(step)) 186 | 187 | print("Optimization Finished!") 188 | 189 | 190 | # Evaluate on the whole validation set 191 | print('Evaluation on the whole validation set...') 192 | num_batch = loader_val.size()//batch_size 193 | acc1_total = 0. 194 | acc5_total = 0. 195 | loader_val.reset() 196 | for i in range(num_batch): 197 | images_batch, labels_batch = loader_val.next_batch(batch_size) 198 | acc1, acc5 = sess.run([accuracy1, accuracy5], feed_dict={x: images_batch, y: labels_batch, keep_dropout: 1., train_phase: False}) 199 | acc1_total += acc1 200 | acc5_total += acc5 201 | print("Validation Accuracy Top1 = " + \ 202 | "{:.4f}".format(acc1) + ", Top5 = " + \ 203 | "{:.4f}".format(acc5)) 204 | 205 | acc1_total /= num_batch 206 | acc5_total /= num_batch 207 | print('Evaluation Finished! 
Accuracy Top1 = ' + "{:.4f}".format(acc1_total) + ", Top5 = " + "{:.4f}".format(acc5_total)) 208 | -------------------------------------------------------------------------------- /model/tensorflow/alexnet_train.py: -------------------------------------------------------------------------------- 1 | import os, datetime 2 | import numpy as np 3 | import tensorflow as tf 4 | from DataLoader import * 5 | 6 | # Dataset Parameters 7 | batch_size = 200 8 | load_size = 256 9 | fine_size = 224 10 | c = 3 11 | data_mean = np.asarray([0.45834960097,0.44674252445,0.41352266842]) 12 | 13 | # Training Parameters 14 | learning_rate = 0.001 15 | dropout = 0.5 # Dropout, probability to keep units 16 | training_iters = 100000 17 | step_display = 50 18 | step_save = 10000 19 | path_save = 'alexnet' 20 | start_from = '' 21 | 22 | def alexnet(x, keep_dropout): 23 | weights = { 24 | 'wc1': tf.Variable(tf.random_normal([11, 11, 3, 96], stddev=np.sqrt(2./(11*11*3)))), 25 | 'wc2': tf.Variable(tf.random_normal([5, 5, 96, 256], stddev=np.sqrt(2./(5*5*96)))), 26 | 'wc3': tf.Variable(tf.random_normal([3, 3, 256, 384], stddev=np.sqrt(2./(3*3*256)))), 27 | 'wc4': tf.Variable(tf.random_normal([3, 3, 384, 256], stddev=np.sqrt(2./(3*3*384)))), 28 | 'wc5': tf.Variable(tf.random_normal([3, 3, 256, 256], stddev=np.sqrt(2./(3*3*256)))), 29 | 30 | 'wf6': tf.Variable(tf.random_normal([7*7*256, 4096], stddev=np.sqrt(2./(7*7*256)))), 31 | 'wf7': tf.Variable(tf.random_normal([4096, 4096], stddev=np.sqrt(2./4096))), 32 | 'wo': tf.Variable(tf.random_normal([4096, 100], stddev=np.sqrt(2./4096))) 33 | } 34 | 35 | biases = { 36 | 'bc1': tf.Variable(tf.zeros(96)), 37 | 'bc2': tf.Variable(tf.zeros(256)), 38 | 'bc3': tf.Variable(tf.zeros(384)), 39 | 'bc4': tf.Variable(tf.zeros(256)), 40 | 'bc5': tf.Variable(tf.zeros(256)), 41 | 42 | 'bf6': tf.Variable(tf.zeros(4096)), 43 | 'bf7': tf.Variable(tf.zeros(4096)), 44 | 'bo': tf.Variable(tf.zeros(100)) 45 | } 46 | 47 | # Conv + ReLU + LRN + Pool, 224->55->27 48 | conv1 = tf.nn.conv2d(x, weights['wc1'], strides=[1, 4, 4, 1], padding='SAME') 49 | conv1 = tf.nn.relu(tf.nn.bias_add(conv1, biases['bc1'])) 50 | lrn1 = tf.nn.local_response_normalization(conv1, depth_radius=5, bias=1.0, alpha=1e-4, beta=0.75) 51 | pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') 52 | 53 | # Conv + ReLU + LRN + Pool, 27-> 13 54 | conv2 = tf.nn.conv2d(pool1, weights['wc2'], strides=[1, 1, 1, 1], padding='SAME') 55 | conv2 = tf.nn.relu(tf.nn.bias_add(conv2, biases['bc2'])) 56 | lrn2 = tf.nn.local_response_normalization(conv2, depth_radius=5, bias=1.0, alpha=1e-4, beta=0.75) 57 | pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') 58 | 59 | # Conv + ReLU, 13-> 13 60 | conv3 = tf.nn.conv2d(pool2, weights['wc3'], strides=[1, 1, 1, 1], padding='SAME') 61 | conv3 = tf.nn.relu(tf.nn.bias_add(conv3, biases['bc3'])) 62 | 63 | # Conv + ReLU, 13-> 13 64 | conv4 = tf.nn.conv2d(conv3, weights['wc4'], strides=[1, 1, 1, 1], padding='SAME') 65 | conv4 = tf.nn.relu(tf.nn.bias_add(conv4, biases['bc4'])) 66 | 67 | # Conv + ReLU + Pool, 13->6 68 | conv5 = tf.nn.conv2d(conv4, weights['wc5'], strides=[1, 1, 1, 1], padding='SAME') 69 | conv5 = tf.nn.relu(tf.nn.bias_add(conv5, biases['bc5'])) 70 | pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') 71 | 72 | # FC + ReLU + Dropout 73 | fc6 = tf.reshape(pool5, [-1, weights['wf6'].get_shape().as_list()[0]]) 74 | fc6 = tf.add(tf.matmul(fc6, weights['wf6']), biases['bf6']) 75 | fc6 = 
tf.nn.relu(fc6) 76 | fc6 = tf.nn.dropout(fc6, keep_dropout) 77 | 78 | # FC + ReLU + Dropout 79 | fc7 = tf.add(tf.matmul(fc6, weights['wf7']), biases['bf7']) 80 | fc7 = tf.nn.relu(fc7) 81 | fc7 = tf.nn.dropout(fc7, keep_dropout) 82 | 83 | # Output FC 84 | out = tf.add(tf.matmul(fc7, weights['wo']), biases['bo']) 85 | 86 | return out 87 | 88 | # Construct dataloader 89 | opt_data_train = { 90 | #'data_h5': 'miniplaces_256_train.h5', 91 | 'data_root': '../../data/images/', # MODIFY PATH ACCORDINGLY 92 | 'data_list': '../../data/train.txt', # MODIFY PATH ACCORDINGLY 93 | 'load_size': load_size, 94 | 'fine_size': fine_size, 95 | 'data_mean': data_mean, 96 | 'randomize': True 97 | } 98 | opt_data_val = { 99 | #'data_h5': 'miniplaces_256_val.h5', 100 | 'data_root': '../../data/images/', # MODIFY PATH ACCORDINGLY 101 | 'data_list': '../../data/val.txt', # MODIFY PATH ACCORDINGLY 102 | 'load_size': load_size, 103 | 'fine_size': fine_size, 104 | 'data_mean': data_mean, 105 | 'randomize': False 106 | } 107 | 108 | loader_train = DataLoaderDisk(**opt_data_train) 109 | loader_val = DataLoaderDisk(**opt_data_val) 110 | #loader_train = DataLoaderH5(**opt_data_train) 111 | #loader_val = DataLoaderH5(**opt_data_val) 112 | 113 | # tf Graph input 114 | x = tf.placeholder(tf.float32, [None, fine_size, fine_size, c]) 115 | y = tf.placeholder(tf.int64, None) 116 | keep_dropout = tf.placeholder(tf.float32) 117 | 118 | # Construct model 119 | logits = alexnet(x, keep_dropout) 120 | 121 | # Define loss and optimizer 122 | loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)) 123 | train_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) 124 | 125 | # Evaluate model 126 | accuracy1 = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)) 127 | accuracy5 = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y, 5), tf.float32)) 128 | 129 | # define initialization 130 | init = tf.global_variables_initializer() 131 | 132 | # define saver 133 | saver = tf.train.Saver() 134 | 135 | # define summary writer 136 | #writer = tf.train.SummaryWriter('.', graph=tf.get_default_graph()) 137 | 138 | # Launch the graph 139 | with tf.Session() as sess: 140 | # Initialization 141 | if len(start_from)>1: 142 | saver.restore(sess, start_from) 143 | else: 144 | sess.run(init) 145 | 146 | step = 0 147 | 148 | while step < training_iters: 149 | # Load a batch of training data 150 | images_batch, labels_batch = loader_train.next_batch(batch_size) 151 | 152 | if step % step_display == 0: 153 | print('[%s]:' %(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) 154 | 155 | # Calculate batch loss and accuracy on training set 156 | l, acc1, acc5 = sess.run([loss, accuracy1, accuracy5], feed_dict={x: images_batch, y: labels_batch, keep_dropout: 1.}) 157 | print("-Iter " + str(step) + ", Training Loss= " + \ 158 | "{:.4f}".format(l) + ", Accuracy Top1 = " + \ 159 | "{:.2f}".format(acc1) + ", Top5 = " + \ 160 | "{:.2f}".format(acc5)) 161 | 162 | # Calculate batch loss and accuracy on validation set 163 | images_batch_val, labels_batch_val = loader_val.next_batch(batch_size) 164 | l, acc1, acc5 = sess.run([loss, accuracy1, accuracy5], feed_dict={x: images_batch_val, y: labels_batch_val, keep_dropout: 1.}) 165 | print("-Iter " + str(step) + ", Validation Loss= " + \ 166 | "{:.4f}".format(l) + ", Accuracy Top1 = " + \ 167 | "{:.2f}".format(acc1) + ", Top5 = " + \ 168 | "{:.2f}".format(acc5)) 169 | 170 | # Run optimization op (backprop) 171 | sess.run(train_optimizer, 
feed_dict={x: images_batch, y: labels_batch, keep_dropout: dropout}) 172 | 173 | step += 1 174 | 175 | # Save model 176 | if step % step_save == 0: 177 | saver.save(sess, path_save, global_step=step) 178 | print("Model saved at Iter %d !" %(step)) 179 | 180 | print("Optimization Finished!") 181 | 182 | 183 | # Evaluate on the whole validation set 184 | print('Evaluation on the whole validation set...') 185 | num_batch = loader_val.size()//batch_size 186 | acc1_total = 0. 187 | acc5_total = 0. 188 | loader_val.reset() 189 | for i in range(num_batch): 190 | images_batch, labels_batch = loader_val.next_batch(batch_size) 191 | acc1, acc5 = sess.run([accuracy1, accuracy5], feed_dict={x: images_batch, y: labels_batch, keep_dropout: 1.}) 192 | acc1_total += acc1 193 | acc5_total += acc5 194 | print("Validation Accuracy Top1 = " + \ 195 | "{:.2f}".format(acc1) + ", Top5 = " + \ 196 | "{:.2f}".format(acc5)) 197 | 198 | acc1_total /= num_batch 199 | acc5_total /= num_batch 200 | print('Evaluation Finished! Accuracy Top1 = ' + "{:.4f}".format(acc1_total) + ", Top5 = " + "{:.4f}".format(acc5_total)) 201 | -------------------------------------------------------------------------------- /model/tensorflow/prepro_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import h5py 4 | import scipy.misc 5 | 6 | def createH5(params): 7 | 8 | # create output h5 file 9 | output_h5 = '%s_%d_%s.h5' %(params['name'], params['img_resize'], params['split']) 10 | f_h5 = h5py.File(output_h5, "w") 11 | 12 | # read data info from lists 13 | list_im = [] 14 | list_lab = [] 15 | with open(params['data_list'], 'r') as f: 16 | for line in f: 17 | path, lab =line.rstrip().split(' ') 18 | list_im.append(os.path.join(params['data_root'], path)) 19 | list_lab.append(int(lab)) 20 | list_im = np.array(list_im, np.object) 21 | list_lab = np.array(list_lab, np.uint8) 22 | N = list_im.shape[0] 23 | print('# Images found:', N) 24 | 25 | # permutation 26 | perm = np.random.permutation(N) 27 | list_im = list_im[perm] 28 | list_lab = list_lab[perm] 29 | 30 | im_set = f_h5.create_dataset("images", (N,params['img_resize'],params['img_resize'],3), dtype='uint8') # space for resized images 31 | f_h5.create_dataset("labels", dtype='uint8', data=list_lab) 32 | 33 | for i in range(N): 34 | image = scipy.misc.imread(list_im[i]) 35 | assert image.shape[2]==3, 'Channel size error!' 
36 | image = scipy.misc.imresize(image, (params['img_resize'],params['img_resize'])) 37 | 38 | im_set[i] = image 39 | 40 | if i % 1000 == 0: 41 | print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0/N)) 42 | 43 | f_h5.close() 44 | 45 | if __name__=='__main__': 46 | params_train = { 47 | 'name': 'miniplaces', 48 | 'split': 'train', 49 | 'img_resize': 256, 50 | 'data_root': '../../data/images/', # MODIFY PATH ACCORDINGLY 51 | 'data_list': '../../data/train.txt' 52 | } 53 | 54 | params_val = { 55 | 'name': 'miniplaces', 56 | 'split': 'val', 57 | 'img_resize': 256, 58 | 'data_root': '../../data/images/', # MODIFY PATH ACCORDINGLY 59 | 'data_list': '../../data/val.txt' 60 | } 61 | 62 | createH5(params_train) 63 | createH5(params_val) 64 | -------------------------------------------------------------------------------- /teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSAILVision/miniplaces/b180bffabd727428fe798cbfe01bf19a2e9b19e2/teaser.jpg -------------------------------------------------------------------------------- /util/VOCreadxml.m: -------------------------------------------------------------------------------- 1 | function rec = VOCreadxml(path) 2 | 3 | if length(path)>5&&strcmp(path(1:5),'http:') 4 | xml=urlread(path)'; 5 | else 6 | f=fopen(path,'r'); 7 | xml=fread(f,'*char')'; 8 | fclose(f); 9 | end 10 | rec=VOCxml2struct(xml); 11 | -------------------------------------------------------------------------------- /util/VOCxml2struct.m: -------------------------------------------------------------------------------- 1 | function res = VOCxml2struct(xml) 2 | 3 | xml(xml==9|xml==10|xml==13)=[]; 4 | 5 | [res,xml]=parse(xml,1,[]); 6 | 7 | function [res,ind]=parse(xml,ind,parent) 8 | 9 | res=[]; 10 | if ~isempty(parent)&&xml(ind)~='<' 11 | i=findchar(xml,ind,'<'); 12 | res=trim(xml(ind:i-1)); 13 | ind=i; 14 | [tag,ind]=gettag(xml,i); 15 | if ~strcmp(tag,['/' parent]) 16 | error('<%s> closed with <%s>',parent,tag); 17 | end 18 | else 19 | while ind<=length(xml) 20 | [tag,ind]=gettag(xml,ind); 21 | if strcmp(tag,['/' parent]) 22 | return 23 | else 24 | [sub,ind]=parse(xml,ind,tag); 25 | if isstruct(sub) 26 | if isfield(res,tag) 27 | n=length(res.(tag)); 28 | fn=fieldnames(sub); 29 | for f=1:length(fn) 30 | res.(tag)(n+1).(fn{f})=sub.(fn{f}); 31 | end 32 | else 33 | res.(tag)=sub; 34 | end 35 | else 36 | if isfield(res,tag) 37 | if ~iscell(res.(tag)) 38 | res.(tag)={res.(tag)}; 39 | end 40 | res.(tag){end+1}=sub; 41 | else 42 | res.(tag)=sub; 43 | end 44 | end 45 | end 46 | end 47 | end 48 | 49 | function i = findchar(str,ind,chr) 50 | 51 | i=[]; 52 | while ind<=length(str) 53 | if str(ind)==chr 54 | i=ind; 55 | break 56 | else 57 | ind=ind+1; 58 | end 59 | end 60 | 61 | function [tag,ind]=gettag(xml,ind) 62 | 63 | if ind>length(xml) 64 | tag=[]; 65 | elseif xml(ind)=='<' 66 | i=findchar(xml,ind,'>'); 67 | if isempty(i) 68 | error('incomplete tag'); 69 | end 70 | tag=xml(ind+1:i-1); 71 | ind=i+1; 72 | else 73 | error('expected tag'); 74 | end 75 | 76 | function s = trim(s) 77 | 78 | for i=1:numel(s) 79 | if ~isspace(s(i)) 80 | s=s(i:end); 81 | break 82 | end 83 | end 84 | for i=numel(s):-1:1 85 | if ~isspace(s(i)) 86 | s=s(1:i); 87 | break 88 | end 89 | end 90 | 91 | --------------------------------------------------------------------------------
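For non-MATLAB users, here is a rough Python counterpart to `util/VOCreadxml.m`, converting an object-annotation XML file into nested dictionaries using only the standard library (a sketch; the resulting keys simply mirror whatever tags the annotation XML contains):

```python
import xml.etree.ElementTree as ET

def xml_to_dict(node):
    # Leaf node: return its text content, like the string leaves
    # produced by VOCxml2struct.m.
    children = list(node)
    if not children:
        return (node.text or '').strip()
    out = {}
    for child in children:
        value = xml_to_dict(child)
        if child.tag in out:
            # Repeated tag: collect the values into a list.
            if not isinstance(out[child.tag], list):
                out[child.tag] = [out[child.tag]]
            out[child.tag].append(value)
        else:
            out[child.tag] = value
    return out

rec = xml_to_dict(ET.parse('objects/train/a/abbey/00000001.xml').getroot())
```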