├── kraino ├── __init__.py ├── core │ ├── __init__.py │ ├── __init__.pyc │ ├── model_zoo.pyc │ ├── recurrent_net.pyc │ ├── keras_extensions.pyc │ ├── theano_functions.pyc │ ├── visual_model_zoo.pyc │ ├── recurrent_net.py │ ├── keras_extensions.py │ └── visual_model_zoo.py ├── utils │ ├── __init__.py │ ├── vqaEvaluation │ │ ├── __init__.py │ │ ├── vqaEval.pyc │ │ ├── __init__.pyc │ │ ├── .vqaEval.py.swp │ │ ├── vqaClassNormalizedEval.pyc │ │ ├── .vqaClassNormalizedEval.py.swp │ │ ├── vqaEvaluateModel.py │ │ ├── vqaClassNormalizedEval.py │ │ └── vqaEval.py │ ├── vqaTools │ │ ├── __init__.py │ │ ├── vqa.pyc │ │ ├── __init__.pyc │ │ └── vqa.py │ ├── __init__.pyc │ ├── callbacks.pyc │ ├── monitor.pyc │ ├── parsers.pyc │ ├── read_write.pyc │ ├── compute_wups.pyc │ ├── data_provider.pyc │ ├── print_metrics.pyc │ ├── input_output_space.pyc │ ├── model_visualization.pyc │ ├── monitor.py │ ├── model_visualization.py │ ├── print_metrics.py │ ├── read_write.py │ ├── compute_wups.py │ ├── input_output_space.py │ ├── callbacks.py │ ├── data_provider.py │ └── parsers.py └── __init__.pyc ├── data └── set_environment.bash ├── fig ├── one_hot.jpg ├── BOW_model.jpg ├── LSTM_model.jpg ├── challenges.jpg ├── small_taxonomy.jpg ├── BOW_vision_model.jpg ├── LSTM_vision_model.jpg └── features_extractor.jpg ├── boring_function.py ├── LICENSE ├── README.md └── neural_solver.py /kraino/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraino/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraino/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/__init__.py: -------------------------------------------------------------------------------- 1 | author='aagrawal' 2 | -------------------------------------------------------------------------------- /kraino/utils/vqaTools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'aagrawal' 2 | -------------------------------------------------------------------------------- /data/set_environment.bash: -------------------------------------------------------------------------------- 1 | export NLTK_DATA="/home/ubuntu/data/visual_turing_test/nltk_data" 2 | -------------------------------------------------------------------------------- /fig/one_hot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/one_hot.jpg -------------------------------------------------------------------------------- /fig/BOW_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/BOW_model.jpg -------------------------------------------------------------------------------- /fig/LSTM_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/LSTM_model.jpg -------------------------------------------------------------------------------- /fig/challenges.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/challenges.jpg -------------------------------------------------------------------------------- /kraino/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/__init__.pyc -------------------------------------------------------------------------------- /fig/small_taxonomy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/small_taxonomy.jpg -------------------------------------------------------------------------------- /fig/BOW_vision_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/BOW_vision_model.jpg -------------------------------------------------------------------------------- /fig/LSTM_vision_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/LSTM_vision_model.jpg -------------------------------------------------------------------------------- /fig/features_extractor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/features_extractor.jpg -------------------------------------------------------------------------------- /kraino/core/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/__init__.pyc -------------------------------------------------------------------------------- /kraino/core/model_zoo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/model_zoo.pyc -------------------------------------------------------------------------------- /kraino/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/__init__.pyc -------------------------------------------------------------------------------- /kraino/utils/callbacks.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/callbacks.pyc -------------------------------------------------------------------------------- /kraino/utils/monitor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/monitor.pyc -------------------------------------------------------------------------------- /kraino/utils/parsers.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/parsers.pyc -------------------------------------------------------------------------------- 
/kraino/utils/read_write.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/read_write.pyc -------------------------------------------------------------------------------- /kraino/core/recurrent_net.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/recurrent_net.pyc -------------------------------------------------------------------------------- /kraino/utils/compute_wups.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/compute_wups.pyc -------------------------------------------------------------------------------- /kraino/utils/data_provider.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/data_provider.pyc -------------------------------------------------------------------------------- /kraino/utils/print_metrics.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/print_metrics.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaTools/vqa.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaTools/vqa.pyc -------------------------------------------------------------------------------- /kraino/core/keras_extensions.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/keras_extensions.pyc -------------------------------------------------------------------------------- /kraino/core/theano_functions.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/theano_functions.pyc -------------------------------------------------------------------------------- /kraino/core/visual_model_zoo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/visual_model_zoo.pyc -------------------------------------------------------------------------------- /kraino/utils/input_output_space.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/input_output_space.pyc -------------------------------------------------------------------------------- /kraino/utils/model_visualization.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/model_visualization.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaTools/__init__.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaTools/__init__.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaEval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/vqaEval.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/__init__.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/.vqaEval.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/.vqaEval.py.swp -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaClassNormalizedEval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/vqaClassNormalizedEval.pyc -------------------------------------------------------------------------------- /boring_function.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import theano 3 | import keras 4 | 5 | if __name__ == '__main__': 6 | print sys.argv[1] 7 | print theano.__version__ 8 | print keras.__version__ 9 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/.vqaClassNormalizedEval.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/.vqaClassNormalizedEval.py.swp -------------------------------------------------------------------------------- /kraino/core/recurrent_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | Selects recurrent neural network based on the name. 
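For example, select['lstm'] maps to keras.layers.recurrent.LSTM (see the select dictionary at the bottom of this module).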
3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | from keras.layers.recurrent import GRU 9 | from keras.layers.recurrent import LSTM 10 | from keras.layers.recurrent import SimpleRNN 11 | #from keras.layers.recurrent import JZS1 12 | #from keras.layers.recurrent import JZS2 13 | #from keras.layers.recurrent import JZS3 14 | 15 | 16 | select = { 17 | 'lstm':LSTM, 18 | 'gru':GRU, 19 | 'simpleRNN':SimpleRNN, 20 | #'mut1':JZS1, 21 | #'mut2':JZS2, 22 | #'mut3':JZS3, 23 | #'jzs1':JZS1, 24 | #'jzs2':JZS2, 25 | #'jzs3':JZS3 26 | } 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /kraino/utils/monitor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | """ 5 | Monitoring tools. 
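Provides print_qa, which prints question/answer/prediction triples for a given era and returns a simple accuracy score.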
6 | 7 | Author: Mateusz Malinowski 8 | Email: mmalinow@mpi-inf.mpg.de 9 | """ 10 | 11 | from .read_write import list2file 12 | 13 | 14 | def _dirac(pred, gt): 15 | return int(pred==gt) 16 | 17 | 18 | def print_qa(questions, answers_gt, answers_gt_original, answers_pred, 19 | era, similarity=_dirac, path=''): 20 | """ 21 | In: 22 | questions - list of questions 23 | answers_gt - list of answers (after modifications like truncation) 24 | answers_gt_original - list of answers (before modifications) 25 | answers_pred - list of predicted answers 26 | era - current era 27 | similarity - measure of similarity between gt_original and the prediction; 28 | by default dirac measure 29 | path - path for the output (if empty then stdout is used) 30 | by default an empty path 31 | Out: 32 | the similarity score 33 | """ 34 | assert(len(questions)==len(answers_gt)) 35 | assert(len(questions)==len(answers_pred)) 36 | output=['-'*50, 'Era {0}'.format(era)] 37 | score = 0.0 38 | for k, q in enumerate(questions): 39 | a_gt=answers_gt[k] 40 | a_gt_original=answers_gt_original[k] 41 | a_p=answers_pred[k] 42 | score += similarity(a_p, a_gt_original) 43 | output.append('question: {0}\nanswer: {1}\nanswer_original: {2}\nprediction: {3}\n'\ 44 | .format(q, a_gt, a_gt_original, a_p)) 45 | score = (score / len(questions))*100.0 46 | output.append('Score: {0}'.format(score)) 47 | if path == '': 48 | print('%s' % '\n'.join(map(str, output))) 49 | else: 50 | list2file(path, output) 51 | return score 52 | 53 | -------------------------------------------------------------------------------- /kraino/utils/model_visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Graph-like model visualization.
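Renders the topology of a Keras Sequential or Graph model to a PNG file using pydot.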
5 | 6 | Original work: Annet Graham [https://github.com/grahamannett] 7 | """ 8 | import pydot 9 | from keras.models import Graph 10 | from keras.models import Sequential 11 | 12 | 13 | def model_picture(model, to_file='local/model.png'): 14 | 15 | graph = pydot.Dot(graph_type='digraph') 16 | if isinstance(model,Sequential): 17 | previous_node = None 18 | written_nodes = [] 19 | n = 1 20 | for node in model.get_config()['layers']: 21 | # append number in case layers have same name to differentiate 22 | if (node['name'] + str(n)) in written_nodes: 23 | n += 1 24 | current_node = pydot.Node(node['name'] + str(n)) 25 | written_nodes.append(node['name'] + str(n)) 26 | graph.add_node(current_node) 27 | if previous_node: 28 | graph.add_edge(pydot.Edge(previous_node, current_node)) 29 | previous_node = current_node 30 | graph.write_png(to_file) 31 | 32 | elif isinstance(model,Graph): 33 | # don't need to append number for names since all nodes labeled 34 | for input_node in model.input_config: 35 | graph.add_node(pydot.Node(input_node['name'])) 36 | 37 | # intermediate and output nodes have input defined 38 | for layer_config in [model.node_config, model.output_config]: 39 | for node in layer_config: 40 | graph.add_node(pydot.Node(node['name'])) 41 | # possible to have multiple 'inputs' vs 1 'input' 42 | if node['inputs']: 43 | for e in node['inputs']: 44 | graph.add_edge(pydot.Edge(e, node['name'])) 45 | else: 46 | graph.add_edge(pydot.Edge(node['input'], node['name'])) 47 | 48 | graph.write_png(to_file) 49 | 50 | -------------------------------------------------------------------------------- /kraino/core/keras_extensions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Additional theano/keras functions. 3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | #import marshal 9 | import numpy 10 | #import types 11 | 12 | from keras.layers.convolutional import Convolution1D 13 | from keras.layers.convolutional import MaxPooling1D 14 | 15 | from keras.layers.core import Lambda 16 | from keras.layers.core import MaskedLayer 17 | from keras.layers.core import TimeDistributedMerge 18 | 19 | from keras import backend as K 20 | 21 | 22 | ## functions ## 23 | def time_distributed_nonzero_max_pooling(x): 24 | """ 25 | Computes maximum along the first (time) dimension. 26 | It ignores the mask m. 27 | 28 | In: 29 | x - input; a 3D tensor 30 | mask_value - value to mask out, if None then no masking; 31 | by default 0.0, 32 | """ 33 | 34 | import theano.tensor as T 35 | 36 | mask_value=0.0 37 | x = T.switch(T.eq(x, mask_value), -numpy.inf, x) 38 | masked_max_x = x.max(axis=1) 39 | # replace infinities with mask_value 40 | masked_max_x = T.switch(T.eq(masked_max_x, -numpy.inf), 0, masked_max_x) 41 | return masked_max_x 42 | 43 | 44 | def time_distributed_masked_ave(x, m): 45 | """ 46 | Computes average along the first (time) dimension. 47 | 48 | In: 49 | x - input; a 3D tensor 50 | m - mask 51 | """ 52 | tmp = K.sum(x, axis=1) 53 | nonzeros = K.sum(m, axis=-1) 54 | return tmp / K.expand_dims(K.cast(nonzeros, tmp.dtype)) 55 | 56 | 57 | def time_distributed_masked_max(x, m): 58 | """ 59 | Computes max along the first (time) dimension. 
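Positions where the mask is off are ignored; if all positions are masked, the maximum falls back to 0.0.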
60 | 61 | In: 62 | x - input; a 3D tensor 63 | m - mask 64 | m_value - value for masking 65 | """ 66 | # place infinities where mask is off 67 | m_value = 0.0 68 | tmp = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0) 69 | x_with_inf = x + K.expand_dims(tmp) 70 | x_max = K.max(x_with_inf, axis=1) 71 | r = K.switch(K.equal(x_max, -numpy.inf), m_value, x_max) 72 | return r 73 | 74 | 75 | ## classes ## 76 | 77 | # Transforms existing layers to masked layers 78 | class MaskedTimeDistributedMerge(MaskedLayer, TimeDistributedMerge): 79 | pass 80 | 81 | 82 | class MaskedConvolution1D(MaskedLayer, Convolution1D): 83 | pass 84 | 85 | 86 | class MaskedMaxPooling1D(MaskedLayer, MaxPooling1D): 87 | pass 88 | 89 | 90 | # auxiliary mask-aware layers 91 | class DropMask(MaskedLayer): 92 | """ 93 | Removes a mask from the layer. 94 | """ 95 | def get_output_mask(self, train=False): 96 | return None 97 | 98 | 99 | class LambdaWithMask(MaskedLayer, Lambda): 100 | """ 101 | Lambda function that takes a two argument function, and returns 102 | a value returned by the function applied to the output of the previous layer 103 | and the mask. 104 | 105 | That is: LambdaWithMask(f) = f(previous, mask) 106 | """ 107 | def get_output(self, train=False): 108 | #func = marshal.loads(self.function) 109 | #func = types.FunctionType(func, globals()) 110 | func = self.function 111 | if hasattr(self, 'previous'): 112 | return func(self.previous.get_output(train), 113 | self.previous.get_output_mask(train)) 114 | else: 115 | return func(self.input, self.get_output_mask(train)) 116 | 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | To run this tutorial you need to run jupyter. 3 | 4 | If you run jupyter remotely, you can use `jupyter notebook --ip=0.0.0.0` 5 | 6 | Main file: visual_turing_test.ipynb 7 | Readers are, however, encouraged to download the notebook together 8 | with the associated files and go through the tutorial on their own. 9 | 10 | The tutorial should be run on a Linux machine. 11 | Please also make sure that all Installation requirements are fulfilled 12 | and you have similar versions of Theano and Keras (see 'Tested on'). 13 | 14 | Please contact mmalinow@mpi-inf.mpg.de if you encounter any problems. 15 | 16 | # Kraino - Keras-based RNN for Visual Turing Test 17 | Keras implementation of 'Ask Your Neurons'. 18 | * Free software: MIT license 19 | * If you use this library, please cite our "Ask Your Neurons" paper [1] 20 | * Note that we use a simplified version of Kraino for the purpose of the 21 | Tutorial 22 | 23 | ## Installation 24 | Requirements: 25 | * Theano 26 | * Keras (fchollet) 27 | * toolz 28 | * h5py 29 | * Bokeh (0.10.0) 30 | * nltk (required by WUPS metrics) 31 | * pydot 32 | * spacy 33 | 34 | Additional: 35 | * VQA (VT-vision-lab/VQA) for Visual Question Answering 36 | * vqaEvaluation for the evaluation metrics 37 | * vqaTools for the dataset providers 38 | * both should be placed in the kraino/utils folder 39 | 40 | 41 | ## Folders structure 42 | data/ 43 | 44 | daquar/ 45 | 46 | vqa/ 47 | 48 | ... 49 | 50 | kraino/ 51 | 52 | local/ 53 | 54 | logs/ 55 | 56 | weights/ 57 | 58 | model-*.pkl 59 | 60 | kraino/ 61 | 62 | __init__.py 63 | 64 | core/ 65 | 66 | utils/ 67 | 68 | 69 | data 70 | * stores all datasets 71 | 72 | kraino 73 | * source code and local output 74 | * local 75 | * stores logs (e.g.
predictions) in the 'logs' folder 76 | * stores weights of different models in the 'weights' folder 77 | * stores model topologies as '.pkl' files 78 | * kraino 79 | * stores the models in the 'core' folder 80 | * stores functions (dataset providers or callbacks) in the 'utils' folder 81 | 82 | ## Eras 83 | Kraino counts computational cycles in eras (not epochs). 84 | Every era ends when "MAX EPOCH" is reached; then the training proceeds to 85 | the next era. Before and after each era the (callback) actions are executed. 86 | 87 | ## Warning 88 | The framework is under continuous development, and hence it is not guaranteed 89 | that the API won't change in the future. To avoid adapting to a new API, you can 90 | clone from a specific commit hash. 91 | 92 | ## Tested on 93 | * Python 2.7.3 94 | * Theano:0.8.0.dev0.dev-63990436c98f107cf120f3578021a5d259ecf352 95 | * Keras:b587aeee1c1be3633a56b945af3e7c2c303369ca 96 | 97 | ## Bibliography 98 | 99 | @article{malinowski2016ask, 100 | 101 | title={Ask Your Neurons: A Deep Learning Approach to Visual Question Answering}, 102 | 103 | author={Malinowski, Mateusz and Rohrbach, Marcus and Fritz, Mario}, 104 | 105 | journal={arXiv preprint arXiv:1605.02697}, 106 | 107 | year={2016} 108 | 109 | } 110 | 111 | @inproceedings{malinowski2015ask, 112 | 113 | title={Ask your neurons: A neural-based approach to answering questions about images}, 114 | 115 | author={Malinowski, Mateusz and Rohrbach, Marcus and Fritz, Mario}, 116 | 117 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 118 | 119 | pages={1--9}, 120 | 121 | year={2015} 122 | 123 | } 124 | 125 | @inproceedings{malinowski2014multi, 126 | 127 | title={A multi-world approach to question answering about real-world scenes based on uncertain input}, 128 | 129 | author={Malinowski, Mateusz and Fritz, Mario}, 130 | 131 | booktitle={Advances in Neural Information Processing Systems}, 132 | 133 | pages={1682--1690}, 134 | 135 | year={2014} 136 | 137 | } 138 | 139 | @article{malinowski2016tutorial, 140 | 141 | title={Tutorial on Answering Questions about Images with Deep Learning}, 142 | 143 | author={Malinowski, Mateusz and Fritz, Mario}, 144 | 145 | journal={arXiv preprint arXiv:1610.01076}, 146 | 147 | year={2016} 148 | 149 | } 150 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaEvaluateModel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ 5 | Slightly modified variant of the original script.
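Usage: python vqaEvaluateModel datasetFold resultType isVisualisation (e.g. python vqaEvaluateModel val image_bow False).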
6 | 7 | Author of the original: Aishwarya Agrawal 8 | """ 9 | 10 | import sys 11 | dataDir = '/BS/databases/vqa_1.0/VQA' 12 | sys.path.insert(0, '../vqaTools') 13 | from vqa import VQA 14 | from vqaClassNormalizedEval import VQAClassNormalizedEval as VQAEval 15 | import matplotlib.pyplot as plt 16 | import skimage.io as io 17 | import json 18 | import random 19 | import os 20 | 21 | if len(sys.argv) != 4: 22 | print 'Usage: python vqaEvaluateModel datasetFold resultType isVisualisation' 23 | print 'E.g.: python vqaEvaluateModel val image_bow False' 24 | sys.exit(1) 25 | 26 | datasetFold = sys.argv[1] 27 | resultType = sys.argv[2] 28 | if sys.argv[3] == 'True': 29 | isVisualisation = True 30 | elif sys.argv[3] == 'False': 31 | isVisualisation = False 32 | else: 33 | raise NotImplementedError() 34 | 35 | # set up file names and paths 36 | taskType ='OpenEnded' 37 | dataType ='mscoco' # 'mscoco' for real and 'abstract_v002' for abstract 38 | if datasetFold == 'train': 39 | dataSubType ='train2014' 40 | elif datasetFold == 'val': 41 | dataSubType = 'val2014' 42 | else: 43 | raise NotImplementedError() 44 | annFile ='%s/Annotations/%s_%s_annotations.json'%(dataDir, dataType, dataSubType) 45 | quesFile ='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType) 46 | imgDir ='%s/Images/%s/%s/' %(dataDir, dataType, dataSubType) 47 | fileTypes = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType'] 48 | 49 | # An example result json file has been provided in './Results' folder. 50 | 51 | [resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = \ 52 | ['../../../local/results/%s.%s.%s.%s.%s.json'%(taskType, dataType, dataSubType, resultType, fileType) for fileType in fileTypes] 53 | 54 | # create vqa object and vqaRes object 55 | vqa = VQA(annFile, quesFile) 56 | vqaRes = vqa.loadRes(resFile, quesFile) 57 | # create vqaEval object by taking vqa and vqaRes 58 | vqaEval = VQAEval(vqa, vqaRes, n=2) #n is precision of accuracy (number of places after decimal), default is 2 59 | 60 | # evaluate results 61 | """ 62 | If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function 63 | By default it uses all the question ids in annotation file 64 | """ 65 | vqaEval.evaluate() 66 | # print accuracies 67 | print "\n" 68 | print "Per Question Type Accuracy is the following:" 69 | for quesType in vqaEval.accuracy['perQuestionType']: 70 | print "%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType]) 71 | print "Overall Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']) 72 | print "\n" 73 | print "Overall per class accuracy is %.02f\n" %(vqaEval.accuracy['classNormalizedOverall']) 74 | print "\n" 75 | print "Per Answer Type Accuracy is the following:" 76 | for ansType in vqaEval.accuracy['perAnswerType']: 77 | print "%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType]) 78 | print "\n" 79 | # demo how to use evalQA to retrieve low score result 80 | if isVisualisation == True: 81 | evals = [quesId for quesId in vqaEval.evalQA if vqaEval.evalQA[quesId]<35] #35 is per question percentage accuracy 82 | if len(evals) > 0: 83 | print 'ground truth answers' 84 | randomEval = random.choice(evals) 85 | randomAnn = vqa.loadQA(randomEval) 86 | vqa.showQA(randomAnn) 87 | 88 | print '\n' 89 | print 'generated answer (accuracy %.02f)'%(vqaEval.evalQA[randomEval]) 90 | ann = vqaRes.loadQA(randomEval)[0] 91 | print "Answer: %s\n" %(ann['answer']) 92 | 93 | imgId = randomAnn[0]['image_id'] 94 
| imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg' 95 | if os.path.isfile(imgDir + imgFilename): 96 | I = io.imread(imgDir + imgFilename) 97 | plt.imshow(I) 98 | plt.axis('off') 99 | plt.show() 100 | 101 | # plot accuracy for various question types 102 | plt.bar(range(len(vqaEval.accuracy['perQuestionType'])), vqaEval.accuracy['perQuestionType'].values(), align='center') 103 | plt.xticks(range(len(vqaEval.accuracy['perQuestionType'])), vqaEval.accuracy['perQuestionType'].keys(), rotation='0',fontsize=10) 104 | plt.title('Per Question Type Accuracy', fontsize=10) 105 | plt.xlabel('Question Types', fontsize=10) 106 | plt.ylabel('Accuracy', fontsize=10) 107 | plt.show() 108 | 109 | # save evaluation results to ./Results folder 110 | json.dump(vqaEval.accuracy, open(accuracyFile, 'w')) 111 | json.dump(vqaEval.evalQA, open(evalQAFile, 'w')) 112 | json.dump(vqaEval.evalQuesType, open(evalQuesTypeFile, 'w')) 113 | json.dump(vqaEval.evalAnsType, open(evalAnsTypeFile, 'w')) 114 | 115 | -------------------------------------------------------------------------------- /kraino/utils/print_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | """ 5 | Selects and prints metrics. 6 | 7 | Author: Mateusz Malinowski 8 | Email: mmalinow@mpi-inf.mpg.de 9 | """ 10 | 11 | import os 12 | 13 | from uuid import uuid4 14 | 15 | from compute_wups import get_metric_score as wups_score 16 | from compute_wups import get_class_metric_score as class_wups_score 17 | from data_provider import vqa_save_results as vqa_store 18 | from vqaEvaluation.vqaClassNormalizedEval import VQAClassNormalizedEval as VQAEval 19 | 20 | 21 | def average_over_dictionary(mydict): 22 | """ 23 | Average over dictionary values. 
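E.g. {'a': 2.0, 'b': 4.0} gives (2.0+4.0)/2 = 3.0.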
24 | """ 25 | ave = sum([x for x in mydict.values()])/len(mydict) 26 | return ave 27 | 28 | 29 | def show_wups(gt_list, pred_list, verbose, extra_vars): 30 | """ 31 | In: 32 | gt_list - ground truth list 33 | pred_list - list of predictions 34 | verbose - if greater than 0 the metric measures are printed out 35 | extra_vars - not used here 36 | 37 | Out: 38 | list of key, value pairs (dict) such that 39 | 'value' denotes the performance number 40 | and 'name' denotes the name of the metric 41 | """ 42 | acc = wups_score(gt_list, pred_list, -1) * 100.0 43 | wups_at_09 = wups_score(gt_list, pred_list, 0.9) * 100.0 44 | #wups_at_0 = wups_score(gt_list, pred_list, 0.0) * 100.0 45 | wups_at_0 = -1.0 46 | per_class_acc_tmp = class_wups_score(gt_list, pred_list, -1) 47 | #per_class_wups_at_09_tmp = class_wups_score(gt_list, pred_list, 0.9) 48 | per_class_wups_at_09_tmp = None 49 | per_class_acc = {k:v*100.0 for k,v in per_class_acc_tmp.items()} 50 | if per_class_wups_at_09_tmp is not None: 51 | per_class_wups_at_09 = {k:v*100.0 for k,v in per_class_wups_at_09_tmp.items()} 52 | else: 53 | per_class_wups_at_09 = None 54 | class_acc = average_over_dictionary(per_class_acc_tmp)*100.0 55 | if per_class_wups_at_09_tmp is not None: 56 | class_wups_at_09 = average_over_dictionary(per_class_wups_at_09_tmp)*100.0 57 | else: 58 | class_wups_at_09 = -1.0 59 | class_wups_at_0 = -1.0 60 | if verbose > 0: 61 | print('METRIC: Accuracy is {0}, wups at 0.9 is {1}, wups at 0.0 is {2}'.format( 62 | acc, wups_at_09, wups_at_0)) 63 | print('CLASS METRIC: Accuracy is {0}, wups at 0.9 is {1}, wups at 0.0 is {2}'.format( 64 | class_acc, class_wups_at_09, class_wups_at_0)) 65 | return [{'value':acc, 'name':'accuracy'}, 66 | {'value':wups_at_09, 'name':'wups at 0.9'}, 67 | {'value':wups_at_0, 'name':'wups at 0.0'}, 68 | {'value':per_class_acc, 'name':'per class accuracy', 69 | 'idiosyncrasy':'long:muted'}, 70 | {'value':per_class_wups_at_09, 'name':'per class wups at 0.9', 71 | 'idiosyncrasy':'long:muted'}, 72 | {'value':class_acc, 'name':'class accuracy'}, 73 | {'value':class_wups_at_09, 'name':'class wups at 0.9'}, 74 | {'value':class_wups_at_0, 'name':'class wups at 0'},] 75 | 76 | def show_vqa(gt_list, pred_list, verbose, extra_vars): 77 | #question_id, vqa_object, 78 | #dataset_root=None): 79 | """ 80 | In: 81 | gt_list - ground truth list 82 | pred_list - list of predictions 83 | verbose - if greater than 0 the metric measures are printed out 84 | extra_vars - extra variables, here are: 85 | extra_vars['vqa'] - the vqa object 86 | extra_vars['resfun'] - function from the results file to the vqa object 87 | extra_vars['question_id'] - list of the question ids 88 | 89 | Out: 90 | list of key, value pairs (dict) such that 91 | 'value' denotes the performance number 92 | and 'name' denotes the name of the metric 93 | """ 94 | # TODO: quite hacky way of creating and next reading the file 95 | if verbose > 0: 96 | print('dumping json file ...') 97 | vqa_object = extra_vars['vqa_object'] 98 | results_path = '/tmp/vqa_metric_{0}.json'.format(uuid4()) 99 | #print(results_path) 100 | vqa_store(extra_vars['question_id'], pred_list, results_path) 101 | vqa_res = extra_vars['resfun'](results_path) 102 | os.remove(results_path) 103 | if verbose > 0: 104 | print('dumping finished') 105 | ### 106 | vqaEval = VQAEval(vqa_object, vqa_res, n=2) 107 | vqaEval.evaluate() 108 | acc_overall = vqaEval.accuracy['overall'] 109 | acc_yes_no = vqaEval.accuracy['perAnswerType']['yes/no'] 110 | acc_number = 
vqaEval.accuracy['perAnswerType']['number'] 111 | acc_other = vqaEval.accuracy['perAnswerType']['other'] 112 | acc_per_class = vqaEval.accuracy['perAnswerClass'] 113 | acc_class_normalized = vqaEval.accuracy['classNormalizedOverall'] 114 | 115 | if verbose > 0: 116 | print('METRIC: Accuracy yes/no is {0}, other is {1}, number is {2}, overall is {3}, class normalized is {4}'.\ 117 | format(acc_yes_no, acc_other, acc_number, acc_overall, acc_class_normalized)) 118 | return [{'value':acc_overall, 'name':'overall accuracy'}, 119 | {'value':acc_yes_no, 'name':'yes/no accuracy'}, 120 | {'value':acc_number, 'name':'number accuracy'}, 121 | {'value':acc_other, 'name':'other accuracy'}, 122 | {'value':acc_class_normalized, 'name':'class accuracy'}, 123 | {'value':acc_per_class, 'name':'per answer class', 124 | 'idiosyncrasy':'long:muted'},] 125 | 126 | 127 | select = { 128 | 'wups' : show_wups, 129 | 'vqa' : show_vqa 130 | } 131 | 132 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaClassNormalizedEval.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Extension of Agrawal's VQA evaluation script with a class-specific metric. 5 | 6 | Mateusz Malinowski [mmalinow@mpi-inf.mpg.de] 7 | """ 8 | 9 | import numpy as np 10 | 11 | from toolz import frequencies 12 | 13 | from vqaEval import VQAEval 14 | 15 | 16 | class VQAClassNormalizedEval(VQAEval): 17 | """ 18 | A class normalized evaluation metric. 19 | 20 | It assigns to every answer its answer class, and then aggregates the 21 | accuracies per answer class. 22 | The assignment is based on the most frequent answers. 23 | """ 24 | def __init__(self, vqa, vqaRes, n=2): 25 | VQAEval.__init__(self, vqa, vqaRes, n) 26 | 27 | print "Initialize class normalized evaluation..." 28 | # calculates answer frequencies over the current answers (train, val, 29 | # etc.) 30 | quesIds = [x for x in self.params['question_id']] 31 | gts = {} 32 | for quesId in quesIds: 33 | gts[quesId] = self.vqa.qa[quesId] 34 | 35 | # consider frequencies for all answers 36 | all_answers = [x['answer'] for y in gts for x in gts[y]['answers']] 37 | self.answer2freq = frequencies(all_answers) 38 | print "Class normalized evaluation initialized!"
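    # Illustrative sketch (added note, not part of the original script): the
    # class-normalized score computed in setAccuracy below first averages the
    # per-question accuracies within each answer class and then averages the
    # class means, e.g.
    #   accAnswerClass = {'yes': [1.0, 0.0], 'red': [1.0]}
    #   per-class means: [0.5, 1.0]
    #   classNormalizedOverall = 100 * (0.5 + 1.0) / 2 = 75.0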
39 | 40 | def evaluate(self, quesIds=None): 41 | if quesIds == None: 42 | quesIds = [quesId for quesId in self.params['question_id']] 43 | gts = {} 44 | res = {} 45 | for quesId in quesIds: 46 | gts[quesId] = self.vqa.qa[quesId] 47 | res[quesId] = self.vqaRes.qa[quesId] 48 | 49 | # ================================================= 50 | # Compute accuracy 51 | # ================================================= 52 | accQA = [] 53 | accQuesType = {} 54 | accAnsType = {} 55 | accAnswerClass = {} 56 | print "computing accuracy" 57 | step = 0 58 | 59 | for quesId in quesIds: 60 | resAns = res[quesId]['answer'] 61 | resAns = resAns.replace('\n', ' ') 62 | resAns = resAns.replace('\t', ' ') 63 | resAns = resAns.strip() 64 | resAns = self.processPunctuation(resAns) 65 | resAns = self.processDigitArticle(resAns) 66 | gtAcc = [] 67 | gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']] 68 | 69 | # ============================================ 70 | # Create the abstract classes 71 | # ============================================ 72 | # take confident answers if possible 73 | gtAnswersConfident = [ans['answer'] for ans in gts[quesId]['answers'] if ans['answer_confidence'] == 'yes'] 74 | if gtAnswersConfident == []: 75 | gtAnswersConfident = gtAnswers 76 | sortedGtAnswers_y = sorted(gtAnswersConfident) 77 | sortedGtAnswers_x = map(lambda x:self.answer2freq[x], sortedGtAnswers_y) 78 | answerClass = sortedGtAnswers_y[np.argmax(sortedGtAnswers_x)] 79 | # ============================================ 80 | if len(set(gtAnswers)) > 1: 81 | for ansDic in gts[quesId]['answers']: 82 | ansDic['answer'] = self.processPunctuation(ansDic['answer']) 83 | for gtAnsDatum in gts[quesId]['answers']: 84 | otherGTAns = [item for item in gts[quesId]['answers'] if item!=gtAnsDatum] 85 | matchingAns = [item for item in otherGTAns if item['answer']==resAns] 86 | acc = min(1, float(len(matchingAns))/3) 87 | gtAcc.append(acc) 88 | quesType = gts[quesId]['question_type'] 89 | ansType = gts[quesId]['answer_type'] 90 | avgGTAcc = float(sum(gtAcc))/len(gtAcc) 91 | accQA.append(avgGTAcc) 92 | if quesType not in accQuesType: 93 | accQuesType[quesType] = [] 94 | accQuesType[quesType].append(avgGTAcc) 95 | if ansType not in accAnsType: 96 | accAnsType[ansType] = [] 97 | if answerClass not in accAnswerClass: 98 | accAnswerClass[answerClass] = [] 99 | accAnswerClass[answerClass].append(avgGTAcc) 100 | accAnsType[ansType].append(avgGTAcc) 101 | self.setEvalQA(quesId, avgGTAcc) 102 | self.setEvalQuesType(quesId, quesType, avgGTAcc) 103 | self.setEvalAnsType(quesId, ansType, avgGTAcc) 104 | if step%100 == 0: 105 | self.updateProgress(step/float(len(quesIds))) 106 | step = step + 1 107 | self.setAccuracy(accQA, accAnswerClass, accQuesType, accAnsType) 108 | print "Done computing accuracy" 109 | 110 | def setAccuracy(self, accQA, accAnswerClass, accQuesType, accAnsType): 111 | self.accuracy['overall'] = round(100*float(sum(accQA))/len(accQA), self.n) 112 | self.accuracy['classNormalizedOverall'] = \ 113 | round(100*float(sum([sum(x)/len(x) for x in accAnswerClass.values()]))/len(accAnswerClass), self.n) 114 | self.accuracy['perAnswerClass'] = \ 115 | {answerClass: round(100*float(sum(accAnswerClass[answerClass]))/len(accAnswerClass[answerClass]), self.n) for answerClass in accAnswerClass} 116 | self.accuracy['perQuestionType'] = \ 117 | {quesType: round(100*float(sum(accQuesType[quesType]))/len(accQuesType[quesType]), self.n) for quesType in accQuesType} 118 | self.accuracy['perAnswerType'] = \ 119 | {ansType: 
round(100*float(sum(accAnsType[ansType]))/len(accAnsType[ansType]), self.n) for ansType in accAnsType} 120 | 121 | -------------------------------------------------------------------------------- /kraino/utils/read_write.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reads from input file or writes to the output file. 3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | ### 9 | # Helpers 10 | ### 11 | def _dirac(pred, gt): 12 | return int(pred==gt) 13 | 14 | ### 15 | # Main functions 16 | ### 17 | def file2list(filepath): 18 | with open(filepath,'r') as f: 19 | lines =[k for k in 20 | [k.strip() for k in f.readlines()] 21 | if len(k) > 0] 22 | 23 | return lines 24 | 25 | 26 | def list2file(filepath,mylist,permission='w'): 27 | mylist='\n'.join(mylist) 28 | if type(mylist[0]) is unicode: 29 | mylist=mylist.encode('utf-8') 30 | with open(filepath,permission) as f: 31 | f.writelines(mylist) 32 | 33 | 34 | def dump_hdf5_simple(filepath, dataset_name, data): 35 | import h5py 36 | h5f = h5py.File(filepath, 'w') 37 | h5f.create_dataset(dataset_name, data=data) 38 | h5f.close() 39 | 40 | 41 | def load_hdf5_simple(filepath, dataset_name): 42 | import h5py 43 | h5f = h5py.File(filepath, 'r') 44 | tmp = h5f[dataset_name][:] 45 | h5f.close() 46 | return tmp 47 | 48 | 49 | def pickle_model( 50 | path, 51 | model, 52 | word2index_x, 53 | word2index_y, 54 | index2word_x, 55 | index2word_y): 56 | import sys 57 | import cPickle as pickle 58 | modifier=10 59 | tmp = sys.getrecursionlimit() 60 | sys.setrecursionlimit(tmp*modifier) 61 | with open(path, 'wb') as f: 62 | p_dict = {'model':model, 63 | 'word2index_x':word2index_x, 64 | 'word2index_y':word2index_y, 65 | 'index2word_x':index2word_x, 66 | 'index2word_y':index2word_y} 67 | pickle.dump(p_dict, f, protocol=2) 68 | sys.setrecursionlimit(tmp) 69 | 70 | 71 | def unpickle_model(path): 72 | import cPickle as pickle 73 | with open(path, 'rb') as f: 74 | model = pickle.load(f)['model'] 75 | return model 76 | 77 | 78 | def unpickle_vocabulary(path): 79 | import cPickle as pickle 80 | p_dict = {} 81 | with open(path, 'rb') as f: 82 | pickle_load = pickle.load(f) 83 | p_dict['word2index_x'] = pickle_load['word2index_x'] 84 | p_dict['word2index_y'] = pickle_load['word2index_y'] 85 | p_dict['index2word_x'] = pickle_load['index2word_x'] 86 | p_dict['index2word_y'] = pickle_load['index2word_y'] 87 | return p_dict 88 | 89 | 90 | def unpickle_data_provider(path): 91 | import cPickle as pickle 92 | with open(path, 'rb') as f: 93 | dp = pickle.load(f)['data_provider'] 94 | return dp 95 | 96 | 97 | def model_to_json(path, model): 98 | """ 99 | Saves model as a json file under the path. 100 | """ 101 | import json 102 | json_model = model.to_json() 103 | with open(path, 'w') as f: 104 | json.dump(json_model, f) 105 | 106 | 107 | def json_to_model(path): 108 | """ 109 | Loads a model from the json file. 110 | """ 111 | import json 112 | from keras.models import model_from_json 113 | with open(path, 'r') as f: 114 | json_model = json.load(f) 115 | model = model_from_json(json_model) 116 | return model 117 | 118 | 119 | def model_to_text(filepath, model_added): 120 | """ 121 | Save the model to text file. 122 | """ 123 | pass 124 | 125 | 126 | def text_to_model(filepath): 127 | """ 128 | Loads the model from the text file. 
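(Currently a placeholder; not implemented.)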
129 | """ 130 | pass 131 | 132 | 133 | def print_qa(questions, answers_gt, answers_gt_original, answers_pred, 134 | era, similarity=_dirac, path=''): 135 | """ 136 | In: 137 | questions - list of questions 138 | answers_gt - list of answers (after modifications like truncation) 139 | answers_gt_original - list of answers (before modifications) 140 | answers_pred - list of predicted answers 141 | era - current era 142 | similarity - measure that measures similarity between gt_original and prediction; 143 | by default dirac measure 144 | path - path for the output (if empty then stdout is used) 145 | by fedault an empty path 146 | Out: 147 | the similarity score 148 | """ 149 | assert(len(questions)==len(answers_gt)) 150 | assert(len(questions)==len(answers_pred)) 151 | output=['-'*50, 'Era {0}'.format(era)] 152 | score = 0.0 153 | for k, q in enumerate(questions): 154 | a_gt=answers_gt[k] 155 | a_gt_original=answers_gt_original[k] 156 | a_p=answers_pred[k] 157 | score += _dirac(a_p, a_gt_original) 158 | if type(q[0]) is unicode: 159 | tmp = unicode( 160 | 'question: {0}\nanswer: {1}\nanswer_original: {2}\nprediction: {3}\n') 161 | else: 162 | tmp = 'question: {0}\nanswer: {1}\nanswer_original: {2}\nprediction: {3}\n' 163 | output.append(tmp.format(q, a_gt, a_gt_original, a_p)) 164 | score = (score / len(questions))*100.0 165 | output.append('Score: {0}'.format(score)) 166 | if path == '': 167 | print('%s' % '\n'.join(map(str, output))) 168 | else: 169 | list2file(path, output) 170 | return score 171 | 172 | 173 | def dict2file(mydict, path, title=None): 174 | """ 175 | In: 176 | mydict - dictionary to save in a file 177 | path - path where acc_dict is stored 178 | title - the first sentence in the file; 179 | useful if we write many dictionaries 180 | into the same file 181 | """ 182 | tmp = [str(x[0])+':'+str(x[1]) for x in mydict.items()] 183 | if title is not None: 184 | output_list = [title] 185 | output_list.extend(tmp) 186 | else: 187 | output_list = tmp 188 | list2file(path, output_list, 'a') 189 | 190 | -------------------------------------------------------------------------------- /kraino/utils/compute_wups.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | 7 | The script assumes there are two files 8 | - first file with ground truth answers 9 | - second file with predicted answers 10 | both answers are line-aligned 11 | 12 | The script also assumes that answer items are comma separated. 13 | For instance, chair,table,window 14 | 15 | It is also a set measure, so not exactly the same as accuracy 16 | even if dirac measure is used since {book,book}=={book}, also {book,chair}={chair,book} 17 | 18 | Logs: 19 | 18.02.2016 - added partitioning wrt. 
answers 20 | 17.10.2015 - abstracted the metric computations away 21 | 05.09.2015 - white spaces surrounding words are stripped away so that {book, chair}={book,chair} 22 | """ 23 | 24 | import sys 25 | 26 | #import enchant 27 | 28 | from numpy import prod 29 | from nltk.corpus import wordnet as wn 30 | 31 | 32 | def file2list(filepath): 33 | with open(filepath,'r') as f: 34 | lines =[k for k in 35 | [k.strip() for k in f.readlines()] 36 | if len(k) > 0] 37 | 38 | return lines 39 | 40 | 41 | def list2file(filepath,mylist): 42 | mylist='\n'.join(mylist) 43 | with open(filepath,'w') as f: 44 | f.writelines(mylist) 45 | 46 | 47 | def items2list(x): 48 | """ 49 | x - string of comma-separated answer items 50 | """ 51 | return [l.strip() for l in x.split(',')] 52 | 53 | 54 | def fuzzy_set_membership_measure(x,A,m): 55 | """ 56 | Set membership measure. 57 | x: element 58 | A: set of elements 59 | m: point-wise element-to-element measure m(a,b) ~ similarity(a,b) 60 | 61 | This function implements a fuzzy set membership measure: 62 | m(x \in A) = max_{a \in A} m(x,a) 63 | """ 64 | return 0 if A==[] else max(map(lambda a: m(x,a), A)) 65 | 66 | 67 | def score_it(A,T,m): 68 | """ 69 | A: list of A items 70 | T: list of T items 71 | m: set membership measure 72 | m(a \in A) gives a membership quality of a into A 73 | 74 | This function implements a fuzzy accuracy score: 75 | score(A,T) = min{prod_{a \in A} m(a \in T), prod_{t \in T} m(t \in A)} 76 | where A and T are set representations of the answers 77 | and m is a measure 78 | """ 79 | if A==[] and T==[]: 80 | return 1 81 | 82 | # print A,T 83 | 84 | score_left=0 if A==[] else prod(map(lambda a: m(a,T), A)) 85 | score_right=0 if T==[] else prod(map(lambda t: m(t,A),T)) 86 | return min(score_left,score_right) 87 | 88 | 89 | # implementations of different measure functions 90 | def dirac_measure(a,b): 91 | """ 92 | Returns 1 iff a=b and 0 otherwise. 93 | """ 94 | if a==[] or b==[]: 95 | return 0.0 96 | return float(a==b) 97 | 98 | 99 | def wup_measure(a,b,similarity_threshold=0.925): 100 | """ 101 | Returns Wu-Palmer similarity score. 102 | More specifically, it computes: 103 | max_{x \in interp(a)} max_{y \in interp(b)} wup(x,y) 104 | where interp is an 'interpretation field' 105 | """ 106 | def get_semantic_field(a): 107 | weight = 1.0 108 | semantic_field = wn.synsets(a,pos=wn.NOUN) 109 | return (semantic_field,weight) 110 | 111 | 112 | def get_stem_word(a): 113 | """ 114 | Sometimes answer has form word\d+:wordid.
115 | If so we return word and downweight 116 | """ 117 | weight = 1.0 118 | return (a,weight) 119 | 120 | 121 | global_weight=1.0 122 | 123 | (a,global_weight_a)=get_stem_word(a) 124 | (b,global_weight_b)=get_stem_word(b) 125 | global_weight = min(global_weight_a,global_weight_b) 126 | 127 | if a==b: 128 | # they are the same 129 | return 1.0*global_weight 130 | 131 | if a==[] or b==[]: 132 | return 0 133 | 134 | 135 | interp_a,weight_a = get_semantic_field(a) 136 | interp_b,weight_b = get_semantic_field(b) 137 | 138 | if interp_a == [] or interp_b == []: 139 | return 0 140 | 141 | # we take the most optimistic interpretation 142 | global_max=0.0 143 | for x in interp_a: 144 | for y in interp_b: 145 | local_score=x.wup_similarity(y) 146 | if local_score > global_max: 147 | global_max=local_score 148 | 149 | # we need to use the semantic fields and therefore we downweight 150 | # unless the score is high which indicates both are synonyms 151 | if global_max < similarity_threshold: 152 | interp_weight = 0.1 153 | else: 154 | interp_weight = 1.0 155 | 156 | final_score=global_max*weight_a*weight_b*interp_weight*global_weight 157 | return final_score 158 | 159 | 160 | def get_metric_score(gt_list, pred_list, threshold): 161 | """ 162 | Computes metric score. 163 | 164 | In: 165 | gt_list - list of gt answers 166 | pred_list - list of predicted answers 167 | threshold 168 | 169 | Out: 170 | metric score 171 | """ 172 | if threshold == -1: 173 | our_element_membership=dirac_measure 174 | else: 175 | our_element_membership=lambda x,y: wup_measure(x,y,threshold) 176 | 177 | our_set_membership=\ 178 | lambda x,A: fuzzy_set_membership_measure(x,A,our_element_membership) 179 | 180 | score_list=[score_it(items2list(ta),items2list(pa),our_set_membership) 181 | for (ta,pa) in zip(gt_list,pred_list)] 182 | 183 | #final_score=sum(map(lambda x:float(x)/float(len(score_list)),score_list)) 184 | final_score=float(sum(score_list))/float(len(score_list)) 185 | return final_score 186 | 187 | 188 | def get_class_metric_score(gt_list, pred_list, threshold): 189 | """ 190 | Computes class-based metric score. 191 | 192 | In: 193 | gt_list - list of gt answers 194 | pred_list - list of predicted answers 195 | threshold 196 | 197 | Out: 198 | class-based metric score 199 | """ 200 | # creates abstract classes 201 | gt_abstract_classes = set(gt_list) 202 | # partition wrt. 
abstract classes 203 | class_scores = {} 204 | for abstract_class in gt_abstract_classes: 205 | tmp = [(x,k) for k,x in enumerate(gt_list) if x == abstract_class] 206 | gt_list_new, gt_indices = zip(*tmp) 207 | gt_list_new = list(gt_list_new) 208 | gt_indices = list(gt_indices) 209 | pred_list_new = [] 210 | for curr_index in gt_indices: 211 | pred_list_new.append(pred_list[curr_index]) 212 | score = get_metric_score(gt_list_new, pred_list_new, threshold) 213 | class_scores[abstract_class] = score 214 | return class_scores 215 | ### 216 | 217 | 218 | if __name__ == '__main__': 219 | 220 | if len(sys.argv) < 4: 221 | print 'Usage: path to true answers, path to predicted answers, threshold' 222 | print 'If threshold is -1, then the standard Accuracy is used' 223 | sys.exit("3 arguments must be given") 224 | 225 | # folders 226 | gt_filepath=sys.argv[1] 227 | pred_filepath=sys.argv[2] 228 | 229 | input_gt=file2list(gt_filepath) 230 | input_pred=file2list(pred_filepath) 231 | 232 | thresh=float(sys.argv[3]) 233 | 234 | if thresh == -1: 235 | print 'standard Accuracy is used' 236 | else: 237 | print 'soft WUPS at %1.2f is used' % thresh 238 | 239 | final_score = get_metric_score(input_gt, input_pred, thresh) 240 | 241 | # filtering to obtain the results 242 | #print 'full score:', score_list 243 | print 'exact final score:', final_score 244 | print 'final score is %2.2f%%' % (final_score * 100.0) 245 | 246 | -------------------------------------------------------------------------------- /kraino/core/visual_model_zoo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Different Visual Architectures. 3 | 4 | Inspired by: 5 | baraldilorenzo vgg16 model for Keras 6 | MarcBS caffe to keras transformation 7 | 8 | Author: Mateusz Malinowski 9 | Email: mmalinow@mpi-inf.mpg.de 10 | """ 11 | 12 | from keras.models import Sequential 13 | 14 | from keras.layers.convolutional import Convolution2D 15 | from keras.layers.convolutional import MaxPooling2D 16 | from keras.layers.convolutional import ZeroPadding2D 17 | 18 | from keras.layers.core import Dense 19 | from keras.layers.core import Dropout 20 | from keras.layers.core import Flatten 21 | from keras.layers.core import Reshape 22 | 23 | 24 | ### 25 | # Functions 26 | ### 27 | def imagenet_mean_preprocess_image_tensor_fun(x): 28 | """ 29 | In: 30 | x - image tensor of size (#images, #channels, #dim1, #dim2) 31 | 32 | Out: 33 | image tensor x with subtracted imagenet mean 34 | """ 35 | y = x 36 | y[:,0,:,:] -= 103.939 37 | y[:,1,:,:] -= 116.779 38 | y[:,2,:,:] -= 123.68 39 | return y 40 | 41 | 42 | def get_visual_features( 43 | data_provider, 44 | trainable_perception_name, 45 | train_or_test, 46 | image_names_list, 47 | parts_extractor, 48 | max_parts, 49 | perception, 50 | layer, 51 | second_layer, 52 | preprocess_image_tensor_fun 53 | ): 54 | """ 55 | In: 56 | data_provider - data provider function 57 | train_or_test - training, validation, or test set 58 | image_names_list - list with names of images 59 | parts_extractor - name for the parts extractor 60 | max_parts - maximal number of parts if these are extracted 61 | perception - name of the perception model 62 | if the perception is fixed (pre-trained) 63 | layer - name for the perception's layer 64 | second_layer - name for the second parception's layer 65 | trainable_perception_name - name for the perception 66 | if the perception is not fixed 67 | preprocess_image_tensor_fun - image preprocessing function; 68 | only if trainable_perception_name is 
not 'none' 69 | 70 | Out: 71 | image features, or image tensor 72 | """ 73 | if trainable_perception_name == 'none': 74 | visual_features = data_provider['perception']( 75 | train_or_test=train_or_test, 76 | names_list=image_names_list, 77 | parts_extractor=parts_extractor, 78 | max_parts=max_parts, 79 | perception=perception, 80 | layer=layer, 81 | second_layer=second_layer) 82 | else: 83 | visual_features = preprocess_image_tensor_fun( 84 | data_provider['images']( 85 | train_or_test=train_or_test, 86 | names_list=image_names_list)) 87 | return visual_features 88 | 89 | 90 | ### 91 | # Abstract building visual models. 92 | ### 93 | class AbstractVisualModel(): 94 | """ 95 | Abstract class to build visual models. 96 | """ 97 | def __init__(self, visual_dim, weights_path=None): 98 | """ 99 | In: 100 | visual_dim - dimensionality of the input space; 101 | it can be a tuple, or a scalar 102 | weights_path - path to the weights to load, by default None 103 | """ 104 | self._weights_path = weights_path 105 | self._visual_dim = visual_dim 106 | 107 | def create(self): 108 | """ 109 | Creates a model. 110 | 111 | Out: 112 | model 113 | """ 114 | raise NotImplementedError() 115 | 116 | def get_dimensionality(self): 117 | """ 118 | Out: 119 | Returns an output dimensionality of this layer. 120 | """ 121 | raise NotImplementedError() 122 | 123 | 124 | ### 125 | # Concrete building visual models. 126 | ### 127 | class SequentialVisualModelEmpty(AbstractVisualModel): 128 | """ 129 | Empty visual model. No model. 130 | """ 131 | def create(self): 132 | model = Sequential() 133 | model.add(Reshape( 134 | input_shape=(self._visual_dim,), 135 | dims=(self._visual_dim,))) 136 | return model 137 | 138 | def get_dimensionality(self): 139 | return self._visual_dim 140 | 141 | 142 | class SequentialVisualModelVGG16(AbstractVisualModel): 143 | """ 144 | Sequential visual model. 
145 | 146 | VGG16 147 | """ 148 | def create(self): 149 | model = Sequential() 150 | 151 | model.add(ZeroPadding2D((1,1), input_shape=self._visual_dim)) 152 | model.add(Convolution2D(64, 3, 3, activation='relu')) 153 | model.add(ZeroPadding2D((1,1))) 154 | model.add(Convolution2D(64, 3, 3, activation='relu')) 155 | model.add(MaxPooling2D((2,2), strides=(2,2))) 156 | 157 | model.add(ZeroPadding2D((1,1))) 158 | model.add(Convolution2D(128, 3, 3, activation='relu')) 159 | model.add(ZeroPadding2D((1,1))) 160 | model.add(Convolution2D(128, 3, 3, activation='relu')) 161 | model.add(MaxPooling2D((2,2), strides=(2,2))) 162 | 163 | model.add(ZeroPadding2D((1,1))) 164 | model.add(Convolution2D(256, 3, 3, activation='relu')) 165 | model.add(ZeroPadding2D((1,1))) 166 | model.add(Convolution2D(256, 3, 3, activation='relu')) 167 | model.add(ZeroPadding2D((1,1))) 168 | model.add(Convolution2D(256, 3, 3, activation='relu')) 169 | model.add(MaxPooling2D((2,2), strides=(2,2))) 170 | 171 | model.add(ZeroPadding2D((1,1))) 172 | model.add(Convolution2D(512, 3, 3, activation='relu')) 173 | model.add(ZeroPadding2D((1,1))) 174 | model.add(Convolution2D(512, 3, 3, activation='relu')) 175 | model.add(ZeroPadding2D((1,1))) 176 | model.add(Convolution2D(512, 3, 3, activation='relu')) 177 | model.add(MaxPooling2D((2,2), strides=(2,2))) 178 | 179 | model.add(ZeroPadding2D((1,1))) 180 | model.add(Convolution2D(512, 3, 3, activation='relu')) 181 | model.add(ZeroPadding2D((1,1))) 182 | model.add(Convolution2D(512, 3, 3, activation='relu')) 183 | model.add(ZeroPadding2D((1,1))) 184 | model.add(Convolution2D(512, 3, 3, activation='relu')) 185 | model.add(MaxPooling2D((2,2), strides=(2,2))) 186 | 187 | model.add(Flatten()) 188 | model.add(Dense(4096, activation='relu')) 189 | model.add(Dropout(0.5)) 190 | 191 | self._model_output_dim = 4096 192 | model.add(Dense(self._model_output_dim, activation='relu')) 193 | model.add(Dropout(0.5)) 194 | 195 | #model.add(Dense(1000, activation='softmax')) 196 | 197 | if self._weights_path: 198 | model.load_weights(self._weights_path) 199 | return model 200 | 201 | def get_dimensionality(self): 202 | return self._model_output_dim 203 | 204 | 205 | class SequentialVisualModelVeryShallowCNN(AbstractVisualModel): 206 | """ 207 | Sequential visual model. 208 | 209 | Small CNN. 210 | """ 211 | def create(self): 212 | model = Sequential() 213 | 214 | model.add(ZeroPadding2D((1,1), input_shape=self._visual_dim)) 215 | model.add(Convolution2D(64, 3, 3, activation='relu')) 216 | 217 | model.add(Flatten()) 218 | self._model_output_dim = 4096 219 | model.add(Dense(self._model_output_dim, activation='relu')) 220 | model.add(Dropout(0.5)) 221 | 222 | if self._weights_path: 223 | model.load_weights(self._weights_path) 224 | return model 225 | 226 | def get_dimensionality(self): 227 | return self._model_output_dim 228 | 229 | 230 | ### 231 | # Selector 232 | ### 233 | select_sequential_visual_model = { 234 | 'none':SequentialVisualModelEmpty, 235 | 'vgg16':SequentialVisualModelVGG16, 236 | 'very_shallow_cnn':SequentialVisualModelVeryShallowCNN 237 | } 238 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaEval.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | __author__='aagrawal' 4 | 5 | # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link: 6 | # (https://github.com/tylin/coco-caption/blob/master/pycocoevalcap/eval.py). 
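# A minimal usage sketch of this evaluator together with the VQA helper
# from kraino/utils/vqaTools/vqa.py. The JSON paths are placeholders for
# whatever annotation, question and result files are being evaluated:
#
#   from kraino.utils.vqaTools.vqa import VQA
#   from kraino.utils.vqaEvaluation.vqaEval import VQAEval
#
#   vqa = VQA('annotations.json', 'questions.json')          # ground truth
#   vqaRes = vqa.loadRes('results.json', 'questions.json')   # predictions
#   vqaEval = VQAEval(vqa, vqaRes, n=2)   # n: decimals in reported accuracy
#   vqaEval.evaluate()
#   print vqaEval.accuracy['overall']
#   print vqaEval.accuracy['perQuestionType']
#   print vqaEval.accuracy['perAnswerType']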
7 | import sys 8 | import re 9 | 10 | class VQAEval: 11 | def __init__(self, vqa, vqaRes, n=2): 12 | self.n = n 13 | self.accuracy = {} 14 | self.evalQA = {} 15 | self.evalQuesType = {} 16 | self.evalAnsType = {} 17 | self.vqa = vqa 18 | self.vqaRes = vqaRes 19 | self.params = {'question_id': vqa.getQuesIds()} 20 | self.contractions = {"aint": "ain't", "arent": "aren't", "cant": "can't", "couldve": "could've", "couldnt": "couldn't", \ 21 | "couldn'tve": "couldn’t’ve", "couldnt’ve": "couldn’t’ve", "didnt": "didn’t", "doesnt": "doesn’t", "dont": "don’t", "hadnt": "hadn’t", \ 22 | "hadnt’ve": "hadn’t’ve", "hadn'tve": "hadn’t’ve", "hasnt": "hasn’t", "havent": "haven’t", "hed": "he’d", "hed’ve": "he’d’ve", \ 23 | "he’dve": "he’d’ve", "hes": "he’s", "howd": "how’d", "howll": "how’ll", "hows": "how’s", "Id’ve": "I’d’ve", "I’dve": "I’d’ve", \ 24 | "Im": "I’m", "Ive": "I’ve", "isnt": "isn’t", "itd": "it’d", "itd’ve": "it’d’ve", "it’dve": "it’d’ve", "itll": "it’ll", "let’s": "let’s", \ 25 | "maam": "ma’am", "mightnt": "mightn’t", "mightnt’ve": "mightn’t’ve", "mightn’tve": "mightn’t’ve", "mightve": "might’ve", \ 26 | "mustnt": "mustn’t", "mustve": "must’ve", "neednt": "needn’t", "notve": "not’ve", "oclock": "o’clock", "oughtnt": "oughtn’t", \ 27 | "ow’s’at": "’ow’s’at", "’ows’at": "’ow’s’at", "’ow’sat": "’ow’s’at", "shant": "shan’t", "shed’ve": "she’d’ve", "she’dve": "she’d’ve", \ 28 | "she’s": "she’s", "shouldve": "should’ve", "shouldnt": "shouldn’t", "shouldnt’ve": "shouldn’t’ve", "shouldn’tve": "shouldn’t’ve", \ 29 | "somebody’d": "somebodyd", "somebodyd’ve": "somebody’d’ve", "somebody’dve": "somebody’d’ve", "somebodyll": "somebody’ll", \ 30 | "somebodys": "somebody’s", "someoned": "someone’d", "someoned’ve": "someone’d’ve", "someone’dve": "someone’d’ve", \ 31 | "someonell": "someone’ll", "someones": "someone’s", "somethingd": "something’d", "somethingd’ve": "something’d’ve", \ 32 | "something’dve": "something’d’ve", "somethingll": "something’ll", "thats": "that’s", "thered": "there’d", "thered’ve": "there’d’ve", \ 33 | "there’dve": "there’d’ve", "therere": "there’re", "theres": "there’s", "theyd": "they’d", "theyd’ve": "they’d’ve", \ 34 | "they’dve": "they’d’ve", "theyll": "they’ll", "theyre": "they’re", "theyve": "they’ve", "twas": "’twas", "wasnt": "wasn’t", \ 35 | "wed’ve": "we’d’ve", "we’dve": "we’d’ve", "weve": "we've", "werent": "weren’t", "whatll": "what’ll", "whatre": "what’re", \ 36 | "whats": "what’s", "whatve": "what’ve", "whens": "when’s", "whered": "where’d", "wheres": "where's", "whereve": "where’ve", \ 37 | "whod": "who’d", "whod’ve": "who’d’ve", "who’dve": "who’d’ve", "wholl": "who’ll", "whos": "who’s", "whove": "who've", "whyll": "why’ll", \ 38 | "whyre": "why’re", "whys": "why’s", "wont": "won’t", "wouldve": "would’ve", "wouldnt": "wouldn’t", "wouldnt’ve": "wouldn’t’ve", \ 39 | "wouldn’tve": "wouldn’t’ve", "yall": "y’all", "yall’ll": "y’all’ll", "y’allll": "y’all’ll", "yall’d’ve": "y’all’d’ve", \ 40 | "y’alld’ve": "y’all’d’ve", "y’all’dve": "y’all’d’ve", "youd": "you’d", "youd’ve": "you’d’ve", "you’dve": "you’d’ve", \ 41 | "youll": "you’ll", "youre": "you’re", "youve": "you’ve"} 42 | self.manualMap = { 'none': '0', 43 | 'zero': '0', 44 | 'one': '1', 45 | 'two': '2', 46 | 'three': '3', 47 | 'four': '4', 48 | 'five': '5', 49 | 'six': '6', 50 | 'seven': '7', 51 | 'eight': '8', 52 | 'nine': '9', 53 | 'ten': '10' 54 | } 55 | self.articles = ['a', 56 | 'an', 57 | 'the' 58 | ] 59 | 60 | 61 | self.periodStrip = re.compile("(?!<=\d)(\.)(?!\d)") 62 | self.commaStrip = 
re.compile("(\d)(\,)(\d)") 63 | self.punct = [';', r"/", '[', ']', '"', '{', '}', 64 | '(', ')', '=', '+', '\\', '_', '-', 65 | '>', '<', '@', '`', ',', '?', '!'] 66 | 67 | 68 | def evaluate(self, quesIds=None): 69 | if quesIds == None: 70 | quesIds = [quesId for quesId in self.params['question_id']] 71 | gts = {} 72 | res = {} 73 | for quesId in quesIds: 74 | gts[quesId] = self.vqa.qa[quesId] 75 | res[quesId] = self.vqaRes.qa[quesId] 76 | 77 | # ================================================= 78 | # Compute accuracy 79 | # ================================================= 80 | accQA = [] 81 | accQuesType = {} 82 | accAnsType = {} 83 | print "computing accuracy" 84 | step = 0 85 | for quesId in quesIds: 86 | resAns = res[quesId]['answer'] 87 | resAns = resAns.replace('\n', ' ') 88 | resAns = resAns.replace('\t', ' ') 89 | resAns = resAns.strip() 90 | resAns = self.processPunctuation(resAns) 91 | resAns = self.processDigitArticle(resAns) 92 | gtAcc = [] 93 | gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']] 94 | if len(set(gtAnswers)) > 1: 95 | for ansDic in gts[quesId]['answers']: 96 | ansDic['answer'] = self.processPunctuation(ansDic['answer']) 97 | for gtAnsDatum in gts[quesId]['answers']: 98 | otherGTAns = [item for item in gts[quesId]['answers'] if item!=gtAnsDatum] 99 | matchingAns = [item for item in otherGTAns if item['answer']==resAns] 100 | acc = min(1, float(len(matchingAns))/3) 101 | gtAcc.append(acc) 102 | quesType = gts[quesId]['question_type'] 103 | ansType = gts[quesId]['answer_type'] 104 | avgGTAcc = float(sum(gtAcc))/len(gtAcc) 105 | accQA.append(avgGTAcc) 106 | if quesType not in accQuesType: 107 | accQuesType[quesType] = [] 108 | accQuesType[quesType].append(avgGTAcc) 109 | if ansType not in accAnsType: 110 | accAnsType[ansType] = [] 111 | accAnsType[ansType].append(avgGTAcc) 112 | self.setEvalQA(quesId, avgGTAcc) 113 | self.setEvalQuesType(quesId, quesType, avgGTAcc) 114 | self.setEvalAnsType(quesId, ansType, avgGTAcc) 115 | if step%100 == 0: 116 | self.updateProgress(step/float(len(quesIds))) 117 | step = step + 1 118 | 119 | self.setAccuracy(accQA, accQuesType, accAnsType) 120 | print "Done computing accuracy" 121 | 122 | def processPunctuation(self, inText): 123 | outText = inText 124 | for p in self.punct: 125 | if (p + ' ' in inText or ' ' + p in inText) or (re.search(self.commaStrip, inText) != None): 126 | outText = outText.replace(p, '') 127 | else: 128 | outText = outText.replace(p, ' ') 129 | outText = self.periodStrip.sub("", 130 | outText, 131 | re.UNICODE) 132 | return outText 133 | 134 | def processDigitArticle(self, inText): 135 | outText = [] 136 | tempText = inText.lower().split() 137 | for word in tempText: 138 | word = self.manualMap.setdefault(word, word) 139 | if word not in self.articles: 140 | outText.append(word) 141 | else: 142 | pass 143 | for wordId, word in enumerate(outText): 144 | if word in self.contractions: 145 | outText[wordId] = self.contractions[word] 146 | outText = ' '.join(outText) 147 | return outText 148 | 149 | def setAccuracy(self, accQA, accQuesType, accAnsType): 150 | self.accuracy['overall'] = round(100*float(sum(accQA))/len(accQA), self.n) 151 | self.accuracy['perQuestionType'] = {quesType: round(100*float(sum(accQuesType[quesType]))/len(accQuesType[quesType]), self.n) for quesType in accQuesType} 152 | self.accuracy['perAnswerType'] = {ansType: round(100*float(sum(accAnsType[ansType]))/len(accAnsType[ansType]), self.n) for ansType in accAnsType} 153 | 154 | def setEvalQA(self, quesId, acc): 155 | 
self.evalQA[quesId] = round(100*acc, self.n) 156 | 157 | def setEvalQuesType(self, quesId, quesType, acc): 158 | if quesType not in self.evalQuesType: 159 | self.evalQuesType[quesType] = {} 160 | self.evalQuesType[quesType][quesId] = round(100*acc, self.n) 161 | 162 | def setEvalAnsType(self, quesId, ansType, acc): 163 | if ansType not in self.evalAnsType: 164 | self.evalAnsType[ansType] = {} 165 | self.evalAnsType[ansType][quesId] = round(100*acc, self.n) 166 | 167 | def updateProgress(self, progress): 168 | barLength = 20 169 | status = "" 170 | if isinstance(progress, int): 171 | progress = float(progress) 172 | if not isinstance(progress, float): 173 | progress = 0 174 | status = "error: progress var must be float\r\n" 175 | if progress < 0: 176 | progress = 0 177 | status = "Halt...\r\n" 178 | if progress >= 1: 179 | progress = 1 180 | status = "Done...\r\n" 181 | block = int(round(barLength*progress)) 182 | text = "\rFinshed Percent: [{0}] {1}% {2}".format( "#"*block + "-"*(barLength-block), int(progress*100), status) 183 | sys.stdout.write(text) 184 | sys.stdout.flush() 185 | 186 | -------------------------------------------------------------------------------- /kraino/utils/vqaTools/vqa.py: -------------------------------------------------------------------------------- 1 | __author__ = 'aagrawal' 2 | __version__ = '0.9' 3 | 4 | """ 5 | Modified by Mateusz Malinowski [mmalinow@mpi-inf.mpg.de] to work 6 | with test datasets without annotations. 7 | """ 8 | 9 | 10 | # Interface for accessing the VQA dataset. 11 | 12 | # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link: 13 | # (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py). 14 | 15 | # The following functions are defined: 16 | # VQA - VQA class that loads VQA annotation file and prepares data structures. 17 | # getQuesIds - Get question ids that satisfy given filter conditions. 18 | # getImgIds - Get image ids that satisfy given filter conditions. 19 | # loadQA - Load questions and answers with the specified question ids. 20 | # showQA - Display the specified questions and answers. 21 | # loadRes - Load result file and create result object. 22 | 23 | # Help on each function can be accessed by: "help(COCO.function)" 24 | 25 | import json 26 | import datetime 27 | import copy 28 | 29 | class VQA: 30 | def __init__(self, annotation_file=None, question_file=None): 31 | """ 32 | Constructor of VQA helper class for reading and visualizing questions and answers. 33 | :param annotation_file (str): location of VQA annotation file 34 | :return: 35 | """ 36 | def init_empty_dataset(): 37 | d = {} 38 | d['info'] = 'empty' 39 | d['annotations'] = [] 40 | return d 41 | 42 | # load dataset 43 | self.dataset = {} 44 | self.questions = {} 45 | self.qa = {} 46 | self.qqa = {} 47 | self.imgToQA = {} 48 | if not question_file == None: 49 | print 'loading VQA annotations and questions into memory...' 50 | time_t = datetime.datetime.utcnow() 51 | if annotation_file is not None: 52 | dataset = json.load(open(annotation_file, 'r')) 53 | else: 54 | dataset = init_empty_dataset() 55 | questions = json.load(open(question_file, 'r')) 56 | print datetime.datetime.utcnow() - time_t 57 | self.dataset = dataset 58 | self.questions = questions 59 | self.createIndex() 60 | 61 | def createIndex(self): 62 | # create index 63 | print 'creating index...' 
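        # Three lookup tables are built below:
        #   imgToQA : image_id    -> list of annotation dicts for that image
        #   qa      : question_id -> its annotation dict
        #   qqa     : question_id -> its question dict (text, image_id, ...)
        # They back getQuesIds, getImgIds and loadQA further down in this class.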
64 | imgToQA = {ann['image_id']: [] for ann in self.dataset['annotations']} 65 | qa = {ann['question_id']: [] for ann in self.dataset['annotations']} 66 | qqa = {ann['question_id']: [] for ann in self.dataset['annotations']} 67 | for ann in self.dataset['annotations']: 68 | imgToQA[ann['image_id']] += [ann] 69 | qa[ann['question_id']] = ann 70 | for ques in self.questions['questions']: 71 | qqa[ques['question_id']] = ques 72 | print 'index created!' 73 | 74 | # create class members 75 | self.qa = qa 76 | self.qqa = qqa 77 | self.imgToQA = imgToQA 78 | 79 | def info(self): 80 | """ 81 | Print information about the VQA annotation file. 82 | :return: 83 | """ 84 | for key, value in self.dataset['info'].items(): 85 | print '%s: %s'%(key, value) 86 | 87 | def getQuesIds(self, imgIds=[], quesTypes=[], ansTypes=[]): 88 | """ 89 | Get question ids that satisfy given filter conditions. default skips that filter 90 | :param imgIds (int array) : get question ids for given imgs 91 | quesTypes (str array) : get question ids for given question types 92 | ansTypes (str array) : get question ids for given answer types 93 | :return: ids (int array) : integer array of question ids 94 | """ 95 | imgIds = imgIds if type(imgIds) == list else [imgIds] 96 | quesTypes = quesTypes if type(quesTypes) == list else [quesTypes] 97 | ansTypes = ansTypes if type(ansTypes) == list else [ansTypes] 98 | 99 | if len(imgIds) == len(quesTypes) == len(ansTypes) == 0: 100 | anns = self.dataset['annotations'] 101 | else: 102 | if not len(imgIds) == 0: 103 | anns = sum([self.imgToQA[imgId] for imgId in imgIds if imgId in self.imgToQA],[]) 104 | else: 105 | anns = self.dataset['annotations'] 106 | anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes] 107 | anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes] 108 | ids = [ann['question_id'] for ann in anns] 109 | return ids 110 | 111 | def getImgIds(self, quesIds=[], quesTypes=[], ansTypes=[]): 112 | """ 113 | Get image ids that satisfy given filter conditions. default skips that filter 114 | :param quesIds (int array) : get image ids for given question ids 115 | quesTypes (str array) : get image ids for given question types 116 | ansTypes (str array) : get image ids for given answer types 117 | :return: ids (int array) : integer array of image ids 118 | """ 119 | quesIds = quesIds if type(quesIds) == list else [quesIds] 120 | quesTypes = quesTypes if type(quesTypes) == list else [quesTypes] 121 | ansTypes = ansTypes if type(ansTypes) == list else [ansTypes] 122 | 123 | if len(quesIds) == len(quesTypes) == len(ansTypes) == 0: 124 | anns = self.dataset['annotations'] 125 | else: 126 | if not len(quesIds) == 0: 127 | anns = sum([self.qa[quesId] for quesId in quesIds if quesId in self.qa],[]) 128 | else: 129 | anns = self.dataset['annotations'] 130 | anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes] 131 | anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes] 132 | ids = [ann['image_id'] for ann in anns] 133 | return ids 134 | 135 | def loadQA(self, ids=[]): 136 | """ 137 | Load questions and answers with the specified question ids. 
138 | :param ids (int array) : integer ids specifying question ids 139 | :return: qa (object array) : loaded qa objects 140 | """ 141 | if type(ids) == list: 142 | return [self.qa[id] for id in ids] 143 | elif type(ids) == int: 144 | return [self.qa[ids]] 145 | 146 | def showQA(self, anns): 147 | """ 148 | Display the specified annotations. 149 | :param anns (array of object): annotations to display 150 | :return: None 151 | """ 152 | if len(anns) == 0: 153 | return 0 154 | for ann in anns: 155 | quesId = ann['question_id'] 156 | print "Question: %s" %(self.qqa[quesId]['question']) 157 | for ans in ann['answers']: 158 | print "Answer %d: %s" %(ans['answer_id'], ans['answer']) 159 | 160 | def loadRes(self, resFile, quesFile): 161 | """ 162 | Load result file and return a result object. 163 | :param resFile (str) : file name of result file 164 | :return: res (obj) : result api object 165 | """ 166 | res = VQA() 167 | res.questions = json.load(open(quesFile)) 168 | res.dataset['info'] = copy.deepcopy(self.questions['info']) 169 | res.dataset['task_type'] = copy.deepcopy(self.questions['task_type']) 170 | res.dataset['data_type'] = copy.deepcopy(self.questions['data_type']) 171 | res.dataset['data_subtype'] = copy.deepcopy(self.questions['data_subtype']) 172 | res.dataset['license'] = copy.deepcopy(self.questions['license']) 173 | 174 | print 'Loading and preparing results... ' 175 | time_t = datetime.datetime.utcnow() 176 | anns = json.load(open(resFile)) 177 | assert type(anns) == list, 'results is not an array of objects' 178 | annsQuesIds = [ann['question_id'] for ann in anns] 179 | assert set(annsQuesIds) == set(self.getQuesIds()), \ 180 | 'Results do not correspond to current VQA set. Either the results do have predictions for all question ids in annotation file or there is one/more questions id that does not belong to the question ids in the annotation file.' 181 | for ann in anns: 182 | quesId = ann['question_id'] 183 | if res.dataset['task_type'] == 'Multiple Choice': 184 | assert ann['answer'] in self.qqa[quesId]['multiple_choices'], 'predicted answer is not one of the multiple choices' 185 | qaAnn = self.qa[quesId] 186 | ann['image_id'] = qaAnn['image_id'] 187 | ann['question_type'] = qaAnn['question_type'] 188 | ann['answer_type'] = qaAnn['answer_type'] 189 | print 'DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds()) 190 | 191 | res.dataset['annotations'] = anns 192 | res.createIndex() 193 | return res 194 | -------------------------------------------------------------------------------- /kraino/utils/input_output_space.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | from __future__ import print_function 4 | 5 | """ 6 | Builds input/output space. 
7 | 8 | Author: Mateusz Malinowski 9 | Email: mmalinow@mpi-inf.mpg.de 10 | """ 11 | 12 | import numpy as np 13 | 14 | from toolz import itemmap 15 | 16 | 17 | __all__ = ['build_vocabulary', 'index_sequence', 18 | 'encode_questions_index','encode_questions_one_hot', 19 | 'encode_answers_one_hot'] 20 | 21 | ### 22 | ### 23 | # Constants 24 | ### 25 | PADDING = '' 26 | UNKNOWN = '' 27 | EOA = '' # end of answer 28 | EOQ = '' # end of question 29 | EXTRA_WORDS_NAMES = [PADDING, UNKNOWN, EOA, EOQ] 30 | EXTRA_WORDS = {PADDING:0, UNKNOWN:1, EOA:2, EOQ:3} 31 | EXTRA_WORDS_ID = itemmap(reversed, EXTRA_WORDS) 32 | 33 | ### 34 | # Functions 35 | ### 36 | def static_vars(**kwargs): 37 | def decorate(func): 38 | for k in kwargs: 39 | setattr(func, k, kwargs[k]) 40 | return func 41 | return decorate 42 | 43 | 44 | @static_vars(counter=len(EXTRA_WORDS)) 45 | def _myinc(d): 46 | """ 47 | Gets a tuple d, and returns d[0]: id. 48 | """ 49 | x = d[0] 50 | _myinc.counter += 1 51 | return (x, _myinc.counter - 1) 52 | 53 | 54 | def build_vocabulary(this_wordcount, extra_words=EXTRA_WORDS, 55 | is_reset=True, truncate_to_most_frequent=0): 56 | """ 57 | Builds vocabulary from wordcount. 58 | It also adds extra words to the vocabulary. 59 | 60 | In: 61 | this_wordcount - dictionary of wordcounts, e.g. {'cpu':3} 62 | extra_words - additional words to build the vocabulary 63 | dictionary of {word: id} 64 | by default {UNKNOWN: 0} 65 | is_reset - if True we restart the vocabulary counting 66 | by defaults False 67 | truncate_to_most_frequent - if positive then the vocabulary 68 | is truncated to 'truncate_to_most_frequent' words; 69 | by default 0 70 | 71 | Out: 72 | word2index - mapping from words to indices 73 | index2word - mapping from indices to words 74 | """ 75 | if is_reset: 76 | _myinc.counter=len(EXTRA_WORDS) 77 | if truncate_to_most_frequent > 0: 78 | sorted_wordcount = dict(sorted( 79 | this_wordcount.items(), key=lambda x:x[1], reverse=True)[:truncate_to_most_frequent]) 80 | this_wordcount = sorted_wordcount 81 | 82 | word2index = itemmap(_myinc, this_wordcount) 83 | if not extra_words == {}: 84 | assert(all([el not in word2index.values() for el in extra_words.values()])) 85 | word2index.update(extra_words) 86 | index2word = itemmap(reversed, word2index) 87 | return word2index, index2word 88 | 89 | 90 | def index_sequence(x, word2index): 91 | """ 92 | Converts list of words into a list of its indices wrt. word2index, that is into 93 | index encoded sequence. 94 | 95 | In: 96 | x - list of lines 97 | word2index - mapping from words to indices 98 | 99 | Out: 100 | a list of the list of indices that encode the words 101 | """ 102 | one_hot_x = [] 103 | for line in x: 104 | line_list = [] 105 | for w in line.split(): 106 | w = w.strip() 107 | if w in word2index: this_ind = word2index[w] 108 | else: this_ind = word2index[UNKNOWN] 109 | line_list.append(this_ind) 110 | one_hot_x.append(line_list) 111 | return one_hot_x 112 | 113 | 114 | def encode_questions_index(x, word2index_x, max_time_steps=None): 115 | """ 116 | Index-based encoding of questions. 117 | 118 | In: 119 | x - list of questions 120 | word2index_x - mapping from question words to indices (inverted vocabulary) 121 | max_time_steps - maximal number of words in the question (max. 
time steps); 122 | if None then all question words are taken; 123 | by default None 124 | Out: 125 | a list of encoded questions 126 | """ 127 | x_modified = [q + ' ' + EOQ for q in x] 128 | if max_time_steps is not None: 129 | x_modified = [' '.join(q.split()[:max_time_steps]) for q in x] 130 | return index_sequence(x_modified, word2index_x) 131 | 132 | 133 | def encode_questions_one_hot(x, word2index_x, max_time_steps): 134 | """ 135 | One-hot encoding of questions. 136 | 137 | In: 138 | x - list of questions 139 | word2index_x - mapping from question words to indices (inverted vocabulary) 140 | max_time_steps - maximal number of words in the sequence (max. time steps) 141 | 142 | Out: 143 | boolean tensor of size: data_size x max_time_steps x vocabulary_size 144 | for a given question and a time step there is only one '1' 145 | """ 146 | X = np.zeros((len(x), max_time_steps, len(word2index_x.keys())), 147 | dtype=np.bool) 148 | # encode questions 149 | for question_no, question in enumerate(x): 150 | question_word_list = question.split() 151 | question_word_list.append(EOQ) 152 | for word_no, word in enumerate(question_word_list): 153 | word = word.strip() 154 | if word_no == max_time_steps - 1: 155 | # we need to finish 156 | this_index = word2index_x[EOQ] 157 | else: 158 | if word in word2index_x: 159 | this_index = word2index_x[word] 160 | else: 161 | this_index = word2index_x[UNKNOWN] 162 | X[question_no, word_no, this_index] = 1 163 | return X 164 | 165 | def encode_questions_dense(x, word_encoder, max_time_steps, 166 | is_remove_question_symbol=False): 167 | """ 168 | Dense representation of questions. 169 | 170 | In: 171 | x - list of questions 172 | word_encoder - encodes words 173 | max_time_steps - maximal number of words in the sequence (max. time steps) 174 | is_remove_question_symbol - true if we remove question symbols from the questions; 175 | by default it is False 176 | 177 | Out: 178 | float tensor of size: data_size x max_time_steps x dense_encoding_size 179 | """ 180 | word_encoder_dim = word_encoder(unicode(x[0].split()[0].strip())).vector.shape[0] 181 | X = np.zeros((len(x), max_time_steps, word_encoder_dim)) 182 | for question_no, question in enumerate(x): 183 | question_word_list = question.split() 184 | if is_remove_question_symbol and question_word_list[-1] == '?': 185 | question_word_list = question_word_list[:-1] 186 | reversed_question_word_list = question_word_list[::-1] 187 | for word_no, raw_word in enumerate(reversed_question_word_list): 188 | word = unicode(raw_word.strip()) 189 | this_representation = word_encoder(word).vector 190 | if max_time_steps - word_no - 1 >= 0: 191 | X[question_no, max_time_steps - word_no - 1, :] = this_representation 192 | else: 193 | break 194 | return X 195 | 196 | 197 | def encode_answers_one_hot(y, word2index_y, max_answer_time_steps=10, 198 | is_only_first_answer_word=False, answer_words_delimiter=','): 199 | """ 200 | One-hot encoding of answers. 201 | If more than first answer word is encoded then the answer words 202 | are modelled as sequence. 203 | 204 | In: 205 | y - list of answers 206 | word2index_y - mapping from answer words to indices (vocabulary) 207 | max_answer_time_steps - maximal number of words in the sequence (max. 
time steps) 208 | by default 10 209 | is_only_first_answer_word - if True then only first answer word is taken 210 | by default False 211 | answer_words_delimiter - a symbol for splitting answer into answer words; 212 | if None is provided then we don't split answer into answer words 213 | (that is the whole answer is an answer word); 214 | by default ',' 215 | 216 | Out: 217 | Y - boolean matrix of size: 218 | data_size x vocabulary_size if there is only single answer word 219 | data_size x max_answer_time_steps x vocabulary_size otherwise 220 | the matrix is padded 221 | for a given answer and a time step there is only one '1' 222 | y_gt - list of answers 223 | the same as input 'y' if is_only_first_answer_word==False 224 | only first words from 'y' if is_only_first_answer_word==True 225 | """ 226 | # encode answers 227 | if is_only_first_answer_word: 228 | Y = np.zeros((len(y), len(word2index_y.keys())), dtype=np.bool) 229 | y_gt = [] 230 | else: 231 | Y = np.zeros((len(y), max_answer_time_steps, len(word2index_y.keys())), 232 | dtype=np.bool) 233 | y_gt = y 234 | 235 | if answer_words_delimiter is None: 236 | assert(is_only_first_answer_word==True) 237 | 238 | for answer_no, answer in enumerate(y): 239 | if answer_words_delimiter is not None: 240 | answer_split = answer.split(answer_words_delimiter) 241 | else: 242 | answer_split = [answer] 243 | for word_no, word in enumerate(answer_split): 244 | word = word.strip() 245 | if is_only_first_answer_word: 246 | y_gt.append(word) 247 | if word in word2index_y: 248 | Y[answer_no, word2index_y[word]] = 1 249 | else: 250 | Y[answer_no, word2index_y[UNKNOWN]] = 1 251 | break 252 | else: 253 | if word_no == max_answer_time_steps - 1: 254 | break 255 | if word in word2index_y: 256 | Y[answer_no, word_no, word2index_y[word]] = 1 257 | else: 258 | Y[answer_no, word_no, word2index_y[UNKNOWN]] = 1 259 | if not is_only_first_answer_word: 260 | Y[answer_no, 261 | min(len(answer_split), max_answer_time_steps-1), 262 | word2index_y[EOA]] = 1 263 | return Y, y_gt 264 | 265 | 266 | def shift(X, new_vector=None, time_axis=1): 267 | """ 268 | Shifts input X along time_axis by one. 269 | At the new place it introduces new_word_id. 270 | The method doesn't change the size of X, so 271 | the last column along time axis is forgotten. 272 | 273 | In: 274 | X - input array; 275 | X has to have one more dimension than time_axis, 276 | so if time_axis == 1 then X has 3 dimensions (0,1,2) 277 | new_vector - new vector that replaces the column at time axis; 278 | if None, then the last column is added at the first position; 279 | by default None 280 | time_axis - axis where shifting happens 281 | Out: 282 | shifted version of X along the time axis 283 | """ 284 | tmp = np.roll(X, 1, time_axis) 285 | if new_vector is None: 286 | return tmp 287 | if time_axis==0: 288 | tmp[0,:] = new_vector 289 | elif time_axis==1: 290 | tmp[:,0,:] = new_vector 291 | elif time_axis==2: 292 | tmp[:,:,0,:] = new_vector 293 | elif time_axis==3: 294 | tmp[:,:,:,0,:] = new_vector 295 | else: 296 | raise NotImplementedError 297 | return tmp 298 | 299 | 300 | def shift_with_index_vector(X, index, size, time_axis, value=1, dtype=np.bool): 301 | """ 302 | Shifts X along time_axis, and inserts a one-hot vector at the first 303 | column at this axis. 
304 | 305 | In: 306 | X - n-array 307 | index - index for value, 308 | the other elements of the corresponding vector are 0 309 | time_axis - axis where shifting happens 310 | value - value to place at index; 311 | by default 1 312 | dtype - type of the new vector; 313 | by default np.bool 314 | """ 315 | tmp = np.zeros(size, dtype=dtype) 316 | tmp[..., index] = value 317 | return shift(X, tmp, time_axis) 318 | 319 | 320 | -------------------------------------------------------------------------------- /kraino/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | """ 4 | Extra set of callbacks. 5 | 6 | Author: Mateusz Malinowski 7 | Email: mmalinow@mpi-inf.mpg.de 8 | """ 9 | 10 | 11 | import random 12 | import warnings 13 | import numpy as np 14 | 15 | #from bokeh.plotting import cursession 16 | #from bokeh.plotting import figure 17 | #from bokeh.plotting import push 18 | #from bokeh.plotting import show 19 | 20 | from keras.callbacks import Callback as KerasCallback 21 | 22 | #from agnez.keras_callbacks import BokehCallback 23 | 24 | from ..utils import print_metrics 25 | from ..utils.read_write import dict2file 26 | from ..utils.read_write import print_qa 27 | 28 | 29 | def is_era_begin(epoch, epoch_interval): 30 | return (epoch+1) % epoch_interval == 0 or epoch == 0 31 | 32 | 33 | def is_era_end(epoch, epoch_interval): 34 | return (epoch+1) % epoch_interval == 0 35 | 36 | 37 | ### 38 | # Storing callbacks 39 | ### 40 | class StoreModelWeightsOnEraEnd(KerasCallback): 41 | def __init__(self, filepath, epoch_interval, verbose=0): 42 | """ 43 | In: 44 | filepath - formattable filepath; possibilities: 45 | * weights.{epoch:02d} 46 | * weights.{era:02d} 47 | epoch_interval - 48 | number of epochs that must be passed from the previous saving 49 | verbose - if nonzero then print out information on stdout; 50 | by default 0 51 | """ 52 | super(KerasCallback, self).__init__() 53 | self.filepath = filepath 54 | self.epoch_interval = epoch_interval 55 | self.verbose = verbose 56 | self.era = 0 57 | 58 | def on_epoch_end(self, epoch, logs={}): 59 | if is_era_end(epoch, self.epoch_interval): 60 | filepath = self.filepath.format( 61 | epoch=epoch, era=self.era, **logs) 62 | if self.verbose > 0: 63 | print("Epoch %05d: saving model to %s" % (epoch, filepath)) 64 | self.model.save_weights(filepath, overwrite=True) 65 | self.era += 1 66 | ### 67 | 68 | ### 69 | # Printing callbacks 70 | ### 71 | class PrintOnEraBegin(KerasCallback): 72 | def __init__(self, epoch_interval, message='Era {era:02d}'): 73 | """ 74 | In: 75 | epoch_interval - 76 | number of epochs that must be passed between two consecutive 77 | invocations of this callback 78 | message - 79 | formattable message to show; 80 | by default "Era {era:02d}" showing the current era 81 | """ 82 | self.epoch_interval = epoch_interval 83 | self.era = 0 84 | self.message = message 85 | 86 | def on_epoch_begin(self, epoch, logs={}): 87 | if is_era_begin(epoch, self.epoch_interval): 88 | print() 89 | print('-' * 50) 90 | print(self.message.format(era=self.era)) 91 | self.era += 1 92 | 93 | 94 | class PrintPerformanceMetricOnEraEnd(KerasCallback): 95 | def __init__(self, X, y, temperature, index2word_y, 96 | metric_name, epoch_interval, extra_vars, 97 | verbosity_path='logs/performance.log', verbose=1): 98 | """ 99 | In: 100 | X - encoded input 101 | y - raw expected output 102 | temperature - temperature for the predictions; 103 | the colder the temperature 
the more stable answers 104 | index2word_y - mapping from the indices to words (in the y-domain) 105 | metric_name - name of the performance metric 106 | epoch_interval - 107 | number of epochs that must be passed between two consecutive 108 | invocations of this callback 109 | extra_vars - dictionary of extra variables 110 | verbosity path - path to dumb the logs 111 | verbose - verbosity level; 112 | by default 1 113 | """ 114 | self.X = X 115 | self.y = y 116 | self.temperature = temperature 117 | self.index2word_y = index2word_y 118 | self.metric_name = metric_name 119 | self.epoch_interval = epoch_interval 120 | self.extra_vars = extra_vars 121 | self.verbosity_path = verbosity_path 122 | self.verbose = verbose 123 | self.era = 0 124 | 125 | def on_epoch_end(self, epoch, logs={}): 126 | if is_era_end(epoch, self.epoch_interval): 127 | answer_pred = self.model.decode_predictions( 128 | X=self.X, 129 | temperature=self.temperature, 130 | index2word=self.index2word_y, 131 | verbose=self.verbose) 132 | metric_values = print_metrics.select[self.metric_name]( 133 | gt_list=self.y, 134 | pred_list=answer_pred, 135 | verbose=1, 136 | extra_vars=self.extra_vars) 137 | if self.verbose == 1: 138 | for m in metric_values: 139 | if 'idiosyncrasy' in m: 140 | idiosyncrasies = m['idiosyncrasy'].split(':') 141 | if 'long' in idiosyncrasies and 'muted' in idiosyncrasies: 142 | # long value being muted, we can only send the results 143 | # to the file 144 | filepath = self.verbosity_path.format( 145 | epoch=epoch, era=self.era, **logs) 146 | if m['value'] is not None: 147 | dict2file(m['value'], filepath, title=m['name']) 148 | self.era += 1 149 | ### 150 | 151 | ### 152 | # Plotting callbacks 153 | ### 154 | ''' 155 | class PlotPerformanceMetricOnEraEnd(BokehCallback): 156 | """ 157 | Plots the performance measures. 
158 | 159 | Inspired by 160 | https://github.com/EderSantana/agnez/blob/master/agnez/keras_callbacks.py 161 | """ 162 | colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 163 | '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 164 | 165 | def __init__(self, X, y, temperature, index2word_y, 166 | metric_name, epoch_interval, extra_vars, verbose=1, 167 | name='experiment', fig_title='Performance', url='http://127.0.0.1:5006'): 168 | """ 169 | In: 170 | X - encoded input 171 | y - raw expected output 172 | temperature - temperature for the predictions; 173 | the colder the temperature the more stable answers 174 | index2word_y - mapping from the indices to words (in the y-domain) 175 | metric_name - name of the performance metric 176 | epoch_interval - 177 | number of epochs that must be passed between two consecutive 178 | invocations of this callback 179 | extra_vars - dictionary of extra variables 180 | verbose - verbosity level; by default 1 181 | name - name of the bokeh document; by default 'experiment' 182 | fig_title - title of the bokeh figure; by default 'Performance' 183 | url - bokeh server url; 184 | by default 'http://127.0.0.1:5006' 185 | """ 186 | BokehCallback.__init__(self, name, fig_title, url) 187 | self.X = X 188 | self.y = y 189 | self.temperature = temperature 190 | self.index2word_y = index2word_y 191 | self.metric_name = metric_name 192 | self.epoch_interval = epoch_interval 193 | self.extra_vars = extra_vars 194 | self.verbose = verbose 195 | self.era = 0 196 | 197 | def on_epoch_end(self, epoch, logs={}): 198 | if not is_era_end(epoch, self.epoch_interval): 199 | return 200 | 201 | answer_pred = self.model.decode_predictions( 202 | X=self.X, 203 | temperature=self.temperature, 204 | index2word=self.index2word_y, 205 | verbose=self.verbose) 206 | measures = print_metrics.select[self.metric_name]( 207 | gt_list=self.y, 208 | pred_list=answer_pred, 209 | verbose=1, 210 | extra_vars=self.extra_vars) 211 | 212 | if not hasattr(self, 'fig'): 213 | self.fig = figure(title=self.fig_title) 214 | for i, m in enumerate(measures): 215 | if 'idiosyncrasy' in m: 216 | if 'muted' in m['idiosyncrasy'].split(':'): 217 | continue 218 | self.fig.line([self.era], [m['value']], legend=m['name'], 219 | name=m['name'], line_width=2, 220 | line_color=self.colors[i % len(self.colors)]) 221 | renderer = self.fig.select({'name': m['name']}) 222 | self.plots.append(renderer[0].data_source) 223 | show(self.fig) 224 | else: 225 | for i, m in enumerate(measures): 226 | if 'idiosyncrasy' in m: 227 | if 'muted' in m['idiosyncrasy'].split(':'): 228 | continue 229 | self.plots[i].data['y'].append(m['value']) 230 | self.plots[i].data['x'].append(self.era) 231 | cursession().store_objects(self.plots[i]) 232 | push() 233 | self.era += 1 234 | 235 | class StandardPerformancePlot(BokehCallback): 236 | """ 237 | Generalizes Agnez class Plot to work with all standard performance metrics. 
238 | 239 | Original work: Eder Santana [https://github.com/EderSantana] 240 | """ 241 | # WIP 242 | # TODO: 243 | # -[ ] Decide API for choosing channels to plot 244 | colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 245 | '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 246 | """ 247 | Inspired by https://github.com/mila-udem/blocks-extras/blob/master/blocks/extras/extensions/plot.py 248 | 249 | """ 250 | def __init__(self, 251 | what_to_plot=['loss', 'val_loss'], 252 | name='experiment', 253 | fig_title='Cost functions', 254 | url='default'): 255 | BokehCallback.__init__(self, name, fig_title, url) 256 | self.totals = {} 257 | self.what_to_plot = what_to_plot 258 | 259 | def on_epoch_begin(self, epoch, logs={}): 260 | self.seen = 0 261 | self.totals = {} 262 | 263 | def on_batch_end(self, batch, logs={}): 264 | batch_size = logs.get('size', 0) 265 | self.seen += batch_size 266 | what_to_plot_now = list(set(['loss', 'acc']) & set(self.what_to_plot)) 267 | for v in what_to_plot_now: 268 | if v in self.totals: 269 | self.totals[v] += logs.get(v) * batch_size 270 | else: 271 | self.totals[v] = logs.get(v) * batch_size 272 | 273 | def on_epoch_end(self, epoch, logs={}): 274 | if not hasattr(self, 'fig'): 275 | self.fig = figure(title=self.fig_title) 276 | for i, v in enumerate(self.what_to_plot): 277 | if v == 'loss': 278 | L = self.totals[v] / self.seen 279 | else: 280 | L = logs.get(v) 281 | self.fig.line([epoch], [L], legend=v, 282 | name=v, line_width=2, 283 | line_color=self.colors[i % len(self.colors)]) 284 | renderer = self.fig.select({'name': v}) 285 | self.plots.append(renderer[0].data_source) 286 | show(self.fig) 287 | else: 288 | for i, v in enumerate(self.what_to_plot): 289 | if v in ['loss', 'acc']: 290 | L = self.totals[v] / self.seen 291 | else: 292 | L = logs.get(v) 293 | self.plots[i].data['y'].append(L) 294 | self.plots[i].data['x'].append(epoch) 295 | cursession().store_objects(self.plots[i]) 296 | push() 297 | ### 298 | ''' 299 | 300 | ### 301 | # Monitoring callbacks 302 | ### 303 | class MonitorPredictionsOnEndEra(KerasCallback): 304 | """ 305 | Checks the performance on a randomly chosen subset of the data. 306 | Hopefully the network generates something interesting. 
307 | """ 308 | def __init__(self, X, x, y, temperature, index2word_y, 309 | verbosity_path, epoch_interval, subset_size=0, verbose=0): 310 | """ 311 | In: 312 | X - encoded input 313 | x - raw input 314 | y - raw output space 315 | temperature - temperature for the predictions; 316 | the colder the temperature the more stable answers 317 | index2word_y - mapping from the indices to words (in the y-domain) 318 | verbosity_path - parameterized filepath to store the logs; 319 | possibilities of the parameterization 320 | * weights.{epoch:02d} 321 | * weights.{era:02d} 322 | epoch_interval - 323 | number of epochs that must be passed between two consecutive 324 | invocations of this callback 325 | subset_size - data subset's size, if 0 then all data are taken; 326 | by default 0 327 | verbose - verbosity level; by default 0 328 | """ 329 | self.X = X 330 | self.x = x 331 | self.y = y 332 | self.temperature = temperature 333 | self.index2word_y = index2word_y 334 | self.verbosity_path = verbosity_path 335 | self.epoch_interval = epoch_interval 336 | self.subset_size = subset_size 337 | self.verbose = verbose 338 | self.era = 0 339 | 340 | def on_epoch_end(self, epoch, logs={}): 341 | if is_era_end(epoch, self.epoch_interval): 342 | if self.subset_size > 0: 343 | subset_indices = random.sample( 344 | xrange(self.X.shape[0]), self.subset_size) 345 | if len(self.X) == 2: 346 | X_subset = [self.X[0][subset_indices], self.X[1][subset_indices]] 347 | elif len(self.X) == 1: 348 | X_subset = self.X[subset_indices] 349 | questions_subset = self.x[subset_indices] 350 | answer_gt = self.y[subset_indices] 351 | answer_gt_original = self.y[subset_indices] 352 | else: 353 | X_subset = self.X 354 | questions_subset = self.x 355 | answer_gt = self.y 356 | answer_gt_original = self.y 357 | answer_pred = self.model.decode_predictions( 358 | X=X_subset, 359 | temperature=self.temperature, 360 | index2word=self.index2word_y, 361 | verbose=self.verbose) 362 | 363 | filepath = self.verbosity_path.format( 364 | epoch=epoch, era=self.era, **logs) 365 | print_qa(questions_subset, answer_gt, answer_gt_original, answer_pred, 366 | self.era, path=filepath) 367 | self.era += 1 368 | ### 369 | 370 | ### 371 | # Learning modifiers callbacks 372 | ### 373 | class LearningRateReducerWithEarlyStopping(KerasCallback): 374 | """ 375 | Reduces learning rate during the training. 376 | 377 | Original work: jiumem [https://github.com/jiumem] 378 | """ 379 | def __init__(self, 380 | patience=0, reduce_rate=0.5, reduce_nb=10, 381 | is_early_stopping=True, verbose=1): 382 | """ 383 | In: 384 | patience - number of beginning epochs without reduction; 385 | by default 0 386 | reduce_rate - multiplicative rate reducer; by default 0.5 387 | reduce_nb - maximal number of reductions performed; by default 10 388 | is_early_stopping - if true then early stopping is applied when 389 | reduce_nb is reached; by default True 390 | verbose - verbosity level; by default 1 391 | """ 392 | super(KerasCallback, self).__init__() 393 | self.patience = patience 394 | self.wait = 0 395 | self.best_score = -1. 
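        # -1. acts as a sentinel for "no validation score seen yet": the first
        # real val_acc (always >= 0) reported in on_epoch_end replaces it and
        # resets the patience counter.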
396 | self.reduce_rate = reduce_rate 397 | self.current_reduce_nb = 0 398 | self.reduce_nb = reduce_nb 399 | self.is_early_stopping = is_early_stopping 400 | self.verbose = verbose 401 | self.epsilon = 0.1e-10 402 | 403 | def on_epoch_end(self, epoch, logs={}): 404 | current_score = logs.get('val_acc') 405 | if current_score is None: 406 | warnings.warn('validation score is off; ' + 407 | 'this reducer works only with the validation score on') 408 | return 409 | if current_score > self.best_score: 410 | self.best_score = current_score 411 | self.wait = 0 412 | if self.verbose > 0: 413 | print('---current best val accuracy: %.3f' % current_score) 414 | else: 415 | if self.wait >= self.patience: 416 | self.current_reduce_nb += 1 417 | if self.current_reduce_nb <= self.reduce_nb: 418 | lr = np.float32(self.model.optimizer.lr.get_value()) 419 | self.model.optimizer.lr.set_value(lr*self.reduce_rate) 420 | if self.verbose > 0: 421 | print("Reduction from {0:0.6f} to {1:0.6f}".\ 422 | format(float(lr), float(lr*self.reduce_rate))) 423 | if float(lr) <= self.epsilon: 424 | if self.verbose > 0: 425 | print('Learning rate too small, learning stops now') 426 | self.model.stop_training = True 427 | else: 428 | if self.is_early_stopping: 429 | if self.verbose > 0: 430 | print("Epoch %d: early stopping" % (epoch)) 431 | self.model.stop_training = True 432 | self.wait += 1 433 | 434 | 435 | class LearningRateReducerEveryPatienceEpoch(KerasCallback): 436 | """ 437 | Reduces learning rate during the training after every 'patience' epochs. 438 | 439 | Original work: jiumem [https://github.com/jiumem] 440 | """ 441 | def __init__(self, 442 | patience=0, reduce_rate=0.5, reduce_nb=10, verbose=1): 443 | """ 444 | In: 445 | patience - number of epochs in stagnation; by default 0 446 | reduce_rate - multiplicative rate reducer; by default 0.5 447 | reduce_nb - maximal number of reductions performed; by default 10 448 | verbose - verbosity level; by default 1 449 | """ 450 | super(KerasCallback, self).__init__() 451 | self.patience = patience 452 | self.wait = 0 453 | self.best_score = -1. 
454 | self.reduce_rate = reduce_rate 455 | self.current_reduce_nb = 0 456 | self.reduce_nb = reduce_nb 457 | self.is_early_stopping = False 458 | self.verbose = verbose 459 | self.epsilon = 0.1e-10 460 | 461 | def on_epoch_end(self, epoch, logs={}): 462 | current_score = logs.get('val_acc') 463 | if current_score is None: 464 | current_score = -10.0 # always reduce 465 | if current_score > self.best_score: 466 | self.best_score = current_score 467 | self.wait = 0 468 | if self.verbose > 0: 469 | print('---current best val accuracy: %.3f' % current_score) 470 | else: 471 | if self.wait >= self.patience: 472 | self.current_reduce_nb += 1 473 | if self.current_reduce_nb <= self.reduce_nb: 474 | lr = np.float32(self.model.optimizer.lr.get_value()) 475 | self.model.optimizer.lr.set_value(lr*self.reduce_rate) 476 | if self.verbose > 0: 477 | print("Reduction from {0:0.6f} to {1:0.6f}".\ 478 | format(float(lr), float(lr*self.reduce_rate))) 479 | if float(lr) <= self.epsilon: 480 | if self.verbose > 0: 481 | print('Learning rate too small, learning stops now') 482 | self.model.stop_training = True 483 | else: 484 | if self.is_early_stopping: 485 | if self.verbose > 0: 486 | print("Epoch %d: early stopping" % (epoch)) 487 | self.model.stop_training = True 488 | self.wait = 0 489 | else: 490 | self.wait += 1 491 | -------------------------------------------------------------------------------- /kraino/utils/data_provider.py: -------------------------------------------------------------------------------- 1 | """ 2 | DAQUAR dataset provider. 3 | 4 | Ashkan Mokarian [ashkan@mpi-inf.mpg.de] 5 | Mateusz Malinowski [mmalinow@mpi-inf.mpg.de] 6 | """ 7 | 8 | import copy 9 | import os 10 | import re 11 | import json 12 | import numpy as np 13 | 14 | from read_write import file2list 15 | from toolz import frequencies 16 | 17 | from scipy.misc import imread 18 | 19 | def daquar_qa_triples( 20 | path=None, 21 | train_or_test='train', 22 | keep_top_qa_pairs=0, 23 | **kwargs): 24 | """ 25 | DAQUAR question answer pairs. 
26 | 27 | In: 28 | path - path to DAQUAR root folder, if None then default path is chosen 29 | by default None 30 | train_or_test - switch between train and test set; 31 | value belongs to \{'train', 'val', 'test'\} 32 | by default 'train' 33 | keep_top_qa_pairs - filter out question-answer pairs to the 34 | keep_top_qa_pairs if positive; by default 0 35 | 36 | Out: 37 | x - textual questions 38 | y - textual answers 39 | img_name - names of the images 40 | img_ind - image indices that correspond to x 41 | question_id - empty list as it is unused in DAQUAR 42 | end_of_question - end of question token 43 | end_of_answer - end of answer token 44 | answer_words_delimiter - delimiter for multiple word answers 45 | """ 46 | if path is None: 47 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 48 | path = os.path.join(curr_dir, '..', '..', 'data', 'daquar') 49 | 50 | if train_or_test == 'val': 51 | # we don't have a well established split 52 | train_or_test = 'train' 53 | 54 | xy_list = file2list( 55 | os.path.join(path,'qa.894.raw.'+train_or_test+'.format_triple')) 56 | 57 | # create a dictionary of allowed qa pairs 58 | all_answers = xy_list[1::3] 59 | freq = frequencies(all_answers) 60 | if keep_top_qa_pairs <= 0: 61 | most_frequent_answers = sorted( 62 | freq.items(), key=lambda x:x[1], reverse=True) 63 | else: 64 | most_frequent_answers = sorted( 65 | freq.items(), key=lambda x:x[1], reverse=True)[:keep_top_qa_pairs] 66 | allowed_answers_dict = dict(most_frequent_answers) 67 | # 68 | 69 | x_list = [] 70 | y_list = [] 71 | img_name_list = [] 72 | img_ind_list = [] 73 | for x, y, image_name in zip(xy_list[::3], xy_list[1::3], xy_list[2::3]): 74 | if y in allowed_answers_dict: 75 | x_list.append(x) 76 | y_list.append(y) 77 | img_name_list.append(image_name) 78 | img_num = re.search('(?<=image)[0-9]+', image_name).group(0) 79 | img_ind_list.append(int(img_num)-1) 80 | 81 | return {'x':x_list, 82 | 'y':y_list, 83 | 'img_name':img_name_list, 84 | 'img_ind': img_ind_list, 85 | 'question_id': [], 86 | 'end_of_question':'?', 87 | 'end_of_answer':'', 88 | 'answer_words_delimiter':','} 89 | 90 | 91 | def daquar_save_results(question_id_list, answer_list, path): 92 | raise NotImplementedError() 93 | 94 | 95 | def vqa_save_results(question_id_list, answer_list, path): 96 | """ 97 | Saves the answers on question_id_list in the VQA-like format. 98 | 99 | In: 100 | question_id_list - list of the question ids 101 | answer_list - list with the answers 102 | path - path where the file is saved 103 | """ 104 | question_answer_pairs = [] 105 | assert len(question_id_list) == len(answer_list), \ 106 | 'must be the same number of questions and answers' 107 | for q,a in zip(question_id_list, answer_list): 108 | question_answer_pairs.append({'question_id':q, 'answer':str(a)}) 109 | with open(path,'w') as f: 110 | json.dump(question_answer_pairs, f) 111 | 112 | 113 | def vqa_get_object(path=None, train_or_test='train', 114 | dataset_type='mscoco', task_type='OpenEnded', 115 | annotation_year='2014', question_year='2015'): 116 | """ 117 | In: 118 | path - path to VQA root folder, if None then default path is chosen; 119 | by default None 120 | train_or_test - switch between train and test set; 121 | value belongs to \{'train', 'val', 'test', 'test_dev'\} 122 | by default 'train' 123 | dataset_type - type of dataset, e.g. 'mscoco' 124 | task_type - type of the task, e.g. 
'OpenEnded' 125 | annotation_year - annotation year 126 | question_year - question year 127 | 128 | Out: 129 | root_path - constructed root path 130 | anno_path - constructed path to annotations 131 | questions_path - constructed path to questions 132 | vqa_object - constructed VQA object 133 | """ 134 | 135 | from vqaTools.vqa import VQA 136 | if path == None: 137 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 138 | root_path = os.path.join(curr_dir, '..', '..', 'data', 'vqa') 139 | else: 140 | root_path = path 141 | 142 | train_or_test_questions = 'test-dev' if train_or_test == 'test_dev' \ 143 | else train_or_test 144 | dataset_train_or_test = train_or_test + annotation_year 145 | question_train_or_test = train_or_test_questions + question_year 146 | 147 | if train_or_test == 'test_dev': 148 | anno_path = None 149 | else: 150 | anno_path = os.path.join(root_path, 151 | 'Annotations', '{0}_{1}_annotations.json'.format( 152 | dataset_type, dataset_train_or_test)) 153 | questions_path = os.path.join(root_path, 154 | 'Questions', '{0}_{1}_{2}_questions.json'.format( 155 | task_type, dataset_type, question_train_or_test)) 156 | vqa = VQA(anno_path, questions_path) 157 | return {'root_path':root_path, 158 | 'anno_path':anno_path, 159 | 'questions_path':questions_path, 160 | 'vqa_object':vqa} 161 | 162 | 163 | def vqa_general(path=None, train_or_test='train', dataset_type='mscoco', 164 | task_type='OpenEnded', annotation_year='2014', question_year='2015', 165 | image_name_template='COCO_2014_{0:0=12}', answer_mode='single_random', 166 | keep_top_qa_pairs=0): 167 | """ 168 | VT-Vision-Lab VQA question answeir pairs. It is a general interface. 169 | In: 170 | path - path to VQA root folder, if None then default path is chosen; 171 | by default None 172 | train_or_test - switch between train and test set; 173 | value belongs to \{'train', 'val', 'test', 'test_dev'\} 174 | by default 'train' 175 | dataset_type - type of dataset, e.g. 'mscoco' 176 | task_type - type of the task, e.g. 'OpenEnded' 177 | annotation_year - annotation year 178 | question_year - question year 179 | image_name_template - template for giving names to images 180 | answer_mode - possible answer modes: 181 | 'single_random' - single answer, randomly chosen 182 | 'single_confident' - single answer, randomly chosen among the confident; 183 | if there is no confident then randomly chosen (the same as single) 184 | 'single_frequent' - the most frequent answer 185 | 'all' - with one question all answers 186 | 'all_repeat' - all answers by repeating the same question 187 | 'all_repeat_confidentonly' - all answers that are confident (repeats the same question) 188 | keep_top_qa_pairs - filter out question-answer pairs to the 189 | keep_top_qa_pairs if positive; by default 0 190 | 191 | Out: 192 | x - textual questions 193 | y - textual answers 194 | img_name - names of the images 195 | img_ind - image indices that correspond to x 196 | question_id - list of question indices 197 | end_of_question - end of question token 198 | end_of_answer - end of answer token 199 | answer_words_delimiter - delimiter for multiple word answers 200 | anno_path - constructed path to annotations 201 | questions_path - constructed path to questions 202 | """ 203 | 204 | def preprocess_question(q): 205 | q_tmp = q.strip().lower().encode('utf8') 206 | if q_tmp[-1] == '?' and q_tmp[-2] != ' ': 207 | # separate word token from the question mark 208 | q_tmp = q_tmp[:-1] + ' ?' 
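        # e.g. 'What is on the table?' -> 'what is on the table ?'
        # (lowercased, with the mark detached from the last word); the
        # detached mark is removed again just below, so the returned
        # question ends without a question mark.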
209 | # remove question mark 210 | if q_tmp[-1] == '?': q_tmp = q_tmp[:-1] 211 | return q_tmp 212 | # 213 | 214 | assert answer_mode in ['single_random', 'single_confident', 'single_frequent', 'all', 'all_repeat', 'all_repeat_confidentonly'] 215 | assert task_type in ['OpenEnded', 'MultipleChoice'], \ 216 | 'The task is either ''OpenEnded'' of ''MultipleChoice''' 217 | assert dataset_type in ['mscoco', 'abstract_v002'], \ 218 | 'The type of dataset is eigher ''mscoco'' or ''abstract_v002''' 219 | 220 | vqa_dict = vqa_get_object( 221 | path=path, 222 | train_or_test=train_or_test, 223 | dataset_type=dataset_type, 224 | task_type=task_type, 225 | annotation_year=annotation_year, 226 | question_year=question_year) 227 | vqa = vqa_dict['vqa_object'] 228 | 229 | # questions can be filtered, e.g. by the question type 230 | ann_ids = vqa.getQuesIds() 231 | anns = vqa.loadQA(ann_ids) 232 | 233 | # process annotations 234 | question_id_list = [] 235 | image_name_list = [] 236 | image_id_list = [] 237 | x_list = [] 238 | y_list = [] 239 | 240 | # return only questions if there are no annotations 241 | if anns == []: 242 | for ques in vqa.questions['questions']: 243 | question = preprocess_question(ques['question']) 244 | x_list.append(question) 245 | question_id_list.append(ques['question_id']) 246 | image_id = ques['image_id'] 247 | image_name = image_name_template.format(image_id) 248 | image_name_list.append(image_name) 249 | image_id_list.append(image_id) 250 | 251 | # create a dictionary of allowed qa pairs 252 | all_answers = [x['answer'] for anno in anns for x in anno['answers']] 253 | freq = frequencies(all_answers) 254 | if keep_top_qa_pairs <= 0: 255 | most_frequent_answers = sorted( 256 | freq.items(), key=lambda x:x[1], reverse=True) 257 | else: 258 | most_frequent_answers = sorted( 259 | freq.items(), key=lambda x:x[1], reverse=True)[:keep_top_qa_pairs] 260 | allowed_answers_dict = dict(most_frequent_answers) 261 | # 262 | 263 | for anno in anns: 264 | image_id = anno['image_id'] 265 | image_name = image_name_template.format(image_id) 266 | question_id = anno['question_id'] 267 | question = preprocess_question(vqa.qqa[question_id]['question']) 268 | assert image_id == vqa.qqa[question_id]['image_id'], \ 269 | 'image id of the question and answer are different' 270 | # randomizing the answers list 271 | randomized_answers = copy.deepcopy(anno['answers']) 272 | np.random.shuffle(randomized_answers) 273 | randomized_allowed_answers_list = \ 274 | [x for x in randomized_answers if x['answer'] in allowed_answers_dict] 275 | if randomized_allowed_answers_list == []: 276 | continue 277 | # 278 | if answer_mode == 'single_random': 279 | answer = randomized_allowed_answers_list[0]['answer'] 280 | elif answer_mode == 'single_confident': 281 | # if there is no confident answer, take a random one 282 | confidence_list = [x['answer_confidence'] \ 283 | for x in randomized_allowed_answers_list] 284 | yes_list = [j for j,x in enumerate(confidence_list) if x == 'yes'] 285 | if yes_list == []: 286 | answer = randomized_allowed_answers_list[0]['answer'] 287 | else: 288 | answer = randomized_allowed_answers_list[yes_list[0]]['answer'] 289 | elif answer_mode == 'single_frequent': 290 | tmp = frequencies([x['answer'] for x in randomized_allowed_answers_list]) 291 | answer = sorted(tmp.items(), key=lambda x: x[1], reverse=True)[0][0] 292 | elif answer_mode == 'all': 293 | raise NotImplementedError() 294 | elif answer_mode == 'all_repeat': 295 | answer_list_all_mode = [] 296 | for answer in 
randomized_allowed_answers_list: 297 | answer_list_all_mode.append(answer['answer'].encode('utf8')) 298 | elif answer_mode == 'all_repeat_confidentonly': 299 | # like repeat but consider only confident answers 300 | confidence_list = [x['answer_confidence'] \ 301 | for x in randomized_allowed_answers_list] 302 | yes_list = [j for j,x in enumerate(confidence_list) if x == 'yes'] 303 | if yes_list == []: 304 | # we keep only confident qa pairs 305 | continue 306 | answer_list_all_mode = [] 307 | for answer_no, answer in enumerate(randomized_allowed_answers_list): 308 | if answer_no in yes_list: 309 | answer_list_all_mode.append(answer['answer'].encode('utf8')) 310 | else: 311 | raise NotImplementedError() 312 | 313 | if 'single' in answer_mode: 314 | answer = answer.encode('utf8') 315 | x_list.append(question) 316 | y_list.append(answer) 317 | image_name_list.append(image_name) 318 | image_id_list.append(image_id) 319 | question_id_list.append(question_id) 320 | elif 'all' in answer_mode: 321 | num_answers_all_mode = len(answer_list_all_mode) 322 | x_list.extend([question]*num_answers_all_mode) 323 | image_name_list.extend([image_name]*num_answers_all_mode) 324 | image_id_list.extend([image_id]*num_answers_all_mode) 325 | question_id_list.extend([question_id]*num_answers_all_mode) 326 | y_list.extend(answer_list_all_mode) 327 | else: 328 | raise NotImplementedError() 329 | 330 | return {'x':x_list, 'y':y_list, 331 | 'img_name':image_name_list, 332 | 'img_ind': image_id_list, 333 | 'question_id': question_id_list, 334 | 'end_of_question':'?', 335 | 'end_of_answer':'', 336 | 'answer_words_delimiter':' ', 337 | 'vqa_object':vqa, 338 | 'questions_path':vqa_dict['questions_path'], 339 | 'anno_path':vqa_dict['anno_path']} 340 | 341 | 342 | def vqa_real_images_open_ended( 343 | path=None, 344 | train_or_test='train', 345 | keep_top_qa_pairs=0, 346 | answer_mode='single', 347 | **kwargs): 348 | """ 349 | VT-Vision-Lab VQA open-ended question answeir pairs. 
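# Illustrative sketch (toy values only, not taken from the dataset): how the
# keep_top_qa_pairs filter above restricts answers to the K most frequent ones.
#
#   from toolz import frequencies
#   all_answers = ['yes', 'no', 'yes', '2', 'yes', 'no']
#   freq = frequencies(all_answers)     # {'yes': 3, 'no': 2, '2': 1}
#   allowed_answers_dict = dict(
#       sorted(freq.items(), key=lambda x: x[1], reverse=True)[:2])
#   # -> {'yes': 3, 'no': 2}; annotations whose answers all fall outside this
#   # dict are skipped by the loop above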
350 | 351 | In: 352 | path - path to VQA root folder, if None then default path is chosen; 353 | by default None 354 | train_or_test - switch between train and test set; 355 | value belongs to \{'train', 'val', 'test', 'test_dev\} 356 | by default 'train' 357 | answer_mode - possible answer modes: 358 | 'single_random' - single answer, randomly chosen 359 | 'single_confident' - single answer, randomly chosen among the confident; 360 | if there is no confident then randomly chosen (the same as single) 361 | 'single_frequent' - the most frequent answer 362 | 'all' - with one question all answers 363 | 'all_repeat' - all answers by repeating the same question 364 | 'all_repeat_confidentonly' - all answers that are confident (repeats the same question) 365 | keep_top_qa_pairs - filter out question-answer pairs to the 366 | keep_top_qa_pairs if positive; by default 0 367 | 368 | Out: 369 | x - textual questions 370 | y - textual answers 371 | img_name - names of the images 372 | img_ind - image indices that correspond to x 373 | question_id - list of question indices 374 | end_of_question - end of question token 375 | end_of_answer - end of answer token 376 | answer_words_delimiter - delimiter for multiple word answers 377 | """ 378 | 379 | dataset_type = 'mscoco' 380 | annotation_year = '2014' 381 | question_year = '2015' if 'test' in train_or_test else '2014' 382 | task_type = 'OpenEnded' 383 | train_or_test_image = 'test' if 'test' in train_or_test else train_or_test 384 | image_name_template = 'COCO_' + train_or_test_image + question_year + '_{0:0=12}' 385 | 386 | return vqa_general( 387 | path=path, 388 | train_or_test=train_or_test, 389 | dataset_type=dataset_type, 390 | task_type=task_type, 391 | annotation_year=annotation_year, 392 | question_year=question_year, 393 | image_name_template=image_name_template, 394 | answer_mode=answer_mode, 395 | keep_top_qa_pairs=keep_top_qa_pairs) 396 | 397 | 398 | ### 399 | # Non-dataset specific functions. 400 | ### 401 | def is_image_file(x): 402 | return x.endswith('.png') or x.endswith('.jpg') or x.endswith('.jpeg') 403 | 404 | 405 | def global_visual_features( 406 | path, perception='googlenet', layer='pool5-7x7_s1', 407 | memory_time_steps=35, is_shuffle_memories=True, names_list=None): 408 | """ 409 | Provides global visual features. 410 | 411 | In: 412 | path - the root path 413 | perception - the perception model; by default 'googlenet' 414 | layer - the layer in the model; by default 'pool5-7x7_s1' 415 | memory_time_steps - number of memories, everything outside is cut out; 416 | valid only if visual features are 3d tensors; by default 35 417 | is_shuffle_memories - shuffle memories; 418 | it's more important when they must be truncated; by default True 419 | names_list - list of the image names, if None then all images are considered; 420 | only valid if data are stored as mappings from names into features; 421 | by default None 422 | """ 423 | assert path is not None, 'Set up the path!' 424 | if is_shuffle_memories: 425 | print 'Shuffling memories ...' 426 | visual_features = np.load(os.path.join( 427 | path, perception, 'blobs.' 
+ layer + '.npy')) 428 | if visual_features.shape == (): 429 | visual_features = visual_features.item() 430 | 431 | if names_list is None or names_list==[]: 432 | return visual_features 433 | else: 434 | # either 2D or 3D tensor 435 | tmp_feats = visual_features[visual_features.keys()[0]] 436 | if layer.endswith('index'): 437 | visual_features_subset = np.zeros( 438 | (len(names_list), memory_time_steps), dtype=int) 439 | is_memories = True 440 | elif tmp_feats.ndim == 1: 441 | visual_features_subset = np.zeros( 442 | (len(names_list), tmp_feats.shape[0])) 443 | is_memories = False 444 | elif tmp_feats.ndim == 2: 445 | # matrix has dimensions #images x #time_steps x #features 446 | visual_features_subset = np.zeros( 447 | (len(names_list), memory_time_steps, tmp_feats.shape[-1])) 448 | is_memories = True 449 | else: 450 | raise NotImplementedError() 451 | skipped_image_names = set() 452 | for k, name_now in enumerate(names_list): 453 | if name_now not in visual_features: 454 | # keep going if image doesn't exist in features 455 | skipped_image_names.add(name_now) 456 | continue 457 | if is_memories: 458 | visual_features_now = visual_features[name_now] 459 | number_memories = visual_features_now.shape[0] 460 | if is_shuffle_memories: 461 | shuffled_memory_indices = \ 462 | np.arange(visual_features_now.shape[0]) 463 | np.random.shuffle(shuffled_memory_indices) 464 | visual_features_now = \ 465 | visual_features_now[shuffled_memory_indices] 466 | if layer.endswith('index'): 467 | # we add one because we want to mask-out zeroes 468 | visual_features_subset[k,-number_memories:] = \ 469 | np.squeeze(visual_features_now[:memory_time_steps]+1) 470 | else: 471 | visual_features_subset[k,-number_memories:,:] = \ 472 | visual_features_now[:memory_time_steps,:] 473 | else: 474 | visual_features_subset[k,:] = visual_features[name_now] 475 | print('Skipped images {0} of them:'.format(len(skipped_image_names))) 476 | for name_now in skipped_image_names: 477 | print(name_now) 478 | return visual_features_subset 479 | 480 | 481 | def get_global_perception( 482 | task='daquar', train_or_test='train', extractor_fun=global_visual_features, 483 | path=None, perception='googlenet', layer='pool5-7x7_s1', names_list=None): 484 | """ 485 | Provides global visual features. 
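# Illustrative note on the conventions assumed by the two functions above
# (the paths and name pattern follow the code; the concrete values are examples):
#   * vqa_real_images_open_ended builds image names such as
#     'COCO_train2014_{0:0=12}'.format(123) -> 'COCO_train2014_000000000123'
#   * global_visual_features expects the features file at
#     <path>/<perception>/blobs.<layer>.npy, e.g.
#     <path>/googlenet/blobs.pool5-7x7_s1.npy, holding either a plain feature
#     array or a pickled dict that maps such image names to per-image feature
#     vectors (2D case) or per-image matrices of "memories" (3D case)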
486 | 487 | In: 488 | task - the challenge; by default 'daquar' 489 | train_or_test - training, validation, or test set; by default train 490 | extractor_fun - function for extraction; 491 | by default global_visual_features 492 | path - the root path, if None then default path is taken; 493 | by default None 494 | perception - the perception model; by default 'googlenet' 495 | layer - the layer in the model; by default 'pool5-7x7_s1' 496 | names_list - list of the image names, if None then all images are considered; 497 | only valid if data are stored as mappings from names into features; 498 | by default None 499 | """ 500 | if path is None: 501 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 502 | root_path = os.path.join(curr_dir, '..', '..', 'data') 503 | if task=='daquar': 504 | task_path = os.path.join(root_path, 'daquar', 'visual_features') 505 | elif task == 'vqa': 506 | if train_or_test == 'train': vqa_train_or_test='train2014' 507 | elif train_or_test == 'val': vqa_train_or_test='val2014' 508 | elif 'test' in train_or_test: vqa_train_or_test='test2015' 509 | else: raise NotImplementedError() 510 | task_path = os.path.join(root_path, 'vqa', 'visual_features', vqa_train_or_test) 511 | else: 512 | task_path = path 513 | 514 | return extractor_fun( 515 | path=task_path, 516 | perception=perception, 517 | layer=layer, 518 | names_list=names_list) 519 | 520 | 521 | # Selector 522 | ### 523 | select = { 524 | 'daquar-triples': { 525 | 'text':daquar_qa_triples, 526 | 'perception':lambda train_or_test, names_list, 527 | parts_extractor, max_parts, perception, 528 | layer, second_layer: 529 | get_global_perception( 530 | task='daquar', 531 | train_or_test=train_or_test, 532 | names_list=names_list, 533 | extractor_fun=global_visual_features, 534 | perception=perception, 535 | layer=layer), 536 | 'save_predictions': daquar_save_results 537 | }, 538 | 'vqa-real_images-open_ended': { 539 | 'text':vqa_real_images_open_ended, 540 | 'perception':lambda train_or_test, names_list, 541 | parts_extractor, max_parts, perception, 542 | layer, second_layer: 543 | get_global_perception( 544 | task='vqa', 545 | train_or_test=train_or_test, 546 | names_list=names_list, 547 | extractor_fun=global_visual_features, 548 | perception=perception, 549 | layer=layer), 550 | 'visual_parameters':lambda train_or_test, perception, params: 551 | get_global_perception( 552 | task='vqa', train_or_test=train_or_test, 553 | extractor_fun=global_visual_parameters, 554 | perception=perception, 555 | params=params), 556 | 'save_predictions': vqa_save_results 557 | }, 558 | } 559 | 560 | -------------------------------------------------------------------------------- /kraino/utils/parsers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsing the input arguments. 
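# Illustrative sketch of how the data_provider.select dictionary above is used
# (it mirrors the calls in neural_solver.py; the dataset name, layer and other
# values are examples only and require the corresponding data under data/):
#
#   from kraino.utils import data_provider
#   dp = data_provider.select['vqa-real_images-open_ended']
#   train_text = dp['text'](train_or_test='train',
#                           answer_mode='single_frequent',
#                           keep_top_qa_pairs=1000)
#   train_feats = dp['perception'](train_or_test='train',
#                                  names_list=train_text['img_name'],
#                                  parts_extractor='whole', max_parts=35,
#                                  perception='googlenet',
#                                  layer='pool5-7x7_s1', second_layer='')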
3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | import argparse 9 | 10 | 11 | ### 12 | # Default arguments 13 | ### 14 | DEFAULT_MAX_INPUT_TIME_STEPS = 32 15 | DEFAULT_MAX_OUTPUT_TIME_STEPS = 5 16 | DEFAULT_MAX_MEMORY_TIME_STEPS = 35 17 | DEFAULT_TRUNCATE_OUTPUT_SPACE = 0 18 | DEFAULT_TRUNCATE_INPUT_SPACE = 0 19 | DEFAULT_NUM_TOP_QA_PAIRS=0 20 | DEFAULT_MAX_ERA = 10 21 | DEFAULT_MAX_EPOCH = 30 22 | DEFAULT_BATCH_SIZE = 755 23 | DEFAULT_HIDDEN_STATE_SIZE = 1000 24 | DEFAULT_VISUAL_HIDDEN_STATE_SIZE = 1000 25 | DEFAULT_TEXTUAL_EMBEDDING_SIZE = 1000 26 | DEFAULT_VISUAL_EMBEDDING_SIZE = 1000 27 | #DEFAULT_ADAPTATION_SIZE = 1000 28 | DEFAULT_MLP_HIDDEN_SIZE = 1000 29 | DEFAULT_NUM_MLP_LAYERS = 0 30 | DEFAULT_NUM_LANGUAGE_LAYERS = 1 31 | #DEFAULT_NUM_ADAPTATION_LAYERS = 1 32 | DEFAULT_TEMPERATURE = 0.001 33 | DEFAULT_VALIDATION_SPLIT = 0.0 34 | DEFAULT_TRAINING_SUBSET = -1 35 | DEFAULT_VAL_SUBSET = -1 36 | DEFAULT_TEST_SUBSET = -1 37 | DEFAULT_REDUCE_RATE = 1.0 38 | DEFAULT_MAX_NUMBER_REDUCTIONS = 10 39 | DEFAULT_LR = -1 40 | DEFAULT_LR_PATIENCE = 5 41 | DEFAULT_FUSION_LAYER_INDEX = 0 42 | DEFAULT_LANGUAGE_CNN_FILTERS = 1000 43 | DEFAULT_LANGUAGE_CNN_FILTER_LENGTH = 3 44 | DEFAULT_LANGUAGE_CNN_ACTIVATION = 'relu' 45 | DEFAULT_LANGUAGE_CNN_VIEWS = 3 46 | DEFAULT_LANGUAGE_MAX_POOL_LENGTH = 2 47 | DEFAULT_VERBOSITY = '' 48 | DEFAULT_WEIGHTS_LOADER_ERA = -1 49 | DEFAULT_MERGE_MODE = 'ave' 50 | DEFAULT_MULTIMODAL_MERGE_MODE = 'concat' 51 | DEFAULT_WORD_REPRESENTATION = 'one_hot' 52 | DEFAULT_OPTIMIZER = 'adam' 53 | DEFAULT_TEXT_ENCODER = 'lstm' 54 | DEFAULT_TEXT_DECODER = 'lstm' 55 | DEFAULT_VISUAL_ENCODER = 'lstm' 56 | DEFAULT_SEQUENCE_REDUCER = 'lstm' 57 | DEFAULT_MEMORY_MATCH_ACTIVATION = 'softmax' 58 | DEFAULT_MLP_ACTIVATION = 'relu' 59 | DEFAULT_PERCEPTION='googlenet' 60 | DEFAULT_PERCEPTION_LAYER='pool5-7x7_s1' 61 | DEFAULT_PERCEPTION_SECOND_LAYER='' 62 | DEFAULT_TRAINABLE_PERCEPTION_NAME='none' 63 | DEFAULT_PARAMS='loss3_classifier' 64 | DEFAULT_WORD_GENERATOR = 'max_likelihood' 65 | DEFAULT_DATASET = 'daquar-triples' 66 | DEFAULT_PARTS_EXTRACTOR = 'whole' 67 | DEFAULT_MODEL = 'sequential-blind-single_answer' 68 | DEFAULT_LOSS = 'categorical_crossentropy' 69 | DEFAULT_METRIC = 'wups' 70 | DEFAULT_VQA_ANSWER_MODE = 'single_random' 71 | DEFAULT_PREDICTION_DATASET_FOLD = 'test' 72 | DEFAULT_VISUALIZATION_URL = 'default' 73 | DEFAULT_VISUALIZATION_FIG_LOSS_TITLE = 'Loss' 74 | DEFAULT_VISUALIZATION_FIG_METRIC_TITLE = 'WUPS scores' 75 | DEFAULT_WEIGHTS_LOADER_NAME = '' 76 | DEFAULT_RESULTS_FILENAME = 'results' 77 | DEFAULT_IS_REVERSE_INPUT=False 78 | DEFAULT_IS_SAVE_WEIGHTS = False 79 | DEFAULT_IS_LR_FIXED_REDUCTION = False 80 | DEFAULT_IS_EARLY_STOPPING = False 81 | DEFAULT_IS_VALIDATION_SET = False 82 | DEFAULT_IS_ONLY_FIRST_ANSWER_WORD = True 83 | DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD = False 84 | 85 | 86 | ### 87 | # Functions 88 | ### 89 | def parse_input_arguments(): 90 | arg_parser = argparse.ArgumentParser() 91 | arg_parser.add_argument('--max_input_time_steps', 92 | dest='MAX_INPUT_TIME_STEPS', default=DEFAULT_MAX_INPUT_TIME_STEPS, type=int, 93 | help='Maximal number of time steps (word positions) in a question; ' + 94 | 'by default {0}'.format(DEFAULT_MAX_INPUT_TIME_STEPS)) 95 | arg_parser.add_argument('--max_output_time_steps', 96 | dest='MAX_OUTPUT_TIME_STEPS', default=DEFAULT_MAX_OUTPUT_TIME_STEPS, type=int, 97 | help='Maximal number of time steps (word positions) in an answer; ' + 98 | 'by default {0}'.format(DEFAULT_MAX_OUTPUT_TIME_STEPS)) 99 | 
arg_parser.add_argument('--max_visual_time_steps', 100 | dest='MAX_MEMORY_TIME_STEPS', default=DEFAULT_MAX_MEMORY_TIME_STEPS, type=int, 101 | help='Maximal number of memory time steps in the input; ' + 102 | 'by default {0}'.format(DEFAULT_MAX_MEMORY_TIME_STEPS)) 103 | arg_parser.add_argument('--truncate_output_space', 104 | dest='TRUNCATE_OUTPUT_SPACE', default=DEFAULT_TRUNCATE_OUTPUT_SPACE, type=int, 105 | help='Restrict the output space to the most frequent items if positive, otherwise all output items; ' + 106 | 'by default {0}'.format(DEFAULT_TRUNCATE_OUTPUT_SPACE)) 107 | arg_parser.add_argument('--truncate_input_space', 108 | dest='TRUNCATE_INPUT_SPACE', default=DEFAULT_TRUNCATE_INPUT_SPACE, type=int, 109 | help='Restrict the input space to the most frequent items if positive, otherwise all input items; ' + 110 | 'by default {0}'.format(DEFAULT_TRUNCATE_INPUT_SPACE)) 111 | arg_parser.add_argument('--number_most_frequent_qa_pairs', 112 | dest='NUM_TOP_QA_PAIRS', default=DEFAULT_NUM_TOP_QA_PAIRS, type=int, 113 | help='Restrict number of answers to K most frequent if positive, otherwise no restrictions; ' + 114 | 'by default {0}'.format(DEFAULT_NUM_TOP_QA_PAIRS)) 115 | arg_parser.add_argument('--max_era', 116 | dest='MAX_ERA', default=DEFAULT_MAX_ERA, type=int, 117 | help='Number of eras to loop over; one era contains many epochs; ' + 118 | 'by default {0}'.format(DEFAULT_MAX_ERA)) 119 | arg_parser.add_argument('--max_epoch', 120 | dest='MAX_EPOCH', default=DEFAULT_MAX_EPOCH, type=int, 121 | help='Number of epochs; by default {0}'.format(DEFAULT_MAX_EPOCH)) 122 | arg_parser.add_argument('--batch_size', 123 | dest='BATCH_SIZE', default=DEFAULT_BATCH_SIZE, type=int, 124 | help='Number of samples in every batch; ' + 125 | 'by default {0}'.format(DEFAULT_BATCH_SIZE)) 126 | arg_parser.add_argument('--hidden_state_size', 127 | dest='HIDDEN_STATE_SIZE', default=DEFAULT_HIDDEN_STATE_SIZE, type=int, 128 | help='Size of the hidden state; by default {0}'.format(DEFAULT_HIDDEN_STATE_SIZE)) 129 | arg_parser.add_argument('--visual_hidden_state_size', 130 | dest='VISUAL_HIDDEN_STATE_SIZE', default=DEFAULT_VISUAL_HIDDEN_STATE_SIZE, type=int, 131 | help='Size of the visual hidden state; by default {0}'.format(DEFAULT_VISUAL_HIDDEN_STATE_SIZE)) 132 | arg_parser.add_argument('--textual_embedding_size', 133 | dest='TEXTUAL_EMBEDDING_SIZE', default=DEFAULT_TEXTUAL_EMBEDDING_SIZE, type=int, 134 | help='Size of the embedding layer; if 0 then no embedding is applied; by default {0}'.format(DEFAULT_TEXTUAL_EMBEDDING_SIZE)) 135 | arg_parser.add_argument('--visual_embedding_size', 136 | dest='VISUAL_EMBEDDING_SIZE', default=DEFAULT_VISUAL_EMBEDDING_SIZE, type=int, 137 | help='Size of the visual embedding layer; by default {0}'.format(DEFAULT_VISUAL_EMBEDDING_SIZE)) 138 | #arg_parser.add_argument('--adaptation_size', 139 | #dest='ADAPTATION_SIZE', default=DEFAULT_ADAPTATION_SIZE, type=int, 140 | #help='Size of the adaptation layer; by default {0}'.format(DEFAULT_ADAPTATION_SIZE)) 141 | arg_parser.add_argument('--mlp_hidden_size', 142 | dest='MLP_HIDDEN_SIZE', default=DEFAULT_MLP_HIDDEN_SIZE, type=int, 143 | help='Size of the MLP layer; by default {0}'.format(DEFAULT_MLP_HIDDEN_SIZE)) 144 | arg_parser.add_argument('--num_mlp_layers', 145 | dest='NUM_MLP_LAYERS', default=DEFAULT_NUM_MLP_LAYERS, type=int, 146 | help='Number of MLP layers; by default {0}'.format(DEFAULT_NUM_MLP_LAYERS)) 147 | arg_parser.add_argument('--num_language_layers', 148 | dest='NUM_LANGUAGE_LAYERS', default=DEFAULT_NUM_LANGUAGE_LAYERS, 
type=int, 149 | help='Number of language layers; by default {0}'.format(DEFAULT_NUM_LANGUAGE_LAYERS)) 150 | #arg_parser.add_argument('--num_adaptation_layers', 151 | #dest='NUM_ADAPTATION_LAYERS', default=DEFAULT_NUM_ADAPTATION_LAYERS, type=int, 152 | #help='Number of adaptation layers; by default {0}'.format(DEFAULT_NUM_ADAPTATION_LAYERS)) 153 | arg_parser.add_argument('--temperature', 154 | dest='TEMPERATURE', default=DEFAULT_TEMPERATURE, type=float, 155 | help='Temperature for the predictions; the colder the more conservative (confident) answers; ' + 156 | 'by default {0}'.format(DEFAULT_TEMPERATURE)) 157 | arg_parser.add_argument('--validation_split', 158 | dest='VALIDATION_SPLIT', default=DEFAULT_VALIDATION_SPLIT, type=float, 159 | help='Fraction of training data used for validation; by default {0}'.\ 160 | format(DEFAULT_VALIDATION_SPLIT)) 161 | arg_parser.add_argument('--training_subset_size', 162 | dest='TRAINING_SUBSET_SIZE', default=DEFAULT_TRAINING_SUBSET, type=int, 163 | help='Size of the training subset, only for the monitoring if verbosity is set; ' + 164 | 'by default {0}'.format(DEFAULT_TRAINING_SUBSET)) 165 | arg_parser.add_argument('--validation_subset_size', 166 | dest='VAL_SUBSET_SIZE', default=DEFAULT_VAL_SUBSET, type=int, 167 | help='Size of the validation subset, only for the monitoring if verbosity is set; ' + 168 | 'by default {0}'.format(DEFAULT_VAL_SUBSET)) 169 | arg_parser.add_argument('--test_subset_size', 170 | dest='TEST_SUBSET_SIZE', default=DEFAULT_TEST_SUBSET, type=int, 171 | help='Size of the test subset, only for the monitoring if verbosity is set; ' + 172 | 'by default {0}'.format(DEFAULT_TEST_SUBSET)) 173 | arg_parser.add_argument('--reduce_rate', 174 | dest='REDUCE_RATE', default=DEFAULT_REDUCE_RATE, type=float, 175 | help='Reduce learning rate; by default {0}'.format(DEFAULT_REDUCE_RATE)) 176 | arg_parser.add_argument('--max_number_reductions', 177 | dest='MAX_NUMBER_REDUCTIONS', default=DEFAULT_MAX_NUMBER_REDUCTIONS, type=int, 178 | help='Maximal number of reductions; by default {0}'.format(DEFAULT_MAX_NUMBER_REDUCTIONS)) 179 | arg_parser.add_argument('--lr', 180 | dest='LR', default=DEFAULT_LR, type=float, 181 | help='Learning rate if positive, otherwise default values for individual solvers are considered; by default {0}'.format(DEFAULT_LR)) 182 | arg_parser.add_argument('--lr_patience', 183 | dest='LR_PATIENCE', default=DEFAULT_LR_PATIENCE, type=int, 184 | help='Patience (allowed number of epochs in stagnations); by default {0}'.format(DEFAULT_LR_PATIENCE)) 185 | arg_parser.add_argument('--fusion_layer_index', 186 | dest='FUSION_LAYER_INDEX', default=DEFAULT_FUSION_LAYER_INDEX, type=int, 187 | help='Index of the language layer where the multimodal fusion happens; by default {0}'.format(DEFAULT_FUSION_LAYER_INDEX)) 188 | arg_parser.add_argument('--language_cnn_filter_size', 189 | dest='LANGUAGE_CNN_FILTERS', default=DEFAULT_LANGUAGE_CNN_FILTERS, type=int, 190 | help='Number of filters for CNN language (dimensionality of the CNN output); by default {0}'.format(DEFAULT_LANGUAGE_CNN_FILTERS)) 191 | arg_parser.add_argument('--language_cnn_filter_length', 192 | dest='LANGUAGE_CNN_FILTER_LENGTH', default=DEFAULT_LANGUAGE_CNN_FILTER_LENGTH, type=int, 193 | help='Size of receptive field of the language CNN filters; by default {0}'.format(DEFAULT_LANGUAGE_CNN_FILTER_LENGTH)) 194 | arg_parser.add_argument('--language_cnn_activation', 195 | dest='LANGUAGE_CNN_ACTIVATION', default=DEFAULT_LANGUAGE_CNN_ACTIVATION, type=str, 196 | help='Activation for CNN 
language; by default {0}'.format(DEFAULT_LANGUAGE_CNN_ACTIVATION)) 197 | arg_parser.add_argument('--language_cnn_views', 198 | dest='LANGUAGE_CNN_VIEWS', default=DEFAULT_LANGUAGE_CNN_VIEWS, type=int, 199 | help='Number of CNN views (e.g. two views are unigram and bigram); by default {0}'.format(DEFAULT_LANGUAGE_CNN_VIEWS)) 200 | arg_parser.add_argument('--language_max_pool_length', 201 | dest='LANGUAGE_MAX_POOL_LENGTH', default=DEFAULT_LANGUAGE_MAX_POOL_LENGTH, type=int, 202 | help='Size of receptive field of max pooling; by default {0}'.format(DEFAULT_LANGUAGE_MAX_POOL_LENGTH)) 203 | arg_parser.add_argument('--verbosity', 204 | dest='VERBOSITY', default=DEFAULT_VERBOSITY, type=str, 205 | help='Verbosity level with values separated by colon, there are: monitor_training_prediction; ' + 206 | 'by default {0}'.format(DEFAULT_VERBOSITY)) 207 | arg_parser.add_argument('--weights_loader_era', 208 | dest='WEIGHTS_LOADER_ERA', default=DEFAULT_WEIGHTS_LOADER_ERA, type=int, 209 | help='If non-negative then it sets the era to load the weights, otherwise no model is loaded; ' + 210 | 'by default {0}'.format(DEFAULT_WEIGHTS_LOADER_ERA)) 211 | arg_parser.add_argument('--results_filename', 212 | dest='RESULTS_FILENAME', default=DEFAULT_RESULTS_FILENAME, type=str, 213 | help='Filename where the results of the predictions are stored; by default {0}'.format(DEFAULT_RESULTS_FILENAME)) 214 | arg_parser.add_argument('--temporal_fusion', 215 | dest='MERGE_MODE', default=DEFAULT_MERGE_MODE, type=str, 216 | help='Temporal merging mode {0}'.format(DEFAULT_MERGE_MODE)) 217 | arg_parser.add_argument('--multimodal_fusion', 218 | dest='MULTIMODAL_MERGE_MODE', default=DEFAULT_MULTIMODAL_MERGE_MODE, type=str, 219 | help='Multimodal merging mode {0}'.format(DEFAULT_MULTIMODAL_MERGE_MODE)) 220 | arg_parser.add_argument('--word_representation', 221 | dest='WORD_REPRESENTATION', default=DEFAULT_WORD_REPRESENTATION, type=str, 222 | help='Word representation; by default {0}'.format(DEFAULT_WORD_REPRESENTATION)) 223 | arg_parser.add_argument('--optimizer', 224 | dest='OPTIMIZER', default=DEFAULT_OPTIMIZER, type=str, 225 | help='Optimization algorithm for training; by default {0}'.format(DEFAULT_OPTIMIZER)) 226 | arg_parser.add_argument('--text_encoder', 227 | dest='TEXT_ENCODER', default=DEFAULT_TEXT_ENCODER, type=str, 228 | help='Kind of used encoder; by default {0}'.format(DEFAULT_TEXT_ENCODER)) 229 | arg_parser.add_argument('--text_decoder', 230 | dest='TEXT_DECODER', default=DEFAULT_TEXT_DECODER, type=str, 231 | help='Kind of used decoder, valid only in encoder-decoder architectures; ' + 232 | 'by default {0}'.format(DEFAULT_TEXT_DECODER)) 233 | arg_parser.add_argument('--visual_encoder', 234 | dest='VISUAL_ENCODER', default=DEFAULT_VISUAL_ENCODER, type=str, 235 | help='Kind of used visual encoder, valid only in memory-based encoder-decoder architectures; ' + 236 | 'by default {0}'.format(DEFAULT_VISUAL_ENCODER)) 237 | arg_parser.add_argument('--sequence_reducer', 238 | dest='SEQUENCE_REDUCER', default=DEFAULT_SEQUENCE_REDUCER, type=str, 239 | help='Kind of used sequence reducer, valid only in memory-based encoder-decoder architectures; ' + 240 | 'by default {0}'.format(DEFAULT_SEQUENCE_REDUCER)) 241 | arg_parser.add_argument('--memory_match_activation', 242 | dest='MEMORY_MATCH_ACTIVATION', default=DEFAULT_MEMORY_MATCH_ACTIVATION, type=str, 243 | help='Kind of used memory match activation, valid only in memory-based architectures; ' + 244 | 'by default {0}'.format(DEFAULT_MEMORY_MATCH_ACTIVATION)) 245 | 
arg_parser.add_argument('--mlp_activation', 246 | dest='MLP_ACTIVATION', default=DEFAULT_MLP_ACTIVATION, type=str, 247 | help='Kind of used MLP activation unit; by default {0}'.format(DEFAULT_MLP_ACTIVATION)) 248 | arg_parser.add_argument('--perception', 249 | dest='PERCEPTION', default=DEFAULT_PERCEPTION, type=str, 250 | help='Kind of a pre-trained perception model used; by default {0}'.format(DEFAULT_PERCEPTION)) 251 | arg_parser.add_argument('--perception_layer', 252 | dest='PERCEPTION_LAYER', default=DEFAULT_PERCEPTION_LAYER, type=str, 253 | help='Kind of layer in the pre-trained perception used; by default {0}'.format(DEFAULT_PERCEPTION_LAYER)) 254 | arg_parser.add_argument('--perception_second_layer', 255 | dest='PERCEPTION_SECOND_LAYER', default=DEFAULT_PERCEPTION_SECOND_LAYER, type=str, 256 | help='Kind of layer in the pre-trained perception used; by default {0}'.format(DEFAULT_PERCEPTION_SECOND_LAYER)) 257 | arg_parser.add_argument('--trainable_perception', 258 | dest='TRAINABLE_PERCEPTION_NAME', default=DEFAULT_TRAINABLE_PERCEPTION_NAME, type=str, 259 | help='Perception that is used to train or fine-tune, or none if we want to rely on a pre-trained perception; '\ 260 | + ' by default {0}'.format(DEFAULT_TRAINABLE_PERCEPTION_NAME)) 261 | arg_parser.add_argument('--params', 262 | dest='PARAMS', default=DEFAULT_PARAMS, type=str, 263 | help='Kind of params in the perception used; by default {0}'.format(DEFAULT_PARAMS)) 264 | arg_parser.add_argument('--word_generator', 265 | dest='WORD_GENERATOR', default=DEFAULT_WORD_GENERATOR, type=str, 266 | help='Procedure to generate single words; ' + 267 | 'by default {0}'.format(DEFAULT_WORD_GENERATOR)) 268 | arg_parser.add_argument('--dataset', 269 | dest='DATASET', default=DEFAULT_DATASET, type=str, 270 | help='Kind of used dataset; by default {0}'.format(DEFAULT_DATASET)) 271 | arg_parser.add_argument('--parts_extractor', 272 | dest='PARTS_EXTRACTOR', default=DEFAULT_PARTS_EXTRACTOR, type=str, 273 | help='Kind of parts extractor; only if image parts are concerned; by default {0}'.format(DEFAULT_PARTS_EXTRACTOR)) 274 | arg_parser.add_argument('--model', 275 | dest='MODEL', default=DEFAULT_MODEL, type=str, 276 | help='Kind of used model; by default {0}'.format(DEFAULT_MODEL)) 277 | arg_parser.add_argument('--loss', 278 | dest='LOSS', default=DEFAULT_LOSS, type=str, 279 | help='Kind of used loss; by default {0}'.format(DEFAULT_LOSS)) 280 | arg_parser.add_argument('--metric', 281 | dest='METRIC', default=DEFAULT_METRIC, type=str, 282 | help='Kind of used metric; by default {0}'.format(DEFAULT_METRIC)) 283 | arg_parser.add_argument('--vqa_answer_mode', 284 | dest='VQA_ANSWER_MODE', default=DEFAULT_VQA_ANSWER_MODE, type=str, 285 | help='VQA answer mode; by default {0}'.format(DEFAULT_VQA_ANSWER_MODE)) 286 | arg_parser.add_argument('--prediction_dataset_fold', 287 | dest='PREDICTION_DATASET_FOLD', default=DEFAULT_PREDICTION_DATASET_FOLD, type=str, 288 | help='Dataset chosen for predictions; by default {0}'.format(DEFAULT_PREDICTION_DATASET_FOLD)) 289 | arg_parser.add_argument('--visualization_url', 290 | dest='VISUALIZATION_URL', default=DEFAULT_VISUALIZATION_URL, type=str, 291 | help='Bokeh url; by default {0}'.format(DEFAULT_VISUALIZATION_URL)) 292 | arg_parser.add_argument('--visualization_fig_loss_title', 293 | dest='VISUALIZATION_FIG_LOSS_TITLE', default=DEFAULT_VISUALIZATION_FIG_LOSS_TITLE, type=str, 294 | help='Bokeh loss figure title; by default {0}'.format(DEFAULT_VISUALIZATION_FIG_LOSS_TITLE)) 295 | 
arg_parser.add_argument('--visualization_fig_metric_title', 296 | dest='VISUALIZATION_FIG_METRIC_TITLE', default=DEFAULT_VISUALIZATION_FIG_METRIC_TITLE, type=str, 297 | help='Bokeh metric figure title; by default {0}'.format(DEFAULT_VISUALIZATION_FIG_METRIC_TITLE)) 298 | arg_parser.add_argument('--weights_loader_name', 299 | dest='WEIGHTS_LOADER_NAME', default=DEFAULT_WEIGHTS_LOADER_NAME, type=str, 300 | help='The main name for the weights loader; by default {0}'.format(DEFAULT_WEIGHTS_LOADER_NAME)) 301 | # boolean arguments 302 | arg_parser.add_argument('--reverse_input', 303 | dest='IS_REVERSE_INPUT', action='store_true', 304 | help='If it is set up then the input is processed in a reverse order ' + 305 | 'by default {0}'.format('--reverse_input' if DEFAULT_IS_REVERSE_INPUT else 'no_reverse_input')) 306 | arg_parser.add_argument('--no_reverse_input', 307 | dest='IS_REVERSE_INPUT', action='store_false', 308 | help='If it is set up then the input is processed in a reverse order ' + 309 | 'by default {0}'.format('--reverse_input' if DEFAULT_IS_REVERSE_INPUT else 'no_reverse_input')) 310 | arg_parser.set_defaults(IS_REVERSE_INPUT=DEFAULT_IS_REVERSE_INPUT) 311 | arg_parser.add_argument('--store_weights', 312 | dest='IS_SAVE_WEIGHTS', action='store_true', 313 | help='If it is set up then the weights are saved in each era; ' + 314 | 'by default {0}'.format('store_weights' if DEFAULT_IS_SAVE_WEIGHTS else 'no_store_weights')) 315 | arg_parser.add_argument('--no_store_weights', 316 | dest='IS_SAVE_WEIGHTS', action='store_false', 317 | help='If it is set up then the weights are forgotten; ' + 318 | 'by default {0}'.format('store_weights' if DEFAULT_IS_SAVE_WEIGHTS else 'no_store_weights')) 319 | arg_parser.set_defaults(IS_SAVE_WEIGHTS=DEFAULT_IS_SAVE_WEIGHTS) 320 | arg_parser.add_argument('--lr_fixed_reduction', 321 | dest='IS_LR_FIXED_REDUCTION', action='store_true', 322 | help='If it is set up early stopping is applied based on val acc; ' + 323 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_LR_FIXED_REDUCTION else 'no_early_stopping')) 324 | arg_parser.add_argument('--no_lr_fixed_reduction', 325 | dest='IS_LR_FIXED_REDUCTION', action='store_false', 326 | help='If it is set up early stopping is applied based on val acc; ' + 327 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_LR_FIXED_REDUCTION else 'no_early_stopping')) 328 | arg_parser.set_defaults(IS_LR_FIXED_REDUCTION=DEFAULT_IS_LR_FIXED_REDUCTION) 329 | arg_parser.add_argument('--early_stopping', 330 | dest='IS_EARLY_STOPPING', action='store_true', 331 | help='If it is set up early stopping is applied based on val acc; ' + 332 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_EARLY_STOPPING else 'no_early_stopping')) 333 | arg_parser.add_argument('--no_early_stopping', 334 | dest='IS_EARLY_STOPPING', action='store_false', 335 | help='If it is set up early stopping is applied based on val acc; ' + 336 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_EARLY_STOPPING else 'no_early_stopping')) 337 | arg_parser.set_defaults(IS_EARLY_STOPPING=DEFAULT_IS_EARLY_STOPPING) 338 | arg_parser.add_argument('--use_validation', 339 | dest='IS_VALIDATION_SET', action='store_true', 340 | help='If it is set up then the validation set is used; ' + 341 | 'by default {0}'.format('use_validation' if DEFAULT_IS_VALIDATION_SET else 'no_validation')) 342 | arg_parser.add_argument('--no_validation', 343 | dest='IS_VALIDATION_SET', action='store_false', 344 | help='If it is set up then there is no validation set; ' + 345 | 'by default 
{0}'.format('use_validation' if DEFAULT_IS_VALIDATION_SET else 'no_validation')) 346 | arg_parser.set_defaults(IS_VALIDATION_SET=DEFAULT_IS_VALIDATION_SET) 347 | arg_parser.add_argument('--use_first_answer_words', 348 | dest='IS_ONLY_FIRST_ANSWER_WORD', action='store_true', 349 | help='If it is set up then first answer words are considered (otherwise, all); ' + 350 | 'by default {0}'.format('use_first_answer_words' if DEFAULT_IS_ONLY_FIRST_ANSWER_WORD else 'use_all_answer_words')) 351 | arg_parser.add_argument('--use_all_answer_words', 352 | dest='IS_ONLY_FIRST_ANSWER_WORD', action='store_false', 353 | help='If it is set up then all answer words are considered (otherwise, only the first); ' + 354 | 'by default {0}'.format('use_first_answer_words' if DEFAULT_IS_ONLY_FIRST_ANSWER_WORD else 'use_all_answer_words')) 355 | arg_parser.set_defaults(IS_ONLY_FIRST_ANSWER_WORD=DEFAULT_IS_ONLY_FIRST_ANSWER_WORD) 356 | arg_parser.add_argument('--use_whole_answer_as_answer_word', 357 | dest='IS_WHOLE_ANSWER_AS_ANSWER_WORD', action='store_true', 358 | help='If it is set up then one answer words is the whole answer; ' + 359 | 'by default {0}'.format('answer word is the whole answer' if DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD else 'split answer into answer words')) 360 | arg_parser.add_argument('--split_answer_into_answer_words', 361 | dest='IS_WHOLE_ANSWER_AS_ANSWER_WORD', action='store_false', 362 | help='If it is set up then one answer words is the whole answer; ' + 363 | 'by default {0}'.format('answer word is the whole answer' if DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD else 'split answer into answer words')) 364 | arg_parser.set_defaults(IS_WHOLE_ANSWER_AS_ANSWER_WORD=DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD) 365 | # not-working arguments 366 | #arg_parser.add_argument('--gpu_core', 367 | #dest='GPU_CORE', default=-1, type=int, 368 | #help='GPU Core, if -1 then the core is read from the config file') 369 | args = arg_parser.parse_args() 370 | 371 | return args 372 | 373 | -------------------------------------------------------------------------------- /neural_solver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | """ 5 | QA model. 6 | Uses embedding. 7 | 8 | Implementation in Keras. 
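# Example invocation (a sketch only: the flag values below are illustrative,
# all flags are defined in kraino/utils/parsers.py above, and the chosen
# dataset plus any required visual features must be available under data/):
#
#   python neural_solver.py \
#       --dataset vqa-real_images-open_ended \
#       --model sequential-blind-single_answer \
#       --vqa_answer_mode single_frequent \
#       --metric vqa \
#       --max_epoch 10 --max_era 5 --batch_size 512 \
#       --verbosity monitor_val_metric \
#       --store_weights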
9 | 10 | Author: Mateusz Malinowski 11 | Email: mmalinow@mpi-inf.mpg.de 12 | """ 13 | 14 | import os 15 | import numpy as np 16 | 17 | from socket import gethostname 18 | from spacy.en import English 19 | from toolz import compose 20 | from toolz import frequencies 21 | from timeit import default_timer as timer 22 | 23 | from keras.optimizers import Adadelta 24 | from keras.optimizers import Adagrad 25 | from keras.optimizers import Adam 26 | from keras.optimizers import RMSprop 27 | from keras.optimizers import SGD 28 | 29 | from keras.preprocessing import sequence 30 | 31 | from kraino.core import recurrent_net 32 | from kraino.core import model_zoo 33 | from kraino.core.model_zoo import Config 34 | from kraino.core.visual_model_zoo import get_visual_features 35 | from kraino.core.visual_model_zoo import imagenet_mean_preprocess_image_tensor_fun 36 | 37 | from kraino.utils import data_provider 38 | from kraino.utils.read_write import pickle_model 39 | #from kraino.utils.read_write import model_to_json 40 | from kraino.utils.parsers import parse_input_arguments 41 | from kraino.utils.callbacks import StoreModelWeightsOnEraEnd 42 | from kraino.utils.callbacks import PrintOnEraBegin 43 | from kraino.utils.callbacks import PrintPerformanceMetricOnEraEnd 44 | from kraino.utils.callbacks import MonitorPredictionsOnEndEra 45 | #from kraino.utils.callbacks import PlotPerformanceMetricOnEraEnd 46 | #from kraino.utils.callbacks import StandardPerformancePlot 47 | from kraino.utils.callbacks import LearningRateReducerWithEarlyStopping 48 | from kraino.utils.callbacks import LearningRateReducerEveryPatienceEpoch 49 | from kraino.utils.input_output_space import build_vocabulary 50 | from kraino.utils.input_output_space import encode_questions_index 51 | from kraino.utils.input_output_space import encode_questions_dense 52 | from kraino.utils.input_output_space import encode_answers_one_hot 53 | #from kraino.utils.model_visualization import model_picture 54 | 55 | from theano import config as theano_config 56 | 57 | 58 | ### 59 | # Constants 60 | ### 61 | # path to the outputted verbosity 62 | VERBOSITY_PATH_PREFIX = os.path.join('local', 'logs', 'verbosity') 63 | 64 | # class normalized logs 65 | CLASS_NORMALIZED_PATH_PREFIX = os.path.join('local', 'logs', 'class_normalized') 66 | 67 | # path where the weights are saved 68 | WEIGHTS_PATH_PREFIX = os.path.join('local', 'weights', 'weights') 69 | 70 | OPTIMIZERS = { \ 71 | 'sgd':SGD, 72 | 'adagrad':Adagrad, 73 | 'adadelta':Adadelta, 74 | 'rmsprop':RMSprop, 75 | 'adam':Adam, 76 | } 77 | ### 78 | 79 | ### 80 | # Functions 81 | ### 82 | def main(params): 83 | verbosity_tmp = params['VERBOSITY'] 84 | 85 | # seperate verbosity levels by ':' or ',' or ' ' (space) 86 | if not verbosity_tmp == '': 87 | if ':' in verbosity_tmp : 88 | delimiter = ':' 89 | elif ',' in verbosity_tmp: 90 | delimiter = ',' 91 | else: 92 | delimiter = ' ' 93 | verbosity = verbosity_tmp.split(delimiter) 94 | 95 | verbosity_path_longprefix = VERBOSITY_PATH_PREFIX + \ 96 | '.' + params['MODEL'] + '.' + params['DATASET'] + \ 97 | '.' + gethostname() + '.' + theano_config.device + \ 98 | '.epoch_{epoch:02d}.era_{era:02d}' 99 | 100 | class_normalized_path_longprefix = CLASS_NORMALIZED_PATH_PREFIX + \ 101 | '.' + params['MODEL'] + '.' + params['DATASET'] + \ 102 | '.' + gethostname() + '.' 
+ theano_config.device + \ 103 | '.epoch_{epoch:02d}.era_{era:02d}' 104 | else: 105 | verbosity = [] 106 | 107 | model_path=os.path.join('local', 'models', 'model.{0}.{1}.pkl'.format( 108 | params['MODEL'], params['DATASET'])) 109 | 110 | era_weights_loader = params['WEIGHTS_LOADER_ERA'] 111 | if era_weights_loader >= 0: 112 | is_load_weights = True 113 | else: 114 | is_load_weights = False 115 | 116 | if params['WEIGHTS_LOADER_NAME'] == '': 117 | weights_loader_name = gethostname() 118 | else: 119 | weights_loader_name = params['WEIGHTS_LOADER_NAME'] 120 | weights_path = WEIGHTS_PATH_PREFIX + '.' + \ 121 | params['MODEL'] + '.' + params['DATASET'] + \ 122 | '.' + weights_loader_name + \ 123 | '.epoch_{epoch:02d}.era_{era:02d}.hdf5' 124 | 125 | ### 126 | # Load the dataset 127 | ### 128 | dp = data_provider.select[params['DATASET']] 129 | train_dataset = dp['text']( 130 | train_or_test='train', 131 | answer_mode=params['VQA_ANSWER_MODE'], 132 | keep_top_qa_pairs=params['NUM_TOP_QA_PAIRS']) 133 | train_x, train_y = train_dataset['x'], train_dataset['y'] 134 | print('Number of training examples {0}'.format(len(train_x))) 135 | train_image_names = train_dataset['img_name'] 136 | train_visual_features = get_visual_features( 137 | data_provider=dp, 138 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 139 | train_or_test='train', 140 | image_names_list=train_image_names, 141 | parts_extractor=params['PARTS_EXTRACTOR'], 142 | max_parts=params['MAX_MEMORY_TIME_STEPS'], 143 | perception=params['PERCEPTION'], 144 | layer=params['PERCEPTION_LAYER'], 145 | second_layer=params['PERCEPTION_SECOND_LAYER'], 146 | preprocess_image_tensor_fun=imagenet_mean_preprocess_image_tensor_fun) 147 | train_question_id = train_dataset['question_id'] 148 | if np.any([params['IS_VALIDATION_SET'], 149 | 'monitor_val_metric' in verbosity, 150 | 'monitor_val_predictions' in verbosity, 151 | 'plot_val_metric' in verbosity]): 152 | val_dataset = dp['text']( 153 | train_or_test='val', 154 | answer_mode='single_confident', 155 | keep_top_qa_pairs=0) 156 | val_x, val_y = val_dataset['x'], val_dataset['y'] 157 | val_question_id = val_dataset['question_id'] 158 | val_image_names = val_dataset['img_name'] 159 | val_visual_features = get_visual_features( 160 | data_provider=dp, 161 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 162 | train_or_test='val', 163 | image_names_list=val_image_names, 164 | parts_extractor=params['PARTS_EXTRACTOR'], 165 | max_parts=params['MAX_MEMORY_TIME_STEPS'], 166 | perception=params['PERCEPTION'], 167 | layer=params['PERCEPTION_LAYER'], 168 | second_layer=params['PERCEPTION_SECOND_LAYER'], 169 | preprocess_image_tensor_fun=imagenet_mean_preprocess_image_tensor_fun) 170 | if np.any(['monitor_test_metric' in verbosity, 171 | 'monitor_test_predictions' in verbosity, 172 | 'plot_test_metric' in verbosity]): 173 | test_dataset = dp['text']( 174 | train_or_test='test', 175 | answer_mode='single_confident', 176 | keep_top_qa_pairs=0) 177 | test_x, test_y = test_dataset['x'], test_dataset['y'] 178 | test_question_id = test_dataset['question_id'] 179 | test_image_names = test_dataset['img_name'] 180 | test_visual_features = get_visual_features( 181 | data_provider=dp, 182 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 183 | train_or_test='test', 184 | image_names_list=test_image_names, 185 | parts_extractor=params['PARTS_EXTRACTOR'], 186 | max_parts=params['MAX_MEMORY_TIME_STEPS'], 187 | perception=params['PERCEPTION'], 188 | layer=params['PERCEPTION_LAYER'], 
189 | second_layer=params['PERCEPTION_SECOND_LAYER'], 190 | preprocess_image_tensor_fun=imagenet_mean_preprocess_image_tensor_fun) 191 | 192 | ### 193 | # Building vocabularies 194 | ### 195 | split_symbol = '{' 196 | if type(train_x[0]) is unicode: 197 | # choose a split symbol that doesn't exist in text 198 | split_function = lambda x: unicode.split(x, split_symbol) 199 | elif type(train_x[0]) is str: 200 | split_function = lambda x: str.split(x, split_symbol) 201 | else: 202 | raise NotImplementedError() 203 | 204 | wordcount = compose(frequencies, split_function) 205 | wordcount_x = wordcount(split_symbol.join(train_x).replace(' ',split_symbol)) 206 | if params['IS_WHOLE_ANSWER_AS_ANSWER_WORD']: 207 | wordcount_y = wordcount(split_symbol.join(train_y)) 208 | else: 209 | wordcount_y = wordcount(split_symbol.join(train_y).replace( 210 | train_dataset['answer_words_delimiter'],split_symbol)) 211 | 212 | word2index_x, index2word_x = build_vocabulary( 213 | this_wordcount=wordcount_x, 214 | is_reset=True, 215 | truncate_to_most_frequent=params['TRUNCATE_INPUT_SPACE']) 216 | word2index_y, index2word_y = build_vocabulary( 217 | this_wordcount=wordcount_y, 218 | is_reset=True, 219 | truncate_to_most_frequent=params['TRUNCATE_OUTPUT_SPACE']) 220 | 221 | print('Size of the input {0}, and output vocabularies {1}'.\ 222 | format(len(word2index_x), len(word2index_y))) 223 | 224 | # save vocabulary 225 | ### 226 | 227 | ### 228 | # Building input/output 229 | # Dimensions: 230 | # data points 231 | # time steps 232 | # encodings of the words 233 | ### 234 | if params['WORD_REPRESENTATION'] == 'one_hot': 235 | one_hot_x = encode_questions_index(train_x, word2index_x) 236 | X_train = sequence.pad_sequences(one_hot_x, maxlen=params['MAX_INPUT_TIME_STEPS']) 237 | elif params['WORD_REPRESENTATION'] == 'dense': 238 | word_encoder = English() 239 | X_train = encode_questions_dense( 240 | x=train_x, 241 | word_encoder=word_encoder, 242 | max_time_steps=params['MAX_INPUT_TIME_STEPS'], 243 | is_remove_question_symbol=True) 244 | else: 245 | raise NotImplementedError() 246 | if params['IS_WHOLE_ANSWER_AS_ANSWER_WORD']: 247 | train_answer_words_delimiter = None 248 | else: 249 | train_answer_words_delimiter = train_dataset['answer_words_delimiter'] 250 | Y, train_y_gt = encode_answers_one_hot(train_y, word2index_y, 251 | max_answer_time_steps=params['MAX_OUTPUT_TIME_STEPS'], 252 | is_only_first_answer_word=params['IS_ONLY_FIRST_ANSWER_WORD'], 253 | answer_words_delimiter=train_answer_words_delimiter) 254 | 255 | if '-bidirectional-' in params['MODEL']: 256 | train_input = [X_train] * 2 257 | elif '-cnn_3views-' in params['MODEL']: 258 | train_input = [X_train] * 3 259 | elif '-cnn_kviews-' in params['MODEL']: 260 | train_input = [X_train] * params['LANGUAGE_CNN_VIEWS'] 261 | else: 262 | train_input = [X_train] 263 | 264 | if '-multimodal-' in params['MODEL']: 265 | train_input.append(train_visual_features) 266 | 267 | if np.any([params['IS_VALIDATION_SET'], 268 | 'monitor_val_metric' in verbosity, 269 | 'monitor_val_predictions' in verbosity, 270 | 'plot_val_metric' in verbosity]): 271 | if params['WORD_REPRESENTATION'] == 'one_hot': 272 | one_hot_x_val= encode_questions_index(val_x, word2index_x) 273 | X_val = sequence.pad_sequences(one_hot_x_val, 274 | maxlen=params['MAX_INPUT_TIME_STEPS']) 275 | elif params['WORD_REPRESENTATION'] == 'dense': 276 | X_val = encode_questions_dense( 277 | x=val_x, 278 | word_encoder=word_encoder, 279 | max_time_steps=params['MAX_INPUT_TIME_STEPS'], 280 | 
is_remove_question_symbol=True) 281 | else: 282 | NotImplementedError() 283 | 284 | if params['IS_WHOLE_ANSWER_AS_ANSWER_WORD']: 285 | val_answer_words_delimiter = None 286 | else: 287 | val_answer_words_delimiter = val_dataset['answer_words_delimiter'] 288 | Y_val, _ = encode_answers_one_hot(val_y, word2index_y, 289 | max_answer_time_steps=params['MAX_OUTPUT_TIME_STEPS'], 290 | is_only_first_answer_word=params['IS_ONLY_FIRST_ANSWER_WORD'], 291 | answer_words_delimiter=val_answer_words_delimiter) 292 | if '-bidirectional-' in params['MODEL']: 293 | val_input = [X_val] * 2 294 | elif '-cnn_3views-' in params['MODEL']: 295 | val_input = [X_val] * 3 296 | elif '-cnn_kviews-' in params['MODEL']: 297 | val_input = [X_val] * params['LANGUAGE_CNN_VIEWS'] 298 | else: 299 | val_input = [X_val] 300 | 301 | if '-multimodal-' in params['MODEL']: 302 | val_input.append(val_visual_features) 303 | validation_set = (val_input, Y_val) 304 | 305 | if np.any(['monitor_test_metric' in verbosity, 306 | 'monitor_test_predictions' in verbosity, 307 | 'plot_test_metric' in verbosity]): 308 | if params['WORD_REPRESENTATION'] == 'one_hot': 309 | one_hot_x_test = encode_questions_index(test_x, word2index_x) 310 | X_test = sequence.pad_sequences(one_hot_x_test, 311 | maxlen=params['MAX_INPUT_TIME_STEPS']) 312 | elif params['WORD_REPRESENTATION'] == 'dense': 313 | X_test = encode_questions_dense( 314 | x=test_x, 315 | word_encoder=word_encoder, 316 | max_time_steps=params['MAX_INPUT_TIME_STEPS'], 317 | is_remove_question_symbol=True) 318 | else: 319 | NotImplementedError() 320 | if '-bidirectional-' in params['MODEL'] \ 321 | and 'sequential-blind' in params['MODEL']: 322 | test_input = [X_test] * 2 323 | elif '-cnn_3views-' in params['MODEL']: 324 | test_input = [X_test] * 3 325 | elif '-cnn_kviews-' in params['MODEL']: 326 | test_input = [X_test] * params['LANGUAGE_CNN_VIEWS'] 327 | else: 328 | test_input = [X_test] 329 | if '-multimodal-' in params['MODEL']: 330 | test_input.append(test_visual_features) 331 | 332 | # convert to numpy arrays 333 | # train_y - original training ys 334 | # train_y_gt - training ys used to learn the model 335 | train_x = np.asarray(train_x) 336 | train_y = np.asarray(train_y) 337 | train_y_gt = np.asarray(train_y_gt) 338 | ### 339 | 340 | ### 341 | # Callbacks 342 | ### 343 | callbacks = [] 344 | if params['IS_SAVE_WEIGHTS']: 345 | callback_store_model = StoreModelWeightsOnEraEnd( 346 | filepath=weights_path, 347 | epoch_interval=params['MAX_EPOCH']) 348 | callbacks.append(callback_store_model) 349 | 350 | callback_print_on_era_begin = PrintOnEraBegin( 351 | epoch_interval=params['MAX_EPOCH'], 352 | message="Era {era:2d} out of " + str(params['MAX_ERA'])) 353 | callbacks.append(callback_print_on_era_begin) 354 | 355 | # verbosity callbacks 356 | if 'monitor_val_metric' in verbosity: 357 | if params['METRIC'] == 'vqa': 358 | results_function = lambda x: \ 359 | val_dataset['vqa_object'].loadRes( 360 | x, val_dataset['questions_path']) 361 | extra_vars = { 362 | 'question_id':val_question_id, 363 | 'vqa_object':val_dataset['vqa_object'], 364 | 'resfun':results_function} 365 | else: 366 | extra_vars = None 367 | callback_metric = PrintPerformanceMetricOnEraEnd( 368 | X=val_input, 369 | y=val_y, 370 | temperature=params['TEMPERATURE'], 371 | index2word_y=index2word_y, 372 | metric_name=params['METRIC'], 373 | epoch_interval=params['MAX_EPOCH'], 374 | extra_vars=extra_vars, 375 | verbosity_path=class_normalized_path_longprefix+'.val.acc', 376 | verbose=1) 377 | 
callbacks.append(callback_metric) 378 | 379 | if 'monitor_test_metric' in verbosity: 380 | if params['METRIC'] == 'vqa': 381 | results_function = lambda x: \ 382 | test_dataset['vqa_object'].loadRes( 383 | x, test_dataset['questions_path']) 384 | extra_vars = { 385 | 'question_id':test_question_id, 386 | 'vqa_object':test_dataset['vqa_object'], 387 | 'resfun':results_function} 388 | else: 389 | extra_vars = None 390 | callback_metric = PrintPerformanceMetricOnEraEnd( 391 | X=test_input, 392 | y=test_y, 393 | temperature=params['TEMPERATURE'], 394 | index2word_y=index2word_y, 395 | metric_name=params['METRIC'], 396 | epoch_interval=params['MAX_EPOCH'], 397 | extra_vars=extra_vars, 398 | verbosity_path=class_normalized_path_longprefix+'.test.acc', 399 | verbose=1) 400 | callbacks.append(callback_metric) 401 | 402 | if 'monitor_train_predictions' in verbosity: 403 | callback_monitor_train_predictions = MonitorPredictionsOnEndEra( 404 | X=train_input, x=train_x, y=train_y, 405 | temperature=params['TEMPERATURE'], 406 | index2word_y=index2word_y, 407 | verbosity_path=verbosity_path_longprefix+'.train.pred', 408 | epoch_interval=params['MAX_EPOCH'], 409 | subset_size=params['TRAINING_SUBSET_SIZE'], 410 | verbose=1) 411 | callbacks.append(callback_monitor_train_predictions) 412 | 413 | if 'monitor_val_predictions' in verbosity: 414 | callback_monitor_train_predictions = MonitorPredictionsOnEndEra( 415 | X=val_input, x=val_x, y=val_y, 416 | temperature=params['TEMPERATURE'], 417 | index2word_y=index2word_y, 418 | verbosity_path=verbosity_path_longprefix+'.val.pred', 419 | epoch_interval=params['MAX_EPOCH'], 420 | subset_size=params['VAL_SUBSET_SIZE'], 421 | verbose=1) 422 | callbacks.append(callback_monitor_train_predictions) 423 | 424 | if 'monitor_test_predictions' in verbosity: 425 | callback_monitor_train_predictions = MonitorPredictionsOnEndEra( 426 | X=test_input, x=test_x, y=test_y, 427 | temperature=params['TEMPERATURE'], 428 | index2word_y=index2word_y, 429 | verbosity_path=verbosity_path_longprefix+'.test.pred', 430 | epoch_interval=params['MAX_EPOCH'], 431 | subset_size=params['TEST_SUBSET_SIZE'], 432 | verbose=1) 433 | callbacks.append(callback_monitor_train_predictions) 434 | 435 | """ 436 | if 'plot_loss' in verbosity: 437 | callback_plot_loss = StandardPerformancePlot( 438 | name='Kraino', 439 | fig_title=params['VISUALIZATION_FIG_LOSS_TITLE'], 440 | url=params['VISUALIZATION_URL']) 441 | callbacks.append(callback_plot_loss) 442 | 443 | if 'plot_train_acc' in verbosity: 444 | callback_plot_acc = StandardPerformancePlot( 445 | name='Kraino', 446 | fig_title='Acc', 447 | url=params['VISUALIZATION_URL'], 448 | what_to_plot=['acc']) 449 | 450 | if 'plot_trainval_acc' in verbosity: 451 | callback_plot_acc = StandardPerformancePlot( 452 | name='Kraino', 453 | fig_title='Acc', 454 | url=params['VISUALIZATION_URL'], 455 | what_to_plot=['acc', 'val_acc']) 456 | callbacks.append(callback_plot_acc) 457 | 458 | if 'plot_train_metric' in verbosity: 459 | if params['METRIC'] == 'vqa': 460 | results_function = lambda x: \ 461 | train_dataset['vqa_object'].loadRes( 462 | x, val_dataset['questions_path']) 463 | extra_vars = { 464 | 'question_id':train_question_id, 465 | 'vqa_object':train_dataset['vqa_object'], 466 | 'resfun':results_function, 467 | } 468 | else: 469 | extra_vars=None 470 | callback_plot_train_metric = PlotPerformanceMetricOnEraEnd( 471 | X=train_input, 472 | y=train_y, 473 | temperature=params['TEMPERATURE'], 474 | index2word_y=index2word_y, 475 | metric_name=params['METRIC'], 476 
| epoch_interval=params['MAX_EPOCH'], 477 | extra_vars=extra_vars, 478 | verbose=1, 479 | name='Kraino', 480 | fig_title='{0} - {1}'.format( 481 | params['VISUALIZATION_FIG_METRIC_TITLE'], 'train'), 482 | url=params['VISUALIZATION_URL']) 483 | callbacks.append(callback_plot_train_metric) 484 | 485 | if 'plot_val_metric' in verbosity: 486 | if params['METRIC'] == 'vqa': 487 | results_function = lambda x: \ 488 | val_dataset['vqa_object'].loadRes( 489 | x, val_dataset['questions_path']) 490 | extra_vars = { 491 | 'question_id':val_question_id, 492 | 'vqa_object':val_dataset['vqa_object'], 493 | 'resfun':results_function, 494 | } 495 | else: 496 | extra_vars=None 497 | callback_plot_val_metric = PlotPerformanceMetricOnEraEnd( 498 | X=val_input, 499 | y=val_y, 500 | temperature=params['TEMPERATURE'], 501 | index2word_y=index2word_y, 502 | metric_name=params['METRIC'], 503 | epoch_interval=params['MAX_EPOCH'], 504 | extra_vars=extra_vars, 505 | verbose=1, 506 | name='Kraino', 507 | fig_title='{0} - {1}'.format( 508 | params['VISUALIZATION_FIG_METRIC_TITLE'], 'val'), 509 | url=params['VISUALIZATION_URL']) 510 | callbacks.append(callback_plot_val_metric) 511 | 512 | if 'plot_test_metric' in verbosity: 513 | if params['METRIC'] == 'vqa': 514 | results_function = lambda x: \ 515 | test_dataset['vqa_object'].loadRes( 516 | x, test_dataset['questions_path']) 517 | extra_vars = { 518 | 'question_id':test_question_id, 519 | 'vqa_object':test_dataset['vqa_object'], 520 | 'resfun':results_function} 521 | else: 522 | extra_vars = None 523 | callback_plot_test_metric = PlotPerformanceMetricOnEraEnd( 524 | X=test_input, 525 | y=test_y, 526 | temperature=params['TEMPERATURE'], 527 | index2word_y=index2word_y, 528 | metric_name=params['METRIC'], 529 | epoch_interval=params['MAX_EPOCH'], 530 | extra_vars=extra_vars, 531 | verbose=1, 532 | name='Kraino', 533 | fig_title='{0} - {1}'.format( 534 | params['VISUALIZATION_FIG_METRIC_TITLE'], 'test'), 535 | url=params['VISUALIZATION_URL']) 536 | callbacks.append(callback_plot_test_metric) 537 | """ 538 | 539 | # training changers 540 | if params['IS_EARLY_STOPPING']: 541 | callback_lr_reducer = LearningRateReducerWithEarlyStopping( 542 | patience=params['LR_PATIENCE'], 543 | reduce_rate=params['REDUCE_RATE'], 544 | reduce_nb=params['MAX_NUMBER_REDUCTIONS'], 545 | is_early_stopping=params['IS_EARLY_STOPPING']) 546 | callbacks.append(callback_lr_reducer) 547 | 548 | if params['IS_LR_FIXED_REDUCTION']: 549 | # reduction after fixed number of epochs 550 | callback_lr_reducer_after_k_epoch = LearningRateReducerEveryPatienceEpoch( 551 | patience=params['LR_PATIENCE'], 552 | reduce_rate=params['REDUCE_RATE'], 553 | reduce_nb=params['MAX_NUMBER_REDUCTIONS']) 554 | callbacks.append(callback_lr_reducer_after_k_epoch) 555 | 556 | 557 | 558 | print('Our callbacks: ' + str(callbacks)) 559 | ### 560 | 561 | ### 562 | # Building model 563 | ### 564 | print('Building model ...') 565 | #nb_words= max(X_train.flatten())+1 566 | input_dim = len(word2index_x.keys()) \ 567 | if params['WORD_REPRESENTATION'] == 'one_hot' \ 568 | else X_train[0][0].shape[0] 569 | output_dim = len(word2index_y.keys()) 570 | visual_dim = train_visual_features.shape[1:] \ 571 | if train_visual_features is not None else 0 572 | # creating the config object that carries arguments for models 573 | model_config = Config( 574 | input_dim=input_dim, 575 | textual_embedding_dim=0 if params['WORD_REPRESENTATION'] == 'dense' 576 | else params['TEXTUAL_EMBEDDING_SIZE'], 577 | 
visual_embedding_dim=params['VISUAL_EMBEDDING_SIZE'], 578 | hidden_state_dim=params['HIDDEN_STATE_SIZE'], 579 | language_cnn_filter_size=params['LANGUAGE_CNN_FILTERS'], 580 | language_cnn_filter_length=params['LANGUAGE_CNN_FILTER_LENGTH'], 581 | language_cnn_views=params['LANGUAGE_CNN_VIEWS'], 582 | language_max_pool_length=params['LANGUAGE_MAX_POOL_LENGTH'], 583 | output_dim=output_dim, 584 | visual_dim=visual_dim, 585 | mlp_hidden_dim=params['MLP_HIDDEN_SIZE'], 586 | merge_mode=params['MERGE_MODE'], 587 | multimodal_merge_mode=params['MULTIMODAL_MERGE_MODE'], 588 | num_mlp_layers=params['NUM_MLP_LAYERS'], 589 | num_language_layers=params['NUM_LANGUAGE_LAYERS'], 590 | mlp_activation=params['MLP_ACTIVATION'], 591 | language_cnn_activation=params['LANGUAGE_CNN_ACTIVATION'], 592 | fusion_layer_index=params['FUSION_LAYER_INDEX'], 593 | is_go_backwards=params['IS_REVERSE_INPUT'], 594 | recurrent_encoder=recurrent_net.select[params['TEXT_ENCODER']], 595 | recurrent_decoder=recurrent_net.select[params['TEXT_DECODER']], 596 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 597 | word_generator=model_zoo.word_generator[params['WORD_GENERATOR']], 598 | max_input_time_steps=params['MAX_INPUT_TIME_STEPS'], 599 | max_output_time_steps=params['MAX_OUTPUT_TIME_STEPS'], 600 | output_word_delimiter=train_dataset['answer_words_delimiter']) 601 | # building the model 602 | model = model_zoo.select[params['MODEL']](model_config) 603 | model.create() 604 | #TODO: Doesn't work with very large models 605 | #model_picture(model=model, to_file=os.path.join('local', 606 | #'model-{0}-{1}.png'.format(params['MODEL'], params['DATASET']))) 607 | if params['LR'] >= 0: 608 | current_optimizer = OPTIMIZERS[params['OPTIMIZER']](lr=params['LR']) 609 | else: 610 | current_optimizer = OPTIMIZERS[params['OPTIMIZER']]() 611 | model.compile( 612 | loss=params['LOSS'], 613 | optimizer=current_optimizer, 614 | class_mode='categorical') 615 | # pickling the model 616 | """ 617 | pickle_model( 618 | path=model_path, 619 | model=model, 620 | word2index_x=word2index_x, 621 | word2index_y=word2index_y, 622 | index2word_x=index2word_x, 623 | index2word_y=index2word_y) 624 | """ 625 | #model_to_json(path=model_path, model=model) 626 | """ 627 | if is_load_weights: 628 | start_era = era_weights_loader 629 | model.load_weights(weights_path.format( 630 | epoch=start_era*params['MAX_EPOCH'], 631 | era=start_era)) 632 | print('Restart the computations with weights from era {0}'.format(start_era)) 633 | start_era += 1 634 | else: 635 | start_era = 0 636 | """ 637 | ### 638 | 639 | ### 640 | # Training a model 641 | ### 642 | total_start_time = timer() 643 | total_number_of_epochs=params['MAX_EPOCH'] * params['MAX_ERA'] 644 | 645 | if params['IS_VALIDATION_SET']: 646 | model.fit(train_input, Y, 647 | batch_size=params['BATCH_SIZE'], 648 | validation_data=validation_set, 649 | nb_epoch=total_number_of_epochs, 650 | callbacks=callbacks, 651 | show_accuracy=True) 652 | elif params['VALIDATION_SPLIT'] > 0.0: 653 | model.fit(train_input, Y, 654 | batch_size=params['BATCH_SIZE'], 655 | validation_split=params['VALIDATION_SPLIT'], 656 | nb_epoch=total_number_of_epochs, 657 | callbacks=callbacks, 658 | show_accuracy=True) 659 | else: 660 | model.fit(train_input, Y, 661 | batch_size=params['BATCH_SIZE'], 662 | nb_epoch=total_number_of_epochs, 663 | callbacks=callbacks, 664 | show_accuracy=True) 665 | 666 | total_end_time = timer() 667 | time_difference = total_end_time - total_start_time 668 | print('In total is {0:.2f}s = {1:.2f}m'\ 
669 | .format(time_difference, time_difference/60.0)) 670 | return True 671 | 672 | 673 | if __name__ == '__main__': 674 | # setting up the input arguments 675 | args = parse_input_arguments() 676 | params = vars(args) 677 | 678 | print(params) 679 | 680 | main(params) 681 | 682 | print('Done!') 683 | 684 | --------------------------------------------------------------------------------
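# Notes on the training loop above (derived from the code, with default values
# taken from kraino/utils/parsers.py):
#   * model.fit() runs for MAX_EPOCH * MAX_ERA Keras epochs in a single call;
#     the era-level callbacks (weight saving, metric monitoring, prediction
#     dumps) fire every MAX_EPOCH epochs through their epoch_interval argument,
#     so the defaults MAX_EPOCH=30 and MAX_ERA=10 give 300 epochs with era
#     boundaries every 30 epochs.
#   * Weights are written under local/weights/ and the verbosity and
#     class-normalized logs under local/logs/ (see the path constants at the
#     top of neural_solver.py).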