├── kraino ├── __init__.py ├── core │ ├── __init__.py │ ├── __init__.pyc │ ├── model_zoo.pyc │ ├── recurrent_net.pyc │ ├── keras_extensions.pyc │ ├── theano_functions.pyc │ ├── visual_model_zoo.pyc │ ├── recurrent_net.py │ ├── keras_extensions.py │ └── visual_model_zoo.py ├── utils │ ├── __init__.py │ ├── vqaEvaluation │ │ ├── __init__.py │ │ ├── vqaEval.pyc │ │ ├── __init__.pyc │ │ ├── .vqaEval.py.swp │ │ ├── vqaClassNormalizedEval.pyc │ │ ├── .vqaClassNormalizedEval.py.swp │ │ ├── vqaEvaluateModel.py │ │ ├── vqaClassNormalizedEval.py │ │ └── vqaEval.py │ ├── vqaTools │ │ ├── __init__.py │ │ ├── vqa.pyc │ │ ├── __init__.pyc │ │ └── vqa.py │ ├── __init__.pyc │ ├── callbacks.pyc │ ├── monitor.pyc │ ├── parsers.pyc │ ├── read_write.pyc │ ├── compute_wups.pyc │ ├── data_provider.pyc │ ├── print_metrics.pyc │ ├── input_output_space.pyc │ ├── model_visualization.pyc │ ├── monitor.py │ ├── model_visualization.py │ ├── print_metrics.py │ ├── read_write.py │ ├── compute_wups.py │ ├── input_output_space.py │ ├── callbacks.py │ ├── data_provider.py │ └── parsers.py └── __init__.pyc ├── data └── set_environment.bash ├── fig ├── one_hot.jpg ├── BOW_model.jpg ├── LSTM_model.jpg ├── challenges.jpg ├── small_taxonomy.jpg ├── BOW_vision_model.jpg ├── LSTM_vision_model.jpg └── features_extractor.jpg ├── boring_function.py ├── LICENSE ├── README.md └── neural_solver.py /kraino/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraino/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraino/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/__init__.py: -------------------------------------------------------------------------------- 1 | author='aagrawal' 2 | -------------------------------------------------------------------------------- /kraino/utils/vqaTools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'aagrawal' 2 | -------------------------------------------------------------------------------- /data/set_environment.bash: -------------------------------------------------------------------------------- 1 | export NLTK_DATA="/home/ubuntu/data/visual_turing_test/nltk_data" 2 | -------------------------------------------------------------------------------- /fig/one_hot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/one_hot.jpg -------------------------------------------------------------------------------- /fig/BOW_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/BOW_model.jpg -------------------------------------------------------------------------------- /fig/LSTM_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/LSTM_model.jpg -------------------------------------------------------------------------------- /fig/challenges.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/challenges.jpg -------------------------------------------------------------------------------- /kraino/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/__init__.pyc -------------------------------------------------------------------------------- /fig/small_taxonomy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/small_taxonomy.jpg -------------------------------------------------------------------------------- /fig/BOW_vision_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/BOW_vision_model.jpg -------------------------------------------------------------------------------- /fig/LSTM_vision_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/LSTM_vision_model.jpg -------------------------------------------------------------------------------- /fig/features_extractor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/fig/features_extractor.jpg -------------------------------------------------------------------------------- /kraino/core/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/__init__.pyc -------------------------------------------------------------------------------- /kraino/core/model_zoo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/model_zoo.pyc -------------------------------------------------------------------------------- /kraino/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/__init__.pyc -------------------------------------------------------------------------------- /kraino/utils/callbacks.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/callbacks.pyc -------------------------------------------------------------------------------- /kraino/utils/monitor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/monitor.pyc -------------------------------------------------------------------------------- /kraino/utils/parsers.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/parsers.pyc -------------------------------------------------------------------------------- 
/kraino/utils/read_write.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/read_write.pyc -------------------------------------------------------------------------------- /kraino/core/recurrent_net.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/recurrent_net.pyc -------------------------------------------------------------------------------- /kraino/utils/compute_wups.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/compute_wups.pyc -------------------------------------------------------------------------------- /kraino/utils/data_provider.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/data_provider.pyc -------------------------------------------------------------------------------- /kraino/utils/print_metrics.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/print_metrics.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaTools/vqa.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaTools/vqa.pyc -------------------------------------------------------------------------------- /kraino/core/keras_extensions.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/keras_extensions.pyc -------------------------------------------------------------------------------- /kraino/core/theano_functions.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/theano_functions.pyc -------------------------------------------------------------------------------- /kraino/core/visual_model_zoo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/core/visual_model_zoo.pyc -------------------------------------------------------------------------------- /kraino/utils/input_output_space.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/input_output_space.pyc -------------------------------------------------------------------------------- /kraino/utils/model_visualization.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/model_visualization.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaTools/__init__.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaTools/__init__.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaEval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/vqaEval.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/__init__.pyc -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/.vqaEval.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/.vqaEval.py.swp -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaClassNormalizedEval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/vqaClassNormalizedEval.pyc -------------------------------------------------------------------------------- /boring_function.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import theano 3 | import keras 4 | 5 | if __name__ == '__main__': 6 | print sys.argv[1] 7 | print theano.__version__ 8 | print keras.__version__ 9 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/.vqaClassNormalizedEval.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mateuszmalinowski/visual_turing_test-tutorial/HEAD/kraino/utils/vqaEvaluation/.vqaClassNormalizedEval.py.swp -------------------------------------------------------------------------------- /kraino/core/recurrent_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | Selects recurrent neural network based on the name. 
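For example, select['lstm'] maps to keras.layers.recurrent.LSTM (see the select dictionary at the bottom of this module).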
3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | from keras.layers.recurrent import GRU 9 | from keras.layers.recurrent import LSTM 10 | from keras.layers.recurrent import SimpleRNN 11 | #from keras.layers.recurrent import JZS1 12 | #from keras.layers.recurrent import JZS2 13 | #from keras.layers.recurrent import JZS3 14 | 15 | 16 | select = { 17 | 'lstm':LSTM, 18 | 'gru':GRU, 19 | 'simpleRNN':SimpleRNN, 20 | #'mut1':JZS1, 21 | #'mut2':JZS2, 22 | #'mut3':JZS3, 23 | #'jzs1':JZS1, 24 | #'jzs2':JZS2, 25 | #'jzs3':JZS3 26 | } 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /kraino/utils/monitor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | """ 5 | Monitoring tools. 
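Provides print_qa, which prints question/answer/prediction triples for a given era and returns a simple accuracy score.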
6 | 7 | Author: Mateusz Malinowski 8 | Email: mmalinow@mpi-inf.mpg.de 9 | """ 10 | 11 | from .read_write import list2file 12 | 13 | 14 | def _dirac(pred, gt): 15 | return int(pred==gt) 16 | 17 | 18 | def print_qa(questions, answers_gt, answers_gt_original, answers_pred, 19 | era, similarity=_dirac, path=''): 20 | """ 21 | In: 22 | questions - list of questions 23 | answers_gt - list of answers (after modifications like truncation) 24 | answers_gt_original - list of answers (before modifications) 25 | answers_pred - list of predicted answers 26 | era - current era 27 | similarity - measure of similarity between gt_original and the prediction; 28 | by default dirac measure 29 | path - path for the output (if empty then stdout is used) 30 | by default an empty path 31 | Out: 32 | the similarity score 33 | """ 34 | assert(len(questions)==len(answers_gt)) 35 | assert(len(questions)==len(answers_pred)) 36 | output=['-'*50, 'Era {0}'.format(era)] 37 | score = 0.0 38 | for k, q in enumerate(questions): 39 | a_gt=answers_gt[k] 40 | a_gt_original=answers_gt_original[k] 41 | a_p=answers_pred[k] 42 | score += similarity(a_p, a_gt_original) 43 | output.append('question: {0}\nanswer: {1}\nanswer_original: {2}\nprediction: {3}\n'\ 44 | .format(q, a_gt, a_gt_original, a_p)) 45 | score = (score / len(questions))*100.0 46 | output.append('Score: {0}'.format(score)) 47 | if path == '': 48 | print('%s' % '\n'.join(map(str, output))) 49 | else: 50 | list2file(path, output) 51 | return score 52 | 53 | -------------------------------------------------------------------------------- /kraino/utils/model_visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Graph-like model visualization.
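Renders the topology of a Keras Sequential or Graph model to a PNG file using pydot.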
5 | 6 | Original work: Annet Graham [https://github.com/grahamannett] 7 | """ 8 | import pydot 9 | from keras.models import Graph 10 | from keras.models import Sequential 11 | 12 | 13 | def model_picture(model, to_file='local/model.png'): 14 | 15 | graph = pydot.Dot(graph_type='digraph') 16 | if isinstance(model,Sequential): 17 | previous_node = None 18 | written_nodes = [] 19 | n = 1 20 | for node in model.get_config()['layers']: 21 | # append number in case layers have same name to differentiate 22 | if (node['name'] + str(n)) in written_nodes: 23 | n += 1 24 | current_node = pydot.Node(node['name'] + str(n)) 25 | written_nodes.append(node['name'] + str(n)) 26 | graph.add_node(current_node) 27 | if previous_node: 28 | graph.add_edge(pydot.Edge(previous_node, current_node)) 29 | previous_node = current_node 30 | graph.write_png(to_file) 31 | 32 | elif isinstance(model,Graph): 33 | # don't need to append number for names since all nodes labeled 34 | for input_node in model.input_config: 35 | graph.add_node(pydot.Node(input_node['name'])) 36 | 37 | # intermediate and output nodes have input defined 38 | for layer_config in [model.node_config, model.output_config]: 39 | for node in layer_config: 40 | graph.add_node(pydot.Node(node['name'])) 41 | # possible to have multiple 'inputs' vs 1 'input' 42 | if node['inputs']: 43 | for e in node['inputs']: 44 | graph.add_edge(pydot.Edge(e, node['name'])) 45 | else: 46 | graph.add_edge(pydot.Edge(node['input'], node['name'])) 47 | 48 | graph.write_png(to_file) 49 | 50 | -------------------------------------------------------------------------------- /kraino/core/keras_extensions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Additional theano/keras functions. 3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | #import marshal 9 | import numpy 10 | #import types 11 | 12 | from keras.layers.convolutional import Convolution1D 13 | from keras.layers.convolutional import MaxPooling1D 14 | 15 | from keras.layers.core import Lambda 16 | from keras.layers.core import MaskedLayer 17 | from keras.layers.core import TimeDistributedMerge 18 | 19 | from keras import backend as K 20 | 21 | 22 | ## functions ## 23 | def time_distributed_nonzero_max_pooling(x): 24 | """ 25 | Computes maximum along the first (time) dimension. 26 | It ignores the mask m. 27 | 28 | In: 29 | x - input; a 3D tensor 30 | mask_value - value to mask out, if None then no masking; 31 | by default 0.0, 32 | """ 33 | 34 | import theano.tensor as T 35 | 36 | mask_value=0.0 37 | x = T.switch(T.eq(x, mask_value), -numpy.inf, x) 38 | masked_max_x = x.max(axis=1) 39 | # replace infinities with mask_value 40 | masked_max_x = T.switch(T.eq(masked_max_x, -numpy.inf), 0, masked_max_x) 41 | return masked_max_x 42 | 43 | 44 | def time_distributed_masked_ave(x, m): 45 | """ 46 | Computes average along the first (time) dimension. 47 | 48 | In: 49 | x - input; a 3D tensor 50 | m - mask 51 | """ 52 | tmp = K.sum(x, axis=1) 53 | nonzeros = K.sum(m, axis=-1) 54 | return tmp / K.expand_dims(K.cast(nonzeros, tmp.dtype)) 55 | 56 | 57 | def time_distributed_masked_max(x, m): 58 | """ 59 | Computes max along the first (time) dimension. 
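Positions where the mask is off are ignored; if all positions are masked, the maximum falls back to 0.0.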
60 | 61 | In: 62 | x - input; a 3D tensor 63 | m - mask 64 | m_value - value for masking 65 | """ 66 | # place infinities where mask is off 67 | m_value = 0.0 68 | tmp = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0) 69 | x_with_inf = x + K.expand_dims(tmp) 70 | x_max = K.max(x_with_inf, axis=1) 71 | r = K.switch(K.equal(x_max, -numpy.inf), m_value, x_max) 72 | return r 73 | 74 | 75 | ## classes ## 76 | 77 | # Transforms existing layers to masked layers 78 | class MaskedTimeDistributedMerge(MaskedLayer, TimeDistributedMerge): 79 | pass 80 | 81 | 82 | class MaskedConvolution1D(MaskedLayer, Convolution1D): 83 | pass 84 | 85 | 86 | class MaskedMaxPooling1D(MaskedLayer, MaxPooling1D): 87 | pass 88 | 89 | 90 | # auxiliary mask-aware layers 91 | class DropMask(MaskedLayer): 92 | """ 93 | Removes a mask from the layer. 94 | """ 95 | def get_output_mask(self, train=False): 96 | return None 97 | 98 | 99 | class LambdaWithMask(MaskedLayer, Lambda): 100 | """ 101 | Lambda function that takes a two argument function, and returns 102 | a value returned by the function applied to the output of the previous layer 103 | and the mask. 104 | 105 | That is: LambdaWithMask(f) = f(previous, mask) 106 | """ 107 | def get_output(self, train=False): 108 | #func = marshal.loads(self.function) 109 | #func = types.FunctionType(func, globals()) 110 | func = self.function 111 | if hasattr(self, 'previous'): 112 | return func(self.previous.get_output(train), 113 | self.previous.get_output_mask(train)) 114 | else: 115 | return func(self.input, self.get_output_mask(train)) 116 | 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | To run this tutorial you need to run jupyter. 3 | 4 | If you run jupyter remotely, you can use `jupyter notebook --ip=0.0.0.0` 5 | 6 | Main file: visual_turing_test.ipynb 7 | Readers are, however, encouraged to download the notebook together 8 | with the associated files and go through the tutorial on their own. 9 | 10 | The tutorial should be run on a Linux machine. 11 | Please also make sure that all Installation requirements are fulfilled 12 | and you have similar versions of Theano and Keras (see 'Tested on'). 13 | 14 | Please contact mmalinow@mpi-inf.mpg.de if you encounter any problems. 15 | 16 | # Kraino - Keras-based RNN for Visual Turing Test 17 | Keras implementation of 'Ask Your Neurons'. 18 | * Free software: MIT license 19 | * If you use this library, please cite our "Ask Your Neurons" paper [1] 20 | * Note that we use a simplified version of Kraino for the purpose of the 21 | Tutorial 22 | 23 | ## Installation 24 | Requirements: 25 | * Theano 26 | * Keras (fchollet) 27 | * toolz 28 | * h5py 29 | * Bokeh (0.10.0) 30 | * nltk (required by WUPS metrics) 31 | * pydot 32 | * spacy 33 | 34 | Additional: 35 | * VQA (VT-vision-lab/VQA) for Visual Question Answering 36 | * vqaEvaluation for the evaluation metrics 37 | * vqaTools for the dataset providers 38 | * both should be placed in the kraino/utils folder 39 | 40 | 41 | ## Folders structure 42 | data/ 43 | 44 | daquar/ 45 | 46 | vqa/ 47 | 48 | ... 49 | 50 | kraino/ 51 | 52 | local/ 53 | 54 | logs/ 55 | 56 | weights/ 57 | 58 | model-*.pkl 59 | 60 | kraino/ 61 | 62 | __init__.py 63 | 64 | core/ 65 | 66 | utils/ 67 | 68 | 69 | data 70 | * stores all datasets 71 | 72 | kraino 73 | * source code and local output 74 | * local 75 | * stores logs (e.g.
predictions) in the 'logs' folder 76 | * stores weights of different models in the 'weights' folder 77 | * stores model topologies as '.pkl' files 78 | * kraino 79 | * stores the models in the 'core' folder 80 | * stores functions (dataset providers or callbacks) in the 'utils' folder 81 | 82 | ## Eras 83 | Kraino counts computational cycles in eras (not epochs). 84 | Every era ends when "MAX EPOCH" is reached; then the training proceeds to 85 | the next era. Before and after each era the (callback) actions are executed. 86 | 87 | ## Warning 88 | The framework is under continuous development, and hence it is not guaranteed 89 | that the API won't change in the future. To avoid adapting to a new API, you can 90 | clone from a specific commit hash. 91 | 92 | ## Tested on 93 | * Python 2.7.3 94 | * Theano:0.8.0.dev0.dev-63990436c98f107cf120f3578021a5d259ecf352 95 | * Keras:b587aeee1c1be3633a56b945af3e7c2c303369ca 96 | 97 | ## Bibliography 98 | 99 | @article{malinowski2016ask, 100 | 101 | title={Ask Your Neurons: A Deep Learning Approach to Visual Question Answering}, 102 | 103 | author={Malinowski, Mateusz and Rohrbach, Marcus and Fritz, Mario}, 104 | 105 | journal={arXiv preprint arXiv:1605.02697}, 106 | 107 | year={2016} 108 | 109 | } 110 | 111 | @inproceedings{malinowski2015ask, 112 | 113 | title={Ask your neurons: A neural-based approach to answering questions about images}, 114 | 115 | author={Malinowski, Mateusz and Rohrbach, Marcus and Fritz, Mario}, 116 | 117 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 118 | 119 | pages={1--9}, 120 | 121 | year={2015} 122 | 123 | } 124 | 125 | @inproceedings{malinowski2014multi, 126 | 127 | title={A multi-world approach to question answering about real-world scenes based on uncertain input}, 128 | 129 | author={Malinowski, Mateusz and Fritz, Mario}, 130 | 131 | booktitle={Advances in Neural Information Processing Systems}, 132 | 133 | pages={1682--1690}, 134 | 135 | year={2014} 136 | 137 | } 138 | 139 | @article{malinowski2016tutorial, 140 | 141 | title={Tutorial on Answering Questions about Images with Deep Learning}, 142 | 143 | author={Malinowski, Mateusz and Fritz, Mario}, 144 | 145 | journal={arXiv preprint arXiv:1610.01076}, 146 | 147 | year={2016} 148 | 149 | } 150 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaEvaluateModel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ 5 | Slightly modified variant of the original script.
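Usage: python vqaEvaluateModel datasetFold resultType isVisualisation (e.g. python vqaEvaluateModel val image_bow False).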
6 | 7 | Author of the original: Aishwarya Agrawal 8 | """ 9 | 10 | import sys 11 | dataDir = '/BS/databases/vqa_1.0/VQA' 12 | sys.path.insert(0, '../vqaTools') 13 | from vqa import VQA 14 | from vqaClassNormalizedEval import VQAClassNormalizedEval as VQAEval 15 | import matplotlib.pyplot as plt 16 | import skimage.io as io 17 | import json 18 | import random 19 | import os 20 | 21 | if len(sys.argv) != 4: 22 | print 'Usage: python vqaEvaluateModel datasetFold resultType isVisualisation' 23 | print 'E.g.: python vqaEvaluateModel val image_bow False' 24 | sys.exit(1) 25 | 26 | datasetFold = sys.argv[1] 27 | resultType = sys.argv[2] 28 | if sys.argv[3] == 'True': 29 | isVisualisation = True 30 | elif sys.argv[3] == 'False': 31 | isVisualisation = False 32 | else: 33 | raise NotImplementedError() 34 | 35 | # set up file names and paths 36 | taskType ='OpenEnded' 37 | dataType ='mscoco' # 'mscoco' for real and 'abstract_v002' for abstract 38 | if datasetFold == 'train': 39 | dataSubType ='train2014' 40 | elif datasetFold == 'val': 41 | dataSubType = 'val2014' 42 | else: 43 | raise NotImplementedError() 44 | annFile ='%s/Annotations/%s_%s_annotations.json'%(dataDir, dataType, dataSubType) 45 | quesFile ='%s/Questions/%s_%s_%s_questions.json'%(dataDir, taskType, dataType, dataSubType) 46 | imgDir ='%s/Images/%s/%s/' %(dataDir, dataType, dataSubType) 47 | fileTypes = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType'] 48 | 49 | # An example result json file has been provided in './Results' folder. 50 | 51 | [resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = \ 52 | ['../../../local/results/%s.%s.%s.%s.%s.json'%(taskType, dataType, dataSubType, resultType, fileType) for fileType in fileTypes] 53 | 54 | # create vqa object and vqaRes object 55 | vqa = VQA(annFile, quesFile) 56 | vqaRes = vqa.loadRes(resFile, quesFile) 57 | # create vqaEval object by taking vqa and vqaRes 58 | vqaEval = VQAEval(vqa, vqaRes, n=2) #n is precision of accuracy (number of places after decimal), default is 2 59 | 60 | # evaluate results 61 | """ 62 | If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function 63 | By default it uses all the question ids in annotation file 64 | """ 65 | vqaEval.evaluate() 66 | # print accuracies 67 | print "\n" 68 | print "Per Question Type Accuracy is the following:" 69 | for quesType in vqaEval.accuracy['perQuestionType']: 70 | print "%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType]) 71 | print "Overall Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']) 72 | print "\n" 73 | print "Overall per class accuracy is %.02f\n" %(vqaEval.accuracy['classNormalizedOverall']) 74 | print "\n" 75 | print "Per Answer Type Accuracy is the following:" 76 | for ansType in vqaEval.accuracy['perAnswerType']: 77 | print "%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType]) 78 | print "\n" 79 | # demo how to use evalQA to retrieve low score result 80 | if isVisualisation == True: 81 | evals = [quesId for quesId in vqaEval.evalQA if vqaEval.evalQA[quesId]<35] #35 is per question percentage accuracy 82 | if len(evals) > 0: 83 | print 'ground truth answers' 84 | randomEval = random.choice(evals) 85 | randomAnn = vqa.loadQA(randomEval) 86 | vqa.showQA(randomAnn) 87 | 88 | print '\n' 89 | print 'generated answer (accuracy %.02f)'%(vqaEval.evalQA[randomEval]) 90 | ann = vqaRes.loadQA(randomEval)[0] 91 | print "Answer: %s\n" %(ann['answer']) 92 | 93 | imgId = randomAnn[0]['image_id'] 94 
| imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg' 95 | if os.path.isfile(imgDir + imgFilename): 96 | I = io.imread(imgDir + imgFilename) 97 | plt.imshow(I) 98 | plt.axis('off') 99 | plt.show() 100 | 101 | # plot accuracy for various question types 102 | plt.bar(range(len(vqaEval.accuracy['perQuestionType'])), vqaEval.accuracy['perQuestionType'].values(), align='center') 103 | plt.xticks(range(len(vqaEval.accuracy['perQuestionType'])), vqaEval.accuracy['perQuestionType'].keys(), rotation='0',fontsize=10) 104 | plt.title('Per Question Type Accuracy', fontsize=10) 105 | plt.xlabel('Question Types', fontsize=10) 106 | plt.ylabel('Accuracy', fontsize=10) 107 | plt.show() 108 | 109 | # save evaluation results to ./Results folder 110 | json.dump(vqaEval.accuracy, open(accuracyFile, 'w')) 111 | json.dump(vqaEval.evalQA, open(evalQAFile, 'w')) 112 | json.dump(vqaEval.evalQuesType, open(evalQuesTypeFile, 'w')) 113 | json.dump(vqaEval.evalAnsType, open(evalAnsTypeFile, 'w')) 114 | 115 | -------------------------------------------------------------------------------- /kraino/utils/print_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | """ 5 | Selects and prints metrics. 6 | 7 | Author: Mateusz Malinowski 8 | Email: mmalinow@mpi-inf.mpg.de 9 | """ 10 | 11 | import os 12 | 13 | from uuid import uuid4 14 | 15 | from compute_wups import get_metric_score as wups_score 16 | from compute_wups import get_class_metric_score as class_wups_score 17 | from data_provider import vqa_save_results as vqa_store 18 | from vqaEvaluation.vqaClassNormalizedEval import VQAClassNormalizedEval as VQAEval 19 | 20 | 21 | def average_over_dictionary(mydict): 22 | """ 23 | Average over dictionary values. 
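E.g. {'a': 2.0, 'b': 4.0} gives (2.0+4.0)/2 = 3.0.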
24 | """ 25 | ave = sum([x for x in mydict.values()])/len(mydict) 26 | return ave 27 | 28 | 29 | def show_wups(gt_list, pred_list, verbose, extra_vars): 30 | """ 31 | In: 32 | gt_list - ground truth list 33 | pred_list - list of predictions 34 | verbose - if greater than 0 the metric measures are printed out 35 | extra_vars - not used here 36 | 37 | Out: 38 | list of key, value pairs (dict) such that 39 | 'value' denotes the performance number 40 | and 'name' denotes the name of the metric 41 | """ 42 | acc = wups_score(gt_list, pred_list, -1) * 100.0 43 | wups_at_09 = wups_score(gt_list, pred_list, 0.9) * 100.0 44 | #wups_at_0 = wups_score(gt_list, pred_list, 0.0) * 100.0 45 | wups_at_0 = -1.0 46 | per_class_acc_tmp = class_wups_score(gt_list, pred_list, -1) 47 | #per_class_wups_at_09_tmp = class_wups_score(gt_list, pred_list, 0.9) 48 | per_class_wups_at_09_tmp = None 49 | per_class_acc = {k:v*100.0 for k,v in per_class_acc_tmp.items()} 50 | if per_class_wups_at_09_tmp is not None: 51 | per_class_wups_at_09 = {k:v*100.0 for k,v in per_class_wups_at_09_tmp.items()} 52 | else: 53 | per_class_wups_at_09 = None 54 | class_acc = average_over_dictionary(per_class_acc_tmp)*100.0 55 | if per_class_wups_at_09_tmp is not None: 56 | class_wups_at_09 = average_over_dictionary(per_class_wups_at_09_tmp)*100.0 57 | else: 58 | class_wups_at_09 = -1.0 59 | class_wups_at_0 = -1.0 60 | if verbose > 0: 61 | print('METRIC: Accuracy is {0}, wups at 0.9 is {1}, wups at 0.0 is {2}'.format( 62 | acc, wups_at_09, wups_at_0)) 63 | print('CLASS METRIC: Accuracy is {0}, wups at 0.9 is {1}, wups at 0.0 is {2}'.format( 64 | class_acc, class_wups_at_09, class_wups_at_0)) 65 | return [{'value':acc, 'name':'accuracy'}, 66 | {'value':wups_at_09, 'name':'wups at 0.9'}, 67 | {'value':wups_at_0, 'name':'wups at 0.0'}, 68 | {'value':per_class_acc, 'name':'per class accuracy', 69 | 'idiosyncrasy':'long:muted'}, 70 | {'value':per_class_wups_at_09, 'name':'per class wups at 0.9', 71 | 'idiosyncrasy':'long:muted'}, 72 | {'value':class_acc, 'name':'class accuracy'}, 73 | {'value':class_wups_at_09, 'name':'class wups at 0.9'}, 74 | {'value':class_wups_at_0, 'name':'class wups at 0'},] 75 | 76 | def show_vqa(gt_list, pred_list, verbose, extra_vars): 77 | #question_id, vqa_object, 78 | #dataset_root=None): 79 | """ 80 | In: 81 | gt_list - ground truth list 82 | pred_list - list of predictions 83 | verbose - if greater than 0 the metric measures are printed out 84 | extra_vars - extra variables, here are: 85 | extra_vars['vqa'] - the vqa object 86 | extra_vars['resfun'] - function from the results file to the vqa object 87 | extra_vars['question_id'] - list of the question ids 88 | 89 | Out: 90 | list of key, value pairs (dict) such that 91 | 'value' denotes the performance number 92 | and 'name' denotes the name of the metric 93 | """ 94 | # TODO: quite hacky way of creating and next reading the file 95 | if verbose > 0: 96 | print('dumping json file ...') 97 | vqa_object = extra_vars['vqa_object'] 98 | results_path = '/tmp/vqa_metric_{0}.json'.format(uuid4()) 99 | #print(results_path) 100 | vqa_store(extra_vars['question_id'], pred_list, results_path) 101 | vqa_res = extra_vars['resfun'](results_path) 102 | os.remove(results_path) 103 | if verbose > 0: 104 | print('dumping finished') 105 | ### 106 | vqaEval = VQAEval(vqa_object, vqa_res, n=2) 107 | vqaEval.evaluate() 108 | acc_overall = vqaEval.accuracy['overall'] 109 | acc_yes_no = vqaEval.accuracy['perAnswerType']['yes/no'] 110 | acc_number = 
vqaEval.accuracy['perAnswerType']['number'] 111 | acc_other = vqaEval.accuracy['perAnswerType']['other'] 112 | acc_per_class = vqaEval.accuracy['perAnswerClass'] 113 | acc_class_normalized = vqaEval.accuracy['classNormalizedOverall'] 114 | 115 | if verbose > 0: 116 | print('METRIC: Accuracy yes/no is {0}, other is {1}, number is {2}, overall is {3}, class normalized is {4}'.\ 117 | format(acc_yes_no, acc_other, acc_number, acc_overall, acc_class_normalized)) 118 | return [{'value':acc_overall, 'name':'overall accuracy'}, 119 | {'value':acc_yes_no, 'name':'yes/no accuracy'}, 120 | {'value':acc_number, 'name':'number accuracy'}, 121 | {'value':acc_other, 'name':'other accuracy'}, 122 | {'value':acc_class_normalized, 'name':'class accuracy'}, 123 | {'value':acc_per_class, 'name':'per answer class', 124 | 'idiosyncrasy':'long:muted'},] 125 | 126 | 127 | select = { 128 | 'wups' : show_wups, 129 | 'vqa' : show_vqa 130 | } 131 | 132 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaClassNormalizedEval.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | Extension of Agrawal's VQA evaluation script with a class-specific metric. 5 | 6 | Mateusz Malinowski [mmalinow@mpi-inf.mpg.de] 7 | """ 8 | 9 | import numpy as np 10 | 11 | from toolz import frequencies 12 | 13 | from vqaEval import VQAEval 14 | 15 | 16 | class VQAClassNormalizedEval(VQAEval): 17 | """ 18 | A class normalized evaluation metric. 19 | 20 | It assigns to every answer its answer class, and then aggregates the 21 | accuracies per answer class. 22 | The assignment is based on the most frequent answers. 23 | """ 24 | def __init__(self, vqa, vqaRes, n=2): 25 | VQAEval.__init__(self, vqa, vqaRes, n) 26 | 27 | print "Initialize class normalized evaluation..." 28 | # calculates answer frequencies over the current answers (train, val, 29 | # etc.) 30 | quesIds = [x for x in self.params['question_id']] 31 | gts = {} 32 | for quesId in quesIds: 33 | gts[quesId] = self.vqa.qa[quesId] 34 | 35 | # consider frequencies for all answers 36 | all_answers = [x['answer'] for y in gts for x in gts[y]['answers']] 37 | self.answer2freq = frequencies(all_answers) 38 | print "Class normalized evaluation initialized!"
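    # Illustrative sketch (added note, not part of the original script): the
    # class-normalized score computed in setAccuracy below first averages the
    # per-question accuracies within each answer class and then averages the
    # class means, e.g.
    #   accAnswerClass = {'yes': [1.0, 0.0], 'red': [1.0]}
    #   per-class means: [0.5, 1.0]
    #   classNormalizedOverall = 100 * (0.5 + 1.0) / 2 = 75.0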
39 | 40 | def evaluate(self, quesIds=None): 41 | if quesIds == None: 42 | quesIds = [quesId for quesId in self.params['question_id']] 43 | gts = {} 44 | res = {} 45 | for quesId in quesIds: 46 | gts[quesId] = self.vqa.qa[quesId] 47 | res[quesId] = self.vqaRes.qa[quesId] 48 | 49 | # ================================================= 50 | # Compute accuracy 51 | # ================================================= 52 | accQA = [] 53 | accQuesType = {} 54 | accAnsType = {} 55 | accAnswerClass = {} 56 | print "computing accuracy" 57 | step = 0 58 | 59 | for quesId in quesIds: 60 | resAns = res[quesId]['answer'] 61 | resAns = resAns.replace('\n', ' ') 62 | resAns = resAns.replace('\t', ' ') 63 | resAns = resAns.strip() 64 | resAns = self.processPunctuation(resAns) 65 | resAns = self.processDigitArticle(resAns) 66 | gtAcc = [] 67 | gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']] 68 | 69 | # ============================================ 70 | # Create the abstract classes 71 | # ============================================ 72 | # take confident answers if possible 73 | gtAnswersConfident = [ans['answer'] for ans in gts[quesId]['answers'] if ans['answer_confidence'] == 'yes'] 74 | if gtAnswersConfident == []: 75 | gtAnswersConfident = gtAnswers 76 | sortedGtAnswers_y = sorted(gtAnswersConfident) 77 | sortedGtAnswers_x = map(lambda x:self.answer2freq[x], sortedGtAnswers_y) 78 | answerClass = sortedGtAnswers_y[np.argmax(sortedGtAnswers_x)] 79 | # ============================================ 80 | if len(set(gtAnswers)) > 1: 81 | for ansDic in gts[quesId]['answers']: 82 | ansDic['answer'] = self.processPunctuation(ansDic['answer']) 83 | for gtAnsDatum in gts[quesId]['answers']: 84 | otherGTAns = [item for item in gts[quesId]['answers'] if item!=gtAnsDatum] 85 | matchingAns = [item for item in otherGTAns if item['answer']==resAns] 86 | acc = min(1, float(len(matchingAns))/3) 87 | gtAcc.append(acc) 88 | quesType = gts[quesId]['question_type'] 89 | ansType = gts[quesId]['answer_type'] 90 | avgGTAcc = float(sum(gtAcc))/len(gtAcc) 91 | accQA.append(avgGTAcc) 92 | if quesType not in accQuesType: 93 | accQuesType[quesType] = [] 94 | accQuesType[quesType].append(avgGTAcc) 95 | if ansType not in accAnsType: 96 | accAnsType[ansType] = [] 97 | if answerClass not in accAnswerClass: 98 | accAnswerClass[answerClass] = [] 99 | accAnswerClass[answerClass].append(avgGTAcc) 100 | accAnsType[ansType].append(avgGTAcc) 101 | self.setEvalQA(quesId, avgGTAcc) 102 | self.setEvalQuesType(quesId, quesType, avgGTAcc) 103 | self.setEvalAnsType(quesId, ansType, avgGTAcc) 104 | if step%100 == 0: 105 | self.updateProgress(step/float(len(quesIds))) 106 | step = step + 1 107 | self.setAccuracy(accQA, accAnswerClass, accQuesType, accAnsType) 108 | print "Done computing accuracy" 109 | 110 | def setAccuracy(self, accQA, accAnswerClass, accQuesType, accAnsType): 111 | self.accuracy['overall'] = round(100*float(sum(accQA))/len(accQA), self.n) 112 | self.accuracy['classNormalizedOverall'] = \ 113 | round(100*float(sum([sum(x)/len(x) for x in accAnswerClass.values()]))/len(accAnswerClass), self.n) 114 | self.accuracy['perAnswerClass'] = \ 115 | {answerClass: round(100*float(sum(accAnswerClass[answerClass]))/len(accAnswerClass[answerClass]), self.n) for answerClass in accAnswerClass} 116 | self.accuracy['perQuestionType'] = \ 117 | {quesType: round(100*float(sum(accQuesType[quesType]))/len(accQuesType[quesType]), self.n) for quesType in accQuesType} 118 | self.accuracy['perAnswerType'] = \ 119 | {ansType: 
round(100*float(sum(accAnsType[ansType]))/len(accAnsType[ansType]), self.n) for ansType in accAnsType} 120 | 121 | -------------------------------------------------------------------------------- /kraino/utils/read_write.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reads from input file or writes to the output file. 3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | ### 9 | # Helpers 10 | ### 11 | def _dirac(pred, gt): 12 | return int(pred==gt) 13 | 14 | ### 15 | # Main functions 16 | ### 17 | def file2list(filepath): 18 | with open(filepath,'r') as f: 19 | lines =[k for k in 20 | [k.strip() for k in f.readlines()] 21 | if len(k) > 0] 22 | 23 | return lines 24 | 25 | 26 | def list2file(filepath,mylist,permission='w'): 27 | mylist='\n'.join(mylist) 28 | if type(mylist[0]) is unicode: 29 | mylist=mylist.encode('utf-8') 30 | with open(filepath,permission) as f: 31 | f.writelines(mylist) 32 | 33 | 34 | def dump_hdf5_simple(filepath, dataset_name, data): 35 | import h5py 36 | h5f = h5py.File(filepath, 'w') 37 | h5f.create_dataset(dataset_name, data=data) 38 | h5f.close() 39 | 40 | 41 | def load_hdf5_simple(filepath, dataset_name): 42 | import h5py 43 | h5f = h5py.File(filepath, 'r') 44 | tmp = h5f[dataset_name][:] 45 | h5f.close() 46 | return tmp 47 | 48 | 49 | def pickle_model( 50 | path, 51 | model, 52 | word2index_x, 53 | word2index_y, 54 | index2word_x, 55 | index2word_y): 56 | import sys 57 | import cPickle as pickle 58 | modifier=10 59 | tmp = sys.getrecursionlimit() 60 | sys.setrecursionlimit(tmp*modifier) 61 | with open(path, 'wb') as f: 62 | p_dict = {'model':model, 63 | 'word2index_x':word2index_x, 64 | 'word2index_y':word2index_y, 65 | 'index2word_x':index2word_x, 66 | 'index2word_y':index2word_y} 67 | pickle.dump(p_dict, f, protocol=2) 68 | sys.setrecursionlimit(tmp) 69 | 70 | 71 | def unpickle_model(path): 72 | import cPickle as pickle 73 | with open(path, 'rb') as f: 74 | model = pickle.load(f)['model'] 75 | return model 76 | 77 | 78 | def unpickle_vocabulary(path): 79 | import cPickle as pickle 80 | p_dict = {} 81 | with open(path, 'rb') as f: 82 | pickle_load = pickle.load(f) 83 | p_dict['word2index_x'] = pickle_load['word2index_x'] 84 | p_dict['word2index_y'] = pickle_load['word2index_y'] 85 | p_dict['index2word_x'] = pickle_load['index2word_x'] 86 | p_dict['index2word_y'] = pickle_load['index2word_y'] 87 | return p_dict 88 | 89 | 90 | def unpickle_data_provider(path): 91 | import cPickle as pickle 92 | with open(path, 'rb') as f: 93 | dp = pickle.load(f)['data_provider'] 94 | return dp 95 | 96 | 97 | def model_to_json(path, model): 98 | """ 99 | Saves model as a json file under the path. 100 | """ 101 | import json 102 | json_model = model.to_json() 103 | with open(path, 'w') as f: 104 | json.dump(json_model, f) 105 | 106 | 107 | def json_to_model(path): 108 | """ 109 | Loads a model from the json file. 110 | """ 111 | import json 112 | from keras.models import model_from_json 113 | with open(path, 'r') as f: 114 | json_model = json.load(f) 115 | model = model_from_json(json_model) 116 | return model 117 | 118 | 119 | def model_to_text(filepath, model_added): 120 | """ 121 | Save the model to text file. 122 | """ 123 | pass 124 | 125 | 126 | def text_to_model(filepath): 127 | """ 128 | Loads the model from the text file. 
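(Currently a placeholder; not implemented.)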
129 | """ 130 | pass 131 | 132 | 133 | def print_qa(questions, answers_gt, answers_gt_original, answers_pred, 134 | era, similarity=_dirac, path=''): 135 | """ 136 | In: 137 | questions - list of questions 138 | answers_gt - list of answers (after modifications like truncation) 139 | answers_gt_original - list of answers (before modifications) 140 | answers_pred - list of predicted answers 141 | era - current era 142 | similarity - measure that measures similarity between gt_original and prediction; 143 | by default dirac measure 144 | path - path for the output (if empty then stdout is used) 145 | by fedault an empty path 146 | Out: 147 | the similarity score 148 | """ 149 | assert(len(questions)==len(answers_gt)) 150 | assert(len(questions)==len(answers_pred)) 151 | output=['-'*50, 'Era {0}'.format(era)] 152 | score = 0.0 153 | for k, q in enumerate(questions): 154 | a_gt=answers_gt[k] 155 | a_gt_original=answers_gt_original[k] 156 | a_p=answers_pred[k] 157 | score += _dirac(a_p, a_gt_original) 158 | if type(q[0]) is unicode: 159 | tmp = unicode( 160 | 'question: {0}\nanswer: {1}\nanswer_original: {2}\nprediction: {3}\n') 161 | else: 162 | tmp = 'question: {0}\nanswer: {1}\nanswer_original: {2}\nprediction: {3}\n' 163 | output.append(tmp.format(q, a_gt, a_gt_original, a_p)) 164 | score = (score / len(questions))*100.0 165 | output.append('Score: {0}'.format(score)) 166 | if path == '': 167 | print('%s' % '\n'.join(map(str, output))) 168 | else: 169 | list2file(path, output) 170 | return score 171 | 172 | 173 | def dict2file(mydict, path, title=None): 174 | """ 175 | In: 176 | mydict - dictionary to save in a file 177 | path - path where acc_dict is stored 178 | title - the first sentence in the file; 179 | useful if we write many dictionaries 180 | into the same file 181 | """ 182 | tmp = [str(x[0])+':'+str(x[1]) for x in mydict.items()] 183 | if title is not None: 184 | output_list = [title] 185 | output_list.extend(tmp) 186 | else: 187 | output_list = tmp 188 | list2file(path, output_list, 'a') 189 | 190 | -------------------------------------------------------------------------------- /kraino/utils/compute_wups.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | 7 | The script assumes there are two files 8 | - first file with ground truth answers 9 | - second file with predicted answers 10 | both answers are line-aligned 11 | 12 | The script also assumes that answer items are comma separated. 13 | For instance, chair,table,window 14 | 15 | It is also a set measure, so not exactly the same as accuracy 16 | even if dirac measure is used since {book,book}=={book}, also {book,chair}={chair,book} 17 | 18 | Logs: 19 | 18.02.2016 - added partitioning wrt. 
answers 20 | 17.10.2015 - abstracted the metric computations away 21 | 05.09.2015 - white spaces surrounding words are stripped away so that {book, chair}={book,chair} 22 | """ 23 | 24 | import sys 25 | 26 | #import enchant 27 | 28 | from numpy import prod 29 | from nltk.corpus import wordnet as wn 30 | 31 | 32 | def file2list(filepath): 33 | with open(filepath,'r') as f: 34 | lines =[k for k in 35 | [k.strip() for k in f.readlines()] 36 | if len(k) > 0] 37 | 38 | return lines 39 | 40 | 41 | def list2file(filepath,mylist): 42 | mylist='\n'.join(mylist) 43 | with open(filepath,'w') as f: 44 | f.writelines(mylist) 45 | 46 | 47 | def items2list(x): 48 | """ 49 | x - string of comma-separated answer items 50 | """ 51 | return [l.strip() for l in x.split(',')] 52 | 53 | 54 | def fuzzy_set_membership_measure(x,A,m): 55 | """ 56 | Set membership measure. 57 | x: element 58 | A: set of elements 59 | m: point-wise element-to-element measure m(a,b) ~ similarity(a,b) 60 | 61 | This function implements a fuzzy set membership measure: 62 | m(x \in A) = max_{a \in A} m(x,a) 63 | """ 64 | return 0 if A==[] else max(map(lambda a: m(x,a), A)) 65 | 66 | 67 | def score_it(A,T,m): 68 | """ 69 | A: list of A items 70 | T: list of T items 71 | m: set membership measure 72 | m(a \in A) gives a membership quality of a into A 73 | 74 | This function implements a fuzzy accuracy score: 75 | score(A,T) = min{prod_{a \in A} m(a \in T), prod_{t \in T} m(t \in A)} 76 | where A and T are set representations of the answers 77 | and m is a measure 78 | """ 79 | if A==[] and T==[]: 80 | return 1 81 | 82 | # print A,T 83 | 84 | score_left=0 if A==[] else prod(map(lambda a: m(a,T), A)) 85 | score_right=0 if T==[] else prod(map(lambda t: m(t,A),T)) 86 | return min(score_left,score_right) 87 | 88 | 89 | # implementations of different measure functions 90 | def dirac_measure(a,b): 91 | """ 92 | Returns 1 iff a=b and 0 otherwise. 93 | """ 94 | if a==[] or b==[]: 95 | return 0.0 96 | return float(a==b) 97 | 98 | 99 | def wup_measure(a,b,similarity_threshold=0.925): 100 | """ 101 | Returns Wu-Palmer similarity score. 102 | More specifically, it computes: 103 | max_{x \in interp(a)} max_{y \in interp(b)} wup(x,y) 104 | where interp is an 'interpretation field' 105 | """ 106 | def get_semantic_field(a): 107 | weight = 1.0 108 | semantic_field = wn.synsets(a,pos=wn.NOUN) 109 | return (semantic_field,weight) 110 | 111 | 112 | def get_stem_word(a): 113 | """ 114 | Sometimes answer has form word\d+:wordid.
115 | If so we return word and downweight 116 | """ 117 | weight = 1.0 118 | return (a,weight) 119 | 120 | 121 | global_weight=1.0 122 | 123 | (a,global_weight_a)=get_stem_word(a) 124 | (b,global_weight_b)=get_stem_word(b) 125 | global_weight = min(global_weight_a,global_weight_b) 126 | 127 | if a==b: 128 | # they are the same 129 | return 1.0*global_weight 130 | 131 | if a==[] or b==[]: 132 | return 0 133 | 134 | 135 | interp_a,weight_a = get_semantic_field(a) 136 | interp_b,weight_b = get_semantic_field(b) 137 | 138 | if interp_a == [] or interp_b == []: 139 | return 0 140 | 141 | # we take the most optimistic interpretation 142 | global_max=0.0 143 | for x in interp_a: 144 | for y in interp_b: 145 | local_score=x.wup_similarity(y) 146 | if local_score > global_max: 147 | global_max=local_score 148 | 149 | # we need to use the semantic fields and therefore we downweight 150 | # unless the score is high which indicates both are synonyms 151 | if global_max < similarity_threshold: 152 | interp_weight = 0.1 153 | else: 154 | interp_weight = 1.0 155 | 156 | final_score=global_max*weight_a*weight_b*interp_weight*global_weight 157 | return final_score 158 | 159 | 160 | def get_metric_score(gt_list, pred_list, threshold): 161 | """ 162 | Computes metric score. 163 | 164 | In: 165 | gt_list - list of gt answers 166 | pred_list - list of predicted answers 167 | threshold 168 | 169 | Out: 170 | metric score 171 | """ 172 | if threshold == -1: 173 | our_element_membership=dirac_measure 174 | else: 175 | our_element_membership=lambda x,y: wup_measure(x,y,threshold) 176 | 177 | our_set_membership=\ 178 | lambda x,A: fuzzy_set_membership_measure(x,A,our_element_membership) 179 | 180 | score_list=[score_it(items2list(ta),items2list(pa),our_set_membership) 181 | for (ta,pa) in zip(gt_list,pred_list)] 182 | 183 | #final_score=sum(map(lambda x:float(x)/float(len(score_list)),score_list)) 184 | final_score=float(sum(score_list))/float(len(score_list)) 185 | return final_score 186 | 187 | 188 | def get_class_metric_score(gt_list, pred_list, threshold): 189 | """ 190 | Computes class-based metric score. 191 | 192 | In: 193 | gt_list - list of gt answers 194 | pred_list - list of predicted answers 195 | threshold 196 | 197 | Out: 198 | class-based metric score 199 | """ 200 | # creates abstract classes 201 | gt_abstract_classes = set(gt_list) 202 | # partition wrt. 
abstract classes 203 | class_scores = {} 204 | for abstract_class in gt_abstract_classes: 205 | tmp = [(x,k) for k,x in enumerate(gt_list) if x == abstract_class] 206 | gt_list_new, gt_indices = zip(*tmp) 207 | gt_list_new = list(gt_list_new) 208 | gt_indices = list(gt_indices) 209 | pred_list_new = [] 210 | for curr_index in gt_indices: 211 | pred_list_new.append(pred_list[curr_index]) 212 | score = get_metric_score(gt_list_new, pred_list_new, threshold) 213 | class_scores[abstract_class] = score 214 | return class_scores 215 | ### 216 | 217 | 218 | if __name__ == '__main__': 219 | 220 | if len(sys.argv) < 4: 221 | print 'Usage: path to true answers, path to predicted answers, threshold' 222 | print 'If threshold is -1, then the standard Accuracy is used' 223 | sys.exit("3 arguments must be given") 224 | 225 | # folders 226 | gt_filepath=sys.argv[1] 227 | pred_filepath=sys.argv[2] 228 | 229 | input_gt=file2list(gt_filepath) 230 | input_pred=file2list(pred_filepath) 231 | 232 | thresh=float(sys.argv[3]) 233 | 234 | if thresh == -1: 235 | print 'standard Accuracy is used' 236 | else: 237 | print 'soft WUPS at %1.2f is used' % thresh 238 | 239 | final_score = get_metric_score(input_gt, input_pred, thresh) 240 | 241 | # filtering to obtain the results 242 | #print 'full score:', score_list 243 | print 'exact final score:', final_score 244 | print 'final score is %2.2f%%' % (final_score * 100.0) 245 | 246 | -------------------------------------------------------------------------------- /kraino/core/visual_model_zoo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Different Visual Architectures. 3 | 4 | Inspired by: 5 | baraldilorenzo vgg16 model for Keras 6 | MarcBS caffe to keras transformation 7 | 8 | Author: Mateusz Malinowski 9 | Email: mmalinow@mpi-inf.mpg.de 10 | """ 11 | 12 | from keras.models import Sequential 13 | 14 | from keras.layers.convolutional import Convolution2D 15 | from keras.layers.convolutional import MaxPooling2D 16 | from keras.layers.convolutional import ZeroPadding2D 17 | 18 | from keras.layers.core import Dense 19 | from keras.layers.core import Dropout 20 | from keras.layers.core import Flatten 21 | from keras.layers.core import Reshape 22 | 23 | 24 | ### 25 | # Functions 26 | ### 27 | def imagenet_mean_preprocess_image_tensor_fun(x): 28 | """ 29 | In: 30 | x - image tensor of size (#images, #channels, #dim1, #dim2) 31 | 32 | Out: 33 | image tensor x with subtracted imagenet mean 34 | """ 35 | y = x 36 | y[:,0,:,:] -= 103.939 37 | y[:,1,:,:] -= 116.779 38 | y[:,2,:,:] -= 123.68 39 | return y 40 | 41 | 42 | def get_visual_features( 43 | data_provider, 44 | trainable_perception_name, 45 | train_or_test, 46 | image_names_list, 47 | parts_extractor, 48 | max_parts, 49 | perception, 50 | layer, 51 | second_layer, 52 | preprocess_image_tensor_fun 53 | ): 54 | """ 55 | In: 56 | data_provider - data provider function 57 | train_or_test - training, validation, or test set 58 | image_names_list - list with names of images 59 | parts_extractor - name for the parts extractor 60 | max_parts - maximal number of parts if these are extracted 61 | perception - name of the perception model 62 | if the perception is fixed (pre-trained) 63 | layer - name for the perception's layer 64 | second_layer - name for the second parception's layer 65 | trainable_perception_name - name for the perception 66 | if the perception is not fixed 67 | preprocess_image_tensor_fun - image preprocessing function; 68 | only if trainable_perception_name is 
not 'none' 69 | 70 | Out: 71 | image features, or image tensor 72 | """ 73 | if trainable_perception_name == 'none': 74 | visual_features = data_provider['perception']( 75 | train_or_test=train_or_test, 76 | names_list=image_names_list, 77 | parts_extractor=parts_extractor, 78 | max_parts=max_parts, 79 | perception=perception, 80 | layer=layer, 81 | second_layer=second_layer) 82 | else: 83 | visual_features = preprocess_image_tensor_fun( 84 | data_provider['images']( 85 | train_or_test=train_or_test, 86 | names_list=image_names_list)) 87 | return visual_features 88 | 89 | 90 | ### 91 | # Abstract building visual models. 92 | ### 93 | class AbstractVisualModel(): 94 | """ 95 | Abstract class to build visual models. 96 | """ 97 | def __init__(self, visual_dim, weights_path=None): 98 | """ 99 | In: 100 | visual_dim - dimensionality of the input space; 101 | it can be a tuple, or a scalar 102 | weights_path - path to the weights to load, by default None 103 | """ 104 | self._weights_path = weights_path 105 | self._visual_dim = visual_dim 106 | 107 | def create(self): 108 | """ 109 | Creates a model. 110 | 111 | Out: 112 | model 113 | """ 114 | raise NotImplementedError() 115 | 116 | def get_dimensionality(self): 117 | """ 118 | Out: 119 | Returns an output dimensionality of this layer. 120 | """ 121 | raise NotImplementedError() 122 | 123 | 124 | ### 125 | # Concrete building visual models. 126 | ### 127 | class SequentialVisualModelEmpty(AbstractVisualModel): 128 | """ 129 | Empty visual model. No model. 130 | """ 131 | def create(self): 132 | model = Sequential() 133 | model.add(Reshape( 134 | input_shape=(self._visual_dim,), 135 | dims=(self._visual_dim,))) 136 | return model 137 | 138 | def get_dimensionality(self): 139 | return self._visual_dim 140 | 141 | 142 | class SequentialVisualModelVGG16(AbstractVisualModel): 143 | """ 144 | Sequential visual model. 
145 | 146 | VGG16 147 | """ 148 | def create(self): 149 | model = Sequential() 150 | 151 | model.add(ZeroPadding2D((1,1), input_shape=self._visual_dim)) 152 | model.add(Convolution2D(64, 3, 3, activation='relu')) 153 | model.add(ZeroPadding2D((1,1))) 154 | model.add(Convolution2D(64, 3, 3, activation='relu')) 155 | model.add(MaxPooling2D((2,2), strides=(2,2))) 156 | 157 | model.add(ZeroPadding2D((1,1))) 158 | model.add(Convolution2D(128, 3, 3, activation='relu')) 159 | model.add(ZeroPadding2D((1,1))) 160 | model.add(Convolution2D(128, 3, 3, activation='relu')) 161 | model.add(MaxPooling2D((2,2), strides=(2,2))) 162 | 163 | model.add(ZeroPadding2D((1,1))) 164 | model.add(Convolution2D(256, 3, 3, activation='relu')) 165 | model.add(ZeroPadding2D((1,1))) 166 | model.add(Convolution2D(256, 3, 3, activation='relu')) 167 | model.add(ZeroPadding2D((1,1))) 168 | model.add(Convolution2D(256, 3, 3, activation='relu')) 169 | model.add(MaxPooling2D((2,2), strides=(2,2))) 170 | 171 | model.add(ZeroPadding2D((1,1))) 172 | model.add(Convolution2D(512, 3, 3, activation='relu')) 173 | model.add(ZeroPadding2D((1,1))) 174 | model.add(Convolution2D(512, 3, 3, activation='relu')) 175 | model.add(ZeroPadding2D((1,1))) 176 | model.add(Convolution2D(512, 3, 3, activation='relu')) 177 | model.add(MaxPooling2D((2,2), strides=(2,2))) 178 | 179 | model.add(ZeroPadding2D((1,1))) 180 | model.add(Convolution2D(512, 3, 3, activation='relu')) 181 | model.add(ZeroPadding2D((1,1))) 182 | model.add(Convolution2D(512, 3, 3, activation='relu')) 183 | model.add(ZeroPadding2D((1,1))) 184 | model.add(Convolution2D(512, 3, 3, activation='relu')) 185 | model.add(MaxPooling2D((2,2), strides=(2,2))) 186 | 187 | model.add(Flatten()) 188 | model.add(Dense(4096, activation='relu')) 189 | model.add(Dropout(0.5)) 190 | 191 | self._model_output_dim = 4096 192 | model.add(Dense(self._model_output_dim, activation='relu')) 193 | model.add(Dropout(0.5)) 194 | 195 | #model.add(Dense(1000, activation='softmax')) 196 | 197 | if self._weights_path: 198 | model.load_weights(self._weights_path) 199 | return model 200 | 201 | def get_dimensionality(self): 202 | return self._model_output_dim 203 | 204 | 205 | class SequentialVisualModelVeryShallowCNN(AbstractVisualModel): 206 | """ 207 | Sequential visual model. 208 | 209 | Small CNN. 210 | """ 211 | def create(self): 212 | model = Sequential() 213 | 214 | model.add(ZeroPadding2D((1,1), input_shape=self._visual_dim)) 215 | model.add(Convolution2D(64, 3, 3, activation='relu')) 216 | 217 | model.add(Flatten()) 218 | self._model_output_dim = 4096 219 | model.add(Dense(self._model_output_dim, activation='relu')) 220 | model.add(Dropout(0.5)) 221 | 222 | if self._weights_path: 223 | model.load_weights(self._weights_path) 224 | return model 225 | 226 | def get_dimensionality(self): 227 | return self._model_output_dim 228 | 229 | 230 | ### 231 | # Selector 232 | ### 233 | select_sequential_visual_model = { 234 | 'none':SequentialVisualModelEmpty, 235 | 'vgg16':SequentialVisualModelVGG16, 236 | 'very_shallow_cnn':SequentialVisualModelVeryShallowCNN 237 | } 238 | -------------------------------------------------------------------------------- /kraino/utils/vqaEvaluation/vqaEval.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | __author__='aagrawal' 4 | 5 | # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link: 6 | # (https://github.com/tylin/coco-caption/blob/master/pycocoevalcap/eval.py). 
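# A minimal usage sketch of this evaluator together with the VQA helper
# from kraino/utils/vqaTools/vqa.py. The JSON paths are placeholders for
# whatever annotation, question and result files are being evaluated:
#
#   from kraino.utils.vqaTools.vqa import VQA
#   from kraino.utils.vqaEvaluation.vqaEval import VQAEval
#
#   vqa = VQA('annotations.json', 'questions.json')          # ground truth
#   vqaRes = vqa.loadRes('results.json', 'questions.json')   # predictions
#   vqaEval = VQAEval(vqa, vqaRes, n=2)   # n: decimals in reported accuracy
#   vqaEval.evaluate()
#   print vqaEval.accuracy['overall']
#   print vqaEval.accuracy['perQuestionType']
#   print vqaEval.accuracy['perAnswerType']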
7 | import sys 8 | import re 9 | 10 | class VQAEval: 11 | def __init__(self, vqa, vqaRes, n=2): 12 | self.n = n 13 | self.accuracy = {} 14 | self.evalQA = {} 15 | self.evalQuesType = {} 16 | self.evalAnsType = {} 17 | self.vqa = vqa 18 | self.vqaRes = vqaRes 19 | self.params = {'question_id': vqa.getQuesIds()} 20 | self.contractions = {"aint": "ain't", "arent": "aren't", "cant": "can't", "couldve": "could've", "couldnt": "couldn't", \ 21 | "couldn'tve": "couldn’t’ve", "couldnt’ve": "couldn’t’ve", "didnt": "didn’t", "doesnt": "doesn’t", "dont": "don’t", "hadnt": "hadn’t", \ 22 | "hadnt’ve": "hadn’t’ve", "hadn'tve": "hadn’t’ve", "hasnt": "hasn’t", "havent": "haven’t", "hed": "he’d", "hed’ve": "he’d’ve", \ 23 | "he’dve": "he’d’ve", "hes": "he’s", "howd": "how’d", "howll": "how’ll", "hows": "how’s", "Id’ve": "I’d’ve", "I’dve": "I’d’ve", \ 24 | "Im": "I’m", "Ive": "I’ve", "isnt": "isn’t", "itd": "it’d", "itd’ve": "it’d’ve", "it’dve": "it’d’ve", "itll": "it’ll", "let’s": "let’s", \ 25 | "maam": "ma’am", "mightnt": "mightn’t", "mightnt’ve": "mightn’t’ve", "mightn’tve": "mightn’t’ve", "mightve": "might’ve", \ 26 | "mustnt": "mustn’t", "mustve": "must’ve", "neednt": "needn’t", "notve": "not’ve", "oclock": "o’clock", "oughtnt": "oughtn’t", \ 27 | "ow’s’at": "’ow’s’at", "’ows’at": "’ow’s’at", "’ow’sat": "’ow’s’at", "shant": "shan’t", "shed’ve": "she’d’ve", "she’dve": "she’d’ve", \ 28 | "she’s": "she’s", "shouldve": "should’ve", "shouldnt": "shouldn’t", "shouldnt’ve": "shouldn’t’ve", "shouldn’tve": "shouldn’t’ve", \ 29 | "somebody’d": "somebodyd", "somebodyd’ve": "somebody’d’ve", "somebody’dve": "somebody’d’ve", "somebodyll": "somebody’ll", \ 30 | "somebodys": "somebody’s", "someoned": "someone’d", "someoned’ve": "someone’d’ve", "someone’dve": "someone’d’ve", \ 31 | "someonell": "someone’ll", "someones": "someone’s", "somethingd": "something’d", "somethingd’ve": "something’d’ve", \ 32 | "something’dve": "something’d’ve", "somethingll": "something’ll", "thats": "that’s", "thered": "there’d", "thered’ve": "there’d’ve", \ 33 | "there’dve": "there’d’ve", "therere": "there’re", "theres": "there’s", "theyd": "they’d", "theyd’ve": "they’d’ve", \ 34 | "they’dve": "they’d’ve", "theyll": "they’ll", "theyre": "they’re", "theyve": "they’ve", "twas": "’twas", "wasnt": "wasn’t", \ 35 | "wed’ve": "we’d’ve", "we’dve": "we’d’ve", "weve": "we've", "werent": "weren’t", "whatll": "what’ll", "whatre": "what’re", \ 36 | "whats": "what’s", "whatve": "what’ve", "whens": "when’s", "whered": "where’d", "wheres": "where's", "whereve": "where’ve", \ 37 | "whod": "who’d", "whod’ve": "who’d’ve", "who’dve": "who’d’ve", "wholl": "who’ll", "whos": "who’s", "whove": "who've", "whyll": "why’ll", \ 38 | "whyre": "why’re", "whys": "why’s", "wont": "won’t", "wouldve": "would’ve", "wouldnt": "wouldn’t", "wouldnt’ve": "wouldn’t’ve", \ 39 | "wouldn’tve": "wouldn’t’ve", "yall": "y’all", "yall’ll": "y’all’ll", "y’allll": "y’all’ll", "yall’d’ve": "y’all’d’ve", \ 40 | "y’alld’ve": "y’all’d’ve", "y’all’dve": "y’all’d’ve", "youd": "you’d", "youd’ve": "you’d’ve", "you’dve": "you’d’ve", \ 41 | "youll": "you’ll", "youre": "you’re", "youve": "you’ve"} 42 | self.manualMap = { 'none': '0', 43 | 'zero': '0', 44 | 'one': '1', 45 | 'two': '2', 46 | 'three': '3', 47 | 'four': '4', 48 | 'five': '5', 49 | 'six': '6', 50 | 'seven': '7', 51 | 'eight': '8', 52 | 'nine': '9', 53 | 'ten': '10' 54 | } 55 | self.articles = ['a', 56 | 'an', 57 | 'the' 58 | ] 59 | 60 | 61 | self.periodStrip = re.compile("(?!<=\d)(\.)(?!\d)") 62 | self.commaStrip = 
re.compile("(\d)(\,)(\d)") 63 | self.punct = [';', r"/", '[', ']', '"', '{', '}', 64 | '(', ')', '=', '+', '\\', '_', '-', 65 | '>', '<', '@', '`', ',', '?', '!'] 66 | 67 | 68 | def evaluate(self, quesIds=None): 69 | if quesIds == None: 70 | quesIds = [quesId for quesId in self.params['question_id']] 71 | gts = {} 72 | res = {} 73 | for quesId in quesIds: 74 | gts[quesId] = self.vqa.qa[quesId] 75 | res[quesId] = self.vqaRes.qa[quesId] 76 | 77 | # ================================================= 78 | # Compute accuracy 79 | # ================================================= 80 | accQA = [] 81 | accQuesType = {} 82 | accAnsType = {} 83 | print "computing accuracy" 84 | step = 0 85 | for quesId in quesIds: 86 | resAns = res[quesId]['answer'] 87 | resAns = resAns.replace('\n', ' ') 88 | resAns = resAns.replace('\t', ' ') 89 | resAns = resAns.strip() 90 | resAns = self.processPunctuation(resAns) 91 | resAns = self.processDigitArticle(resAns) 92 | gtAcc = [] 93 | gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']] 94 | if len(set(gtAnswers)) > 1: 95 | for ansDic in gts[quesId]['answers']: 96 | ansDic['answer'] = self.processPunctuation(ansDic['answer']) 97 | for gtAnsDatum in gts[quesId]['answers']: 98 | otherGTAns = [item for item in gts[quesId]['answers'] if item!=gtAnsDatum] 99 | matchingAns = [item for item in otherGTAns if item['answer']==resAns] 100 | acc = min(1, float(len(matchingAns))/3) 101 | gtAcc.append(acc) 102 | quesType = gts[quesId]['question_type'] 103 | ansType = gts[quesId]['answer_type'] 104 | avgGTAcc = float(sum(gtAcc))/len(gtAcc) 105 | accQA.append(avgGTAcc) 106 | if quesType not in accQuesType: 107 | accQuesType[quesType] = [] 108 | accQuesType[quesType].append(avgGTAcc) 109 | if ansType not in accAnsType: 110 | accAnsType[ansType] = [] 111 | accAnsType[ansType].append(avgGTAcc) 112 | self.setEvalQA(quesId, avgGTAcc) 113 | self.setEvalQuesType(quesId, quesType, avgGTAcc) 114 | self.setEvalAnsType(quesId, ansType, avgGTAcc) 115 | if step%100 == 0: 116 | self.updateProgress(step/float(len(quesIds))) 117 | step = step + 1 118 | 119 | self.setAccuracy(accQA, accQuesType, accAnsType) 120 | print "Done computing accuracy" 121 | 122 | def processPunctuation(self, inText): 123 | outText = inText 124 | for p in self.punct: 125 | if (p + ' ' in inText or ' ' + p in inText) or (re.search(self.commaStrip, inText) != None): 126 | outText = outText.replace(p, '') 127 | else: 128 | outText = outText.replace(p, ' ') 129 | outText = self.periodStrip.sub("", 130 | outText, 131 | re.UNICODE) 132 | return outText 133 | 134 | def processDigitArticle(self, inText): 135 | outText = [] 136 | tempText = inText.lower().split() 137 | for word in tempText: 138 | word = self.manualMap.setdefault(word, word) 139 | if word not in self.articles: 140 | outText.append(word) 141 | else: 142 | pass 143 | for wordId, word in enumerate(outText): 144 | if word in self.contractions: 145 | outText[wordId] = self.contractions[word] 146 | outText = ' '.join(outText) 147 | return outText 148 | 149 | def setAccuracy(self, accQA, accQuesType, accAnsType): 150 | self.accuracy['overall'] = round(100*float(sum(accQA))/len(accQA), self.n) 151 | self.accuracy['perQuestionType'] = {quesType: round(100*float(sum(accQuesType[quesType]))/len(accQuesType[quesType]), self.n) for quesType in accQuesType} 152 | self.accuracy['perAnswerType'] = {ansType: round(100*float(sum(accAnsType[ansType]))/len(accAnsType[ansType]), self.n) for ansType in accAnsType} 153 | 154 | def setEvalQA(self, quesId, acc): 155 | 
self.evalQA[quesId] = round(100*acc, self.n) 156 | 157 | def setEvalQuesType(self, quesId, quesType, acc): 158 | if quesType not in self.evalQuesType: 159 | self.evalQuesType[quesType] = {} 160 | self.evalQuesType[quesType][quesId] = round(100*acc, self.n) 161 | 162 | def setEvalAnsType(self, quesId, ansType, acc): 163 | if ansType not in self.evalAnsType: 164 | self.evalAnsType[ansType] = {} 165 | self.evalAnsType[ansType][quesId] = round(100*acc, self.n) 166 | 167 | def updateProgress(self, progress): 168 | barLength = 20 169 | status = "" 170 | if isinstance(progress, int): 171 | progress = float(progress) 172 | if not isinstance(progress, float): 173 | progress = 0 174 | status = "error: progress var must be float\r\n" 175 | if progress < 0: 176 | progress = 0 177 | status = "Halt...\r\n" 178 | if progress >= 1: 179 | progress = 1 180 | status = "Done...\r\n" 181 | block = int(round(barLength*progress)) 182 | text = "\rFinshed Percent: [{0}] {1}% {2}".format( "#"*block + "-"*(barLength-block), int(progress*100), status) 183 | sys.stdout.write(text) 184 | sys.stdout.flush() 185 | 186 | -------------------------------------------------------------------------------- /kraino/utils/vqaTools/vqa.py: -------------------------------------------------------------------------------- 1 | __author__ = 'aagrawal' 2 | __version__ = '0.9' 3 | 4 | """ 5 | Modified by Mateusz Malinowski [mmalinow@mpi-inf.mpg.de] to work 6 | with test datasets without annotations. 7 | """ 8 | 9 | 10 | # Interface for accessing the VQA dataset. 11 | 12 | # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link: 13 | # (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py). 14 | 15 | # The following functions are defined: 16 | # VQA - VQA class that loads VQA annotation file and prepares data structures. 17 | # getQuesIds - Get question ids that satisfy given filter conditions. 18 | # getImgIds - Get image ids that satisfy given filter conditions. 19 | # loadQA - Load questions and answers with the specified question ids. 20 | # showQA - Display the specified questions and answers. 21 | # loadRes - Load result file and create result object. 22 | 23 | # Help on each function can be accessed by: "help(COCO.function)" 24 | 25 | import json 26 | import datetime 27 | import copy 28 | 29 | class VQA: 30 | def __init__(self, annotation_file=None, question_file=None): 31 | """ 32 | Constructor of VQA helper class for reading and visualizing questions and answers. 33 | :param annotation_file (str): location of VQA annotation file 34 | :return: 35 | """ 36 | def init_empty_dataset(): 37 | d = {} 38 | d['info'] = 'empty' 39 | d['annotations'] = [] 40 | return d 41 | 42 | # load dataset 43 | self.dataset = {} 44 | self.questions = {} 45 | self.qa = {} 46 | self.qqa = {} 47 | self.imgToQA = {} 48 | if not question_file == None: 49 | print 'loading VQA annotations and questions into memory...' 50 | time_t = datetime.datetime.utcnow() 51 | if annotation_file is not None: 52 | dataset = json.load(open(annotation_file, 'r')) 53 | else: 54 | dataset = init_empty_dataset() 55 | questions = json.load(open(question_file, 'r')) 56 | print datetime.datetime.utcnow() - time_t 57 | self.dataset = dataset 58 | self.questions = questions 59 | self.createIndex() 60 | 61 | def createIndex(self): 62 | # create index 63 | print 'creating index...' 
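        # Three lookup tables are built below:
        #   imgToQA : image_id    -> list of annotation dicts for that image
        #   qa      : question_id -> its annotation dict
        #   qqa     : question_id -> its question dict (text, image_id, ...)
        # They back getQuesIds, getImgIds and loadQA further down in this class.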
64 | imgToQA = {ann['image_id']: [] for ann in self.dataset['annotations']} 65 | qa = {ann['question_id']: [] for ann in self.dataset['annotations']} 66 | qqa = {ann['question_id']: [] for ann in self.dataset['annotations']} 67 | for ann in self.dataset['annotations']: 68 | imgToQA[ann['image_id']] += [ann] 69 | qa[ann['question_id']] = ann 70 | for ques in self.questions['questions']: 71 | qqa[ques['question_id']] = ques 72 | print 'index created!' 73 | 74 | # create class members 75 | self.qa = qa 76 | self.qqa = qqa 77 | self.imgToQA = imgToQA 78 | 79 | def info(self): 80 | """ 81 | Print information about the VQA annotation file. 82 | :return: 83 | """ 84 | for key, value in self.dataset['info'].items(): 85 | print '%s: %s'%(key, value) 86 | 87 | def getQuesIds(self, imgIds=[], quesTypes=[], ansTypes=[]): 88 | """ 89 | Get question ids that satisfy given filter conditions. default skips that filter 90 | :param imgIds (int array) : get question ids for given imgs 91 | quesTypes (str array) : get question ids for given question types 92 | ansTypes (str array) : get question ids for given answer types 93 | :return: ids (int array) : integer array of question ids 94 | """ 95 | imgIds = imgIds if type(imgIds) == list else [imgIds] 96 | quesTypes = quesTypes if type(quesTypes) == list else [quesTypes] 97 | ansTypes = ansTypes if type(ansTypes) == list else [ansTypes] 98 | 99 | if len(imgIds) == len(quesTypes) == len(ansTypes) == 0: 100 | anns = self.dataset['annotations'] 101 | else: 102 | if not len(imgIds) == 0: 103 | anns = sum([self.imgToQA[imgId] for imgId in imgIds if imgId in self.imgToQA],[]) 104 | else: 105 | anns = self.dataset['annotations'] 106 | anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes] 107 | anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes] 108 | ids = [ann['question_id'] for ann in anns] 109 | return ids 110 | 111 | def getImgIds(self, quesIds=[], quesTypes=[], ansTypes=[]): 112 | """ 113 | Get image ids that satisfy given filter conditions. default skips that filter 114 | :param quesIds (int array) : get image ids for given question ids 115 | quesTypes (str array) : get image ids for given question types 116 | ansTypes (str array) : get image ids for given answer types 117 | :return: ids (int array) : integer array of image ids 118 | """ 119 | quesIds = quesIds if type(quesIds) == list else [quesIds] 120 | quesTypes = quesTypes if type(quesTypes) == list else [quesTypes] 121 | ansTypes = ansTypes if type(ansTypes) == list else [ansTypes] 122 | 123 | if len(quesIds) == len(quesTypes) == len(ansTypes) == 0: 124 | anns = self.dataset['annotations'] 125 | else: 126 | if not len(quesIds) == 0: 127 | anns = sum([self.qa[quesId] for quesId in quesIds if quesId in self.qa],[]) 128 | else: 129 | anns = self.dataset['annotations'] 130 | anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes] 131 | anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes] 132 | ids = [ann['image_id'] for ann in anns] 133 | return ids 134 | 135 | def loadQA(self, ids=[]): 136 | """ 137 | Load questions and answers with the specified question ids. 
138 | :param ids (int array) : integer ids specifying question ids 139 | :return: qa (object array) : loaded qa objects 140 | """ 141 | if type(ids) == list: 142 | return [self.qa[id] for id in ids] 143 | elif type(ids) == int: 144 | return [self.qa[ids]] 145 | 146 | def showQA(self, anns): 147 | """ 148 | Display the specified annotations. 149 | :param anns (array of object): annotations to display 150 | :return: None 151 | """ 152 | if len(anns) == 0: 153 | return 0 154 | for ann in anns: 155 | quesId = ann['question_id'] 156 | print "Question: %s" %(self.qqa[quesId]['question']) 157 | for ans in ann['answers']: 158 | print "Answer %d: %s" %(ans['answer_id'], ans['answer']) 159 | 160 | def loadRes(self, resFile, quesFile): 161 | """ 162 | Load result file and return a result object. 163 | :param resFile (str) : file name of result file 164 | :return: res (obj) : result api object 165 | """ 166 | res = VQA() 167 | res.questions = json.load(open(quesFile)) 168 | res.dataset['info'] = copy.deepcopy(self.questions['info']) 169 | res.dataset['task_type'] = copy.deepcopy(self.questions['task_type']) 170 | res.dataset['data_type'] = copy.deepcopy(self.questions['data_type']) 171 | res.dataset['data_subtype'] = copy.deepcopy(self.questions['data_subtype']) 172 | res.dataset['license'] = copy.deepcopy(self.questions['license']) 173 | 174 | print 'Loading and preparing results... ' 175 | time_t = datetime.datetime.utcnow() 176 | anns = json.load(open(resFile)) 177 | assert type(anns) == list, 'results is not an array of objects' 178 | annsQuesIds = [ann['question_id'] for ann in anns] 179 | assert set(annsQuesIds) == set(self.getQuesIds()), \ 180 | 'Results do not correspond to current VQA set. Either the results do have predictions for all question ids in annotation file or there is one/more questions id that does not belong to the question ids in the annotation file.' 181 | for ann in anns: 182 | quesId = ann['question_id'] 183 | if res.dataset['task_type'] == 'Multiple Choice': 184 | assert ann['answer'] in self.qqa[quesId]['multiple_choices'], 'predicted answer is not one of the multiple choices' 185 | qaAnn = self.qa[quesId] 186 | ann['image_id'] = qaAnn['image_id'] 187 | ann['question_type'] = qaAnn['question_type'] 188 | ann['answer_type'] = qaAnn['answer_type'] 189 | print 'DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds()) 190 | 191 | res.dataset['annotations'] = anns 192 | res.createIndex() 193 | return res 194 | -------------------------------------------------------------------------------- /kraino/utils/input_output_space.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | from __future__ import print_function 4 | 5 | """ 6 | Builds input/output space. 
7 | 8 | Author: Mateusz Malinowski 9 | Email: mmalinow@mpi-inf.mpg.de 10 | """ 11 | 12 | import numpy as np 13 | 14 | from toolz import itemmap 15 | 16 | 17 | __all__ = ['build_vocabulary', 'index_sequence', 18 | 'encode_questions_index','encode_questions_one_hot', 19 | 'encode_answers_one_hot'] 20 | 21 | ### 22 | ### 23 | # Constants 24 | ### 25 | PADDING = '' 26 | UNKNOWN = '' 27 | EOA = '' # end of answer 28 | EOQ = '' # end of question 29 | EXTRA_WORDS_NAMES = [PADDING, UNKNOWN, EOA, EOQ] 30 | EXTRA_WORDS = {PADDING:0, UNKNOWN:1, EOA:2, EOQ:3} 31 | EXTRA_WORDS_ID = itemmap(reversed, EXTRA_WORDS) 32 | 33 | ### 34 | # Functions 35 | ### 36 | def static_vars(**kwargs): 37 | def decorate(func): 38 | for k in kwargs: 39 | setattr(func, k, kwargs[k]) 40 | return func 41 | return decorate 42 | 43 | 44 | @static_vars(counter=len(EXTRA_WORDS)) 45 | def _myinc(d): 46 | """ 47 | Gets a tuple d, and returns d[0]: id. 48 | """ 49 | x = d[0] 50 | _myinc.counter += 1 51 | return (x, _myinc.counter - 1) 52 | 53 | 54 | def build_vocabulary(this_wordcount, extra_words=EXTRA_WORDS, 55 | is_reset=True, truncate_to_most_frequent=0): 56 | """ 57 | Builds vocabulary from wordcount. 58 | It also adds extra words to the vocabulary. 59 | 60 | In: 61 | this_wordcount - dictionary of wordcounts, e.g. {'cpu':3} 62 | extra_words - additional words to build the vocabulary 63 | dictionary of {word: id} 64 | by default {UNKNOWN: 0} 65 | is_reset - if True we restart the vocabulary counting 66 | by defaults False 67 | truncate_to_most_frequent - if positive then the vocabulary 68 | is truncated to 'truncate_to_most_frequent' words; 69 | by default 0 70 | 71 | Out: 72 | word2index - mapping from words to indices 73 | index2word - mapping from indices to words 74 | """ 75 | if is_reset: 76 | _myinc.counter=len(EXTRA_WORDS) 77 | if truncate_to_most_frequent > 0: 78 | sorted_wordcount = dict(sorted( 79 | this_wordcount.items(), key=lambda x:x[1], reverse=True)[:truncate_to_most_frequent]) 80 | this_wordcount = sorted_wordcount 81 | 82 | word2index = itemmap(_myinc, this_wordcount) 83 | if not extra_words == {}: 84 | assert(all([el not in word2index.values() for el in extra_words.values()])) 85 | word2index.update(extra_words) 86 | index2word = itemmap(reversed, word2index) 87 | return word2index, index2word 88 | 89 | 90 | def index_sequence(x, word2index): 91 | """ 92 | Converts list of words into a list of its indices wrt. word2index, that is into 93 | index encoded sequence. 94 | 95 | In: 96 | x - list of lines 97 | word2index - mapping from words to indices 98 | 99 | Out: 100 | a list of the list of indices that encode the words 101 | """ 102 | one_hot_x = [] 103 | for line in x: 104 | line_list = [] 105 | for w in line.split(): 106 | w = w.strip() 107 | if w in word2index: this_ind = word2index[w] 108 | else: this_ind = word2index[UNKNOWN] 109 | line_list.append(this_ind) 110 | one_hot_x.append(line_list) 111 | return one_hot_x 112 | 113 | 114 | def encode_questions_index(x, word2index_x, max_time_steps=None): 115 | """ 116 | Index-based encoding of questions. 117 | 118 | In: 119 | x - list of questions 120 | word2index_x - mapping from question words to indices (inverted vocabulary) 121 | max_time_steps - maximal number of words in the question (max. 
time steps); 122 | if None then all question words are taken; 123 | by default None 124 | Out: 125 | a list of encoded questions 126 | """ 127 | x_modified = [q + ' ' + EOQ for q in x] 128 | if max_time_steps is not None: 129 | x_modified = [' '.join(q.split()[:max_time_steps]) for q in x] 130 | return index_sequence(x_modified, word2index_x) 131 | 132 | 133 | def encode_questions_one_hot(x, word2index_x, max_time_steps): 134 | """ 135 | One-hot encoding of questions. 136 | 137 | In: 138 | x - list of questions 139 | word2index_x - mapping from question words to indices (inverted vocabulary) 140 | max_time_steps - maximal number of words in the sequence (max. time steps) 141 | 142 | Out: 143 | boolean tensor of size: data_size x max_time_steps x vocabulary_size 144 | for a given question and a time step there is only one '1' 145 | """ 146 | X = np.zeros((len(x), max_time_steps, len(word2index_x.keys())), 147 | dtype=np.bool) 148 | # encode questions 149 | for question_no, question in enumerate(x): 150 | question_word_list = question.split() 151 | question_word_list.append(EOQ) 152 | for word_no, word in enumerate(question_word_list): 153 | word = word.strip() 154 | if word_no == max_time_steps - 1: 155 | # we need to finish 156 | this_index = word2index_x[EOQ] 157 | else: 158 | if word in word2index_x: 159 | this_index = word2index_x[word] 160 | else: 161 | this_index = word2index_x[UNKNOWN] 162 | X[question_no, word_no, this_index] = 1 163 | return X 164 | 165 | def encode_questions_dense(x, word_encoder, max_time_steps, 166 | is_remove_question_symbol=False): 167 | """ 168 | Dense representation of questions. 169 | 170 | In: 171 | x - list of questions 172 | word_encoder - encodes words 173 | max_time_steps - maximal number of words in the sequence (max. time steps) 174 | is_remove_question_symbol - true if we remove question symbols from the questions; 175 | by default it is False 176 | 177 | Out: 178 | float tensor of size: data_size x max_time_steps x dense_encoding_size 179 | """ 180 | word_encoder_dim = word_encoder(unicode(x[0].split()[0].strip())).vector.shape[0] 181 | X = np.zeros((len(x), max_time_steps, word_encoder_dim)) 182 | for question_no, question in enumerate(x): 183 | question_word_list = question.split() 184 | if is_remove_question_symbol and question_word_list[-1] == '?': 185 | question_word_list = question_word_list[:-1] 186 | reversed_question_word_list = question_word_list[::-1] 187 | for word_no, raw_word in enumerate(reversed_question_word_list): 188 | word = unicode(raw_word.strip()) 189 | this_representation = word_encoder(word).vector 190 | if max_time_steps - word_no - 1 >= 0: 191 | X[question_no, max_time_steps - word_no - 1, :] = this_representation 192 | else: 193 | break 194 | return X 195 | 196 | 197 | def encode_answers_one_hot(y, word2index_y, max_answer_time_steps=10, 198 | is_only_first_answer_word=False, answer_words_delimiter=','): 199 | """ 200 | One-hot encoding of answers. 201 | If more than first answer word is encoded then the answer words 202 | are modelled as sequence. 203 | 204 | In: 205 | y - list of answers 206 | word2index_y - mapping from answer words to indices (vocabulary) 207 | max_answer_time_steps - maximal number of words in the sequence (max. 
time steps) 208 | by default 10 209 | is_only_first_answer_word - if True then only first answer word is taken 210 | by default False 211 | answer_words_delimiter - a symbol for splitting answer into answer words; 212 | if None is provided then we don't split answer into answer words 213 | (that is the whole answer is an answer word); 214 | by default ',' 215 | 216 | Out: 217 | Y - boolean matrix of size: 218 | data_size x vocabulary_size if there is only single answer word 219 | data_size x max_answer_time_steps x vocabulary_size otherwise 220 | the matrix is padded 221 | for a given answer and a time step there is only one '1' 222 | y_gt - list of answers 223 | the same as input 'y' if is_only_first_answer_word==False 224 | only first words from 'y' if is_only_first_answer_word==True 225 | """ 226 | # encode answers 227 | if is_only_first_answer_word: 228 | Y = np.zeros((len(y), len(word2index_y.keys())), dtype=np.bool) 229 | y_gt = [] 230 | else: 231 | Y = np.zeros((len(y), max_answer_time_steps, len(word2index_y.keys())), 232 | dtype=np.bool) 233 | y_gt = y 234 | 235 | if answer_words_delimiter is None: 236 | assert(is_only_first_answer_word==True) 237 | 238 | for answer_no, answer in enumerate(y): 239 | if answer_words_delimiter is not None: 240 | answer_split = answer.split(answer_words_delimiter) 241 | else: 242 | answer_split = [answer] 243 | for word_no, word in enumerate(answer_split): 244 | word = word.strip() 245 | if is_only_first_answer_word: 246 | y_gt.append(word) 247 | if word in word2index_y: 248 | Y[answer_no, word2index_y[word]] = 1 249 | else: 250 | Y[answer_no, word2index_y[UNKNOWN]] = 1 251 | break 252 | else: 253 | if word_no == max_answer_time_steps - 1: 254 | break 255 | if word in word2index_y: 256 | Y[answer_no, word_no, word2index_y[word]] = 1 257 | else: 258 | Y[answer_no, word_no, word2index_y[UNKNOWN]] = 1 259 | if not is_only_first_answer_word: 260 | Y[answer_no, 261 | min(len(answer_split), max_answer_time_steps-1), 262 | word2index_y[EOA]] = 1 263 | return Y, y_gt 264 | 265 | 266 | def shift(X, new_vector=None, time_axis=1): 267 | """ 268 | Shifts input X along time_axis by one. 269 | At the new place it introduces new_word_id. 270 | The method doesn't change the size of X, so 271 | the last column along time axis is forgotten. 272 | 273 | In: 274 | X - input array; 275 | X has to have one more dimension than time_axis, 276 | so if time_axis == 1 then X has 3 dimensions (0,1,2) 277 | new_vector - new vector that replaces the column at time axis; 278 | if None, then the last column is added at the first position; 279 | by default None 280 | time_axis - axis where shifting happens 281 | Out: 282 | shifted version of X along the time axis 283 | """ 284 | tmp = np.roll(X, 1, time_axis) 285 | if new_vector is None: 286 | return tmp 287 | if time_axis==0: 288 | tmp[0,:] = new_vector 289 | elif time_axis==1: 290 | tmp[:,0,:] = new_vector 291 | elif time_axis==2: 292 | tmp[:,:,0,:] = new_vector 293 | elif time_axis==3: 294 | tmp[:,:,:,0,:] = new_vector 295 | else: 296 | raise NotImplementedError 297 | return tmp 298 | 299 | 300 | def shift_with_index_vector(X, index, size, time_axis, value=1, dtype=np.bool): 301 | """ 302 | Shifts X along time_axis, and inserts a one-hot vector at the first 303 | column at this axis. 
304 | 305 | In: 306 | X - n-array 307 | index - index for value, 308 | the other elements of the corresponding vector are 0 309 | time_axis - axis where shifting happens 310 | value - value to place at index; 311 | by default 1 312 | dtype - type of the new vector; 313 | by default np.bool 314 | """ 315 | tmp = np.zeros(size, dtype=dtype) 316 | tmp[..., index] = value 317 | return shift(X, tmp, time_axis) 318 | 319 | 320 | -------------------------------------------------------------------------------- /kraino/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | """ 4 | Extra set of callbacks. 5 | 6 | Author: Mateusz Malinowski 7 | Email: mmalinow@mpi-inf.mpg.de 8 | """ 9 | 10 | 11 | import random 12 | import warnings 13 | import numpy as np 14 | 15 | #from bokeh.plotting import cursession 16 | #from bokeh.plotting import figure 17 | #from bokeh.plotting import push 18 | #from bokeh.plotting import show 19 | 20 | from keras.callbacks import Callback as KerasCallback 21 | 22 | #from agnez.keras_callbacks import BokehCallback 23 | 24 | from ..utils import print_metrics 25 | from ..utils.read_write import dict2file 26 | from ..utils.read_write import print_qa 27 | 28 | 29 | def is_era_begin(epoch, epoch_interval): 30 | return (epoch+1) % epoch_interval == 0 or epoch == 0 31 | 32 | 33 | def is_era_end(epoch, epoch_interval): 34 | return (epoch+1) % epoch_interval == 0 35 | 36 | 37 | ### 38 | # Storing callbacks 39 | ### 40 | class StoreModelWeightsOnEraEnd(KerasCallback): 41 | def __init__(self, filepath, epoch_interval, verbose=0): 42 | """ 43 | In: 44 | filepath - formattable filepath; possibilities: 45 | * weights.{epoch:02d} 46 | * weights.{era:02d} 47 | epoch_interval - 48 | number of epochs that must be passed from the previous saving 49 | verbose - if nonzero then print out information on stdout; 50 | by default 0 51 | """ 52 | super(KerasCallback, self).__init__() 53 | self.filepath = filepath 54 | self.epoch_interval = epoch_interval 55 | self.verbose = verbose 56 | self.era = 0 57 | 58 | def on_epoch_end(self, epoch, logs={}): 59 | if is_era_end(epoch, self.epoch_interval): 60 | filepath = self.filepath.format( 61 | epoch=epoch, era=self.era, **logs) 62 | if self.verbose > 0: 63 | print("Epoch %05d: saving model to %s" % (epoch, filepath)) 64 | self.model.save_weights(filepath, overwrite=True) 65 | self.era += 1 66 | ### 67 | 68 | ### 69 | # Printing callbacks 70 | ### 71 | class PrintOnEraBegin(KerasCallback): 72 | def __init__(self, epoch_interval, message='Era {era:02d}'): 73 | """ 74 | In: 75 | epoch_interval - 76 | number of epochs that must be passed between two consecutive 77 | invocations of this callback 78 | message - 79 | formattable message to show; 80 | by default "Era {era:02d}" showing the current era 81 | """ 82 | self.epoch_interval = epoch_interval 83 | self.era = 0 84 | self.message = message 85 | 86 | def on_epoch_begin(self, epoch, logs={}): 87 | if is_era_begin(epoch, self.epoch_interval): 88 | print() 89 | print('-' * 50) 90 | print(self.message.format(era=self.era)) 91 | self.era += 1 92 | 93 | 94 | class PrintPerformanceMetricOnEraEnd(KerasCallback): 95 | def __init__(self, X, y, temperature, index2word_y, 96 | metric_name, epoch_interval, extra_vars, 97 | verbosity_path='logs/performance.log', verbose=1): 98 | """ 99 | In: 100 | X - encoded input 101 | y - raw expected output 102 | temperature - temperature for the predictions; 103 | the colder the temperature 
the more stable answers 104 | index2word_y - mapping from the indices to words (in the y-domain) 105 | metric_name - name of the performance metric 106 | epoch_interval - 107 | number of epochs that must be passed between two consecutive 108 | invocations of this callback 109 | extra_vars - dictionary of extra variables 110 | verbosity path - path to dumb the logs 111 | verbose - verbosity level; 112 | by default 1 113 | """ 114 | self.X = X 115 | self.y = y 116 | self.temperature = temperature 117 | self.index2word_y = index2word_y 118 | self.metric_name = metric_name 119 | self.epoch_interval = epoch_interval 120 | self.extra_vars = extra_vars 121 | self.verbosity_path = verbosity_path 122 | self.verbose = verbose 123 | self.era = 0 124 | 125 | def on_epoch_end(self, epoch, logs={}): 126 | if is_era_end(epoch, self.epoch_interval): 127 | answer_pred = self.model.decode_predictions( 128 | X=self.X, 129 | temperature=self.temperature, 130 | index2word=self.index2word_y, 131 | verbose=self.verbose) 132 | metric_values = print_metrics.select[self.metric_name]( 133 | gt_list=self.y, 134 | pred_list=answer_pred, 135 | verbose=1, 136 | extra_vars=self.extra_vars) 137 | if self.verbose == 1: 138 | for m in metric_values: 139 | if 'idiosyncrasy' in m: 140 | idiosyncrasies = m['idiosyncrasy'].split(':') 141 | if 'long' in idiosyncrasies and 'muted' in idiosyncrasies: 142 | # long value being muted, we can only send the results 143 | # to the file 144 | filepath = self.verbosity_path.format( 145 | epoch=epoch, era=self.era, **logs) 146 | if m['value'] is not None: 147 | dict2file(m['value'], filepath, title=m['name']) 148 | self.era += 1 149 | ### 150 | 151 | ### 152 | # Plotting callbacks 153 | ### 154 | ''' 155 | class PlotPerformanceMetricOnEraEnd(BokehCallback): 156 | """ 157 | Plots the performance measures. 
158 | 159 | Inspired by 160 | https://github.com/EderSantana/agnez/blob/master/agnez/keras_callbacks.py 161 | """ 162 | colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 163 | '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 164 | 165 | def __init__(self, X, y, temperature, index2word_y, 166 | metric_name, epoch_interval, extra_vars, verbose=1, 167 | name='experiment', fig_title='Performance', url='http://127.0.0.1:5006'): 168 | """ 169 | In: 170 | X - encoded input 171 | y - raw expected output 172 | temperature - temperature for the predictions; 173 | the colder the temperature the more stable answers 174 | index2word_y - mapping from the indices to words (in the y-domain) 175 | metric_name - name of the performance metric 176 | epoch_interval - 177 | number of epochs that must be passed between two consecutive 178 | invocations of this callback 179 | extra_vars - dictionary of extra variables 180 | verbose - verbosity level; by default 1 181 | name - name of the bokeh document; by default 'experiment' 182 | fig_title - title of the bokeh figure; by default 'Performance' 183 | url - bokeh server url; 184 | by default 'http://127.0.0.1:5006' 185 | """ 186 | BokehCallback.__init__(self, name, fig_title, url) 187 | self.X = X 188 | self.y = y 189 | self.temperature = temperature 190 | self.index2word_y = index2word_y 191 | self.metric_name = metric_name 192 | self.epoch_interval = epoch_interval 193 | self.extra_vars = extra_vars 194 | self.verbose = verbose 195 | self.era = 0 196 | 197 | def on_epoch_end(self, epoch, logs={}): 198 | if not is_era_end(epoch, self.epoch_interval): 199 | return 200 | 201 | answer_pred = self.model.decode_predictions( 202 | X=self.X, 203 | temperature=self.temperature, 204 | index2word=self.index2word_y, 205 | verbose=self.verbose) 206 | measures = print_metrics.select[self.metric_name]( 207 | gt_list=self.y, 208 | pred_list=answer_pred, 209 | verbose=1, 210 | extra_vars=self.extra_vars) 211 | 212 | if not hasattr(self, 'fig'): 213 | self.fig = figure(title=self.fig_title) 214 | for i, m in enumerate(measures): 215 | if 'idiosyncrasy' in m: 216 | if 'muted' in m['idiosyncrasy'].split(':'): 217 | continue 218 | self.fig.line([self.era], [m['value']], legend=m['name'], 219 | name=m['name'], line_width=2, 220 | line_color=self.colors[i % len(self.colors)]) 221 | renderer = self.fig.select({'name': m['name']}) 222 | self.plots.append(renderer[0].data_source) 223 | show(self.fig) 224 | else: 225 | for i, m in enumerate(measures): 226 | if 'idiosyncrasy' in m: 227 | if 'muted' in m['idiosyncrasy'].split(':'): 228 | continue 229 | self.plots[i].data['y'].append(m['value']) 230 | self.plots[i].data['x'].append(self.era) 231 | cursession().store_objects(self.plots[i]) 232 | push() 233 | self.era += 1 234 | 235 | class StandardPerformancePlot(BokehCallback): 236 | """ 237 | Generalizes Agnez class Plot to work with all standard performance metrics. 
238 | 239 | Original work: Eder Santana [https://github.com/EderSantana] 240 | """ 241 | # WIP 242 | # TODO: 243 | # -[ ] Decide API for choosing channels to plot 244 | colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 245 | '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 246 | """ 247 | Inspired by https://github.com/mila-udem/blocks-extras/blob/master/blocks/extras/extensions/plot.py 248 | 249 | """ 250 | def __init__(self, 251 | what_to_plot=['loss', 'val_loss'], 252 | name='experiment', 253 | fig_title='Cost functions', 254 | url='default'): 255 | BokehCallback.__init__(self, name, fig_title, url) 256 | self.totals = {} 257 | self.what_to_plot = what_to_plot 258 | 259 | def on_epoch_begin(self, epoch, logs={}): 260 | self.seen = 0 261 | self.totals = {} 262 | 263 | def on_batch_end(self, batch, logs={}): 264 | batch_size = logs.get('size', 0) 265 | self.seen += batch_size 266 | what_to_plot_now = list(set(['loss', 'acc']) & set(self.what_to_plot)) 267 | for v in what_to_plot_now: 268 | if v in self.totals: 269 | self.totals[v] += logs.get(v) * batch_size 270 | else: 271 | self.totals[v] = logs.get(v) * batch_size 272 | 273 | def on_epoch_end(self, epoch, logs={}): 274 | if not hasattr(self, 'fig'): 275 | self.fig = figure(title=self.fig_title) 276 | for i, v in enumerate(self.what_to_plot): 277 | if v == 'loss': 278 | L = self.totals[v] / self.seen 279 | else: 280 | L = logs.get(v) 281 | self.fig.line([epoch], [L], legend=v, 282 | name=v, line_width=2, 283 | line_color=self.colors[i % len(self.colors)]) 284 | renderer = self.fig.select({'name': v}) 285 | self.plots.append(renderer[0].data_source) 286 | show(self.fig) 287 | else: 288 | for i, v in enumerate(self.what_to_plot): 289 | if v in ['loss', 'acc']: 290 | L = self.totals[v] / self.seen 291 | else: 292 | L = logs.get(v) 293 | self.plots[i].data['y'].append(L) 294 | self.plots[i].data['x'].append(epoch) 295 | cursession().store_objects(self.plots[i]) 296 | push() 297 | ### 298 | ''' 299 | 300 | ### 301 | # Monitoring callbacks 302 | ### 303 | class MonitorPredictionsOnEndEra(KerasCallback): 304 | """ 305 | Checks the performance on a randomly chosen subset of the data. 306 | Hopefully the network generates something interesting. 
307 | """ 308 | def __init__(self, X, x, y, temperature, index2word_y, 309 | verbosity_path, epoch_interval, subset_size=0, verbose=0): 310 | """ 311 | In: 312 | X - encoded input 313 | x - raw input 314 | y - raw output space 315 | temperature - temperature for the predictions; 316 | the colder the temperature the more stable answers 317 | index2word_y - mapping from the indices to words (in the y-domain) 318 | verbosity_path - parameterized filepath to store the logs; 319 | possibilities of the parameterization 320 | * weights.{epoch:02d} 321 | * weights.{era:02d} 322 | epoch_interval - 323 | number of epochs that must be passed between two consecutive 324 | invocations of this callback 325 | subset_size - data subset's size, if 0 then all data are taken; 326 | by default 0 327 | verbose - verbosity level; by default 0 328 | """ 329 | self.X = X 330 | self.x = x 331 | self.y = y 332 | self.temperature = temperature 333 | self.index2word_y = index2word_y 334 | self.verbosity_path = verbosity_path 335 | self.epoch_interval = epoch_interval 336 | self.subset_size = subset_size 337 | self.verbose = verbose 338 | self.era = 0 339 | 340 | def on_epoch_end(self, epoch, logs={}): 341 | if is_era_end(epoch, self.epoch_interval): 342 | if self.subset_size > 0: 343 | subset_indices = random.sample( 344 | xrange(self.X.shape[0]), self.subset_size) 345 | if len(self.X) == 2: 346 | X_subset = [self.X[0][subset_indices], self.X[1][subset_indices]] 347 | elif len(self.X) == 1: 348 | X_subset = self.X[subset_indices] 349 | questions_subset = self.x[subset_indices] 350 | answer_gt = self.y[subset_indices] 351 | answer_gt_original = self.y[subset_indices] 352 | else: 353 | X_subset = self.X 354 | questions_subset = self.x 355 | answer_gt = self.y 356 | answer_gt_original = self.y 357 | answer_pred = self.model.decode_predictions( 358 | X=X_subset, 359 | temperature=self.temperature, 360 | index2word=self.index2word_y, 361 | verbose=self.verbose) 362 | 363 | filepath = self.verbosity_path.format( 364 | epoch=epoch, era=self.era, **logs) 365 | print_qa(questions_subset, answer_gt, answer_gt_original, answer_pred, 366 | self.era, path=filepath) 367 | self.era += 1 368 | ### 369 | 370 | ### 371 | # Learning modifiers callbacks 372 | ### 373 | class LearningRateReducerWithEarlyStopping(KerasCallback): 374 | """ 375 | Reduces learning rate during the training. 376 | 377 | Original work: jiumem [https://github.com/jiumem] 378 | """ 379 | def __init__(self, 380 | patience=0, reduce_rate=0.5, reduce_nb=10, 381 | is_early_stopping=True, verbose=1): 382 | """ 383 | In: 384 | patience - number of beginning epochs without reduction; 385 | by default 0 386 | reduce_rate - multiplicative rate reducer; by default 0.5 387 | reduce_nb - maximal number of reductions performed; by default 10 388 | is_early_stopping - if true then early stopping is applied when 389 | reduce_nb is reached; by default True 390 | verbose - verbosity level; by default 1 391 | """ 392 | super(KerasCallback, self).__init__() 393 | self.patience = patience 394 | self.wait = 0 395 | self.best_score = -1. 
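        # -1. acts as a sentinel for "no validation score seen yet": the first
        # real val_acc (always >= 0) reported in on_epoch_end replaces it and
        # resets the patience counter.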
396 | self.reduce_rate = reduce_rate 397 | self.current_reduce_nb = 0 398 | self.reduce_nb = reduce_nb 399 | self.is_early_stopping = is_early_stopping 400 | self.verbose = verbose 401 | self.epsilon = 0.1e-10 402 | 403 | def on_epoch_end(self, epoch, logs={}): 404 | current_score = logs.get('val_acc') 405 | if current_score is None: 406 | warnings.warn('validation score is off; ' + 407 | 'this reducer works only with the validation score on') 408 | return 409 | if current_score > self.best_score: 410 | self.best_score = current_score 411 | self.wait = 0 412 | if self.verbose > 0: 413 | print('---current best val accuracy: %.3f' % current_score) 414 | else: 415 | if self.wait >= self.patience: 416 | self.current_reduce_nb += 1 417 | if self.current_reduce_nb <= self.reduce_nb: 418 | lr = np.float32(self.model.optimizer.lr.get_value()) 419 | self.model.optimizer.lr.set_value(lr*self.reduce_rate) 420 | if self.verbose > 0: 421 | print("Reduction from {0:0.6f} to {1:0.6f}".\ 422 | format(float(lr), float(lr*self.reduce_rate))) 423 | if float(lr) <= self.epsilon: 424 | if self.verbose > 0: 425 | print('Learning rate too small, learning stops now') 426 | self.model.stop_training = True 427 | else: 428 | if self.is_early_stopping: 429 | if self.verbose > 0: 430 | print("Epoch %d: early stopping" % (epoch)) 431 | self.model.stop_training = True 432 | self.wait += 1 433 | 434 | 435 | class LearningRateReducerEveryPatienceEpoch(KerasCallback): 436 | """ 437 | Reduces learning rate during the training after every 'patience' epochs. 438 | 439 | Original work: jiumem [https://github.com/jiumem] 440 | """ 441 | def __init__(self, 442 | patience=0, reduce_rate=0.5, reduce_nb=10, verbose=1): 443 | """ 444 | In: 445 | patience - number of epochs in stagnation; by default 0 446 | reduce_rate - multiplicative rate reducer; by default 0.5 447 | reduce_nb - maximal number of reductions performed; by default 10 448 | verbose - verbosity level; by default 1 449 | """ 450 | super(KerasCallback, self).__init__() 451 | self.patience = patience 452 | self.wait = 0 453 | self.best_score = -1. 
454 | self.reduce_rate = reduce_rate 455 | self.current_reduce_nb = 0 456 | self.reduce_nb = reduce_nb 457 | self.is_early_stopping = False 458 | self.verbose = verbose 459 | self.epsilon = 0.1e-10 460 | 461 | def on_epoch_end(self, epoch, logs={}): 462 | current_score = logs.get('val_acc') 463 | if current_score is None: 464 | current_score = -10.0 # always reduce 465 | if current_score > self.best_score: 466 | self.best_score = current_score 467 | self.wait = 0 468 | if self.verbose > 0: 469 | print('---current best val accuracy: %.3f' % current_score) 470 | else: 471 | if self.wait >= self.patience: 472 | self.current_reduce_nb += 1 473 | if self.current_reduce_nb <= self.reduce_nb: 474 | lr = np.float32(self.model.optimizer.lr.get_value()) 475 | self.model.optimizer.lr.set_value(lr*self.reduce_rate) 476 | if self.verbose > 0: 477 | print("Reduction from {0:0.6f} to {1:0.6f}".\ 478 | format(float(lr), float(lr*self.reduce_rate))) 479 | if float(lr) <= self.epsilon: 480 | if self.verbose > 0: 481 | print('Learning rate too small, learning stops now') 482 | self.model.stop_training = True 483 | else: 484 | if self.is_early_stopping: 485 | if self.verbose > 0: 486 | print("Epoch %d: early stopping" % (epoch)) 487 | self.model.stop_training = True 488 | self.wait = 0 489 | else: 490 | self.wait += 1 491 | -------------------------------------------------------------------------------- /kraino/utils/data_provider.py: -------------------------------------------------------------------------------- 1 | """ 2 | DAQUAR dataset provider. 3 | 4 | Ashkan Mokarian [ashkan@mpi-inf.mpg.de] 5 | Mateusz Malinowski [mmalinow@mpi-inf.mpg.de] 6 | """ 7 | 8 | import copy 9 | import os 10 | import re 11 | import json 12 | import numpy as np 13 | 14 | from read_write import file2list 15 | from toolz import frequencies 16 | 17 | from scipy.misc import imread 18 | 19 | def daquar_qa_triples( 20 | path=None, 21 | train_or_test='train', 22 | keep_top_qa_pairs=0, 23 | **kwargs): 24 | """ 25 | DAQUAR question answer pairs. 
26 | 27 | In: 28 | path - path to DAQUAR root folder, if None then default path is chosen 29 | by default None 30 | train_or_test - switch between train and test set; 31 | value belongs to \{'train', 'val', 'test'\} 32 | by default 'train' 33 | keep_top_qa_pairs - filter out question-answer pairs to the 34 | keep_top_qa_pairs if positive; by default 0 35 | 36 | Out: 37 | x - textual questions 38 | y - textual answers 39 | img_name - names of the images 40 | img_ind - image indices that correspond to x 41 | question_id - empty list as it is unused in DAQUAR 42 | end_of_question - end of question token 43 | end_of_answer - end of answer token 44 | answer_words_delimiter - delimiter for multiple word answers 45 | """ 46 | if path is None: 47 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 48 | path = os.path.join(curr_dir, '..', '..', 'data', 'daquar') 49 | 50 | if train_or_test == 'val': 51 | # we don't have a well established split 52 | train_or_test = 'train' 53 | 54 | xy_list = file2list( 55 | os.path.join(path,'qa.894.raw.'+train_or_test+'.format_triple')) 56 | 57 | # create a dictionary of allowed qa pairs 58 | all_answers = xy_list[1::3] 59 | freq = frequencies(all_answers) 60 | if keep_top_qa_pairs <= 0: 61 | most_frequent_answers = sorted( 62 | freq.items(), key=lambda x:x[1], reverse=True) 63 | else: 64 | most_frequent_answers = sorted( 65 | freq.items(), key=lambda x:x[1], reverse=True)[:keep_top_qa_pairs] 66 | allowed_answers_dict = dict(most_frequent_answers) 67 | # 68 | 69 | x_list = [] 70 | y_list = [] 71 | img_name_list = [] 72 | img_ind_list = [] 73 | for x, y, image_name in zip(xy_list[::3], xy_list[1::3], xy_list[2::3]): 74 | if y in allowed_answers_dict: 75 | x_list.append(x) 76 | y_list.append(y) 77 | img_name_list.append(image_name) 78 | img_num = re.search('(?<=image)[0-9]+', image_name).group(0) 79 | img_ind_list.append(int(img_num)-1) 80 | 81 | return {'x':x_list, 82 | 'y':y_list, 83 | 'img_name':img_name_list, 84 | 'img_ind': img_ind_list, 85 | 'question_id': [], 86 | 'end_of_question':'?', 87 | 'end_of_answer':'', 88 | 'answer_words_delimiter':','} 89 | 90 | 91 | def daquar_save_results(question_id_list, answer_list, path): 92 | raise NotImplementedError() 93 | 94 | 95 | def vqa_save_results(question_id_list, answer_list, path): 96 | """ 97 | Saves the answers on question_id_list in the VQA-like format. 98 | 99 | In: 100 | question_id_list - list of the question ids 101 | answer_list - list with the answers 102 | path - path where the file is saved 103 | """ 104 | question_answer_pairs = [] 105 | assert len(question_id_list) == len(answer_list), \ 106 | 'must be the same number of questions and answers' 107 | for q,a in zip(question_id_list, answer_list): 108 | question_answer_pairs.append({'question_id':q, 'answer':str(a)}) 109 | with open(path,'w') as f: 110 | json.dump(question_answer_pairs, f) 111 | 112 | 113 | def vqa_get_object(path=None, train_or_test='train', 114 | dataset_type='mscoco', task_type='OpenEnded', 115 | annotation_year='2014', question_year='2015'): 116 | """ 117 | In: 118 | path - path to VQA root folder, if None then default path is chosen; 119 | by default None 120 | train_or_test - switch between train and test set; 121 | value belongs to \{'train', 'val', 'test', 'test_dev'\} 122 | by default 'train' 123 | dataset_type - type of dataset, e.g. 'mscoco' 124 | task_type - type of the task, e.g. 
'OpenEnded' 125 | annotation_year - annotation year 126 | question_year - question year 127 | 128 | Out: 129 | root_path - constructed root path 130 | anno_path - constructed path to annotations 131 | questions_path - constructed path to questions 132 | vqa_object - constructed VQA object 133 | """ 134 | 135 | from vqaTools.vqa import VQA 136 | if path == None: 137 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 138 | root_path = os.path.join(curr_dir, '..', '..', 'data', 'vqa') 139 | else: 140 | root_path = path 141 | 142 | train_or_test_questions = 'test-dev' if train_or_test == 'test_dev' \ 143 | else train_or_test 144 | dataset_train_or_test = train_or_test + annotation_year 145 | question_train_or_test = train_or_test_questions + question_year 146 | 147 | if train_or_test == 'test_dev': 148 | anno_path = None 149 | else: 150 | anno_path = os.path.join(root_path, 151 | 'Annotations', '{0}_{1}_annotations.json'.format( 152 | dataset_type, dataset_train_or_test)) 153 | questions_path = os.path.join(root_path, 154 | 'Questions', '{0}_{1}_{2}_questions.json'.format( 155 | task_type, dataset_type, question_train_or_test)) 156 | vqa = VQA(anno_path, questions_path) 157 | return {'root_path':root_path, 158 | 'anno_path':anno_path, 159 | 'questions_path':questions_path, 160 | 'vqa_object':vqa} 161 | 162 | 163 | def vqa_general(path=None, train_or_test='train', dataset_type='mscoco', 164 | task_type='OpenEnded', annotation_year='2014', question_year='2015', 165 | image_name_template='COCO_2014_{0:0=12}', answer_mode='single_random', 166 | keep_top_qa_pairs=0): 167 | """ 168 | VT-Vision-Lab VQA question answeir pairs. It is a general interface. 169 | In: 170 | path - path to VQA root folder, if None then default path is chosen; 171 | by default None 172 | train_or_test - switch between train and test set; 173 | value belongs to \{'train', 'val', 'test', 'test_dev'\} 174 | by default 'train' 175 | dataset_type - type of dataset, e.g. 'mscoco' 176 | task_type - type of the task, e.g. 'OpenEnded' 177 | annotation_year - annotation year 178 | question_year - question year 179 | image_name_template - template for giving names to images 180 | answer_mode - possible answer modes: 181 | 'single_random' - single answer, randomly chosen 182 | 'single_confident' - single answer, randomly chosen among the confident; 183 | if there is no confident then randomly chosen (the same as single) 184 | 'single_frequent' - the most frequent answer 185 | 'all' - with one question all answers 186 | 'all_repeat' - all answers by repeating the same question 187 | 'all_repeat_confidentonly' - all answers that are confident (repeats the same question) 188 | keep_top_qa_pairs - filter out question-answer pairs to the 189 | keep_top_qa_pairs if positive; by default 0 190 | 191 | Out: 192 | x - textual questions 193 | y - textual answers 194 | img_name - names of the images 195 | img_ind - image indices that correspond to x 196 | question_id - list of question indices 197 | end_of_question - end of question token 198 | end_of_answer - end of answer token 199 | answer_words_delimiter - delimiter for multiple word answers 200 | anno_path - constructed path to annotations 201 | questions_path - constructed path to questions 202 | """ 203 | 204 | def preprocess_question(q): 205 | q_tmp = q.strip().lower().encode('utf8') 206 | if q_tmp[-1] == '?' and q_tmp[-2] != ' ': 207 | # separate word token from the question mark 208 | q_tmp = q_tmp[:-1] + ' ?' 
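        # e.g. 'What is on the table?' -> 'what is on the table ?'
        # (lowercased, with the mark detached from the last word); the
        # detached mark is removed again just below, so the returned
        # question ends without a question mark.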
209 | # remove question mark 210 | if q_tmp[-1] == '?': q_tmp = q_tmp[:-1] 211 | return q_tmp 212 | # 213 | 214 | assert answer_mode in ['single_random', 'single_confident', 'single_frequent', 'all', 'all_repeat', 'all_repeat_confidentonly'] 215 | assert task_type in ['OpenEnded', 'MultipleChoice'], \ 216 | 'The task is either ''OpenEnded'' of ''MultipleChoice''' 217 | assert dataset_type in ['mscoco', 'abstract_v002'], \ 218 | 'The type of dataset is eigher ''mscoco'' or ''abstract_v002''' 219 | 220 | vqa_dict = vqa_get_object( 221 | path=path, 222 | train_or_test=train_or_test, 223 | dataset_type=dataset_type, 224 | task_type=task_type, 225 | annotation_year=annotation_year, 226 | question_year=question_year) 227 | vqa = vqa_dict['vqa_object'] 228 | 229 | # questions can be filtered, e.g. by the question type 230 | ann_ids = vqa.getQuesIds() 231 | anns = vqa.loadQA(ann_ids) 232 | 233 | # process annotations 234 | question_id_list = [] 235 | image_name_list = [] 236 | image_id_list = [] 237 | x_list = [] 238 | y_list = [] 239 | 240 | # return only questions if there are no annotations 241 | if anns == []: 242 | for ques in vqa.questions['questions']: 243 | question = preprocess_question(ques['question']) 244 | x_list.append(question) 245 | question_id_list.append(ques['question_id']) 246 | image_id = ques['image_id'] 247 | image_name = image_name_template.format(image_id) 248 | image_name_list.append(image_name) 249 | image_id_list.append(image_id) 250 | 251 | # create a dictionary of allowed qa pairs 252 | all_answers = [x['answer'] for anno in anns for x in anno['answers']] 253 | freq = frequencies(all_answers) 254 | if keep_top_qa_pairs <= 0: 255 | most_frequent_answers = sorted( 256 | freq.items(), key=lambda x:x[1], reverse=True) 257 | else: 258 | most_frequent_answers = sorted( 259 | freq.items(), key=lambda x:x[1], reverse=True)[:keep_top_qa_pairs] 260 | allowed_answers_dict = dict(most_frequent_answers) 261 | # 262 | 263 | for anno in anns: 264 | image_id = anno['image_id'] 265 | image_name = image_name_template.format(image_id) 266 | question_id = anno['question_id'] 267 | question = preprocess_question(vqa.qqa[question_id]['question']) 268 | assert image_id == vqa.qqa[question_id]['image_id'], \ 269 | 'image id of the question and answer are different' 270 | # randomizing the answers list 271 | randomized_answers = copy.deepcopy(anno['answers']) 272 | np.random.shuffle(randomized_answers) 273 | randomized_allowed_answers_list = \ 274 | [x for x in randomized_answers if x['answer'] in allowed_answers_dict] 275 | if randomized_allowed_answers_list == []: 276 | continue 277 | # 278 | if answer_mode == 'single_random': 279 | answer = randomized_allowed_answers_list[0]['answer'] 280 | elif answer_mode == 'single_confident': 281 | # if there is no confident answer, take a random one 282 | confidence_list = [x['answer_confidence'] \ 283 | for x in randomized_allowed_answers_list] 284 | yes_list = [j for j,x in enumerate(confidence_list) if x == 'yes'] 285 | if yes_list == []: 286 | answer = randomized_allowed_answers_list[0]['answer'] 287 | else: 288 | answer = randomized_allowed_answers_list[yes_list[0]]['answer'] 289 | elif answer_mode == 'single_frequent': 290 | tmp = frequencies([x['answer'] for x in randomized_allowed_answers_list]) 291 | answer = sorted(tmp.items(), key=lambda x: x[1], reverse=True)[0][0] 292 | elif answer_mode == 'all': 293 | raise NotImplementedError() 294 | elif answer_mode == 'all_repeat': 295 | answer_list_all_mode = [] 296 | for answer in 
randomized_allowed_answers_list: 297 | answer_list_all_mode.append(answer['answer'].encode('utf8')) 298 | elif answer_mode == 'all_repeat_confidentonly': 299 | # like repeat but consider only confident answers 300 | confidence_list = [x['answer_confidence'] \ 301 | for x in randomized_allowed_answers_list] 302 | yes_list = [j for j,x in enumerate(confidence_list) if x == 'yes'] 303 | if yes_list == []: 304 | # we keep only confident qa pairs 305 | continue 306 | answer_list_all_mode = [] 307 | for answer_no, answer in enumerate(randomized_allowed_answers_list): 308 | if answer_no in yes_list: 309 | answer_list_all_mode.append(answer['answer'].encode('utf8')) 310 | else: 311 | raise NotImplementedError() 312 | 313 | if 'single' in answer_mode: 314 | answer = answer.encode('utf8') 315 | x_list.append(question) 316 | y_list.append(answer) 317 | image_name_list.append(image_name) 318 | image_id_list.append(image_id) 319 | question_id_list.append(question_id) 320 | elif 'all' in answer_mode: 321 | num_answers_all_mode = len(answer_list_all_mode) 322 | x_list.extend([question]*num_answers_all_mode) 323 | image_name_list.extend([image_name]*num_answers_all_mode) 324 | image_id_list.extend([image_id]*num_answers_all_mode) 325 | question_id_list.extend([question_id]*num_answers_all_mode) 326 | y_list.extend(answer_list_all_mode) 327 | else: 328 | raise NotImplementedError() 329 | 330 | return {'x':x_list, 'y':y_list, 331 | 'img_name':image_name_list, 332 | 'img_ind': image_id_list, 333 | 'question_id': question_id_list, 334 | 'end_of_question':'?', 335 | 'end_of_answer':'', 336 | 'answer_words_delimiter':' ', 337 | 'vqa_object':vqa, 338 | 'questions_path':vqa_dict['questions_path'], 339 | 'anno_path':vqa_dict['anno_path']} 340 | 341 | 342 | def vqa_real_images_open_ended( 343 | path=None, 344 | train_or_test='train', 345 | keep_top_qa_pairs=0, 346 | answer_mode='single', 347 | **kwargs): 348 | """ 349 | VT-Vision-Lab VQA open-ended question answeir pairs. 
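# Illustrative sketch (toy values only, not taken from the dataset): how the
# keep_top_qa_pairs filter above restricts answers to the K most frequent ones.
#
#   from toolz import frequencies
#   all_answers = ['yes', 'no', 'yes', '2', 'yes', 'no']
#   freq = frequencies(all_answers)     # {'yes': 3, 'no': 2, '2': 1}
#   allowed_answers_dict = dict(
#       sorted(freq.items(), key=lambda x: x[1], reverse=True)[:2])
#   # -> {'yes': 3, 'no': 2}; annotations whose answers all fall outside this
#   # dict are skipped by the loop above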
350 | 351 | In: 352 | path - path to VQA root folder, if None then default path is chosen; 353 | by default None 354 | train_or_test - switch between train and test set; 355 | value belongs to \{'train', 'val', 'test', 'test_dev\} 356 | by default 'train' 357 | answer_mode - possible answer modes: 358 | 'single_random' - single answer, randomly chosen 359 | 'single_confident' - single answer, randomly chosen among the confident; 360 | if there is no confident then randomly chosen (the same as single) 361 | 'single_frequent' - the most frequent answer 362 | 'all' - with one question all answers 363 | 'all_repeat' - all answers by repeating the same question 364 | 'all_repeat_confidentonly' - all answers that are confident (repeats the same question) 365 | keep_top_qa_pairs - filter out question-answer pairs to the 366 | keep_top_qa_pairs if positive; by default 0 367 | 368 | Out: 369 | x - textual questions 370 | y - textual answers 371 | img_name - names of the images 372 | img_ind - image indices that correspond to x 373 | question_id - list of question indices 374 | end_of_question - end of question token 375 | end_of_answer - end of answer token 376 | answer_words_delimiter - delimiter for multiple word answers 377 | """ 378 | 379 | dataset_type = 'mscoco' 380 | annotation_year = '2014' 381 | question_year = '2015' if 'test' in train_or_test else '2014' 382 | task_type = 'OpenEnded' 383 | train_or_test_image = 'test' if 'test' in train_or_test else train_or_test 384 | image_name_template = 'COCO_' + train_or_test_image + question_year + '_{0:0=12}' 385 | 386 | return vqa_general( 387 | path=path, 388 | train_or_test=train_or_test, 389 | dataset_type=dataset_type, 390 | task_type=task_type, 391 | annotation_year=annotation_year, 392 | question_year=question_year, 393 | image_name_template=image_name_template, 394 | answer_mode=answer_mode, 395 | keep_top_qa_pairs=keep_top_qa_pairs) 396 | 397 | 398 | ### 399 | # Non-dataset specific functions. 400 | ### 401 | def is_image_file(x): 402 | return x.endswith('.png') or x.endswith('.jpg') or x.endswith('.jpeg') 403 | 404 | 405 | def global_visual_features( 406 | path, perception='googlenet', layer='pool5-7x7_s1', 407 | memory_time_steps=35, is_shuffle_memories=True, names_list=None): 408 | """ 409 | Provides global visual features. 410 | 411 | In: 412 | path - the root path 413 | perception - the perception model; by default 'googlenet' 414 | layer - the layer in the model; by default 'pool5-7x7_s1' 415 | memory_time_steps - number of memories, everything outside is cut out; 416 | valid only if visual features are 3d tensors; by default 35 417 | is_shuffle_memories - shuffle memories; 418 | it's more important when they must be truncated; by default True 419 | names_list - list of the image names, if None then all images are considered; 420 | only valid if data are stored as mappings from names into features; 421 | by default None 422 | """ 423 | assert path is not None, 'Set up the path!' 424 | if is_shuffle_memories: 425 | print 'Shuffling memories ...' 426 | visual_features = np.load(os.path.join( 427 | path, perception, 'blobs.' 
+ layer + '.npy')) 428 | if visual_features.shape == (): 429 | visual_features = visual_features.item() 430 | 431 | if names_list is None or names_list==[]: 432 | return visual_features 433 | else: 434 | # either 2D or 3D tensor 435 | tmp_feats = visual_features[visual_features.keys()[0]] 436 | if layer.endswith('index'): 437 | visual_features_subset = np.zeros( 438 | (len(names_list), memory_time_steps), dtype=int) 439 | is_memories = True 440 | elif tmp_feats.ndim == 1: 441 | visual_features_subset = np.zeros( 442 | (len(names_list), tmp_feats.shape[0])) 443 | is_memories = False 444 | elif tmp_feats.ndim == 2: 445 | # matrix has dimensions #images x #time_steps x #features 446 | visual_features_subset = np.zeros( 447 | (len(names_list), memory_time_steps, tmp_feats.shape[-1])) 448 | is_memories = True 449 | else: 450 | raise NotImplementedError() 451 | skipped_image_names = set() 452 | for k, name_now in enumerate(names_list): 453 | if name_now not in visual_features: 454 | # keep going if image doesn't exist in features 455 | skipped_image_names.add(name_now) 456 | continue 457 | if is_memories: 458 | visual_features_now = visual_features[name_now] 459 | number_memories = visual_features_now.shape[0] 460 | if is_shuffle_memories: 461 | shuffled_memory_indices = \ 462 | np.arange(visual_features_now.shape[0]) 463 | np.random.shuffle(shuffled_memory_indices) 464 | visual_features_now = \ 465 | visual_features_now[shuffled_memory_indices] 466 | if layer.endswith('index'): 467 | # we add one because we want to mask-out zeroes 468 | visual_features_subset[k,-number_memories:] = \ 469 | np.squeeze(visual_features_now[:memory_time_steps]+1) 470 | else: 471 | visual_features_subset[k,-number_memories:,:] = \ 472 | visual_features_now[:memory_time_steps,:] 473 | else: 474 | visual_features_subset[k,:] = visual_features[name_now] 475 | print('Skipped images {0} of them:'.format(len(skipped_image_names))) 476 | for name_now in skipped_image_names: 477 | print(name_now) 478 | return visual_features_subset 479 | 480 | 481 | def get_global_perception( 482 | task='daquar', train_or_test='train', extractor_fun=global_visual_features, 483 | path=None, perception='googlenet', layer='pool5-7x7_s1', names_list=None): 484 | """ 485 | Provides global visual features. 
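# Illustrative note on the conventions assumed by the two functions above
# (the paths and name pattern follow the code; the concrete values are examples):
#   * vqa_real_images_open_ended builds image names such as
#     'COCO_train2014_{0:0=12}'.format(123) -> 'COCO_train2014_000000000123'
#   * global_visual_features expects the features file at
#     <path>/<perception>/blobs.<layer>.npy, e.g.
#     <path>/googlenet/blobs.pool5-7x7_s1.npy, holding either a plain feature
#     array or a pickled dict that maps such image names to per-image feature
#     vectors (2D case) or per-image matrices of "memories" (3D case)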
486 | 487 | In: 488 | task - the challenge; by default 'daquar' 489 | train_or_test - training, validation, or test set; by default train 490 | extractor_fun - function for extraction; 491 | by default global_visual_features 492 | path - the root path, if None then default path is taken; 493 | by default None 494 | perception - the perception model; by default 'googlenet' 495 | layer - the layer in the model; by default 'pool5-7x7_s1' 496 | names_list - list of the image names, if None then all images are considered; 497 | only valid if data are stored as mappings from names into features; 498 | by default None 499 | """ 500 | if path is None: 501 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 502 | root_path = os.path.join(curr_dir, '..', '..', 'data') 503 | if task=='daquar': 504 | task_path = os.path.join(root_path, 'daquar', 'visual_features') 505 | elif task == 'vqa': 506 | if train_or_test == 'train': vqa_train_or_test='train2014' 507 | elif train_or_test == 'val': vqa_train_or_test='val2014' 508 | elif 'test' in train_or_test: vqa_train_or_test='test2015' 509 | else: raise NotImplementedError() 510 | task_path = os.path.join(root_path, 'vqa', 'visual_features', vqa_train_or_test) 511 | else: 512 | task_path = path 513 | 514 | return extractor_fun( 515 | path=task_path, 516 | perception=perception, 517 | layer=layer, 518 | names_list=names_list) 519 | 520 | 521 | # Selector 522 | ### 523 | select = { 524 | 'daquar-triples': { 525 | 'text':daquar_qa_triples, 526 | 'perception':lambda train_or_test, names_list, 527 | parts_extractor, max_parts, perception, 528 | layer, second_layer: 529 | get_global_perception( 530 | task='daquar', 531 | train_or_test=train_or_test, 532 | names_list=names_list, 533 | extractor_fun=global_visual_features, 534 | perception=perception, 535 | layer=layer), 536 | 'save_predictions': daquar_save_results 537 | }, 538 | 'vqa-real_images-open_ended': { 539 | 'text':vqa_real_images_open_ended, 540 | 'perception':lambda train_or_test, names_list, 541 | parts_extractor, max_parts, perception, 542 | layer, second_layer: 543 | get_global_perception( 544 | task='vqa', 545 | train_or_test=train_or_test, 546 | names_list=names_list, 547 | extractor_fun=global_visual_features, 548 | perception=perception, 549 | layer=layer), 550 | 'visual_parameters':lambda train_or_test, perception, params: 551 | get_global_perception( 552 | task='vqa', train_or_test=train_or_test, 553 | extractor_fun=global_visual_parameters, 554 | perception=perception, 555 | params=params), 556 | 'save_predictions': vqa_save_results 557 | }, 558 | } 559 | 560 | -------------------------------------------------------------------------------- /kraino/utils/parsers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsing the input arguments. 
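# Illustrative sketch of how the data_provider.select dictionary above is used
# (it mirrors the calls in neural_solver.py; the dataset name, layer and other
# values are examples only and require the corresponding data under data/):
#
#   from kraino.utils import data_provider
#   dp = data_provider.select['vqa-real_images-open_ended']
#   train_text = dp['text'](train_or_test='train',
#                           answer_mode='single_frequent',
#                           keep_top_qa_pairs=1000)
#   train_feats = dp['perception'](train_or_test='train',
#                                  names_list=train_text['img_name'],
#                                  parts_extractor='whole', max_parts=35,
#                                  perception='googlenet',
#                                  layer='pool5-7x7_s1', second_layer='')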
3 | 4 | Author: Mateusz Malinowski 5 | Email: mmalinow@mpi-inf.mpg.de 6 | """ 7 | 8 | import argparse 9 | 10 | 11 | ### 12 | # Default arguments 13 | ### 14 | DEFAULT_MAX_INPUT_TIME_STEPS = 32 15 | DEFAULT_MAX_OUTPUT_TIME_STEPS = 5 16 | DEFAULT_MAX_MEMORY_TIME_STEPS = 35 17 | DEFAULT_TRUNCATE_OUTPUT_SPACE = 0 18 | DEFAULT_TRUNCATE_INPUT_SPACE = 0 19 | DEFAULT_NUM_TOP_QA_PAIRS=0 20 | DEFAULT_MAX_ERA = 10 21 | DEFAULT_MAX_EPOCH = 30 22 | DEFAULT_BATCH_SIZE = 755 23 | DEFAULT_HIDDEN_STATE_SIZE = 1000 24 | DEFAULT_VISUAL_HIDDEN_STATE_SIZE = 1000 25 | DEFAULT_TEXTUAL_EMBEDDING_SIZE = 1000 26 | DEFAULT_VISUAL_EMBEDDING_SIZE = 1000 27 | #DEFAULT_ADAPTATION_SIZE = 1000 28 | DEFAULT_MLP_HIDDEN_SIZE = 1000 29 | DEFAULT_NUM_MLP_LAYERS = 0 30 | DEFAULT_NUM_LANGUAGE_LAYERS = 1 31 | #DEFAULT_NUM_ADAPTATION_LAYERS = 1 32 | DEFAULT_TEMPERATURE = 0.001 33 | DEFAULT_VALIDATION_SPLIT = 0.0 34 | DEFAULT_TRAINING_SUBSET = -1 35 | DEFAULT_VAL_SUBSET = -1 36 | DEFAULT_TEST_SUBSET = -1 37 | DEFAULT_REDUCE_RATE = 1.0 38 | DEFAULT_MAX_NUMBER_REDUCTIONS = 10 39 | DEFAULT_LR = -1 40 | DEFAULT_LR_PATIENCE = 5 41 | DEFAULT_FUSION_LAYER_INDEX = 0 42 | DEFAULT_LANGUAGE_CNN_FILTERS = 1000 43 | DEFAULT_LANGUAGE_CNN_FILTER_LENGTH = 3 44 | DEFAULT_LANGUAGE_CNN_ACTIVATION = 'relu' 45 | DEFAULT_LANGUAGE_CNN_VIEWS = 3 46 | DEFAULT_LANGUAGE_MAX_POOL_LENGTH = 2 47 | DEFAULT_VERBOSITY = '' 48 | DEFAULT_WEIGHTS_LOADER_ERA = -1 49 | DEFAULT_MERGE_MODE = 'ave' 50 | DEFAULT_MULTIMODAL_MERGE_MODE = 'concat' 51 | DEFAULT_WORD_REPRESENTATION = 'one_hot' 52 | DEFAULT_OPTIMIZER = 'adam' 53 | DEFAULT_TEXT_ENCODER = 'lstm' 54 | DEFAULT_TEXT_DECODER = 'lstm' 55 | DEFAULT_VISUAL_ENCODER = 'lstm' 56 | DEFAULT_SEQUENCE_REDUCER = 'lstm' 57 | DEFAULT_MEMORY_MATCH_ACTIVATION = 'softmax' 58 | DEFAULT_MLP_ACTIVATION = 'relu' 59 | DEFAULT_PERCEPTION='googlenet' 60 | DEFAULT_PERCEPTION_LAYER='pool5-7x7_s1' 61 | DEFAULT_PERCEPTION_SECOND_LAYER='' 62 | DEFAULT_TRAINABLE_PERCEPTION_NAME='none' 63 | DEFAULT_PARAMS='loss3_classifier' 64 | DEFAULT_WORD_GENERATOR = 'max_likelihood' 65 | DEFAULT_DATASET = 'daquar-triples' 66 | DEFAULT_PARTS_EXTRACTOR = 'whole' 67 | DEFAULT_MODEL = 'sequential-blind-single_answer' 68 | DEFAULT_LOSS = 'categorical_crossentropy' 69 | DEFAULT_METRIC = 'wups' 70 | DEFAULT_VQA_ANSWER_MODE = 'single_random' 71 | DEFAULT_PREDICTION_DATASET_FOLD = 'test' 72 | DEFAULT_VISUALIZATION_URL = 'default' 73 | DEFAULT_VISUALIZATION_FIG_LOSS_TITLE = 'Loss' 74 | DEFAULT_VISUALIZATION_FIG_METRIC_TITLE = 'WUPS scores' 75 | DEFAULT_WEIGHTS_LOADER_NAME = '' 76 | DEFAULT_RESULTS_FILENAME = 'results' 77 | DEFAULT_IS_REVERSE_INPUT=False 78 | DEFAULT_IS_SAVE_WEIGHTS = False 79 | DEFAULT_IS_LR_FIXED_REDUCTION = False 80 | DEFAULT_IS_EARLY_STOPPING = False 81 | DEFAULT_IS_VALIDATION_SET = False 82 | DEFAULT_IS_ONLY_FIRST_ANSWER_WORD = True 83 | DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD = False 84 | 85 | 86 | ### 87 | # Functions 88 | ### 89 | def parse_input_arguments(): 90 | arg_parser = argparse.ArgumentParser() 91 | arg_parser.add_argument('--max_input_time_steps', 92 | dest='MAX_INPUT_TIME_STEPS', default=DEFAULT_MAX_INPUT_TIME_STEPS, type=int, 93 | help='Maximal number of time steps (word positions) in a question; ' + 94 | 'by default {0}'.format(DEFAULT_MAX_INPUT_TIME_STEPS)) 95 | arg_parser.add_argument('--max_output_time_steps', 96 | dest='MAX_OUTPUT_TIME_STEPS', default=DEFAULT_MAX_OUTPUT_TIME_STEPS, type=int, 97 | help='Maximal number of time steps (word positions) in an answer; ' + 98 | 'by default {0}'.format(DEFAULT_MAX_OUTPUT_TIME_STEPS)) 99 | 
arg_parser.add_argument('--max_visual_time_steps', 100 | dest='MAX_MEMORY_TIME_STEPS', default=DEFAULT_MAX_MEMORY_TIME_STEPS, type=int, 101 | help='Maximal number of memory time steps in the input; ' + 102 | 'by default {0}'.format(DEFAULT_MAX_MEMORY_TIME_STEPS)) 103 | arg_parser.add_argument('--truncate_output_space', 104 | dest='TRUNCATE_OUTPUT_SPACE', default=DEFAULT_TRUNCATE_OUTPUT_SPACE, type=int, 105 | help='Restrict the output space to the most frequent items if positive, otherwise all output items; ' + 106 | 'by default {0}'.format(DEFAULT_TRUNCATE_OUTPUT_SPACE)) 107 | arg_parser.add_argument('--truncate_input_space', 108 | dest='TRUNCATE_INPUT_SPACE', default=DEFAULT_TRUNCATE_INPUT_SPACE, type=int, 109 | help='Restrict the input space to the most frequent items if positive, otherwise all input items; ' + 110 | 'by default {0}'.format(DEFAULT_TRUNCATE_INPUT_SPACE)) 111 | arg_parser.add_argument('--number_most_frequent_qa_pairs', 112 | dest='NUM_TOP_QA_PAIRS', default=DEFAULT_NUM_TOP_QA_PAIRS, type=int, 113 | help='Restrict number of answers to K most frequent if positive, otherwise no restrictions; ' + 114 | 'by default {0}'.format(DEFAULT_NUM_TOP_QA_PAIRS)) 115 | arg_parser.add_argument('--max_era', 116 | dest='MAX_ERA', default=DEFAULT_MAX_ERA, type=int, 117 | help='Number of eras to loop over; one era contains many epochs; ' + 118 | 'by default {0}'.format(DEFAULT_MAX_ERA)) 119 | arg_parser.add_argument('--max_epoch', 120 | dest='MAX_EPOCH', default=DEFAULT_MAX_EPOCH, type=int, 121 | help='Number of epochs; by default {0}'.format(DEFAULT_MAX_EPOCH)) 122 | arg_parser.add_argument('--batch_size', 123 | dest='BATCH_SIZE', default=DEFAULT_BATCH_SIZE, type=int, 124 | help='Number of samples in every batch; ' + 125 | 'by default {0}'.format(DEFAULT_BATCH_SIZE)) 126 | arg_parser.add_argument('--hidden_state_size', 127 | dest='HIDDEN_STATE_SIZE', default=DEFAULT_HIDDEN_STATE_SIZE, type=int, 128 | help='Size of the hidden state; by default {0}'.format(DEFAULT_HIDDEN_STATE_SIZE)) 129 | arg_parser.add_argument('--visual_hidden_state_size', 130 | dest='VISUAL_HIDDEN_STATE_SIZE', default=DEFAULT_VISUAL_HIDDEN_STATE_SIZE, type=int, 131 | help='Size of the visual hidden state; by default {0}'.format(DEFAULT_VISUAL_HIDDEN_STATE_SIZE)) 132 | arg_parser.add_argument('--textual_embedding_size', 133 | dest='TEXTUAL_EMBEDDING_SIZE', default=DEFAULT_TEXTUAL_EMBEDDING_SIZE, type=int, 134 | help='Size of the embedding layer; if 0 then no embedding is applied; by default {0}'.format(DEFAULT_TEXTUAL_EMBEDDING_SIZE)) 135 | arg_parser.add_argument('--visual_embedding_size', 136 | dest='VISUAL_EMBEDDING_SIZE', default=DEFAULT_VISUAL_EMBEDDING_SIZE, type=int, 137 | help='Size of the visual embedding layer; by default {0}'.format(DEFAULT_VISUAL_EMBEDDING_SIZE)) 138 | #arg_parser.add_argument('--adaptation_size', 139 | #dest='ADAPTATION_SIZE', default=DEFAULT_ADAPTATION_SIZE, type=int, 140 | #help='Size of the adaptation layer; by default {0}'.format(DEFAULT_ADAPTATION_SIZE)) 141 | arg_parser.add_argument('--mlp_hidden_size', 142 | dest='MLP_HIDDEN_SIZE', default=DEFAULT_MLP_HIDDEN_SIZE, type=int, 143 | help='Size of the MLP layer; by default {0}'.format(DEFAULT_MLP_HIDDEN_SIZE)) 144 | arg_parser.add_argument('--num_mlp_layers', 145 | dest='NUM_MLP_LAYERS', default=DEFAULT_NUM_MLP_LAYERS, type=int, 146 | help='Number of MLP layers; by default {0}'.format(DEFAULT_NUM_MLP_LAYERS)) 147 | arg_parser.add_argument('--num_language_layers', 148 | dest='NUM_LANGUAGE_LAYERS', default=DEFAULT_NUM_LANGUAGE_LAYERS, 
type=int, 149 | help='Number of language layers; by default {0}'.format(DEFAULT_NUM_LANGUAGE_LAYERS)) 150 | #arg_parser.add_argument('--num_adaptation_layers', 151 | #dest='NUM_ADAPTATION_LAYERS', default=DEFAULT_NUM_ADAPTATION_LAYERS, type=int, 152 | #help='Number of adaptation layers; by default {0}'.format(DEFAULT_NUM_ADAPTATION_LAYERS)) 153 | arg_parser.add_argument('--temperature', 154 | dest='TEMPERATURE', default=DEFAULT_TEMPERATURE, type=float, 155 | help='Temperature for the predictions; the colder the more conservative (confident) answers; ' + 156 | 'by default {0}'.format(DEFAULT_TEMPERATURE)) 157 | arg_parser.add_argument('--validation_split', 158 | dest='VALIDATION_SPLIT', default=DEFAULT_VALIDATION_SPLIT, type=float, 159 | help='Fraction of training data used for validation; by default {0}'.\ 160 | format(DEFAULT_VALIDATION_SPLIT)) 161 | arg_parser.add_argument('--training_subset_size', 162 | dest='TRAINING_SUBSET_SIZE', default=DEFAULT_TRAINING_SUBSET, type=int, 163 | help='Size of the training subset, only for the monitoring if verbosity is set; ' + 164 | 'by default {0}'.format(DEFAULT_TRAINING_SUBSET)) 165 | arg_parser.add_argument('--validation_subset_size', 166 | dest='VAL_SUBSET_SIZE', default=DEFAULT_VAL_SUBSET, type=int, 167 | help='Size of the validation subset, only for the monitoring if verbosity is set; ' + 168 | 'by default {0}'.format(DEFAULT_VAL_SUBSET)) 169 | arg_parser.add_argument('--test_subset_size', 170 | dest='TEST_SUBSET_SIZE', default=DEFAULT_TEST_SUBSET, type=int, 171 | help='Size of the test subset, only for the monitoring if verbosity is set; ' + 172 | 'by default {0}'.format(DEFAULT_TEST_SUBSET)) 173 | arg_parser.add_argument('--reduce_rate', 174 | dest='REDUCE_RATE', default=DEFAULT_REDUCE_RATE, type=float, 175 | help='Reduce learning rate; by default {0}'.format(DEFAULT_REDUCE_RATE)) 176 | arg_parser.add_argument('--max_number_reductions', 177 | dest='MAX_NUMBER_REDUCTIONS', default=DEFAULT_MAX_NUMBER_REDUCTIONS, type=int, 178 | help='Maximal number of reductions; by default {0}'.format(DEFAULT_MAX_NUMBER_REDUCTIONS)) 179 | arg_parser.add_argument('--lr', 180 | dest='LR', default=DEFAULT_LR, type=float, 181 | help='Learning rate if positive, otherwise default values for individual solvers are considered; by default {0}'.format(DEFAULT_LR)) 182 | arg_parser.add_argument('--lr_patience', 183 | dest='LR_PATIENCE', default=DEFAULT_LR_PATIENCE, type=int, 184 | help='Patience (allowed number of epochs in stagnations); by default {0}'.format(DEFAULT_LR_PATIENCE)) 185 | arg_parser.add_argument('--fusion_layer_index', 186 | dest='FUSION_LAYER_INDEX', default=DEFAULT_FUSION_LAYER_INDEX, type=int, 187 | help='Index of the language layer where the multimodal fusion happens; by default {0}'.format(DEFAULT_FUSION_LAYER_INDEX)) 188 | arg_parser.add_argument('--language_cnn_filter_size', 189 | dest='LANGUAGE_CNN_FILTERS', default=DEFAULT_LANGUAGE_CNN_FILTERS, type=int, 190 | help='Number of filters for CNN language (dimensionality of the CNN output); by default {0}'.format(DEFAULT_LANGUAGE_CNN_FILTERS)) 191 | arg_parser.add_argument('--language_cnn_filter_length', 192 | dest='LANGUAGE_CNN_FILTER_LENGTH', default=DEFAULT_LANGUAGE_CNN_FILTER_LENGTH, type=int, 193 | help='Size of receptive field of the language CNN filters; by default {0}'.format(DEFAULT_LANGUAGE_CNN_FILTER_LENGTH)) 194 | arg_parser.add_argument('--language_cnn_activation', 195 | dest='LANGUAGE_CNN_ACTIVATION', default=DEFAULT_LANGUAGE_CNN_ACTIVATION, type=str, 196 | help='Activation for CNN 
language; by default {0}'.format(DEFAULT_LANGUAGE_CNN_ACTIVATION)) 197 | arg_parser.add_argument('--language_cnn_views', 198 | dest='LANGUAGE_CNN_VIEWS', default=DEFAULT_LANGUAGE_CNN_VIEWS, type=int, 199 | help='Number of CNN views (e.g. two views are unigram and bigram); by default {0}'.format(DEFAULT_LANGUAGE_CNN_VIEWS)) 200 | arg_parser.add_argument('--language_max_pool_length', 201 | dest='LANGUAGE_MAX_POOL_LENGTH', default=DEFAULT_LANGUAGE_MAX_POOL_LENGTH, type=int, 202 | help='Size of receptive field of max pooling; by default {0}'.format(DEFAULT_LANGUAGE_MAX_POOL_LENGTH)) 203 | arg_parser.add_argument('--verbosity', 204 | dest='VERBOSITY', default=DEFAULT_VERBOSITY, type=str, 205 | help='Verbosity level with values separated by colon, there are: monitor_training_prediction; ' + 206 | 'by default {0}'.format(DEFAULT_VERBOSITY)) 207 | arg_parser.add_argument('--weights_loader_era', 208 | dest='WEIGHTS_LOADER_ERA', default=DEFAULT_WEIGHTS_LOADER_ERA, type=int, 209 | help='If non-negative then it sets the era to load the weights, otherwise no model is loaded; ' + 210 | 'by default {0}'.format(DEFAULT_WEIGHTS_LOADER_ERA)) 211 | arg_parser.add_argument('--results_filename', 212 | dest='RESULTS_FILENAME', default=DEFAULT_RESULTS_FILENAME, type=str, 213 | help='Filename where the results of the predictions are stored; by default {0}'.format(DEFAULT_RESULTS_FILENAME)) 214 | arg_parser.add_argument('--temporal_fusion', 215 | dest='MERGE_MODE', default=DEFAULT_MERGE_MODE, type=str, 216 | help='Temporal merging mode {0}'.format(DEFAULT_MERGE_MODE)) 217 | arg_parser.add_argument('--multimodal_fusion', 218 | dest='MULTIMODAL_MERGE_MODE', default=DEFAULT_MULTIMODAL_MERGE_MODE, type=str, 219 | help='Multimodal merging mode {0}'.format(DEFAULT_MULTIMODAL_MERGE_MODE)) 220 | arg_parser.add_argument('--word_representation', 221 | dest='WORD_REPRESENTATION', default=DEFAULT_WORD_REPRESENTATION, type=str, 222 | help='Word representation; by default {0}'.format(DEFAULT_WORD_REPRESENTATION)) 223 | arg_parser.add_argument('--optimizer', 224 | dest='OPTIMIZER', default=DEFAULT_OPTIMIZER, type=str, 225 | help='Optimization algorithm for training; by default {0}'.format(DEFAULT_OPTIMIZER)) 226 | arg_parser.add_argument('--text_encoder', 227 | dest='TEXT_ENCODER', default=DEFAULT_TEXT_ENCODER, type=str, 228 | help='Kind of used encoder; by default {0}'.format(DEFAULT_TEXT_ENCODER)) 229 | arg_parser.add_argument('--text_decoder', 230 | dest='TEXT_DECODER', default=DEFAULT_TEXT_DECODER, type=str, 231 | help='Kind of used decoder, valid only in encoder-decoder architectures; ' + 232 | 'by default {0}'.format(DEFAULT_TEXT_DECODER)) 233 | arg_parser.add_argument('--visual_encoder', 234 | dest='VISUAL_ENCODER', default=DEFAULT_VISUAL_ENCODER, type=str, 235 | help='Kind of used visual encoder, valid only in memory-based encoder-decoder architectures; ' + 236 | 'by default {0}'.format(DEFAULT_VISUAL_ENCODER)) 237 | arg_parser.add_argument('--sequence_reducer', 238 | dest='SEQUENCE_REDUCER', default=DEFAULT_SEQUENCE_REDUCER, type=str, 239 | help='Kind of used sequence reducer, valid only in memory-based encoder-decoder architectures; ' + 240 | 'by default {0}'.format(DEFAULT_SEQUENCE_REDUCER)) 241 | arg_parser.add_argument('--memory_match_activation', 242 | dest='MEMORY_MATCH_ACTIVATION', default=DEFAULT_MEMORY_MATCH_ACTIVATION, type=str, 243 | help='Kind of used memory match activation, valid only in memory-based architectures; ' + 244 | 'by default {0}'.format(DEFAULT_MEMORY_MATCH_ACTIVATION)) 245 | 
arg_parser.add_argument('--mlp_activation', 246 | dest='MLP_ACTIVATION', default=DEFAULT_MLP_ACTIVATION, type=str, 247 | help='Kind of used MLP activation unit; by default {0}'.format(DEFAULT_MLP_ACTIVATION)) 248 | arg_parser.add_argument('--perception', 249 | dest='PERCEPTION', default=DEFAULT_PERCEPTION, type=str, 250 | help='Kind of a pre-trained perception model used; by default {0}'.format(DEFAULT_PERCEPTION)) 251 | arg_parser.add_argument('--perception_layer', 252 | dest='PERCEPTION_LAYER', default=DEFAULT_PERCEPTION_LAYER, type=str, 253 | help='Kind of layer in the pre-trained perception used; by default {0}'.format(DEFAULT_PERCEPTION_LAYER)) 254 | arg_parser.add_argument('--perception_second_layer', 255 | dest='PERCEPTION_SECOND_LAYER', default=DEFAULT_PERCEPTION_SECOND_LAYER, type=str, 256 | help='Kind of layer in the pre-trained perception used; by default {0}'.format(DEFAULT_PERCEPTION_SECOND_LAYER)) 257 | arg_parser.add_argument('--trainable_perception', 258 | dest='TRAINABLE_PERCEPTION_NAME', default=DEFAULT_TRAINABLE_PERCEPTION_NAME, type=str, 259 | help='Perception that is used to train or fine-tune, or none if we want to rely on a pre-trained perception; '\ 260 | + ' by default {0}'.format(DEFAULT_TRAINABLE_PERCEPTION_NAME)) 261 | arg_parser.add_argument('--params', 262 | dest='PARAMS', default=DEFAULT_PARAMS, type=str, 263 | help='Kind of params in the perception used; by default {0}'.format(DEFAULT_PARAMS)) 264 | arg_parser.add_argument('--word_generator', 265 | dest='WORD_GENERATOR', default=DEFAULT_WORD_GENERATOR, type=str, 266 | help='Procedure to generate single words; ' + 267 | 'by default {0}'.format(DEFAULT_WORD_GENERATOR)) 268 | arg_parser.add_argument('--dataset', 269 | dest='DATASET', default=DEFAULT_DATASET, type=str, 270 | help='Kind of used dataset; by default {0}'.format(DEFAULT_DATASET)) 271 | arg_parser.add_argument('--parts_extractor', 272 | dest='PARTS_EXTRACTOR', default=DEFAULT_PARTS_EXTRACTOR, type=str, 273 | help='Kind of parts extractor; only if image parts are concerned; by default {0}'.format(DEFAULT_PARTS_EXTRACTOR)) 274 | arg_parser.add_argument('--model', 275 | dest='MODEL', default=DEFAULT_MODEL, type=str, 276 | help='Kind of used model; by default {0}'.format(DEFAULT_MODEL)) 277 | arg_parser.add_argument('--loss', 278 | dest='LOSS', default=DEFAULT_LOSS, type=str, 279 | help='Kind of used loss; by default {0}'.format(DEFAULT_LOSS)) 280 | arg_parser.add_argument('--metric', 281 | dest='METRIC', default=DEFAULT_METRIC, type=str, 282 | help='Kind of used metric; by default {0}'.format(DEFAULT_METRIC)) 283 | arg_parser.add_argument('--vqa_answer_mode', 284 | dest='VQA_ANSWER_MODE', default=DEFAULT_VQA_ANSWER_MODE, type=str, 285 | help='VQA answer mode; by default {0}'.format(DEFAULT_VQA_ANSWER_MODE)) 286 | arg_parser.add_argument('--prediction_dataset_fold', 287 | dest='PREDICTION_DATASET_FOLD', default=DEFAULT_PREDICTION_DATASET_FOLD, type=str, 288 | help='Dataset chosen for predictions; by default {0}'.format(DEFAULT_PREDICTION_DATASET_FOLD)) 289 | arg_parser.add_argument('--visualization_url', 290 | dest='VISUALIZATION_URL', default=DEFAULT_VISUALIZATION_URL, type=str, 291 | help='Bokeh url; by default {0}'.format(DEFAULT_VISUALIZATION_URL)) 292 | arg_parser.add_argument('--visualization_fig_loss_title', 293 | dest='VISUALIZATION_FIG_LOSS_TITLE', default=DEFAULT_VISUALIZATION_FIG_LOSS_TITLE, type=str, 294 | help='Bokeh loss figure title; by default {0}'.format(DEFAULT_VISUALIZATION_FIG_LOSS_TITLE)) 295 | 
arg_parser.add_argument('--visualization_fig_metric_title', 296 | dest='VISUALIZATION_FIG_METRIC_TITLE', default=DEFAULT_VISUALIZATION_FIG_METRIC_TITLE, type=str, 297 | help='Bokeh metric figure title; by default {0}'.format(DEFAULT_VISUALIZATION_FIG_METRIC_TITLE)) 298 | arg_parser.add_argument('--weights_loader_name', 299 | dest='WEIGHTS_LOADER_NAME', default=DEFAULT_WEIGHTS_LOADER_NAME, type=str, 300 | help='The main name for the weights loader; by default {0}'.format(DEFAULT_WEIGHTS_LOADER_NAME)) 301 | # boolean arguments 302 | arg_parser.add_argument('--reverse_input', 303 | dest='IS_REVERSE_INPUT', action='store_true', 304 | help='If it is set up then the input is processed in a reverse order ' + 305 | 'by default {0}'.format('--reverse_input' if DEFAULT_IS_REVERSE_INPUT else 'no_reverse_input')) 306 | arg_parser.add_argument('--no_reverse_input', 307 | dest='IS_REVERSE_INPUT', action='store_false', 308 | help='If it is set up then the input is processed in a reverse order ' + 309 | 'by default {0}'.format('--reverse_input' if DEFAULT_IS_REVERSE_INPUT else 'no_reverse_input')) 310 | arg_parser.set_defaults(IS_REVERSE_INPUT=DEFAULT_IS_REVERSE_INPUT) 311 | arg_parser.add_argument('--store_weights', 312 | dest='IS_SAVE_WEIGHTS', action='store_true', 313 | help='If it is set up then the weights are saved in each era; ' + 314 | 'by default {0}'.format('store_weights' if DEFAULT_IS_SAVE_WEIGHTS else 'no_store_weights')) 315 | arg_parser.add_argument('--no_store_weights', 316 | dest='IS_SAVE_WEIGHTS', action='store_false', 317 | help='If it is set up then the weights are forgotten; ' + 318 | 'by default {0}'.format('store_weights' if DEFAULT_IS_SAVE_WEIGHTS else 'no_store_weights')) 319 | arg_parser.set_defaults(IS_SAVE_WEIGHTS=DEFAULT_IS_SAVE_WEIGHTS) 320 | arg_parser.add_argument('--lr_fixed_reduction', 321 | dest='IS_LR_FIXED_REDUCTION', action='store_true', 322 | help='If it is set up early stopping is applied based on val acc; ' + 323 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_LR_FIXED_REDUCTION else 'no_early_stopping')) 324 | arg_parser.add_argument('--no_lr_fixed_reduction', 325 | dest='IS_LR_FIXED_REDUCTION', action='store_false', 326 | help='If it is set up early stopping is applied based on val acc; ' + 327 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_LR_FIXED_REDUCTION else 'no_early_stopping')) 328 | arg_parser.set_defaults(IS_LR_FIXED_REDUCTION=DEFAULT_IS_LR_FIXED_REDUCTION) 329 | arg_parser.add_argument('--early_stopping', 330 | dest='IS_EARLY_STOPPING', action='store_true', 331 | help='If it is set up early stopping is applied based on val acc; ' + 332 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_EARLY_STOPPING else 'no_early_stopping')) 333 | arg_parser.add_argument('--no_early_stopping', 334 | dest='IS_EARLY_STOPPING', action='store_false', 335 | help='If it is set up early stopping is applied based on val acc; ' + 336 | 'by default {0}'.format('early_stopping' if DEFAULT_IS_EARLY_STOPPING else 'no_early_stopping')) 337 | arg_parser.set_defaults(IS_EARLY_STOPPING=DEFAULT_IS_EARLY_STOPPING) 338 | arg_parser.add_argument('--use_validation', 339 | dest='IS_VALIDATION_SET', action='store_true', 340 | help='If it is set up then the validation set is used; ' + 341 | 'by default {0}'.format('use_validation' if DEFAULT_IS_VALIDATION_SET else 'no_validation')) 342 | arg_parser.add_argument('--no_validation', 343 | dest='IS_VALIDATION_SET', action='store_false', 344 | help='If it is set up then there is no validation set; ' + 345 | 'by default 
{0}'.format('use_validation' if DEFAULT_IS_VALIDATION_SET else 'no_validation')) 346 | arg_parser.set_defaults(IS_VALIDATION_SET=DEFAULT_IS_VALIDATION_SET) 347 | arg_parser.add_argument('--use_first_answer_words', 348 | dest='IS_ONLY_FIRST_ANSWER_WORD', action='store_true', 349 | help='If it is set up then first answer words are considered (otherwise, all); ' + 350 | 'by default {0}'.format('use_first_answer_words' if DEFAULT_IS_ONLY_FIRST_ANSWER_WORD else 'use_all_answer_words')) 351 | arg_parser.add_argument('--use_all_answer_words', 352 | dest='IS_ONLY_FIRST_ANSWER_WORD', action='store_false', 353 | help='If it is set up then all answer words are considered (otherwise, only the first); ' + 354 | 'by default {0}'.format('use_first_answer_words' if DEFAULT_IS_ONLY_FIRST_ANSWER_WORD else 'use_all_answer_words')) 355 | arg_parser.set_defaults(IS_ONLY_FIRST_ANSWER_WORD=DEFAULT_IS_ONLY_FIRST_ANSWER_WORD) 356 | arg_parser.add_argument('--use_whole_answer_as_answer_word', 357 | dest='IS_WHOLE_ANSWER_AS_ANSWER_WORD', action='store_true', 358 | help='If it is set up then one answer words is the whole answer; ' + 359 | 'by default {0}'.format('answer word is the whole answer' if DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD else 'split answer into answer words')) 360 | arg_parser.add_argument('--split_answer_into_answer_words', 361 | dest='IS_WHOLE_ANSWER_AS_ANSWER_WORD', action='store_false', 362 | help='If it is set up then one answer words is the whole answer; ' + 363 | 'by default {0}'.format('answer word is the whole answer' if DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD else 'split answer into answer words')) 364 | arg_parser.set_defaults(IS_WHOLE_ANSWER_AS_ANSWER_WORD=DEFAULT_IS_WHOLE_ANSWER_AS_ANSWER_WORD) 365 | # not-working arguments 366 | #arg_parser.add_argument('--gpu_core', 367 | #dest='GPU_CORE', default=-1, type=int, 368 | #help='GPU Core, if -1 then the core is read from the config file') 369 | args = arg_parser.parse_args() 370 | 371 | return args 372 | 373 | -------------------------------------------------------------------------------- /neural_solver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | """ 5 | QA model. 6 | Uses embedding. 7 | 8 | Implementation in Keras. 
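# Example invocation (a sketch only: the flag values below are illustrative,
# all flags are defined in kraino/utils/parsers.py above, and the chosen
# dataset plus any required visual features must be available under data/):
#
#   python neural_solver.py \
#       --dataset vqa-real_images-open_ended \
#       --model sequential-blind-single_answer \
#       --vqa_answer_mode single_frequent \
#       --metric vqa \
#       --max_epoch 10 --max_era 5 --batch_size 512 \
#       --verbosity monitor_val_metric \
#       --store_weights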
9 | 10 | Author: Mateusz Malinowski 11 | Email: mmalinow@mpi-inf.mpg.de 12 | """ 13 | 14 | import os 15 | import numpy as np 16 | 17 | from socket import gethostname 18 | from spacy.en import English 19 | from toolz import compose 20 | from toolz import frequencies 21 | from timeit import default_timer as timer 22 | 23 | from keras.optimizers import Adadelta 24 | from keras.optimizers import Adagrad 25 | from keras.optimizers import Adam 26 | from keras.optimizers import RMSprop 27 | from keras.optimizers import SGD 28 | 29 | from keras.preprocessing import sequence 30 | 31 | from kraino.core import recurrent_net 32 | from kraino.core import model_zoo 33 | from kraino.core.model_zoo import Config 34 | from kraino.core.visual_model_zoo import get_visual_features 35 | from kraino.core.visual_model_zoo import imagenet_mean_preprocess_image_tensor_fun 36 | 37 | from kraino.utils import data_provider 38 | from kraino.utils.read_write import pickle_model 39 | #from kraino.utils.read_write import model_to_json 40 | from kraino.utils.parsers import parse_input_arguments 41 | from kraino.utils.callbacks import StoreModelWeightsOnEraEnd 42 | from kraino.utils.callbacks import PrintOnEraBegin 43 | from kraino.utils.callbacks import PrintPerformanceMetricOnEraEnd 44 | from kraino.utils.callbacks import MonitorPredictionsOnEndEra 45 | #from kraino.utils.callbacks import PlotPerformanceMetricOnEraEnd 46 | #from kraino.utils.callbacks import StandardPerformancePlot 47 | from kraino.utils.callbacks import LearningRateReducerWithEarlyStopping 48 | from kraino.utils.callbacks import LearningRateReducerEveryPatienceEpoch 49 | from kraino.utils.input_output_space import build_vocabulary 50 | from kraino.utils.input_output_space import encode_questions_index 51 | from kraino.utils.input_output_space import encode_questions_dense 52 | from kraino.utils.input_output_space import encode_answers_one_hot 53 | #from kraino.utils.model_visualization import model_picture 54 | 55 | from theano import config as theano_config 56 | 57 | 58 | ### 59 | # Constants 60 | ### 61 | # path to the outputted verbosity 62 | VERBOSITY_PATH_PREFIX = os.path.join('local', 'logs', 'verbosity') 63 | 64 | # class normalized logs 65 | CLASS_NORMALIZED_PATH_PREFIX = os.path.join('local', 'logs', 'class_normalized') 66 | 67 | # path where the weights are saved 68 | WEIGHTS_PATH_PREFIX = os.path.join('local', 'weights', 'weights') 69 | 70 | OPTIMIZERS = { \ 71 | 'sgd':SGD, 72 | 'adagrad':Adagrad, 73 | 'adadelta':Adadelta, 74 | 'rmsprop':RMSprop, 75 | 'adam':Adam, 76 | } 77 | ### 78 | 79 | ### 80 | # Functions 81 | ### 82 | def main(params): 83 | verbosity_tmp = params['VERBOSITY'] 84 | 85 | # seperate verbosity levels by ':' or ',' or ' ' (space) 86 | if not verbosity_tmp == '': 87 | if ':' in verbosity_tmp : 88 | delimiter = ':' 89 | elif ',' in verbosity_tmp: 90 | delimiter = ',' 91 | else: 92 | delimiter = ' ' 93 | verbosity = verbosity_tmp.split(delimiter) 94 | 95 | verbosity_path_longprefix = VERBOSITY_PATH_PREFIX + \ 96 | '.' + params['MODEL'] + '.' + params['DATASET'] + \ 97 | '.' + gethostname() + '.' + theano_config.device + \ 98 | '.epoch_{epoch:02d}.era_{era:02d}' 99 | 100 | class_normalized_path_longprefix = CLASS_NORMALIZED_PATH_PREFIX + \ 101 | '.' + params['MODEL'] + '.' + params['DATASET'] + \ 102 | '.' + gethostname() + '.' 
+ theano_config.device + \ 103 | '.epoch_{epoch:02d}.era_{era:02d}' 104 | else: 105 | verbosity = [] 106 | 107 | model_path=os.path.join('local', 'models', 'model.{0}.{1}.pkl'.format( 108 | params['MODEL'], params['DATASET'])) 109 | 110 | era_weights_loader = params['WEIGHTS_LOADER_ERA'] 111 | if era_weights_loader >= 0: 112 | is_load_weights = True 113 | else: 114 | is_load_weights = False 115 | 116 | if params['WEIGHTS_LOADER_NAME'] == '': 117 | weights_loader_name = gethostname() 118 | else: 119 | weights_loader_name = params['WEIGHTS_LOADER_NAME'] 120 | weights_path = WEIGHTS_PATH_PREFIX + '.' + \ 121 | params['MODEL'] + '.' + params['DATASET'] + \ 122 | '.' + weights_loader_name + \ 123 | '.epoch_{epoch:02d}.era_{era:02d}.hdf5' 124 | 125 | ### 126 | # Load the dataset 127 | ### 128 | dp = data_provider.select[params['DATASET']] 129 | train_dataset = dp['text']( 130 | train_or_test='train', 131 | answer_mode=params['VQA_ANSWER_MODE'], 132 | keep_top_qa_pairs=params['NUM_TOP_QA_PAIRS']) 133 | train_x, train_y = train_dataset['x'], train_dataset['y'] 134 | print('Number of training examples {0}'.format(len(train_x))) 135 | train_image_names = train_dataset['img_name'] 136 | train_visual_features = get_visual_features( 137 | data_provider=dp, 138 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 139 | train_or_test='train', 140 | image_names_list=train_image_names, 141 | parts_extractor=params['PARTS_EXTRACTOR'], 142 | max_parts=params['MAX_MEMORY_TIME_STEPS'], 143 | perception=params['PERCEPTION'], 144 | layer=params['PERCEPTION_LAYER'], 145 | second_layer=params['PERCEPTION_SECOND_LAYER'], 146 | preprocess_image_tensor_fun=imagenet_mean_preprocess_image_tensor_fun) 147 | train_question_id = train_dataset['question_id'] 148 | if np.any([params['IS_VALIDATION_SET'], 149 | 'monitor_val_metric' in verbosity, 150 | 'monitor_val_predictions' in verbosity, 151 | 'plot_val_metric' in verbosity]): 152 | val_dataset = dp['text']( 153 | train_or_test='val', 154 | answer_mode='single_confident', 155 | keep_top_qa_pairs=0) 156 | val_x, val_y = val_dataset['x'], val_dataset['y'] 157 | val_question_id = val_dataset['question_id'] 158 | val_image_names = val_dataset['img_name'] 159 | val_visual_features = get_visual_features( 160 | data_provider=dp, 161 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 162 | train_or_test='val', 163 | image_names_list=val_image_names, 164 | parts_extractor=params['PARTS_EXTRACTOR'], 165 | max_parts=params['MAX_MEMORY_TIME_STEPS'], 166 | perception=params['PERCEPTION'], 167 | layer=params['PERCEPTION_LAYER'], 168 | second_layer=params['PERCEPTION_SECOND_LAYER'], 169 | preprocess_image_tensor_fun=imagenet_mean_preprocess_image_tensor_fun) 170 | if np.any(['monitor_test_metric' in verbosity, 171 | 'monitor_test_predictions' in verbosity, 172 | 'plot_test_metric' in verbosity]): 173 | test_dataset = dp['text']( 174 | train_or_test='test', 175 | answer_mode='single_confident', 176 | keep_top_qa_pairs=0) 177 | test_x, test_y = test_dataset['x'], test_dataset['y'] 178 | test_question_id = test_dataset['question_id'] 179 | test_image_names = test_dataset['img_name'] 180 | test_visual_features = get_visual_features( 181 | data_provider=dp, 182 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 183 | train_or_test='test', 184 | image_names_list=test_image_names, 185 | parts_extractor=params['PARTS_EXTRACTOR'], 186 | max_parts=params['MAX_MEMORY_TIME_STEPS'], 187 | perception=params['PERCEPTION'], 188 | layer=params['PERCEPTION_LAYER'], 
189 | second_layer=params['PERCEPTION_SECOND_LAYER'], 190 | preprocess_image_tensor_fun=imagenet_mean_preprocess_image_tensor_fun) 191 | 192 | ### 193 | # Building vocabularies 194 | ### 195 | split_symbol = '{' 196 | if type(train_x[0]) is unicode: 197 | # choose a split symbol that doesn't exist in text 198 | split_function = lambda x: unicode.split(x, split_symbol) 199 | elif type(train_x[0]) is str: 200 | split_function = lambda x: str.split(x, split_symbol) 201 | else: 202 | raise NotImplementedError() 203 | 204 | wordcount = compose(frequencies, split_function) 205 | wordcount_x = wordcount(split_symbol.join(train_x).replace(' ',split_symbol)) 206 | if params['IS_WHOLE_ANSWER_AS_ANSWER_WORD']: 207 | wordcount_y = wordcount(split_symbol.join(train_y)) 208 | else: 209 | wordcount_y = wordcount(split_symbol.join(train_y).replace( 210 | train_dataset['answer_words_delimiter'],split_symbol)) 211 | 212 | word2index_x, index2word_x = build_vocabulary( 213 | this_wordcount=wordcount_x, 214 | is_reset=True, 215 | truncate_to_most_frequent=params['TRUNCATE_INPUT_SPACE']) 216 | word2index_y, index2word_y = build_vocabulary( 217 | this_wordcount=wordcount_y, 218 | is_reset=True, 219 | truncate_to_most_frequent=params['TRUNCATE_OUTPUT_SPACE']) 220 | 221 | print('Size of the input {0}, and output vocabularies {1}'.\ 222 | format(len(word2index_x), len(word2index_y))) 223 | 224 | # save vocabulary 225 | ### 226 | 227 | ### 228 | # Building input/output 229 | # Dimensions: 230 | # data points 231 | # time steps 232 | # encodings of the words 233 | ### 234 | if params['WORD_REPRESENTATION'] == 'one_hot': 235 | one_hot_x = encode_questions_index(train_x, word2index_x) 236 | X_train = sequence.pad_sequences(one_hot_x, maxlen=params['MAX_INPUT_TIME_STEPS']) 237 | elif params['WORD_REPRESENTATION'] == 'dense': 238 | word_encoder = English() 239 | X_train = encode_questions_dense( 240 | x=train_x, 241 | word_encoder=word_encoder, 242 | max_time_steps=params['MAX_INPUT_TIME_STEPS'], 243 | is_remove_question_symbol=True) 244 | else: 245 | raise NotImplementedError() 246 | if params['IS_WHOLE_ANSWER_AS_ANSWER_WORD']: 247 | train_answer_words_delimiter = None 248 | else: 249 | train_answer_words_delimiter = train_dataset['answer_words_delimiter'] 250 | Y, train_y_gt = encode_answers_one_hot(train_y, word2index_y, 251 | max_answer_time_steps=params['MAX_OUTPUT_TIME_STEPS'], 252 | is_only_first_answer_word=params['IS_ONLY_FIRST_ANSWER_WORD'], 253 | answer_words_delimiter=train_answer_words_delimiter) 254 | 255 | if '-bidirectional-' in params['MODEL']: 256 | train_input = [X_train] * 2 257 | elif '-cnn_3views-' in params['MODEL']: 258 | train_input = [X_train] * 3 259 | elif '-cnn_kviews-' in params['MODEL']: 260 | train_input = [X_train] * params['LANGUAGE_CNN_VIEWS'] 261 | else: 262 | train_input = [X_train] 263 | 264 | if '-multimodal-' in params['MODEL']: 265 | train_input.append(train_visual_features) 266 | 267 | if np.any([params['IS_VALIDATION_SET'], 268 | 'monitor_val_metric' in verbosity, 269 | 'monitor_val_predictions' in verbosity, 270 | 'plot_val_metric' in verbosity]): 271 | if params['WORD_REPRESENTATION'] == 'one_hot': 272 | one_hot_x_val= encode_questions_index(val_x, word2index_x) 273 | X_val = sequence.pad_sequences(one_hot_x_val, 274 | maxlen=params['MAX_INPUT_TIME_STEPS']) 275 | elif params['WORD_REPRESENTATION'] == 'dense': 276 | X_val = encode_questions_dense( 277 | x=val_x, 278 | word_encoder=word_encoder, 279 | max_time_steps=params['MAX_INPUT_TIME_STEPS'], 280 | 
is_remove_question_symbol=True) 281 | else: 282 | NotImplementedError() 283 | 284 | if params['IS_WHOLE_ANSWER_AS_ANSWER_WORD']: 285 | val_answer_words_delimiter = None 286 | else: 287 | val_answer_words_delimiter = val_dataset['answer_words_delimiter'] 288 | Y_val, _ = encode_answers_one_hot(val_y, word2index_y, 289 | max_answer_time_steps=params['MAX_OUTPUT_TIME_STEPS'], 290 | is_only_first_answer_word=params['IS_ONLY_FIRST_ANSWER_WORD'], 291 | answer_words_delimiter=val_answer_words_delimiter) 292 | if '-bidirectional-' in params['MODEL']: 293 | val_input = [X_val] * 2 294 | elif '-cnn_3views-' in params['MODEL']: 295 | val_input = [X_val] * 3 296 | elif '-cnn_kviews-' in params['MODEL']: 297 | val_input = [X_val] * params['LANGUAGE_CNN_VIEWS'] 298 | else: 299 | val_input = [X_val] 300 | 301 | if '-multimodal-' in params['MODEL']: 302 | val_input.append(val_visual_features) 303 | validation_set = (val_input, Y_val) 304 | 305 | if np.any(['monitor_test_metric' in verbosity, 306 | 'monitor_test_predictions' in verbosity, 307 | 'plot_test_metric' in verbosity]): 308 | if params['WORD_REPRESENTATION'] == 'one_hot': 309 | one_hot_x_test = encode_questions_index(test_x, word2index_x) 310 | X_test = sequence.pad_sequences(one_hot_x_test, 311 | maxlen=params['MAX_INPUT_TIME_STEPS']) 312 | elif params['WORD_REPRESENTATION'] == 'dense': 313 | X_test = encode_questions_dense( 314 | x=test_x, 315 | word_encoder=word_encoder, 316 | max_time_steps=params['MAX_INPUT_TIME_STEPS'], 317 | is_remove_question_symbol=True) 318 | else: 319 | NotImplementedError() 320 | if '-bidirectional-' in params['MODEL'] \ 321 | and 'sequential-blind' in params['MODEL']: 322 | test_input = [X_test] * 2 323 | elif '-cnn_3views-' in params['MODEL']: 324 | test_input = [X_test] * 3 325 | elif '-cnn_kviews-' in params['MODEL']: 326 | test_input = [X_test] * params['LANGUAGE_CNN_VIEWS'] 327 | else: 328 | test_input = [X_test] 329 | if '-multimodal-' in params['MODEL']: 330 | test_input.append(test_visual_features) 331 | 332 | # convert to numpy arrays 333 | # train_y - original training ys 334 | # train_y_gt - training ys used to learn the model 335 | train_x = np.asarray(train_x) 336 | train_y = np.asarray(train_y) 337 | train_y_gt = np.asarray(train_y_gt) 338 | ### 339 | 340 | ### 341 | # Callbacks 342 | ### 343 | callbacks = [] 344 | if params['IS_SAVE_WEIGHTS']: 345 | callback_store_model = StoreModelWeightsOnEraEnd( 346 | filepath=weights_path, 347 | epoch_interval=params['MAX_EPOCH']) 348 | callbacks.append(callback_store_model) 349 | 350 | callback_print_on_era_begin = PrintOnEraBegin( 351 | epoch_interval=params['MAX_EPOCH'], 352 | message="Era {era:2d} out of " + str(params['MAX_ERA'])) 353 | callbacks.append(callback_print_on_era_begin) 354 | 355 | # verbosity callbacks 356 | if 'monitor_val_metric' in verbosity: 357 | if params['METRIC'] == 'vqa': 358 | results_function = lambda x: \ 359 | val_dataset['vqa_object'].loadRes( 360 | x, val_dataset['questions_path']) 361 | extra_vars = { 362 | 'question_id':val_question_id, 363 | 'vqa_object':val_dataset['vqa_object'], 364 | 'resfun':results_function} 365 | else: 366 | extra_vars = None 367 | callback_metric = PrintPerformanceMetricOnEraEnd( 368 | X=val_input, 369 | y=val_y, 370 | temperature=params['TEMPERATURE'], 371 | index2word_y=index2word_y, 372 | metric_name=params['METRIC'], 373 | epoch_interval=params['MAX_EPOCH'], 374 | extra_vars=extra_vars, 375 | verbosity_path=class_normalized_path_longprefix+'.val.acc', 376 | verbose=1) 377 | 
callbacks.append(callback_metric) 378 | 379 | if 'monitor_test_metric' in verbosity: 380 | if params['METRIC'] == 'vqa': 381 | results_function = lambda x: \ 382 | test_dataset['vqa_object'].loadRes( 383 | x, test_dataset['questions_path']) 384 | extra_vars = { 385 | 'question_id':test_question_id, 386 | 'vqa_object':test_dataset['vqa_object'], 387 | 'resfun':results_function} 388 | else: 389 | extra_vars = None 390 | callback_metric = PrintPerformanceMetricOnEraEnd( 391 | X=test_input, 392 | y=test_y, 393 | temperature=params['TEMPERATURE'], 394 | index2word_y=index2word_y, 395 | metric_name=params['METRIC'], 396 | epoch_interval=params['MAX_EPOCH'], 397 | extra_vars=extra_vars, 398 | verbosity_path=class_normalized_path_longprefix+'.test.acc', 399 | verbose=1) 400 | callbacks.append(callback_metric) 401 | 402 | if 'monitor_train_predictions' in verbosity: 403 | callback_monitor_train_predictions = MonitorPredictionsOnEndEra( 404 | X=train_input, x=train_x, y=train_y, 405 | temperature=params['TEMPERATURE'], 406 | index2word_y=index2word_y, 407 | verbosity_path=verbosity_path_longprefix+'.train.pred', 408 | epoch_interval=params['MAX_EPOCH'], 409 | subset_size=params['TRAINING_SUBSET_SIZE'], 410 | verbose=1) 411 | callbacks.append(callback_monitor_train_predictions) 412 | 413 | if 'monitor_val_predictions' in verbosity: 414 | callback_monitor_train_predictions = MonitorPredictionsOnEndEra( 415 | X=val_input, x=val_x, y=val_y, 416 | temperature=params['TEMPERATURE'], 417 | index2word_y=index2word_y, 418 | verbosity_path=verbosity_path_longprefix+'.val.pred', 419 | epoch_interval=params['MAX_EPOCH'], 420 | subset_size=params['VAL_SUBSET_SIZE'], 421 | verbose=1) 422 | callbacks.append(callback_monitor_train_predictions) 423 | 424 | if 'monitor_test_predictions' in verbosity: 425 | callback_monitor_train_predictions = MonitorPredictionsOnEndEra( 426 | X=test_input, x=test_x, y=test_y, 427 | temperature=params['TEMPERATURE'], 428 | index2word_y=index2word_y, 429 | verbosity_path=verbosity_path_longprefix+'.test.pred', 430 | epoch_interval=params['MAX_EPOCH'], 431 | subset_size=params['TEST_SUBSET_SIZE'], 432 | verbose=1) 433 | callbacks.append(callback_monitor_train_predictions) 434 | 435 | """ 436 | if 'plot_loss' in verbosity: 437 | callback_plot_loss = StandardPerformancePlot( 438 | name='Kraino', 439 | fig_title=params['VISUALIZATION_FIG_LOSS_TITLE'], 440 | url=params['VISUALIZATION_URL']) 441 | callbacks.append(callback_plot_loss) 442 | 443 | if 'plot_train_acc' in verbosity: 444 | callback_plot_acc = StandardPerformancePlot( 445 | name='Kraino', 446 | fig_title='Acc', 447 | url=params['VISUALIZATION_URL'], 448 | what_to_plot=['acc']) 449 | 450 | if 'plot_trainval_acc' in verbosity: 451 | callback_plot_acc = StandardPerformancePlot( 452 | name='Kraino', 453 | fig_title='Acc', 454 | url=params['VISUALIZATION_URL'], 455 | what_to_plot=['acc', 'val_acc']) 456 | callbacks.append(callback_plot_acc) 457 | 458 | if 'plot_train_metric' in verbosity: 459 | if params['METRIC'] == 'vqa': 460 | results_function = lambda x: \ 461 | train_dataset['vqa_object'].loadRes( 462 | x, val_dataset['questions_path']) 463 | extra_vars = { 464 | 'question_id':train_question_id, 465 | 'vqa_object':train_dataset['vqa_object'], 466 | 'resfun':results_function, 467 | } 468 | else: 469 | extra_vars=None 470 | callback_plot_train_metric = PlotPerformanceMetricOnEraEnd( 471 | X=train_input, 472 | y=train_y, 473 | temperature=params['TEMPERATURE'], 474 | index2word_y=index2word_y, 475 | metric_name=params['METRIC'], 476 
| epoch_interval=params['MAX_EPOCH'], 477 | extra_vars=extra_vars, 478 | verbose=1, 479 | name='Kraino', 480 | fig_title='{0} - {1}'.format( 481 | params['VISUALIZATION_FIG_METRIC_TITLE'], 'train'), 482 | url=params['VISUALIZATION_URL']) 483 | callbacks.append(callback_plot_train_metric) 484 | 485 | if 'plot_val_metric' in verbosity: 486 | if params['METRIC'] == 'vqa': 487 | results_function = lambda x: \ 488 | val_dataset['vqa_object'].loadRes( 489 | x, val_dataset['questions_path']) 490 | extra_vars = { 491 | 'question_id':val_question_id, 492 | 'vqa_object':val_dataset['vqa_object'], 493 | 'resfun':results_function, 494 | } 495 | else: 496 | extra_vars=None 497 | callback_plot_val_metric = PlotPerformanceMetricOnEraEnd( 498 | X=val_input, 499 | y=val_y, 500 | temperature=params['TEMPERATURE'], 501 | index2word_y=index2word_y, 502 | metric_name=params['METRIC'], 503 | epoch_interval=params['MAX_EPOCH'], 504 | extra_vars=extra_vars, 505 | verbose=1, 506 | name='Kraino', 507 | fig_title='{0} - {1}'.format( 508 | params['VISUALIZATION_FIG_METRIC_TITLE'], 'val'), 509 | url=params['VISUALIZATION_URL']) 510 | callbacks.append(callback_plot_val_metric) 511 | 512 | if 'plot_test_metric' in verbosity: 513 | if params['METRIC'] == 'vqa': 514 | results_function = lambda x: \ 515 | test_dataset['vqa_object'].loadRes( 516 | x, test_dataset['questions_path']) 517 | extra_vars = { 518 | 'question_id':test_question_id, 519 | 'vqa_object':test_dataset['vqa_object'], 520 | 'resfun':results_function} 521 | else: 522 | extra_vars = None 523 | callback_plot_test_metric = PlotPerformanceMetricOnEraEnd( 524 | X=test_input, 525 | y=test_y, 526 | temperature=params['TEMPERATURE'], 527 | index2word_y=index2word_y, 528 | metric_name=params['METRIC'], 529 | epoch_interval=params['MAX_EPOCH'], 530 | extra_vars=extra_vars, 531 | verbose=1, 532 | name='Kraino', 533 | fig_title='{0} - {1}'.format( 534 | params['VISUALIZATION_FIG_METRIC_TITLE'], 'test'), 535 | url=params['VISUALIZATION_URL']) 536 | callbacks.append(callback_plot_test_metric) 537 | """ 538 | 539 | # training changers 540 | if params['IS_EARLY_STOPPING']: 541 | callback_lr_reducer = LearningRateReducerWithEarlyStopping( 542 | patience=params['LR_PATIENCE'], 543 | reduce_rate=params['REDUCE_RATE'], 544 | reduce_nb=params['MAX_NUMBER_REDUCTIONS'], 545 | is_early_stopping=params['IS_EARLY_STOPPING']) 546 | callbacks.append(callback_lr_reducer) 547 | 548 | if params['IS_LR_FIXED_REDUCTION']: 549 | # reduction after fixed number of epochs 550 | callback_lr_reducer_after_k_epoch = LearningRateReducerEveryPatienceEpoch( 551 | patience=params['LR_PATIENCE'], 552 | reduce_rate=params['REDUCE_RATE'], 553 | reduce_nb=params['MAX_NUMBER_REDUCTIONS']) 554 | callbacks.append(callback_lr_reducer_after_k_epoch) 555 | 556 | 557 | 558 | print('Our callbacks: ' + str(callbacks)) 559 | ### 560 | 561 | ### 562 | # Building model 563 | ### 564 | print('Building model ...') 565 | #nb_words= max(X_train.flatten())+1 566 | input_dim = len(word2index_x.keys()) \ 567 | if params['WORD_REPRESENTATION'] == 'one_hot' \ 568 | else X_train[0][0].shape[0] 569 | output_dim = len(word2index_y.keys()) 570 | visual_dim = train_visual_features.shape[1:] \ 571 | if train_visual_features is not None else 0 572 | # creating the config object that carries arguments for models 573 | model_config = Config( 574 | input_dim=input_dim, 575 | textual_embedding_dim=0 if params['WORD_REPRESENTATION'] == 'dense' 576 | else params['TEXTUAL_EMBEDDING_SIZE'], 577 | 
visual_embedding_dim=params['VISUAL_EMBEDDING_SIZE'], 578 | hidden_state_dim=params['HIDDEN_STATE_SIZE'], 579 | language_cnn_filter_size=params['LANGUAGE_CNN_FILTERS'], 580 | language_cnn_filter_length=params['LANGUAGE_CNN_FILTER_LENGTH'], 581 | language_cnn_views=params['LANGUAGE_CNN_VIEWS'], 582 | language_max_pool_length=params['LANGUAGE_MAX_POOL_LENGTH'], 583 | output_dim=output_dim, 584 | visual_dim=visual_dim, 585 | mlp_hidden_dim=params['MLP_HIDDEN_SIZE'], 586 | merge_mode=params['MERGE_MODE'], 587 | multimodal_merge_mode=params['MULTIMODAL_MERGE_MODE'], 588 | num_mlp_layers=params['NUM_MLP_LAYERS'], 589 | num_language_layers=params['NUM_LANGUAGE_LAYERS'], 590 | mlp_activation=params['MLP_ACTIVATION'], 591 | language_cnn_activation=params['LANGUAGE_CNN_ACTIVATION'], 592 | fusion_layer_index=params['FUSION_LAYER_INDEX'], 593 | is_go_backwards=params['IS_REVERSE_INPUT'], 594 | recurrent_encoder=recurrent_net.select[params['TEXT_ENCODER']], 595 | recurrent_decoder=recurrent_net.select[params['TEXT_DECODER']], 596 | trainable_perception_name=params['TRAINABLE_PERCEPTION_NAME'], 597 | word_generator=model_zoo.word_generator[params['WORD_GENERATOR']], 598 | max_input_time_steps=params['MAX_INPUT_TIME_STEPS'], 599 | max_output_time_steps=params['MAX_OUTPUT_TIME_STEPS'], 600 | output_word_delimiter=train_dataset['answer_words_delimiter']) 601 | # building the model 602 | model = model_zoo.select[params['MODEL']](model_config) 603 | model.create() 604 | #TODO: Doesn't work with very large models 605 | #model_picture(model=model, to_file=os.path.join('local', 606 | #'model-{0}-{1}.png'.format(params['MODEL'], params['DATASET']))) 607 | if params['LR'] >= 0: 608 | current_optimizer = OPTIMIZERS[params['OPTIMIZER']](lr=params['LR']) 609 | else: 610 | current_optimizer = OPTIMIZERS[params['OPTIMIZER']]() 611 | model.compile( 612 | loss=params['LOSS'], 613 | optimizer=current_optimizer, 614 | class_mode='categorical') 615 | # pickling the model 616 | """ 617 | pickle_model( 618 | path=model_path, 619 | model=model, 620 | word2index_x=word2index_x, 621 | word2index_y=word2index_y, 622 | index2word_x=index2word_x, 623 | index2word_y=index2word_y) 624 | """ 625 | #model_to_json(path=model_path, model=model) 626 | """ 627 | if is_load_weights: 628 | start_era = era_weights_loader 629 | model.load_weights(weights_path.format( 630 | epoch=start_era*params['MAX_EPOCH'], 631 | era=start_era)) 632 | print('Restart the computations with weights from era {0}'.format(start_era)) 633 | start_era += 1 634 | else: 635 | start_era = 0 636 | """ 637 | ### 638 | 639 | ### 640 | # Training a model 641 | ### 642 | total_start_time = timer() 643 | total_number_of_epochs=params['MAX_EPOCH'] * params['MAX_ERA'] 644 | 645 | if params['IS_VALIDATION_SET']: 646 | model.fit(train_input, Y, 647 | batch_size=params['BATCH_SIZE'], 648 | validation_data=validation_set, 649 | nb_epoch=total_number_of_epochs, 650 | callbacks=callbacks, 651 | show_accuracy=True) 652 | elif params['VALIDATION_SPLIT'] > 0.0: 653 | model.fit(train_input, Y, 654 | batch_size=params['BATCH_SIZE'], 655 | validation_split=params['VALIDATION_SPLIT'], 656 | nb_epoch=total_number_of_epochs, 657 | callbacks=callbacks, 658 | show_accuracy=True) 659 | else: 660 | model.fit(train_input, Y, 661 | batch_size=params['BATCH_SIZE'], 662 | nb_epoch=total_number_of_epochs, 663 | callbacks=callbacks, 664 | show_accuracy=True) 665 | 666 | total_end_time = timer() 667 | time_difference = total_end_time - total_start_time 668 | print('In total is {0:.2f}s = {1:.2f}m'\ 
669 | .format(time_difference, time_difference/60.0)) 670 | return True 671 | 672 | 673 | if __name__ == '__main__': 674 | # setting up the input arguments 675 | args = parse_input_arguments() 676 | params = vars(args) 677 | 678 | print(params) 679 | 680 | main(params) 681 | 682 | print('Done!') 683 | 684 | --------------------------------------------------------------------------------
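# Notes on the training loop above (derived from the code, with default values
# taken from kraino/utils/parsers.py):
#   * model.fit() runs for MAX_EPOCH * MAX_ERA Keras epochs in a single call;
#     the era-level callbacks (weight saving, metric monitoring, prediction
#     dumps) fire every MAX_EPOCH epochs through their epoch_interval argument,
#     so the defaults MAX_EPOCH=30 and MAX_ERA=10 give 300 epochs with era
#     boundaries every 30 epochs.
#   * Weights are written under local/weights/ and the verbosity and
#     class-normalized logs under local/logs/ (see the path constants at the
#     top of neural_solver.py).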