├── src ├── nn │ ├── repeat.py │ ├── __init__.py │ ├── alltests.py │ ├── sum2.py │ ├── lstm_profile.py │ ├── const_value.py │ ├── lstm_test_target.csv │ ├── normalize.py │ ├── elem_prod.py │ ├── active.py │ ├── sum.py │ ├── model.py │ ├── valid_tool.py │ ├── loader.py │ ├── meanpool1d.py │ ├── selector.py │ ├── dropout.py │ ├── tester.py │ ├── const_weights.py │ ├── maxpool1d.py │ ├── inner_prod.py │ ├── sequential.py │ ├── active_func.py │ ├── cos_sim.py │ ├── ordinal.py │ ├── lut.py │ ├── sum_prod.py │ ├── sequential_tests.py │ ├── conv1d.py │ ├── map.py │ ├── lstm_test.py │ ├── func.py │ ├── container.py │ ├── reshape.py │ ├── lstm.py │ ├── stage.py │ ├── recurrent_tests.py │ ├── lstm_old.py │ └── trainer.py ├── imageqa_crosstest.py ├── imageqa_layout.py ├── calculate_wups.py ├── imageqa_modelavg.py ├── imageqa_adhoc.py ├── imageqa_compare.py ├── imageqa_test.py ├── imageqa_ensemble.py └── train.py ├── results └── README.md ├── data └── README.md ├── config └── train.yml ├── .gitignore ├── LICENSE ├── models ├── img_bow.model.yml ├── vis_lstm.model.yml └── 2_vis_blstm.model.yml └── README.md /src/nn/repeat.py: -------------------------------------------------------------------------------- 1 | class Repeat(Stage): 2 | pass -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- 1 | # Image QA results folder 2 | 3 | This folder is for storing training results. 
4 | 5 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Image QA data folder 2 | 3 | Please download the data of this directory 4 | * http://www.cs.toronto.edu/~mren/imageqa/data/hidden_oxford_mscoco.h5 5 | * http://www.cs.toronto.edu/~mren/imageqa/data/cocoqa.zip 6 | 7 | -------------------------------------------------------------------------------- /src/nn/__init__.py: -------------------------------------------------------------------------------- 1 | # Neural network package 2 | # Package level functions 3 | from loader import * 4 | from func import * 5 | from active_func import * 6 | from tester import * 7 | 8 | # Classes 9 | from trainer import * 10 | 11 | -------------------------------------------------------------------------------- /config/train.yml: -------------------------------------------------------------------------------- 1 | numEpoch: 2000 2 | batchSize: 100 3 | shuffle: true 4 | writeRecord: true 5 | saveModel: true 6 | plotFigs: true 7 | calcError: true 8 | stopCost: 0.01 9 | progress: true 10 | patience: 10 11 | criterion: loss 12 | needValid: true 13 | sendEmail: false 14 | -------------------------------------------------------------------------------- /src/nn/alltests.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import unittest 3 | 4 | test_file_strings = glob.glob('*_test*.py') 5 | module_strings = [str[0:len(str)-3] for str in test_file_strings] 6 | suites = [unittest.defaultTestLoader.loadTestsFromName(str) for str 7 | in module_strings] 8 | testSuite = unittest.TestSuite(suites) 9 | text_runner = unittest.TextTestRunner().run(testSuite) -------------------------------------------------------------------------------- /src/nn/sum2.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 
| class Sum2(Stage): 4 | """Stage summing first half of the input with second half.""" 5 | def __init__(self, name, inputNames, outputDim, 6 | defaultValue=0.0): 7 | Stage.__init__( 8 | self, 9 | name=name, 10 | inputNames=inputNames, 11 | outputDim=outputDim, 12 | defaultValue=defaultValue) 13 | def forward(self, X): 14 | self.numComponents = X.shape[1] 15 | return np.sum(X, axis=1) 16 | def backward(self, dEdY): 17 | self.dEdW = 0.0 18 | return np.tile(dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1]), (1, self.numComponents, 1)) 19 | -------------------------------------------------------------------------------- /src/nn/lstm_profile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | import lstm_old as l 5 | 6 | start = time.time() 7 | timespan = 100 8 | multiErr = len(sys.argv) > 1 and sys.argv[1] == 'm' 9 | for i in range(0, 10): 10 | lstm = l.LSTM_Old( 11 | inputDim=100, 12 | outputDim=100, 13 | initRange=.1, 14 | initSeed=3, 15 | cutOffZeroEnd=True, 16 | multiErr=multiErr, 17 | outputdEdX=True) 18 | X = np.random.rand(10, timespan, 100) 19 | Y = lstm.forward(X) 20 | dEdY = np.random.rand(10, timespan, 100) if multiErr else np.random.rand(10, 100) 21 | dEdY = lstm.backward(dEdY) 22 | print '%.4f s' % (time.time() - start) -------------------------------------------------------------------------------- /src/nn/const_value.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class ConstValue(Stage): 4 | def __init__(self, 5 | name, 6 | inputNames, 7 | outputDim, 8 | value): 9 | Stage.__init__(self, 10 | name=name, 11 | outputDim=outputDim, 12 | inputNames=inputNames, 13 | outputdEdX=False) 14 | self.dEdW = 0 15 | self.value = value 16 | 17 | def graphBackward(self): 18 | self.backward(self.dEdY) 19 | 20 | def forward(self, X): 21 | return np.zeros((X.shape[0], self.outputDim)) + self.value 22 | 23 | def 
backward(self, dEdY): 24 | return None -------------------------------------------------------------------------------- /src/nn/lstm_test_target.csv: -------------------------------------------------------------------------------- 1 | 1.000000000000000000e+00 2 | 1.000000000000000000e+00 3 | 1.000000000000000000e+00 4 | 1.000000000000000000e+00 5 | 1.000000000000000000e+00 6 | 1.000000000000000000e+00 7 | 1.000000000000000000e+00 8 | 1.000000000000000000e+00 9 | 1.000000000000000000e+00 10 | 1.000000000000000000e+00 11 | 1.000000000000000000e+00 12 | 1.000000000000000000e+00 13 | 1.000000000000000000e+00 14 | 1.000000000000000000e+00 15 | 1.000000000000000000e+00 16 | 1.000000000000000000e+00 17 | 1.000000000000000000e+00 18 | 1.000000000000000000e+00 19 | 1.000000000000000000e+00 20 | 1.000000000000000000e+00 21 | 1.000000000000000000e+00 22 | 1.000000000000000000e+00 23 | 1.000000000000000000e+00 24 | 1.000000000000000000e+00 25 | 1.000000000000000000e+00 26 | -------------------------------------------------------------------------------- /src/nn/normalize.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class Normalize(Stage): 4 | def __init__(self, 5 | outputDim, 6 | mean, 7 | std, 8 | name=None, 9 | inputNames=None, 10 | outputdEdX=True): 11 | Stage.__init__(self, 12 | name=name, 13 | inputNames=inputNames, 14 | outputDim=outputDim, 15 | outputdEdX=outputdEdX) 16 | self.mean = mean 17 | self.std = std 18 | self.X = 0 19 | self.Y = 0 20 | pass 21 | 22 | def forward(self, X): 23 | return (X - self.mean) / self.std 24 | 25 | def backward(self, dEdY): 26 | return dEdY / self.std -------------------------------------------------------------------------------- /src/nn/elem_prod.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class ElementProduct(Stage): 4 | """Stage multiplying first half of the input with second half""" 5 | def 
__init__(self, name, inputNames, outputDim, 6 | defaultValue=0.0): 7 | Stage.__init__( 8 | self, 9 | name=name, 10 | inputNames=inputNames, 11 | outputDim=outputDim, 12 | defaultValue=defaultValue) 13 | def forward(self, X): 14 | self.X = X 15 | return X[:,:X.shape[1]/2] * X[:,X.shape[1]/2:] 16 | def backward(self, dEdY): 17 | self.dEdW = 0.0 18 | return np.concatenate( 19 | (self.X[:,self.X.shape[1]/2:] * dEdY, 20 | self.X[:,:self.X.shape[1]/2] * dEdY), 21 | axis=-1) -------------------------------------------------------------------------------- /src/nn/active.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class Active(Stage): 4 | def __init__(self, 5 | activeFn, 6 | inputNames, 7 | outputDim, 8 | defaultValue=0.0, 9 | outputdEdX=True, 10 | name=None): 11 | Stage.__init__(self, 12 | name=name, 13 | inputNames=inputNames, 14 | outputDim=outputDim, 15 | defaultValue=defaultValue, 16 | outputdEdX=outputdEdX) 17 | self.activeFn = activeFn 18 | def forward(self, X): 19 | self.Y = self.activeFn.forward(X) 20 | return self.Y 21 | def backward(self, dEdY): 22 | self.dEdW = 0 23 | return self.activeFn.backward(dEdY, self.Y, 0) -------------------------------------------------------------------------------- /src/nn/sum.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class Sum(Stage): 4 | """Stage summing first half of the input with second half.""" 5 | def __init__(self, name, inputNames, numComponents, outputDim, 6 | defaultValue=0.0): 7 | Stage.__init__( 8 | self, 9 | name=name, 10 | inputNames=inputNames, 11 | outputDim=outputDim, 12 | defaultValue=defaultValue) 13 | self.numComponents = numComponents 14 | def forward(self, X): 15 | return np.sum( 16 | X.reshape(X.shape[0], 17 | self.numComponents, 18 | X.shape[1] / self.numComponents), 19 | axis=1) 20 | def backward(self, dEdY): 21 | self.dEdW = 0.0 22 | return np.tile(dEdY, 
self.numComponents) 23 | -------------------------------------------------------------------------------- /src/nn/model.py: -------------------------------------------------------------------------------- 1 | from container import * 2 | 3 | class GraphModel(Container): 4 | def __init__(self, 5 | stages, 6 | outputStageNames, 7 | costFn, 8 | inputDim=0, 9 | outputDim=0, 10 | name=None, 11 | decisionFn=None, 12 | specFilename=None): 13 | Container.__init__(self, 14 | name=name, 15 | stages=stages, 16 | inputNames=['input'], 17 | outputStageNames=outputStageNames, 18 | inputDim=inputDim, 19 | outputDim=outputDim, 20 | outputdEdX=True) 21 | self.getCost = costFn 22 | self.predict = decisionFn 23 | self.specFilename = specFilename -------------------------------------------------------------------------------- /src/nn/valid_tool.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def splitData(trainInput, trainTarget, heldOutRatio, validNumber): 4 | s = np.round(trainInput.shape[0] * heldOutRatio) 5 | start = s * validNumber 6 | validInput = trainInput[start : start + s] 7 | validTarget = trainTarget[start : start + s] 8 | if validNumber == 0: 9 | trainInput = trainInput[s:] 10 | trainTarget = trainTarget[s:] 11 | else: 12 | trainInput = np.concatenate((trainInput[0:start], trainInput[start + s:])) 13 | trainTarget = np.concatenate((trainTarget[0:start], trainTarget[start + s:])) 14 | return trainInput, trainTarget, validInput, validTarget 15 | 16 | def shuffleData(X, T, random=None): 17 | if random is None: 18 | random = np.random.RandomState() 19 | shuffle = np.arange(0, X.shape[0]) 20 | shuffle = random.permutation(shuffle) 21 | X = X[shuffle] 22 | T = T[shuffle] 23 | return X, T -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | 
__pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /src/nn/loader.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import router 3 | from model import * 4 | 5 | def load(modelSpecFilename): 6 | """ 7 | Need the following items in the model spec file: 8 | costFn 9 | decisionFn 10 | stages 11 | specs 12 | :param modelSpecFilename: 13 | :return: 14 | """ 15 | with open(modelSpecFilename) as f: 16 | modelDict = yaml.load(f) 17 | 18 | for stageDict in modelDict['specs']: 19 | router.addStage(stageDict) 20 | 21 | modelStages = [] 22 | for s in modelDict['stages']: 23 | modelStages.append(router.routeStage(s)) 24 | costFn=router.routeFn(modelDict['costFn']) 25 | 26 | if modelDict.has_key('decisionFn'): 27 | decisionFn = router.routeFn(modelDict['decisionFn']) 28 | else: 29 | decisionFn = None 30 | outputList = modelDict['outputs'].split(',') 31 | for i in range(len(outputList)): 32 | outputList[i] = outputList[i].strip() 33 | model = GraphModel( 34 | name=modelDict['name'] if modelDict.has_key('name') else None, 35 | stages=modelStages, 36 | outputStageNames=outputList, 37 | costFn=costFn, 38 | decisionFn=decisionFn, 39 | specFilename=modelSpecFilename) 40 | 41 | return model -------------------------------------------------------------------------------- /src/nn/meanpool1d.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class MeanPool1D(Stage): 4 | """ 5 | 1D mean pooling. 6 | Padding no longer make sense now. 7 | Make sure you have the right size. 
8 | """ 9 | def __init__(self, 10 | outputDim, 11 | windowSize, 12 | inputNames=None, 13 | defaultValue=0.0, 14 | outputdEdX=True, 15 | name=None): 16 | Stage.__init__(self, 17 | name=name, 18 | inputNames=inputNames, 19 | outputDim=outputDim, 20 | defaultValue=defaultValue, 21 | outputdEdX=outputdEdX) 22 | self.windowSize = windowSize 23 | self.X = 0 24 | self.Y = 0 25 | 26 | def forward(self, X): 27 | X = X.reshape(X.shape[0], self.windowSize, X.shape[1] / self.windowSize, X.shape[2]) 28 | Y = np.mean(X, axis=1) 29 | self.X = X 30 | return Y 31 | 32 | def backward(self, dEdY): 33 | dEdX = np.tile( 34 | dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1], dEdY.shape[2]), 35 | (1, self.windowSize, 1, 1)) 36 | dEdX /= float(self.windowSize) 37 | dEdX = dEdX.reshape(dEdX.shape[0], dEdX.shape[1] * dEdX.shape[2], dEdX.shape[3]) 38 | return dEdX -------------------------------------------------------------------------------- /src/nn/selector.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class Selector(Stage): 4 | def __init__(self, 5 | name, 6 | inputNames, 7 | start, 8 | end, 9 | axis=-1): 10 | Stage.__init__( 11 | self, 12 | name=name, 13 | inputNames=inputNames, 14 | outputDim=end-start) 15 | self.start = start 16 | self.end = end 17 | self.axis = axis 18 | if axis < -2 or axis > 2: 19 | raise Exception('Selector axis=%d not supported' % axis) 20 | 21 | def forward(self, X): 22 | self.X = X 23 | if self.axis == -1: 24 | self.axis = len(X.shape) - 1 25 | if self.axis == 0: 26 | return X[self.start:self.end] 27 | elif self.axis == 1: 28 | return X[:, self.start:self.end] 29 | elif self.axis == 2: 30 | return X[:, :, self.start:self.end] 31 | 32 | def backward(self, dEdY): 33 | dEdX = np.zeros(self.X.shape) 34 | if self.axis == 0: 35 | dEdX[self.start:self.end] = dEdY 36 | elif self.axis == 1: 37 | dEdX[:, self.start:self.end] = dEdY 38 | elif self.axis == 2: 39 | dEdX[:, :, self.start:self.end] = dEdY 40 | 
return dEdX -------------------------------------------------------------------------------- /src/nn/dropout.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class Dropout(Stage): 4 | def __init__(self, 5 | name, 6 | inputNames, 7 | outputDim, 8 | dropoutRate, 9 | initSeed, 10 | debug=False): 11 | Stage.__init__(self, 12 | name=name, 13 | inputNames=inputNames, 14 | outputDim=outputDim) 15 | self.dropout = True 16 | self.dropoutVec = 0 17 | self.dropoutRate = dropoutRate 18 | self.debug = debug 19 | self.random = np.random.RandomState(initSeed) 20 | self.seed = initSeed 21 | 22 | def forward(self, X): 23 | if self.dropoutRate > 0.0 and self.dropout: 24 | if self.debug: 25 | self.random = np.random.RandomState(self.seed) 26 | self.dropoutVec = (self.random.uniform(0, 1, (X.shape[-1])) > 27 | self.dropoutRate) 28 | Y = X * self.dropoutVec 29 | else: 30 | Y = X * (1 - self.dropoutRate) 31 | self.X = X 32 | return Y 33 | 34 | def backward(self, dEdY): 35 | dEdX = None 36 | if self.outputdEdX: 37 | if self.dropout: 38 | dEdX = dEdY * self.dropoutVec 39 | else: 40 | dEdX = dEdY / (1 - self.dropoutRate) 41 | return dEdX -------------------------------------------------------------------------------- /src/nn/tester.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def test(model, X, numExPerBat=100, layerNames=None): 4 | N = X.shape[0] 5 | batchStart = 0 6 | Y = None 7 | layers = {} 8 | if layerNames is not None: 9 | for layerName in layerNames: 10 | layers[layerName] = [] 11 | while batchStart < N: 12 | # Batch info 13 | batchEnd = min(N, batchStart + numExPerBat) 14 | Ytmp = model.forward(X[batchStart:batchEnd], dropout=False) 15 | if Y is None: 16 | Yshape = np.copy(Ytmp.shape) 17 | Yshape[0] = N 18 | Y = np.zeros(Yshape) 19 | if layerNames is not None: 20 | for layerName in layerNames: 21 | stage = model 22 | for stageName in 
layerName.split(':'): 23 | stage = stage.stageDict[stageName] 24 | layers[layerName].append(stage.getValue()) 25 | Y[batchStart:batchEnd] = Ytmp 26 | batchStart += numExPerBat 27 | if layerNames is not None: 28 | for layerName in layerNames: 29 | layers[layerName] = np.concatenate(layers[layerName], axis=0) 30 | return Y, layers 31 | else: 32 | return Y 33 | 34 | def calcRate(model, Y, T): 35 | Yfinal = model.predict(Y) 36 | correct = np.sum(Yfinal.reshape(Yfinal.size) == T.reshape(T.size)) 37 | total = Yfinal.size 38 | rate = correct / float(total) 39 | return rate, correct, total -------------------------------------------------------------------------------- /src/nn/const_weights.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class ConstWeights(Stage): 4 | def __init__(self, 5 | name, 6 | outputDim=0, 7 | inputDim=0, 8 | initRange=1.0, 9 | initSeed=2, 10 | needInit=True, 11 | initWeights=0, 12 | learningRate=0.0, 13 | learningRateAnnealConst=0.0, 14 | momentum=0.0, 15 | deltaMomentum=0.0, 16 | weightClip=0.0, 17 | gradientClip=0.0, 18 | weightRegConst=0.0): 19 | Stage.__init__(self, 20 | name=name, 21 | outputDim=outputDim, 22 | inputNames=['input'], 23 | learningRate=learningRate, 24 | learningRateAnnealConst=learningRateAnnealConst, 25 | momentum=momentum, 26 | deltaMomentum=deltaMomentum, 27 | weightClip=weightClip, 28 | gradientClip=gradientClip, 29 | weightRegConst=weightRegConst, 30 | outputdEdX=False) 31 | if needInit: 32 | self.random = np.random.RandomState(initSeed) 33 | self.W = self.random.uniform( 34 | -initRange/2.0, initRange/2.0, (outputDim, inputDim)) 35 | else: 36 | self.W = initWeights 37 | self.dEdW = 0 38 | 39 | def graphBackward(self): 40 | self.backward(self.dEdY) 41 | 42 | def forward(self, X): 43 | return self.W 44 | 45 | def backward(self, dEdY): 46 | self.dEdW = dEdY 47 | return None -------------------------------------------------------------------------------- 
/src/nn/maxpool1d.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class MaxPool1D(Stage): 4 | """ 5 | 1D max pooling. 6 | """ 7 | def __init__(self, 8 | outputDim, 9 | windowSize, 10 | inputNames=None, 11 | defaultValue=0.0, 12 | outputdEdX=True, 13 | name=None): 14 | Stage.__init__(self, 15 | name=name, 16 | inputNames=inputNames, 17 | outputDim=outputDim, 18 | defaultValue=defaultValue, 19 | outputdEdX=outputdEdX) 20 | self.windowSize = windowSize 21 | self.X = 0 22 | self.Y = 0 23 | 24 | def forward(self, X): 25 | mod = np.mod(X.shape[1], self.windowSize) 26 | if mod > 0: 27 | X = np.concatenate((X, np.zeros((X.shape[0], self.windowSize - mod, X.shape[2]))), axis=1) 28 | X = X.reshape(X.shape[0], self.windowSize, X.shape[1] / self.windowSize, X.shape[2]) 29 | self.argX = np.argmax(X, axis=1) 30 | Y = np.max(X, axis=1) 31 | self.X = X 32 | self.mod = mod 33 | return Y 34 | 35 | def backward(self, dEdY): 36 | """ 37 | Assuming the last dimension is the largest. 38 | """ 39 | self.dEdW = 0 40 | dEdX = np.zeros(self.X.shape) 41 | for i in range(self.X.shape[0]): 42 | for j in range(self.X.shape[2]): 43 | dEdX[i, self.argX[i, j, :], j, range(0, self.X.shape[3])] = dEdY[i, j, :] 44 | dEdX = dEdX.reshape(dEdX.shape[0], dEdX.shape[1] * dEdX.shape[2], dEdX.shape[3]) 45 | if self.mod > 0: 46 | dEdX = dEdX[:, :-(self.windowSize - self.mod), :] 47 | return dEdX -------------------------------------------------------------------------------- /src/nn/inner_prod.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class InnerProduct(Stage): 4 | """ 5 | Inner product calculates the inner product of two input vectors. 6 | Two vectors aligns on the second axis (time-axis). 
7 | """ 8 | def __init__(self, 9 | name, 10 | inputNames, 11 | outputDim, 12 | learningRate=0.0, 13 | learningRateAnnealConst=0.0, 14 | momentum=0.0, 15 | deltaMomentum=0.0, 16 | weightClip=0.0, 17 | gradientClip=0.0, 18 | weightRegConst=0.0, 19 | outputdEdX=True): 20 | Stage.__init__(self, 21 | name=name, 22 | outputDim=outputDim, 23 | inputNames=inputNames, 24 | learningRate=learningRate, 25 | learningRateAnnealConst=learningRateAnnealConst, 26 | momentum=momentum, 27 | deltaMomentum=deltaMomentum, 28 | weightClip=weightClip, 29 | gradientClip=gradientClip, 30 | weightRegConst=weightRegConst, 31 | outputdEdX=outputdEdX) 32 | self.W = 1 33 | def forward(self, X): 34 | Y = np.sum(X[:, 0, :] * X[:, 1, :], axis=-1) + self.W 35 | self.Y = Y 36 | self.X = X 37 | return Y 38 | 39 | def backward(self, dEdY): 40 | self.dEdW = np.sum(dEdY,axis=0) 41 | #print dEdY 42 | dEdX = np.zeros(self.X.shape) 43 | dEdX[:, 1, :] = dEdY.reshape(dEdY.size, 1) * self.X[:, 0, :] 44 | dEdX[:, 0, :] = dEdY.reshape(dEdY.size, 1) * self.X[:, 1, :] 45 | return dEdX -------------------------------------------------------------------------------- /src/nn/sequential.py: -------------------------------------------------------------------------------- 1 | from container import * 2 | 3 | class Sequential(Stage): 4 | def __init__(self, stages, inputNames=None, name=None, outputDim=0, outputdEdX=True): 5 | Stage.__init__(self, 6 | name=name, 7 | outputDim=outputDim, 8 | inputNames=inputNames, 9 | outputdEdX=outputdEdX) 10 | self.stages = stages 11 | 12 | def forward(self, X, dropout=True): 13 | X1 = X 14 | for stage in self.stages: 15 | if isinstance(stage, Container) or isinstance(stage, Sequential): 16 | X1 = stage.forward(X1, dropout) 17 | elif hasattr(stage, 'dropout'): 18 | stage.dropout = dropout 19 | X1 = stage.forward(X1) 20 | else: 21 | X1 = stage.forward(X1) 22 | return X1 23 | 24 | def backward(self, dEdY): 25 | for stage in reversed(self.stages): 26 | dEdY = stage.backward(dEdY) 27 | if 
dEdY is None: break 28 | return dEdY if self.outputdEdX else None 29 | 30 | def updateWeights(self): 31 | for stage in self.stages: 32 | stage.updateWeights() 33 | return 34 | 35 | def updateLearningParams(self, numEpoch): 36 | for stage in self.stages: 37 | stage.updateLearningParams(numEpoch) 38 | return 39 | 40 | def getWeights(self): 41 | weights = [] 42 | for stage in self.stages: 43 | weights.append(stage.getWeights()) 44 | return np.array(weights, dtype=object) 45 | 46 | def loadWeights(self, W): 47 | for i in range(W.shape[0]): 48 | self.stages[i].loadWeights(W[i]) -------------------------------------------------------------------------------- /src/nn/active_func.py: -------------------------------------------------------------------------------- 1 | from func import * 2 | 3 | class SoftmaxActiveFn(): 4 | def __init__(self): 5 | pass 6 | 7 | @staticmethod 8 | def forward(Z): 9 | expY = np.exp(Z) 10 | expYshape = np.copy(Z.shape) 11 | expYshape[-1] = 1 12 | Y = expY / np.sum(expY, axis=-1).reshape(expYshape).repeat(Z.shape[-1], axis=-1) 13 | return Y 14 | 15 | @staticmethod 16 | def backward(dEdY, Y, Z): 17 | timespan = Y.shape[0] 18 | U = dEdY * Y 19 | dEdZ = U - np.sum(U, axis=-1).reshape(timespan, 1) * Y 20 | return dEdZ 21 | 22 | class SigmoidActiveFn(): 23 | def __init__(self): 24 | pass 25 | 26 | @staticmethod 27 | def forward(Z): 28 | Y = sigmoidFn(Z) 29 | return Y 30 | 31 | @staticmethod 32 | def backward(dEdY, Y, Z): 33 | dEdZ = dEdY * Y * (1 - Y) 34 | return dEdZ 35 | 36 | class TanhActiveFn(): 37 | def __init__(self): 38 | pass 39 | 40 | @staticmethod 41 | def forward(Z): 42 | Y = np.tanh(Z) 43 | return Y 44 | 45 | @staticmethod 46 | def backward(dEdY, Y, Z): 47 | dEdZ = dEdY * (1 - Y * Y) 48 | return dEdZ 49 | 50 | class IdentityActiveFn(): 51 | def __init__(self): 52 | pass 53 | 54 | @staticmethod 55 | def forward(Z): 56 | return Z 57 | 58 | @staticmethod 59 | def backward(dEdY, Y, Z): 60 | return dEdY 61 | 62 | class ReluActiveFn(): 63 | def 
__init__(self): 64 | pass 65 | 66 | @staticmethod 67 | def forward(Z): 68 | return np.maximum(0, Z) 69 | 70 | @staticmethod 71 | def backward(dEdY, Y, Z): 72 | return (Y > 0).astype(int) * dEdY 73 | -------------------------------------------------------------------------------- /models/img_bow.model.yml: -------------------------------------------------------------------------------- 1 | name: 'img_bow' 2 | costFn: 'crossEntIdx' 3 | decisionFn: 'argmax' 4 | stages: 5 | - 'imgSel' 6 | - 'imgFeat' 7 | - 'txtSel' 8 | - 'txtUnfold' 9 | - 'txtDict' 10 | - 'txtFold' 11 | - 'bow' 12 | - 'softmax' 13 | outputs: 'softmax' 14 | specs: 15 | - name: 'imgSel' 16 | type: 'selector' 17 | inputs: 'input' 18 | start: 0 19 | end: 1 20 | axis: 1 21 | - name: 'txtSel' 22 | type: 'selector' 23 | inputs: 'input' 24 | start: 1 25 | end: 56 26 | axis: 1 27 | - name: 'imgFeat' 28 | type: 'lut' 29 | inputs: 'imgSel' 30 | inputDim: 123288 31 | outputDim: 4096 32 | initWeights: '../data/hidden_oxford_mscoco.h5' 33 | sparse: true 34 | format: 'h5' 35 | h5key: 'hidden7' 36 | learningRate: 0.0 37 | outputdEdX: false 38 | - name: 'txtUnfold' 39 | type: 'timeUnfold' 40 | inputs: 'txtSel' 41 | outputdEdX: false 42 | - name: 'txtDict' 43 | type: 'lut' 44 | inputs: 'txtUnfold' 45 | intConversion: true 46 | inputDim: 9738 47 | outputDim: 500 48 | initRange: 1.0 49 | initSeed: 2 50 | learningRate: 0.8 51 | momentum: 0.9 52 | gradientClip: 0.1 53 | weightClip: 1000.0 54 | - name: 'txtFold' 55 | type: 'timeFold' 56 | inputs: 'txtDict' 57 | timespan: 55 58 | - name: 'bow' 59 | type: 'sum2' 60 | inputs: 'txtFold' 61 | outputDim: 500 62 | - name: 'softmax' 63 | type: 'map' 64 | inputs: 'bow, imgFeat' 65 | activeFn: 'softmax' 66 | outputDim: 431 67 | initRange: 0.01 68 | initSeed: 7 69 | learningRate: 0.01 70 | learningRateAnnealConst: 0.0 71 | momentum: 0.9 72 | gradientClip: 0.1 73 | weightClip: 15.0 74 | weightRegConst: 0.00005 75 | 
-------------------------------------------------------------------------------- /src/nn/cos_sim.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class CosSimilarity(Stage): 4 | """ 5 | Compute the cosine similartiy of vectors with a bank of vectors 6 | """ 7 | def __init__(self, bankDim, inputNames, outputDim, name=None): 8 | Stage.__init__(self, name=name, inputNames=inputNames, outputDim=outputDim) 9 | self.bankDim = bankDim 10 | self.A = 0 11 | self.Z = 0 12 | self.Anorm = 0 13 | self.Znorm = 0 14 | self.Auni = 0 15 | self.Zuni = 0 16 | 17 | def forward(self, X): 18 | bankDim = self.bankDim 19 | A = X[:bankDim] 20 | Z = X[bankDim:] 21 | Xnorm2 = np.sum(np.power(X, 2), axis=-1) 22 | Xnorm = np.sqrt(Xnorm2) 23 | Anorm = Xnorm[:bankDim] 24 | Znorm = Xnorm[bankDim:] 25 | Zuni = Z / Znorm.reshape(Z.shape[0], 1) 26 | Auni = A / Anorm.reshape(bankDim, 1) 27 | self.Y = np.inner(Zuni, Auni) 28 | self.A = A 29 | self.Z = Z 30 | self.Anorm = Anorm 31 | self.Znorm = Znorm 32 | self.Auni = Auni 33 | self.Zuni = Zuni 34 | return self.Y 35 | 36 | def backward(self, dEdY): 37 | # For now, output gradient towards the vector bank. 
38 | self.dEdW = 0 39 | Z = self.Z 40 | A = self.A 41 | Anorm = self.Anorm 42 | Znorm = self.Znorm 43 | Auni = self.Auni 44 | Zuni = self.Zuni 45 | 46 | V = np.dot(dEdY, Auni) 47 | dEdZ = np.sum(V * Z, axis=-1).reshape(Z.shape[0], 1) * \ 48 | (-Z / (Znorm ** 3).reshape(Z.shape[0], 1)) + \ 49 | V / Znorm.reshape(Z.shape[0], 1) 50 | 51 | U = np.dot(dEdY.transpose(), Zuni) 52 | dEdA = np.sum(U * A, axis=-1).reshape(A.shape[0], 1) * \ 53 | (-A / (Anorm ** 3).reshape(A.shape[0], 1)) + \ 54 | U / Anorm.reshape(A.shape[0], 1) 55 | 56 | dEdX = np.concatenate((dEdA, dEdZ), axis=0) 57 | return dEdX -------------------------------------------------------------------------------- /src/imageqa_crosstest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | import imageqa_test as it 5 | import prep 6 | import nn 7 | 8 | def reindexDataset( 9 | srcQuestions, 10 | srcAnswers, 11 | srcQuestionIdict, 12 | dstQuestionDict, 13 | srcAnsIdict, 14 | dstAnsDict): 15 | dstQuestions = np.zeros(srcQuestions.shape, dtype='int') 16 | dstAnswers = np.zeros(srcAnswers.shape, dtype='int') 17 | for n in range(srcQuestions.shape[0]): 18 | dstQuestions[n, 0, 0] = srcQuestions[n, 0, 0] 19 | for t in range(1, srcQuestions.shape[1]): 20 | word = srcQuestionIdict[srcQuestions[n, t, 0] - 1] 21 | if dstQuestionDict.has_key(word): 22 | dstQuestions[n, t, 0] = dstQuestionDict[word] 23 | else: 24 | dstQuestions[n, t, 0] = dstQuestionDict['UNK'] 25 | word = srcAnsIdict[srcAnswers[n, 0]] 26 | if dstAnsDict.has_key(word): 27 | dstAnswers[n, 0] = dstAnsDict[word] 28 | else: 29 | dstAnswers[n, 0] = dstAnsDict['UNK'] 30 | return dstQuestions, dstAnswers 31 | 32 | if __name__ == '__main__': 33 | """ 34 | Usage: python imageqa_crosstest.py 35 | -m[odel] {model id} 36 | -d[ata] {model data folder} 37 | -td[ata] {test data folder} 38 | [-reindex {whether to reindex the test data, default false}] 39 | [-r[esults] {results folder}] 40 | 
[-dataset {cocoqa/daquar, default cocoqa}] 41 | """ 42 | resultsFolder = '../results' 43 | needReindex = False 44 | dataset = 'cocoqa' 45 | for i, flag in enumerate(sys.argv): 46 | if flag == '-m' or flag == '-model': 47 | modelId = sys.argv[i + 1] 48 | elif flag == '-d' or flag == '-data': 49 | dataFolder = sys.argv[i + 1] 50 | elif flag == '-td' or flag == '-tdata': 51 | testDataFolder = sys.argv[i + 1] 52 | elif flag == '-reindex': 53 | needReindex = True 54 | elif flag == '-r' or flag == '-results': 55 | resultsFolder = sys.argv[i + 1] 56 | elif flag == '-dataset': 57 | dataset = sys.argv[i + 1] 58 | 59 | model = it.loadModel(modelId, resultsFolder) 60 | data = it.loadDataset(dataFolder) 61 | testdata = it.loadDataset(testDataFolder) 62 | if needReindex: 63 | testQuestions, testAnswers = reindexDataset( 64 | testdata['testData'][0], 65 | testdata['testData'][1], 66 | testdata['questionIdict'], 67 | data['questionDict'], 68 | testdata['ansIdict'], 69 | data['ansDict']) 70 | else: 71 | testQuestions = testdata['testData'][0] 72 | testAnswers = testdata['testData'][1] 73 | outputTest = nn.test(model, testQuestions) 74 | rate, correct, total = nn.calcRate(model, outputTest, testAnswers) 75 | print 'rate: %.4f' % rate 76 | 77 | -------------------------------------------------------------------------------- /src/nn/ordinal.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | import numpy as np 3 | 4 | 5 | class OrdinalRegression(Stage): 6 | def __init__( 7 | self, 8 | outputDim, 9 | fixExtreme=True, 10 | inputNames=None, 11 | name=None, 12 | outputdEdX=True, 13 | learningRate=0.0, 14 | learningRateAnnealConst=0.0, 15 | momentum=0.0, 16 | deltaMomentum=0.0, 17 | weightClip=0.0, 18 | gradientClip=0.0, 19 | weightRegConst=0.0): 20 | Stage.__init__( 21 | self, 22 | name=name, 23 | inputNames=inputNames, 24 | outputDim=outputDim, 25 | outputdEdX=outputdEdX, 26 | learningRate=learningRate, 27 | 
learningRateAnnealConst=learningRateAnnealConst, 28 | momentum=momentum, 29 | deltaMomentum=deltaMomentum, 30 | weightClip=weightClip, 31 | gradientClip=gradientClip, 32 | weightRegConst=weightRegConst) 33 | # Uniform initialization 34 | # mu_0 = -1 35 | # mu_(n-1) = 1 36 | # mu_i = -1 + 2 * (i / n) 37 | self.fixExtreme = fixExtreme 38 | mu = np.linspace(-1, 1, self.outputDim) 39 | # pi_i = 1/n 40 | pi = np.zeros(self.outputDim) + np.log(1 / float(self.outputDim)) 41 | self.W = np.zeros((2, self.outputDim)) 42 | self.W[0] = mu 43 | self.W[1] = pi 44 | 45 | def forward(self, X): 46 | mu = self.W[0].reshape(1, self.W.shape[1]) 47 | pi = self.W[1].reshape(1, self.W.shape[1]) 48 | self.Xshape = X.shape 49 | X = X.reshape(X.shape[0], 1) 50 | self.X = X 51 | Z = np.exp(mu * X + (pi - np.power(mu, 2) / 2)) 52 | Y = Z / np.sum(Z, axis=-1).reshape(X.shape[0], 1) 53 | self.Z = Z 54 | self.Y = Y 55 | return Y 56 | 57 | def backward(self, dEdY): 58 | # Here we ignore the dEdY because this is always the last layer... 
59 | target = dEdY != 0.0 60 | targetInt = target.astype('int') 61 | targetIdx = np.nonzero(target)[1] 62 | mu = self.W[0] 63 | pi = self.W[1] 64 | dEdX = (-mu[targetIdx] + np.dot(self.Y, mu)) / float(self.X.shape[0]) 65 | dEdX = dEdX.reshape(self.Xshape) 66 | dEdMu = np.mean( 67 | (self.X - mu) * 68 | (self.Y - targetInt), axis=0) 69 | # Fix extreme mu's 70 | if self.fixExtreme: 71 | dEdMu[0] = 0 72 | dEdMu[-1] = 0 73 | dEdPi = np.mean(self.Y - targetInt, axis=0) 74 | self.dEdW = np.zeros(self.W.shape) 75 | self.dEdW[0] = dEdMu 76 | self.dEdW[1] = dEdPi 77 | return dEdX 78 | 79 | def updateLearningParams(self, numEpoch): 80 | Stage.updateLearningParams(self, numEpoch) 81 | print 'mu:', 82 | for i in range(self.W.shape[-1]): 83 | print '%.3f' % self.W[0, i], 84 | print 'pi:', 85 | for i in range(self.W.shape[-1]): 86 | print '%.3f' % self.W[1, i], 87 | print 88 | 89 | -------------------------------------------------------------------------------- /src/imageqa_layout.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nn 4 | import numpy as np 5 | import imageqa_test as it 6 | import imageqa_visprior as ip 7 | import imageqa_ensemble as ie 8 | import imageqa_render as ir 9 | 10 | def parseInputFile(filename): 11 | caption = '' 12 | selIds = [] 13 | selComments = [] 14 | with open(filename) as f: 15 | i = 0 16 | for line in f: 17 | if i == 0 and line.startswith('caption:'): 18 | caption = line[8:-1] 19 | else: 20 | parts = line.split(',') 21 | selIds.append(int(parts[0])) 22 | if len(parts) > 1: 23 | selComments.append(parts[1][:-1]) 24 | else: 25 | selComments.append('') 26 | i += 1 27 | return caption, selIds, selComments 28 | 29 | if __name__ == '__main__': 30 | """ 31 | Render a selection of examples into LaTeX. 
32 | Usage: python imageqa_layout.py 33 | -m[odel] {name1:modelId1} 34 | -m[odel] {name2:modelId2} 35 | -em[odel] {name3:ensembleModelId3,ensembleModelId4,...} 36 | -pem[odel] {name4:ensembleModelId5,ensembleModelId6,...} 37 | ... 38 | -d[ata] {dataFolder} 39 | -i[nput] {listFile} 40 | -o[utput] {outputFolder} 41 | [-k {top K answers}] 42 | [-p[icture] {pictureFolder}] 43 | [-f[ile] {outputFilename}] 44 | [-daquar/-cocoqa] 45 | Input file format: 46 | QID1[,Comment1] 47 | QID2[,Comment2] 48 | ... 49 | """ 50 | params = ir.parseComparativeParams(sys.argv) 51 | 52 | urlDict = ir.loadImgUrl(params['dataset'], params['dataFolder']) 53 | data = it.loadDataset(params['dataFolder']) 54 | 55 | print('Parsing input file...') 56 | caption, selIds, selComments = parseInputFile(params['inputFile']) 57 | 58 | print('Running models...') 59 | idx = np.array(selIds, dtype='int') 60 | inputTestSel = inputTest[idx] 61 | targetTestSel = targetTest[idx] 62 | inputTest = data['testData'][0] 63 | questionTypeArray = data['questionTypeArray'] 64 | modelOutputs = ie.runAllModels( 65 | inputTestSel, 66 | questionTypeArray[idx], 67 | params['models'], 68 | params['resultsFolder'], 69 | params['dataset'], 70 | params['dataFolder']): 71 | 72 | # Render 73 | print('Rendering LaTeX...') 74 | 75 | # Replace escape char 76 | data['questionIdict'] = ir.escapeLatexIdict(data['questionIdict']) 77 | data['ansIdict'] = ir.escapeLatexIdict(data['ansIdict']) 78 | 79 | if not os.path.exists(outputFolder): 80 | os.makedirs(outputFolder) 81 | ir.renderLatex( 82 | inputTestSel, 83 | targetTestSel, 84 | data['questionIdict'], 85 | data['ansIdict'], 86 | urlDict, 87 | topK=params['topK'], 88 | outputFolder=params['outputFolder'], 89 | pictureFolder=params['pictureFolder'], 90 | comments=selComments, 91 | caption=caption, 92 | modelOutputs=modelOutputs, 93 | modelNames=ir.getModelNames(params['models']), 94 | questionIds=idx, 95 | filename=params['outputFilename'] + '.tex') 
-------------------------------------------------------------------------------- /src/calculate_wups.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Mateusz Malinowski 4 | # mmalinow@mpi-inf.mpg.de 5 | # 6 | # Modified by Mengye Ren 7 | # mren@cs.toronto.edu 8 | # 9 | # This evaluation code will only work for single word answers. 10 | 11 | # it assumes there are two files 12 | # - first file with ground truth answers 13 | # - second file with predicted answers 14 | # both answers are line-aligned 15 | 16 | import sys 17 | import re 18 | 19 | from numpy import prod 20 | from nltk.corpus import wordnet as wn 21 | 22 | word_pair_dict = {} 23 | 24 | def file2list(filepath): 25 | with open(filepath,'r') as f: 26 | lines = [k for k in 27 | [k.strip() for k in f.readlines()] 28 | if len(k) > 0] 29 | return lines 30 | 31 | def dirac_measure(a, b): 32 | """ 33 | Returns 1 iff a = b and 0 otherwise. 34 | """ 35 | return float(a == b) 36 | 37 | 38 | def wup_measure(a, b, similarity_threshold = 0.925, debug = False): 39 | """ 40 | Returns Wu-Palmer similarity score. 
41 | More specifically, it computes: 42 | max_{x \in interp(a)} max_{y \in interp(b)} wup(x,y) 43 | where interp is a 'interpretation field' 44 | """ 45 | if debug: print 'Original', a, b 46 | if word_pair_dict.has_key(a+','+b): 47 | return word_pair_dict[a+','+b] 48 | 49 | def get_semantic_field(a): 50 | return wn.synsets(a, pos=wn.NOUN) 51 | 52 | if a == b: return 1.0 53 | 54 | interp_a = get_semantic_field(a) 55 | interp_b = get_semantic_field(b) 56 | if debug: print(interp_a) 57 | 58 | if interp_a == [] or interp_b == []: 59 | return 0.0 60 | 61 | if debug: print 'Stem', a, b 62 | global_max=0.0 63 | for x in interp_a: 64 | for y in interp_b: 65 | local_score=x.wup_similarity(y) 66 | if debug: print 'Local', local_score 67 | if local_score > global_max: 68 | global_max=local_score 69 | if debug: print 'Global', global_max 70 | 71 | # we need to use the semantic fields and therefore we downweight 72 | # unless the score is high which indicates both are synonyms 73 | if global_max < similarity_threshold: 74 | interp_weight = 0.1 75 | else: 76 | interp_weight = 1.0 77 | 78 | final_score = global_max * interp_weight 79 | word_pair_dict[a+','+b] = final_score 80 | return final_score 81 | 82 | def runAll(gt_filepath, pred_filepath, thresh): 83 | global word_pair_dict 84 | word_pair_dict = {} 85 | input_gt=file2list(gt_filepath) 86 | input_pred=file2list(pred_filepath) 87 | 88 | if thresh == -1: 89 | measure = dirac_measure 90 | else: 91 | measure = lambda x, y: wup_measure(x, y, thresh) 92 | 93 | if thresh == -1: 94 | print 'standard Accuracy is used' 95 | else: 96 | print 'soft WUPS is used' 97 | score_list = [measure(ta, pa) for (ta, pa) in zip(input_gt, input_pred)] 98 | final_score = sum(map( 99 | lambda x: float(x) / float(len(score_list)), score_list)) 100 | 101 | print 'final score:', final_score 102 | return final_score 103 | 104 | 105 | if __name__ == '__main__': 106 | if len(sys.argv) < 4: 107 | print 'Usage: true answers file, predicted answers file, 
threshold' 108 | print 'If threshold is -1, then the standard Accuracy is used' 109 | sys.exit("3 arguments must be given") 110 | gt_filepath=sys.argv[1] 111 | pred_filepath=sys.argv[2] 112 | thresh=float(sys.argv[3]) 113 | runAll(gt_filepath, pred_filepath, thresh) 114 | -------------------------------------------------------------------------------- /models/vis_lstm.model.yml: -------------------------------------------------------------------------------- 1 | name: 'vis_lstm' 2 | costFn: 'crossEntIdx' 3 | decisionFn: 'argmax' 4 | stages: 5 | - 'imgSel' 6 | - 'imgUnfold' 7 | - 'imgFeat' 8 | - 'imgFeatNorm' 9 | - 'imgMap' 10 | - 'imgFold' 11 | - 'txtSel' 12 | - 'txtUnfold' 13 | - 'txtDict' 14 | - 'txtFold' 15 | - 'concat' 16 | - 'dropout' 17 | - 'lstm' 18 | - 'softmax' 19 | outputs: 'softmax' 20 | specs: 21 | - name: 'imgSel' 22 | type: 'selector' 23 | inputs: 'input' 24 | start: 0 25 | end: 1 26 | axis: 1 27 | - name: 'txtSel' 28 | type: 'selector' 29 | inputs: 'input' 30 | start: 1 31 | end: 56 32 | axis: 1 33 | - name: 'imgUnfold' 34 | type: 'timeUnfold' 35 | inputs: 'imgSel' 36 | - name: 'imgFeat' 37 | type: 'lut' 38 | inputs: 'imgUnfold' 39 | inputDim: 123288 40 | outputDim: 4096 41 | initWeights: '../data/hidden_oxford_mscoco.h5' 42 | sparse: true 43 | format: 'h5' 44 | h5key: 'hidden7' 45 | learningRate: 0.0 46 | outputdEdX: false 47 | - name: 'imgFeatNorm' 48 | type: 'normalize' 49 | inputs: 'imgFeat' 50 | mean: '../data/hidden_oxford_mscoco.h5' 51 | meanKey: 'hidden7_mean' 52 | std: '../data/hidden_oxford_mscoco.h5' 53 | stdKey: 'hidden7_std' 54 | format: 'h5' 55 | outputDim: 4096 56 | - name: 'imgMap' 57 | type: 'map' 58 | inputs: 'imgFeatNorm' 59 | activeFn: 'identity' 60 | bias: false 61 | outputDim: 500 62 | initRange: 0.05 63 | initSeed: 1 64 | learningRate: 0.8 65 | momentum: 0.9 66 | gradientClip: 0.1 67 | weightClip: 100.0 68 | outputdEdX: false 69 | - name: 'imgFold' 70 | type: 'timeFold' 71 | inputs: 'imgMap' 72 | timespan: 1 73 | - name: 
'txtUnfold' 74 | type: 'timeUnfold' 75 | inputs: 'txtSel' 76 | - name: 'txtDict' 77 | type: 'lut' 78 | intConversion: true 79 | inputs: 'txtUnfold' 80 | inputDim: 9738 81 | outputDim: 500 82 | initRange: 1.0 83 | initSeed: 2 84 | learningRate: 0.8 85 | momentum: 0.9 86 | gradientClip: 0.1 87 | weightClip: 1000.0 88 | outputdEdX: false 89 | - name: 'txtFold' 90 | type: 'timeFold' 91 | inputs: 'txtDict' 92 | timespan: 55 93 | - name: 'concat' 94 | type: 'concat' 95 | inputs: 'imgFold, txtFold' 96 | axis: 1 97 | - name: 'dropout' 98 | type: 'dropout' 99 | inputs: 'concat' 100 | dropoutRate: 0.2 101 | initSeed: 3 102 | outputDim: 500 103 | - name: 'lstm' 104 | type: 'lstm' 105 | inputs: 'dropout' 106 | inputDim: 500 107 | outputDim: 300 108 | timespan: 56 109 | initRange: 0.1 110 | initSeed: 4 111 | cutOffZeroEnd: true 112 | multiErr: false 113 | learningRate: 0.8 114 | learningRateAnnealConst: 0.0 115 | momentum: 0.9 116 | gradientClip: 0.1 117 | weightClip: 100.0 118 | weightRegConst: 0.00005 119 | - name: 'softmax' 120 | type: 'map' 121 | inputs: 'lstm' 122 | activeFn: 'softmax' 123 | outputDim: 431 124 | initRange: 0.1 125 | initSeed: 7 126 | learningRate: 0.01 127 | learningRateAnnealConst: 0.0 128 | momentum: 0.9 129 | gradientClip: 0.1 130 | weightClip: 15.0 131 | weightRegConst: 0.00005 132 | -------------------------------------------------------------------------------- /src/nn/lut.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | import os 3 | use_gpu = os.environ.get('GNUMPY_USE_GPU', 'yes') == 'yes' 4 | 5 | class LUT(Stage): 6 | """ 7 | Look-up table. 8 | WARNING: this implementation of LUT is index 1-based. 9 | 0 will mean an all-zero entry. 10 | The first row of the weight matrix is one. 
11 | """ 12 | def __init__(self, 13 | inputNames, 14 | inputDim, 15 | outputDim, 16 | lazyInit=True, 17 | initRange=1.0, 18 | initSeed=2, 19 | intConversion=False, 20 | needInit=True, 21 | initWeights=0, 22 | sparse=False, 23 | learningRate=0.0, 24 | learningRateAnnealConst=0.0, 25 | momentum=0.0, 26 | deltaMomentum=0.0, 27 | weightClip=0.0, 28 | gradientClip=0.0, 29 | weightRegConst=0.0, 30 | outputdEdX=False, 31 | name=None): 32 | Stage.__init__(self, 33 | name=name, 34 | inputNames=inputNames, 35 | learningRate=learningRate, 36 | outputDim=outputDim, 37 | learningRateAnnealConst=learningRateAnnealConst, 38 | momentum=momentum, 39 | deltaMomentum=deltaMomentum, 40 | weightClip=weightClip, 41 | gradientClip=gradientClip, 42 | weightRegConst=weightRegConst, 43 | gpu=False, 44 | outputdEdX=outputdEdX) 45 | self.outputDim = outputDim 46 | self.inputDim = inputDim 47 | self.initRange = initRange 48 | self.random = np.random.RandomState(initSeed) 49 | self.needInit = needInit 50 | self.intConversion = intConversion 51 | 52 | # Zeroth rows of the weight matrix is reserved 53 | # for empty word at the end of a sentence. 
54 | if needInit: 55 | if lazyInit: 56 | self.W = None 57 | else: 58 | self.initWeights() 59 | else: 60 | self.W = initWeights 61 | if use_gpu and self.W.dtype != np.float32: 62 | self.W = self.W.astype('float32') 63 | self.X = 0 64 | self.Y = 0 65 | self.sparse = sparse 66 | self.dEdW = 0.0 67 | 68 | def initWeights(self): 69 | # print self.name 70 | self.W = self.random.uniform( 71 | -self.initRange/2.0, self.initRange/2.0, 72 | (self.inputDim, self.outputDim)) 73 | if use_gpu and self.W.dtype != np.float32: 74 | self.W = self.W.astype('float32') 75 | 76 | def forward(self, X): 77 | if self.W is None: self.initWeights() 78 | if self.intConversion: X = X.astype(int) 79 | X = X.reshape(X.size) 80 | self.X = X 81 | Y = np.zeros((X.shape[0], self.outputDim), self.W.dtype) 82 | for n in range(0, X.shape[0]): 83 | if self.sparse: 84 | if X[n] != 0: 85 | Y[n] = self.W[X[n] - 1].todense() 86 | else: 87 | if X[n] != 0: 88 | Y[n] = self.W[X[n] - 1] 89 | return Y 90 | 91 | def backward(self, dEdY): 92 | X = self.X 93 | if self.learningRate > 0.0: 94 | self.dEdW = np.zeros(self.W.shape, self.W.dtype) 95 | for n in range(0, X.shape[0]): 96 | if X[n] != 0: 97 | self.dEdW[X[n] - 1] += dEdY[n] 98 | if self.outputdEdX: 99 | return np.zeros(X.shape) 100 | else: 101 | return None 102 | 103 | def loadWeights(self, W): 104 | if self.learningRate == 0.0: 105 | return 106 | else: 107 | Stage.loadWeights(self, W) 108 | 109 | def getWeights(self): 110 | if self.learningRate == 0.0: 111 | return 0 112 | else: 113 | return self.W 114 | -------------------------------------------------------------------------------- /src/nn/sum_prod.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | #use_gpu = os.environ.get('GNUMPY_USE_GPU', 'yes') == 'yes' 3 | use_gpu = False 4 | if use_gpu: 5 | import gnumpy as gpu 6 | import gnumpy as gnp 7 | 8 | class SumProduct(Stage): 9 | def __init__(self, 10 | name, 11 | inputNames, 12 | sumAxis, 13 | 
outputDim, 14 | gpu=use_gpu, 15 | beta=1.0): 16 | Stage.__init__(self, 17 | name=name, 18 | inputNames=inputNames, 19 | gpu=gpu, 20 | outputDim=outputDim) 21 | self.sumAxis = sumAxis 22 | self.beta = beta 23 | 24 | def getInput(self): 25 | # Assume that the input size is always 2 26 | # Rewrite get input logic into two separate arrays 27 | if len(self.inputs) == 2: 28 | return [self.inputs[0].Y, self.inputs[1].Y] 29 | elif len(self.inputs) == 3: 30 | return [self.inputs[0].Y, self.inputs[1].Y, self.inputs[2].Y] 31 | 32 | def sendError(self, dEdX): 33 | self.inputs[0].dEdY += dEdX[0] 34 | self.inputs[0].receivedError = True 35 | self.inputs[1].dEdY += dEdX[1] 36 | self.inputs[1].receivedError = True 37 | if len(self.inputs) == 3: 38 | self.inputs[2].dEdY += dEdX[2] 39 | self.inputs[2].receivedError = True 40 | 41 | def forward(self, X): 42 | if self.gpu: 43 | self.X = [] 44 | self.X.append(gpu.as_garray(X[0].astype('float32'))) 45 | self.X.append(gpu.as_garray(X[1].astype('float32'))) 46 | if len(X) == 2: 47 | Y = self.beta * gpu.sum(self.X[0] * self.X[1], axis=self.sumAxis) 48 | elif len(X) == 3: 49 | self.X.append(gpu.as_garray(X[2].astype('float32'))) 50 | self.Z = gpu.sum(self.X[0] * self.X[1], axis=self.sumAxis) 51 | Y = self.X[2] * self.Z 52 | Y = Y.as_numpy_array(dtype='float32') 53 | else: 54 | self.X = X 55 | if len(self.X) == 2: 56 | Y = self.beta * np.sum(self.X[0] * self.X[1], axis=self.sumAxis) 57 | elif len(self.X) == 3: 58 | self.Z = np.sum(self.X[0] * self.X[1], axis=self.sumAxis) 59 | Y = self.X[2] * self.Z 60 | return Y 61 | 62 | def backward(self, dEdY): 63 | # Need to generalize, but now, let's assume it's the attention model. 
64 | dEdX = [] 65 | if self.gpu: 66 | if len(self.X) == 2: 67 | dEdY = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1]) 68 | dEdY = gpu.as_garray(dEdY) 69 | dEdX1 = self.beta * gpu.sum(dEdY * self.X[1], axis=2) 70 | dEdX2 = self.beta * dEdY * self.X[0] 71 | dEdX.append(dEdX1.as_numpy_array(dtype='float32')) 72 | dEdX.append(dEdX2.as_numpy_array(dtype='float32')) 73 | elif len(self.X) == 3: 74 | dEdY = gpu.as_garray(dEdY) 75 | dEdY2 = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1]) 76 | dEdY2 = gpu.as_garray(dEdY2) 77 | dEdX1 = self.X[2] * gpu.sum(dEdY2 * self.X[1], axis=2) 78 | dEdX2 = self.X[2].reshape(self.X[2].shape[0], 1, 1) * dEdY2 * self.X[0] 79 | dEdX3 = gpu.sum(dEdY * self.Z, axis=-1).reshape(self.X[2].shape[0], 1) 80 | dEdX.append(dEdX1.as_numpy_array(dtype='float32')) 81 | dEdX.append(dEdX2.as_numpy_array(dtype='float32')) 82 | dEdX.append(dEdX3.as_numpy_array(dtype='float32')) 83 | else: 84 | if len(self.X) == 2: 85 | dEdY = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1]) 86 | dEdX.append(self.beta * np.sum(dEdY * self.X[1], axis=2)) 87 | dEdX.append(self.beta * dEdY * self.X[0]) 88 | elif len(self.X) == 3: 89 | dEdY2 = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1]) 90 | dEdX.append(self.X[2] * np.sum(dEdY2 * self.X[1], axis=2)) 91 | dEdX.append(self.X[2].reshape(self.X[2].shape[0], 1, 1) * dEdY2 * self.X[0]) 92 | dEdX.append(np.sum(dEdY * self.Z, axis=-1).reshape(self.X[2].shape[0], 1)) 93 | return dEdX 94 | -------------------------------------------------------------------------------- /src/nn/sequential_tests.py: -------------------------------------------------------------------------------- 1 | from sequential import * 2 | from lstm_old import * 3 | from map import * 4 | from dropout import * 5 | from reshape import * 6 | from lut import * 7 | from active_func import * 8 | import unittest 9 | 10 | class Sequential_Tests(unittest.TestCase): 11 | """Sequential stacks of stages tests""" 12 | def setUp(self): 13 | random = np.random.RandomState(2) 14 | 
self.trainInput = random.uniform(0, 10, (5, 5, 1)).astype(int) 15 | self.trainTarget = random.uniform(0, 1, (5, 1)).astype(int) 16 | 17 | def test_grad(self): 18 | wordEmbed = np.random.rand(np.max(self.trainInput), 5) 19 | timespan = self.trainInput.shape[1] 20 | time_unfold = TimeUnfold() 21 | 22 | lut = LUT( 23 | inputDim=np.max(self.trainInput)+1, 24 | outputDim=5, 25 | inputNames=None, 26 | needInit=False, 27 | initWeights=wordEmbed 28 | ) 29 | 30 | m = Map( 31 | outputDim=5, 32 | activeFn=IdentityActiveFn(), 33 | inputNames=None, 34 | initRange=0.1, 35 | initSeed=1, 36 | ) 37 | 38 | time_fold = TimeFold( 39 | timespan=timespan 40 | ) 41 | 42 | lstm = LSTM_Old( 43 | inputDim=5, 44 | outputDim=5, 45 | initRange=.1, 46 | initSeed=3, 47 | cutOffZeroEnd=True, 48 | multiErr=True 49 | ) 50 | 51 | dropout = Dropout( 52 | name='d1', 53 | dropoutRate=0.5, 54 | inputNames=None, 55 | outputDim=5, 56 | initSeed=2, 57 | debug=True 58 | ) 59 | 60 | lstm_second = LSTM_Old( 61 | inputDim=5, 62 | outputDim=5, 63 | initRange=.1, 64 | initSeed=3, 65 | cutOffZeroEnd=True, 66 | multiErr=False 67 | ) 68 | 69 | soft = Map( 70 | outputDim=2, 71 | activeFn=SoftmaxActiveFn, 72 | initRange=0.1, 73 | initSeed=5 74 | ) 75 | 76 | self.model = Sequential( 77 | stages=[ 78 | time_unfold, 79 | lut, 80 | m, 81 | time_fold, 82 | lstm, 83 | dropout, 84 | lstm_second, 85 | soft 86 | ]) 87 | self.hasDropout = True 88 | costFn = crossEntIdx 89 | output = self.model.forward(self.trainInput, dropout=self.hasDropout) 90 | E, dEdY = costFn(output, self.trainTarget) 91 | dEdX = self.model.backward(dEdY) 92 | self.chkgrd(soft.dEdW, self.evaluateGrad(soft.getWeights(), costFn)) 93 | #self.chkgrd(lstm_second.dEdW, self.evaluateGrad(lstm_second.getWeights(), costFn)) 94 | #self.chkgrd(lstm.dEdW, self.evaluateGrad(lstm.getWeights(), costFn)) 95 | self.chkgrd(m.dEdW, self.evaluateGrad(m.getWeights(), costFn)) 96 | 97 | def chkgrd(self, dE, dETmp): 98 | #print dE/dETmp 99 | dE = dE.reshape(dE.size) 100 | dETmp 
= dETmp.reshape(dE.size) 101 | tolerance = 5e-1 102 | for i in range(dE.size): 103 | self.assertTrue( 104 | (dE[i] == 0 and dETmp[i] == 0) or 105 | (np.abs(dE[i] / dETmp[i] - 1) < tolerance)) 106 | 107 | def evaluateGrad(self, W, costFn): 108 | eps = 1 109 | dEdW = np.zeros(W.shape) 110 | for i in range(W.shape[0]): 111 | for j in range(W.shape[1]): 112 | W[i,j] += eps 113 | output = self.model.forward(self.trainInput, dropout=self.hasDropout) 114 | Etmp1, d1 = costFn(output, self.trainTarget) 115 | 116 | W[i,j] -= 2 * eps 117 | output = self.model.forward(self.trainInput, dropout=self.hasDropout) 118 | Etmp2, d2 = costFn(output, self.trainTarget) 119 | 120 | dEdW[i,j] = (Etmp1 - Etmp2) / 2.0 / eps 121 | W[i,j] += eps 122 | return dEdW 123 | 124 | if __name__ == '__main__': 125 | suite = unittest.TestSuite() 126 | suite.addTests( 127 | unittest.TestLoader().loadTestsFromTestCase(Sequential_Tests)) 128 | unittest.TextTestRunner(verbosity=2).run(suite) 129 | -------------------------------------------------------------------------------- /src/nn/conv1d.py: -------------------------------------------------------------------------------- 1 | import os 2 | use_gpu = os.environ.get('GNUMPY_USE_GPU', 'yes') == 'yes' 3 | if use_gpu: 4 | import gnumpy as gpu 5 | import gnumpy as gnp 6 | from stage import * 7 | 8 | class Conv1D(Stage): 9 | """ 10 | 1D temporal convolution. 11 | No padding, stride=1. 
12 | """ 13 | def __init__(self, 14 | numChannels, 15 | windowSize, 16 | numFilters, 17 | inputNames=None, 18 | initRange=1.0, 19 | initSeed=2, 20 | needInit=True, 21 | initWeights=None, 22 | learningRate=0.0, 23 | learningRateAnnealConst=0.0, 24 | momentum=0.0, 25 | deltaMomentum=0.0, 26 | weightClip=0.0, 27 | gradientClip=0.0, 28 | weightRegConst=0.0, 29 | defaultValue=0.0, 30 | outputdEdX=True, 31 | gpu=use_gpu, 32 | name=None): 33 | Stage.__init__(self, 34 | name=name, 35 | inputNames=inputNames, 36 | outputDim=numFilters, 37 | defaultValue=defaultValue, 38 | learningRate=learningRate, 39 | learningRateAnnealConst=learningRateAnnealConst, 40 | momentum=momentum, 41 | deltaMomentum=deltaMomentum, 42 | weightClip=weightClip, 43 | gradientClip=gradientClip, 44 | weightRegConst=weightRegConst, 45 | gpu=gpu, 46 | outputdEdX=outputdEdX) 47 | self.numFilters = numFilters 48 | self.numChannels = numChannels 49 | self.windowSize = windowSize 50 | self.random = np.random.RandomState(initSeed) 51 | if needInit: 52 | self.W = self.random.uniform(-initRange/2.0, initRange/2.0, 53 | (self.windowSize * self.numChannels, self.numFilters)) 54 | else: 55 | self.W = initWeights 56 | if self.gpu: 57 | self.W = gnp.as_garray(self.W.astype('float32')) 58 | self.X = 0 59 | self.Y = 0 60 | 61 | def forward(self, X): 62 | self.X = X 63 | # Num of examples 64 | N = X.shape[0] 65 | # Timespan 66 | T = X.shape[1] 67 | # Windows size 68 | S = self.windowSize 69 | # Channels 70 | D = self.numChannels 71 | # Num filters 72 | F = self.numFilters 73 | Z = np.zeros((N, T - S + 1, S, D), X.dtype) 74 | for i in range(T - S + 1): 75 | Z[:, i, :, :] = X[:, i : i + S, :] 76 | Z = Z.reshape(N * (T - S + 1), S * D) 77 | if self.gpu: 78 | Z = gpu.as_garray(Z.astype('float32')) 79 | Y = gpu.dot(Z, self.W) 80 | Y = gpu.as_numpy_array(Y) 81 | else: 82 | Y = np.dot(Z, self.W) 83 | 84 | Y = Y.reshape(N, T - S + 1, F) 85 | self.Z = Z 86 | return Y 87 | 88 | def backward(self, dEdY): 89 | N = dEdY.shape[0] 90 
| S = self.windowSize 91 | T = dEdY.shape[1] + S - 1 92 | F = dEdY.shape[2] 93 | D = self.X.shape[2] 94 | dEdY = dEdY.reshape(N * (T - S + 1), F) 95 | dEdX = np.zeros(self.X.shape, self.X.dtype) 96 | 97 | if self.gpu: 98 | gdEdY = gpu.as_garray(dEdY.astype('float32')) 99 | self.dEdW = gpu.dot(self.Z.transpose(), gdEdY) 100 | else: 101 | self.dEdW = np.dot(self.Z.transpose(), dEdY) 102 | 103 | if self.outputdEdX: 104 | if self.gpu: 105 | gdEdZ = gpu.dot(gdEdY, self.W.transpose()) 106 | dEdZ = gpu.as_numpy_array(gdEdZ) 107 | else: 108 | dEdZ = np.dot(dEdY, self.W.transpose()) 109 | 110 | dEdZ = dEdZ.reshape(N, T - S + 1, S, D) 111 | for t in range(0, T): 112 | if t <= S - 1: 113 | dEdX[:, t, :] = np.sum(dEdZ[:, range(0, t + 1), range(t, -1, -1), :], axis=1) 114 | elif t >= T - S + 1: 115 | dEdX[:, t, :] = np.sum(dEdZ[:, range(t - S + 1, T - S + 1), range(S - 1, S - (T - t) - 1, -1), :], axis=1) 116 | else: 117 | dEdX[:, t, :] = np.sum(dEdZ[:, range(t - S + 1, t + 1), range(S - 1, -1, -1), :], axis=1) 118 | return dEdX 119 | -------------------------------------------------------------------------------- /src/imageqa_modelavg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import numpy as np 5 | 6 | import imageqa_test as it 7 | import nn 8 | 9 | def runAvgAll(models, data): 10 | print 'Running model %s' % modelId 11 | modelOutput = nn.test(model, data['testData'][0]) 12 | modelOutputs.append(modelOutput) 13 | finalOutput = np.zeros(modelOutputs[0].shape) 14 | for output in modelOutputs: 15 | shape0 = min(finalOutput.shape[0], output.shape[0]) 16 | shape1 = min(finalOutput.shape[1], output.shape[1]) 17 | finalOutput[:shape0, :shape1] += output[:shape0, :shape1] / float(len(modelOutputs)) 18 | return finalOutput 19 | 20 | def testAvgAll(modelOutputs, mixRatio, data, outputFolder): 21 | # finalOutput = mixRatio * modelOutputs[0] + \ 22 | # (1 - mixRatio) * modelOutputs[1] 23 | finalOutput = 
np.zeros(modelOutputs[0].shape) 24 | for output in modelOutputs: 25 | shape0 = min(finalOutput.shape[0], output.shape[0]) 26 | shape1 = min(finalOutput.shape[1], output.shape[1]) 27 | finalOutput[:shape0, :shape1] += output[:shape0, :shape1] / float(len(modelOutputs)) 28 | testAnswerFile = it.getAnswerFilename(outputFolder, resultsFolder) 29 | testTruthFile = it.getTruthFilename(outputFolder, resultsFolder) 30 | resultsRank, \ 31 | resultsCategory, \ 32 | resultsWups = it.runAllMetrics( 33 | data['testData'][0], 34 | finalOutput, 35 | data['testData'][1], 36 | data['ansIdict'], 37 | data['questionTypeArray'], 38 | testAnswerFile, 39 | testTruthFile) 40 | it.writeMetricsToFile( 41 | outputFolder, 42 | resultsRank, 43 | resultsCategory, 44 | resultsWups, 45 | resultsFolder) 46 | 47 | def testAvg(modelOutputs, mixRatio, target): 48 | finalOutput = mixRatio * modelOutputs[0] + \ 49 | (1 - mixRatio) * modelOutputs[1] 50 | rate, _, __ = it.calcPrecision(finalOutput, target) 51 | return rate 52 | 53 | def validAvg(modelOutputs, mixRatios, target): 54 | bestRate = 0.0 55 | bestMixRatio = 0.0 56 | for mixRatio in mixRatios: 57 | rate = testAvg(modelOutputs, mixRatio, target) 58 | print 'Mix ratio %.4f Rate %.4f' % (mixRatio, rate) 59 | if rate > bestRate: 60 | bestMixRatio = mixRatio 61 | bestRate = rate 62 | return bestMixRatio 63 | 64 | if __name__ == '__main__': 65 | """ 66 | Usage: python imageqa_modelavg.py 67 | -m[odel] {modelId1} 68 | -m[odel] {modelId2} 69 | -vm[odel] {validModelId1} 70 | -vm[odel] {validModelId2} 71 | -d[ata] {dataFolder} 72 | -o[utput] {outputFolder} 73 | [-r[esults] {resultsFolder}] 74 | """ 75 | resultsFolder = '../results' 76 | modelIds = [] 77 | validModelIds = [] 78 | for i, flag in enumerate(sys.argv): 79 | if flag == '-m' or flag == '-model': 80 | modelIds.append(sys.argv[i + 1]) 81 | elif flag == '-vm' or flag == '-vmodel': 82 | validModelIds.append(sys.argv[i + 1]) 83 | elif flag == '-r' or flag == '-results': 84 | resultsFolder = 
sys.argv[i + 1] 85 | elif flag == '-d' or flag == '-data': 86 | dataFolder = sys.argv[i + 1] 87 | elif flag == '-o' or flag == '-output': 88 | outputFolder = sys.argv[i + 1] 89 | data = it.loadDataset(dataFolder) 90 | 91 | models = [] 92 | validModels = [] 93 | for modelId in modelIds: 94 | print 'Loading model %s' % modelId 95 | models.append(it.loadModel(modelId, resultsFolder)) 96 | for modelId in validModelIds: 97 | print 'Loading model %s' % modelId 98 | validModels.append(it.loadModel(modelId, resultsFolder)) 99 | 100 | modelOutputs = [] 101 | validModelOutputs = [] 102 | # for modelId, model in zip(validModelIds, validModels): 103 | # print 'Running model %s' % modelId 104 | # modelOutput = nn.test(model, data['validData'][0]) 105 | # validModelOutputs.append(modelOutput) 106 | # 107 | # mixRatios = np.arange(0, 11) * 0.1 108 | # bestMixRatio = validAvg(validModelOutputs, mixRatios, data['validData'][1]) 109 | # print 'Best ratio found: %.4f' % bestMixRatio 110 | bestMixRatio = 0.5 111 | shape = None 112 | for modelId, model in zip(modelIds, models): 113 | print 'Running model %s' % modelId 114 | modelOutput = nn.test(model, data['testData'][0]) 115 | if shape is None: 116 | shape = modelOutput.shape 117 | else: 118 | modelOutput = modelOutput[:shape[0],:shape[1]] 119 | modelOutputs.append(modelOutput) 120 | 121 | testAvgAll(modelOutputs, bestMixRatio, data, outputFolder) 122 | -------------------------------------------------------------------------------- /models/2_vis_blstm.model.yml: -------------------------------------------------------------------------------- 1 | name: '2_vis_blstm' 2 | costFn: 'crossEntIdx' 3 | decisionFn: 'argmax' 4 | stages: 5 | - 'imgSel' 6 | - 'txtSel' 7 | - 'txtDict' 8 | - 'txtFold' 9 | - 'imgFeat' 10 | - 'imgFeatNorm' 11 | - 'imgMapFirst' 12 | - 'imgMapLast' 13 | - 'imgFoldFirst' 14 | - 'imgFoldLast' 15 | - 'concat' 16 | - 'concatRev' 17 | - 'concatLast' 18 | - 'concatLastRev' 19 | - 'dropoutForward' 20 | - 'dropoutBackward' 
21 | - 'lstmF' 22 | - 'lstmB' 23 | - 'answer' 24 | outputs: 'answer' 25 | specs: 26 | - name: 'imgSel' 27 | type: 'selector' 28 | inputs: 'input' 29 | start: 0 30 | end: 1 31 | axis: 1 32 | - name: 'txtSel' 33 | type: 'selector' 34 | inputs: 'input' 35 | start: 1 36 | end: 56 37 | axis: 1 38 | - name: 'txtDict' 39 | type: 'lut' 40 | intConversion: true 41 | inputs: 'txtSel' 42 | inputDim: 9738 43 | outputDim: 500 44 | initRange: 1.0 45 | initSeed: 2 46 | learningRate: 0.8 47 | momentum: 0.9 48 | gradientClip: 0.1 49 | weightClip: 2000.0 50 | outputdEdX: false 51 | - name: 'txtFold' 52 | type: 'timeFold' 53 | inputs: 'txtDict' 54 | timespan: 55 55 | - name: 'imgFeat' 56 | type: 'lut' 57 | inputs: 'imgSel' 58 | inputDim: 123288 59 | outputDim: 4096 60 | initWeights: '../data/hidden_oxford_mscoco.h5' 61 | sparse: true 62 | format: 'h5' 63 | h5key: 'hidden7' 64 | learningRate: 0.0 65 | outputdEdX: false 66 | - name: 'imgFeatNorm' 67 | type: 'normalize' 68 | inputs: 'imgFeat' 69 | mean: '../data/hidden_oxford_mscoco.h5' 70 | meanKey: 'hidden7_mean' 71 | std: '../data/hidden_oxford_mscoco.h5' 72 | stdKey: 'hidden7_std' 73 | format: 'h5' 74 | outputDim: 4096 75 | - name: 'imgMapFirst' 76 | type: 'map' 77 | inputs: 'imgFeatNorm' 78 | activeFn: 'identity' 79 | outputDim: 500 80 | bias: false 81 | initRange: 0.05 82 | initSeed: 1 83 | learningRate: 0.8 84 | momentum: 0.9 85 | gradientClip: 0.1 86 | weightClip: 100.0 87 | outputdEdX: false 88 | - name: 'imgMapLast' 89 | type: 'map' 90 | inputs: 'imgFeatNorm' 91 | activeFn: 'identity' 92 | outputDim: 500 93 | bias: false 94 | initRange: 0.05 95 | initSeed: 15 96 | learningRate: 0.8 97 | momentum: 0.9 98 | gradientClip: 0.1 99 | weightClip: 100.0 100 | outputdEdX: false 101 | - name: 'imgFoldFirst' 102 | type: 'timeFold' 103 | inputs: 'imgMapFirst' 104 | timespan: 1 105 | - name: 'imgFoldLast' 106 | type: 'timeFold' 107 | inputs: 'imgMapLast' 108 | timespan: 1 109 | - name: 'concat' 110 | type: 'concat' 111 | inputs: 
'imgFoldFirst, txtFold' 112 | axis: 1 113 | - name: 'concatRev' 114 | type: 'timeReverse' 115 | inputs: 'concat' 116 | - name: 'concatLast' 117 | type: 'concat' 118 | inputs: 'imgFoldLast, concatRev' 119 | axis: 1 120 | - name: 'concatLastRev' 121 | type: 'timeReverse' 122 | inputs: 'concatLast' 123 | - name: 'dropoutForward' 124 | type: 'dropout' 125 | inputs: 'concatLastRev' 126 | dropoutRate: 0.4 127 | initSeed: 3 128 | outputDim: 500 129 | - name: 'dropoutBackward' 130 | type: 'dropout' 131 | inputs: 'concatLast' 132 | dropoutRate: 0.4 133 | initSeed: 4 134 | outputDim: 500 135 | - name: 'lstmF' 136 | type: 'lstm' 137 | inputs: 'dropoutForward' 138 | inputDim: 500 139 | outputDim: 300 140 | timespan: 57 141 | initRange: 0.1 142 | initSeed: 5 143 | multiOutput: false 144 | learningRate: 0.8 145 | learningRateAnnealConst: 0.0 146 | momentum: 0.9 147 | gradientClip: 0.1 148 | weightClip: 100.0 149 | weightRegConst: 0.00005 150 | outputdEdX: true 151 | - name: 'lstmB' 152 | type: 'lstm' 153 | inputs: 'dropoutBackward' 154 | inputDim: 500 155 | outputDim: 300 156 | timespan: 57 157 | initRange: 0.1 158 | initSeed: 6 159 | multiOutput: false 160 | learningRate: 0.8 161 | learningRateAnnealConst: 0.0 162 | momentum: 0.9 163 | gradientClip: 0.1 164 | weightClip: 100.0 165 | weightRegConst: 0.00005 166 | outputdEdX: true 167 | - name: 'answer' 168 | type: 'map' 169 | inputs: 'lstmF, lstmB' 170 | outputDim: 431 171 | activeFn: 'softmax' 172 | initRange: 0.01 173 | initSeed: 6 174 | learningRate: 0.01 175 | momentum: 0.9 176 | gradientClip: 0.1 177 | weightClip: 15.0 178 | weightRegConst: 0.00005 179 | -------------------------------------------------------------------------------- /src/nn/map.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | import os 3 | use_gpu = os.environ.get('GNUMPY_USE_GPU', 'yes') == 'yes' 4 | if use_gpu: 5 | import gnumpy as gpu 6 | import gnumpy as gnp 7 | 8 | class Map(Stage): 9 | 
def __init__(self, 10 | outputDim, 11 | activeFn, 12 | inputNames=None, 13 | initRange=1.0, 14 | bias=True, 15 | biasInitConst=-1.0, 16 | initSeed=2, 17 | needInit=True, 18 | initWeights=0, 19 | initType='zeroMean', 20 | learningRate=0.0, 21 | learningRateAnnealConst=0.0, 22 | momentum=0.0, 23 | deltaMomentum=0.0, 24 | weightClip=0.0, 25 | gradientClip=0.0, 26 | weightRegConst=0.0, 27 | outputdEdX=True, 28 | defaultValue=0.0, 29 | gpu=use_gpu, 30 | name=None): 31 | Stage.__init__(self, 32 | name=name, 33 | inputNames=inputNames, 34 | outputDim=outputDim, 35 | defaultValue=defaultValue, 36 | learningRate=learningRate, 37 | learningRateAnnealConst=learningRateAnnealConst, 38 | momentum=momentum, 39 | deltaMomentum=deltaMomentum, 40 | weightClip=weightClip, 41 | gradientClip=gradientClip, 42 | weightRegConst=weightRegConst, 43 | gpu=gpu, 44 | outputdEdX=outputdEdX) 45 | self.bias = bias 46 | self.activeFn = activeFn 47 | self.inputDim = None 48 | self.random = np.random.RandomState(initSeed) 49 | if not needInit: 50 | if self.gpu: 51 | self.W = gnp.as_garray(initWeights) 52 | else: 53 | self.W = initWeights 54 | else: 55 | # Lazy initialize the weights until the first data arrives 56 | self.W = None 57 | self.initRange = initRange 58 | self.biasInitConst = biasInitConst 59 | self.initType = initType 60 | self.X = 0 61 | self.Y = 0 62 | pass 63 | 64 | def initWeights(self): 65 | if self.initType == 'zeroMean': 66 | r0 = -self.initRange/2.0 67 | r1 = self.initRange/2.0 68 | elif self.initType == 'positive': 69 | r0 = 0.0 70 | r1 = self.initRange 71 | else: 72 | raise Exception('Unknown initialization type: ' + self.initType) 73 | if self.bias: 74 | if self.biasInitConst >= 0.0: 75 | self.W = np.concatenate((self.random.uniform( 76 | r0, r1, (self.inputDim, self.outputDim)), 77 | np.ones((1, self.outputDim)) * self.biasInitConst), axis=0) 78 | else: 79 | self.W = self.random.uniform( 80 | r0, r1, (self.inputDim + 1, self.outputDim)) 81 | else: 82 | self.W = 
self.random.uniform( 83 | -self.initRange/2.0, self.initRange/2.0, (self.inputDim, self.outputDim)) 84 | if self.gpu: 85 | self.W = gpu.as_garray(self.W.astype('float32')) 86 | 87 | def forward(self, X): 88 | if self.inputDim is None: self.inputDim = X.shape[-1] 89 | if self.W is None: self.initWeights() 90 | if self.bias: 91 | self.X = np.concatenate((X, np.ones((X.shape[0], 1), dtype=X.dtype)), axis=-1) 92 | else: 93 | self.X = X 94 | if self.gpu: 95 | self.X = gpu.as_garray(self.X.astype('float32')) 96 | Z = gpu.dot(self.X, self.W) 97 | Z = Z.as_numpy_array(dtype='float32') 98 | self.Y = self.activeFn.forward(Z) 99 | else: 100 | Z = np.dot(self.X, self.W) 101 | self.Y = self.activeFn.forward(Z) 102 | return self.Y 103 | 104 | def backward(self, dEdY): 105 | dEdZ = self.activeFn.backward(dEdY, self.Y, 0) 106 | if self.gpu: 107 | gdEdZ = gpu.as_garray(dEdZ.astype('float32')) 108 | self.dEdW = gpu.dot(self.X.transpose(), gdEdZ) 109 | if self.bias: 110 | dEdX = gpu.dot(gdEdZ, self.W[:-1, :].transpose()) 111 | else: 112 | dEdX = gpu.dot(gdEdZ, self.W.transpose()) 113 | dEdX = gpu.as_numpy_array(dEdX) 114 | else: 115 | self.dEdW = np.dot(self.X.transpose(), dEdZ) 116 | if self.bias: 117 | dEdX = np.dot(dEdZ, self.W[:-1, :].transpose()) 118 | else: 119 | dEdX = np.dot(dEdZ, self.W.transpose()) 120 | return dEdX if self.outputdEdX else None 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Image QA 2 | This repository contains code to reproduce results in paper *Exploring Models 3 | and Data for Image Question Answering*. Mengye Ren, Ryan Kiros, Richard Zemel. 4 | NIPS 2015 (to appear). 
5 | 6 | ## Rendered results 7 | Results for each model can be viewed directly at 8 | http://www.cs.toronto.edu/~mren/imageqa/results 9 | 10 | ## Dataset 11 | COCO-QA dataset is released at 12 | http://www.cs.toronto.edu/~mren/imageqa/data/cocoqa 13 | 14 | ## Prerequisites 15 | ### Dependencies 16 | Please install the following dependencies: 17 | * python 2.7 18 | * numpy 19 | * scipy 20 | * hdf5 21 | * h5py (python package for read/write h5 files) 22 | * pyyaml (python package for parsing the yaml format) 23 | * cuda (optional, if you want to run on GPU) 24 | * cudamat (optional, python wrapper for cuda) 25 | 26 | ### Repository structure 27 | The repository contains the following folders: 28 | * *src*: Source code folder 29 | * *data*: Empty folder, to store dataset 30 | * *results*: Empty folder, to store results 31 | * *models*: Model architecture description files 32 | * *config*: Training loop hyperparameters (batch size, etc.) 33 | 34 | ### Data files 35 | Please download the following files from my server: 36 | * Image features from VGG-19 37 | * http://www.cs.toronto.edu/~mren/imageqa/data/hidden_oxford_mscoco.h5 38 | * about 1.1G 39 | * Encoded COCO-QA dataset 40 | * http://www.cs.toronto.edu/~mren/imageqa/data/cocoqa.zip 41 | * about 5.4M 42 | 43 | After downloading the files, please place *hidden_oxford_mscoco.h5* inside 44 | *data* folder, extract *cocoqa* folder inside *data*. 45 | 46 | Now your data folder should contain the following files: 47 | * *hidden_oxford_mscoco.h5* - the last hidden layer activation from the VGG-19 48 | conv net on the entire MS-COCO dataset. It is stored as a scipy sparse row 49 | matrix format. Each row represents an image. 50 | * *cocoqa/imgid_dict.pkl* - a list telling you which row 51 | corresponds to which original MS-COCO image ID. 52 | * *cocoqa/train.npy* - training set (not including hold-out set) 53 | * *cocoqa/valid.npy* - validation set to determine early stop.
54 | * *cocoqa/test.npy* - test set 55 | * *cocoqa/qdict.pkl* - question word dictionary 56 | * *cocoqa/ansdict.pkl* - answer class definition 57 | 58 | All numpy files above (train, valid, test) stores two objects, the input data 59 | and the target value. The input data is 3-d matrix, with first dimension to be 60 | number of example, second dimension to be time, third dimension to be feature. 61 | The first time step is the image ID, and later the word ID. The target value is 62 | the answer class ID. The IDs dictionary can be found in qdict.pkl and 63 | ansdict.pkl, which are python pickle files storing the dictionary object. All 64 | unseen words in the test set are encoded as 'UNK' and has its own ID. Note that 65 | the word ID is 1-based, 0 is reserved for empty word, which has a zero word 66 | embedding vector. 67 | 68 | ## Training 69 | 70 | After setting up the dataset, call the following command to train a model. For 71 | IMG+BOW, {model file} is *models/img_bow.model.yml*. VIS+LSTM and 2-VIS+BLSTM 72 | can also be found in the *models* folder. 73 | 74 | ``` 75 | cd src 76 | 77 | GNUMPY_USE_GPU={yes|no} python train.py \ 78 | -model ../models/{model file} \ 79 | -output ../results \ 80 | -data ../data/cocoqa \ 81 | -config ../config/train.yml \ 82 | [-board {gpu board id} (optional)] 83 | ``` 84 | 85 | While training, it will print some statuses, and here is how to decode them: 86 | * N: number of epochs 87 | * T: number of seconds elapsed 88 | * TE: training loss 89 | * TR: accuracy on training set 90 | * VE: validation loss 91 | * VR: accuracy on validation set 92 | * ST: layer name 93 | * GN: euclidean norm of the gradient of the layer 94 | * GC: gradient clip 95 | * WN: euclidean norm of the weights of the layer 96 | * WC: weight clip 97 | 98 | First round it will train using only the training set and validate on the 99 | hold-out set, to determine the number of epoch to train. 
Then it will start 100 | another job to train the training set plus the hold out set together. It will 101 | not print test set performance until everything has been finished. 102 | 103 | ## Reading trained weight matrices 104 | 105 | The weights are stored in results folder named 106 | {model}-{timestamp}/{model}-{timestamp}.w.npy 107 | 108 | If you load the weights in python, it will be a list of arrays. 109 | Non-parameterized layers have a single 0 value in the list. For IMG+BOW model, 110 | there are only 2 non-zero entries, one is the word embedding matrix, and the 111 | other is the softmax weights. The softmax weights have the last row as the 112 | bias. 113 | 114 | For LSTM weights, the weight for the entire LSTM unit is reshaped into one 115 | matrix, 116 | 117 | * W = [W_I, W_F, W_Z, W_O]^T. 118 | 119 | W_I is for the input gate, W_F is for the 120 | forget gate, W_Z is for the input transformation, and W_O is for the output 121 | gate. The weights for each W has the last row as the bias, 122 | i.e. (InDim + 1) x OutDim. 
123 | 124 | * W_I = [W_XI, W_HI, W_CI, b_I]^T 125 | * W_F = [W_XF, W_HF, W_CF, b_F]^T 126 | * W_Z = [W_XZ, W_HZ, b_Z]^T 127 | * W_O = [W_XO, W_HO, W_CO, b_O]^T 128 | -------------------------------------------------------------------------------- /src/nn/lstm_test.py: -------------------------------------------------------------------------------- 1 | from sequential import * 2 | from lstm_old import * 3 | from dropout import * 4 | from reshape import * 5 | from lut import * 6 | from lstm import * 7 | import unittest 8 | 9 | class LSTM_Recurrent_Real_Tests(unittest.TestCase): 10 | def test_all(self): 11 | trainInput = np.loadtxt('lstm_test_input.csv', delimiter=',') 12 | trainInput = trainInput.reshape(trainInput.shape[0], trainInput.shape[1], 1) 13 | trainTarget = np.loadtxt('lstm_test_target.csv', delimiter=',') 14 | trainTarget = trainTarget.reshape(trainTarget.shape[0], 1) 15 | wordEmbed = np.loadtxt('lstm_test_word.csv', delimiter=',') 16 | D = 300 17 | D2 = 10 18 | N = trainInput.shape[0] 19 | Time = trainInput.shape[1] 20 | multiOutput = False 21 | time_unfold = TimeUnfold() 22 | lut = LUT( 23 | inputDim=np.max(trainInput)+1, 24 | outputDim=D, 25 | inputNames=None, 26 | needInit=False, 27 | initWeights=wordEmbed 28 | ) 29 | 30 | time_fold = TimeFold( 31 | timespan=Time, 32 | inputNames=None 33 | ) 34 | 35 | dropout = Dropout( 36 | name='d1', 37 | dropoutRate=0.2, 38 | initSeed=2, 39 | inputNames=None, 40 | outputDim=D2 41 | ) 42 | dropout2 = Dropout( 43 | name='d2', 44 | dropoutRate=0.2, 45 | initSeed=2, 46 | inputNames=None, 47 | outputDim=D2 48 | ) 49 | lstm = LSTM( 50 | name='lstm', 51 | timespan=Time, 52 | inputDim=D, 53 | outputDim=D2, 54 | inputNames=None, 55 | multiOutput=multiOutput, 56 | cutOffZeroEnd=True, 57 | learningRate=0.8, 58 | momentum=0.9, 59 | outputdEdX=True) 60 | 61 | lstm2 = LSTM_Old( 62 | name='lstm', 63 | inputDim=D, 64 | outputDim=D2, 65 | needInit=False, 66 | initRange=0.1, 67 | initSeed=0, 68 | cutOffZeroEnd=True, 69 | 
multiErr=multiOutput, 70 | learningRate=0.8, 71 | momentum=0.9, 72 | outputdEdX=True) 73 | 74 | sig = Map( 75 | name='sig', 76 | outputDim=1, 77 | activeFn=SigmoidActiveFn(), 78 | initRange=0.1, 79 | initSeed=5, 80 | learningRate=0.01, 81 | momentum=0.9, 82 | weightClip=10.0, 83 | gradientClip=0.1, 84 | weightRegConst=0.00005 85 | ) 86 | sig2 = Map( 87 | name='sig', 88 | outputDim=1, 89 | activeFn=SigmoidActiveFn(), 90 | initRange=0.1, 91 | initSeed=5, 92 | learningRate=0.01, 93 | momentum=0.9, 94 | weightClip=10.0, 95 | gradientClip=0.1, 96 | weightRegConst=0.00005 97 | ) 98 | 99 | costFn = crossEntOne 100 | model1 = Sequential( 101 | stages=[ 102 | time_unfold, 103 | lut, 104 | time_fold, 105 | dropout, 106 | lstm, 107 | sig 108 | ] 109 | ) 110 | 111 | model2 = Sequential( 112 | stages=[ 113 | time_unfold, 114 | lut, 115 | time_fold, 116 | dropout2, 117 | lstm2, 118 | sig2 119 | ] 120 | ) 121 | 122 | input_ = trainInput[0:N, 0:Time] 123 | target_ = trainTarget[0:N] 124 | Y1 = model1.forward(input_) 125 | 126 | W = lstm.getWeights() 127 | lstm2.W = W.transpose() 128 | Y2 = model2.forward(input_) 129 | self.chkEqual(Y1, Y2) 130 | 131 | E, dEdY1 = costFn(Y1, target_) 132 | E, dEdY2 = costFn(Y2, target_) 133 | model1.backward(dEdY1) 134 | model2.backward(dEdY2) 135 | 136 | dEdW = lstm.getGradient() 137 | self.chkEqual(dEdW.transpose(), lstm2.dEdW) 138 | lstm.updateWeights() 139 | lstm2.updateWeights() 140 | W = lstm.getWeights() 141 | self.chkEqual(W.transpose(), lstm2.W) 142 | 143 | def chkEqual(self, a, b): 144 | tolerance = 1e-1 145 | a = a.reshape(a.size) 146 | b = b.reshape(b.size) 147 | for i in range(a.size): 148 | if not ((a[i] == 0 and b[i] == 0) or 149 | (np.abs(a[i]) < 1e-7 and np.abs(b[i]) < 1e-7) or 150 | (np.abs(a[i] / b[i] - 1) < tolerance)): 151 | print a[i], b[i], a[i]/b[i] 152 | self.assertTrue( 153 | (a[i] == 0 and b[i] == 0) or 154 | (np.abs(a[i]) < 1e-7 and np.abs(b[i]) < 1e-7) or 155 | (np.abs(a[i] / b[i] - 1) < tolerance)) 156 | 157 | if 
def crossEntOneIdx(Y, T, weights=None):
    """One-vs-all cross entropy with integer class indices.

    Treats each output unit as an independent binary classifier: the unit at
    the target index should output 1 and every other unit should output 0.

    :param Y: predicted probabilities, shape (..., numClasses).
    :param T: integer target class index, one per example.
    :param weights: optional per-example loss weights.
    :return: (E, dEdY) -- mean loss and gradient w.r.t. Y, same shape as Y.
    """
    eps = 1e-8
    # Flatten any leading dimensions into a single example axis.
    # // keeps the shape an int (Python 3 safe; identical to / under
    # Python 2 integer division).
    Y2 = Y.reshape(Y.size // Y.shape[-1], Y.shape[-1])
    T2 = T.reshape(T.size)
    E = 0.0
    dEdY = np.zeros(Y2.shape, float)
    # Iterate over the flattened examples (Y2.shape[0], consistent with
    # crossEntIdx; the original iterated Y.shape[0], wrong for 3-d input).
    for n in range(0, Y2.shape[0]):
        # Merge the weighted/unweighted branches: w == 1 is a no-op.
        w = 1.0 if weights is None else weights[n]
        t = T2[n]
        # Target unit contributes -log(y_t); every unit contributes
        # -log(1 - y) (the target's share is cancelled by the +log term).
        # Bug fix: eps belongs outside the index -- the original computed
        # Y2[n, T2[n] + eps], a float index.
        E += (-np.log(Y2[n, t] + eps) + np.log(1 - Y2[n, t] + eps)) * w
        E += -np.sum(np.log(1 - Y2[n, :] + eps)) * w
        dEdY[n, :] = (1 / (1 - Y2[n] + eps)) * w
        dEdY[n, t] = (-1 / (Y2[n, t] + eps)) * w
    E /= Y2.shape[0]
    dEdY /= Y2.shape[0]
    dEdY = dEdY.reshape(Y.shape)
    return E, dEdY
def parseInputFile(filename):
    """Read an adhoc question list file.

    Expected format (one question per line, comma-separated):
        QID1,Question1,GroundTruthAnswer1
    An optional first line "caption:..." supplies a caption for the output.
    Note: questions containing commas are not supported by this format.

    :param filename: path to the question list file.
    :return: (caption, qids, questions, answers) -- caption string plus
             three parallel lists (qids as ints).
    """
    caption = ''
    qids = []
    questions = []
    answers = []
    with open(filename) as f:
        for lineNo, line in enumerate(f):
            # The caption is only recognized on the very first line.
            if lineNo == 0 and line.startswith('caption:'):
                caption = line[8:-1]
                continue
            fields = line.split(',')
            qids.append(int(fields[0]))
            questions.append(fields[1])
            answers.append(fields[2].strip('\n'))
    return caption, qids, questions, answers
39 | -d[ata] {dataFolder} 40 | -i[nput] {listFile} 41 | -o[utput] {outputFolder} 42 | [-k {top K answers}] 43 | [-p[icture] {pictureFolder}] 44 | [-r[esults] {resultsFolder}] 45 | [-f[ile] {outputTexFilename}] 46 | [-dataset {daquar/cocoqa}] 47 | [-format {html/latex}] 48 | Parameters: 49 | -m[odel]: Model name and model ID 50 | -d[ata]: Dataset dataFolder 51 | -i[nput]: Adhoc question list filename 52 | -o[utput]: Output folder of the rendered results 53 | -k: Render top-K answers (default 1) 54 | -p[icture]: Picture folder, only required in LaTeX mode (default "img") 55 | -r[esults]: Results folder where trained models are stored (default "../results") 56 | -f[ile]: Output filename, only required in LaTex mode 57 | -dataset: Use DAQUAR/COCO-QA dataset (default "cocoqa") 58 | -format: Set output format to HTML/LaTeX (default "html") 59 | 60 | Input question list format: 61 | QID1,Question1,GroundTruthAnswer1 62 | QID2,Question2,GroundTruthAnswer2 63 | ... 64 | """ 65 | params = ir.parseComparativeParams(sys.argv) 66 | 67 | urlDict = ir.loadImgUrl(params['dataset'], params['dataFolder']) 68 | data = it.loadDataset(params['dataFolder']) 69 | maxlen = data['testData'][0].shape[1] 70 | 71 | print('Parsing input file...') 72 | caption, qids, questions, answers = parseInputFile(params['inputFile']) 73 | idx = np.array(qids, dtype='int') 74 | #inputTestSel = data['testData'][0][idx] 75 | #targetTestSel = data['testData'][1][idx] 76 | imgids = qids 77 | #imgids = inputTestSel[:, 0, 0] 78 | inputTest = prep.combine(\ 79 | prep.lookupQID(questions, data['questionDict'], maxlen), imgids) 80 | targetTest = prep.lookupAnsID(answers, data['ansDict']) 81 | questionTypeArray = data['questionTypeArray'][idx] 82 | 83 | print('Running models...') 84 | modelOutputs = ie.runAllModels( 85 | inputTest, 86 | questionTypeArray, 87 | params['models'], 88 | params['resultsFolder'], 89 | params['dataset'], 90 | params['dataFolder']) 91 | 92 | # Render 93 | if not 
os.path.exists(params['outputFolder']): 94 | os.makedirs(params['outputFolder']) 95 | if params['format'] == 'html': 96 | print('Rendering HTML...') 97 | pages = ir.renderHtml( 98 | inputTest, 99 | targetTest, 100 | data['questionIdict'], 101 | data['ansIdict'], 102 | urlDict, 103 | topK=params['topK'], 104 | modelOutputs=modelOutputs, 105 | modelNames=ir.getModelNames(params['models']), 106 | questionIds=idx) 107 | for i, page in enumerate(pages): 108 | with open(os.path.join(params['outputFolder'], 109 | '%s-%d.html' % (params['outputFilename'], i)), 'w') as f: 110 | f.write(page) 111 | elif params['format'] == 'latex': 112 | # For LaTeX only, replace underscore in vocabulary. 113 | data['questionIdict'] = ir.escapeLatexIdict(data['questionIdict']) 114 | data['ansIdict'] = ir.escapeLatexIdict(data['ansIdict']) 115 | ir.renderLatex( 116 | inputTest, 117 | targetTest, 118 | data['questionIdict'], 119 | data['ansIdict'], 120 | urlDict, 121 | topK=params['topK'], 122 | outputFolder=params['outputFolder'], 123 | pictureFolder=params['pictureFolder'], 124 | comments=None, 125 | caption=caption, 126 | modelOutputs=modelOutputs, 127 | modelNames=ir.getModelNames(params['models']), 128 | questionIds=idx, 129 | filename=params['outputFilename']+'.tex') 130 | -------------------------------------------------------------------------------- /src/nn/container.py: -------------------------------------------------------------------------------- 1 | from active_func import * 2 | from map import * 3 | 4 | class Input(Stage): 5 | def __init__(self, name, outputDim): 6 | Stage.__init__(self, 7 | name=name, 8 | inputNames=[], 9 | outputDim=outputDim) 10 | def setValue(self, value): 11 | self.Y = value 12 | def forward(self, X): 13 | return X 14 | def backward(self, dEdY): 15 | return dEdY 16 | 17 | class Output(Stage): 18 | def __init__(self, name, inputNames, outputDim=0, defaultValue=0): 19 | Stage.__init__(self, 20 | name=name, 21 | inputNames=inputNames, 22 | 
defaultValue=defaultValue, 23 | outputDim=outputDim) 24 | def graphForward(self): 25 | self.Y = self.getInput() 26 | def graphBackward(self): 27 | self.sendError(self.dEdY) 28 | 29 | class Container(Stage): 30 | def __init__(self, 31 | stages, 32 | outputStageNames, 33 | inputDim, 34 | outputDim, 35 | inputNames, 36 | name=None, 37 | outputdEdX=True): 38 | Stage.__init__(self, 39 | name=name, 40 | inputNames=inputNames, 41 | outputDim=outputDim, 42 | outputdEdX=outputdEdX) 43 | self.stages = [] 44 | self.stageDict = {} 45 | self.inputDim = inputDim 46 | self.outputStageNames = outputStageNames 47 | 48 | inputStage = self.createInputStage() 49 | self.stages.append(inputStage) 50 | self.stageDict['input'] = inputStage 51 | 52 | for stage in stages: 53 | self.register(stage) 54 | 55 | outputStage = self.createOutputStage() 56 | self.stages.append(outputStage) 57 | self.stageDict['output'] = outputStage 58 | 59 | self.link() 60 | self.dEdW = [] 61 | for stage in self.stages: 62 | self.dEdW.append(0.0) 63 | 64 | def createInputStage(self): 65 | return Input(name='input', outputDim=self.inputDim) 66 | 67 | def createOutputStage(self): 68 | return Output(name='output', inputNames=self.outputStageNames) 69 | 70 | def register(self, stage): 71 | """ 72 | Register a substage 73 | :param stage: new recurrent substage 74 | :return: 75 | """ 76 | #print stage 77 | if not hasattr(stage, 'used'): 78 | stage.used = False 79 | self.stages.append(stage) 80 | self.stageDict[stage.name] = stage 81 | 82 | def link(self): 83 | """ 84 | Link substages with their input strings 85 | :return: 86 | """ 87 | for stage in self.stages: 88 | for stageName in stage.inputNames: 89 | stageInput = self.stageDict[stageName] 90 | stageInput.used = True 91 | stage.addInput(stageInput) 92 | 93 | def clearError(self): 94 | for stage in self.stages: 95 | stage.clearError() 96 | self.dEdY = 0.0 97 | self.receivedError = False 98 | 99 | def graphForward(self, dropout=True): 100 | self.X = self.getInput() 
101 | self.Y = self.forward(self.X, dropout=dropout) 102 | 103 | #@profile 104 | def forward(self, X, dropout=True): 105 | self.stages[0].Y = X 106 | for s in range(1, len(self.stages) - 1): 107 | if self.stages[s].used: 108 | if hasattr(self.stages[s], 'dropout'): 109 | self.stages[s].dropout = dropout 110 | self.stages[s].graphForward() 111 | elif isinstance(self.stages[s], Container): 112 | self.stages[s].graphForward(dropout=dropout) 113 | else: 114 | self.stages[s].graphForward() 115 | self.stages[-1].graphForward() 116 | Y = self.stages[-1].Y 117 | 118 | # Clear error and ready for next batch 119 | self.clearError() 120 | 121 | self.X = X 122 | return Y 123 | 124 | #@profile 125 | def backward(self, dEdY): 126 | self.stages[-1].sendError(dEdY) 127 | for s in reversed(range(1, len(self.stages) - 1)): 128 | #print 'container backward', self.stages[s].name, self.stages[s].used, self.stages[s].receivedError 129 | if self.stages[s].used and self.stages[s].receivedError: 130 | self.stages[s].graphBackward() 131 | 132 | # Collect input error 133 | if self.outputdEdX: 134 | dEdX = self.stages[0].dEdY 135 | 136 | return dEdX if self.outputdEdX else None 137 | 138 | def updateWeights(self): 139 | for s in range(1, len(self.stages)-1): 140 | # Because all stages are "shallow copied", the weights are shared. 141 | self.stages[s].updateWeights() 142 | 143 | def updateLearningParams(self, numEpoch): 144 | for s in range(1, len(self.stages)-1): 145 | # Since only the first stage updates the weights, 146 | # learning params just need to update in the first stage. 
147 | self.stages[s].updateLearningParams(numEpoch) 148 | 149 | def setGradient(self, value): 150 | if type(value) is float: 151 | for s in range(1, len(self.stages) - 1): 152 | self.stages[s].setGradient(value) 153 | elif type(value) is np.ndarray: 154 | for s in range(1, len(self.stages) - 1): 155 | self.stages[s].setGradient(value[s - 1]) 156 | else: 157 | raise Exception('Unknown type %s for setGradient' % type(value)) 158 | 159 | def getWeights(self): 160 | weights = [] 161 | for s in range(1, len(self.stages)-1): 162 | if self.stages[s].gpu: 163 | weights.append(gpu.as_numpy_array(self.stages[s].getWeights())) 164 | else: 165 | weights.append(self.stages[s].getWeights()) 166 | return np.array(weights, dtype=object) 167 | 168 | def loadWeights(self, W): 169 | for s in range(1, len(self.stages) - 1): 170 | self.stages[s].loadWeights(W[s - 1]) 171 | -------------------------------------------------------------------------------- /src/nn/reshape.py: -------------------------------------------------------------------------------- 1 | from stage import * 2 | 3 | class Reshape(Stage): 4 | def __init__(self, reshapeFn, inputNames=None, outputDim=0, name=None, outputdEdX=True): 5 | Stage.__init__(self, name=name, inputNames=inputNames, outputDim=outputDim, outputdEdX=outputdEdX) 6 | self.reshapeFn = eval('lambda x: ' + reshapeFn) 7 | self.Xshape = 0 8 | 9 | def forward(self, X): 10 | self.Xshape = X.shape 11 | return np.reshape(X, self.reshapeFn(X.shape)) 12 | 13 | def backward(self, dEdY): 14 | if self.outputdEdX: 15 | return np.reshape(dEdY, self.Xshape) 16 | 17 | class TimeUnfold(Reshape): 18 | def __init__(self, inputNames=None, name=None, outputdEdX=True): 19 | Reshape.__init__(self, 20 | name=name, 21 | inputNames=inputNames, 22 | reshapeFn='(x[0] * x[1], x[2])', 23 | outputdEdX=outputdEdX) 24 | 25 | class TimeFold(Reshape): 26 | def __init__(self, timespan, inputNames=None, name=None, outputdEdX=True): 27 | self.timespan = timespan 28 | t = str(self.timespan) 
class TimeRepeat(Stage):
    """Repeats the input along the time axis.

    A 2-d input (N, D) is promoted to (N, 1, D) before tiling, so the
    output is always 3-d: (N, numRepeats * T, D).
    """
    def __init__(self, numRepeats, inputNames=None, outputDim=0, name=None, outputdEdX=True):
        """
        :param numRepeats: number of copies to make along the time axis.
        """
        Stage.__init__(self, name=name, inputNames=inputNames, outputDim=outputDim, outputdEdX=outputdEdX)
        self.numRepeats = numRepeats

    def forward(self, X):
        # Remember the original shape so backward can undo the 2-d promotion.
        self.Xshape = X.shape
        if len(X.shape) == 2:
            X = X.reshape(X.shape[0], 1, X.shape[1])
        return np.tile(X, (1, self.numRepeats, 1))

    def backward(self, dEdY):
        # Every repeat saw the same input, so the copies' gradients sum.
        # Returns None implicitly when outputdEdX is False.
        if self.outputdEdX:
            # Fix: // keeps the time dimension an int under Python 3
            # (identical to / under Python 2 integer division; plain /
            # would produce a float and break np.reshape on Python 3).
            dEdY = dEdY.reshape(
                dEdY.shape[0], self.numRepeats,
                dEdY.shape[1] // self.numRepeats, dEdY.shape[2])
            dEdX = np.sum(dEdY, axis=1)
            if len(self.Xshape) == 2:
                dEdX = dEdX.reshape(dEdX.shape[0], dEdX.shape[-1])
            return dEdX
Scans and selects the last timestep. 98 | """ 99 | def __init__(self, inputNames, outputDim=0, name=None, outputdEdX=True): 100 | Stage.__init__(self, 101 | name=name, 102 | inputNames=inputNames, 103 | outputDim=outputDim, 104 | outputdEdX=outputdEdX) 105 | self.Xend = 0.0 106 | 107 | def forward(self, X): 108 | N = X.shape[0] 109 | self.X = X 110 | self.Xend = np.zeros(N, dtype=int) + X.shape[1] 111 | reachedEnd = np.sum(X, axis=-1) == 0.0 112 | Y = np.zeros((N, X.shape[-1])) 113 | # Scan for the end of the sequence. 114 | for n in range(N): 115 | for t in range(X.shape[1]): 116 | if reachedEnd[n, t]: 117 | self.Xend[n] = t 118 | break 119 | for n in range(N): 120 | if self.Xend[n] > 0: 121 | Y[n] = X[n, self.Xend[n] - 1] 122 | return Y 123 | 124 | def backward(self, dEdY): 125 | if self.outputdEdX: 126 | dEdX = np.zeros(self.X.shape) 127 | for n in range(dEdY.shape[0]): 128 | if self.Xend[n] > 0: 129 | dEdX[n, self.Xend[n] - 1, :] = dEdY[n] 130 | return dEdX 131 | else: 132 | return None 133 | 134 | class Concat(Stage): 135 | def __init__(self, inputNames, axis, name=None): 136 | Stage.__init__(self, name=name, inputNames=inputNames, outputDim=0) 137 | self.axis = axis 138 | def getInput(self): 139 | if len(self.inputs) > 1: 140 | self.splX = [] 141 | for stage in self.inputs: 142 | X = stage.Y 143 | self.splX.append(X) 144 | return np.concatenate(self.splX, axis=self.axis) 145 | else: 146 | return self.inputs[0].Y 147 | def sendError(self, dEdX): 148 | """ 149 | Iterates over input list and sends dEdX. 
150 | """ 151 | if len(self.inputs) > 1: 152 | s = 0 153 | for stage in self.inputs: 154 | s2 = s + stage.Y.shape[self.axis] 155 | if self.axis == 0: 156 | stage.dEdY += dEdX[s : s2] 157 | elif self.axis == 1: 158 | stage.dEdY += dEdX[:, s : s2] 159 | elif self.axis == 2: 160 | stage.dEdY += dEdX[:, :, s : s2] 161 | s = s2 162 | stage.receivedError = True 163 | else: 164 | self.inputs[0].dEdY += dEdX 165 | self.inputs[0].receivedError = True 166 | 167 | def forward(self, X): 168 | return X 169 | def backward(self, dEdY): 170 | return dEdY 171 | -------------------------------------------------------------------------------- /src/imageqa_compare.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nn 4 | import numpy as np 5 | import imageqa_test as it 6 | import imageqa_render as ir 7 | import imageqa_ensemble as ie 8 | 9 | nameList = ['object', 'number', 'color', 'location'] 10 | 11 | def getCatName(i): 12 | return nameList[i] 13 | 14 | def getBinName(n): 15 | bin = [] 16 | for k in range(numModels): 17 | bin.append(str(n >> (numModels - k - 1))) 18 | n = n & (~(1 << (numModels - k - 1))) 19 | return ''.join(bin) 20 | 21 | def getName(catName, binName): 22 | return catName + '-' + binName 23 | 24 | def renderIndex(modelNames, numCategories, bins): 25 | htmlList = [] 26 | htmlList.append('' % \ 27 | 'span.good {color:green;} span.bad {color:red;} \ 28 | table{border-spacing:10px;}') 29 | numModels = len(modelNames) 30 | numCorrect = 1 << numModels 31 | htmlList.append('

Models comparisons

') 32 | htmlList.append('Notes:
\ 33 | Green means the model gets correct
') 34 | htmlList.append('\ 35 | Red means the model gets wrong') 36 | for i in range(numCategories): 37 | htmlList.append('

%s

' % getCatName(i)) 38 | htmlList.append('') 39 | for j in range(numCorrect): 40 | htmlList.append('') 41 | binId = numCorrect * i + j 42 | for k, c in enumerate(getBinName(j)): 43 | htmlList.append('') 51 | htmlList.append('' % len(bins[binId])) 52 | htmlList.append('' % \ 53 | getName(getCatName(i), getBinName(j))) 54 | htmlList.append('') 55 | 56 | htmlList.append('
') 44 | if c == '1': 45 | htmlList.append( 46 | '%s' % modelNames[k]) 47 | elif c == '0': 48 | htmlList.append( 49 | '%s' % modelNames[k]) 50 | htmlList.append('%d itemslink
if __name__ == '__main__':
    """
    Usage: python imageqa_compare.py
                    -m[odel] {name1:modelId1}
                    -m[odel] {name2:modelId2}
                    -em[odel] {name3:ensembleModelId3,ensembleModelId4,...}
                    -pem[odel] {name3:ensembleModelId5,ensembleModelId6,...}
                    -d[ata] {dataFolder}
                    -o[utput] {outputFolder}
                    [-k {top K answers}]
                    [-r[esults] {resultsFolder}]
                    [-dataset {daquar/cocoqa}]
    """
    params = ir.parseComparativeParams(sys.argv)

    urlDict = ir.loadImgUrl(params['dataset'], params['dataFolder'])
    data = it.loadDataset(params['dataFolder'])

    print('Running models...')
    inputTest = data['testData'][0]
    targetTest = data['testData'][1]
    questionTypeArray = data['questionTypeArray']
    modelOutputs = ie.runAllModels(
        inputTest,
        questionTypeArray,
        params['models'],
        params['resultsFolder'],
        params['dataset'],
        params['dataFolder'])

    # Sort questions by question types.
    # Sort questions by correctness differences.
    print('Sorting questions...')
    numCategories = np.max(questionTypeArray) + 1
    numModels = len(params['models'])
    # Each bin encodes which subset of the models answered correctly,
    # so there are 2^numModels correctness patterns per category.
    numCorrect = 1 << numModels
    numBins = numCategories * numCorrect
    modelAnswers = np.zeros((numModels, inputTest.shape[0]), dtype='int')
    bins = [None] * numBins
    names = []
    for i in range(numCategories):
        catName = getCatName(i)
        for j in range(numCorrect):
            binName = getBinName(j)
            names.append(getName(catName, binName))
    for i in range(numModels):
        modelAnswers[i] = np.argmax(modelOutputs[i], axis=-1)
    for n in range(inputTest.shape[0]):
        correct = targetTest[n, 0]
        # Bit k of bintmp is set iff model k got this question right.
        bintmp = 0
        for i in range(numModels):
            if modelAnswers[i, n] == correct:
                bintmp += 1 << (numModels - i - 1)
        category = questionTypeArray[n]
        binNum = category * numCorrect + bintmp
        # Fixed: was `bins[binNum] == None`; identity comparison with None
        # must use `is`.
        if bins[binNum] is None:
            bins[binNum] = []
        bins[binNum].append(n)

    # Normalize untouched bins to empty lists so renderIndex can len() them.
    for i, binContent in enumerate(bins):
        if binContent is None:
            bins[i] = []

    # Render
    print('Rendering webpages...')
    print('Rendering index...')
    outputFolder = params['outputFolder']

    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    with open(os.path.join(outputFolder, 'index.html'), 'w') as f:
        f.write(renderIndex(
            ir.getModelNames(params['models']), numCategories, bins))

    for i in range(numBins):
        if bins[i] is not None:
            # Normalized to the script's print() call style (identical
            # output for a single argument).
            print('Rendering %s...' % names[i])
            outputSubFolder = os.path.join(outputFolder, names[i])
            idx = np.array(bins[i], dtype='int')
            inputTestSubset = inputTest[idx]
            targetTestSubset = targetTest[idx]
            modelOutputsSubset = []
            for j in range(numModels):
                modelOutputsSubset.append(modelOutputs[j][idx])
            if not os.path.exists(outputSubFolder):
                os.makedirs(outputSubFolder)
            htmlHyperLink = '%d.html'
            pages = ir.renderHtml(
                inputTestSubset,
                targetTestSubset,
                data['questionIdict'],
                data['ansIdict'],
                urlDict,
                topK=params['topK'],
                modelOutputs=modelOutputsSubset,
                modelNames=ir.getModelNames(params['models']),
                questionIds=idx)
            for j, page in enumerate(pages):
                with open(os.path.join(outputSubFolder,
                        htmlHyperLink % j), 'w') as f:
                    f.write(page)
32 | self.inputDim = inputDim 33 | self.outputDim = outputDim 34 | self.I = RecurrentAdapter(Map( 35 | name=name + '.I', 36 | inputNames=['input(0)', name + '.H(-1)', name + '.C(-1)'], 37 | outputDim=D2, 38 | activeFn=SigmoidActiveFn(), 39 | initRange=initRange, 40 | initSeed=initSeed, 41 | biasInitConst=1.0, 42 | learningRate=learningRate, 43 | learningRateAnnealConst=learningRateAnnealConst, 44 | momentum=momentum, 45 | deltaMomentum=deltaMomentum, 46 | gradientClip=gradientClip, 47 | weightClip=weightClip, 48 | weightRegConst=weightRegConst)) 49 | 50 | self.F = RecurrentAdapter(Map( 51 | name=name + '.F', 52 | inputNames=['input(0)', name + '.H(-1)', name + '.C(-1)'], 53 | outputDim=D2, 54 | activeFn=SigmoidActiveFn(), 55 | initRange=initRange, 56 | initSeed=initSeed+1, 57 | biasInitConst=1.0, 58 | learningRate=learningRate, 59 | learningRateAnnealConst=learningRateAnnealConst, 60 | momentum=momentum, 61 | deltaMomentum=deltaMomentum, 62 | gradientClip=gradientClip, 63 | weightClip=weightClip, 64 | weightRegConst=weightRegConst)) 65 | 66 | self.Z = RecurrentAdapter(Map( 67 | name=name + '.Z', 68 | inputNames=['input(0)', name + '.H(-1)'], 69 | outputDim=D2, 70 | activeFn=TanhActiveFn(), 71 | initRange=initRange, 72 | initSeed=initSeed+2, 73 | biasInitConst=0.0, 74 | learningRate=learningRate, 75 | learningRateAnnealConst=learningRateAnnealConst, 76 | momentum=momentum, 77 | deltaMomentum=deltaMomentum, 78 | gradientClip=gradientClip, 79 | weightClip=weightClip, 80 | weightRegConst=weightRegConst)) 81 | 82 | self.O = RecurrentAdapter(Map( 83 | name=name + '.O', 84 | inputNames=['input(0)', name + '.H(-1)', name + '.C(0)'], 85 | outputDim=D2, 86 | activeFn=SigmoidActiveFn(), 87 | initRange=initRange, 88 | initSeed=initSeed+3, 89 | biasInitConst=1.0, 90 | learningRate=learningRate, 91 | learningRateAnnealConst=learningRateAnnealConst, 92 | momentum=momentum, 93 | deltaMomentum=deltaMomentum, 94 | gradientClip=gradientClip, 95 | weightClip=weightClip, 96 | 
weightRegConst=weightRegConst)) 97 | 98 | if not needInit: 99 | self.I.W, self.F.W, self.Z.W, self.O.W = self.splitWeights(initWeights) 100 | 101 | self.FC = RecurrentAdapter(ElementProduct( 102 | name=name + '.F*C', 103 | inputNames=[name + '.F', name + '.C(-1)'], 104 | outputDim=D2)) 105 | 106 | self.IZ = RecurrentAdapter(ElementProduct( 107 | name=name + '.I*Z', 108 | inputNames=[name + '.I', name + '.Z'], 109 | outputDim=D2)) 110 | 111 | self.C = RecurrentAdapter(Sum( 112 | name=name + '.C', 113 | inputNames=[name + '.F*C', name + '.I*Z'], 114 | numComponents=2, 115 | outputDim=D2)) 116 | 117 | self.U = RecurrentAdapter(Active( 118 | name=name + '.U', 119 | inputNames=[name + '.C'], 120 | outputDim=D2, 121 | activeFn=TanhActiveFn())) 122 | 123 | self.H = RecurrentAdapter(ElementProduct( 124 | name=name + '.H', 125 | inputNames=[name + '.O', name + '.U'], 126 | outputDim=D2, 127 | defaultValue=defaultValue)) 128 | 129 | stages = [self.I, self.F, self.Z, self.FC, self.IZ, self.C, self.O, self.U, self.H] 130 | RecurrentContainer.__init__(self, 131 | stages=stages, 132 | timespan=timespan, 133 | inputNames=inputNames, 134 | outputStageNames=[name + '.H'], 135 | inputDim=inputDim, 136 | outputDim=outputDim, 137 | multiInput=multiInput, 138 | multiOutput=multiOutput, 139 | cutOffZeroEnd=cutOffZeroEnd, 140 | name=name, 141 | outputdEdX=outputdEdX) 142 | 143 | def getWeights(self): 144 | if self.I.stages[0].gpu: 145 | return np.concatenate(( 146 | gpu.as_numpy_array(self.I.getWeights()), 147 | gpu.as_numpy_array(self.F.getWeights()), 148 | gpu.as_numpy_array(self.Z.getWeights()), 149 | gpu.as_numpy_array(self.O.getWeights())), axis=0) 150 | else: 151 | return np.concatenate((self.I.getWeights(), 152 | self.F.getWeights(), 153 | self.Z.getWeights(), 154 | self.O.getWeights()), axis=0) 155 | 156 | def getGradient(self): 157 | if self.I.stages[0].gpu: 158 | return np.concatenate(( 159 | gpu.as_numpy_array(self.I.getGradient()), 160 | 
gpu.as_numpy_array(self.F.getGradient()), 161 | gpu.as_numpy_array(self.Z.getGradient()), 162 | gpu.as_numpy_array(self.O.getGradient())), axis=0) 163 | else: 164 | return np.concatenate((self.I.getGradient(), 165 | self.F.getGradient(), 166 | self.Z.getGradient(), 167 | self.O.getGradient()), axis=0) 168 | 169 | def splitWeights(self, W): 170 | D = self.inputDim 171 | D2 = self.outputDim 172 | s = D + D2 + D2 + 1 173 | s2 = D + D2 + 1 174 | IW = W[:s, :] 175 | FW = W[s:s + s, :] 176 | ZW = W[s + s:s + s + s2, :] 177 | OW = W[s + s +s2:s + s + s2 + s, :] 178 | return IW, FW, ZW, OW 179 | 180 | def loadWeights(self, W): 181 | IW, FW, ZW, OW = self.splitWeights(W) 182 | if self.I.stages[0].gpu: 183 | self.I.loadWeights(gpu.as_garray(IW)) 184 | self.F.loadWeights(gpu.as_garray(FW)) 185 | self.Z.loadWeights(gpu.as_garray(ZW)) 186 | self.O.loadWeights(gpu.as_garray(OW)) 187 | else: 188 | self.I.loadWeights(IW) 189 | self.F.loadWeights(FW) 190 | self.Z.loadWeights(ZW) 191 | self.O.loadWeights(OW) 192 | -------------------------------------------------------------------------------- /src/nn/stage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | import os 4 | use_gpu = os.environ.get('GNUMPY_USE_GPU', 'yes') == 'yes' 5 | verbose = os.environ.get('VERBOSE', 'no') == 'yes' 6 | if use_gpu: 7 | import gnumpy as gpu 8 | 9 | class Stage: 10 | def __init__(self, 11 | name, 12 | inputNames, 13 | outputDim, 14 | defaultValue=0.0, 15 | learningRate=0.0, 16 | learningRateAnnealConst=0.0, 17 | momentum=0.0, 18 | deltaMomentum=0.0, 19 | weightClip=0.0, 20 | gradientClip=0.0, 21 | weightRegConst=0.0, 22 | gpu=False, 23 | outputdEdX=True): 24 | self.name = name 25 | self.inputNames = inputNames 26 | self.inputs = None 27 | self.outputDim = outputDim 28 | self.defaultValue = np.zeros(outputDim) + defaultValue 29 | self.startLearningRate = learningRate 30 | self.learningRate = learningRate 31 | 
self.learningRateAnnealConst = learningRateAnnealConst 32 | self.momentum = momentum 33 | self.deltaMomentum = deltaMomentum 34 | self.weightClip = weightClip 35 | self.gradientClip = gradientClip 36 | self.weightRegConst = weightRegConst 37 | self.outputdEdX=outputdEdX 38 | self.dEdWnorm = 0.0 39 | self.Wnorm = 0.0 40 | self.dEdW = 0.0 41 | self.lastdW = 0.0 42 | self.W = 0.0 43 | self.Y = 0.0 44 | self.X = 0.0 45 | self.dEdY = 0.0 46 | self.gpu = gpu 47 | self.splX = None 48 | self.receivedError = False 49 | def __str__(self): 50 | return self.name 51 | 52 | def addInput(self, stage): 53 | if self.inputs is None: 54 | self.inputs = [stage] 55 | else: 56 | self.inputs.append(stage) 57 | 58 | def getInput(self): 59 | """ 60 | Fetches input from each input stage. 61 | Concatenates input into one vector. 62 | """ 63 | #print self.name 64 | if len(self.inputs) > 1: 65 | self.splX = [] 66 | for stage in self.inputs: 67 | X = stage.Y 68 | self.splX.append(X) 69 | #print self.name, 'get input', stage.name, X.dtype 70 | #print '>', stage.name, X.shape 71 | return np.concatenate(self.splX, axis=-1) 72 | else: 73 | #print self.name,'get input', self.inputs[0].Y.dtype 74 | return self.inputs[0].Y 75 | 76 | def clearError(self): 77 | self.dEdY = 0.0 78 | self.receivedError = False 79 | 80 | def sendError(self, dEdX): 81 | """ 82 | Iterates over input list and sends dEdX. 83 | """ 84 | if len(self.inputs) > 1: 85 | s = 0 86 | for stage in self.inputs: 87 | s2 = s + stage.Y.shape[-1] 88 | stage.dEdY += dEdX[:, s : s2] 89 | s = s2 90 | stage.receivedError = True 91 | else: 92 | #if type(self.inputs[0].dEdY) == np.ndarray: 93 | # print self.name, self.inputs[0].name, self.inputs[0].dEdY.shape, dEdX.shape 94 | self.inputs[0].dEdY += dEdX 95 | self.inputs[0].receivedError = True 96 | #print self.name, 'send error', self.inputs[0].name 97 | 98 | def getValue(self): 99 | """ 100 | Gets the output value. 
101 | """ 102 | return self.Y 103 | 104 | def getGradient(self): 105 | """ 106 | Gets the gradient with regard to the weights. 107 | """ 108 | return self.dEdW 109 | 110 | def setGradient(self, value): 111 | """ 112 | Sets the gradient with regard to the weights. 113 | :param value: float or numpy array 114 | :return: 115 | """ 116 | self.dEdW = value 117 | 118 | def graphForward(self): 119 | """ 120 | Forward propagates. 121 | """ 122 | self.X = self.getInput() 123 | if verbose and hasattr(self.X, 'shape'): 124 | print 'forward in', self.name, self.X.shape 125 | self.Y = self.forward(self.X) 126 | if verbose and hasattr(self.Y, 'shape'): 127 | print 'forward out', self.name, self.Y.shape 128 | 129 | def forward(self, X): 130 | """ 131 | Abstract method. Forward pass input to the stage. 132 | :param X: The input. At least two dimensional numpy array. 133 | The first dimension is always the number of examples. 134 | :return: The output of the stage. 135 | """ 136 | return 137 | 138 | def graphBackward(self): 139 | """ 140 | Backward propagates. 141 | """ 142 | if verbose and hasattr(self.dEdY, 'shape'): 143 | print 'backward in', self.name, self.dEdY.shape, np.mean(self.dEdY) 144 | dEdX = self.backward(self.dEdY) 145 | if self.outputdEdX: 146 | self.sendError(dEdX) 147 | if verbose and hasattr(dEdX, 'shape'): 148 | print 'backward out', self.name, dEdX.shape, np.mean(dEdX) 149 | 150 | def backward(self, dEdY): 151 | """ 152 | Abstract method. Backward propagate error in the stage. 153 | :param dEdY: The error of the output. 154 | :return: The error of the input. 
155 | """ 156 | return 157 | 158 | def updateWeights(self): 159 | self._updateWeights(self.dEdW) 160 | 161 | def _updateWeights(self, dEdW): 162 | if self.gpu: 163 | if self.gradientClip > 0.0: 164 | self.dEdWnorm = gpu.sqrt(gpu.sum(dEdW ** 2)) 165 | if self.dEdWnorm > self.gradientClip: 166 | dEdW *= self.gradientClip / self.dEdWnorm 167 | if self.learningRate > 0.0: 168 | self.lastdW = -self.learningRate * dEdW + \ 169 | self.momentum * self.lastdW 170 | self.W += self.lastdW 171 | if self.weightRegConst > 0.0: 172 | a = self.learningRate * self.weightRegConst 173 | self.W -= a * self.W 174 | if self.weightClip > 0.0: 175 | self.Wnorm = gpu.sqrt(gpu.sum(self.W ** 2)) 176 | if self.Wnorm > self.weightClip: 177 | self.W *= self.weightClip / self.Wnorm 178 | else: 179 | if self.gradientClip > 0.0: 180 | self.dEdWnorm = np.sqrt(np.sum(np.power(dEdW, 2))) 181 | if self.dEdWnorm > self.gradientClip: 182 | dEdW *= self.gradientClip / self.dEdWnorm 183 | if self.learningRate > 0.0: 184 | self.lastdW = -self.learningRate * dEdW + \ 185 | self.momentum * self.lastdW 186 | self.W += self.lastdW 187 | if self.weightRegConst > 0.0: 188 | a = self.learningRate * self.weightRegConst 189 | self.W -= a * self.W 190 | if self.weightClip > 0.0: 191 | self.Wnorm = np.sqrt(np.sum(np.power(self.W, 2))) 192 | if self.Wnorm > self.weightClip: 193 | self.W *= self.weightClip / self.Wnorm 194 | 195 | def updateLearningParams(self, numEpoch): 196 | self.learningRate = self.startLearningRate / \ 197 | (1.0 + self.learningRateAnnealConst * numEpoch) 198 | self.momentum -= self.deltaMomentum 199 | 200 | if self.gradientClip > 0.0 or self.weightClip > 0.0: 201 | print 'ST: %11s ' % self.name, 202 | if self.gradientClip > 0.0: 203 | print 'GN: %8.4f ' % self.dEdWnorm, 204 | print 'GC: %8.4f ' % self.gradientClip, 205 | if self.weightClip > 0.0: 206 | print 'WN: %8.4f ' % self.Wnorm, 207 | print 'WC: %8.4f ' % self.weightClip, 208 | print 209 | 210 | def getWeights(self): 211 | if self.gpu: 212 
| return gpu.as_numpy_array(self.W) 213 | else: 214 | return self.W 215 | 216 | def loadWeights(self, W): 217 | if self.gpu: 218 | self.W = gpu.as_garray(W) 219 | else: 220 | self.W = W 221 | 222 | def copy(self): 223 | return copy.copy(self) -------------------------------------------------------------------------------- /src/nn/recurrent_tests.py: -------------------------------------------------------------------------------- 1 | from recurrent import * 2 | from lstm_old import * 3 | import stage_tests 4 | import unittest 5 | from map import * 6 | from active import * 7 | from elem_prod import * 8 | from sum import * 9 | 10 | class Recurrent_Tests(stage_tests.StageTests): 11 | def setUp(self): 12 | self.N = 5 13 | self.T = 5 14 | self.D = 10 15 | self.D2 = 5 16 | self.sigm_ = Map( 17 | name='sigm', 18 | inputNames=['input(0)', 'sigm(-1)', 'sigm(-2)'], 19 | outputDim=self.D2, 20 | activeFn=SigmoidActiveFn(), 21 | initRange=1, 22 | initSeed=5, 23 | learningRate=0.9 24 | ) 25 | self.sigm = RecurrentAdapter( 26 | stage=self.sigm_) 27 | self.stage = self.sigm.getStage(time=0) 28 | self.model = RecurrentContainer( 29 | stages=[self.sigm], 30 | timespan=self.T, 31 | inputDim=self.D, 32 | outputDim=self.D2, 33 | outputStageNames=['sigm'], 34 | multiOutput=True, 35 | name='container', 36 | outputdEdX=True) 37 | 38 | self.testInputErr = True 39 | self.costFn = meanSqErr 40 | 41 | def test_grad(self): 42 | random = np.random.RandomState(1) 43 | X = random.rand(self.N, self.T, self.D) 44 | T = random.rand(self.N, self.T, self.D2) 45 | dEdW, dEdWTmp, dEdX, dEdXTmp = self.calcgrd(X, T) 46 | self.chkgrd(dEdW, dEdWTmp) 47 | self.chkgrd(dEdX, dEdXTmp) 48 | 49 | def test_forward(self): 50 | random = np.random.RandomState(1) 51 | X = random.rand(self.N, self.T, self.D) 52 | tolerance = 1e-4 53 | Y2 = self.realForward(X) 54 | Y = self.model.forward(X) 55 | Y2 = Y2.reshape(Y2.size) 56 | Y = Y.reshape(Y.size) 57 | for i in range(Y.size): 58 | self.assertTrue((Y[i] == 0 and Y2[i] == 
0) or (np.abs(Y[i] / Y2[i] - 1) < tolerance)) 59 | 60 | def realForward(self, X): 61 | Y2 = np.zeros((self.N, self.T, self.D2)) 62 | for t in range(self.T): 63 | Y2[:, t, :] = self.sigm_.forward( 64 | np.concatenate((X[:, t, :], Y2[:, t-1, :], Y2[:, t-2, :]), axis=-1)) 65 | return Y2 66 | 67 | class LSTM_Recurrent_Random_Tests(unittest.TestCase): 68 | def test_singleErr(self): 69 | self.func(False) 70 | 71 | def test_multiErr(self): 72 | self.func(True) 73 | 74 | def func(self, multiOutput): 75 | N = 5 76 | D = 10 77 | D2 = 5 78 | Time = 5 79 | I = RecurrentAdapter(Map( 80 | name='I', 81 | inputNames=['input(0)', 'H(-1)', 'C(-1)'], 82 | outputDim=D2, 83 | activeFn=SigmoidActiveFn(), 84 | initRange=0.1, 85 | initSeed=5, 86 | biasInitConst=1.0, 87 | learningRate=0.8, 88 | momentum=0.9 89 | )) 90 | 91 | F = RecurrentAdapter(Map( 92 | name='F', 93 | inputNames=['input(0)', 'H(-1)', 'C(-1)'], 94 | outputDim=D2, 95 | activeFn=SigmoidActiveFn(), 96 | initRange=0.1, 97 | initSeed=6, 98 | biasInitConst=1.0, 99 | learningRate=0.8, 100 | momentum=0.9 101 | )) 102 | 103 | Z = RecurrentAdapter(Map( 104 | name='Z', 105 | inputNames=['input(0)', 'H(-1)'], 106 | outputDim=D2, 107 | activeFn=TanhActiveFn(), 108 | initRange=0.1, 109 | initSeed=7, 110 | biasInitConst=0.0, 111 | learningRate=0.8, 112 | momentum=0.9 113 | )) 114 | 115 | FC = RecurrentAdapter(ElementProduct( 116 | name='F.C', 117 | inputNames=['F(0)', 'C(-1)'], 118 | outputDim=D2 119 | )) 120 | 121 | IZ = RecurrentAdapter(ElementProduct( 122 | name='I.Z', 123 | inputNames=['I(0)', 'Z(0)'], 124 | outputDim=D2 125 | )) 126 | 127 | C = RecurrentAdapter(Sum( 128 | name='C', 129 | inputNames=['F.C(0)', 'I.Z(0)'], 130 | numComponents=2, 131 | outputDim=D2 132 | )) 133 | 134 | O = RecurrentAdapter(Map( 135 | name='O', 136 | inputNames=['input(0)', 'H(-1)', 'C(0)'], 137 | outputDim=D2, 138 | activeFn=SigmoidActiveFn(), 139 | initRange=0.1, 140 | initSeed=8, 141 | biasInitConst=1.0, 142 | learningRate=0.8, 143 | momentum=0.9 144 
| )) 145 | 146 | U = RecurrentAdapter(Active( 147 | name='U', 148 | inputNames=['C(0)'], 149 | outputDim=D2, 150 | activeFn=TanhActiveFn() 151 | )) 152 | 153 | H = RecurrentAdapter(ElementProduct( 154 | name='H', 155 | inputNames=['O(0)', 'U(0)'], 156 | outputDim=D2 157 | )) 158 | 159 | lstm = RecurrentContainer( 160 | name='lstm', 161 | stages=[I, F, Z, FC, IZ, C, O, U, H], 162 | timespan=Time, 163 | inputDim=D, 164 | outputDim=D2, 165 | outputStageNames=['H'], 166 | multiOutput=multiOutput, 167 | outputdEdX=True) 168 | 169 | lstm2 = LSTM_Old( 170 | name='lstm2', 171 | inputDim=D, 172 | outputDim=D2, 173 | needInit=False, 174 | initRange=0.1, 175 | initSeed=0, 176 | cutOffZeroEnd=True, 177 | multiErr=multiOutput, 178 | learningRate=0.8, 179 | momentum=0.9 180 | ) 181 | 182 | random = np.random.RandomState(1) 183 | costFn = crossEntOne 184 | for i in range(3): 185 | X = random.rand(N, Time, D) * 0.1 186 | if multiOutput: 187 | T = random.rand(N, Time, D2) * 0.1 188 | else: 189 | T = random.rand(N, D2) * 0.1 190 | 191 | Y = lstm.forward(X) 192 | E, dEdY = costFn(Y, T) 193 | dEdX = lstm.backward(dEdY) 194 | if i == 0: 195 | if I.stages[0].gpu: 196 | W = np.concatenate(( 197 | gpu.as_numpy_array(I.getWeights()), 198 | gpu.as_numpy_array(F.getWeights()), 199 | gpu.as_numpy_array(Z.getWeights()), 200 | gpu.as_numpy_array(O.getWeights())), axis=0) 201 | else: 202 | W = np.concatenate((I.getWeights(), 203 | F.getWeights(), Z.getWeights(), 204 | O.getWeights()), axis=0) 205 | lstm2.W = W.transpose() 206 | if multiOutput: 207 | Y2 = lstm2.forward(X)[:,:-1] 208 | else: 209 | Y2 = lstm2.forward(X) 210 | 211 | E, dEdY2 = costFn(Y2, T) 212 | if multiOutput: 213 | dEdX2 = lstm2.backward(np.concatenate((dEdY2, np.zeros((N, 1, D2))), axis=1)) 214 | else: 215 | dEdX2 = lstm2.backward(dEdY2) 216 | 217 | if I.stages[0].gpu: 218 | dEdW = np.concatenate(( 219 | gpu.as_numpy_array(I.getGradient()), 220 | gpu.as_numpy_array(F.getGradient()), 221 | gpu.as_numpy_array(Z.getGradient()), 222 
| gpu.as_numpy_array(O.getGradient())), axis=0) 223 | else: 224 | dEdW = np.concatenate((I.getGradient(), 225 | F.getGradient(), 226 | Z.getGradient(), 227 | O.getGradient()), axis=0) 228 | dEdW2 = lstm2.dEdW 229 | lstm.updateWeights() 230 | lstm2.updateWeights() 231 | #self.chkEqual(Y, Y2) 232 | 233 | #self.chkEqual(dEdX, dEdX2) 234 | self.chkEqual(dEdW.transpose(), dEdW2) 235 | if I.stages[0].gpu: 236 | W = np.concatenate(( 237 | gpu.as_numpy_array(I.getWeights()), 238 | gpu.as_numpy_array(F.getWeights()), 239 | gpu.as_numpy_array(Z.getWeights()), 240 | gpu.as_numpy_array(O.getWeights())), axis=0) 241 | else: 242 | W = np.concatenate((I.getWeights(), 243 | F.getWeights(), Z.getWeights(), 244 | O.getWeights()), axis=0) 245 | W2 = lstm2.W 246 | #self.chkEqual(W.transpose(), W2) 247 | 248 | def chkEqual(self, a, b): 249 | tolerance = 1e-1 250 | a = a.reshape(a.size) 251 | b = b.reshape(b.size) 252 | for i in range(a.size): 253 | if not ((a[i] == 0 and b[i] == 0) or 254 | (np.abs(a[i]) < 1e-8 and np.abs(b[i]) < 1e-8) or 255 | (np.abs(a[i] / b[i] - 1) < tolerance)): 256 | print a[i], b[i], a[i]/b[i] 257 | self.assertTrue( 258 | (a[i] == 0 and b[i] == 0) or 259 | (np.abs(a[i]) < 1e-8 and np.abs(b[i]) < 1e-8) or 260 | (np.abs(a[i] / b[i] - 1) < tolerance)) 261 | if __name__ == '__main__': 262 | unittest.main() 263 | -------------------------------------------------------------------------------- /src/imageqa_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | import numpy as np 5 | 6 | import calculate_wups 7 | import nn 8 | 9 | def decodeQuestion( 10 | modelInput, 11 | questionIdict): 12 | sentence = '' 13 | for t in range(1, modelInput.shape[0]): 14 | if modelInput[t, 0] == 0: 15 | break 16 | sentence += questionIdict[modelInput[t, 0]- 1] + ' ' 17 | sentence += '?' 
def estimateQuestionType(question):
    """
    Heuristically classify a decoded question string into a category id.

    Returns 0 = object, 1 = number, 2 = color, 3 = location (the same
    category codes consumed by calcRate).
    """
    # 'how many' anywhere wins over everything else.
    if 'how many' in question:
        return 1
    # US spelling needs the full fixed prefix; UK spelling only needs the
    # question to start with 'what' (mirrors the original precedence:
    # A or (B and C)).
    if question.startswith('what is the color'):
        return 2
    if question.startswith('what') and 'colour' in question:
        return 2
    if question.startswith('where'):
        return 3
    return 0
def outputTxt(
        modelOutput,
        target,
        answerArray,
        answerFilename,
        truthFilename,
        topK=1,
        outputProb=False):
    """
    Output the results of all examples into a text file.
    topK: top k answers, separated by comma.
    outputProb: whether to output the probability of the answer as well.

    Format will look like this:
    q1ans1,0.99,q1ans2,0.01...
    q2ans1,0.90,q2ans2,0.02...

    Fixes over the original: the argmax/argsort is computed once per
    example instead of once per write, and for topK > 1 the answers are
    actually comma-separated on one line per example as the format above
    documents (the original wrote no separator unless outputProb was set).
    """
    with open(truthFilename, 'w+') as f:
        for n in range(0, target.shape[0]):
            f.write(answerArray[target[n, 0]] + '\n')
    with open(answerFilename, 'w+') as f:
        for n in range(0, modelOutput.shape[0]):
            if topK == 1:
                # Keep np.argmax here so tie-breaking matches the
                # original single-answer branch exactly.
                top = [np.argmax(modelOutput[n, :])]
            else:
                sortIdx = np.argsort(modelOutput[n], axis=0)
                top = sortIdx[::-1][:topK]
            fields = []
            for i in top:
                fields.append(answerArray[i])
                if outputProb:
                    fields.append('%.4f' % modelOutput[n, i])
            f.write(','.join(fields) + '\n')
return os.path.join( 154 | folder, 155 | '%s.test.t.txt' % taskId) 156 | 157 | def loadDataset(dataFolder): 158 | print 'Loading dataset...' 159 | trainDataFile = os.path.join(dataFolder, 'train.npy') 160 | validDataFile = os.path.join(dataFolder, 'valid.npy') 161 | testDataFile = os.path.join(dataFolder, 'test.npy') 162 | vocabDictFile = os.path.join(dataFolder, 'vocab-dict.npy') 163 | qtypeFile = os.path.join(dataFolder, 'test-qtype.npy') 164 | trainData = np.load(trainDataFile) 165 | validData = np.load(validDataFile) 166 | testData = np.load(testDataFile) 167 | vocabDict = np.load(vocabDictFile) 168 | qTypeArray = np.load(qtypeFile) 169 | inputTest = testData[0] 170 | targetTest = testData[1] 171 | qDict = vocabDict[0] 172 | qIdict = vocabDict[1] 173 | aDict = vocabDict[2] 174 | aIdict = vocabDict[3] 175 | return { 176 | 'trainData': trainData, 177 | 'validData': validData, 178 | 'testData': testData, 179 | 'questionDict': qDict, 180 | 'questionIdict': qIdict, 181 | 'ansDict': aDict, 182 | 'ansIdict': aIdict, 183 | 'questionTypeArray': qTypeArray} 184 | 185 | def loadModel( 186 | taskId, 187 | resultsFolder): 188 | print 'Loading model...' 
def writeMetricsToFile(
        taskId,
        rate,
        resultsRank,
        resultsCategory,
        resultsWups,
        resultsFolder):
    """
    Persist all evaluation metrics for one task to
    {resultsFolder}/{taskId}/result.txt, one 'label: value' pair per line.
    """
    reportLines = ['rate: %.4f' % rate]
    for label, value in zip(('1', '5', '10'), resultsRank):
        reportLines.append('rate @ %s: %.4f' % (label, value))
    for label, value in zip(('object', 'number', 'color', 'location'),
                            resultsCategory):
        reportLines.append('%s: %.4f' % (label, value))
    for label, value in zip(('1.0', '0.9', '0.0'), resultsWups):
        reportLines.append('WUPS %s: %.4f' % (label, value))
    resultsFile = os.path.join(resultsFolder, taskId, 'result.txt')
    with open(resultsFile, 'w') as f:
        f.write('\n'.join(reportLines) + '\n')
inputDim : inputDim + outputDim]
    Wcf = Wf[:, inputDim + outputDim : inputDim + outputDim + outputDim]
    Wxc = Wc[:, 0 : inputDim]
    Wyc = Wc[:, inputDim : inputDim + outputDim]
    Wxo = Wo[:, 0 : inputDim]
    Wyo = Wo[:, inputDim : inputDim + outputDim]
    Wco = Wo[:, inputDim + outputDim : inputDim + outputDim + outputDim]

    return Wxi, Wyi, Wci, Wxf, Wyf, Wcf, Wxc, Wyc, Wxo, Wyo, Wco

def forwardPassN(X, cutOffZeroEnd, W):
    """
    Batched LSTM forward pass.

    X is (numEx, timespan, inputDim).  When cutOffZeroEnd is True an
    all-zero input vector marks the end of a sequence and Y gets one
    extra time step holding the final output of each example.
    Returns (Y, C, Z, Gi, Gf, Go, Xend) — per-example activations plus
    the effective sequence length Xend.
    """
    numEx = X.shape[0]
    timespan = X.shape[1]
    inputDim = X.shape[2]
    outputDim = W.shape[0]
    Wi, Wf, Wc, Wo = sliceWeights(inputDim, outputDim, W)
    Xend = np.zeros(numEx)
    Gi = np.zeros((numEx, timespan, outputDim))
    Gf = np.zeros((numEx, timespan, outputDim))
    Go = np.zeros((numEx, timespan, outputDim))
    Z = np.zeros((numEx, timespan, outputDim))
    C = np.zeros((numEx, timespan, outputDim))
    myShape = (numEx, timespan, outputDim)
    if cutOffZeroEnd:
        # Extra slot at the end of Y for the per-example final output.
        Y = np.zeros((numEx, timespan + 1, outputDim),)
        # A time step whose input sums to zero marks end-of-sequence.
        reachedEnd = np.sum(X, axis=-1) == 0.0
    else:
        Y = np.zeros(myShape,)
        reachedEnd = np.zeros((numEx, timespan))

    for n in range(0, numEx):
        Y[n], C[n], Z[n], \
        Gi[n], Gf[n], Go[n], \
        Xend[n] = \
            forwardPassOne(
                X[n], reachedEnd[n], cutOffZeroEnd, Wi, Wf, Wc, Wo)

    return Y, C, Z, Gi, Gf, Go, Xend

def forwardPassOne(X, reachedEnd, cutOffZeroEnd,
                   Wi, Wf, Wc, Wo):
    """
    LSTM forward pass for a single example X of shape (timespan, inputDim).
    Peephole connections: input/forget gates see C[t-1], output gate sees
    C[t].  Negative indexing (Y[t-1] at t == 0) reads the zero row, which
    serves as the initial state.
    """
    timespan = X.shape[0]
    outputDim = Wi.shape[0]
    # Last time step is reserved for final output of the entire input.
    if cutOffZeroEnd:
        Y = np.zeros((timespan + 1, outputDim))
    else:
        Y = np.zeros((timespan, outputDim))
    C = np.zeros((timespan, outputDim))
    Z = np.zeros((timespan, outputDim))
    Gi = np.zeros((timespan, outputDim))
    Gf = np.zeros((timespan, outputDim))
    Go = np.zeros((timespan, outputDim))
    Xend = timespan
    for t in range(0, timespan):
        if cutOffZeroEnd and reachedEnd[t]:
            # Sequence ended early: copy the last real output into the
            # reserved final slot and stop.
            Xend = t
            Y[-1, :] = Y[t - 1, :]
            break

        states1 = np.concatenate((X[t, :], \
                                  Y[t-1, :], \
                                  C[t-1, :], \
                                  np.ones(1)))
        states2 = np.concatenate((X[t, :], \
                                  Y[t-1, :], \
                                  np.ones(1)))
        Gi[t, :] = sigmoidFn(np.dot(Wi, states1))
        Gf[t, :] = sigmoidFn(np.dot(Wf, states1))
        Z[t, :] = np.tanh(np.dot(Wc, states2))
        C[t, :] = Gf[t, :] * C[t-1, :] + Gi[t, :] * Z[t, :]
        states3 = np.concatenate((X[t, :], \
                                  Y[t-1, :], \
                                  C[t, :], \
                                  np.ones(1)))
        Go[t, :] = sigmoidFn(np.dot(Wo, states3))
        Y[t, :] = Go[t, :] * np.tanh(C[t, :])
    if cutOffZeroEnd and not reachedEnd[-1]:
        # Ran the full timespan without hitting a zero input: the final
        # slot still needs the last real output.
        Y[-1] = Y[-2]
        Xend = timespan
    return Y, C, Z, Gi, Gf, Go, Xend

def backPropagateN(dEdY, X, Y, C, Z, Gi, Gf, Go, Xend,
                   cutOffZeroEnd, multiErr,
                   outputdEdX,
                   W):
    """
    Batched backward pass.  Accumulates dEdW over all examples and fills
    per-example dEdX.  Arguments mirror forwardPassN's outputs.
    """
    numEx = X.shape[0]
    inputDim = X.shape[2]
    outputDim = Y.shape[2]
    Wxi, Wyi, Wci, Wxf, Wyf, Wcf, Wxc, Wyc, Wxo, Wyo, Wco = \
        sliceWeightsSmall(inputDim, outputDim, W)
    dEdW = np.zeros((W.shape[0], W.shape[1]))
    dEdX = np.zeros((X.shape[0], X.shape[1], X.shape[2]))
    for n in range(0, numEx):
        dEdWtmp, dEdX[n] = \
            backPropagateOne(dEdY[n], X[n], Y[n],
                             C[n], Z[n], Gi[n],
                             Gf[n], Go[n],
                             Xend[n], cutOffZeroEnd,
                             multiErr, outputdEdX,
                             Wxi, Wyi, Wci, Wxf, Wyf, Wcf, Wxc,
                             Wyc, Wxo, Wyo, Wco,
                             (W.shape[0], W.shape[1]))
        dEdW += dEdWtmp
    return dEdW, dEdX

def backPropagateOne(dEdY, X, Y, C, Z,
                     Gi, Gf, Go, Xend,
                     cutOffZeroEnd, multiErr,
                     outputdEdX,
                     Wxi, Wyi, Wci,
                     Wxf, Wyf, Wcf,
                     Wxc, Wyc, Wxo, Wyo,
                     Wco, Wshape):
    """
    Backpropagation through time for one example.

    multiErr: dEdY carries an error per time step; otherwise a single
    error vector for the final output.  Returns (dEdW, dEdX).
    """
    Xend = int(Xend)
    if cutOffZeroEnd and multiErr:
        # Fold the error on the reserved final slot back onto the last
        # real time step.
        dEdY[Xend - 1] += dEdY[-1]
    inputDim = X.shape[1]
    outputDim = Y.shape[1]
    dEdW = np.zeros(Wshape)
    if Xend == 0: return dEdW, np.zeros(X.shape)
    # Views into dEdW: the += on these below writes into dEdW in place.
    dEdWi, dEdWf, dEdWc, dEdWo = sliceWeights(inputDim, outputDim, dEdW)
    ddim = (outputDim, Xend)

    # (j, t)
    dEdGi = np.zeros(ddim)
    dEdGf = np.zeros(ddim)
    dEdZ = np.zeros(ddim)
    dEdGo = np.zeros(ddim)

    dEdX = np.zeros(X.shape)

    # (k -> t)  Shifted-by-one histories: Yt1/Ct1 are Y[t-1]/C[t-1] with a
    # zero row for t == 0.
    one = np.ones((Xend, 1))
    Yt1 = np.concatenate((np.zeros((1, outputDim)), Y[:Xend-1]))
    Ct1 = np.concatenate((np.zeros((1, outputDim)), C[:Xend-1]))
    states1T = np.concatenate((X[:Xend], Yt1, Ct1, one), axis=-1)
    states2T = np.concatenate((X[:Xend], Yt1, one), axis=-1)
    states3T = np.concatenate((X[:Xend], Yt1, C[:Xend], one), axis=-1)
    U = np.tanh(C[:Xend])
    # NOTE(review): U is already sliced to Xend, so the extra [:Xend]
    # below is redundant but harmless.
    dU = 1 - U[:Xend] * U[:Xend]
    dZ = 1 - Z[:Xend] * Z[:Xend]
    dGi = Gi[:Xend] * (1 - Gi[:Xend])
    dGf = Gf[:Xend] * (1 - Gf[:Xend])
    dGo = Go[:Xend] * (1 - Go[:Xend])

    # (j, t)
    dCdGi = (Z[:Xend] * dGi).transpose()
    dCdGf = (Ct1 * dGf).transpose()
    dCdZ = (Gi[:Xend] * dZ).transpose()
    dYdGo = (U[:Xend] * dGo).transpose()

    # Walk backwards through time.  dEdYt/dEdCt/dCdC/dCdY/dYdY are first
    # bound in the t == Xend-1 iteration, so the t < Xend-1 branch always
    # sees values from the previous (later-in-time) iteration.
    for t in reversed(range(0, Xend)):
        dEdYnow = dEdY[t] if multiErr else 0
        dYdC = np.diag(Go[t] * dU[t]) + \
               dYdGo[:,t:t+1] * Wco
        if t < Xend - 1:
            dEdYt = np.dot(dEdYt, dYdY) + np.dot(dEdCt, dCdY) + dEdYnow
            dEdCt = np.dot(dEdCt, dCdC) + np.dot(dEdYt, dYdC)
        else:
            dEdYt = dEdYnow if multiErr else dEdY
            dEdCt = np.dot(dEdYt, dYdC)
        dEdGi[:, t] = dEdCt * dCdGi[:,t]
        dEdGf[:, t] = dEdCt * dCdGf[:,t]
        dEdZ[:, t] = dEdCt * dCdZ[:,t]
        dEdGo[:, t] = dEdYt * dYdGo[:,t]
        # Jacobians carried to the next (earlier) iteration.
        dCdC = dCdGf[:,t:t+1] * Wcf + \
               np.diag(Gf[t]) + \
               dCdGi[:,t:t+1] * Wci
        dCdY = dCdGf[:,t:t+1] * Wyf + \
               dCdZ[:,t:t+1] * Wyc + \
               dCdGi[:,t:t+1] * Wyi
        dYdY = dYdGo[:,t:t+1] * Wyo

    # Accumulate weight gradients (writes into dEdW through the views).
    dEdWi += np.dot(dEdGi, states1T)
    dEdWf += np.dot(dEdGf, states1T)
    dEdWc += np.dot(dEdZ, states2T)
    dEdWo += np.dot(dEdGo, states3T)

    if outputdEdX:
        dEdX[0:Xend] = np.dot(dEdGi.transpose(), Wxi) + \
                       np.dot(dEdGf.transpose(), Wxf) + \
                       np.dot(dEdZ.transpose(), Wxc) + \
                       np.dot(dEdGo.transpose(), Wxo)

    return dEdW, dEdX

class LSTM_Old(Stage):
    """
    Reference (non-vectorized) LSTM stage with peephole connections.
    Weights for all four gates are stored concatenated in self.W; see
    sliceWeights for the layout.
    """
    def __init__(self,
                 inputDim,
                 outputDim,
                 inputNames=None,
                 initRange=1.0,
                 initSeed=2,
                 needInit=True,
                 initWeights=0,
                 cutOffZeroEnd=False,
                 multiErr=False,
                 learningRate=0.0,
                 learningRateAnnealConst=0.0,
                 momentum=0.0,
                 deltaMomentum=0.0,
                 weightClip=0.0,
                 gradientClip=0.0,
                 weightRegConst=0.0,
                 outputdEdX=True,
                 name=None):
        Stage.__init__(self,
                       name=name,
                       inputNames=inputNames,
                       outputDim=outputDim,
                       learningRate=learningRate,
                       learningRateAnnealConst=learningRateAnnealConst,
                       momentum=momentum,
                       deltaMomentum=deltaMomentum,
                       weightClip=weightClip,
                       gradientClip=gradientClip,
                       weightRegConst=weightRegConst,
                       outputdEdX=outputdEdX)
        self.inputDim = inputDim
        self.outputDim = outputDim
        self.cutOffZeroEnd = cutOffZeroEnd
        self.multiErr = multiErr
        # Dedicated RNG so weight init is reproducible per seed.
        self.random = np.random.RandomState(initSeed)

        if needInit:
            start = -initRange / 2.0
            end = initRange / 2.0
            Wxi = self.random.uniform(start, end, (self.outputDim, self.inputDim))
            Wxf = self.random.uniform(start, end, (self.outputDim, self.inputDim))
            Wxc = self.random.uniform(start, end, (self.outputDim, self.inputDim))
            Wxo = self.random.uniform(start, end, (self.outputDim, self.inputDim))
            Wyi = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            Wyf = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            Wyc = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            Wyo = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            Wci = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            Wcf = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            Wco = self.random.uniform(start, end, (self.outputDim, self.outputDim))
            # Gate biases start at 1 (open), cell-candidate bias at 0.
            Wbi = np.ones((self.outputDim, 1))
            Wbf = np.ones((self.outputDim, 1))
            Wbc = np.zeros((self.outputDim, 1))
            Wbo = np.ones((self.outputDim, 1))

            Wi = np.concatenate((Wxi, Wyi, Wci, Wbi), axis=1)
            Wf = np.concatenate((Wxf, Wyf, Wcf, Wbf), axis=1)
            Wc = np.concatenate((Wxc, Wyc, Wbc), axis=1)
            Wo = np.concatenate((Wxo, Wyo, Wco, Wbo), axis=1)
            self.W = np.concatenate((Wi, Wf, Wc, Wo), axis = 1)
        else:
            self.W = initWeights
        # Cached forward-pass activations, consumed by backward().
        self.X = 0
        self.Xend = 0
        self.Y = 0
        self.C = 0
        self.Z = 0
        self.Gi = 0
        self.Gf = 0
        self.Go = 0
        pass

    def forward(self, X):
        """Run the batched forward pass and cache activations for backward()."""
        Y, C, Z, Gi, Gf, Go, Xend = \
            forwardPassN(
                X, self.cutOffZeroEnd, self.W)

        self.X = X
        self.Y = Y
        self.C = C
        self.Z = Z
        self.Gi = Gi
        self.Gf = Gf
        self.Go = Go
        self.Xend = Xend

        # multiErr: return the whole sequence; otherwise only the final step.
        return Y if self.multiErr else Y[:,-1]

    def backward(self, dEdY):
        """Backprop using the activations cached by the last forward()."""
        self.dEdW, dEdX = backPropagateN(dEdY, self.X, self.Y,
                                         self.C, self.Z, self.Gi,
                                         self.Gf, self.Go,
                                         self.Xend, self.cutOffZeroEnd,
                                         self.multiErr, self.outputdEdX,
                                         self.W)
        self.dEdX = dEdX
        return dEdX if self.outputdEdX else None
--------------------------------------------------------------------------------
/src/imageqa_ensemble.py:
-------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nn 4 | import numpy as np 5 | import imageqa_test as it 6 | import imageqa_visprior as ip 7 | import imageqa_modelavg as ia 8 | 9 | def loadEnsemble( 10 | taskIds, 11 | resultsFolder): 12 | """ 13 | Load class specific models. 14 | """ 15 | models = [] 16 | for taskId in taskIds: 17 | taskFolder = os.path.join(resultsFolder, taskId) 18 | modelSpec = os.path.join(taskFolder, '%s.model.yml' % taskId) 19 | modelWeights = os.path.join(taskFolder, '%s.w.npy' % taskId) 20 | model = nn.load(modelSpec) 21 | model.loadWeights(np.load(modelWeights)) 22 | models.append(model) 23 | return models 24 | 25 | def __runEnsemble( 26 | inputTest, 27 | models, 28 | ansDict, 29 | classAnsIdict, 30 | questionTypeArray): 31 | allOutput = [] 32 | for i, model in enumerate(models): 33 | print 'Running test data on model #%d...' % i 34 | outputTest = nn.test(model, inputTest) 35 | allOutput.append(outputTest) 36 | ensembleOutputTest = np.zeros((inputTest.shape[0], len(ansDict))) 37 | for n in range(allOutput[0].shape[0]): 38 | qtype = questionTypeArray[n] 39 | output = allOutput[qtype] 40 | for i in range(output.shape[1]): 41 | ansId = ansDict[classAnsIdict[qtype][i]] 42 | ensembleOutputTest[n, ansId] = output[n, i] 43 | return ensembleOutputTest 44 | 45 | def getClassDataFolders(dataset, dataFolder): 46 | """ 47 | Get different original data folder name for class specific models. 
48 | """ 49 | if dataset == 'daquar': 50 | classDataFolders = [ 51 | dataFolder + '-object', 52 | dataFolder + '-number', 53 | dataFolder + '-color' 54 | ] 55 | elif dataset == 'cocoqa': 56 | classDataFolders = [ 57 | dataFolder + '-object', 58 | dataFolder + '-number', 59 | dataFolder + '-color', 60 | dataFolder + '-location' 61 | ] 62 | return classDataFolders 63 | 64 | def runEnsemble( 65 | inputTest, 66 | models, 67 | dataFolder, 68 | classDataFolders, 69 | questionTypeArray): 70 | """ 71 | Run a class specific model on any dataset. 72 | """ 73 | data = it.loadDataset(dataFolder) 74 | classAnsIdict = [] 75 | for df in classDataFolders: 76 | data_c = it.loadDataset(df) 77 | classAnsIdict.append(data_c['ansIdict']) 78 | 79 | ensembleOutputTest = __runEnsemble( 80 | inputTest, 81 | models, 82 | data['ansDict'], 83 | classAnsIdict, 84 | questionTypeArray) 85 | return ensembleOutputTest 86 | 87 | def testEnsemble( 88 | ensembleId, 89 | models, 90 | dataFolder, 91 | classDataFolders, 92 | resultsFolder): 93 | """ 94 | Test a class specific model in its original dataset. 
95 | """ 96 | data = it.loadDataset(dataFolder) 97 | inputTest = data['testData'][0] 98 | targetTest = data['testData'][1] 99 | 100 | ensembleOutputTest = runEnsemble( 101 | inputTest, 102 | models, 103 | dataFolder, 104 | classDataFolders, 105 | data['questionTypeArray']) 106 | ensembleAnswerFile = getAnswerFilename(ensembleId, resultsFolder) 107 | ensembleTruthFile = getTruthFilename(ensembleId, resultsFolder) 108 | 109 | rate, correct, total = nn.calcRate( 110 | model, ensembleOutputTest, data['testData'][1]) 111 | print 'rate: %.4f' % rate 112 | resultsRank, \ 113 | resultsCategory, \ 114 | resultsWups = it.runAllMetrics( 115 | inputTest, 116 | ensembleOutputTest, 117 | targetTest, 118 | data['ansIdict'], 119 | data['questionTypeArray'], 120 | ensembleAnswerFile, 121 | ensembleTruthFile) 122 | it.writeMetricsToFile( 123 | ensembleId, 124 | rate, 125 | resultsRank, 126 | resultsCategory, 127 | resultsWups, 128 | resultsFolder) 129 | 130 | return ensembleOutputTest 131 | 132 | def runEnsemblePrior( 133 | inputTest, 134 | models, 135 | dataFolder, 136 | classDataFolders, 137 | questionTypeArray): 138 | """ 139 | Similar to "testEnsemble" in imageqa_test. 140 | Run visprior on number and color questions. 141 | """ 142 | data = it.loadDataset(dataFolder) 143 | numAns = len(data['ansIdict']) 144 | outputTest = np.zeros((inputTest.shape[0], numAns)) 145 | count = 0 146 | 147 | allOutput = [] 148 | ensembleOutputTest = np.zeros((inputTest.shape[0], numAns)) 149 | classAnsIdict = [] 150 | 151 | for i, model in enumerate(models): 152 | data_m = it.loadDataset(classDataFolders[i]) 153 | classAnsIdict.append(data_m['ansIdict']) 154 | tvData_m = ip.combineTrainValid(data_m['trainData'], data_m['validData']) 155 | print 'Running test data on model #%d...' 
% i 156 | if i == 0: 157 | # Object questions 158 | print 'No prior' 159 | outputTest = nn.test(model, data_m['testData'][0]) 160 | print 'Accuracy:', 161 | print ip.calcRate(outputTest, data_m['testData'][1]) 162 | elif i == 1 or i == 2 or i == 3: 163 | # Number and color and location questions 164 | print 'Prior' 165 | # Delta is pre-determined 166 | if i == 1: 167 | delta = 1e-6 168 | questionType = "number" 169 | elif i == 2: 170 | delta = 5e-4 171 | questionType = "color" 172 | elif i == 3: 173 | delta = 1.0 174 | questionType = "location" 175 | outputTest = ip.runVisPrior( 176 | tvData_m, 177 | data_m['testData'], 178 | questionType, 179 | model, 180 | data_m['questionDict'], 181 | data_m['questionIdict'], 182 | len(data_m['ansIdict']), 183 | delta) 184 | allOutput.append(outputTest) 185 | counter = [0, 0, 0, 0] 186 | for n in range(inputTest.shape[0]): 187 | qtype = questionTypeArray[n] 188 | output = allOutput[qtype] 189 | for i in range(output.shape[1]): 190 | ansId = data['ansDict'][classAnsIdict[qtype][i]] 191 | ensembleOutputTest[n, ansId] = output[counter[qtype], i] 192 | counter[qtype] += 1 193 | return ensembleOutputTest 194 | 195 | def testEnsemblePrior( 196 | ensembleId, 197 | models, 198 | dataFolder, 199 | classDataFolders, 200 | resultsFolder): 201 | data = it.loadDataset(dataFolder) 202 | inputTest = data['testData'][0] 203 | targetTest = data['testData'][1] 204 | ensembleOutputTest = runEnsemblePrior( 205 | inputTest, 206 | models, 207 | dataFolder, 208 | classDataFolders, 209 | data['questionTypeArray']) 210 | ensembleAnswerFile = it.getAnswerFilename(ensembleId, resultsFolder) 211 | ensembleTruthFile = it.getTruthFilename(ensembleId, resultsFolder) 212 | 213 | rate, correct, total = nn.calcRate( 214 | model, ensembleOutputTest, data['testData'][1]) 215 | print 'rate: %.4f' % rate 216 | resultsRank, \ 217 | resultsCategory, \ 218 | resultsWups = it.runAllMetrics( 219 | inputTest, 220 | ensembleOutputTest, 221 | targetTest, 222 | 
data['ansIdict'], 223 | data['questionTypeArray'], 224 | ensembleAnswerFile, 225 | ensembleTruthFile) 226 | it.writeMetricsToFile( 227 | ensembleId, 228 | rate, 229 | resultsRank, 230 | resultsCategory, 231 | resultsWups, 232 | resultsFolder) 233 | return ensembleOutputTest 234 | 235 | def runAllModels( 236 | inputTest, 237 | questionTypeArray, 238 | modelSpecs, 239 | resultsFolder, 240 | dataset, 241 | dataFolder): 242 | allOutputs = [] 243 | for modelSpec in modelSpecs: 244 | if modelSpec['isClassEnsemble']: 245 | print 'Running test data on ensemble model %s...' \ 246 | % modelSpec['name'] 247 | models = loadEnsemble(modelSpec['id'].split(','), resultsFolder) 248 | classDataFolders = getClassDataFolders(dataset, dataFolder) 249 | if modelSpec['runPrior']: 250 | outputTest = runEnsemblePrior( 251 | inputTest, 252 | models, 253 | dataFolder, 254 | classDataFolders, 255 | questionTypeArray) 256 | else: 257 | outputTest = runEnsemble( 258 | inputTest, 259 | models, 260 | dataFolder, 261 | classDataFolders, 262 | questionTypeArray) 263 | elif modelSpec['isAverageEnsemble']: 264 | modelOutputs = [] 265 | for modelId in modelSpec['id'].split(','): 266 | model = it.loadModel(modelId, resultsFolder) 267 | modelOutputs.append(nn.test(model, inputTest)) 268 | outputTest = np.zeros(modelOutputs[0].shape) 269 | for output in modelOutputs: 270 | shape0 = min(outputTest.shape[0], output.shape[0]) 271 | shape1 = min(outputTest.shape[1], output.shape[1]) 272 | outputTest[:shape0, :shape1] += output[:shape0, :shape1] / \ 273 | float(len(modelOutputs)) 274 | else: 275 | print 'Running test data on model %s...' 
\ 276 | % modelSpec['name'] 277 | model = it.loadModel(modelSpec['id'], resultsFolder) 278 | outputTest = nn.test(model, inputTest) 279 | allOutputs.append(outputTest) 280 | return allOutputs 281 | 282 | if __name__ == '__main__': 283 | """ 284 | Test a type-specific ensemble model 285 | Usage: 286 | python imageqa_ensemble.py -e[nsemble] {ensembleId} 287 | -m[odel] {modelId1} 288 | -m[odel] {modelId2},... 289 | -d[ata] {dataFolder} 290 | -dataset {daquar/cocoqa} 291 | [-r[esults] {resultsFolder}] 292 | [-prior] 293 | Results folder by default is '../results' 294 | """ 295 | resultsFolder = '../results' 296 | taskIds = [] 297 | dataset = 'cocoqa' 298 | runPrior = False 299 | for i, flag in enumerate(sys.argv): 300 | if flag == '-m' or flag == '-model': 301 | taskIds.append(sys.argv[i + 1]) 302 | elif flag == '-e' or flag == '-ensemble': 303 | ensembleId = sys.argv[i + 1] 304 | elif flag == '-d' or flag == '-data': 305 | dataFolder = sys.argv[i + 1] 306 | elif flag == '-r' or flag == '-results': 307 | resultsFolder = sys.argv[i + 1] 308 | elif flag == '-dataset': 309 | dataset = sys.argv[i + 1] 310 | elif flag == '-prior': 311 | runPrior = True 312 | models = loadEnsemble(taskIds, resultsFolder) 313 | classDataFolders = getClassDataFolders(dataset, dataFolder) 314 | if runPrior: 315 | testEnsemblePrior( 316 | ensembleId=ensembleId, 317 | models=models, 318 | dataFolder=dataFolder, 319 | classDataFolders=classDataFolders, 320 | resultsFolder=resultsFolder) 321 | else: 322 | testEnsemble( 323 | ensembleId=ensembleId, 324 | models=models, 325 | dataFolder=dataFolder, 326 | classDataFolders=classDataFolders, 327 | resultsFolder=resultsFolder) 328 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | import nn 2 | import numpy as np 3 | import os 4 | import sys 5 | import yaml 6 | import imageqa_test 7 | 8 | """ 9 | Train a neural network 10 | 
Usage: python train.py
       -n[ame] {name}
       -d[ata] {train/valid/test folder}
       -m[odel] {model spec}
       -s[aved] {saved model id}
       -e[arly] {early stop score}
       -c[onfig] {config filename}
       -w[eights] {input weights}
       -o[utput] {output folder}
       -b[oard] {board id}
       [-imageqa]
Parameters:
    -n[ame] Name of the model
    -d[ata] Data folder that contains 'train.npy', 'valid.npy', and 'test.npy'
    -m[odel] Model specification file name
    -s[aved] Saved model ID
    -e[arly] Early stop score
    -c[onfig] Train config file name
    -w[eights] Weighted input (for boosting), filename before '-train' or '-test'
    -o[utput] Training results output folder
    -b[oard] GPU board ID
    [-imageqa] Run Image-QA test scripts
"""

def readFlags():
    """
    Parse sys.argv into a params dict and validate required options.
    Raises Exception when config/data/model files are missing.
    """
    params = {}
    params['name'] = None
    params['outputFolder'] = None
    params['configFilename'] = None
    params['trainDataFilename'] = None
    params['testDataFilename'] = None
    params['validDataFilename'] = None
    params['allDataFilename'] = None
    params['modelFilename'] = None
    params['savedModelId'] = None
    params['earlyStopScore'] = None
    # NOTE(review): image-QA testing defaults to on; -noimageqa disables.
    params['imageqa'] = True
    params['trainInputWeightsFilename'] = None
    params['validInputWeightsFilename'] = None
    for i, flag in enumerate(sys.argv):
        if flag == '-n' or flag == '-name':
            params['name'] = sys.argv[i + 1]
        elif flag == '-o' or flag == '-output':
            params['outputFolder'] = sys.argv[i + 1]
        elif flag == '-d' or flag == '-data':
            dataFolder = sys.argv[i + 1]
            # Each split is optional; missing files stay None.
            trainPath = os.path.join(dataFolder, 'train.npy')
            params['trainDataFilename'] = trainPath if os.path.isfile(trainPath) else None
            validPath = os.path.join(dataFolder, 'valid.npy')
            params['validDataFilename'] = validPath if os.path.isfile(validPath) else None
            testPath = os.path.join(dataFolder, 'test.npy')
            params['testDataFilename'] = testPath if os.path.isfile(testPath) else None
            params['dataFolder'] = dataFolder
        elif flag == '-w' or flag == '-weights':
            # NOTE(review): relies on -d appearing before -w on the command
            # line (dataFolder is a local set by the -d branch) — confirm.
            weightsPath = sys.argv[i + 1]
            params['trainInputWeightsFilename'] = os.path.join(dataFolder, weightsPath + '-train.npy')
            params['validInputWeightsFilename'] = os.path.join(dataFolder, weightsPath + '-valid.npy')
            params['testInputWeightsFilename'] = os.path.join(dataFolder, weightsPath + '-test.npy')
        elif flag == '-m' or flag == '-model':
            params['modelFilename'] = sys.argv[i + 1]
        elif flag == '-s' or flag == '-saved':
            params['savedModelId'] = sys.argv[i + 1]
        elif flag == '-e' or flag == '-early':
            params['earlyStopScore'] = float(sys.argv[i + 1])
        elif flag == '-c' or flag == '-config':
            params['configFilename'] = sys.argv[i + 1]
        elif flag == '-b' or flag == '-board':
            os.environ['GNUMPY_BOARD_ID'] = sys.argv[i + 1]
        elif flag == '-imageqa':
            params['imageqa'] = True
        elif flag == '-noimageqa':
            params['imageqa'] = False

    # Check required parameters.
    if params['configFilename'] is None:
        raise Exception('Config file not specified')
    if params['trainDataFilename'] is None:
        raise Exception('Data file not specified')
    if params['modelFilename'] is None:
        raise Exception('Model file not specified')
    if params['name'] is None:
        # Default name: model spec base name without extension.
        params['name'] = params['modelFilename'].split('/')[-1].split('.')[0]

    return params

def runTests(params, model, trainer):
    """
    Evaluate on the test split (if present), either via the image-QA
    metric suite or a plain accuracy check written to result.txt.
    """
    if params['testDataFilename'] is not None:
        if params['imageqa']:
            imageqa_test.testAll(
                trainer.name, model, params['dataFolder'], params['outputFolder'])
        else:
            testData = np.load(params['testDataFilename'])
            testInput = testData[0]
            testTarget = testData[1]
            # Reload the best weights saved by the trainer before testing.
            model.loadWeights(np.load(trainer.modelFilename))
            testOutput = nn.test(model, testInput)
            testRate, c, t = nn.calcRate(model, testOutput, testTarget)
            print 'Test rate: ', testRate
            with open(os.path.join(
                    trainer.outputFolder, 'result.txt'), 'w+') as f:
                f.write('Test rate: %f\n' % testRate)

def combineInputs(
        trainInput,
        trainTarget,
        trainInputWeights,
        validInput,
        validTarget,
        validInputWeights):
    """
    Concatenate train and valid splits (and their per-example weights,
    when provided) for the final train-on-everything pass.
    """
    allInput = np.concatenate((trainInput, validInput), axis=0)
    allTarget = np.concatenate((trainTarget, validTarget), axis=0)
    if trainInputWeights is not None:
        allInputWeights = np.concatenate(
            (trainInputWeights, validInputWeights), axis=0)
    else:
        allInputWeights = None
    return allInput, allTarget, allInputWeights

def trainValid(
        params,
        trainOpt,
        trainInput,
        trainTarget,
        trainInputWeights,
        validInput,
        validTarget,
        validInputWeights,
        initWeights=None):
    """
    Train with a held-out validation set; the trainer name gets a '-v'
    suffix so the later train-all run is distinguishable.
    Returns (model, trainer).
    """
    model = nn.load(params['modelFilename'])
    if initWeights is not None:
        model.loadWeights(initWeights)
    trainer = nn.Trainer(
        name=params['name']+\
            ('-v' if params['validDataFilename'] is not None else ''),
        model=model,
        trainOpt=trainOpt,
        outputFolder=params['outputFolder']
    )

    # Validation training
    trainer.train(
        trainInput=trainInput,
        trainTarget=trainTarget,
        trainInputWeights=trainInputWeights,
        validInput=validInput,
        validTarget=validTarget,
        validInputWeights=validInputWeights)
    return model, trainer

def trainAll(
        params,
        trainOpt,
        trainInput,
        trainTarget,
        trainInputWeights,
        validInput,
        validTarget,
        validInputWeights,
        initWeights=None):
    """
    Train on train+valid combined (no validation monitoring); used after
    a stop score has been determined.  Returns (model, trainer).
    """
    model = nn.load(params['modelFilename'])
    if initWeights is not None:
        model.loadWeights(initWeights)
    trainer = nn.Trainer(
        name=params['name'],
        model=model,
        trainOpt=trainOpt,
        outputFolder=params['outputFolder']
    )
    # Combine train & valid set
    allInput, allTarget, allInputWeights = combineInputs(
        trainInput,
        trainTarget,
        trainInputWeights,
        validInput,
        validTarget,
        validInputWeights)

    trainer.train(
        trainInput=allInput,
        trainTarget=allTarget,
        trainInputWeights=allInputWeights)
    return model, trainer

if __name__ == '__main__':
    # Overall flow: (1) optionally resume from saved weights, (2) train
    # with validation to find a stopping score, (3) retrain on
    # train+valid to that score, testing after each phase.
    # Read params
    params = readFlags()

    # Load train options
    with open(params['configFilename']) as f:
        # NOTE(review): yaml.load without a Loader is unsafe on untrusted
        # input — consider yaml.safe_load for config files.
        trainOpt = yaml.load(f)

    # Load dataset
    trainData = np.load(params['trainDataFilename'])
    trainInput = trainData[0]
    trainTarget = trainData[1]

    if params['validDataFilename'] is not None:
        validData = np.load(params['validDataFilename'])
        validInput = validData[0]
        validTarget = validData[1]
    else:
        validInput = None
        validTarget = None

    if params['trainInputWeightsFilename'] is not None:
        trainInputWeights = np.load(params['trainInputWeightsFilename'])
        validInputWeights = np.load(params['validInputWeightsFilename'])
    else:
        trainInputWeights = None
        validInputWeights = None

    if params['savedModelId'] is not None:
        # Resume: load the saved run's weights as the starting point.
        modelFolder = os.path.join(params['outputFolder'], params['savedModelId'])
        initWeights = np.load(os.path.join(modelFolder, params['savedModelId'] + '.w.npy'))
        if '-v-' in params['savedModelId']:
            # Saved run was a validation-phase run: redo valid training
            # from its weights, then retrain on train+valid.
            # Train model
            model, trainer = trainValid(
                params,
                trainOpt,
                trainInput,
                trainTarget,
                trainInputWeights,
                validInput,
                validTarget,
                validInputWeights,
                initWeights=initWeights)

            # Reload model
            model = nn.load(params['modelFilename'])
            model.loadWeights(np.load(trainer.modelFilename))

            # Run tests
            runTests(params, model, trainer)

            # Re-train
            if params['testDataFilename'] is not None and \
                params['validDataFilename'] is not None:

                # Setup options
                trainOpt['needValid'] = False
                print 'Stopped score:', trainer.stoppedTrainScore
                trainOpt['stopScore'] = trainer.stoppedTrainScore

                # Train train+valid
                model, trainer = trainAll(
                    params,
                    trainOpt,
                    trainInput,
                    trainTarget,
                    trainInputWeights,
                    validInput,
                    validTarget,
                    validInputWeights)

                # Reload model
                model = nn.load(params['modelFilename'])
                model.loadWeights(np.load(trainer.modelFilename))

                # Run tests
                runTests(params, model, trainer)
        else:
            # Saved run was already a train-all run: go straight to
            # train+valid with an explicit stop score.
            # Set up options
            trainOpt['needValid'] = False
            if params['earlyStopScore'] is not None:
                trainOpt['stopScore'] = params['earlyStopScore']
            else:
                raise Exception('Need to provide early stop score.')

            # Train train+valid
            model, trainer = trainAll(
                params,
                trainOpt,
                trainInput,
                trainTarget,
                trainInputWeights,
                validInput,
                validTarget,
                validInputWeights,
                initWeights=initWeights)

            # Reload model
            model = nn.load(params['modelFilename'])
            model.loadWeights(np.load(trainer.modelFilename))

            # Run tests
            runTests(params, model, trainer)
    else:
        if params['earlyStopScore'] is None:
            # Fresh run: validation phase first to discover the stop score.
            # Train model
            model, trainer = trainValid(
                params,
                trainOpt,
                trainInput,
                trainTarget,
                trainInputWeights,
                validInput,
                validTarget,
                validInputWeights)

            # Reload model
            model = nn.load(params['modelFilename'])
            model.loadWeights(np.load(trainer.modelFilename))

            # Run tests
            runTests(params, model, trainer)

            # Re-train
            if params['testDataFilename'] is not None and \
                params['validDataFilename'] is not None:

                # Setup options
                trainOpt['needValid'] = False
                print 'Stopped score:', trainer.stoppedTrainScore
                trainOpt['stopScore'] = trainer.stoppedTrainScore

                # Train train+valid
                model, trainer = trainAll(
                    params,
                    trainOpt,
                    trainInput,
                    trainTarget,
                    trainInputWeights,
                    validInput,
                    validTarget,
                    validInputWeights)

                # Reload model
                model = nn.load(params['modelFilename'])
                model.loadWeights(np.load(trainer.modelFilename))

                # Run tests
                runTests(params, model, trainer)
        else:
            # Fresh run with a supplied stop score: skip validation.
            # Set up options
            trainOpt['needValid'] = False
            trainOpt['stopScore'] = params['earlyStopScore']

            # Train train+valid
            model, trainer = trainAll(
                params,
                trainOpt,
                trainInput,
                trainTarget,
                trainInputWeights,
                validInput,
                validTarget,
                validInputWeights)

            # Reload model
            model = nn.load(params['modelFilename'])
            model.loadWeights(np.load(trainer.modelFilename))

            # Run tests
            runTests(params, model, trainer)

--------------------------------------------------------------------------------
/src/nn/trainer.py:
--------------------------------------------------------------------------------
import time
import sys
import os
import shutil
import matplotlib
import valid_tool as vt
import tester
from func import *
# Select a non-interactive backend before pyplot is imported, so plots
# can be saved on headless training machines.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.ion()

class Trainer_ProgressWriter_Logger_Plotter_doc_anchor__unused:
    pass

class ProgressWriter:
    """Prints a dotted text progress bar of `width` chars for `total` items."""
    def __init__(self, total, width=80):
        self.total = total
        self.counter = 0     # items completed so far
        self.progress = 0    # dots printed so far
        self.width = width

    def increment(self, amount=1):
        """Advance by `amount` items, printing dots proportionally."""
        self.counter += amount
        while self.counter / float(self.total) > \
            self.progress / float(self.width):
            sys.stdout.write('.')
            sys.stdout.flush()
            self.progress += 1
        # Terminate the bar with a newline when complete.
        if self.counter == self.total and \
            self.progress < self.width:
            print

class Logger:
    """
    Logs per-epoch training statistics to stdout and (optionally) appends
    them as CSV rows to <outputFolder>/<name>.csv.
    """
    def __init__(self, trainer, csv=True, filename=None):
        self.trainer = trainer
        self.startTime = time.time()
        self.saveCsv = csv
        if filename is None:
            self.outFilename = os.path.join(
                trainer.outputFolder, trainer.name + '.csv')
        else:
            self.outFilename = filename

    def logMsg(self, msg):
        """Print a free-form message."""
        print msg

    def logTrainStats(self):
        """
        Print epoch number, elapsed seconds, and train/valid loss and
        rate at the trainer's current totalStep; append a CSV row when
        saveCsv is set.
        """
        timeElapsed = time.time() - self.startTime
        stats = 'N: %3d T: %5d TE: %8.4f TR: %8.4f VE: %8.4f VR: %8.4f' % \
            (self.trainer.epoch,
             timeElapsed,
             self.trainer.loss[self.trainer.totalStep],
             self.trainer.rate[self.trainer.totalStep],
             self.trainer.validLoss[self.trainer.totalStep],
             self.trainer.validRate[self.trainer.totalStep])
        print stats

        if self.saveCsv:
            statsCsv = '%d,%.4f,%.4f,%.4f,%.4f' % \
                (self.trainer.epoch,
                 self.trainer.loss[self.trainer.totalStep],
                 self.trainer.rate[self.trainer.totalStep],
                 self.trainer.validLoss[self.trainer.totalStep],
                 self.trainer.validRate[self.trainer.totalStep])
            with open(self.outFilename, 'a+') as f:
                f.write('%s\n' % statsCsv)
        pass

class Plotter:
    """Saves loss and error curves as PNGs in the trainer's output folder."""
    def __init__(self, trainer):
        self.trainer = trainer
        self.startTime = time.time()
        self.trainer.epoch = 0
        self.lossFigFilename = \
            os.path.join(trainer.outputFolder, trainer.name + '_loss.png')
        self.errFigFilename = \
            os.path.join(trainer.outputFolder, trainer.name + '_err.png')
        # NOTE(review): duplicate of the assignment above — harmless but
        # probably a leftover.
        self.trainer.epoch = 0

    def plot(self):
        """
        Redraw and save the loss curve (figure 1) and, when calcError is
        enabled, the prediction-error curve (figure 2), including the
        validation series when needValid is set.
        """
        plt.figure(1)
        plt.clf()
        plt.plot(np.arange(self.trainer.totalStep + 1),
                 self.trainer.loss[0 : self.trainer.totalStep + 1],
                 'b-x')
        if self.trainer.trainOpt['needValid']:
            plt.plot(np.arange(self.trainer.totalStep + 1),
                     self.trainer.validLoss[0 : self.trainer.totalStep + 1],
                     'g-o')
            plt.legend(['Train', 'Valid'])
            plt.title('Train/Valid Loss Curve')
        else:
            plt.legend(['Train'])
            plt.title('Train Loss Curve')

        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.draw()
        plt.savefig(self.lossFigFilename)

        if self.trainer.trainOpt['calcError']:
            plt.figure(2)
            plt.clf()
            # Error = 1 - rate.
            plt.plot(np.arange(self.trainer.totalStep + 1),
                     1 - self.trainer.rate[0 : self.trainer.totalStep + 1],
                     'b-x')
            if self.trainer.trainOpt['needValid']:
                plt.plot(np.arange(self.trainer.totalStep + 1),
                         1 - self.trainer.validRate[\
                             0 : self.trainer.totalStep + 1],
                         'g-o')
                plt.legend(['Train', 'Valid'])
                plt.title('Train/Valid Error Curve')
            else:
                plt.legend(['Train'])
                plt.title('Train Error Curve')

            plt.xlabel('Epoch')
            plt.ylabel('Prediction Error')
            plt.draw()
            plt.savefig(self.errFigFilename)

class Trainer:
    # (definition continues beyond this chunk)
    def __init__(self,
                 name,
                 model,
                 trainOpt,
                 outputFolder='',
                 seed=1000):
        self.model = model
        # Timestamp suffix makes every run's output folder/files unique.
        self.name = name + time.strftime("-%Y%m%d-%H%M%S")
        self.resultsFolder = outputFolder
131 | self.outputFolder = os.path.join(outputFolder, self.name) 132 | self.modelFilename = \ 133 | os.path.join(self.outputFolder, self.name + '.w.npy') 134 | self.trainOpt = trainOpt 135 | self.startTime = time.time() 136 | self.random = np.random.RandomState(seed) 137 | numEpoch = trainOpt['numEpoch'] 138 | self.loss = 0 139 | self.validLoss = 0 140 | self.rate = 0 141 | self.validRate = 0 142 | self.epoch = 0 143 | 144 | def initFolder(self): 145 | if not os.path.exists(self.outputFolder): 146 | os.makedirs(self.outputFolder) 147 | if self.model.specFilename is not None: 148 | shutil.copyfile( 149 | self.model.specFilename, 150 | os.path.join(self.outputFolder, self.name + '.model.yml')) 151 | 152 | def initData(self, X, T, split=True): 153 | VX = None 154 | VT = None 155 | X, T = vt.shuffleData(X, T, self.random) 156 | if split: 157 | X, T, VX, VT = \ 158 | vt.splitData(X, T, 159 | self.trainOpt['heldOutRatio'], 160 | self.trainOpt['xvalidNo']) 161 | return X, T, VX, VT 162 | 163 | def train( 164 | self, 165 | trainInput, 166 | trainTarget, 167 | trainInputWeights=None, 168 | validInput=None, 169 | validTarget=None, 170 | validInputWeights=None): 171 | self.initFolder() 172 | trainOpt = self.trainOpt 173 | if validInput is None and validTarget is None: 174 | X, T, VX, VT = self.initData(\ 175 | trainInput, trainTarget, \ 176 | split=self.trainOpt['needValid']) 177 | else: 178 | X = trainInput 179 | T = trainTarget 180 | VX = validInput 181 | VT = validTarget 182 | N = X.shape[0] 183 | print 'Epoch size:', N 184 | numEpoch = trainOpt['numEpoch'] 185 | calcError = trainOpt['calcError'] 186 | numExPerBat = trainOpt['batchSize'] 187 | print 'Batch size:', numExPerBat 188 | numBatPerStep = trainOpt['stepSize'] \ 189 | if trainOpt.has_key('stepSize') \ 190 | else int(np.ceil(N / float(numExPerBat))) 191 | print 'Step size:', numBatPerStep 192 | numExPerStep = numExPerBat * numBatPerStep \ 193 | if trainOpt.has_key('stepSize') \ 194 | else N 195 | print 'Examples per 
step:', numExPerStep 196 | numStepPerEpoch = int(np.ceil( 197 | N / float(numExPerStep))) \ 198 | if trainOpt.has_key('stepSize') \ 199 | else 1 200 | print 'Steps per epoch:', numStepPerEpoch 201 | progressWriter = ProgressWriter(numExPerStep, width=80) 202 | logger = Logger(self, csv=trainOpt['writeRecord']) 203 | logger.logMsg('Trainer ' + self.name) 204 | plotter = Plotter(self) 205 | bestVscore = None 206 | bestTscore = None 207 | bestStep = 0 208 | totalBat = 0 209 | step = 0 210 | totalStep = 0 211 | nAfterBest = 0 212 | stop = False 213 | self.loss = np.zeros((numStepPerEpoch * numEpoch)) 214 | self.validLoss = np.zeros((numStepPerEpoch * numEpoch)) 215 | self.rate = np.zeros((numStepPerEpoch * numEpoch)) 216 | self.validRate = np.zeros((numStepPerEpoch * numEpoch)) 217 | 218 | # Train loop through epochs 219 | for epoch in range(0, numEpoch): 220 | self.epoch = epoch 221 | epochE = 0 222 | epochCorrect = 0 223 | epochTotal = 0 224 | 225 | # Shuffle data 226 | if trainOpt['shuffle']: 227 | X, T = vt.shuffleData(X, T, self.random) 228 | 229 | # Every step, validate 230 | for step in range(0, numStepPerEpoch): 231 | stepStart = step * numExPerStep 232 | stepEnd = min((step + 1) * numExPerStep, N) 233 | numExThisStep = stepEnd - stepStart 234 | E = 0 235 | correct = 0 236 | total = 0 237 | self.totalStep = totalStep 238 | 239 | # Every batch forward-backward 240 | for batch in range(0, numBatPerStep): 241 | batchStart = stepStart + batch * numExPerBat 242 | if batchStart > N: 243 | break 244 | batchEnd = min( 245 | stepStart + (batch + 1) * numExPerBat, stepEnd) 246 | numExThisBat = batchEnd - batchStart 247 | self.totalBatch = totalBat 248 | 249 | if trainOpt['progress']: 250 | progressWriter.increment(amount=numExThisBat) 251 | 252 | # Forward 253 | Y_bat = self.model.forward( 254 | X[batchStart:batchEnd], dropout=True) 255 | T_bat = T[batchStart:batchEnd] 256 | 257 | # Loss 258 | Etmp, dEdY = self.model.getCost( 259 | Y_bat, T_bat, 
weights=trainInputWeights) 260 | E += Etmp * numExThisBat / float(numExThisStep) 261 | epochE += Etmp * numExThisBat / float(N) 262 | 263 | # Backward 264 | self.model.backward(dEdY) 265 | 266 | # Update 267 | self.model.updateWeights() 268 | 269 | # Prediction error 270 | if calcError: 271 | rate_, correct_, total_ = \ 272 | tester.calcRate(self.model, Y_bat, T_bat) 273 | correct += correct_ 274 | total += total_ 275 | epochCorrect += correct_ 276 | epochTotal += total_ 277 | 278 | totalBat += 1 279 | 280 | # Store train statistics 281 | if calcError: 282 | rate = correct / float(total) 283 | self.rate[totalStep] = rate 284 | self.loss[totalStep] = E 285 | 286 | # Early stop 287 | if not trainOpt.has_key('criterion'): 288 | Tscore = E 289 | else: 290 | if trainOpt['criterion'] == 'loss': 291 | Tscore = E 292 | elif trainOpt['criterion'] == 'rate': 293 | Tscore = 1 - rate 294 | else: 295 | raise Exception('Unknown stopping criterion "%s"' % \ 296 | trainOpt['criterion']) 297 | 298 | # Run validation 299 | if trainOpt['needValid']: 300 | VY = tester.test(self.model, VX) 301 | VE, dVE = self.model.getCost( 302 | VY, VT, weights=validInputWeights) 303 | self.validLoss[totalStep] = VE 304 | if calcError: 305 | Vrate, correct, total = tester.calcRate( 306 | self.model, VY, VT) 307 | self.validRate[totalStep] = Vrate 308 | 309 | # Check stopping criterion 310 | if not trainOpt.has_key('criterion'): 311 | Vscore = VE 312 | else: 313 | if trainOpt['criterion'] == 'loss': 314 | Vscore = VE 315 | elif trainOpt['criterion'] == 'rate': 316 | Vscore = 1 - Vrate 317 | else: 318 | raise Exception( 319 | 'Unknown stopping criterion "%s"' % \ 320 | trainOpt['criterion']) 321 | if (bestVscore is None) or (Vscore < bestVscore): 322 | bestVscore = Vscore 323 | bestTscore = Tscore 324 | nAfterBest = 0 325 | bestStep = totalStep 326 | 327 | # Save trainer if VE is best 328 | if trainOpt['saveModel']: 329 | self.save() 330 | else: 331 | nAfterBest += 1 332 | # Stop training if above 
patience level 333 | if nAfterBest > trainOpt['patience']: 334 | print 'Patience level reached, early stop.' 335 | print 'Will stop at score ', bestTscore 336 | stop = True 337 | else: 338 | if trainOpt['saveModel']: 339 | self.save() 340 | if trainOpt.has_key('stopScore') and \ 341 | Tscore < trainOpt['stopScore']: 342 | print \ 343 | 'Training score is lower than %.4f , ealy stop.' % \ 344 | trainOpt['stopScore'] 345 | stop = True 346 | 347 | logger.logTrainStats() 348 | if trainOpt['needValid']: 349 | print 'P: %d' % nAfterBest, 350 | print self.name 351 | 352 | if stop: 353 | break 354 | 355 | # Store train statistics 356 | if calcError: 357 | epochRate = epochCorrect / float(epochTotal) 358 | print 'Epoch Final: %d TE: %.4f TR:%.4f' % \ 359 | (epoch, epochE, epochRate) 360 | 361 | # Anneal learning rate 362 | self.model.updateLearningParams(epoch) 363 | 364 | # Plot train curves 365 | if trainOpt['plotFigs']: 366 | plotter.plot() 367 | 368 | # Terminate 369 | if stop: 370 | break 371 | 372 | # Report best train score 373 | self.stoppedTrainScore = bestTscore 374 | 375 | def save(self, filename=None): 376 | if filename is None: 377 | filename = self.modelFilename 378 | try: 379 | np.save(filename, self.model.getWeights()) 380 | except Exception: 381 | print 'Exception occurred. Cannot save weights' 382 | 383 | --------------------------------------------------------------------------------