├── README.md ├── buildCython.sh ├── buildCythonWindows.cmd ├── dataUtils ├── __init__.py └── data.py ├── nn ├── __init__.py ├── autorec │ ├── __init__.py │ ├── ae.py │ ├── ae_utils.py │ ├── config │ │ └── 1M │ │ │ ├── 1M.yaml │ │ │ └── 1MU.yaml │ ├── cython_matmul.c │ ├── cython_matmul.pyx │ ├── generateNNFeatures.py │ ├── learner.py │ ├── lossDeriv.py │ ├── modelLoader.py │ ├── optimizers.py │ └── setup_matmul.py ├── blocks │ ├── __init__.py │ ├── activations.py │ ├── cython_activations.c │ ├── cython_activations.pyx │ ├── networkConfigParser.py │ ├── nn.py │ └── setup_activations.py └── cfrbm │ ├── cfRBM.py │ ├── config │ └── 1M │ │ ├── 1M.yaml │ │ └── 1MU.yaml │ ├── cython_rbm_matmul.c │ ├── cython_rbm_matmul.pyx │ ├── learner.py │ └── setup_rbm_matmul.py └── utils ├── __init__.py ├── datetimeUtils.py ├── metrics ├── __init__.py └── evaluate.py └── statUtil.py /README.md: -------------------------------------------------------------------------------- 1 | Source code for AutoRec, an autoencoder-based model for collaborative filtering. This package also includes an implementation of 2 | an RBM-based collaborative filtering model (RBM-CF). 3 | 4 | 5 | Dependencies 6 | ============ 7 | * cython 8 | * progressbar 9 | * envoy 10 | * climin 11 | 12 | 13 | Configuration 14 | ============= 15 | Models are defined in a YAML configuration file. The configuration file consists of three sections (an illustrative example is given at the end of this README): 16 | * **data**: 17 | In this section, we define the data sources and the model save path 18 | - **train** : path of the training file 19 | - **test** : path of the test file 20 | - **save** : path for saving the model 21 | * **params**: 22 | In this section, we define the network training parameters 23 | - **lamda**: list of regularization parameters, one for each layer 24 | - **max_iter**: maximum number of iterations 25 | - **batch_size**: size of the batch 26 | - **optimizer**: choice of optimizer (lbfgs, rprop, rmsprop) 27 | - **reg_bias**: whether to regularize the biases or not 28 | - **beta**: sparsity control parameter 29 | - **num_threads**: maximum number of threads used for some of the matrix operations (set it to the number of CPU cores) 30 | * **layers**: 31 | In this section, we define the network architecture. Layers are identified by a layer index (starting from 1). 32 | Note that layer indices should be given in ascending order (e.g. 1, 2, 3). 33 | Each layer is defined as 34 | - Layer index: 35 | + **activation**: type of activation function (identity, sigmoid, relu, nrelu, tanh) 36 | + **num_nodes**: number of nodes in the given layer 37 | + **type**: layer type (input, hidden, output) 38 | + **partial**: whether the data in the given layer is partially observed or not (applicable only to input/output layers) 39 | + **binary** : whether to enforce binary coding in the layer or not 40 | 41 | Installation/Running 42 | ==================== 43 | 44 | First, you will need to build the Cython modules by running 45 | * bash buildCython.sh 46 | 47 | Running the AutoRec model 48 | * cd nn/autorec 49 | * PYTHONPATH=\<path to NNRec root\> python learner.py -c \<config file\> 50 | 51 | Running the RBM-CF model 52 | * cd nn/cfrbm 53 | * PYTHONPATH=\<path to NNRec root\> python learner.py -c \<config file\> 54 | 55 | Data 56 | ==================== 57 | This program expects input in tab-separated format (see the loading sketch at the end of this README). 58 | 59 | For U-AutoRec: 60 | * \<userid\>\t\<itemid\>\t\<rating\> 61 | 62 | For I-AutoRec: 63 | * \<itemid\>\t\<userid\>\t\<rating\> 64 | 65 | Contact 66 | ======= 67 | If you have any queries, please contact me at mesuvash@gmail.com. 
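Example configuration (illustrative)
====================================
To make the three sections above concrete, the snippet below parses a minimal configuration from a string. It is only a sketch: the file paths are placeholders and the node counts are simply borrowed from the bundled nn/autorec/config/1M/1M.yaml, so adapt both to your own data.

```python
# Illustrative only: a minimal AutoRec configuration with the data/params/layers
# sections described above. The paths are placeholders, not files shipped with NNRec.
import yaml

EXAMPLE_CONFIG = """
data:
  train: /tmp/ml1m/train.tsv   # placeholder path
  test: /tmp/ml1m/test.tsv     # placeholder path
  save: /tmp/ml1m/model        # placeholder path
params:
  reg_bias: False
  beta: None
  lamda: [100.0, 100.0]        # regularization weights, as in the bundled configs
  max_iter: [500]
  batch_size: 10000
  optimizer: lbfgs             # lbfgs | rprop | rmsprop
  num_threads: 4
layers:
  1: {activation: identity, num_nodes: 6040, partial: False, type: input}
  2: {activation: sigmoid, num_nodes: 500, partial: False, type: hidden}
  3: {activation: identity, num_nodes: 6040, partial: True, type: output}
"""

config = yaml.safe_load(EXAMPLE_CONFIG)
print(config["data"]["train"])          # /tmp/ml1m/train.tsv
print(sorted(config["layers"].keys()))  # [1, 2, 3]
```

This is the same dictionary structure that NetworkConfigParser reads from the file passed with -c.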
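Data loading (illustrative)
===========================
The loader in dataUtils/data.py can be exercised on a tiny hand-written file. The sketch below assumes a Unix-like environment (the loader shells out to wc), the Python 2 interpreter the package targets, and PYTHONPATH pointing at the repository root; the paths and toy ratings are made up.

```python
# Illustrative only: write a tiny tab-separated ratings file and load it with
# the helpers from dataUtils/data.py. Paths and ratings below are made up.
from dataUtils.data import loadTrainTest

train_rows = [("u1", "i1", 4.0), ("u1", "i2", 3.0), ("u2", "i1", 5.0)]
with open("/tmp/toy_train.tsv", "w") as f:
    for user, item, rating in train_rows:
        f.write("%s\t%s\t%.1f\n" % (user, item, rating))
with open("/tmp/toy_test.tsv", "w") as f:
    f.write("u2\ti2\t2.0\n")

# loadTrainTest returns sparse CSR matrices for train/test plus the ratings of
# cold-start test entries (user or item never seen in the training file).
train, test, cold = loadTrainTest("/tmp/toy_train.tsv", "/tmp/toy_test.tsv")
print(train.shape)  # (number of users, number of items) seen during training
print(cold)         # [] here, since u2 and i2 both occur in the training file
```

Ratings whose user or item never appears in the training file are not placed in the test matrix; they are returned separately in the cold-start list.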
68 | 69 | -------------------------------------------------------------------------------- /buildCython.sh: -------------------------------------------------------------------------------- 1 | cd nn/blocks 2 | python setup_activations.py build_ext --inplace 3 | cd ../autorec/ 4 | python setup_matmul.py build_ext --inplace 5 | cd ../cfrbm/ 6 | python setup_rbm_matmul.py build_ext --inplace -------------------------------------------------------------------------------- /buildCythonWindows.cmd: -------------------------------------------------------------------------------- 1 | 2 | REM A compiler (For example : visual c compiler) is required 3 | 4 | pip install Cython 5 | cd nn\blocks 6 | python setup_activations.py build_ext --inplace 7 | cd ..\autorec\ 8 | python setup_matmul.py build_ext --inplace 9 | cd ..\cfrbm\ 10 | python setup_rbm_matmul.py build_ext --inplace -------------------------------------------------------------------------------- /dataUtils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesuvash/NNRec/31839522c9691d43d01987b5f67a7ed5ea5fcd81/dataUtils/__init__.py -------------------------------------------------------------------------------- /dataUtils/data.py: -------------------------------------------------------------------------------- 1 | import envoy 2 | import progressbar 3 | import scipy.sparse 4 | 5 | 6 | class Data(object): 7 | 8 | def __init__(self): 9 | self.users = {} 10 | self.items = {} 11 | self.nusers = 0 12 | self.nitems = 0 13 | self.include_time = False 14 | 15 | def update_user_item(self, user, item): 16 | if user not in self.users: 17 | self.users[user] = self.nusers 18 | self.nusers += 1 19 | if item not in self.items: 20 | self.items[item] = self.nitems 21 | self.nitems += 1 22 | 23 | def import_ratings(self, filename, shape=None): 24 | r = envoy.run('wc -l {}'.format(filename)) 25 | num_lines = int(r.std_out.strip().partition(' ')[0]) 26 | bar = progressbar.ProgressBar(maxval=num_lines, widgets=["Loading ratings: ", 27 | progressbar.Bar( 28 | '=', '[', ']'), 29 | ' ', progressbar.Percentage(), 30 | 31 | ' ', progressbar.ETA()]).start() 32 | I, J, V = [], [], [] 33 | with open(filename) as f: 34 | for i, line in enumerate(f): 35 | if (i % 1000) == 0: 36 | bar.update(i % bar.maxval) 37 | userid, itemid, rating = line.split() 38 | self.update_user_item(userid, itemid) 39 | uid = self.users[userid] 40 | iid = self.items[itemid] 41 | I.append(uid) 42 | J.append(iid) 43 | V.append(float(rating)) 44 | bar.finish() 45 | if shape is not None: 46 | _shape = (self.nusers if shape[0] is None else shape[0], 47 | self.nitems if shape[1] is None else shape[1]) 48 | R = scipy.sparse.coo_matrix( 49 | (V, (I, J)), shape=_shape) 50 | else: 51 | R = scipy.sparse.coo_matrix( 52 | (V, (I, J)), shape=(self.nusers, self.nitems)) 53 | self.R = R.tocsr() 54 | 55 | 56 | def loadTestData(d, testpath): 57 | r = envoy.run('wc -l {}'.format(testpath)) 58 | num_lines = int(r.std_out.strip().partition(' ')[0]) 59 | bar = progressbar.ProgressBar(maxval=num_lines, widgets=['Loading test ratings: ', 60 | progressbar.Bar( 61 | '=', '[', ']'), 62 | ' ', progressbar.Percentage(), 63 | 64 | ' ', progressbar.ETA()]).start() 65 | users = set(d.users.keys()) 66 | items = set(d.items.keys()) 67 | cold_start_ratings = [] 68 | 69 | I, J, V = [], [], [] 70 | with open(testpath) as fp: 71 | for i, line in enumerate(fp): 72 | if (i % 1000) == 0: 73 | bar.update(i % bar.maxval) 74 | user, item, rating = map( 75 | 
lambda x: x.lower(), line.strip().split("\t")) 76 | if user in users and item in items: 77 | I.append(d.users[user]) 78 | J.append(d.items[item]) 79 | V.append(float(rating)) 80 | else: 81 | cold_start_ratings.append(float(rating)) 82 | bar.finish() 83 | R = scipy.sparse.coo_matrix( 84 | (V, (I, J)), shape=(len(d.users), len(d.items))) 85 | return R.tocsr(), cold_start_ratings 86 | 87 | 88 | def loadColdStartTestData(d, testpath): 89 | users = set(d.users.keys()) 90 | items = set(d.items.keys()) 91 | cold_start_ratings = [] 92 | with open(testpath) as fp: 93 | for i, line in enumerate(fp): 94 | user, item, rating = map( 95 | lambda x: x.lower(), line.strip().split("\t")) 96 | if (user not in users) or (item not in items): 97 | cold_start_ratings.append(float(rating)) 98 | return cold_start_ratings 99 | 100 | 101 | def loadTrainTest(train_path, test_path, shape=None): 102 | d = Data() 103 | d.import_ratings(train_path, shape) 104 | test, cold = loadTestData(d, test_path) 105 | train = d.R.copy() 106 | return train, test, cold 107 | -------------------------------------------------------------------------------- /nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesuvash/NNRec/31839522c9691d43d01987b5f67a7ed5ea5fcd81/nn/__init__.py -------------------------------------------------------------------------------- /nn/autorec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesuvash/NNRec/31839522c9691d43d01987b5f67a7ed5ea5fcd81/nn/autorec/__init__.py -------------------------------------------------------------------------------- /nn/autorec/ae.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cPickle as pkl 3 | from cython_matmul import * 4 | from lossDeriv import * 5 | from nn.blocks.activations import * 6 | from nn.blocks.nn import * 7 | 8 | 9 | class AE: 10 | 11 | def __init__(self, nn, modelArgs, debug=True): 12 | self.nn = nn 13 | self.debug = debug 14 | self.modelArgs = modelArgs 15 | self.nn.setLimits() 16 | 17 | def setParameters(self, theta): 18 | weights = [] 19 | for i in range(len(self.nn.weights_limit) - 1): 20 | weight = theta[self.nn.weights_limit[i]:self.nn.weights_limit[ 21 | i + 1]].reshape(self.nn.weights[i].shape) 22 | weights.append(weight) 23 | 24 | biases = [] 25 | offset = self.nn.weights_limit[-1] 26 | for i in range(len(self.nn.bias_limit) - 1): 27 | bias = theta[offset + self.nn.bias_limit[i]:offset + 28 | self.nn.bias_limit[i + 1]] 29 | bias = bias.reshape(self.nn.layers[i + 1].bias.shape) 30 | biases.append(bias) 31 | self.nn.weights = weights 32 | self.nn.biases = biases 33 | 34 | def getParameters(self): 35 | params = [] 36 | for weight in self.nn.weights: 37 | params.append(weight.flatten()) 38 | for bias in self.nn.biases: 39 | params.append(bias.flatten()) 40 | return np.concatenate(params) 41 | 42 | def predict(self, train, test): 43 | 44 | inputActivation = train 45 | for i in range(len(self.nn.layers) - 2): 46 | if scipy.sparse.isspmatrix(inputActivation): 47 | forward = inputActivation * self.nn.weights[i] 48 | else: 49 | forward = np.dot(inputActivation, self.nn.weights[i]) 50 | if self.nn.layers[i].dropout is not None: 51 | forward *= (1 - self.nn.layers[i].dropout) 52 | inputActivation = self.nn.layers[ 53 | i + 1].activation.activation(forward + self.nn.biases[i]) 54 | if self.nn.layers[i + 1].isBinary(): 55 | inputActivation 
= self.nn.layers[ 56 | i + 1].activation.binarize(inputActivation) 57 | output_layer = self.nn.layers[-1] 58 | if output_layer.isPartial(): 59 | output = multiplyOuterSparseLayer(inputActivation, 60 | self.nn.weights[-1], 61 | self.nn.biases[-1], test.data, 62 | test.indices, test.indptr, 63 | self.modelArgs.num_threads) 64 | else: 65 | output = np.dot( 66 | inputActivation, self.nn.weights[-1]) + self.nn.biases[-1] 67 | 68 | if self.nn.layers[-2].hasDropout(): 69 | output *= (1 - self.nn.layers[-2].dropout) 70 | 71 | output = output_layer.activation.activation(output) 72 | if self.modelArgs.mean > 0.0: 73 | output += self.modelArgs.mean 74 | 75 | if output_layer.isPartial(): 76 | _max, _min = train.data.max(), train.data.min() 77 | output[output > _max] = _max 78 | output[output < _min] = _min 79 | output = scipy.sparse.csr_matrix((output, test.indices, 80 | test.indptr), shape=test.shape) 81 | 82 | return output 83 | 84 | def getActivationOfLayer(self, train, layerno): 85 | inputActivation = train 86 | assert((layerno > 0) and (layerno < len(self.nn.layers))) 87 | for i in range(layerno): 88 | if scipy.sparse.isspmatrix(inputActivation): 89 | forward = inputActivation * self.nn.weights[i] 90 | else: 91 | forward = np.dot(inputActivation, self.nn.weights[i]) 92 | if self.nn.layers[i].dropout is not None: 93 | forward *= (1 - self.nn.layers[i].dropout) 94 | inputActivation = self.nn.layers[ 95 | i + 1].activation.activation(forward + self.nn.biases[i]) 96 | return inputActivation 97 | 98 | def saveModel(self, path): 99 | print "Saving model to path : ", path 100 | pkl.dump(self, open(path, "wb")) 101 | -------------------------------------------------------------------------------- /nn/autorec/ae_utils.py: -------------------------------------------------------------------------------- 1 | class Counter(object): 2 | 3 | """docstring for Counter""" 4 | 5 | def __init__(self): 6 | super(Counter, self).__init__() 7 | self.count = 0 8 | 9 | def increment(self): 10 | self.count += 1 11 | 12 | 13 | class ModelArgs(object): 14 | 15 | """docstring for ModelArgs""" 16 | 17 | def __init__(self, learn_rate=0.001, lamda=1.0, regularize_bias=True, 18 | isDenoising=False, noisePercent=0.0, beta=None, momentum=0.8, 19 | num_threads=16, mean=0.0, max_iter=200, optimizer=None, 20 | batch_size=20000): 21 | super(ModelArgs, self).__init__() 22 | self.learn_rate = learn_rate 23 | self.lamda = lamda 24 | self.regularize_bias = regularize_bias 25 | self.isDenoising = isDenoising 26 | self.noisePercent = noisePercent 27 | self.beta = beta 28 | self.momentum = momentum 29 | self.num_threads = num_threads 30 | self.mean = mean 31 | self.max_iter = max_iter 32 | self.optimizer = optimizer 33 | self.batch_size = batch_size 34 | 35 | def __str__(self): 36 | string = "" 37 | for key in self.__dict__.keys(): 38 | string += "%s: %s\t" % (key, str(self.__dict__[key])) 39 | return string 40 | -------------------------------------------------------------------------------- /nn/autorec/config/1M/1M.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train: /Users/suvashsedhain/Desktop/90folds/train.flipped.1 3 | test: /Users/suvashsedhain/Desktop/90folds/test.flipped.1 4 | save: /Users/suvashsedhain/Desktop/90folds/model.1 5 | params: 6 | reg_bias: False 7 | beta: None 8 | lamda: [100.0,100.0] 9 | mean_normalization: False 10 | pretraining: False 11 | max_iter: [500] 12 | batch_size: 10000 13 | optimizer: lbfgs 14 | num_threads: 16 15 | layers: 16 | 1: 17 | activation: 
identity 18 | num_nodes: 6040 19 | partial: False 20 | type: input 21 | 2: 22 | activation: sigmoid 23 | num_nodes: 500 24 | partial: False 25 | type: hidden 26 | binary: True 27 | 3: 28 | activation: identity 29 | num_nodes: 6040 30 | partial: True 31 | type: output 32 | 33 | 34 | -------------------------------------------------------------------------------- /nn/autorec/config/1M/1MU.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train: /Users/suvashsedhain/Desktop/90folds/train.flipped.1 3 | test: /Users/suvashsedhain/Desktop/90folds/test.flipped.1 4 | save: /Users/suvashsedhain/Desktop/90folds/umodel.1 5 | params: 6 | reg_bias: False 7 | beta: None 8 | lamda: [100.0,100.0] 9 | mean_normalization: False 10 | pretraining: False 11 | max_iter: [500] 12 | batch_size: 10000 13 | optimizer: rprop 14 | num_threads: 16 15 | layers: 16 | 1: 17 | activation: identity 18 | num_nodes: 3706 19 | partial: False 20 | type: input 21 | 2: 22 | activation: sigmoid 23 | num_nodes: 500 24 | partial: False 25 | type: hidden 26 | binary: True 27 | 3: 28 | activation: identity 29 | num_nodes: 3706 30 | partial: True 31 | type: output 32 | 33 | 34 | -------------------------------------------------------------------------------- /nn/autorec/cython_matmul.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | from cython.parallel import prange 5 | 6 | @cython.boundscheck(False) 7 | @cython.wraparound(False) 8 | @cython.nonecheck(False) 9 | cpdef np.ndarray[double, ndim = 2] multiplyWithSelectedIndices(double[:, :] x, 10 | int[:] indices, 11 | double[:, :] w): 12 | cdef: 13 | np.ndarray[double, ndim = 2] result 14 | int i, n, m, j 15 | 16 | m = x.shape[1] 17 | n = w.shape[1] 18 | 19 | result = np.zeros((1, n)) 20 | 21 | for i in range(n): 22 | for j in range(m): 23 | result[0, i] = result[0, i] + w[indices[j], i] * x[0, j] 24 | return result 25 | 26 | @cython.boundscheck(False) 27 | @cython.wraparound(False) 28 | @cython.nonecheck(False) 29 | cpdef np.ndarray[double, ndim = 2] multiplyWithSelectedIndicesTrans(double[:, :] x, 30 | int[:] indices, 31 | double[:, :] w): 32 | cdef: 33 | np.ndarray[double, ndim = 2] result 34 | int i, n, m, j, index 35 | 36 | m = x.shape[1] 37 | n = len(indices) 38 | 39 | result = np.zeros((1, n)) 40 | 41 | for i in range(n): 42 | index = indices[i] 43 | for j in range(m): 44 | result[0, i] = result[0, i] + w[j, index] * x[0, j] 45 | return result 46 | 47 | 48 | @cython.boundscheck(False) 49 | @cython.wraparound(False) 50 | @cython.nonecheck(False) 51 | cpdef np.ndarray[double, ndim = 1] multiplyOuterSparseLayer(double[:, :] hiddenActivation, 52 | double[:, :] W2, 53 | double[:, :] vis_bias, 54 | double[:] data, 55 | int[:] indices, 56 | int[:] indptr, 57 | int num_threads): 58 | cdef: 59 | np.ndarray[double, ndim = 1] result 60 | int i, j, k, l, m, n, start, end 61 | double _buffer 62 | 63 | m = hiddenActivation.shape[0] 64 | n = hiddenActivation.shape[1] 65 | result = np.zeros(len(data)) 66 | for i in prange(m, nogil=True, num_threads=num_threads): 67 | start = indptr[i] 68 | end = indptr[i + 1] 69 | for j in range(indptr[i + 1] - indptr[i]): 70 | l = indices[start] 71 | _buffer = 0.0 72 | for k in range(n): 73 | _buffer = _buffer + hiddenActivation[i, k] * W2[k, l] 74 | result[start] = _buffer + vis_bias[0, l] 75 | start = start + 1 76 | return result 77 | 78 | 79 | @cython.boundscheck(False) 80 | @cython.wraparound(False) 81 
| @cython.nonecheck(False) 82 | cpdef np.ndarray[double, ndim = 2] addWithSelectedIndices(double[:, :] x, 83 | int[:] indices, 84 | double[:, :] y): 85 | cdef: 86 | np.ndarray[double, ndim = 2] result 87 | int i, m 88 | 89 | m = x.shape[1] 90 | 91 | result = np.zeros((1, m)) 92 | 93 | for i in range(m): 94 | result[0, i] = x[0, i] + y[0, indices[i]] 95 | return result 96 | 97 | 98 | @cython.boundscheck(False) 99 | @cython.wraparound(False) 100 | @cython.nonecheck(False) 101 | cpdef addMatricesWithSelectedIndices(double[:, :] w1, 102 | int[:] indices, int indexAxis, 103 | double[:, :] w2): 104 | cdef: 105 | int i, j, m, n, index 106 | 107 | m = w2.shape[0] 108 | n = w2.shape[1] 109 | 110 | if (indexAxis == 1): 111 | for j in range(n): 112 | index = indices[j] 113 | for i in range(m): 114 | w1[i, index] = w1[i, index] + w2[i, j] 115 | else: 116 | for i in range(m): 117 | index = indices[i] 118 | for j in range(n): 119 | w1[index, j] = w1[index, j] + w2[i, j] 120 | 121 | 122 | 123 | @cython.boundscheck(False) 124 | @cython.wraparound(False) 125 | @cython.nonecheck(False) 126 | cpdef addMatricesWithSelectedIndicesSGD(double[:, :] w1, 127 | int[:] indices, int indexAxis, 128 | double[:, :] w2, 129 | double learn_rate, 130 | double decay): 131 | 132 | cdef: 133 | int i, j, m, n, index 134 | 135 | m = w2.shape[0] 136 | n = w2.shape[1] 137 | 138 | if (indexAxis == 1): 139 | for j in range(n): 140 | index = indices[j] 141 | for i in range(m): 142 | w1[i, index] = w1[i, index] - learn_rate * \ 143 | (w2[i, j] + decay * w1[i, index]) 144 | else: 145 | for i in range(m): 146 | index = indices[i] 147 | for j in range(n): 148 | w1[index, j] = w1[index, j] - learn_rate * \ 149 | (w2[i, j] + decay * w1[index, j]) 150 | 151 | 152 | @cython.boundscheck(False) 153 | @cython.wraparound(False) 154 | @cython.nonecheck(False) 155 | cpdef dropout(double[:, :] mat, double p): 156 | cdef: 157 | int i, j, m, n, numdrop 158 | np.ndarray[long, ndim = 1] indices 159 | m = mat.shape[0] 160 | n = mat.shape[1] 161 | numdrop = int(n * p) 162 | for i in range(m): 163 | indices = np.random.choice(n, numdrop, replace=False) 164 | for j in indices: 165 | mat[i, j] = 0.0 166 | -------------------------------------------------------------------------------- /nn/autorec/generateNNFeatures.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nn.blocks.networkConfigParser import NetworkConfigParser 3 | from modelLoader import loadModel, LoadDataAndMapping 4 | 5 | 6 | def dumpArray(array, outpath, mapping): 7 | fp = open(outpath, "wb") 8 | m, n = array.shape 9 | for i in range(m): 10 | for j in range(n): 11 | value = array[i, j] 12 | if value != 0: 13 | fp.write("%s\t%d\t%f\n" % (mapping[i], j, array[i, j])) 14 | fp.close() 15 | 16 | 17 | def dumpFeatures(config_path, mtype, outpath): 18 | model = loadModel(config_path) 19 | train, test, usermap, itemmap = LoadDataAndMapping(config_path) 20 | target_layer = 1 21 | targetLayerData = model.getActivationOfLayer(train, target_layer) 22 | if mtype == "user": 23 | dumpArray(targetLayerData, outpath, usermap) 24 | if mtype == "item": 25 | dumpArray(targetLayerData, outpath, itemmap) 26 | 27 | if __name__ == '__main__': 28 | import argparse 29 | parser = argparse.ArgumentParser(description='Description') 30 | parser.add_argument( 31 | '--config', '-c', help='configuration file', required=True) 32 | parser.add_argument( 33 | '--mtype', '-m', help='configuration file', required=True) 34 | parser.add_argument( 35 | '--outfile', 
'-o', help='configuration file', required=True) 36 | args = parser.parse_args() 37 | config_path = args.config 38 | mtype = args.mtype 39 | outfile = args.outfile 40 | dumpFeatures(config_path, mtype, outfile) 41 | -------------------------------------------------------------------------------- /nn/autorec/learner.py: -------------------------------------------------------------------------------- 1 | from utils.metrics.evaluate import EvaluateNN 2 | from nn.blocks.networkConfigParser import NetworkConfigParser 3 | from lossDeriv import * 4 | from dataUtils.data import loadTrainTest 5 | from ae import AE 6 | from optimizers import getOptimizer 7 | from ae_utils import Counter, ModelArgs 8 | 9 | 10 | def train(config_path): 11 | modelArgs = NetworkConfigParser.constructModelArgs(config_path, ModelArgs) 12 | nn = NetworkConfigParser.constructNetwork(config_path) 13 | train_path, test_path, save_path = NetworkConfigParser.getDataInfo( 14 | config_path) 15 | print nn 16 | # TODO : Arguments 17 | num_hid = nn.layers[1].num_units 18 | shape = (None, nn.layers[0].num_units) 19 | train, test, cold = loadTrainTest(train_path, test_path, 20 | shape=shape) 21 | ae = AE(nn, modelArgs) 22 | evaluate = EvaluateNN(ae) 23 | theta = ae.nn.getFlattenParams() 24 | ae.setParameters(theta) 25 | iterCounter = Counter() 26 | optimizer = getOptimizer(modelArgs.optimizer, ae, evaluate, theta, 27 | train, test, nn, modelArgs, iterCounter, 28 | modelArgs.batch_size, 29 | modelArgs.max_iter[0]) 30 | 31 | optimizer.step_grow = 5.0 32 | k = 0 33 | for info in optimizer: 34 | print "Iteration %d" % k 35 | if k == 5: 36 | optimizer.step_grow = 1.2 37 | if k % 5 == 0: 38 | ae.setParameters(theta) 39 | rmse, mae = evaluate.calculateRMSEandMAE(train, test) 40 | print "Fold :%d Test RMSE: %f Test MAE: %f" % (i, 41 | rmse, mae) 42 | if k > modelArgs.max_iter[0]: 43 | break 44 | k += 1 45 | if save_path: 46 | _theta = ae.getParameters() 47 | np.save(save_path, _theta) 48 | 49 | 50 | if __name__ == '__main__': 51 | import argparse 52 | parser = argparse.ArgumentParser(description='Description') 53 | parser.add_argument( 54 | '--config', '-c', help='configuration file', required=True) 55 | args = parser.parse_args() 56 | config_path = args.config 57 | i = 1 58 | train(config_path) 59 | -------------------------------------------------------------------------------- /nn/autorec/lossDeriv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | from nn.blocks.cython_activations import * 4 | from cython_matmul import * 5 | from nn.blocks.nn import LayerType 6 | # from sklearn.utils import shuffle 7 | from copy import deepcopy 8 | 9 | 10 | EPS = 10e-15 11 | 12 | 13 | def _getLossUpdateDerivative(batch_data, weights, biases, 14 | dWeights, dBiases, NN, modelArg): 15 | batch_shape = batch_data.shape 16 | ######################Forward pass###################### 17 | fActivation = [] 18 | layerInput = batch_data 19 | cost = 0.0 20 | for l, layer in enumerate(NN.layers): 21 | if layer.layerType == LayerType.INPUT: 22 | activation = layerInput 23 | elif layer.layerType == LayerType.HIDDEN: 24 | if scipy.sparse.isspmatrix(layerInput): 25 | x = layerInput * weights[l - 1] + biases[l - 1] 26 | else: 27 | x = np.dot(layerInput, weights[l - 1]) + biases[l - 1] 28 | activation = layer.activation.activation(x) 29 | elif layer.layerType == LayerType.OUTPUT: 30 | if layer.isPartial(): 31 | x = multiplyOuterSparseLayer(layerInput, weights[l - 1], 32 | biases[l - 1], 33 | 
batch_data.data, 34 | batch_data.indices, 35 | batch_data.indptr, 36 | modelArg.num_threads) 37 | activation = layer.activation.activation(x) 38 | activation = scipy.sparse.csr_matrix((activation, 39 | batch_data.indices, 40 | batch_data.indptr), 41 | shape=batch_shape) 42 | else: 43 | x = np.dot(layerInput, weights[l - 1]) + biases[l - 1] 44 | activation = layer.activation.activation(x) 45 | 46 | if (layer.dropout is not None) and (layer.dropout != 0): 47 | dropout(activation, layer.dropout) 48 | fActivation.append(activation) 49 | # binarize for the forward propagation 50 | if layer.isBinary(): 51 | layerInput = layer.activation.binarize(activation) 52 | else: 53 | layerInput = activation 54 | 55 | ######################Calculate error###################### 56 | # sparse csr matrix 57 | if NN.layers[-1].isPartial(): 58 | diff = fActivation[-1].data - batch_data.data 59 | else: 60 | diff = fActivation[-1] - batch_data 61 | sum_of_squares_error = 0.5 * np.sum(np.power(diff, 2)) 62 | cost += sum_of_squares_error 63 | 64 | ######################BackPropagation###################### 65 | l = len(NN.layers) - 1 66 | for layer in NN.layers[::-1]: 67 | if layer.layerType == LayerType.OUTPUT: 68 | if layer.isPartial(): 69 | delta = np.multiply( 70 | diff, layer.activation.derivative(fActivation[l].data)) 71 | delta = scipy.sparse.csr_matrix((delta, 72 | batch_data.indices, 73 | batch_data.indptr), 74 | shape=batch_shape) 75 | else: 76 | delta = np.multiply( 77 | diff, layer.activation.derivative(fActivation[l])) 78 | 79 | if (scipy.sparse.isspmatrix(fActivation[l - 1]) or 80 | scipy.sparse.isspmatrix(delta)): 81 | 82 | wderiv = fActivation[l - 1].T * delta 83 | else: 84 | wderiv = np.dot(fActivation[l - 1].T, delta) 85 | bderiv = delta.sum(axis=0) 86 | dWeights[l - 1] += wderiv 87 | dBiases[l - 1] += bderiv 88 | 89 | if layer.layerType == LayerType.HIDDEN: 90 | if layer.isSparse(): 91 | rho_hat = fActivation[l].sum( 92 | axis=0) / fActivation[l].shape[0] 93 | rho = layer.sparsity 94 | KL_divergence = modelArg.beta * np.sum( 95 | rho * np.log(rho / rho_hat) + 96 | (1 - rho) * np.log((1 - rho) / ((1 - rho_hat) + EPS))) 97 | cost += KL_divergence 98 | KL_grad = modelArg.beta * \ 99 | (-(rho / rho_hat) + 100 | ((1 - rho) / ((1 - rho_hat) + EPS))) 101 | 102 | if scipy.sparse.issparse(delta): 103 | if layer.isSparse(): 104 | delta = np.multiply( 105 | delta * weights[l].T + KL_grad, 106 | layer.activation.derivative(fActivation[l])) 107 | else: 108 | delta = np.multiply( 109 | delta * weights[l].T, 110 | layer.activation.derivative(fActivation[l])) 111 | else: 112 | if layer.isSparse(): 113 | delta = np.multiply( 114 | np.dot(delta, weights[l].T) + KL_grad, 115 | layer.activation.derivative(fActivation[l])) 116 | else: 117 | delta = np.multiply( 118 | np.dot(delta, weights[l].T), 119 | layer.activation.derivative(fActivation[l])) 120 | 121 | if (scipy.sparse.isspmatrix(fActivation[l - 1]) 122 | or scipy.sparse.isspmatrix(delta)): 123 | wderiv = fActivation[l - 1].T * delta 124 | else: 125 | wderiv = np.dot(fActivation[l - 1].T, delta) 126 | dWeights[l - 1] += wderiv 127 | if layer.isBiasEnabled: 128 | bderiv = delta.sum(axis=0) 129 | dBiases[l - 1] += bderiv 130 | l = l - 1 131 | return cost 132 | 133 | 134 | def getCostDeriv(theta, user_item_rating, NN, 135 | modelArg, counter, batch_size): 136 | counter.increment() 137 | ##################################### Unrolling/ Initialization ########## 138 | weights = [] 139 | for i in range(len(NN.weights_limit) - 1): 140 | weight = 
theta[NN.weights_limit[i]:NN.weights_limit[i + 1]] 141 | weight = weight.reshape(NN.weights[i].shape) 142 | weights.append(weight) 143 | 144 | biases = [] 145 | offset = NN.weights_limit[-1] 146 | for i in range(len(NN.bias_limit) - 1): 147 | bias = theta[offset + NN.bias_limit[i]:offset + 148 | NN.bias_limit[i + 1]].reshape(NN.layers[i + 1].bias.shape) 149 | biases.append(bias) 150 | 151 | dWeights = [] 152 | for weight in weights: 153 | dWeights.append(np.zeros(shape=weight.shape)) 154 | 155 | dBiases = [] 156 | for bias in biases: 157 | dBiases.append(np.zeros(shape=bias.shape)) 158 | 159 | ##################################### Batch loop ######################### 160 | 161 | m, n = user_item_rating.shape 162 | batches = range(0, m, batch_size) 163 | if batches[-1] != m: 164 | batches.append(m) 165 | 166 | cost = 0.0 167 | for i in range(len(batches) - 1): 168 | start = batches[i] 169 | end = batches[i + 1] 170 | batch_data = user_item_rating[start:end, :] 171 | loss = _getLossUpdateDerivative(batch_data, weights, biases, 172 | dWeights, dBiases, NN, modelArg) 173 | cost += loss 174 | 175 | if not modelArg.regularize_bias: 176 | weight_decay = reduce( 177 | lambda x, y: x + y, map(lambda z: 178 | np.power( 179 | weights[z], 2).sum() * 180 | modelArg.lamda[z], 181 | range(len(weights)))) 182 | else: 183 | weight_decay = reduce( 184 | lambda x, y: x + y, map(lambda z: 185 | np.power( 186 | weights[z], 2).sum() * 187 | modelArg.lamda[z], 188 | range(len(weights)))) 189 | weight_decay = reduce( 190 | lambda x, y: x + y, map(lambda z: 191 | np.power( 192 | biases[z], 2).sum() * 193 | modelArg.lamda[z], 194 | range(len(biases)))) 195 | cost += weight_decay 196 | 197 | for i in range(len(dWeights)): 198 | # dWeights[i] += modelArg.lamda * weights[i] 199 | dWeights[i] += 2 * modelArg.lamda[i] * weights[i] 200 | 201 | if modelArg.regularize_bias: 202 | for i in range(len(dBiases)): 203 | # dBiases[i] += modelArg.lamda * biases[i] 204 | dBiases[i] += 2 * modelArg.lamda[i] * biases[i] 205 | 206 | theta_grad = np.concatenate(map(lambda x: x.flatten(), dWeights + dBiases)) 207 | return [cost, theta_grad] 208 | 209 | 210 | def getCostDerivBatch(theta, user_item_rating, NN, 211 | modelArg, counter, batch_size): 212 | counter.increment() 213 | # user_item_rating = shuffle(user_item_rating) 214 | ##################################### Unrolling/ Initialization ########## 215 | weights = [] 216 | for i in range(len(NN.weights_limit) - 1): 217 | weight = theta[NN.weights_limit[i]:NN.weights_limit[i + 1]] 218 | weight = weight.reshape(NN.weights[i].shape) 219 | weights.append(weight) 220 | 221 | biases = [] 222 | offset = NN.weights_limit[-1] 223 | for i in range(len(NN.bias_limit) - 1): 224 | bias = theta[offset + NN.bias_limit[i]:offset + 225 | NN.bias_limit[i + 1]].reshape(NN.layers[i + 1].bias.shape) 226 | biases.append(bias) 227 | 228 | dWeights = [] 229 | for weight in weights: 230 | dWeights.append(np.zeros(shape=weight.shape)) 231 | 232 | dBiases = [] 233 | for bias in biases: 234 | dBiases.append(np.zeros(shape=bias.shape)) 235 | 236 | ##################################### Batch loop ######################### 237 | 238 | m, n = user_item_rating.shape 239 | batches = range(0, m, batch_size) 240 | if batches[-1] != m: 241 | batches.append(m) 242 | 243 | cost = 0.0 244 | for i in range(len(batches) - 1): 245 | start = batches[i] 246 | end = batches[i + 1] 247 | batch_data = user_item_rating[start:end, :] 248 | loss = _getLossUpdateDerivative(batch_data, weights, biases, 249 | dWeights, dBiases, NN, 
modelArg) 250 | cost += loss 251 | 252 | if not modelArg.regularize_bias: 253 | weight_decay = reduce( 254 | lambda x, y: x + y, map(lambda z: 255 | np.power( 256 | weights[z], 2).sum() * 257 | modelArg.lamda[z], 258 | range(len(weights)))) 259 | else: 260 | weight_decay = reduce( 261 | lambda x, y: x + y, map(lambda z: 262 | np.power( 263 | weights[z], 2).sum() * 264 | modelArg.lamda[z], 265 | range(len(weights)))) 266 | weight_decay = reduce( 267 | lambda x, y: x + y, map(lambda z: 268 | np.power( 269 | biases[z], 2).sum() * 270 | modelArg.lamda[z], 271 | range(len(biases)))) 272 | cost += weight_decay 273 | 274 | for i in range(len(dWeights)): 275 | dWeights[i] += 2 * modelArg.lamda[i] * weights[i] 276 | 277 | if modelArg.regularize_bias: 278 | for i in range(len(dBiases)): 279 | dBiases[i] += 2 * modelArg.lamda[i] * biases[i] 280 | 281 | theta_grad = np.concatenate( 282 | map(lambda x: x.flatten(), dWeights + dBiases)) 283 | return [cost, theta_grad] 284 | 285 | 286 | def updateSGD(user_item_rating, NN, modelArg, counter, batch_size, 287 | alpha, dWeights_old, dBiases_old): 288 | counter.increment() 289 | 290 | # user_item_rating = shuffle(user_item_rating) 291 | weights = NN.weights 292 | biases = NN.biases 293 | 294 | dWeights = [] 295 | for weight in weights: 296 | dWeights.append(np.zeros(shape=weight.shape)) 297 | 298 | dBiases = [] 299 | for bias in biases: 300 | dBiases.append(np.zeros(shape=bias.shape)) 301 | 302 | ##################################### Batch loop ######################### 303 | 304 | m, n = user_item_rating.shape 305 | batches = range(0, m, batch_size) 306 | if batches[-1] != m: 307 | batches.append(m) 308 | cost = 0.0 309 | for i in range(len(batches) - 1): 310 | start = batches[i] 311 | end = batches[i + 1] 312 | batch_data = user_item_rating[start:end, :] 313 | loss = _getLossUpdateDerivative(batch_data, weights, biases, 314 | dWeights, dBiases, NN, modelArg) 315 | cost += loss 316 | 317 | if not modelArg.regularize_bias: 318 | weight_decay = reduce( 319 | lambda x, y: x + y, map(lambda z: 320 | np.power( 321 | weights[z], 2).sum() * 322 | modelArg.lamda[z], 323 | range(len(weights)))) 324 | else: 325 | weight_decay = reduce( 326 | lambda x, y: x + y, map(lambda z: 327 | np.power( 328 | weights[z], 2).sum() * 329 | modelArg.lamda[z], 330 | range(len(weights)))) 331 | weight_decay = reduce( 332 | lambda x, y: x + y, map(lambda z: 333 | np.power( 334 | biases[z], 2).sum() * 335 | modelArg.lamda[z], 336 | range(len(biases)))) 337 | cost += weight_decay 338 | 339 | for i in range(len(dWeights)): 340 | # dWeights[i] += modelArg.lamda * weights[i] 341 | dWeights[i] += 2 * modelArg.lamda[i] * weights[i] 342 | 343 | if modelArg.regularize_bias: 344 | for i in range(len(dBiases)): 345 | # dBiases[i] += modelArg.lamda * biases[i] 346 | dBiases[i] = dBiases[i].reshape(dBiases_old[i].shape) 347 | dBiases[i] += 2 * modelArg.lamda[i] * biases[i] 348 | 349 | for i in range(len(weights)): 350 | temp_wderiv = ( 351 | alpha * dWeights[i] + dWeights_old[i] * modelArg.momentum) 352 | weights[i] -= temp_wderiv 353 | dWeights_old[i] = temp_wderiv 354 | 355 | for i in range(len(biases)): 356 | temp_bderiv = ( 357 | alpha * dBiases[i] + dBiases_old[i] * modelArg.momentum) 358 | biases[i] -= temp_bderiv 359 | dBiases_old[i] = temp_bderiv 360 | return dWeights_old, dBiases_old 361 | 362 | 363 | def updateAdagrad(user_item_rating, NN, modelArg, counter, batch_size, 364 | alpha, dWeights_old, dBiases_old): 365 | counter.increment() 366 | 367 | # user_item_rating = 
shuffle(user_item_rating) 368 | weights = NN.weights 369 | biases = NN.biases 370 | 371 | dWeights = [] 372 | for weight in weights: 373 | dWeights.append(np.zeros(shape=weight.shape)) 374 | 375 | dBiases = [] 376 | for bias in biases: 377 | dBiases.append(np.zeros(shape=bias.shape)) 378 | 379 | ##################################### Batch loop ######################### 380 | 381 | m, n = user_item_rating.shape 382 | batches = range(0, m, batch_size) 383 | if batches[-1] != m: 384 | batches.append(m) 385 | cost = 0.0 386 | for i in range(len(batches) - 1): 387 | start = batches[i] 388 | end = batches[i + 1] 389 | batch_data = user_item_rating[start:end, :] 390 | loss = _getLossUpdateDerivative(batch_data, weights, biases, 391 | dWeights, dBiases, NN, modelArg) 392 | cost += loss 393 | if not modelArg.regularize_bias: 394 | weight_decay = reduce( 395 | lambda x, y: x + y, map(lambda z: 396 | np.power( 397 | weights[z], 2).sum() * 398 | modelArg.lamda[z], 399 | range(len(weights)))) 400 | else: 401 | weight_decay = reduce( 402 | lambda x, y: x + y, map(lambda z: 403 | np.power( 404 | weights[z], 2).sum() * 405 | modelArg.lamda[z], 406 | range(len(weights)))) 407 | weight_decay = reduce( 408 | lambda x, y: x + y, map(lambda z: 409 | np.power( 410 | biases[z], 2).sum() * 411 | modelArg.lamda[z], 412 | range(len(biases)))) 413 | cost += weight_decay 414 | 415 | for i in range(len(dWeights)): 416 | # dWeights[i] += modelArg.lamda * weights[i] 417 | dWeights[i] += 2 * modelArg.lamda[i] * weights[i] 418 | 419 | if modelArg.regularize_bias: 420 | for i in range(len(dBiases)): 421 | # dBiases[i] += modelArg.lamda * biases[i] 422 | dBiases[i] = dBiases[i].reshape(dBiases_old[i].shape) 423 | dBiases[i] += 2 * modelArg.lamda[i] * biases[i] 424 | 425 | if counter.count == 1: 426 | dWeights_old[i] += np.power(dWeights[i], 2) 427 | dBiases_old[i] += np.power(dBiases[i], 2) 428 | continue 429 | 430 | for i in range(len(weights)): 431 | temp_wderiv = np.divide( 432 | dWeights[i], np.sqrt(dWeights_old[i] + 1)) * alpha 433 | weights[i] -= temp_wderiv 434 | dWeights_old[i] += np.power(dWeights[i], 2) 435 | 436 | for i in range(len(biases)): 437 | temp_bderiv = np.divide( 438 | dBiases[i], np.sqrt(dBiases_old[i]) + 1) * alpha 439 | biases[i] -= temp_bderiv 440 | dBiases_old[i] += np.power(dBiases[i], 2) 441 | 442 | return dWeights_old, dBiases_old 443 | 444 | 445 | def trainSGD(train, test, num_iter, evaluate, weights, biases, learn_rate, modelArg, NN, counter, batch_size, driver=False): 446 | old_rmse = float("inf") 447 | dWeights_old = [] 448 | for weight in weights: 449 | dWeights_old.append(np.zeros(shape=weight.shape)) 450 | 451 | dBiases_old = [] 452 | for bias in biases: 453 | dBiases_old.append(np.zeros(shape=bias.shape)) 454 | 455 | for i in range(num_iter): 456 | # t = shuffle(train) 457 | t = train 458 | dWeights_old, dBiases_old = updateSGD(t, NN, modelArg, counter, 459 | batch_size, learn_rate, 460 | dWeights_old, dBiases_old) 461 | 462 | if (i % 5 == 0): 463 | trmse, tmae = evaluate.calculateRMSEandMAE(train, test) 464 | rmse, mae = evaluate.calculateRMSEandMAE(train, train) 465 | sign = "+" if rmse < old_rmse else "-" 466 | print "Fold :%d Test RMSE: %f MAE: %f \t %s" % (i, trmse, tmae, sign) 467 | 468 | # print "Fold :%d Train RMSE: %f MAE: %f" % (i, rmse, mae) 469 | if driver: 470 | if rmse < old_rmse: 471 | bestWeights = deepcopy(NN.weights) 472 | bestBiases = deepcopy(NN.biases) 473 | learn_rate *= 1.01 474 | old_rmse = rmse 475 | elif rmse > old_rmse: 476 | NN.weights = bestWeights 477 | 
NN.biases = bestBiases 478 | print "Reducing learning rate" 479 | learn_rate *= 0.5 480 | 481 | if learn_rate < EPS: 482 | break 483 | 484 | 485 | def trainAdagrad(train, test, num_iter, evaluate, weights, biases, learn_rate, modelArg, NN, counter, batch_size, driver=False): 486 | old_rmse = float("inf") 487 | dWeights_old = [] 488 | for weight in weights: 489 | dWeights_old.append(np.zeros(shape=weight.shape)) 490 | 491 | dBiases_old = [] 492 | for bias in biases: 493 | dBiases_old.append(np.zeros(shape=bias.shape)) 494 | 495 | for i in range(num_iter): 496 | # t = shuffle(train) 497 | t = train 498 | dWeights_old, dBiases_old = updateAdagrad(t, NN, modelArg, counter, 499 | batch_size, learn_rate, 500 | dWeights_old, dBiases_old) 501 | if (i % 5 == 0): 502 | rmse, mae = evaluate.calculateRMSEandMAE(train, test) 503 | print "Fold :%d Test RMSE: %f MAE: %f" % (i, rmse, mae) 504 | -------------------------------------------------------------------------------- /nn/autorec/modelLoader.py: -------------------------------------------------------------------------------- 1 | from ae import AE 2 | import numpy as np 3 | from nn.blocks.networkConfigParser import NetworkConfigParser 4 | from dataUtils.data import Data, loadTestData 5 | from utils.metrics.evaluate import EvaluateNN 6 | from ae_utils import Counter, ModelArgs 7 | 8 | 9 | def loadModel(config_path): 10 | modelArgs = NetworkConfigParser.constructModelArgs(config_path, ModelArgs) 11 | nn = NetworkConfigParser.constructNetwork(config_path) 12 | train_path, test_path, save_path = NetworkConfigParser.getDataInfo( 13 | config_path) 14 | ae = AE(nn, modelArgs) 15 | theta = np.load(save_path + ".npy") 16 | ae.setParameters(theta) 17 | return ae 18 | 19 | 20 | def loadData(config_path): 21 | train_path, test_path, save_path = NetworkConfigParser.getDataInfo( 22 | config_path) 23 | nn = NetworkConfigParser.constructNetwork(config_path) 24 | d = Data() 25 | d.import_ratings(train_path, shape=(None, nn.layers[0].num_units)) 26 | train = d.R.copy() 27 | test = loadTestData(d, test_path) 28 | return train, test 29 | 30 | 31 | def LoadDataAndMapping(config_path): 32 | train_path, test_path, save_path = NetworkConfigParser.getDataInfo( 33 | config_path) 34 | nn = NetworkConfigParser.constructNetwork(config_path) 35 | d = Data() 36 | d.import_ratings(train_path, shape=(None, nn.layers[0].num_units)) 37 | train = d.R.copy() 38 | test = loadTestData(d, test_path) 39 | usermap = {v: k for k, v in d.users.items()} 40 | itemmap = {v: k for k, v in d.items.items()} 41 | return train, test, usermap, itemmap 42 | 43 | 44 | # def evaluateFolds(config_path, nfolds): 45 | # rmses = [] 46 | # maes = [] 47 | # for i in range(1, nfolds + 1): 48 | # model = loadModel(config_path) 49 | # train, test = loadData(config_path) 50 | # evaluate = EvaluateNN(model) 51 | # rmse, mae = evaluate.calculateRMSEandMAE(train, test) 52 | # rmses.append(rmse) 53 | # maes.append(mae) 54 | # return rmses, maes 55 | 56 | # if __name__ == '__main__': 57 | # import argparse 58 | # from utils.statUtil import getMeanCI 59 | # parser = argparse.ArgumentParser(description='Description') 60 | # parser.add_argument( 61 | # '--config', '-c', help='configuration file', required=True) 62 | # parser.add_argument( 63 | # '--nfold', '-n', help='number of folds ', required=True) 64 | # args = parser.parse_args() 65 | # nfolds = int(args.nfold) 66 | # config_path = args.config 67 | 68 | # rmses, maes = evaluateFolds(config_path, nfolds) 69 | # ci_rmse = getMeanCI(rmses, 0.95) 70 | # ci_mae = 
getMeanCI(maes, 0.95) 71 | # print ci_rmse 72 | # print ci_mae 73 | -------------------------------------------------------------------------------- /nn/autorec/optimizers.py: -------------------------------------------------------------------------------- 1 | import scipy.optimize 2 | from climin import * 3 | import itertools 4 | # from sklearn.utils import shuffle 5 | from lossDeriv import * 6 | 7 | 8 | class LBFGS(object): 9 | 10 | """docstring for LBFGS""" 11 | 12 | def __init__(self, ae, evaluate, theta, lossDeriv, train, test, 13 | nn, modelArgs, iterCounter, batch_size, max_iter): 14 | super(LBFGS, self).__init__() 15 | self.ae = ae 16 | self.evaluate = evaluate 17 | self.theta = theta 18 | self.lossDeriv = lossDeriv 19 | self.train = train 20 | self.test = test 21 | self.nn = nn 22 | self.modelArgs = modelArgs 23 | self.iterCounter = iterCounter 24 | self.batch_size = batch_size 25 | self.max_iter = max_iter 26 | 27 | def __iter__(self): 28 | return self 29 | 30 | def next(self): 31 | outLayer = self.ae.nn.layers[-1] 32 | 33 | def cbk(x): 34 | if (self.iterCounter.count % 5) == 0: 35 | self.ae.setParameters(x) 36 | if outLayer.isPartial(): 37 | rmse, mae = self.evaluate.calculateRMSEandMAE( 38 | self.train, self.test) 39 | else: 40 | rmse, mae = self.evaluate.calculateRMSEandMAE( 41 | self.test, self.test) 42 | print 'Iteration : %d '\ 43 | 'Test RMSE: %f MAE: %f' % ( 44 | self.iterCounter.count, rmse, mae) 45 | 46 | opt_solution = scipy.optimize.minimize(self.lossDeriv, 47 | self.theta, 48 | args=( 49 | self.train, 50 | self.ae.nn, self.modelArgs, 51 | self.iterCounter, 52 | self.batch_size), 53 | method = 'L-BFGS-B', 54 | jac = True, callback=cbk, 55 | options = 56 | {'maxiter': self.max_iter, 57 | "disp": 0}) 58 | 59 | opt_theta = opt_solution.x 60 | self.ae.setParameters(opt_theta) 61 | raise StopIteration("End of the iteration") 62 | 63 | 64 | def getMiniBatchParamsIterator(train, nn, modelArgs, iterCounter, 65 | batch_size, fn): 66 | m, n = train.shape 67 | batches = range(0, m, batch_size) 68 | if batches[-1] != m: 69 | batches.append(m) 70 | while True: 71 | # train = shuffle(train) 72 | for i in range(len(batches) - 1): 73 | start = batches[i] 74 | end = batches[i + 1] 75 | batch_data = train[start:end, :] 76 | yield ([batch_data, nn, modelArgs, iterCounter, batch_size, 77 | fn], {}) 78 | 79 | 80 | def fprime(theta, user_item_rating, NN, modelArg, counter, batch_size, fn): 81 | cost, deriv = fn( 82 | theta, user_item_rating, NN, modelArg, counter, batch_size) 83 | return deriv 84 | 85 | 86 | def getOptimizer(optimize, ae, evaluate, theta, train, test, 87 | nn, modelArgs, iterCounter, batch_size, max_iter): 88 | 89 | if optimize == "lbfgs": 90 | optimizer = LBFGS(ae, evaluate, theta, getCostDeriv, train, test, 91 | nn, modelArgs, iterCounter, batch_size, max_iter) 92 | elif optimize == "rprop": 93 | args = itertools.repeat( 94 | ([train, ae.nn, modelArgs, iterCounter, batch_size, getCostDeriv], 95 | {})) 96 | optimizer = rprop.Rprop(theta, fprime, args=args) 97 | elif optimize == "rmsprop": 98 | args = getMiniBatchParamsIterator( 99 | train, ae.nn, modelArgs, iterCounter, batch_size, 100 | getCostDerivBatch) 101 | optimizer = rmsprop.RmsProp( 102 | theta, fprime, 0.001, decay=0.0, step_adapt=False, step_rate_min=0, 103 | step_rate_max=5.0, args=args) 104 | else: 105 | raise NotImplementedError("%s optimizer not implemented" % optimize) 106 | return optimizer 107 | -------------------------------------------------------------------------------- /nn/autorec/setup_matmul.py: 
-------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | import numpy as np 5 | 6 | ext_module = Extension( 7 | "cython_matmul", 8 | ["cython_matmul.pyx"], 9 | extra_compile_args=['-fopenmp'], 10 | extra_link_args=['-fopenmp'], 11 | include_dirs=[np.get_include()] 12 | ) 13 | 14 | setup( 15 | name='cython helpers', 16 | cmdclass={'build_ext': build_ext}, 17 | ext_modules=[ext_module], 18 | include_dirs=[np.get_include()] 19 | ) 20 | -------------------------------------------------------------------------------- /nn/blocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesuvash/NNRec/31839522c9691d43d01987b5f67a7ed5ea5fcd81/nn/blocks/__init__.py -------------------------------------------------------------------------------- /nn/blocks/activations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cython_activations import * 3 | 4 | 5 | class Activation(object): 6 | 7 | """docstring for Activation""" 8 | 9 | def activation(self, x): 10 | pass 11 | 12 | def derivative(self, x): 13 | pass 14 | 15 | def binarize(self, x): 16 | return x 17 | 18 | 19 | class Identity(Activation): 20 | 21 | """docstring for Identity""" 22 | 23 | def activation(self, x): 24 | return x 25 | 26 | def derivative(self, x): 27 | return 1 28 | 29 | 30 | class Sigmoid(Activation): 31 | 32 | """docstring for Sigmoid""" 33 | 34 | def activation(self, x): 35 | if len(x.shape) == 2: 36 | return cy_sigmoid(x) 37 | else: 38 | return cy_sigmoid1d(x) 39 | 40 | def derivative(self, x): 41 | return np.multiply(x, 1 - x) 42 | 43 | def binarize(self, x): 44 | return 1.0 * (x > 0.5) 45 | 46 | 47 | class RELU(Activation): 48 | 49 | """docstring for RELU""" 50 | 51 | def activation(self, x): 52 | return x * (x > 0) 53 | 54 | def derivative(self, x): 55 | return (x > 0) * 1 56 | 57 | 58 | class NRELU(Activation): 59 | 60 | """docstring for NRELU""" 61 | 62 | def activation(self, x): 63 | if len(x.shape) == 2: 64 | sigma = cy_sigmoid(x) 65 | else: 66 | sigma = cy_sigmoid1d(x) 67 | x += np.random.randn(x.shape[0], x.shape[1]) * np.sqrt(sigma) 68 | return x * (x > 0) 69 | 70 | def derivative(self, x): 71 | return (x > 0) * 1 72 | 73 | 74 | class Tanh(Activation): 75 | 76 | """docstring for RELU""" 77 | 78 | def activation(self, x): 79 | return np.tanh(x) 80 | 81 | def derivative(self, x): 82 | return (1 - np.power(x, 2)) 83 | 84 | def binarize(self, x): 85 | return 1.0 * (x > 0) 86 | -------------------------------------------------------------------------------- /nn/blocks/cython_activations.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | 5 | cdef extern from "math.h": 6 | double exp(double) 7 | 8 | cdef double f(double z): 9 | return exp(z) 10 | 11 | @cython.boundscheck(False) 12 | @cython.wraparound(False) 13 | @cython.nonecheck(False) 14 | def cy_sigmoid(np.ndarray[double, ndim=2] z): 15 | cdef unsigned int NX, NY, i, j 16 | cdef np.ndarray[double, ndim=2] sig 17 | 18 | NY, NX = np.shape(z) 19 | 20 | sig = np.zeros((NY,NX)) 21 | for i in xrange(NX): 22 | for j in xrange(NY): 23 | sig[j,i] = 1./(1. 
+ exp(-z[j,i])) 24 | return sig 25 | 26 | @cython.boundscheck(False) 27 | @cython.wraparound(False) 28 | @cython.nonecheck(False) 29 | def cy_sigmoid1d(np.ndarray[double, ndim=1] z): 30 | cdef unsigned int NX, i 31 | cdef np.ndarray[double, ndim=1] sig 32 | 33 | NX = len(z) 34 | sig = np.zeros(NX) 35 | for i in xrange(NX): 36 | sig[i] = 1./(1. + exp(-z[i])) 37 | return sig 38 | 39 | cpdef cy_tanh(z): 40 | return np.tanh(z) 41 | 42 | cpdef cy_relu(z): 43 | z[z<0] = 0.0 44 | return z -------------------------------------------------------------------------------- /nn/blocks/networkConfigParser.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from nn import * 3 | from activations import * 4 | from exceptions import Exception 5 | from utils import * 6 | # from cfRBM import ModelArgs 7 | 8 | 9 | class NetworkConfigParser(object): 10 | 11 | @classmethod 12 | def getDataInfo(cls, path): 13 | with open(path) as fp: 14 | data = yaml.load(fp) 15 | data_info = data["data"] 16 | train_path = data_info["train"] 17 | test_path = data_info["test"] 18 | save_path = data_info["save"] 19 | return (train_path, test_path, save_path) 20 | 21 | @classmethod 22 | def constructModelArgs(cls, path, ModelArgs): 23 | kwargs = {} 24 | with open(path) as fp: 25 | data = yaml.load(fp) 26 | params = data["params"] 27 | if "reg_bias" in params: 28 | kwargs["regularize_bias"] = params["reg_bias"] 29 | if "momentum" in params: 30 | kwargs["momentum"] = params["momentum"] 31 | if "mean" in params: 32 | kwargs["mean"] = params["mean"] 33 | if "beta" in params: 34 | kwargs["beta"] = params["beta"] 35 | if "mean_normalization" in params: 36 | kwargs["mean"] = params["mean_normalization"] 37 | if "learn_rate" in params: 38 | kwargs["learn_rate"] = params["learn_rate"] 39 | if "num_threads" in params: 40 | kwargs["num_threads"] = params["num_threads"] 41 | kwargs["lamda"] = params["lamda"] 42 | kwargs["max_iter"] = params["max_iter"] 43 | if "optimizer" in params: 44 | kwargs["optimizer"] = params["optimizer"] 45 | if "batch_size" in params: 46 | kwargs["batch_size"] = params["batch_size"] 47 | args = ModelArgs(**kwargs) 48 | return args 49 | 50 | @classmethod 51 | def constructNetwork(cls, path): 52 | nn = NN() 53 | with open(path) as fp: 54 | data = yaml.load(fp) 55 | layers = data["layers"] 56 | layer_ids = layers.keys() 57 | layer_ids.sort() 58 | for layer_id in layer_ids: 59 | layer_info = layers[layer_id] 60 | layer = cls._constructLayer(layer_info) 61 | nn.addLayer(layer) 62 | nn.finalize() 63 | return nn 64 | 65 | @classmethod 66 | def _constructLayer(cls, layer_info): 67 | num_nodes = layer_info["num_nodes"] 68 | activation = layer_info["activation"].lower() 69 | 70 | if "partial" in layer_info: 71 | isPartial = layer_info["partial"] 72 | else: 73 | isPartial = False 74 | 75 | if "dropout" in layer_info: 76 | dropout = layer_info["dropout"] 77 | else: 78 | dropout = 0.0 79 | 80 | if "sparsity" in layer_info: 81 | sparsity = layer_info["sparsity"] 82 | else: 83 | sparsity = None 84 | 85 | if "binary" in layer_info: 86 | binary = layer_info["binary"] 87 | else: 88 | binary = False 89 | 90 | layer_type = layer_info["type"].lower() 91 | activation = cls._getActivation(activation) 92 | ltype = cls._getLayerType(layer_type) 93 | 94 | layer = Layer(num_nodes, activation, ltype) 95 | if isPartial: 96 | layer.setPartial() 97 | if dropout: 98 | layer.setDropout(dropout) 99 | if sparsity: 100 | layer.setSparsity(sparsity) 101 | if binary: 102 | layer.setBinary() 103 | return 
layer 104 | 105 | @classmethod 106 | def _getLayerType(cls, layer_type): 107 | if layer_type == "input": 108 | return LayerType.INPUT 109 | elif layer_type == "hidden": 110 | return LayerType.HIDDEN 111 | elif layer_type == "output": 112 | return LayerType.OUTPUT 113 | else: 114 | raise Exception("Unknown Layer Type") 115 | 116 | @classmethod 117 | def _getActivation(cls, activation): 118 | if activation == "sigmoid": 119 | return Sigmoid() 120 | elif activation == "identity": 121 | return Identity() 122 | elif activation == "relu": 123 | return RELU() 124 | elif activation == "nrelu": 125 | return NRELU() 126 | elif activation == "tanh": 127 | return Tanh() 128 | else: 129 | raise Exception("Unknown Activation Function") 130 | 131 | @classmethod 132 | def validateNetwork(cls, network, modelArgs): 133 | pass 134 | 135 | if __name__ == '__main__': 136 | # parser = NetworkConfigParser() 137 | data_info = NetworkConfigParser.getDataInfo("config/net.yaml") 138 | modelArgs = NetworkConfigParser.constructModelArgs("config/net.yaml") 139 | nn = NetworkConfigParser.constructNetwork("config/net.yaml") 140 | print nn, modelArgs, data_info 141 | -------------------------------------------------------------------------------- /nn/blocks/nn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | 4 | 5 | class LayerType(object): 6 | INPUT = 0 7 | HIDDEN = 1 8 | OUTPUT = 2 9 | 10 | 11 | class Layer: 12 | 13 | def __init__(self, num_units, activation, layerType, 14 | dropout=None, sparsity=None, partial=False, isBiasEnabled=True): 15 | self.num_units = num_units 16 | self.activation = activation 17 | self.mf = True 18 | self.dropout = dropout 19 | self.layerType = layerType 20 | self.sparsity = sparsity 21 | self.partial = partial 22 | self.isBiasEnabled = True 23 | self.binary = False 24 | self.setBias() 25 | 26 | def setBias(self): 27 | self.bias = np.random.randn(1, self.num_units) * 0.001 28 | 29 | def setSparsity(self, value): 30 | self.sparsity = value 31 | 32 | def isSparse(self): 33 | return ((self.sparsity is not None) and (self.sparsity != 1)) 34 | 35 | def setPartial(self): 36 | self.partial = True 37 | 38 | def isPartial(self): 39 | return self.partial 40 | 41 | def setBinary(self): 42 | self.binary = True 43 | 44 | def isBinary(self): 45 | return (self.binary == True) 46 | 47 | def setDropout(self, p): 48 | self.dropout = p 49 | 50 | def hasDropout(self): 51 | return ((self.dropout is not None) and (self.dropout != 0.0)) 52 | 53 | def hasBias(self): 54 | return (hasattr(self, "bias") and (self.bias is not None)) 55 | 56 | def removeBias(self): 57 | self.isBiasEnabled = False 58 | self.bias = np.zeros((1, self.num_units)) 59 | 60 | def unsetMeanField(self): 61 | self.mf = False 62 | 63 | def copy(self): 64 | return deepcopy(self) 65 | 66 | def __str__(self): 67 | 68 | layerinfo = "Number of Units = %d ; Layer type = %s\n" % (self.num_units, 69 | self.activation) 70 | 71 | drp = self.dropout if self.dropout else 0 72 | sps = self.sparsity if self.sparsity else 0 73 | 74 | additional_info = "Sparsity %f \t Dropout %f \t Partial %r " % ( 75 | sps, drp, self.partial) 76 | return layerinfo + additional_info 77 | 78 | 79 | class NN(object): 80 | 81 | def __init__(self): 82 | self.layers = [] 83 | self.weights = [] 84 | 85 | def _add_weights(self, n1, n2): 86 | w_vis2hid = 0.01 * np.random.randn(n1, n2) 87 | self.weights.append(w_vis2hid) 88 | 89 | def addLayer(self, layer1): 90 | self.layers.append(layer1) 91 | if 
(len(self.layers) > 1): 92 | self._add_weights( 93 | self.layers[-2].num_units, self.layers[-1].num_units) 94 | 95 | def getWeightByIndex(self, index): 96 | return self.weights[index] 97 | 98 | def setLimits(self): 99 | self.weights_limit = [0] 100 | self.bias_limit = [0] 101 | for l in range(len(self.layers) - 1): 102 | self.weights_limit.append( 103 | self.weights_limit[-1] + self.layers[l].num_units * 104 | self.layers[l + 1].num_units) 105 | 106 | self.bias_limit.append( 107 | self.bias_limit[-1] + self.layers[l + 1].num_units) 108 | 109 | def getFlattenParams(self): 110 | params = [] 111 | map(lambda x: params.append(x.flatten()), self.weights) 112 | map(lambda x: params.append(x.bias.flatten()), self.layers[1:]) 113 | return np.concatenate(params) 114 | 115 | def finalize(self): 116 | self.setLimits() 117 | 118 | def setDropout(self, layerIndex, dropout_prob): 119 | self.layers[layerIndex].setDropout(dropout_prob) 120 | self.weights[layerIndex] *= (1 / (1 - dropout_prob)) 121 | 122 | def __str__(self): 123 | representation = "" 124 | for i, l in enumerate(self.layers): 125 | representation += "Layer = %d ; " % i + str(l) + "\n" 126 | return representation 127 | -------------------------------------------------------------------------------- /nn/blocks/setup_activations.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | import numpy as np 5 | 6 | ext_module = Extension( 7 | "cython_activations", 8 | ["cython_activations.pyx"], 9 | extra_compile_args=['-fopenmp'], 10 | extra_link_args=['-fopenmp'], 11 | include_dirs=[np.get_include()] 12 | ) 13 | 14 | setup( 15 | name = 'activations', 16 | cmdclass = {'build_ext': build_ext}, 17 | ext_modules = [ext_module], 18 | include_dirs=[np.get_include()] 19 | ) 20 | -------------------------------------------------------------------------------- /nn/cfrbm/cfRBM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cython_rbm_matmul import cython_binarizeSparseMatrix, multiplyOuterSparseLayer 3 | from utils.metrics.evaluate import EvaluateRBM 4 | 5 | 6 | class ModelArgs(object): 7 | 8 | """docstring for ModelArgs""" 9 | 10 | def __init__(self, learn_rate=0.001, regularize_bias=True, 11 | momentum=0.6, lamda=0.001, CD=1, num_threads=10, max_iter=200, 12 | k=5, mapping=None, min_learn_rate=10e-6, 13 | batch_size=None): 14 | super(ModelArgs, self).__init__() 15 | self.learn_rate = learn_rate 16 | self.regularize_bias = regularize_bias 17 | self.CD = CD 18 | self.momentum = momentum 19 | self.lamda = lamda 20 | self.num_threads = num_threads 21 | self.max_iter = max_iter 22 | self.k = 5 23 | self.mapping = mapping 24 | self.min_learn_rate = min_learn_rate 25 | self.batch_size = batch_size 26 | 27 | def __str__(self): 28 | string = "" 29 | for key in self.__dict__.keys(): 30 | string += "%s: %s\t" % (key, str(self.__dict__[key])) 31 | return string 32 | 33 | 34 | def binarizeSparseMatrix(x, k, mapping): 35 | m, n = x.shape 36 | return cython_binarizeSparseMatrix(x.data, x.indices, x.indptr, 37 | m, n, k, mapping) 38 | 39 | 40 | class RBM(object): 41 | 42 | """docstring for RBM""" 43 | 44 | def __init__(self, nn, modelArgs, debug=True): 45 | super(RBM, self).__init__() 46 | self.nn = nn 47 | self.modelArgs = modelArgs 48 | self.debug = debug 49 | ratings_array = modelArgs.mapping.keys() 50 | ratings_array.sort() 51 | 
ratings_array = np.array(ratings_array) 52 | self.ratings_array = ratings_array.reshape((modelArgs.k, 1)) 53 | 54 | def getHiddenActivation(self, x): 55 | hidden = x * self.nn.weights[0] + self.nn.layers[1].bias 56 | hidden = self.nn.layers[1].activation.activation(hidden) 57 | if self.nn.layers[1].isBinary(): 58 | hidden = self.nn.layers[1].activation.binarize(hidden) 59 | return hidden 60 | 61 | def getVisibleActivation(self, x, target, ncpus=16): 62 | visible = multiplyOuterSparseLayer(x, self.nn.weights[0].T, 63 | self.nn.layers[0].bias, 64 | target.data, 65 | target.indices, 66 | target.indptr, 67 | ncpus) 68 | return self.nn.layers[0].activation.activation(visible) 69 | 70 | def __binary2Ratings(self, prediction): 71 | n = self.modelArgs.k 72 | m = int(len(prediction) / n) 73 | prediction = prediction.reshape(m, n) 74 | normalizer = prediction.sum(axis=1).reshape(m, 1) 75 | prediction = prediction / normalizer 76 | rating = np.dot(prediction, self.ratings_array) 77 | return np.ravel(rating) 78 | 79 | def predict(self, train, test, normalize=True): 80 | hidden = self.getHiddenActivation(train) 81 | visible = self.getVisibleActivation(hidden, test) 82 | # visible = np.exp(visible) 83 | if normalize: 84 | prediction = self.__binary2Ratings(visible) 85 | else: 86 | prediction = visible 87 | return prediction 88 | 89 | 90 | class RbmOptimizer(object): 91 | 92 | """docstring for RbmOptimizer""" 93 | 94 | def __init__(self, RBM): 95 | super(RbmOptimizer, self).__init__() 96 | self.RBM = RBM 97 | 98 | def train(self, train, test, rtest): 99 | self.nn = self.RBM.nn 100 | learn_rate = self.RBM.modelArgs.learn_rate 101 | max_iter = self.RBM.modelArgs.max_iter 102 | CD = self.RBM.modelArgs.CD 103 | lamda = self.RBM.modelArgs.lamda 104 | momentum = self.RBM.modelArgs.momentum 105 | min_learn_rate = self.RBM.modelArgs.min_learn_rate 106 | 107 | dW_old = np.zeros(self.nn.weights[0].shape) 108 | dv_old = np.zeros(self.nn.layers[0].bias.shape) 109 | dh_old = np.zeros(self.nn.layers[1].bias.shape) 110 | evaluate = EvaluateRBM(self.RBM) 111 | 112 | vispos = train 113 | visneg = train.copy() 114 | for i in range(max_iter): 115 | if i > 50: 116 | CD = 3 117 | momentum = 0.9 118 | 119 | hidpos = self.RBM.getHiddenActivation(vispos) 120 | hidneg = hidpos 121 | for j in range(CD): 122 | visneg_data = self.RBM.getVisibleActivation(hidneg, vispos) 123 | visneg.data = visneg_data 124 | hidneg = self.RBM.getHiddenActivation(visneg) 125 | 126 | dW = momentum * dW_old + learn_rate *\ 127 | ((vispos.T * hidpos) - 128 | (visneg.T * hidneg) - lamda * self.nn.weights[0]) 129 | dvbias = momentum * dv_old + 0.1 * learn_rate *\ 130 | ((vispos - visneg).sum(axis=0) - 131 | lamda * self.nn.layers[0].bias) 132 | dhbias = momentum * dh_old + learn_rate *\ 133 | ((hidpos - hidneg).sum(axis=0) - 134 | lamda * self.nn.layers[1].bias) 135 | 136 | dW_old = dW 137 | dv_old = dvbias 138 | dh_old = dhbias 139 | 140 | self.nn.weights[0] += dW 141 | self.nn.layers[0].bias += dvbias 142 | self.nn.layers[1].bias += dhbias 143 | if i % 5 == 0: 144 | learn_rate = max(learn_rate * 0.95, min_learn_rate) 145 | print evaluate.calculateRMSEandMAE(train, test, rtest) 146 | 147 | def minibatchTrain(self, train, test, rtest, batch_size): 148 | self.nn = self.RBM.nn 149 | slearn_rate = self.RBM.modelArgs.learn_rate 150 | max_iter = self.RBM.modelArgs.max_iter 151 | CD = self.RBM.modelArgs.CD 152 | lamda = self.RBM.modelArgs.lamda 153 | momentum = self.RBM.modelArgs.momentum 154 | min_learn_rate = self.RBM.modelArgs.min_learn_rate 155 | 156 | 
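# Mini-batch contrastive divergence. dW_old, dv_old and dh_old carry the
# momentum-smoothed updates for the shared weight matrix and the visible and
# hidden biases. Each epoch sweeps over row slices of the binarized training
# matrix, alternates getHiddenActivation / getVisibleActivation for CD steps,
# and applies
#   dW = momentum * dW_old + learn_rate * ((vispos.T * hidpos) - (visneg.T * hidneg) - lamda * W)
# The schedule mirrors train() above: after 50 epochs CD is raised to 3 and
# momentum to 0.9, the base rate decays by 5% every 5 epochs, and the
# effective per-batch rate is the base rate divided by the batch size,
# floored at min_learn_rate.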
dW_old = np.zeros(self.nn.weights[0].shape) 157 | dv_old = np.zeros(self.nn.layers[0].bias.shape) 158 | dh_old = np.zeros(self.nn.layers[1].bias.shape) 159 | evaluate = EvaluateRBM(self.RBM) 160 | 161 | 162 | m, n = train.shape 163 | batches = range(0, m, batch_size) 164 | if batches[-1] != m: 165 | if (m - batches[-1]) < (batch_size / 2.0): 166 | batches[-1] = m 167 | else: 168 | batches.append(m) 169 | for i in range(max_iter): 170 | if i > 50: 171 | CD = 3 172 | momentum = 0.9 173 | for j in range(len(batches) - 1): 174 | start = batches[j] 175 | end = batches[j + 1] 176 | learn_rate = slearn_rate / (end - start) 177 | learn_rate = max(learn_rate, min_learn_rate) 178 | 179 | vispos = train[start:end, :] 180 | visneg = vispos.copy() 181 | hidpos = self.RBM.getHiddenActivation(vispos) 182 | hidneg = hidpos 183 | for k in range(CD): 184 | visneg_data = self.RBM.getVisibleActivation(hidneg, vispos) 185 | visneg.data = visneg_data 186 | hidneg = self.RBM.getHiddenActivation(visneg) 187 | 188 | dW = momentum * dW_old + learn_rate *\ 189 | ((vispos.T * hidpos) - 190 | (visneg.T * hidneg) - lamda * self.nn.weights[0]) 191 | dvbias = momentum * dv_old + learn_rate *\ 192 | ((vispos - visneg).sum(axis=0) - 193 | lamda * self.nn.layers[0].bias) 194 | dhbias = momentum * dh_old + 0.1 * learn_rate *\ 195 | ((hidpos - hidneg).sum(axis=0) - 196 | lamda * self.nn.layers[1].bias) 197 | 198 | dW_old = dW 199 | dv_old = dvbias 200 | dh_old = dhbias 201 | 202 | self.nn.weights[0] += dW 203 | self.nn.layers[0].bias += dvbias 204 | self.nn.layers[1].bias += dhbias 205 | if i % 5 == 0: 206 | slearn_rate *= 0.95 207 | print evaluate.calculateRMSEandMAE(train, test, rtest) 208 | 209 | def sgdTrain(self, train, test, rtest): 210 | self.nn = self.RBM.nn 211 | learn_rate = self.RBM.modelArgs.learn_rate 212 | max_iter = self.RBM.modelArgs.max_iter 213 | CD = self.RBM.modelArgs.CD 214 | lamda = self.RBM.modelArgs.lamda 215 | momentum = self.RBM.modelArgs.momentum 216 | 217 | dW_old = np.zeros(self.nn.weights[0].shape) 218 | dv_old = np.zeros(self.nn.layers[0].bias.shape) 219 | dh_old = np.zeros(self.nn.layers[1].bias.shape) 220 | evaluate = EvaluateRBM(self.RBM) 221 | # traindata = train.data 222 | # testdata = test.data 223 | 224 | m, n = train.shape 225 | for i in range(max_iter): 226 | if i > 50: 227 | CD = 3 228 | momentum = 0.9 229 | for j in range(m - 1): 230 | vispos = train.getrow(j) 231 | visneg = vispos.copy() 232 | hidpos = self.RBM.getHiddenActivation(vispos) 233 | hidneg = hidpos 234 | for k in range(CD): 235 | visneg_data = self.RBM.getVisibleActivation(hidneg, vispos) 236 | visneg.data = visneg_data 237 | hidneg = self.RBM.getHiddenActivation(visneg) 238 | 239 | dW = momentum * dW_old + learn_rate *\ 240 | ((vispos.T * hidpos) - 241 | (visneg.T * hidneg) - lamda * self.nn.weights[0]) 242 | dvbias = momentum * dv_old + learn_rate *\ 243 | ((vispos - visneg).sum(axis=0) - 244 | lamda * self.nn.layers[0].bias) 245 | dhbias = momentum * dh_old + 0.1 * learn_rate *\ 246 | ((hidpos - hidneg).sum(axis=0) - 247 | lamda * self.nn.layers[1].bias) 248 | 249 | dW_old = dW 250 | dv_old = dvbias 251 | dh_old = dhbias 252 | 253 | self.nn.weights[0] += dW 254 | self.nn.layers[0].bias += dvbias 255 | self.nn.layers[1].bias += dhbias 256 | if i % 5 == 0: 257 | slearn_rate *= 0.95 258 | print evaluate.calculateRMSEandMAE(train, test, rtest) 259 | -------------------------------------------------------------------------------- /nn/cfrbm/config/1M/1M.yaml: 
-------------------------------------------------------------------------------- 1 | data: 2 | train: /data/ssedhain/datasets/movielens/1M/90folds/train.flipped.1 3 | test: /data/ssedhain/datasets/movielens/1M/90folds/test.flipped.1 4 | save: /data/ssedhain/datasets/movielens/1M/models/autoencoder/new/model.1 5 | params: 6 | reg_bias: True 7 | lamda: 0.1 8 | learn_rate: 0.1 9 | max_iter: 200 10 | batch_size: 500 11 | k : 5 12 | layers: 13 | 1: 14 | activation: sigmoid 15 | num_nodes: 30200 16 | partial: True 17 | type: input 18 | 2: 19 | activation: sigmoid 20 | num_nodes: 500 21 | partial: False 22 | type: hidden 23 | binary: True -------------------------------------------------------------------------------- /nn/cfrbm/config/1M/1MU.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train: /Users/suvashsedhain/Desktop/90folds/train.%d 3 | test: /Users/suvashsedhain/Desktop/90folds/test.%d 4 | save: /Users/suvashsedhain/Desktop/90folds/model_noneighbor_%d_%.2f.%d 5 | params: 6 | reg_bias: True 7 | lamda: 1 8 | learn_rate: 0.1 9 | max_iter: 200 10 | batch_size: 10000 11 | layers: 12 | 1: 13 | activation: sigmoid 14 | num_nodes: 18530 15 | partial: True 16 | type: input 17 | 2: 18 | activation: identity 19 | num_nodes: 500 20 | partial: False 21 | type: hidden 22 | binary: True 23 | -------------------------------------------------------------------------------- /nn/cfrbm/cython_rbm_matmul.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | cimport numpy as np 4 | cimport cython 5 | from cython.parallel import prange 6 | 7 | 8 | @cython.boundscheck(False) 9 | @cython.wraparound(False) 10 | @cython.nonecheck(False) 11 | cpdef cython_binarizeSparseMatrix(double[:] data, int[:] ri, int[:] rptr, int m, int n, int k, mapping): 12 | 13 | cdef: 14 | np.ndarray[double, ndim = 1] V 15 | np.ndarray[int, ndim = 1] I, J 16 | int ctr, i, j, l, nratings, item 17 | double rating 18 | 19 | I = np.zeros(len(data) * k,dtype=np.int32) 20 | J = np.zeros(len(data) * k,dtype=np.int32) 21 | V = np.zeros(len(data) * k) 22 | nratings = len(mapping) 23 | ctr = 0 24 | for i in range(m): 25 | for j in range(rptr[i], rptr[i + 1]): 26 | item = ri[j] 27 | rating = mapping[data[j]] 28 | for l in range(nratings): 29 | I[ctr] = i 30 | J[ctr] = item * k + l 31 | if rating == l: 32 | V[ctr] = 1.0 33 | else: 34 | V[ctr] = 0.1 35 | ctr += 1 36 | R = scipy.sparse.coo_matrix((V, (I, J)), shape=(m, n * k)) 37 | R.data[R.data == 0.1] = 0.0 38 | return R.tocsr() 39 | 40 | @cython.boundscheck(False) 41 | @cython.wraparound(False) 42 | @cython.nonecheck(False) 43 | cpdef np.ndarray[double, ndim = 1] multiplyOuterSparseLayer(double[:, :] hiddenActivation, 44 | double[:, :] W2, 45 | double[:, :] vis_bias, 46 | double[:] data, 47 | int[:] indices, 48 | int[:] indptr, 49 | int num_threads): 50 | cdef: 51 | np.ndarray[double, ndim = 1] result 52 | int i, j, k, l, m, n, start, end 53 | double _buffer 54 | 55 | m = hiddenActivation.shape[0] 56 | n = hiddenActivation.shape[1] 57 | result = np.zeros(len(data)) 58 | for i in prange(m, nogil=True, num_threads=num_threads): 59 | start = indptr[i] 60 | end = indptr[i + 1] 61 | for j in range(indptr[i + 1] - indptr[i]): 62 | l = indices[start] 63 | _buffer = 0.0 64 | for k in range(n): 65 | _buffer = _buffer + hiddenActivation[i, k] * W2[k, l] 66 | result[start] = _buffer + vis_bias[0, l] 67 | start = start + 1 68 | return result 69 | 70 | 71 | 
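cython_binarizeSparseMatrix above expands an m x n sparse rating matrix into an m x (n*k) binary matrix: every observed rating becomes a block of k stored entries, with 1.0 at the index given by mapping[rating] and explicit zeros elsewhere (the temporary 0.1 values are reset to 0.0 after construction, so the whole k-column block stays in the stored sparsity pattern that cfRBM.py later writes reconstructions into via visneg.data). This is also why the RBM configs set the input layer's num_nodes to k times the number of visible entities, e.g. 30200 = 5 * 6040 in config/1M/1M.yaml, and why the learner below recovers n_vis as num_units / k. A dense NumPy sketch of the same expansion, with a made-up helper name, for illustration only:

import numpy as np

def binarize_dense(R, k, mapping):
    # Toy, dense re-statement of cython_binarizeSparseMatrix: one k-wide
    # one-hot block per observed rating, zeros for unobserved cells.
    m, n = R.shape
    B = np.zeros((m, n * k))
    for i in range(m):
        for j in range(n):
            if R[i, j] != 0:                      # 0 means "not rated"
                B[i, j * k + mapping[R[i, j]]] = 1.0
    return B

# Two users, three items, 1-5 ratings (k = 5):
R = np.array([[5., 0., 3.],
              [0., 1., 0.]])
mapping = {float(r): r - 1 for r in range(1, 6)}  # raw rating -> 0..k-1
print(binarize_dense(R, 5, mapping).shape)        # (2, 15)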
-------------------------------------------------------------------------------- /nn/cfrbm/learner.py: -------------------------------------------------------------------------------- 1 | from cfRBM import * 2 | from dataUtils.data import loadTrainTest 3 | # from nn.blocks.nn import Layer, NN, LayerType 4 | from nn.blocks.activations import * 5 | from nn.blocks.networkConfigParser import NetworkConfigParser 6 | import yaml 7 | 8 | 9 | def train(config_path): 10 | configparser = NetworkConfigParser() 11 | nn = configparser.constructNetwork(config_path) 12 | modelArgs = configparser.constructModelArgs(config_path, ModelArgs) 13 | train_path, test_path, save_path = configparser.getDataInfo(config_path) 14 | print nn 15 | 16 | data = yaml.load(open(config_path)) 17 | params = data["params"] 18 | k = params["k"] 19 | 20 | n_vis = int(nn.layers[0].num_units / k) 21 | train, test, cold_ratings = loadTrainTest(train_path, test_path, 22 | shape=(None, n_vis)) 23 | 24 | min_rating, max_rating = train.data.min(), train.data.max() 25 | increment = 1 26 | mapping = dict(zip(np.arange(min_rating, max_rating + increment, 27 | increment), np.arange(k))) 28 | modelArgs.mapping = mapping 29 | modelArgs.k = k 30 | bintrain = binarizeSparseMatrix(train, k, mapping) 31 | bintest = binarizeSparseMatrix(test, k, mapping) 32 | del train 33 | model = RBM(nn, modelArgs) 34 | optimizer = RbmOptimizer(model) 35 | optimizer.minibatchTrain(bintrain, bintest, test, modelArgs.batch_size) 36 | 37 | 38 | if __name__ == '__main__': 39 | import argparse 40 | parser = argparse.ArgumentParser(description='Description') 41 | parser.add_argument( 42 | '--config', '-c', help='configuration file', required=True) 43 | args = parser.parse_args() 44 | config_path = args.config 45 | train(config_path) 46 | -------------------------------------------------------------------------------- /nn/cfrbm/setup_rbm_matmul.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | import numpy as np 5 | 6 | ext_module = Extension( 7 | "cython_rbm_matmul", 8 | ["cython_rbm_matmul.pyx"], 9 | extra_compile_args=['-fopenmp'], 10 | extra_link_args=['-fopenmp'], 11 | include_dirs=[np.get_include()] 12 | ) 13 | 14 | setup( 15 | name='cython helpers', 16 | cmdclass={'build_ext': build_ext}, 17 | ext_modules=[ext_module], 18 | include_dirs=[np.get_include()] 19 | ) 20 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesuvash/NNRec/31839522c9691d43d01987b5f67a7ed5ea5fcd81/utils/__init__.py -------------------------------------------------------------------------------- /utils/datetimeUtils.py: -------------------------------------------------------------------------------- 1 | import ciso8601 2 | import datetime 3 | import envoy 4 | from datetime import timedelta 5 | 6 | 7 | def parseDateTime(datetimestring): 8 | return ciso8601.parse_datetime(datetimestring) 9 | 10 | 11 | def getDaysSinceX(inputdata, reference=None): 12 | if reference is None: 13 | reference = datetime.datetime.now() 14 | input_time = parseDateTime(inputdata) 15 | return (reference - input_time).total_seconds() / (86400.0) 16 | 17 | 18 | def testStartDate(path, days=1): 19 | r = envoy.run("tail -1 {}".format(path)) 20 | date = r.std_out.partition(" 
")[0].strip().split("\t")[-1] 21 | purchased_time = parseDateTime(date) 22 | return purchased_time + timedelta(days=days) 23 | 24 | 25 | def getepochs(datetimestring): 26 | dt = parseDateTime(datetimestring) 27 | return int(dt.strftime("%s")) 28 | -------------------------------------------------------------------------------- /utils/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesuvash/NNRec/31839522c9691d43d01987b5f67a7ed5ea5fcd81/utils/metrics/__init__.py -------------------------------------------------------------------------------- /utils/metrics/evaluate.py: -------------------------------------------------------------------------------- 1 | 2 | from math import fabs, sqrt 3 | import numpy as np 4 | import scipy.sparse 5 | 6 | 7 | class Evaluate(object): 8 | 9 | """docstring for Evaluate""" 10 | 11 | def __init__(self, predictor): 12 | super(Evaluate, self).__init__() 13 | self.predictor = predictor 14 | 15 | def calculateRMSEandMAE(self, test): 16 | pass 17 | 18 | 19 | class EvaluateConstant(Evaluate): 20 | 21 | def __init__(self, predictor): 22 | super(Evaluate, self).__init__() 23 | self.predictor = predictor 24 | 25 | def calculateRMSEandMAE(self, test, ): 26 | rmse = 0.0 27 | mae = 0.0 28 | count = 0 29 | for user in test: 30 | ratings = test[user]["ratings"] 31 | for actual in ratings: 32 | predicted = self.predictor.predict(user, 1) 33 | rmse += (actual - predicted) ** 2 34 | mae += fabs(actual - predicted) 35 | count += 1 36 | return [sqrt(rmse / count), mae / count] 37 | 38 | 39 | class EvaluateNN(Evaluate): 40 | 41 | """docstring for EvaluateRBM""" 42 | 43 | def __init__(self, predictor, scale=1.0, default=3.0): 44 | super(EvaluateNN, self).__init__(predictor) 45 | self.scale = scale 46 | self.default = default 47 | 48 | def calculateRMSEandMAE(self, train, test, cold=None): 49 | predictions = self.predictor.predict(train, test) 50 | if scipy.sparse.isspmatrix(train): 51 | predictions.data = predictions.data * self.scale 52 | err = np.fabs(predictions.data - test.data * self.scale) 53 | total_instances = len(test.data) 54 | else: 55 | err = np.fabs(predictions - test * self.scale) 56 | total_instances = test.size 57 | cold_err = [] 58 | if cold is not None: 59 | cold_err = map(lambda x: np.fabs(x - self.default), cold) 60 | total_instances += len(cold) 61 | cold_err = np.array(cold_err) 62 | 63 | rmse = np.sqrt( 64 | (np.power(err, 2).sum() + np.power(cold_err, 2).sum()) / (total_instances)) 65 | mae = (err.sum() + cold_err.sum()) / total_instances 66 | return [rmse, mae] 67 | 68 | 69 | class EvaluateRBM(EvaluateNN): 70 | 71 | """docstring for EvaluateRBM""" 72 | 73 | def __init__(self, predictor, scale=1.0, default=3.0): 74 | super(EvaluateRBM, self).__init__(predictor, scale, default) 75 | 76 | def calculateRMSEandMAE(self, btrain, btest, test, 77 | cold_ratings=None, default_rating=3.0): 78 | predictions = self.predictor.predict(btrain, btest) 79 | if scipy.sparse.isspmatrix(btrain): 80 | predictions = predictions * self.scale 81 | err = np.fabs(predictions - test.data) 82 | total_instances = len(test.data) 83 | else: 84 | err = np.fabs(predictions - test) 85 | total_instances = test.size 86 | cold_err = [] 87 | if cold_ratings: 88 | for rating in cold_ratings: 89 | cold_err.append(np.fabs(rating - default_rating)) 90 | total_instances += len(cold_err) 91 | cold_err = np.array(cold_err) 92 | # print(np.power(err, 2).sum() + np.power(cold_err, 2).sum()) 93 | rmse = 
np.sqrt((np.power(err, 2).sum() + 94 | np.power(cold_err, 2).sum()) / total_instances) 95 | mae = (err.sum() + cold_err.sum()) / total_instances 96 | return [rmse, mae] 97 | -------------------------------------------------------------------------------- /utils/statUtil.py: -------------------------------------------------------------------------------- 1 | from scipy import stats 2 | import math 3 | 4 | 5 | def getConfidenceInterval(data, percent, distribution="t"): 6 | n, min_max, mean, var, skew, kurt = stats.describe(data) 7 | std = math.sqrt(var) 8 | if distribution == "t": 9 | R = stats.t.interval( 10 | percent, len(data) - 1, loc=mean, scale=std / math.sqrt(len(data))) 11 | else: 12 | R = stats.norm.interval( 13 | percent, loc=mean, scale=std / math.sqrt(len(data))) 14 | return mean, R 15 | 16 | 17 | def getMeanCI(data, percent, distribution="t"): 18 | mean, errors = getConfidenceInterval(data, percent, distribution) 19 | return mean, (errors[1] - errors[0]) / 2.0 20 | 21 | if __name__ == '__main__': 22 | import numpy as np 23 | s = np.array([3, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 6]) 24 | print getConfidenceInterval(s, 0.95) 25 | --------------------------------------------------------------------------------
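For reference, the RMSE/MAE reported by EvaluateRBM.calculateRMSEandMAE (with the default scale of 1.0) boils down to the standalone computation below, where cold-start ratings are scored against a fixed default prediction of 3.0. A minimal NumPy sketch with made-up numbers; rmse_mae is a hypothetical helper, not part of the package:

import numpy as np

def rmse_mae(predicted, actual, cold_ratings=(), default=3.0):
    # Absolute errors on the observed test ratings ...
    err = np.fabs(np.asarray(predicted, dtype=float) -
                  np.asarray(actual, dtype=float))
    # ... plus, as in EvaluateRBM, a fixed-default penalty for cold-start ratings.
    cold_err = np.fabs(np.asarray(cold_ratings, dtype=float) - default)
    n = err.size + cold_err.size
    rmse = np.sqrt((np.power(err, 2).sum() + np.power(cold_err, 2).sum()) / n)
    mae = (err.sum() + cold_err.sum()) / n
    return rmse, mae

print(rmse_mae([4.2, 2.9, 3.5], [4.0, 3.0, 4.0], cold_ratings=[5.0]))

When an experiment is repeated over the %d-templated train/test folds referenced in the configs, getMeanCI above gives a convenient mean and half-width summary of the per-fold scores.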