├── .gitattributes ├── .gitignore ├── examples ├── bitstringcompression.py ├── digits.py └── dimensionality_reduction.py ├── genetic_perceptrons ├── ogc.py └── readme.md ├── rbr_experiments ├── rbro.py └── readme.md ├── readme.md └── rftrl.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear on external disk 35 | .Spotlight-V100 36 | .Trashes 37 | 38 | # Directories potentially created on remote AFP share 39 | .AppleDB 40 | .AppleDesktop 41 | Network Trash Folder 42 | Temporary Items 43 | .apdisk 44 | -------------------------------------------------------------------------------- /examples/bitstringcompression.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Triskelion, HJ van Veen, info@mlwave.com 3 | 4 | Description: 5 | 6 | Here will be the experiments with compression of the bitstring. 7 | 8 | We compress the bitstring by chopping it up into n equally sized bit chunks. 9 | 10 | We then take the sum of the bit chunk, and look if that is over half the size of the bit chunk. 11 | 12 | Example: 13 | 14 | original bitstring of size 10: 15 | 16 | "1110101000" 17 | 18 | chop into 2 chunks of length 5. 19 | 20 | 11101 = 4 = larger than 2.5 = 1 21 | 01000 = 1 = smaller than 2.5 = 0 22 | 23 | output: 24 | 25 | "10" 26 | 27 | Reference/Inspiration: 28 | 29 | Similarity Estimation Techniques From Rounding Algorithms, Moses Charikar 30 | http://www.cs.princeton.edu/courses/archive/spring04/cos598B/bib/CharikarEstim.pdf 31 | 32 | Todo: 33 | 34 | Clean up code. 35 | Make much faster. 36 | Acquire probabilistic pseudo-random supercomputer 37 | """ -------------------------------------------------------------------------------- /examples/digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Triskelion, HJ van Veen, info@mlwave.com 3 | 4 | Description: 5 | 6 | Creates 3 classifiers. 7 | Experiments with ensembling their predictions, and studies variance. 8 | Uses digits dataset (the "0"'s and "1"'s) 9 | 10 | Seems that: 11 | 12 | One 'overfitted' classifier can improve the ensemble. 13 | Random_state change shows more difference than Random Forest. 14 | Averaging 3 classifiers with different random state increases robustness. 15 | We can approach SVM accuracy. 
16 | Weighing the predictions by the 3 classifier's progressive validation loss can be better than unweighted average. 17 | Very similar to Vowpal Wabbit's -q and --cubic. 18 | 19 | """ 20 | from sklearn.datasets import load_digits 21 | import rftrl 22 | 23 | def logloss(act,pred): 24 | predicted = max(min(pred, 1. - 10e-15), 10e-15) 25 | return -log(predicted) if act == 1. else -log(1. - predicted) 26 | 27 | if __name__ == "__main__": 28 | X_train, y = load_digits().data, load_digits().target 29 | 30 | clf = rftrl.RandomLeaderClassifier(nr_projections=500, random_state=36, l2=1., size_projections=1, verbose=1) 31 | clf2 = rftrl.RandomLeaderClassifier(nr_projections=100000, random_state=37, l2=1., size_projections=3, verbose=1) 32 | clf3 = rftrl.RandomLeaderClassifier(nr_projections=1000, random_state=38, l2=1., size_projections=2, verbose=1) 33 | 34 | clf.project(X_train) 35 | clf2.project(X_train) 36 | clf3.project(X_train) 37 | 38 | loss = 0 39 | loss2 = 0 40 | loss3 = 0 41 | loss_ensemble = 0 42 | loss_ensemble_ranked = 0 43 | count = 0 44 | for e, (x,y) in enumerate(zip(X_train,y)): 45 | if y == 0 or y == 1: # make a binary problem 46 | count += 1. 47 | 48 | clf.fit(x,e,y) 49 | pred = clf.predict() 50 | loss += clf.logloss() 51 | clf.update(pred) 52 | 53 | clf2.fit(x,e,y) 54 | pred2 = clf2.predict() 55 | loss2 += clf2.logloss() 56 | clf2.update(pred2) 57 | 58 | clf3.fit(x,e,y) 59 | pred3 = clf3.predict() 60 | loss3 += clf3.logloss() 61 | clf3.update(pred3) 62 | 63 | leaders = sorted([(loss/count,pred), (loss2/count,pred2), (loss3/count,pred3)]) 64 | loss_ensemble_ranked += logloss(y,((leaders[0][1]*3)+(leaders[1][1]*2)+(leaders[2][1]*1))/6.) 65 | loss_ensemble += logloss(y,(pred+pred2+pred3)/3.) 66 | 67 | print("%f\t%s\t%f\t%f\t%f\t\t%f\t%f"%(pred, y, loss/count, loss2/count, loss3/count, loss_ensemble/count, loss_ensemble_ranked/count)) -------------------------------------------------------------------------------- /examples/dimensionality_reduction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Triskelion, HJ van Veen, info@mlwave.com 3 | 4 | Description: 5 | 6 | Dimensionality Reduction 7 | 8 | RBR can be used for dimensionality reduction. 9 | 10 | Either by trial or error, or by saving only the best (most informative/highest weighted) bits, 11 | you can create short bitstrings that reduce dimensionality. 12 | 13 | Todo: 14 | Clean code 15 | """ -------------------------------------------------------------------------------- /genetic_perceptrons/ogc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Online Genetic Classifier 3 | 4 | Totally experimental code/proof-of-concept. 
5 | """ 6 | from __future__ import division 7 | from collections import defaultdict 8 | import sys 9 | import random 10 | from math import exp, log 11 | 12 | class GeneticClassifier(object): 13 | def __init__(self, verbose=2, loc_python="python", get_train_data_function="", get_test_data_function="", loss_function="log_loss", custom_loss_function="", random_state=42): 14 | self.loc_python = loc_python 15 | self.verbose = verbose 16 | self.get_train_data_function = get_train_data_function 17 | self.get_test_data_function = get_test_data_function 18 | random.seed(random_state) 19 | if len(custom_loss_function) > 0: 20 | self.loss = custom_loss_function 21 | else: 22 | if loss_function == "log_loss": 23 | self.loss = self.log_loss 24 | elif loss_function == "mse": 25 | self.loss = self.mse 26 | else: 27 | sys.exit("invalid loss function specified. Pick any of ['log_loss', 'mse']") 28 | self.minmax = defaultdict(lambda: defaultdict(float)) 29 | 30 | def __repr__(self): 31 | return "GeneticClassifier()" 32 | 33 | def log_loss(self,y,p): 34 | p = max(min(p, 1. - 10e-15), 10e-15) 35 | return -log(p) if y == 1. else -log(1. - p) 36 | 37 | def mse(self,y_real,y_pred): 38 | print "ddd" 39 | 40 | def random_perceptron(self, size=3): 41 | perceptron = [] 42 | for feature_index in random.sample(self.minmax.keys(),size): 43 | perceptron.append((random.uniform(-1,1), feature_index)) 44 | return perceptron 45 | 46 | def calculate_perceptron(self, x, perceptron): 47 | #print sum([random_weight*x[feature_index] for random_weight, feature_index in perceptron[:-1]]), perceptron[-1], "drek" 48 | return sum([random_weight*x[feature_index][1] for random_weight, feature_index in perceptron[:-1]]) 49 | 50 | def data_gen(self, data_generator): 51 | return data_generator 52 | 53 | def bounded_sigmoid(self, wTx): 54 | return 1. / (1. + exp(-max(min(wTx, 35.), -35.))) 55 | 56 | def fit(self, data_generator,data_generator2,data_generator3): 57 | # Calculate min and max for every column 58 | k = self.data_gen(data_generator) 59 | 60 | if self.verbose > 0: 61 | print("calculating min and max for every feature_index") 62 | for i, (x, y) in enumerate(k): 63 | for feature_index, feature_val in x: 64 | if i == 0: 65 | self.minmax[feature_index]["min"] = feature_val 66 | self.minmax[feature_index]["max"] = feature_val 67 | else: 68 | if feature_val < self.minmax[feature_index]["min"]: 69 | self.minmax[feature_index]["min"] = feature_val 70 | if feature_val > self.minmax[feature_index]["max"]: 71 | self.minmax[feature_index]["max"] = feature_val 72 | if self.verbose > 0: 73 | if i % 1000 == 0: 74 | print(i) 75 | #print self.minmax 76 | 77 | # generate n random perceptrons with random threshold between min,max. 
78 | perceptrons = [] 79 | for i in range(5000): 80 | perceptrons.append(self.random_perceptron()) 81 | 82 | #k = data_generator 83 | # calculate fitness of generation 84 | fitness = defaultdict(list) 85 | fitness = defaultdict(lambda: defaultdict(int)) 86 | fitness = defaultdict(float) 87 | #k = self.data_gen(data_generator2) 88 | for i, (x, y) in enumerate(data_generator2): 89 | #print "kek" 90 | for perceptron_id, perceptron in enumerate(perceptrons): 91 | #print perceptron 92 | #print self.calculate_perceptron(x, perceptron) 93 | #fitness[perceptron_id].append(self.calculate_perceptron(x, perceptron)) 94 | #print self.bounded_sigmoid(self.calculate_perceptron(x, perceptron)), perceptron 95 | fitness[perceptron_id] += self.log_loss( y, self.bounded_sigmoid(self.calculate_perceptron(x, perceptron)) ) 96 | #print fitness 97 | """ 98 | fitness_keys = fitness.keys() 99 | for k in fitness_keys: 100 | #print k, fitness[k], perceptrons[k] 101 | total = sum(fitness[k].values()) 102 | for label in fitness[k]: 103 | fitness[k][label] = fitness[k][label] / total 104 | for k in fitness_keys: 105 | print k, fitness[k] 106 | """ 107 | fittest = [] 108 | for f in sorted(fitness, key=fitness.get)[:3]: 109 | #print f, fitness[f], fitness[f] / i, perceptrons[f] 110 | fittest.append(perceptrons[f]) 111 | kk = [] 112 | for i, (x, y) in enumerate(data_generator3): 113 | pred = [] 114 | for perceptron_id, perceptron in enumerate(fittest): 115 | pred.append(self.bounded_sigmoid(self.calculate_perceptron(x, perceptron))) 116 | #print y, sum(pred) / len(pred) 117 | kk.append((sum(pred) / len(pred), y)) 118 | from sklearn.metrics import roc_auc_score 119 | preds = [] 120 | y_real = [] 121 | for k in sorted(kk): 122 | preds.append(k[0]) 123 | y_real.append(k[1]) 124 | print k 125 | print roc_auc_score(y_real, preds) 126 | print fittest 127 | clf = GeneticClassifier() 128 | 129 | from sklearn.datasets import load_boston, load_digits 130 | X, ys = load_boston().data, load_boston().target 131 | X, ys = load_digits().data, load_digits().target 132 | 133 | X_bin = [] 134 | y_bin = [] 135 | for x, y in zip(X,ys): 136 | if y == 1 or y == 0: 137 | X_bin.append(list(x)) 138 | y_bin.append(int(y)) 139 | 140 | def get_data(X,ys): 141 | for x, y in zip(X, ys): 142 | yield [(e,f) for e,f in enumerate(x)], y 143 | 144 | clf.fit(get_data(X_bin[:200],y_bin[:200]),get_data(X_bin[:200],y_bin[:200]),get_data(X_bin[200:],y_bin[200:])) -------------------------------------------------------------------------------- /genetic_perceptrons/readme.md: -------------------------------------------------------------------------------- 1 | # Genetic Classifier Experiment 2 | 3 | Out-of-memory Genetic Programming experiments. 4 | 5 | ## Data 6 | 7 | Digit dataset turned into binary classification problem (all "1"'s and "0"'s). 8 | 9 | We use 200 samples for training, rest for testing. 10 | 11 | ## Epoch 1 (generate population) 12 | 13 | Creates 5000 random perceptrons. Weights are random.uniform(-1,1). Variables are a random subset of size 3. 14 | 15 | ## Epoch 2 (selecting) 16 | 17 | Do a bounded sigmoid on the perceptron dotproduct. Compute logistic loss for every perceptron. 18 | 19 | ## Epoch 3 (validation) 20 | 21 | Pick top n (in our case just top 3) perceptrons with lowest loss. Calculate AUC score on their average. 
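Taken together, the three epochs reduce to a short script. The sketch below re-implements them directly on the binary digits problem, independently of the `GeneticClassifier` class in `ogc.py`; the helper names (`random_perceptron`, `score`) and the fixed seed are illustrative, not part of the repository.

```python
import random
from math import exp, log

from sklearn.datasets import load_digits
from sklearn.metrics import roc_auc_score

def log_loss(y, p):
    p = max(min(p, 1. - 1e-15), 1e-15)
    return -log(p) if y == 1 else -log(1. - p)

def bounded_sigmoid(z):
    return 1. / (1. + exp(-max(min(z, 35.), -35.)))

def random_perceptron(n_features, size=3):
    # Epoch 1: a random subset of features with weights drawn from uniform(-1, 1)
    return [(random.uniform(-1, 1), i) for i in random.sample(range(n_features), size)]

def score(x, perceptron):
    # Bounded sigmoid on the perceptron dot product
    return bounded_sigmoid(sum(w * x[i] for w, i in perceptron))

random.seed(42)

# Digits turned into a binary problem: keep only the "0"s and "1"s
digits = load_digits()
data = [(list(x), int(t)) for x, t in zip(digits.data, digits.target) if t in (0, 1)]
train, test = data[:200], data[200:]

# Epoch 1: generate the population
population = [random_perceptron(len(train[0][0])) for _ in range(5000)]

# Epoch 2: fitness = summed logistic loss over the training samples
fitness = [sum(log_loss(t, score(x, p)) for x, t in train) for p in population]

# Epoch 3: keep the 3 fittest perceptrons, average their predictions, report AUC
fittest = [p for _, p in sorted(zip(fitness, population), key=lambda fp: fp[0])[:3]]
preds = [sum(score(x, p) for p in fittest) / len(fittest) for x, _ in test]
print(roc_auc_score([t for _, t in test], preds))
```

Sorting by summed loss and averaging the survivors is the same selection rule `ogc.py` applies; mutation and cross-breeding from the Todo list below are left out.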
22 | 23 | ## Todo 24 | 25 | Mutation (perturb the weights) 26 | Cross-breeding 27 | Stacked Generalization 28 | Refactor code 29 | 30 | ## Console 31 | ``` 32 | (0.014806130485203578, 0) 33 | (0.015559958197744028, 0) 34 | (0.01587381161380923, 0) 35 | (0.0169735033705027, 0) 36 | (0.01717849773878812, 0) 37 | (0.019452516605602196, 0) 38 | (0.019935589575795432, 0) 39 | (0.021776800144591064, 0) 40 | (0.022564160497505852, 0) 41 | (0.024220038565533175, 0) 42 | (0.025746555210662218, 0) 43 | (0.02605650144187907, 0) 44 | (0.026810217179488924, 0) 45 | (0.02713099408186445, 0) 46 | (0.02723356433475863, 0) 47 | (0.02936591408356302, 0) 48 | (0.029978667260524033, 0) 49 | (0.030700550772331376, 0) 50 | (0.030844433504879143, 0) 51 | (0.031053159900509414, 0) 52 | (0.03123725078013455, 0) 53 | (0.03297678325368353, 0) 54 | (0.03332912169404179, 0) 55 | (0.033473004426589556, 0) 56 | (0.033473004426589556, 0) 57 | (0.03433782823002379, 0) 58 | (0.03542307913509152, 0) 59 | (0.036705456537757065, 0) 60 | (0.037034271782880074, 0) 61 | (0.037623925905289714, 0) 62 | (0.038775641856037484, 0) 63 | (0.039960926109952756, 0) 64 | (0.04044573790490704, 0) 65 | (0.04130375894356001, 0) 66 | (0.043281184380554755, 0) 67 | (0.04328179360110455, 0) 68 | (0.04538484473375132, 0) 69 | (0.046783753657153086, 0) 70 | (0.04844416778234802, 0) 71 | (0.048755056454485746, 0) 72 | (0.050116700140464994, 0) 73 | (0.05239087672961318, 0) 74 | (0.05316126249745066, 0) 75 | (0.053315231941341364, 0) 76 | (0.0546916896005725, 0) 77 | (0.05472269649917016, 0) 78 | (0.05554640940178249, 0) 79 | (0.055913721143210526, 0) 80 | (0.055913721143210526, 0) 81 | (0.055913721143210526, 0) 82 | (0.05679474073321927, 0) 83 | (0.05841107658258162, 0) 84 | (0.05912213847818546, 0) 85 | (0.06219733610199696, 0) 86 | (0.06301575070520711, 0) 87 | (0.06301575070520711, 0) 88 | (0.06518628703287722, 0) 89 | (0.0662417195310765, 0) 90 | (0.06791626882271846, 0) 91 | (0.06834706228267, 0) 92 | (0.06872344287821336, 0) 93 | (0.07074219992113444, 0) 94 | (0.07185115914143808, 0) 95 | (0.07330218790504078, 0) 96 | (0.07385290196880523, 0) 97 | (0.07565544253872046, 0) 98 | (0.07618661179570192, 0) 99 | (0.08020173209230932, 0) 100 | (0.08020173209230932, 0) 101 | (0.09218162661712481, 0) 102 | (0.09480643413693064, 0) 103 | (0.10213356194781036, 0) 104 | (0.1060934551977365, 0) 105 | (0.1132913629361838, 0) 106 | (0.11741878733773632, 0) 107 | (0.16301499791048507, 0) 108 | (0.17538128558051155, 0) 109 | (0.20671005402596973, 0) 110 | (0.2516665407893916, 1) 111 | (0.3661389534525756, 1) 112 | (0.4252099449008057, 0) 113 | (0.46393919484645285, 1) 114 | (0.46393919484645285, 1) 115 | (0.5010890570451947, 1) 116 | (0.5612109193335089, 1) 117 | (0.6331774086989662, 1) 118 | (0.7429332849557856, 1) 119 | (0.7628192573041437, 1) 120 | (0.7685918165525977, 1) 121 | (0.7685918165525977, 1) 122 | (0.8570344990295519, 1) 123 | (0.9417282789353449, 1) 124 | (0.9604228201148005, 1) 125 | (0.9676786026688059, 1) 126 | (0.9772060070555658, 1) 127 | (0.9791451785550085, 1) 128 | (0.9842293187007075, 1) 129 | (0.9901488715048538, 1) 130 | (0.990436153644518, 1) 131 | (0.9919238054978897, 1) 132 | (0.9928078540976585, 1) 133 | (0.9933478035260386, 1) 134 | (0.993918811471151, 1) 135 | (0.9949463438751233, 1) 136 | (0.9950517422780601, 1) 137 | (0.9953454365707204, 1) 138 | (0.9957547029560953, 1) 139 | (0.9966372191059513, 1) 140 | (0.9968856510709517, 1) 141 | (0.9968927400719431, 1) 142 | (0.9970405821756699, 1) 143 | (0.9974510717889961, 1) 144 | 
(0.9976801858304766, 1) 145 | (0.9979552393408241, 1) 146 | (0.9982154865259442, 1) 147 | (0.9982203594383284, 1) 148 | (0.9982840036989512, 1) 149 | (0.998419906033074, 1) 150 | (0.9984286897033879, 1) 151 | (0.9984939983442622, 1) 152 | (0.9985027501741893, 1) 153 | (0.998549118578187, 1) 154 | (0.9986863173788039, 1) 155 | (0.9986995495829957, 1) 156 | (0.9988363408173697, 1) 157 | (0.998854496324887, 1) 158 | (0.998955527610253, 1) 159 | (0.9991916934357409, 1) 160 | (0.99919397823552, 1) 161 | (0.99919397823552, 1) 162 | (0.9992342622785184, 1) 163 | (0.9992840756930583, 1) 164 | (0.9993487207606337, 1) 165 | (0.9993487207606337, 1) 166 | (0.9993665026371147, 1) 167 | (0.9993913467408749, 1) 168 | (0.999412694902663, 1) 169 | (0.9994539795827881, 1) 170 | (0.9994588524951725, 1) 171 | (0.9994663855013514, 1) 172 | (0.9995178058181322, 1) 173 | (0.9995572280200719, 1) 174 | (0.999596894990182, 1) 175 | (0.9996193026734028, 1) 176 | (0.9996382625515509, 1) 177 | (0.9996787062495565, 1) 178 | (0.999718214845872, 1) 179 | (0.9997201285040723, 1) 180 | (0.999781483433603, 1) 181 | (0.999781483433603, 1) 182 | (0.999783492129653, 1) 183 | (0.9997936397792117, 1) 184 | (0.9998102570392101, 1) 185 | (0.999817704188042, 1) 186 | (0.999817704188042, 1) 187 | (0.9998257655652, 1) 188 | (0.9998278518376006, 1) 189 | (0.9998278518376006, 1) 190 | (0.9998278518376006, 1) 191 | (0.9998301101066053, 1) 192 | 193 | ROC_AUC_SCORE: 0.999687451164 194 | 195 | Top 3 Perceptrons [(random_weight, feature_index)] 196 | [ 197 | [(0.6149939955332868, 36), (-0.6191801712762446, 30), (-0.8061383715423533, 50)], 198 | [(-0.42417643001229877, 29), (0.7619881468846776, 36), (-0.08738675196793921, 47)], 199 | [(0.4814303936739788, 36), (-0.2050123438980298, 50), (0.36938855845436214, 44)] 200 | ] 201 | ``` -------------------------------------------------------------------------------- /rbr_experiments/rbro.py: -------------------------------------------------------------------------------- 1 | # Coding up the algorithm from http://arxiv.org/abs/1501.02990 2 | # "Random Bits Regression: a Strong General Predictor for Big Data" 3 | 4 | import random 5 | from datetime import datetime 6 | 7 | def create_var_subset(x,size=3): 8 | # (1) Randomly select a small subset of variables, e.g. x1, x3, x6. 9 | return random.sample([i for i in range(len(x))],min(size,len(x))) 10 | 11 | def assign_weights(var_subset): 12 | # (2) Randomly assign weights to each selected variables. The weights 13 | # are sampled from standard normal distribution, for example, 14 | # w1, w3, w6~N(0,1) 15 | return [(random.random(),i) for i in var_subset] 16 | 17 | def obtain_weighted_sum(x, weighted_var_subset): 18 | # (3) Obtain the weighted sum for each sample, for example 19 | # (w1*x1) + (w3*x3) + (w6*x6) = zi for the ith sample. 20 | weighted_sum = 0 21 | for w, i in weighted_var_subset: 22 | weighted_sum += w * x[i] 23 | return weighted_sum 24 | 25 | def pick_random_threshold(weighted_sums): 26 | # (4) Randomly pick one zi from the n generated as the threshold T. 27 | return random.choice(weighted_sums) 28 | 29 | def assign_bit(weighted_sum, threshold): 30 | # (5) Assign bits values to fk according to the threshold T 31 | # If zi >= T then 1 else 0 32 | if weighted_sum >= threshold: 33 | return 1 34 | else: 35 | return 0 36 | 37 | def process(data, K=100, size=3): 38 | # The process is repeated K times. 
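    # Note: assign_weights() above draws weights with random.random(), i.e. uniform on
    # [0, 1), whereas the paper (and the comment in that function) describes standard
    # normal weights; random.gauss(0, 1) would match the paper more closely.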
39 | start = datetime.now() 40 | data_bits = [] 41 | for k in range(K): 42 | var_subset = create_var_subset(data[0],size=size) # 1 43 | weighted_var_subset = assign_weights(var_subset) #2 44 | weighted_sums = [] 45 | for x in data: 46 | weighted_sums.append(obtain_weighted_sum(x, weighted_var_subset)) # 3 47 | # The first feature is fixed to 1 to act as the interceptor. 48 | if k == 0: 49 | data_bits.append([1]) 50 | 51 | random_threshold = pick_random_threshold(weighted_sums) # 4 (Try picking multiple thresholds or entropy) 52 | 53 | for i, (x, data_bit) in enumerate(zip(data, data_bits)): 54 | data_bit.append( assign_bit(obtain_weighted_sum(x, weighted_var_subset),random_threshold) ) # 5 55 | 56 | if k % 1000 == 0: 57 | print k, datetime.now() - start 58 | return data_bits 59 | 60 | random.seed(100) 61 | 62 | from sklearn import datasets 63 | data, y = datasets.load_digits().data, datasets.load_digits().target 64 | data = [list(x) for x in data] 65 | 66 | data_bits = process(data, 10000, 3) # We generate ~10^4-10^6 random binary intermediate features for each sample. 67 | 68 | from sklearn import linear_model, ensemble, svm, neighbors, cross_validation 69 | import numpy as np 70 | 71 | # Select predictive intermediate features by regularized linear/logistic regression. 72 | 73 | # KNN Classifier without intermediate features 74 | start = datetime.now() 75 | clf = neighbors.KNeighborsClassifier() 76 | scores = cross_validation.cross_val_score(clf, data, y,cv=20) 77 | print clf, np.array(data).shape 78 | print scores 79 | print scores.mean() 80 | print datetime.now() - start 81 | print 82 | 83 | # KNN Classifier with intermediate features 84 | start = datetime.now() 85 | scores = cross_validation.cross_val_score(clf, data_bits, y, cv=20) 86 | print clf, np.array(data_bits).shape 87 | print scores 88 | print scores.mean() 89 | print datetime.now() - start 90 | print 91 | 92 | # SGD Classifier without intermediate features 93 | start = datetime.now() 94 | clf = linear_model.SGDClassifier(loss="log", penalty="l2", n_iter=20, random_state=1, n_jobs=-1) 95 | scores = cross_validation.cross_val_score(clf, data, y,cv=20) 96 | print clf, np.array(data).shape 97 | print scores 98 | print scores.mean() 99 | print datetime.now() - start 100 | print 101 | 102 | # SGD Classifier with intermediate features 103 | start = datetime.now() 104 | scores = cross_validation.cross_val_score(clf, data_bits, y, cv=20) 105 | print clf, np.array(data_bits).shape 106 | print scores 107 | print scores.mean() 108 | print datetime.now() - start 109 | print 110 | 111 | # Logistic Regression without intermediate features 112 | start = datetime.now() 113 | clf = linear_model.LogisticRegression() 114 | scores = cross_validation.cross_val_score(clf, data, y,cv=20) 115 | print clf, np.array(data).shape 116 | print scores 117 | print scores.mean() 118 | print datetime.now() - start 119 | print 120 | 121 | # Logistic Regression with intermediate features 122 | start = datetime.now() 123 | scores = cross_validation.cross_val_score(clf, data_bits, y, cv=20) 124 | print clf, np.array(data_bits).shape 125 | print scores 126 | print scores.mean() 127 | print datetime.now() - start 128 | print 129 | 130 | # Standard RF without features 131 | start = datetime.now() 132 | clf = ensemble.ExtraTreesClassifier(n_estimators=500,random_state=1,n_jobs=-1) 133 | scores = cross_validation.cross_val_score(clf, data, y, cv=20) 134 | print clf, np.array(data).shape 135 | print scores 136 | print scores.mean() 137 | print datetime.now() - start 
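# The remaining blocks repeat the same comparison for ExtraTreesClassifier on the
# intermediate bit features, then RandomForestClassifier and a linear-kernel SVC,
# each run on the raw features and on the (1797, 10001) bit features.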
138 | print 139 | 140 | start = datetime.now() 141 | clf = ensemble.ExtraTreesClassifier(n_estimators=500,random_state=1,n_jobs=-1) 142 | scores = cross_validation.cross_val_score(clf, data_bits, y, cv=20) 143 | print clf, np.array(data_bits).shape 144 | print scores 145 | print scores.mean() 146 | print datetime.now() - start 147 | print 148 | 149 | start = datetime.now() 150 | clf = ensemble.RandomForestClassifier(n_estimators=500,n_jobs=-1,random_state=1) 151 | scores = cross_validation.cross_val_score(clf, data, y, cv=20) 152 | print clf, np.array(data).shape 153 | print scores 154 | print scores.mean() 155 | print datetime.now() - start 156 | print 157 | 158 | start = datetime.now() 159 | clf = ensemble.RandomForestClassifier(n_estimators=500,n_jobs=-1,random_state=1) 160 | scores = cross_validation.cross_val_score(clf, data_bits, y, cv=20) 161 | print clf, np.array(data_bits).shape 162 | print scores 163 | print scores.mean() 164 | print datetime.now() - start 165 | print 166 | 167 | start = datetime.now() 168 | clf = svm.SVC(kernel="linear") 169 | scores = cross_validation.cross_val_score(clf, data, y, cv=20) 170 | print clf, np.array(data).shape 171 | print scores 172 | print scores.mean() 173 | print datetime.now() - start 174 | print 175 | 176 | start = datetime.now() 177 | clf = svm.SVC(kernel="linear") 178 | scores = cross_validation.cross_val_score(clf, data_bits, y, cv=20) 179 | print clf, np.array(data_bits).shape 180 | print scores 181 | print scores.mean() 182 | print datetime.now() - start 183 | print -------------------------------------------------------------------------------- /rbr_experiments/readme.md: -------------------------------------------------------------------------------- 1 | # Experiment offline RBR on digits 2 | We write the algorithm to be as close to the paper as possible. Then we use a toy dataset `digits` shaped (1797, 64) with 10 classes. We look at algorithm performance of using 10^4 intermediate features of subset size 3. 3 | 4 | ## Results 5 | 20-fold CV acc. | Vectors | Algo 6 | --- | --- | --- 7 | 0.981674170670 | **RBR** | **SVM** 8 | 0.981230593359 | RAW | KNN 9 | 0.978857040929 | RAW | ET 10 | 0.974951330371 | RBR | **LOGREG** 11 | 0.974470711080 | RBR | ET 12 | 0.972307524295 | RBR | KNN 13 | 0.971636906430 | RBR | **RF** 14 | 0.971165709003 | RAW | SVM 15 | 0.967125864925 | RAW | RF 16 | 0.965967668687 | RBR | **SGD** 17 | 0.946672851823 | RAW | LOGREG 18 | 0.916611431522 | RAW | SGD 19 | 20 | ## Prelim 21 | RBR SVM took `0:04:24.457000` vs. RAW KNN `0:00:00.714000`. RBR improved SVM, Logreg, RF and SGD over using the RAW original features. 22 | 23 | Logistic Regression took a long time with 10k RBR features. All-in-all RBR LOGREG could be a useful diverse addition to an ensemble. 24 | 25 | ## Console 26 | ``` 27 | 0 0:00:00.012000 28 | 1000 0:00:08.505000 29 | 2000 0:00:17.046000 30 | 3000 0:00:25.596000 31 | 4000 0:00:34.179000 32 | 5000 0:00:42.764000 33 | 6000 0:00:51.321000 34 | 7000 0:00:59.911000 35 | 8000 0:01:08.479000 36 | 9000 0:01:17.042000 37 | 38 | KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', 39 | metric_params=None, n_neighbors=5, p=2, weights='uniform') (1797L, 64L) 40 | [ 0.92631579 0.97849462 0.98901099 1. 0.98888889 0.97777778 41 | 0.98888889 0.96666667 0.98888889 0.94444444 1. 0.98888889 42 | 0.98888889 1. 0.98876404 0.98876404 1. 
0.95454545 43 | 0.97701149 0.98837209] 44 | 0.981230593359 45 | 0:00:00.714000 46 | 47 | KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', 48 | metric_params=None, n_neighbors=5, p=2, weights='uniform') (1797L, 10001L) 49 | [ 0.91578947 0.97849462 0.98901099 0.98888889 0.98888889 0.97777778 50 | 0.95555556 0.96666667 0.96666667 0.93333333 0.98888889 0.97777778 51 | 0.97777778 0.98888889 1. 0.98876404 1. 0.90909091 52 | 0.96551724 0.98837209] 53 | 0.972307524295 54 | 0:01:47.331000 55 | 56 | SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, 57 | eta0=0.0, fit_intercept=True, l1_ratio=0.15, 58 | learning_rate='optimal', loss='log', n_iter=20, n_jobs=-1, 59 | penalty='l2', power_t=0.5, random_state=1, shuffle=True, verbose=0, 60 | warm_start=False) (1797L, 64L) 61 | [ 0.89473684 0.90322581 0.87912088 0.94444444 0.97777778 0.9 62 | 0.88888889 0.91111111 0.87777778 0.9 0.97777778 0.98888889 63 | 0.98888889 0.87777778 0.94382022 0.93258427 0.85393258 0.80681818 64 | 0.91954023 0.96511628] 65 | 0.916611431522 66 | 0:00:03.182000 67 | 68 | SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, 69 | eta0=0.0, fit_intercept=True, l1_ratio=0.15, 70 | learning_rate='optimal', loss='log', n_iter=20, n_jobs=-1, 71 | penalty='l2', power_t=0.5, random_state=1, shuffle=True, verbose=0, 72 | warm_start=False) (1797L, 10001L) 73 | [ 0.94736842 0.96774194 1. 0.98888889 0.98888889 0.94444444 74 | 0.93333333 0.96666667 0.97777778 0.94444444 1. 0.97777778 75 | 0.97777778 0.97777778 0.98876404 0.97752809 0.97752809 0.875 76 | 0.94252874 0.96511628] 77 | 0.965967668687 78 | 0:01:13.966000 79 | 80 | LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, 81 | intercept_scaling=1, max_iter=100, multi_class='ovr', 82 | penalty='l2', random_state=None, solver='liblinear', tol=0.0001, 83 | verbose=0) (1797L, 64L) 84 | [ 0.89473684 0.93548387 0.95604396 0.96666667 0.95555556 0.94444444 85 | 0.92222222 0.93333333 0.92222222 0.97777778 0.98888889 0.98888889 86 | 0.98888889 0.95555556 0.97752809 0.96629213 0.91011236 0.81818182 87 | 0.96551724 0.96511628] 88 | 0.946672851823 89 | 0:00:06.033000 90 | 91 | LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, 92 | intercept_scaling=1, max_iter=100, multi_class='ovr', 93 | penalty='l2', random_state=None, solver='liblinear', tol=0.0001, 94 | verbose=0) (1797L, 10001L) 95 | [ 0.94736842 0.96774194 1. 0.98888889 0.98888889 0.97777778 96 | 0.94444444 0.97777778 0.97777778 0.96666667 0.98888889 0.98888889 97 | 0.97777778 0.98888889 0.98876404 0.98876404 0.98876404 0.93181818 98 | 0.95402299 0.96511628] 99 | 0.974951330371 100 | 0:09:41.141000 101 | 102 | ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini', 103 | max_depth=None, max_features='auto', max_leaf_nodes=None, 104 | min_samples_leaf=1, min_samples_split=2, 105 | min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1, 106 | oob_score=False, random_state=1, verbose=0, warm_start=False) (1797L, 64L) 107 | [ 0.93684211 0.97849462 1. 1. 0.98888889 0.97777778 108 | 0.97777778 0.96666667 0.97777778 0.97777778 0.98888889 0.98888889 109 | 0.97777778 1. 0.98876404 0.98876404 1. 
0.94318182 110 | 0.97701149 0.94186047] 111 | 0.978857040929 112 | 0:00:16.637000 113 | 114 | ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini', 115 | max_depth=None, max_features='auto', max_leaf_nodes=None, 116 | min_samples_leaf=1, min_samples_split=2, 117 | min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1, 118 | oob_score=False, random_state=1, verbose=0, warm_start=False) (1797L, 10001L) 119 | [ 0.91578947 0.96774194 1. 0.98888889 0.98888889 0.97777778 120 | 0.96666667 0.96666667 0.96666667 0.98888889 1. 1. 121 | 0.98888889 0.97777778 0.97752809 0.98876404 0.96629213 0.94318182 122 | 0.96551724 0.95348837] 123 | 0.97447071108 124 | 0:02:19.255000 125 | 126 | RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 127 | max_depth=None, max_features='auto', max_leaf_nodes=None, 128 | min_samples_leaf=1, min_samples_split=2, 129 | min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1, 130 | oob_score=False, random_state=1, verbose=0, warm_start=False) (1797L, 64L) 131 | [ 0.93684211 0.96774194 0.98901099 0.97777778 0.98888889 0.97777778 132 | 0.95555556 0.95555556 0.96666667 0.96666667 0.98888889 0.98888889 133 | 0.96666667 0.96666667 0.97752809 0.98876404 0.96629213 0.92045455 134 | 0.95402299 0.94186047] 135 | 0.967125864925 136 | 0:00:18.448000 137 | 138 | RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 139 | max_depth=None, max_features='auto', max_leaf_nodes=None, 140 | min_samples_leaf=1, min_samples_split=2, 141 | min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1, 142 | oob_score=False, random_state=1, verbose=0, warm_start=False) (1797L, 10001L) 143 | [ 0.92631579 0.95698925 1. 0.98888889 0.97777778 0.97777778 144 | 0.96666667 0.96666667 0.97777778 0.98888889 0.98888889 0.98888889 145 | 0.98888889 0.97777778 0.97752809 0.98876404 0.95505618 0.93181818 146 | 0.96551724 0.94186047] 147 | 0.97163690643 148 | 0:01:42.010000 149 | 150 | SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, 151 | kernel='linear', max_iter=-1, probability=False, random_state=None, 152 | shrinking=True, tol=0.001, verbose=False) (1797L, 64L) 153 | [ 0.91578947 0.98924731 0.98901099 1. 0.98888889 0.96666667 154 | 0.95555556 0.96666667 0.95555556 0.96666667 1. 0.97777778 155 | 0.95555556 0.96666667 0.96629213 1. 0.98876404 0.92045455 156 | 0.97701149 0.97674419] 157 | 0.971165709003 158 | 0:00:01.275000 159 | 160 | SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, 161 | kernel='linear', max_iter=-1, probability=False, random_state=None, 162 | shrinking=True, tol=0.001, verbose=False) (1797L, 10001L) 163 | [ 0.93684211 0.97849462 1. 1. 0.97777778 0.97777778 164 | 0.97777778 1. 0.97777778 0.97777778 1. 1. 165 | 0.98888889 0.98888889 1. 0.98876404 1. 0.92045455 166 | 0.96551724 0.97674419] 167 | 0.98167417067 168 | 0:04:24.457000 169 | ``` -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Randomly Follow the Regularized Leader 2 | 3 | This is a class containing a binary classifier for online machine learning. 
It employs approaches based on Random Bits Regression and the FTRL-Proximal algorithm 4 | 5 | ##### rftrl.**RandomLeaderClassifier**(alpha=0.1, beta=1., l1=0., l2=1., nr_projections=10000, max_projections=0, subsample_projections=1., size_projections=3, random_state=0, verbose=0) 6 | 7 | ## Parameters 8 | 9 | Parameter | Description 10 | --- | --- 11 | alpha. | Float. Learning Rate. *Default = 0.1* 12 | beta. | Float. Smoothing parameter for adaptive learning rate. *Default = 1.* 13 | l1. | Float. L1 Regularization. *Default = 0.1* 14 | l2. | Float. L2 Regularization. *Default = 1.0* 15 | nr_projections. | Int. Number of random linear projections to create. *Default = 10000* 16 | max_projections. | Int. Not implemented. 17 | subsample_projections. | Float. Uses subsampling when making a first pass to create the random thresholds. This is more memory friendly for larger datasets. *Default = 1.* 18 | size_projections. | Int. Number of (feature_value * random_weight) to use in the random linear functions. *Default = 3* 19 | random_state. | Int. Seed for replication. *Default = 0* 20 | Verbose. | Int. Verbosity of classifier. *Default = 0* 21 | 22 | ## Usage 23 | 24 | ```python 25 | clf = rftrl.RandomLeaderClassifier(nr_projections=50000, random_state=1, size_projections=3) 26 | 27 | # Project data 28 | clf.project(X_train) 29 | 30 | # Train 31 | loss = 0 32 | for e, (x,y) in enumerate(zip(X_train,y)): 33 | clf.fit(x,e,y) 34 | pred = clf.predict() 35 | loss += clf.logloss() 36 | clf.update(pred) 37 | 38 | # Test 39 | y = 1 # Dummy label 40 | for e, x in enumerate(X_test): 41 | clf.fit(x,e,y) 42 | pred = clf.predict() 43 | print("%s,%s"%(e,pred)) 44 | ``` 45 | 46 | ## References 47 | 48 | > Random Bit Regression (RBR). 49 | > Random Bits Regression: a Strong General Predictor for Big Data 50 | > Yi Wang, Yi Li, Momiao Xiong, Li Jin 51 | 52 | http://arxiv.org/abs/1501.02990 53 | 54 | > Follow the Regularized Leader (FTRL) 55 | > Ad Click prediction: A view from the trenches. 56 | > H. Brendan McMahan, Gary Holt, D. Sculley, Michael Young, Dietmar Ebner, Julian Grady, Lan Nie, Todd Phillips, Eugene Davydov, Daniel Golovin, Sharat Chikkerur, Dan Liu, Martin Wattenberg, Arnar Mar Hrafnkelsson, Tom Boulos, Jeremy Kubica. 57 | 58 | https://research.google.com/pubs/archive/41159.pdf 59 | 60 | > Tinrtgu's Beat the Benchmark online FTRL proximal script's 61 | > Beat the benchmark with less then 200MB of memory. 62 | 63 | https://www.kaggle.com/c/criteo-display-ad-challenge/forums/t/10322/beat-the-benchmark-with-less-then-200mb-of-memory/53737 64 | 65 | https://www.kaggle.com/c/tradeshift-text-classification/forums/t/10537/beat-the-benchmark-with-less-than-400mb-of-memory/ -------------------------------------------------------------------------------- /rftrl.py: -------------------------------------------------------------------------------- 1 | """ Author: Triskelion, HJ van Veen, info@mlwave.com 2 | 3 | This class implements a binary classifier for online learning, which is based on descriptions in the papers: 4 | 5 | Random Bit Regression (RBR). 6 | Random Bits Regression: a Strong General Predictor for Big Data 7 | Yi Wang, Yi Li, Momiao Xiong, Li Jin 8 | http://arxiv.org/abs/1501.02990 9 | 10 | Follow the Regularized Leader (FTRL) 11 | Ad Click prediction: A view from the trenches. 12 | H. Brendan McMahan, Gary Holt, D. 
Sculley, Michael Young, Dietmar Ebner, Julian Grady, Lan Nie, Todd Phillips, 13 | Eugene Davydov, Daniel Golovin, Sharat Chikkerur, Dan Liu, Martin Wattenberg, Arnar Mar Hrafnkelsson, Tom Boulos, 14 | Jeremy Kubica. 15 | https://research.google.com/pubs/archive/41159.pdf 16 | 17 | Random Bit Regression 18 | 19 | RBR works well on dense tall datasets. The algorithm is most succinctly described in the paper: 20 | 21 | 1. Randomly select a subset of variables, eg: f1, f2, f3 22 | 2. Assign random weights uniformly drawn from between 0 and 1 for each variable in the subset. eg: w1 = 0.4532134 23 | 3. Obtain the weighted sum (z). eg: z = (f1 * w1) + (f2 * w2) + (f3 * w3) 24 | 4. Randomly pick one threshold (t_random) generated from all z's (Z). eg: t_random = 15.34245 25 | 5. Vectorize samples with bits according to the formula: if z > t_random then 1 else 0. 26 | 27 | Basically we add the result of many random linear functions (perceptrons) as binarized features to a sample: Random Bit Vectorization. 28 | 29 | Follow the Regularized Leader 30 | 31 | We then use a logistic regression algorithm with L2 regularization to do conventional supervised learning on this bit representation. 32 | 33 | The online FTRL (oFTRL) code is credit to tinrtgu (https://www.kaggle.com/ggglhf) . This is a categorical classifier that was used for 34 | "ad click prediction"-competitions on Kaggle. It used the hashing trick to one-hot encode all the features and supported both L1 and 35 | L2 regularization. 36 | 37 | Modifications 38 | 39 | RBR 40 | 41 | We modify (relax) step 4. from the Random Bit Regression Algorithm. We don't want to generate all the thresholds for the 42 | entire dataset, simply to obtain a single random threshold. If we do all that, then we may as well pick thresholds so they 43 | better divide the classes. A single pass over a dataset or batch is still needed to get a random threshold for every random 44 | linear function. Heavy subsampling and a max Z-size ensures the generation of random thresholds without wasting too much time 45 | building the vectorizers. There are other paths to check out: completely random thresholds, prenormalizing or online normalization 46 | of features, and "Don't do linear functions, but Euclidean distance to first n noise-distorted samples". 47 | 48 | oFTRL 49 | 50 | oFTRL was originally a purely categorical classifier. Through bit vectorizing the features with random linear functions it can now 51 | handle features which were originally floats or numerical. Another benefit is the added boost for none-linearity in problems. 52 | 53 | As we always know the length of our binary representation (we are using this for dense tall datasets, not sparse datasets like text), 54 | we do not need the hashing trick for now. We can simply sparse encode: 55 | 56 | "11101" becomes "1:1 2:1 3:1 5:1". 57 | 58 | We call this modified algorithm "Randomly Follow the Regularized Leader" 59 | """ 60 | import numpy as np 61 | from math import sqrt, exp, log 62 | 63 | class RandomLeaderClassifier(object): 64 | def __init__(self, alpha=0.1, beta=1., l1=0., l2=1., nr_projections=10000, max_projections=0, 65 | subsample_projections=1., size_projections=3, random_state=0, 66 | verbose=0): 67 | self.z = [0.] * (nr_projections+1) 68 | self.n = [0.] 
* (nr_projections+1) 69 | self.nr_projections = nr_projections 70 | self.alpha = alpha 71 | self.beta = beta 72 | self.l1 = l1 73 | self.l2 = l2 74 | self.size_projections = size_projections 75 | self.subsample_projections = subsample_projections 76 | self.max_projections = max_projections 77 | self.random_state = random_state 78 | self.verbose = verbose 79 | self.w = {} 80 | self.X = [] 81 | self.y = 0. 82 | self.random_thresholds = [] 83 | self.random_indexes = [] 84 | self.random_weights = [] 85 | self.Prediction = 0. 86 | 87 | def sgn(self, x): 88 | if x < 0: 89 | return -1 90 | else: 91 | return 1 92 | 93 | def project(self, X_train): 94 | if self.verbose > 0: 95 | print("Creating %s random projections on train set shaped %s"%(self.nr_projections,str(X_train.shape))) 96 | print("Using random seed %s"%(self.random_state)) 97 | np.random.seed(self.random_state) 98 | self.random_indexes = np.random.randint(0, high=X_train.shape[1], size=(self.nr_projections, self.size_projections)) 99 | self.random_weights = np.random.rand(self.nr_projections,self.size_projections) 100 | for e, x in enumerate(X_train): 101 | if e == 0: 102 | thresholds = np.sum(x[self.random_indexes] * self.random_weights, axis=1).reshape((1,self.nr_projections)) 103 | else: 104 | if np.random.random() < self.subsample_projections: 105 | thresholds = np.r_[thresholds, np.sum(x[self.random_indexes] * self.random_weights, axis=1).reshape((1,self.nr_projections))] 106 | if self.max_projections > 0 and thresholds.shape[0] >= self.max_projections: 107 | if self.verbose > 0: 108 | print("Halting.") 109 | break 110 | 111 | random_thresholds = [] 112 | for column_id in range(self.nr_projections): 113 | random_thresholds.append(thresholds[np.random.randint(0,high=thresholds.shape[0])][column_id]) 114 | self.random_thresholds = np.array(random_thresholds) 115 | 116 | 117 | def fit(self,x,sample_id,label): 118 | self.ID = sample_id 119 | self.y = float(label) 120 | 121 | thresholds = np.sum(x[self.random_indexes] * self.random_weights, axis=1).reshape((1,self.nr_projections)) 122 | bools = thresholds > self.random_thresholds 123 | 124 | self.X = [e+1 for e, f in enumerate(list(bools.astype(int)[0])) if f == 1 ] # Sparse encoding the bitstring 125 | self.X = [0] + self.X # Prefix with a bias term 126 | 127 | def logloss(self): 128 | act = self.y 129 | pred = self.Prediction 130 | predicted = max(min(pred, 1. - 10e-15), 10e-15) 131 | return -log(predicted) if act == 1. else -log(1. - predicted) 132 | 133 | def predict(self): 134 | W_dot_x = 0. 135 | w = {} 136 | for i in self.X: 137 | if abs(self.z[i]) <= self.l1: 138 | w[i] = 0. 139 | else: 140 | w[i] = (self.sgn(self.z[i]) * self.l1 - self.z[i]) / (((self.beta + sqrt(self.n[i]))/self.alpha) + self.l2) 141 | W_dot_x += w[i] 142 | self.w = w 143 | self.Prediction = 1. / (1. + exp(-max(min(W_dot_x, 35.), -35.))) 144 | return self.Prediction 145 | 146 | def update(self, prediction): 147 | for i in self.X: 148 | g = (prediction - self.y) 149 | sigma = (1./self.alpha) * (sqrt(self.n[i] + g*g) - sqrt(self.n[i])) 150 | self.z[i] += g - sigma*self.w[i] 151 | self.n[i] += g*g --------------------------------------------------------------------------------
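For reference, the chunk-and-majority compression described in `examples/bitstringcompression.py` (which currently contains only the docstring above, no code) could be sketched as follows; the `compress_bitstring` name is illustrative and not part of the repository.

```python
def compress_bitstring(bitstring, n_chunks):
    """Compress a bitstring by majority-voting equally sized chunks.

    "1110101000" with n_chunks=2 -> chunks "11101" (sum 4 > 2.5 -> "1")
    and "01000" (sum 1 < 2.5 -> "0"), giving "10".
    """
    chunk_size = len(bitstring) // n_chunks
    out = []
    for i in range(n_chunks):
        chunk = bitstring[i * chunk_size:(i + 1) * chunk_size]
        # A chunk becomes "1" only if more than half of its bits are set
        out.append("1" if sum(int(b) for b in chunk) > chunk_size / 2. else "0")
    return "".join(out)

print(compress_bitstring("1110101000", 2))  # "10"
```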