├── .gitignore
├── README.md
├── boolean.py
├── interpreter.py
├── model.py
├── progressbar.py
└── run.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # treehouse
 2 | Learn programs from data
 3 | 
 4 | ---------
 5 | 
 6 | ## Example
 7 | 
Learn the XOR function by combining AND and OR gates:
 9 | 
10 | ```python
11 | 
12 | # AND gate
13 | 
14 | def _and(x):
15 |     return int(x[0] and x[1])
16 |  
17 | # OR gate
18 | 
19 | def _or(x):
20 |     return int(x[0] or x[1])
21 | 
22 | # Training data
23 | 
24 | X = [(0, 0), (0, 1), (1, 0), (1, 1)]
25 | Y = [0, 1, 1, 0]  # XOR
26 | 
27 | X = np.array(X)
28 | Y = np.array(Y)
29 | 
30 | # Create model
31 | 
nodes = [_and, _or]
model = Model(nodes)
33 | 
34 | # Train
35 | 
36 | model.fit(X, Y)
37 | 
38 | # Predict
39 | 
40 | model.predict(X)  # >>> [0, 1, 1, 0]
41 | 
42 | # Generate python-like code:
43 | 
44 | code = model.get_program()
45 | print(code)
46 | 
47 | '''
48 | if _and(input):
49 |     if _or(input):
50 |         output = 0
51 |     else:
52 |         output = 1
53 | else:
54 |     if _or(input):
55 |         output = 1
56 |     else:
57 |         output = 0
58 | '''
59 | 
60 | 
61 | # Run the generated code on inputs using the built-in interpreter:
62 | 
63 | input = (0, 0)
64 | interpreter.get_output(code, input, context=nodes)   # >>> 0
65 | 
66 | input = (0, 1)
67 | interpreter.get_output(code, input, context=nodes)   # >>> 1
68 | 
69 | input = (1, 0)
70 | interpreter.get_output(code, input, context=nodes)   # >>> 1
71 | 
input = (1, 1)
73 | interpreter.get_output(code, input, context=nodes)   # >>> 0
74 | 
75 | ```
76 | 


--------------------------------------------------------------------------------
/boolean.py:
--------------------------------------------------------------------------------
 1 | 
 2 | class _Boolean(object):
 3 | 
 4 | 	def get_data(self):
 5 | 		raise NotImplemented
 6 | 
 7 | 	def get_callables(self):
 8 | 		raise NotImplemented
 9 | 
10 | 	def __call__(self, x):
11 | 		raise NotImplemented
12 | 
13 | 
class Boolean_0(_Boolean):
    """XOR task: learn exclusive-or from OR and AND gates."""

    def get_data(self):
        """Return ``(X, Y)``: the four input pairs and their XOR labels."""
        X = [(0, 0), (0, 1), (1, 0), (1, 1)]
        Y = [0, 1, 1, 0]
        return X, Y

    def get_callables(self):
        """Return ``[OR, AND]``; each gate takes two separate arguments."""

        def OR(x, y):
            return int(x or y)

        def AND(x, y):
            return int(x and y)
        return [OR, AND]

    def __call__(self, x):
        """Return the XOR (0 or 1) of the two elements of the pair ``x``.

        # Arguments
            x: A two-element sequence such as the pairs from ``get_data``.
        """
        # The original body read an undefined name ``y``; the second operand
        # is the second element of the input pair.
        a, b = x
        return int((a and not b) or (b and not a))
32 | 


--------------------------------------------------------------------------------
/interpreter.py:
--------------------------------------------------------------------------------
 1 | import ast
 2 | 
 3 | _cache = {}
 4 | 
 5 | 
 6 | def get_output(code, input=None, context=[]):
 7 |     if type(context) is list:
 8 |         context = {x.__name__ : x for x in context}
 9 |     output = '__no__output__'
10 |     if code in _cache:
11 |         compiled = _cache[code]
12 |     else:
13 |         block = ast.parse(code, mode='exec')
14 |         compiled = compile(block, '<string>', mode='exec')
15 |         _cache[code] = compiled
16 |     context['input'] = input
17 |     context['output'] = output
18 |     globals().update(context)
19 |     exec(compiled, globals())
20 |     return globals()['output']
21 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import numpy.ma as ma
  3 | from progressbar import ProgressBar
  4 | import math
  5 | 
  6 | 
  7 | class Model(object):
  8 | 
  9 | 	def __init__(self, nodes, num_classes=2, depth=5, lr=0.1):
 10 | 		self.nodes = nodes
 11 | 		self.num_classes = num_classes
 12 | 		self.depth = depth
 13 | 		self.lr = lr
 14 | 		self.build()
 15 | 
 16 | 	def _random_prob_dist(self, shape):
 17 | 		x = np.random.uniform(-1, 1, shape)
 18 | 		e = np.exp(x - np.max(x, axis=-1, keepdims=True))
 19 | 		s = np.sum(e, axis=-1, keepdims=True)
 20 | 		return e / s
 21 | 
 22 | 	def build(self):
 23 | 		expd = 2 ** self.depth
 24 | 		self.nps = self._random_prob_dist((expd - 1, len(self.nodes)))
 25 | 		self.nps_lr = np.arange(self.nps.size).reshape(self.nps.shape) + 1
 26 | 		self.nps_lr = np.cast[float](self.nps_lr)
 27 | 		self.nps_lr *= self.lr
 28 | 		self.lps = self._random_prob_dist((expd, self.num_classes))
 29 | 		self.lps_lr = np.arange(self.lps.size).reshape(self.lps.shape) + 1 + self.nps.size
 30 | 		self.lps_lr = np.cast[float](self.lps_lr)
 31 | 		self.lps_lr *= self.lr
 32 | 		num_total = self.nps.size + self.lps.size
 33 | 		self.nps_lr /= num_total
 34 | 		self.lps_lr /= num_total
 35 | 		paths = {}
 36 | 		def _add_path(n=0, p=[]):
 37 | 			if n > expd - 2:
 38 | 				return
 39 | 			self.paths[n] = p
 40 | 			p += [n]
 41 | 			l = 2 * n + 1
 42 | 			r = l + 1
 43 | 			_add_path(l, p)
 44 | 		self.paths = [paths[i] for i in range(len(paths))]
 45 | 
 46 | 
 47 | 
 48 | 	def forward(self, x, return_history=True):
 49 | 		visited_nps = []
 50 | 		current_np = 0
 51 | 		nodes = self.nodes
 52 | 		nps = self.nps
 53 | 		lps = self.lps
 54 | 		num_nps = len(nps)
 55 | 		while(True):
 56 | 			if current_np >= num_nps:
 57 | 				visited_lp = current_np - num_nps
 58 | 				label = np.argmax(lps[visited_lp])
 59 | 				break
 60 | 			visited_nps.append(current_np)
 61 | 			node = nodes[np.argmax(nps[current_np])]
 62 | 			node_out = node(x)
 63 | 			current_np = 2 * current_np + 1 + node_out
 64 | 		if return_history:
 65 | 			return label, visited_lp, visited_nps
 66 | 		return label
 67 | 
 68 | 	def fit(self, X, Y, epochs=10):
 69 | 		nps = self.nps
 70 | 		lps = self.lps
 71 | 		num_nodes = len(self.nodes)
 72 | 		nps_lr = self.nps_lr
 73 | 		lps_lr = self.lps_lr
 74 | 		for epoch in range(epochs):
 75 | 			print('Epoch ' + str(epoch) + ':')
 76 | 			pbar = ProgressBar(len(X))
 77 | 			updated = False
 78 | 			for x, y in zip(X, Y):
 79 | 				y_, lp, nps_ = self.forward(x)
 80 | 				if y == y_:
 81 | 					for np_idx in nps_:
 82 | 						np_ = nps[np_idx]
 83 | 						np_lr = nps_lr[np_idx]
 84 | 						mx = np.argmax(np_)
 85 | 						update = np_.copy()
 86 | 						update *= np_lr
 87 | 						update[mx] = 0
 88 | 						s = update.sum()
 89 | 						update *= -1
 90 | 						update[mx] = s
 91 | 						np_ += update
 92 | 					lp_ = lps[lp]
 93 | 					lp_lr = lps_lr[lp]
 94 | 					mx = np.argmax(lp_)
 95 | 					update = lp_.copy()
 96 | 					update *= lp_lr
 97 | 					update[mx] = 0
 98 | 					s = update.sum()
 99 | 					update *= -1
100 | 					update[mx] = s
101 | 					lp_ += update
102 | 				else:
103 | 					'''
104 | 					__NEXUS_ONLY = True
105 | 					if __NEXUS_ONLY:
106 | 						labels = lps.argmax(-1)
107 | 						for lp_idx, label in enumerate(labels):
108 | 							if label == y:
109 | 					'''
110 | 					for np_idx in nps_:
111 | 						np_ = nps[np_idx]
112 | 						np_lr = nps_lr[np_idx]
113 | 						mx = np.argmax(np_)
114 | 						update = np.zeros_like(np_)
115 | 						mx_val = np_[mx]
116 | 						delta = mx_val * np_lr[mx]
117 | 						update += delta / (num_nodes - 1)
118 | 						update[mx] = - delta
119 | 						np_ += update
120 | 					lp_ = lps[lp]
121 | 					lp_lr = lps_lr[lp]
122 | 					mx = np.argmax(lp_)
123 | 					update = np.zeros_like(lp_)
124 | 					mx_val = lp_[mx]
125 | 					delta = mx_val * lp_lr[mx]
126 | 					update += delta / (num_nodes - 1)
127 | 					update[mx] = - delta
128 | 					lp_ += update
129 | 					updated = True
130 | 				pbar.update()
131 | 			if not updated:
132 | 				break
133 | 
134 | 
135 | 	def predict(self, X):
136 | 		f = self.forward
137 | 		Y = [f(x, False) for x in X]
138 | 		return np.array(Y)
139 | 
140 | 	def evaluate(self, X, Y):
141 | 		Y = np.array(Y, dtype=int)
142 | 		Y_ = np.array(self.predict(X), dtype=int)
143 | 		return float(np.sum(Y==Y_)) / len(Y)
144 | 
145 | 	def _get_node_name(self, node):
146 | 		if hasattr	(node, '__name__'):
147 | 			return node.__name__
148 | 		if hasattr(node, '__class__'):
149 | 			node = node.__class__.__name__
150 | 		return str(node)
151 | 
152 | 	def get_program(self):
153 | 		lines = ['\n']
154 | 		nodes = self.nodes
155 | 		lps = self.lps
156 | 		nps = self.nps
157 | 		num_nodes = len(nps)
158 | 		previous_node = None
159 | 		def _get_code(n_p=0, ind=0, values={}):
160 | 			if n_p >= num_nodes:
161 | 				n_p -= num_nodes
162 | 				label = np.argmax(lps[n_p])
163 | 				line = ' ' * ind + 'output = ' + str(label)
164 | 				lines.append(line)
165 | 			else:
166 | 					node = nodes[np.argmax(nps[n_p])]
167 | 					node_name = self._get_node_name(node)
168 | 					node_val = values.get(node, None)
169 | 					if node_val is None:
170 | 						line = ' ' * ind + 'if ' + node_name + '(input)' + ':'
171 | 						lines.append(line)
172 | 						ind += 4
173 | 						values[node] = True
174 | 						left = 2 * n_p + 1
175 | 						right = left + 1
176 | 						if_statement_idx = len(lines) - 1
177 | 						_get_code(right, ind, values)
178 | 						values[node] = False
179 | 						ind -= 4
180 | 						lines.append(' ' * ind + 'else:')
181 | 						else_statement_idx = len(lines) - 1
182 | 						ind += 4
183 | 						_get_code(left, ind, values)
184 | 						values[node] = None
185 | 						ind -= 4
186 | 						# check for if(cond){x}else{x}; kinda hackish, will fix later
187 | 						if_block = lines[if_statement_idx + 1: else_statement_idx]
188 | 						else_block = lines[else_statement_idx + 1:]
189 | 
190 | 						if if_block == else_block:
191 | 							for _ in range(len(lines) - if_statement_idx):
192 | 								lines.pop()
193 | 							for s in if_block:
194 | 								lines.append(s[4:])
195 | 					elif node_val:
196 | 						right = 2 * n_p + 2
197 | 						_get_code(right, ind, values)
198 | 					else:
199 | 						left = 2 * n_p + 1
200 | 						_get_code(left, ind, values)
201 | 
202 | 
203 | 		_get_code()
204 | 		return '\n'.join(lines)
205 | 
206 | 
207 | 	# allows the model to be used as a node in a bigger model
208 | 	def __call__(self, x):
209 | 		return self.predict([x])[0]
210 | 


--------------------------------------------------------------------------------
/progressbar.py:
--------------------------------------------------------------------------------
  1 | # credits: fchollet
  2 | 
  3 | import sys
  4 | import time
  5 | import numpy as np
  6 | 
  7 | 
  8 | class ProgressBar(object):
  9 |     """Displays a progress bar.
 10 | 
 11 |     # Arguments
 12 |         target: Total number of steps expected, None if unknown.
 13 |         interval: Minimum visual progress update interval (in seconds).
 14 |     """
 15 | 
 16 |     def __init__(self, target, width=30, verbose=1, interval=0.05):
 17 |         self.width = width
 18 |         if target is None:
 19 |             target = -1
 20 |         self.target = target
 21 |         self.sum_values = {}
 22 |         self.unique_values = []
 23 |         self.start = time.time()
 24 |         self.last_update = 0
 25 |         self.interval = interval
 26 |         self.total_width = 0
 27 |         self.seen_so_far = 0
 28 |         self.verbose = verbose
 29 | 
 30 |     def set_value(self, current, values=None, force=False):
 31 |         values = values or []
 32 |         for k, v in values:
 33 |             if k not in self.sum_values:
 34 |                 self.sum_values[k] = [v * (current - self.seen_so_far),
 35 |                                       current - self.seen_so_far]
 36 |                 self.unique_values.append(k)
 37 |             else:
 38 |                 self.sum_values[k][0] += v * (current - self.seen_so_far)
 39 |                 self.sum_values[k][1] += (current - self.seen_so_far)
 40 |         self.seen_so_far = current
 41 | 
 42 |         now = time.time()
 43 |         if self.verbose == 1:
 44 |             if not force and (now - self.last_update) < self.interval:
 45 |                 return
 46 | 
 47 |             prev_total_width = self.total_width
 48 |             sys.stdout.write('\b' * prev_total_width)
 49 |             sys.stdout.write('\r')
 50 | 
 51 |             if self.target is not -1:
 52 |                 numdigits = int(np.floor(np.log10(self.target))) + 1
 53 |                 barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
 54 |                 bar = barstr % (current, self.target)
 55 |                 prog = float(current) / self.target
 56 |                 prog_width = int(self.width * prog)
 57 |                 if prog_width > 0:
 58 |                     bar += ('=' * (prog_width - 1))
 59 |                     if current < self.target:
 60 |                         bar += '>'
 61 |                     else:
 62 |                         bar += '='
 63 |                 bar += ('.' * (self.width - prog_width))
 64 |                 bar += ']'
 65 |                 sys.stdout.write(bar)
 66 |                 self.total_width = len(bar)
 67 | 
 68 |             if current:
 69 |                 time_per_unit = (now - self.start) / current
 70 |             else:
 71 |                 time_per_unit = 0
 72 |             eta = time_per_unit * (self.target - current)
 73 |             perc = float(current) * 100 / self.target
 74 |             info = ''
 75 |             if current < self.target and self.target is not -1:
 76 |                 info += ' - %f%%' % perc
 77 |                 info += ' - ETA: %ds' % eta
 78 |             else:
 79 |                 info += ' - %ds' % (now - self.start)
 80 |             for k in self.unique_values:
 81 |                 info += ' - %s:' % k
 82 |                 if isinstance(self.sum_values[k], list):
 83 |                     avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1]))
 84 |                     if abs(avg) > 1e-3:
 85 |                         info += ' %.4f' % avg
 86 |                     else:
 87 |                         info += ' %.4e' % avg
 88 |                 else:
 89 |                     info += ' %s' % self.sum_values[k]
 90 | 
 91 |             self.total_width += len(info)
 92 |             if prev_total_width > self.total_width:
 93 |                 info += ((prev_total_width - self.total_width) * ' ')
 94 | 
 95 |             sys.stdout.write(info)
 96 |             sys.stdout.flush()
 97 | 
 98 |             if current >= self.target:
 99 |                 sys.stdout.write('\n')
100 | 
101 |         if self.verbose == 2:
102 |             if current >= self.target:
103 |                 info = '%ds' % (now - self.start)
104 |                 for k in self.unique_values:
105 |                     info += ' - %s:' % k
106 |                     avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1]))
107 |                     if avg > 1e-3:
108 |                         info += ' %.4f' % avg
109 |                     else:
110 |                         info += ' %.4e' % avg
111 |                 sys.stdout.write(info + "\n")
112 | 
113 |         self.last_update = now
114 | 
115 |     def update(self, n=1, values=None):
116 |         self.set_value(self.seen_so_far + n, values)
117 | 


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
 1 | from model import Model
 2 | import numpy as np
 3 | import interpreter
 4 | 
 5 | 
 6 | # Learn to do XOR using AND and OR gates
 7 | 
 8 | 
 9 | X = [(0, 0), (0, 1), (1, 0), (1, 1)]
10 | Y = [0, 1, 1, 0]
11 | 
12 | 
13 | X = np.array(X)
14 | Y = np.array(Y)
15 | 
16 | 
def _and(x):
    """AND gate: integer value of ``x[0] and x[1]``."""
    first = x[0]
    # ``a and b`` yields ``a`` when it is falsy, otherwise ``b``.
    chosen = x[1] if first else first
    return int(chosen)
19 | 
20 | 
def _or(x):
    """OR gate: integer value of ``x[0] or x[1]``."""
    first = x[0]
    # ``a or b`` yields ``a`` when it is truthy, otherwise ``b``.
    chosen = first if first else x[1]
    return int(chosen)
23 | 
24 | 
# Gates the model is allowed to combine.
nodes = [_and, _or]

# Build a depth-5 tree over the gates and train for at most 20 epochs.
model = Model(nodes, depth=5)
model.fit(X, Y, epochs=20)

# Show the learned program.
code = model.get_program()
print(code)

# The trained model must reproduce XOR on every training sample.
assert model.evaluate(X, Y) == 1.0

# Test generated code: the interpreter must agree with the labels too.
for x, y in zip(X, Y):
    x = tuple(x)
    assert interpreter.get_output(code, x, nodes) == y, interpreter.get_output(code, x, nodes)
42 | 


--------------------------------------------------------------------------------