├── .gitignore ├── README.md ├── boolean.py ├── interpreter.py ├── model.py ├── progressbar.py └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # treehouse 2 | Learn programs from data 3 | 4 | --------- 5 | 6 | ## Example 7 | 8 | Learn to do the XOR function combing AND and OR gates: 9 | 10 | ```python 11 | 12 | # AND gate 13 | 14 | def _and(x): 15 | return int(x[0] and x[1]) 16 | 17 | # OR gate 18 | 19 | def _or(x): 20 | return int(x[0] or x[1]) 21 | 22 | # Training data 23 | 24 | X = [(0, 0), (0, 1), (1, 0), (1, 1)] 25 | Y = [0, 1, 1, 0] # XOR 26 | 27 | X = np.array(X) 28 | Y = np.array(Y) 29 | 30 | # Create model 31 | 32 | model = Model([_and, _or]) 33 | 34 | # Train 35 | 36 | model.fit(X, Y) 37 | 38 | # Predict 39 | 40 | model.predict(X) # >>> [0, 1, 1, 0] 41 | 42 | # Generate python-like code: 43 | 44 | code = model.get_program() 45 | print(code) 46 | 47 | ''' 48 | if _and(input): 49 | if _or(input): 50 | output = 0 51 | else: 52 | output = 1 53 | else: 54 | if _or(input): 55 | output = 1 56 | else: 57 | output = 0 58 | ''' 59 | 60 | 61 | # Run the generated code on inputs using the built-in interpreter: 62 | 63 | input = (0, 0) 64 | interpreter.get_output(code, input, context=nodes) # >>> 0 65 | 66 | input = (0, 1) 67 | interpreter.get_output(code, input, context=nodes) # >>> 1 68 | 69 | input = (1, 0) 70 | interpreter.get_output(code, input, context=nodes) # >>> 1 71 | 72 | input = (0, 0) 73 | interpreter.get_output(code, input, context=nodes) # >>> 0 74 | 75 | ``` 76 | -------------------------------------------------------------------------------- /boolean.py: -------------------------------------------------------------------------------- 1 | 2 | class _Boolean(object): 3 | 4 | def get_data(self): 5 | raise NotImplemented 6 | 7 | def get_callables(self): 8 | raise NotImplemented 9 | 10 | def __call__(self, x): 11 | raise NotImplemented 12 | 13 | 14 | class Boolean_0(_Boolean): 15 | 16 | def get_data(self): 17 | X = [(0, 0), (0, 1), (1, 0), (1, 1)] 18 | Y = [0, 1, 1, 0] 19 | return X, Y 20 | 21 | def get_callables(self): 22 | 23 | def OR(x, y): 24 | return int(x or y) 25 | 26 | def AND(x, y): 27 | return int(x and y) 28 | return [OR, AND] 29 | 30 | def __call__(self, x): 31 | return int((x and not y) or (y and not x)) 32 | -------------------------------------------------------------------------------- /interpreter.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | _cache = {} 4 | 5 | 6 | def get_output(code, input=None, context=[]): 7 | if type(context) is list: 8 | context = {x.__name__ : x for x in context} 9 | output = '__no__output__' 10 | if code in _cache: 11 | compiled = _cache[code] 12 | else: 13 | block = ast.parse(code, mode='exec') 14 | compiled = compile(block, '', mode='exec') 15 | _cache[code] = compiled 16 | context['input'] = input 17 | context['output'] = output 18 | globals().update(context) 19 | exec(compiled, globals()) 20 | return globals()['output'] 21 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.ma as ma 3 | from progressbar import ProgressBar 4 | import math 5 | 6 | 7 | class Model(object): 8 | 9 | def __init__(self, nodes, num_classes=2, depth=5, lr=0.1): 10 | self.nodes = nodes 11 | self.num_classes = num_classes 12 | self.depth = depth 13 | self.lr = lr 14 | self.build() 15 | 16 | def _random_prob_dist(self, shape): 17 | x = np.random.uniform(-1, 1, shape) 18 | e = np.exp(x - np.max(x, axis=-1, keepdims=True)) 19 | s = np.sum(e, axis=-1, keepdims=True) 20 | return e / s 21 | 22 | def build(self): 23 | expd = 2 ** self.depth 24 | self.nps = self._random_prob_dist((expd - 1, len(self.nodes))) 25 | self.nps_lr = np.arange(self.nps.size).reshape(self.nps.shape) + 1 26 | self.nps_lr = np.cast[float](self.nps_lr) 27 | self.nps_lr *= self.lr 28 | self.lps = self._random_prob_dist((expd, self.num_classes)) 29 | self.lps_lr = np.arange(self.lps.size).reshape(self.lps.shape) + 1 + self.nps.size 30 | self.lps_lr = np.cast[float](self.lps_lr) 31 | self.lps_lr *= self.lr 32 | num_total = self.nps.size + self.lps.size 33 | self.nps_lr /= num_total 34 | self.lps_lr /= num_total 35 | paths = {} 36 | def _add_path(n=0, p=[]): 37 | if n > expd - 2: 38 | return 39 | self.paths[n] = p 40 | p += [n] 41 | l = 2 * n + 1 42 | r = l + 1 43 | _add_path(l, p) 44 | self.paths = [paths[i] for i in range(len(paths))] 45 | 46 | 47 | 48 | def forward(self, x, return_history=True): 49 | visited_nps = [] 50 | current_np = 0 51 | nodes = self.nodes 52 | nps = self.nps 53 | lps = self.lps 54 | num_nps = len(nps) 55 | while(True): 56 | if current_np >= num_nps: 57 | visited_lp = current_np - num_nps 58 | label = np.argmax(lps[visited_lp]) 59 | break 60 | visited_nps.append(current_np) 61 | node = nodes[np.argmax(nps[current_np])] 62 | node_out = node(x) 63 | current_np = 2 * current_np + 1 + node_out 64 | if return_history: 65 | return label, visited_lp, visited_nps 66 | return label 67 | 68 | def fit(self, X, Y, epochs=10): 69 | nps = self.nps 70 | lps = self.lps 71 | num_nodes = len(self.nodes) 72 | nps_lr = self.nps_lr 73 | lps_lr = self.lps_lr 74 | for epoch in range(epochs): 75 | print('Epoch ' + str(epoch) + ':') 76 | pbar = ProgressBar(len(X)) 77 | updated = False 78 | for x, y in zip(X, Y): 79 | y_, lp, nps_ = self.forward(x) 80 | if y == y_: 81 | for np_idx in nps_: 82 | np_ = nps[np_idx] 83 | np_lr = nps_lr[np_idx] 84 | mx = np.argmax(np_) 85 | update = np_.copy() 86 | update *= np_lr 87 | update[mx] = 0 88 | s = update.sum() 89 | update *= -1 90 | update[mx] = s 91 | np_ += update 92 | lp_ = lps[lp] 93 | lp_lr = lps_lr[lp] 94 | mx = np.argmax(lp_) 95 | update = lp_.copy() 96 | update *= lp_lr 97 | update[mx] = 0 98 | s = update.sum() 99 | update *= -1 100 | update[mx] = s 101 | lp_ += update 102 | else: 103 | ''' 104 | __NEXUS_ONLY = True 105 | if __NEXUS_ONLY: 106 | labels = lps.argmax(-1) 107 | for lp_idx, label in enumerate(labels): 108 | if label == y: 109 | ''' 110 | for np_idx in nps_: 111 | np_ = nps[np_idx] 112 | np_lr = nps_lr[np_idx] 113 | mx = np.argmax(np_) 114 | update = np.zeros_like(np_) 115 | mx_val = np_[mx] 116 | delta = mx_val * np_lr[mx] 117 | update += delta / (num_nodes - 1) 118 | update[mx] = - delta 119 | np_ += update 120 | lp_ = lps[lp] 121 | lp_lr = lps_lr[lp] 122 | mx = np.argmax(lp_) 123 | update = np.zeros_like(lp_) 124 | mx_val = lp_[mx] 125 | delta = mx_val * lp_lr[mx] 126 | update += delta / (num_nodes - 1) 127 | update[mx] = - delta 128 | lp_ += update 129 | updated = True 130 | pbar.update() 131 | if not updated: 132 | break 133 | 134 | 135 | def predict(self, X): 136 | f = self.forward 137 | Y = [f(x, False) for x in X] 138 | return np.array(Y) 139 | 140 | def evaluate(self, X, Y): 141 | Y = np.array(Y, dtype=int) 142 | Y_ = np.array(self.predict(X), dtype=int) 143 | return float(np.sum(Y==Y_)) / len(Y) 144 | 145 | def _get_node_name(self, node): 146 | if hasattr (node, '__name__'): 147 | return node.__name__ 148 | if hasattr(node, '__class__'): 149 | node = node.__class__.__name__ 150 | return str(node) 151 | 152 | def get_program(self): 153 | lines = ['\n'] 154 | nodes = self.nodes 155 | lps = self.lps 156 | nps = self.nps 157 | num_nodes = len(nps) 158 | previous_node = None 159 | def _get_code(n_p=0, ind=0, values={}): 160 | if n_p >= num_nodes: 161 | n_p -= num_nodes 162 | label = np.argmax(lps[n_p]) 163 | line = ' ' * ind + 'output = ' + str(label) 164 | lines.append(line) 165 | else: 166 | node = nodes[np.argmax(nps[n_p])] 167 | node_name = self._get_node_name(node) 168 | node_val = values.get(node, None) 169 | if node_val is None: 170 | line = ' ' * ind + 'if ' + node_name + '(input)' + ':' 171 | lines.append(line) 172 | ind += 4 173 | values[node] = True 174 | left = 2 * n_p + 1 175 | right = left + 1 176 | if_statement_idx = len(lines) - 1 177 | _get_code(right, ind, values) 178 | values[node] = False 179 | ind -= 4 180 | lines.append(' ' * ind + 'else:') 181 | else_statement_idx = len(lines) - 1 182 | ind += 4 183 | _get_code(left, ind, values) 184 | values[node] = None 185 | ind -= 4 186 | # check for if(cond){x}else{x}; kinda hackish, will fix later 187 | if_block = lines[if_statement_idx + 1: else_statement_idx] 188 | else_block = lines[else_statement_idx + 1:] 189 | 190 | if if_block == else_block: 191 | for _ in range(len(lines) - if_statement_idx): 192 | lines.pop() 193 | for s in if_block: 194 | lines.append(s[4:]) 195 | elif node_val: 196 | right = 2 * n_p + 2 197 | _get_code(right, ind, values) 198 | else: 199 | left = 2 * n_p + 1 200 | _get_code(left, ind, values) 201 | 202 | 203 | _get_code() 204 | return '\n'.join(lines) 205 | 206 | 207 | # allows the model to be used as a node in a bigger model 208 | def __call__(self, x): 209 | return self.predict([x])[0] 210 | -------------------------------------------------------------------------------- /progressbar.py: -------------------------------------------------------------------------------- 1 | # credits: fchollet 2 | 3 | import sys 4 | import time 5 | import numpy as np 6 | 7 | 8 | class ProgressBar(object): 9 | """Displays a progress bar. 10 | 11 | # Arguments 12 | target: Total number of steps expected, None if unknown. 13 | interval: Minimum visual progress update interval (in seconds). 14 | """ 15 | 16 | def __init__(self, target, width=30, verbose=1, interval=0.05): 17 | self.width = width 18 | if target is None: 19 | target = -1 20 | self.target = target 21 | self.sum_values = {} 22 | self.unique_values = [] 23 | self.start = time.time() 24 | self.last_update = 0 25 | self.interval = interval 26 | self.total_width = 0 27 | self.seen_so_far = 0 28 | self.verbose = verbose 29 | 30 | def set_value(self, current, values=None, force=False): 31 | values = values or [] 32 | for k, v in values: 33 | if k not in self.sum_values: 34 | self.sum_values[k] = [v * (current - self.seen_so_far), 35 | current - self.seen_so_far] 36 | self.unique_values.append(k) 37 | else: 38 | self.sum_values[k][0] += v * (current - self.seen_so_far) 39 | self.sum_values[k][1] += (current - self.seen_so_far) 40 | self.seen_so_far = current 41 | 42 | now = time.time() 43 | if self.verbose == 1: 44 | if not force and (now - self.last_update) < self.interval: 45 | return 46 | 47 | prev_total_width = self.total_width 48 | sys.stdout.write('\b' * prev_total_width) 49 | sys.stdout.write('\r') 50 | 51 | if self.target is not -1: 52 | numdigits = int(np.floor(np.log10(self.target))) + 1 53 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 54 | bar = barstr % (current, self.target) 55 | prog = float(current) / self.target 56 | prog_width = int(self.width * prog) 57 | if prog_width > 0: 58 | bar += ('=' * (prog_width - 1)) 59 | if current < self.target: 60 | bar += '>' 61 | else: 62 | bar += '=' 63 | bar += ('.' * (self.width - prog_width)) 64 | bar += ']' 65 | sys.stdout.write(bar) 66 | self.total_width = len(bar) 67 | 68 | if current: 69 | time_per_unit = (now - self.start) / current 70 | else: 71 | time_per_unit = 0 72 | eta = time_per_unit * (self.target - current) 73 | perc = float(current) * 100 / self.target 74 | info = '' 75 | if current < self.target and self.target is not -1: 76 | info += ' - %f%%' % perc 77 | info += ' - ETA: %ds' % eta 78 | else: 79 | info += ' - %ds' % (now - self.start) 80 | for k in self.unique_values: 81 | info += ' - %s:' % k 82 | if isinstance(self.sum_values[k], list): 83 | avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1])) 84 | if abs(avg) > 1e-3: 85 | info += ' %.4f' % avg 86 | else: 87 | info += ' %.4e' % avg 88 | else: 89 | info += ' %s' % self.sum_values[k] 90 | 91 | self.total_width += len(info) 92 | if prev_total_width > self.total_width: 93 | info += ((prev_total_width - self.total_width) * ' ') 94 | 95 | sys.stdout.write(info) 96 | sys.stdout.flush() 97 | 98 | if current >= self.target: 99 | sys.stdout.write('\n') 100 | 101 | if self.verbose == 2: 102 | if current >= self.target: 103 | info = '%ds' % (now - self.start) 104 | for k in self.unique_values: 105 | info += ' - %s:' % k 106 | avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1])) 107 | if avg > 1e-3: 108 | info += ' %.4f' % avg 109 | else: 110 | info += ' %.4e' % avg 111 | sys.stdout.write(info + "\n") 112 | 113 | self.last_update = now 114 | 115 | def update(self, n=1, values=None): 116 | self.set_value(self.seen_so_far + n, values) 117 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from model import Model 2 | import numpy as np 3 | import interpreter 4 | 5 | 6 | # Learn to do XOR using AND and OR gates 7 | 8 | 9 | X = [(0, 0), (0, 1), (1, 0), (1, 1)] 10 | Y = [0, 1, 1, 0] 11 | 12 | 13 | X = np.array(X) 14 | Y = np.array(Y) 15 | 16 | 17 | def _and(x): 18 | return int(x[0] and x[1]) 19 | 20 | 21 | def _or(x): 22 | return int(x[0] or x[1]) 23 | 24 | 25 | nodes = [_and, _or] 26 | 27 | 28 | model = Model(nodes, depth=5) 29 | 30 | model.fit(X, Y, 20) 31 | 32 | code = model.get_program() 33 | print(code) 34 | 35 | assert model.evaluate(X, Y) == 1.0 36 | 37 | # Test generated code 38 | 39 | for x, y in zip(X, Y): 40 | x = tuple(x) 41 | assert interpreter.get_output(code, x, nodes) == y, interpreter.get_output(code, x, nodes) 42 | --------------------------------------------------------------------------------