├── hpconvnet ├── __init__.py ├── comparisons.py ├── shovel_util.py ├── foobar.py ├── cifar10.py ├── lfw.py ├── isvm_binary.py ├── isvm_boosting.py ├── utils.py ├── isvm_precomputed.py ├── slm_visitor.py ├── slm_visitor_primal.py ├── slm_visitor_esvc.py ├── slm.py ├── isvm_multi.py └── pyll_slm.py ├── requirements.txt ├── .gitignore ├── shovel ├── cifar10.py └── lfw.py ├── setup.py └── README.md /hpconvnet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | cython 3 | scipy 4 | nose 5 | cgen 6 | codepy 7 | coverage 8 | decorator 9 | lockfile 10 | matplotlib 11 | pymongo 12 | pytools 13 | wsgiref 14 | fabric 15 | shovel 16 | joblib 17 | networkx 18 | pillow 19 | skdata 20 | hyperopt 21 | scikit-image 22 | theano 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /hpconvnet/comparisons.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparison operators for pairwise image tasks (e.g. lfw, pubfig). 
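Each comparison maps a pair of per-image feature arrays (x, y) to a single
flattened feature vector (concatenation, elementwise product, difference,
absolute difference, etc.); `get_num_features(shp)` reports the resulting
dimensionality for a 4-D batch shape `shp`.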
3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | class Comparison(object): 9 | def get_num_features(self,shp): 10 | return shp[1] * shp[2] * shp[3] 11 | 12 | 13 | class Concatenate(Comparison): 14 | def get_num_features(self, shp): 15 | return 2 * shp[1] * shp[2] * shp[3] 16 | def __call__(self, x, y): 17 | return np.concatenate([x.flatten(), y.flatten()]) 18 | concatenate = Concatenate() 19 | 20 | 21 | class Mult(Comparison): 22 | def __call__(self, x, y): 23 | return x.flatten() * y.flatten() 24 | mult = Mult() 25 | 26 | 27 | class Diff(Comparison): 28 | def __call__(self, x, y): 29 | return x.flatten() - y.flatten() 30 | diff = Diff() 31 | 32 | 33 | class Absdiff(Comparison): 34 | def __call__(self, x, y): 35 | return np.abs(x.flatten() - y.flatten()) 36 | absdiff = Absdiff() 37 | 38 | 39 | class Sqrtabsdiff(Comparison): 40 | def __call__(self, x, y): 41 | return np.sqrt(np.abs(x.flatten() - y.flatten())) 42 | sqrtabsdiff = Sqrtabsdiff() 43 | 44 | 45 | class Sqdiff(Comparison): 46 | def __call__(self, x, y): 47 | return (x.flatten() - y.flatten())**2 48 | sqdiff = Sqdiff() 49 | -------------------------------------------------------------------------------- /hpconvnet/shovel_util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess 4 | import sys 5 | import time 6 | 7 | def configure_logging(level=logging.INFO, stream=sys.stdout, prefix=''): 8 | if isinstance(level, basestring): 9 | level = getattr(logging, level) 10 | root_handler = logging.StreamHandler(stream) 11 | root_formatter = logging.Formatter( 12 | prefix + '%(levelname)s (%(name)s): %(message)s' 13 | ) 14 | root_handler.setFormatter(root_formatter) 15 | root_logger = logging.getLogger() 16 | root_logger.setLevel(level) 17 | root_logger.addHandler(root_handler) 18 | 19 | logging.getLogger('shovel').propagate = False 20 | logging.getLogger('skimage').propagate = False 21 | 22 | 23 | def launch_workers_helper(host, port, dbname, N, walltime, rsync_data_local, 24 | mem=None): 25 | text = """#!/bin/bash 26 | %(rsync_data_local)s 27 | . 
VENV/eccv12/bin/activate 28 | VENV/eccv12/src/eccv12/hyperopt/bin/hyperopt-mongo-worker \ 29 | --mongo=%(host)s:%(port)s/%(dbname)s \ 30 | --workdir=/scratch_local/eccv12.workdir \ 31 | --reserve-timeout=180.0 \ 32 | --max-consecutive-failures=4 33 | """ % locals() 34 | 35 | qsub_script_name = '.worker.sh.%.3f' % time.time() 36 | 37 | script = open(qsub_script_name, 'w') 38 | script.write(text) 39 | script.close() 40 | 41 | subprocess.check_call(['chmod', '+x', qsub_script_name]) 42 | qsub_cmd = ['qsub', '-lnodes=1:gpus=1', '-lwalltime=%s' % walltime] 43 | if mem is not None: 44 | qsub_cmd.append('-lmem=%s' % mem) 45 | qsub_cmd.extend( 46 | ['-e', os.path.expanduser('~/.qsub/%s.err' % qsub_script_name)]) 47 | qsub_cmd.extend( 48 | ['-o', os.path.expanduser('~/.qsub/%s.out' % qsub_script_name)]) 49 | if int(N) > 1: 50 | qsub_cmd.extend(['-t', '1-%s' % N]) 51 | qsub_cmd.append(qsub_script_name) 52 | print qsub_cmd 53 | subprocess.check_call(qsub_cmd) 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /hpconvnet/foobar.py: -------------------------------------------------------------------------------- 1 | """ 2 | foobar.py - numeric tracing utilites 3 | 4 | """ 5 | import copy 6 | import logging 7 | import numpy as np 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | _trace_target = [] 12 | 13 | _trace = [] 14 | 15 | trace_enabled = True 16 | trace_verify = False 17 | 18 | def trace_eq(a, b): 19 | if len(a) != len(b): 20 | raise ValueError('Length mismatch', (a, b)) 21 | rval = True 22 | for i, (ai, bi) in enumerate(zip(a, b)): 23 | if isinstance(ai, basestring): 24 | if i == 0: 25 | if ai != bi: 26 | logger.error('TRACE: %s != %s' % (ai, bi)) 27 | rval = False 28 | else: 29 | if ai != bi: 30 | logger.warn('TRACE: %s != %s' % (ai, bi)) 31 | # -- we're letting this go for now... 
32 | elif isinstance(ai, float): 33 | if not np.allclose(ai, bi, atol=1e-3, rtol=1e-3): 34 | logger.error('TRACE: %s != %s' % (ai, bi)) 35 | rval = False 36 | elif isinstance(ai, int): 37 | if not ai == bi: 38 | logger.error('TRACE: %s != %s' % (ai, bi)) 39 | rval = False 40 | elif isinstance(ai, (tuple, list)): 41 | if not trace_eq(ai, bi): 42 | pass # -- letting this go for now 43 | return rval 44 | 45 | 46 | 47 | def verify_last_trace(): 48 | if trace_verify: 49 | target = _trace_target[len(_trace)-1] 50 | logger.info("Verifying trace: %s" % str(target)) 51 | assert trace_eq(target, _trace[-1]) 52 | 53 | 54 | def append_trace(*args): 55 | if trace_enabled: 56 | logger.info("Appending trace: %s" % str(args)) 57 | _trace.append(args) 58 | verify_last_trace() 59 | 60 | 61 | def append_ndarray_signature(x, *args): 62 | assert isinstance(x, np.ndarray) 63 | sig = (str(x.dtype), x.shape) 64 | if x.size: 65 | sig = sig + (x.min(), x.max(), x.mean()) 66 | return append_trace(*(args + sig)) 67 | 68 | 69 | def append_randomstate(msg, rng, *args): 70 | sample = copy.deepcopy(rng).randn() 71 | return append_trace(msg, sample, *args) 72 | 73 | 74 | def reset_trace(): 75 | _trace[:] = [] 76 | 77 | 78 | def set_trace_target(trace_target): 79 | _trace_target[:] = trace_target 80 | 81 | -------------------------------------------------------------------------------- /hpconvnet/cifar10.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import functools 3 | import logging 4 | import os 5 | 6 | import numpy as np 7 | 8 | from skdata.cifar10.views import StratifiedImageClassification 9 | 10 | import hyperopt 11 | from hyperopt import pyll 12 | 13 | import pyll_slm # adds the symbols to pyll.scope 14 | 15 | from .slm_visitor_primal import uslm_eval_helper 16 | from .slm import uslm_domain 17 | 18 | 19 | dumps = functools.partial(cPickle.dumps, protocol=-1) 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class DataView(object): 24 | pass 25 | 26 | pyll_data_view = pyll.as_apply(DataView) 27 | 28 | 29 | @pyll.scope.define 30 | def cifar10_unsup_images(data_view, N): 31 | # -- extract training images for unsupervised learning, 32 | # and put them into channel-major format 33 | imgs = np.asarray( 34 | data_view.dataset._pixels[ 35 | data_view.fit_idxs[:N]]) 36 | assert str(imgs.dtype) == 'uint8' 37 | rval = imgs.transpose(0, 3, 1, 2).copy() 38 | assert rval.shape[1] in (1, 3) # -- channels 39 | return rval 40 | 41 | 42 | def build_search_space( 43 | max_n_features, 44 | bagging_fraction, 45 | n_unsup, 46 | abort_on_rows_larger_than, 47 | batched_lmap_speed_thresh=None, 48 | batchsize=20, 49 | output_sizes=(32, 64, 128, 200), 50 | permit_affine_warp=True, 51 | ): 52 | if batched_lmap_speed_thresh is None: 53 | batched_lmap_speed_thresh = {'seconds': 2.0, 'elements': 150} 54 | Xcm = pyll.scope.cifar10_unsup_images(pyll_data_view, n_unsup) 55 | # -- currently these sizes are in *elements* 56 | search_space = { 57 | 'data_view': pyll_data_view, 58 | 'pipeline': uslm_domain( 59 | Xcm=Xcm, 60 | chmjr_image_shape=(3, 32, 32), 61 | output_sizes=output_sizes, 62 | batchsize=batchsize, 63 | max_n_features=max_n_features, 64 | batched_lmap_speed_thresh=batched_lmap_speed_thresh, 65 | permit_affine_warp=permit_affine_warp, 66 | abort_on_rows_larger_than=abort_on_rows_larger_than, 67 | ), 68 | 'batchsize': batchsize, 69 | 'max_n_features': max_n_features, 70 | 'ctrl': hyperopt.Domain.pyll_ctrl, 71 | 'batched_lmap_speed_thresh': batched_lmap_speed_thresh, 72 | 
'bagging_fraction': bagging_fraction, 73 | } 74 | return search_space 75 | 76 | 77 | def hybrid_loss(visitor, bagging_fraction): 78 | lossres = visitor._results['loss_indexed_image_classification'] 79 | loss_ensemble = lossres['val']['fit']['val']['using_history']['erate'] 80 | loss_member = lossres['val']['fit']['val']['not_using_history']['erate'] 81 | loss = (bagging_fraction * loss_member 82 | + (1 - bagging_fraction) * loss_ensemble) 83 | return loss 84 | 85 | 86 | def true_loss_fn(visitor): 87 | lossres = visitor._results['loss_indexed_image_classification'] 88 | rval = lossres['tst']['sel']['None']['using_history']['erate'] 89 | return rval 90 | 91 | 92 | @hyperopt.fmin_pass_expr_memo_ctrl 93 | def uslm_eval( 94 | expr, memo, ctrl, 95 | data_fraction=1.0, 96 | assume_promising=False, 97 | ): 98 | if ctrl.current_trial is None: 99 | assume_promising = True 100 | data_view = StratifiedImageClassification( 101 | dtype='uint8', 102 | n_train=int(40000 * data_fraction), 103 | n_valid=int(10000 * data_fraction), 104 | n_test=int(10000 * data_fraction), 105 | channel_major=False) 106 | 107 | memmap_name_template = 'cifar10_%i_%i' 108 | 109 | return uslm_eval_helper(expr, memo, ctrl, data_fraction, assume_promising, 110 | data_view, memmap_name_template, DataView, 111 | hybrid_loss, true_loss_fn) 112 | -------------------------------------------------------------------------------- /shovel/cifar10.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import sys 4 | 5 | logger = logging.getLogger(__name__) 6 | logging.basicConfig(stream=sys.stdout, level=logging.INFO) 7 | 8 | import numpy as np 9 | 10 | from shovel import task 11 | 12 | import hyperopt 13 | from hyperopt import Trials 14 | from hyperopt.mongoexp import MongoTrials 15 | 16 | import hpconvnet.cifar10 17 | import hpconvnet.slm 18 | 19 | 20 | def make_trials(host, port, exp_key, refresh=True, dbname='dbname'): 21 | if (host, port) == (None, None): 22 | trials = Trials() 23 | else: 24 | if dbname == 'dbname': 25 | logger.warn('You probably want to override the default dbname') 26 | trials = MongoTrials( 27 | 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname), 28 | exp_key=exp_key, 29 | refresh=refresh) 30 | return trials 31 | 32 | 33 | def dindex(d, *keys): 34 | """ 35 | dindex(d, a, b, c) returns d[a][b][c] 36 | 37 | This function makes it easier to type long indexing sequences. 38 | """ 39 | if keys: 40 | return dindex(d[keys[0]], *keys[1:]) 41 | else: 42 | return d 43 | 44 | 45 | @task 46 | def small_random_run(): 47 | # -- This is a smoke test to make sure that a lot of code paths actually 48 | # run. Some of the jobs will fail, some should succeed, the data will be 49 | # loaded and some SVMs will be fit etc. Classifier performance is expected 50 | # to be poor (70% error?), because we're using just 10% of the data and 51 | # only trying a few random architectures. 
52 | # 53 | # Expected running time on CPU: ~10 mins 54 | 55 | search_space = hpconvnet.cifar10.build_search_space( 56 | max_n_features=4500, # -- smaller than normal 57 | bagging_fraction=0.5, # -- normal 58 | n_unsup=2000, # -- smaller than normal 59 | abort_on_rows_larger_than=50 * 1000, # -- smaller 60 | ) 61 | trials = Trials() 62 | hyperopt.fmin( 63 | fn=hyperopt.partial( 64 | hpconvnet.cifar10.uslm_eval, 65 | data_fraction=0.1, # -- smaller than normal 66 | ), 67 | space=search_space, 68 | algo=hyperopt.rand.suggest, 69 | max_evals=10, 70 | trials=trials) 71 | 72 | # -- Getting this far without crashing is a good indication that 73 | # everything has been installed and is probably running correctly. 74 | 75 | 76 | @task 77 | def tpe_driver( 78 | host, 79 | port, 80 | max_evals=10000, 81 | exp_key_base='cifar10_tpe' 82 | ): 83 | 84 | max_evals=int(max_evals) 85 | min_ok_per_round = int(min_ok_per_round) 86 | 87 | tpe_suggest=functools.partial( 88 | hyperopt.tpe.suggest, 89 | n_startup_jobs=50, # -- number of random jobs before optimization 90 | ) 91 | search_space = hpconvnet.cifar10.build_search_space( 92 | max_n_features=4000, 93 | bagging_fraction=1.0, 94 | n_unsup=7500, 95 | abort_on_rows_larger_than=500 * 1000, # -- elements 96 | output_sizes=(32, 64), 97 | ) 98 | hyperopt.fmin( 99 | fn=hpconvnet.cifar10.uslm_eval, 100 | space=search_space, 101 | algo=tpe_suggest, 102 | max_evals=max_evals, 103 | trials=make_trials( 104 | host, 105 | port, 106 | exp_key=exp_key, 107 | ), 108 | ) 109 | 110 | @task 111 | def best_trial(host, port, exp_key='cifar10_tpe'): 112 | trials = make_trials(host, port, exp_key=exp_key) 113 | print 'Number of trials so far', len(trials) 114 | if len(trials) == 0: 115 | print ("(Hint: did you provide with the right exp_key? Used: %s)" % 116 | exp_key) 117 | 118 | try: 119 | best_trial = trials.best_trial 120 | print 'Best trial validation error rate', best_trial['result']['loss'] 121 | print 'Best trial test error rate', best_trial['result']['true_loss'] 122 | except ValueError: 123 | pass 124 | 125 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ distribute- and pip-enabled setup.py """ 5 | 6 | import logging 7 | import os 8 | import re 9 | 10 | # ----- overrides ----- 11 | 12 | # set these to anything but None to override the automatic defaults 13 | packages = None 14 | package_name = None 15 | package_data = None 16 | scripts = None 17 | requirements_file = None 18 | requirements = None 19 | dependency_links = None 20 | 21 | # --------------------- 22 | 23 | 24 | # ----- control flags ----- 25 | 26 | # fallback to setuptools if distribute isn't found 27 | setup_tools_fallback = True 28 | 29 | # don't include subdir named 'tests' in package_data 30 | skip_tests = True 31 | 32 | # print some extra debugging info 33 | debug = True 34 | 35 | # ------------------------- 36 | 37 | if debug: logging.basicConfig(level=logging.DEBUG) 38 | # distribute import and testing 39 | try: 40 | import distribute_setup 41 | distribute_setup.use_setuptools() 42 | logging.debug("distribute_setup.py imported and used") 43 | except ImportError: 44 | # fallback to setuptools? 
45 | # distribute_setup.py was not in this directory 46 | if not (setup_tools_fallback): 47 | import setuptools 48 | if not (hasattr(setuptools,'_distribute') and \ 49 | setuptools._distribute): 50 | raise ImportError("distribute was not found and fallback to setuptools was not allowed") 51 | else: 52 | logging.debug("distribute_setup.py not found, defaulted to system distribute") 53 | else: 54 | logging.debug("distribute_setup.py not found, defaulting to system setuptools") 55 | 56 | import setuptools 57 | 58 | def find_scripts(): 59 | return [s for s in setuptools.findall('scripts/') if os.path.splitext(s)[1] != '.pyc'] 60 | 61 | def package_to_path(package): 62 | """ 63 | Convert a package (as found by setuptools.find_packages) 64 | e.g. "foo.bar" to usable path 65 | e.g. "foo/bar" 66 | 67 | No idea if this works on windows 68 | """ 69 | return package.replace('.','/') 70 | 71 | def find_subdirectories(package): 72 | """ 73 | Get the subdirectories within a package 74 | This will include resources (non-submodules) and submodules 75 | """ 76 | try: 77 | subdirectories = os.walk(package_to_path(package)).next()[1] 78 | except StopIteration: 79 | subdirectories = [] 80 | return subdirectories 81 | 82 | def subdir_findall(dir, subdir): 83 | """ 84 | Find all files in a subdirectory and return paths relative to dir 85 | 86 | This is similar to (and uses) setuptools.findall 87 | However, the paths returned are in the form needed for package_data 88 | """ 89 | strip_n = len(dir.split('/')) 90 | path = '/'.join((dir, subdir)) 91 | return ['/'.join(s.split('/')[strip_n:]) for s in setuptools.findall(path)] 92 | 93 | def find_package_data(packages): 94 | """ 95 | For a list of packages, find the package_data 96 | 97 | This function scans the subdirectories of a package and considers all 98 | non-submodule subdirectories as resources, including them in 99 | the package_data 100 | 101 | Returns a dictionary suitable for setup(package_data=) 102 | """ 103 | package_data = {} 104 | for package in packages: 105 | package_data[package] = [] 106 | for subdir in find_subdirectories(package): 107 | if '.'.join((package, subdir)) in packages: # skip submodules 108 | logging.debug("skipping submodule %s/%s" % (package, subdir)) 109 | continue 110 | if skip_tests and (subdir == 'tests'): # skip tests 111 | logging.debug("skipping tests %s/%s" % (package, subdir)) 112 | continue 113 | package_data[package] += subdir_findall(package_to_path(package), subdir) 114 | return package_data 115 | 116 | def parse_requirements(file_name): 117 | """ 118 | from: 119 | http://cburgmer.posterous.com/pip-requirementstxt-and-setuppy 120 | """ 121 | requirements = [] 122 | with open(file_name, 'r') as f: 123 | for line in f: 124 | if re.match(r'(\s*#)|(\s*$)', line): continue 125 | if re.match(r'\s*-e\s+', line): 126 | requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$',\ 127 | r'\1', line).strip()) 128 | elif re.match(r'\s*-f\s+', line): 129 | pass 130 | else: 131 | requirements.append(line.strip()) 132 | return requirements 133 | 134 | def parse_dependency_links(file_name): 135 | """ 136 | from: 137 | http://cburgmer.posterous.com/pip-requirementstxt-and-setuppy 138 | """ 139 | dependency_links = [] 140 | with open(file_name) as f: 141 | for line in f: 142 | if re.match(r'\s*-[ef]\s+', line): 143 | dependency_links.append(re.sub(r'\s*-[ef]\s+',\ 144 | '', line)) 145 | return dependency_links 146 | 147 | # ----------- Override defaults here ---------------- 148 | if packages is None: packages = setuptools.find_packages() 149 
| 150 | if len(packages) == 0: raise Exception("No valid packages found") 151 | 152 | if package_name is None: package_name = packages[0] 153 | 154 | if package_data is None: package_data = find_package_data(packages) 155 | 156 | if scripts is None: scripts = find_scripts() 157 | 158 | if requirements_file is None: 159 | requirements_file = 'requirements.txt' 160 | 161 | if os.path.exists(requirements_file): 162 | if requirements is None: 163 | requirements = parse_requirements(requirements_file) 164 | if dependency_links is None: 165 | dependency_links = parse_dependency_links(requirements_file) 166 | else: 167 | if requirements is None: 168 | requirements = [] 169 | if dependency_links is None: 170 | dependency_links = [] 171 | 172 | if debug: 173 | logging.debug("Module name: %s" % package_name) 174 | for package in packages: 175 | logging.debug("Package: %s" % package) 176 | logging.debug("\tData: %s" % str(package_data[package])) 177 | logging.debug("Scripts:") 178 | for script in scripts: 179 | logging.debug("\tScript: %s" % script) 180 | logging.debug("Requirements:") 181 | for req in requirements: 182 | logging.debug("\t%s" % req) 183 | logging.debug("Dependency links:") 184 | for dl in dependency_links: 185 | logging.debug("\t%s" % dl) 186 | 187 | setuptools.setup( 188 | name = package_name, 189 | version = 'dev', 190 | packages = packages, 191 | scripts = scripts, 192 | 193 | package_data = package_data, 194 | include_package_data = True, 195 | 196 | install_requires = requirements, 197 | dependency_links = dependency_links 198 | ) 199 | 200 | -------------------------------------------------------------------------------- /shovel/lfw.py: -------------------------------------------------------------------------------- 1 | """ 2 | Driver scripts for LFW experiments 3 | 4 | """ 5 | 6 | import numpy as np 7 | from functools import partial 8 | 9 | from shovel import task 10 | 11 | import hyperopt 12 | from hyperopt import Trials 13 | from hyperopt.mongoexp import MongoTrials, MongoCtrl 14 | 15 | from hpconvnet.shovel_util import configure_logging 16 | import hpconvnet.lfw 17 | 18 | configure_logging('INFO') 19 | dbname = hpconvnet.lfw.dbname 20 | 21 | 22 | def make_trials(host, port, exp_key, refresh=True): 23 | if (host, port) == (None, None): 24 | trials = Trials() 25 | else: 26 | trials = MongoTrials( 27 | 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname), 28 | exp_key=exp_key, 29 | refresh=refresh) 30 | return trials 31 | 32 | 33 | search_space = partial( 34 | hpconvnet.lfw.build_search_space, 35 | max_n_features=16000, 36 | trn='DevTrain', # -- split used for unsupervised images 37 | n_unsup=300, # -- number of images from which to draw patches 38 | ) 39 | 40 | 41 | def slm_visitor_lfw_partial( 42 | max_n_per_class, 43 | maybe_test_view2=False, # -- this still takes too much memory 44 | assume_promising=False, 45 | foobar_trace_target=None, 46 | ): 47 | # -- this curries and re-decorates hpconvnet.lfw.slm_visitor_lfw 48 | # so that we can pass it to fmin() 49 | if max_n_per_class is not None: 50 | max_n_per_class = int(max_n_per_class) 51 | return hyperopt.partial( 52 | hpconvnet.lfw.slm_visitor_lfw, 53 | max_n_per_class=max_n_per_class, 54 | maybe_test_view2=maybe_test_view2, 55 | assume_promising=assume_promising, 56 | foobar_trace_target=foobar_trace_target, 57 | ) 58 | 59 | 60 | @task 61 | def random_driver(host=None, port=None, max_evals=10000, max_n_per_class=None): 62 | hyperopt.fmin( 63 | fn=slm_visitor_lfw_partial(max_n_per_class), 64 | 
space=search_space(bagging_fraction=1.0), 65 | algo=hyperopt.rand.suggest, 66 | max_evals=max_evals, 67 | trials=make_trials(host, port, exp_key='random')) 68 | 69 | 70 | @task 71 | def tpe_driver(host=None, port=None, max_evals=10000, max_n_per_class=None): 72 | tpe_suggest=partial( 73 | hyperopt.tpe.suggest, 74 | n_startup_jobs=50, # -- number of random jobs before optimization 75 | ) 76 | hyperopt.fmin( 77 | fn=slm_visitor_lfw_partial(max_n_per_class), 78 | space=search_space(bagging_fraction=1.0), 79 | algo=tpe_suggest, 80 | max_evals=max_evals, 81 | trials=make_trials(host, port, exp_key='random')) 82 | 83 | 84 | @task 85 | def view2(host, port, exp_key, 86 | bagging_fraction, 87 | max_n_per_class=None, 88 | maybe_test_view2=True, 89 | assume_promising=True, 90 | tid=None, 91 | fake=False, 92 | ): 93 | fake = int(fake) 94 | real_trials = make_trials(host, port, exp_key) 95 | print 'n. real trials', len(real_trials) 96 | if tid is None: 97 | best_trial = real_trials.best_trial 98 | else: 99 | try: 100 | best_trial = [t for t in real_trials.trials if t['tid'] == int(tid)][0] 101 | except IndexError: 102 | print [t['tid'] for t in real_trials.trials] 103 | print 'Best trial' 104 | print ' ["tid"]', best_trial['tid'] 105 | best_result = best_trial['result'] 106 | print 'Best trial had loss', best_result['loss'] 107 | best_trace = best_result.get('trace') 108 | if 0: 109 | print ' ["Result trace"]', best_trace 110 | fake_trials = hyperopt.Trials() 111 | fn = slm_visitor_lfw_partial( 112 | max_n_per_class, 113 | maybe_test_view2=int(maybe_test_view2), 114 | assume_promising=int(assume_promising), 115 | foobar_trace_target=list(best_trace), 116 | ) 117 | #space = search_space 118 | space = hpconvnet.lfw.build_search_space( 119 | max_n_features=16000, 120 | trn='DevTrain', # -- split used for unsupervised images 121 | n_unsup=300, # -- number of images from which to draw patches 122 | batched_lmap_speed_thresh={'seconds': 60, 'elements': 1}, 123 | bagging_fraction=float(bagging_fraction), 124 | ) 125 | domain = hyperopt.Domain(fn, space, rseed=123) 126 | domain.rng = None # -- this rng is never to be used 127 | if fake: 128 | ctrl = hyperopt.Ctrl(trials=fake_trials, current_trial=None) 129 | print 'WARNING: running on fake ctrl object' 130 | else: 131 | ctrl = MongoCtrl( 132 | trials=real_trials, 133 | current_trial=best_trial, 134 | read_only=False) 135 | 136 | #real_trials.handle.update(best_result, msg) 137 | #ctrl.checkpoint(dict(best_trial['result'], foodebug='yes')) 138 | config = hyperopt.base.spec_from_misc(best_trial['misc']) 139 | #print 'Config', config 140 | r_dct = domain.evaluate(config, ctrl, attach_attachments=(not fake)) 141 | print 'r_dct' 142 | print r_dct 143 | if fake: 144 | print 'WARNING: running on fake ctrl object, not saving result' 145 | attachments = r_dct.pop('attachments', {}) 146 | print 'Attachments:', attachments.keys() 147 | print ' ["Best Result trace"]' 148 | def print_trace(r): 149 | trace = r['trace'] 150 | for t in trace: 151 | print ' ', t 152 | print_trace(best_result) 153 | print ' ["result trace"]' 154 | print_trace(r_dct) 155 | else: 156 | # -- the loss should have been re-computed identically 157 | r_dct['view2_recalculated_loss'] = r_dct['loss'] 158 | r_dct['loss'] = best_result['loss'] 159 | if (r_dct['view2_recalculated_loss'] > best_result['loss']): 160 | print 'WARNING: recalculated loss was worst than loss during search' 161 | print ' -> original loss', best_result['loss'] 162 | print ' -> recalculated loss', r_dct['view2_recalculated_loss'] 
163 | 164 | print 'Checkpointing back to db' 165 | # -- N.B. attachments should have been saved by Domain.evaluate, 166 | # since we called it with attach_attachments=True. So they should 167 | # not be here anymore. 168 | assert 'attachments' not in r_dct 169 | 170 | ctrl.checkpoint(r_dct) 171 | 172 | @task 173 | def list_view2_scores(host, port, key): 174 | """ 175 | List any view2 scores that have been calculated within an experiment 176 | (exp_key) 177 | """ 178 | import pymongo as pm 179 | conn = pm.Connection(host=host, port=int(port)) 180 | query = {'exp_key': key} 181 | 182 | for doc in conn[dbname]['jobs'].find(query, fields=['tid', 'result', 'state']): 183 | r = doc['result'].get('loss_image_match_indexed') 184 | if r: 185 | assert len(r) == 1 186 | for member in r: 187 | fold_errs = [] 188 | for view_train_k in r[member]: 189 | if 'test' in view_train_k: 190 | fold_errs.append(r[member][view_train_k]['error_rate']) 191 | print list(enumerate(fold_errs)) 192 | print key, doc['tid'], 193 | print 'view1', doc['result']['loss'], 194 | print 'view2', np.mean(fold_errs), 195 | print doc['state'], doc['result']['status'] 196 | 197 | -------------------------------------------------------------------------------- /hpconvnet/lfw.py: -------------------------------------------------------------------------------- 1 | """ 2 | lfw.py - entry point for experiments on the LFW data set. 3 | 4 | 5 | """ 6 | import logging 7 | import os 8 | 9 | import numpy as np 10 | 11 | import hyperopt 12 | from hyperopt.base import use_obj_for_literal_in_memo 13 | from hyperopt import STATUS_OK 14 | from hyperopt import pyll 15 | 16 | from skdata import lfw 17 | from skdata import larray 18 | 19 | from .pyll_slm import view2_worth_calculating 20 | from .utils import git_versions 21 | 22 | from .slm import call_catching_pipeline_errors 23 | from .slm import USLM_Exception 24 | from .slm import uslm_domain 25 | from .slm_visitor_esvc import ESVC_SLM_Visitor 26 | import slm_visitor_esvc 27 | 28 | import foobar 29 | 30 | warn = logging.getLogger(__name__).warn 31 | info = logging.getLogger(__name__).info 32 | 33 | # XXX: this is related to a hack for cacheing features to disk 34 | # see e.g. 
shovel/lfw.py, slm_visitor_esvc.py 35 | dbname = 'lfw_db' 36 | 37 | 38 | class DataViewPlaceHolder(object): 39 | pass 40 | 41 | pyll_data_view = pyll.as_apply(DataViewPlaceHolder) 42 | 43 | 44 | @pyll.scope.define 45 | def unsup_images(data_view, trn, N): 46 | """ 47 | Return a block of 48 | """ 49 | if trn == 'DevTrain': 50 | # -- extract training images, and put them into channel-major format 51 | imgs = larray.reindex(data_view.image_pixels, 52 | data_view.dev_train['lpathidx'][0, :N])[:] 53 | imgs = np.asarray(imgs) 54 | assert 'int' in str(imgs.dtype) 55 | foobar.append_ndarray_signature(imgs, 'unsup_images') 56 | foobar.append_trace('unsup_images N', N) 57 | return imgs.transpose(0, 3, 1, 2).copy() 58 | else: 59 | raise NotImplementedError() 60 | 61 | 62 | def build_search_space(max_n_features, trn, n_unsup, 63 | bagging_fraction, 64 | batched_lmap_speed_thresh=None, 65 | batchsize=2, 66 | output_sizes=(32, 64, 128, 200), 67 | permit_affine_warp=True, 68 | ): 69 | image_shape = (250, 250, 1) # -- we're using lfw.Aligned below 70 | 71 | # max_n_features should be 16000 for full run 72 | # trn should be a string recognized by unsup_images() 73 | # n_unsup should be 300 for full run 74 | if batched_lmap_speed_thresh is None: 75 | batched_lmap_speed_thresh = {'seconds': 2.0, 'elements': 8} 76 | 77 | Xcm = pyll.scope.unsup_images(pyll_data_view, trn, n_unsup) 78 | search_space = { 79 | 'data_view': pyll_data_view, 80 | 'pipeline': uslm_domain( 81 | Xcm=Xcm, 82 | chmjr_image_shape=( 83 | image_shape[2], image_shape[0], image_shape[1]), 84 | output_sizes=list(output_sizes), # -- is list required? 85 | batchsize=batchsize, 86 | max_n_features=max_n_features, 87 | batched_lmap_speed_thresh=batched_lmap_speed_thresh, 88 | permit_affine_warp=permit_affine_warp, 89 | ), 90 | 'batchsize': batchsize, 91 | 'max_n_features': max_n_features, 92 | 'ctrl': hyperopt.Bandit.pyll_ctrl, 93 | 'batched_lmap_speed_thresh': batched_lmap_speed_thresh, 94 | 'bagging_fraction': bagging_fraction, 95 | } 96 | return search_space 97 | 98 | 99 | @hyperopt.fmin_pass_expr_memo_ctrl 100 | def slm_visitor_lfw(expr, memo, ctrl, 101 | maybe_test_view2=True, 102 | max_n_per_class=None, 103 | comparison_names=('mult', 'absdiff', 'sqrtabsdiff', 'sqdiff'), 104 | assume_promising=False, 105 | foobar_trace=True, 106 | foobar_trace_target=None, 107 | ): 108 | # -- possibly enable computation tracing 109 | foobar.reset_trace() 110 | foobar.trace_enabled = foobar_trace 111 | if foobar_trace_target: 112 | foobar.trace_verify = True 113 | foobar.set_trace_target(foobar_trace_target) 114 | slm_visitor_esvc._curdb = dbname # XXX tids are only unique within db 115 | 116 | versions = git_versions() 117 | info('GIT VERSIONS: %s' % str(versions)) 118 | 119 | data_view = lfw.view.Aligned( 120 | x_dtype='uint8', 121 | max_n_per_class=max_n_per_class, 122 | ) 123 | 124 | use_obj_for_literal_in_memo(expr, data_view, DataViewPlaceHolder, memo) 125 | 126 | def loss_fn(s, rdct, bagging_fraction): 127 | """ 128 | bagging_fraction - float 129 | If the function measures the loss within the ensemble (loss) 130 | as well as the loss without the ensemble (loss_last_member) then 131 | this value interpolates between boosting (0.0) and bagging (1.0). 
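        Concretely (matching the computation below):

            loss = bagging_fraction * valid_error_no_ensemble
                   + (1 - bagging_fraction) * valid_error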
132 | 133 | """ 134 | # -- this is the criterion we minimize during model search 135 | norm_key = s.norm_key('devTrain') 136 | task_name = 'devTrain' 137 | dct = s._results['train_image_match_indexed'][norm_key][task_name] 138 | loss = (bagging_fraction * dct['valid_error_no_ensemble'] 139 | + (1 - bagging_fraction) * dct['valid_error']) 140 | rdct['loss'] = loss 141 | rdct['status'] = STATUS_OK 142 | 143 | def foo(): 144 | argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False) 145 | visitor = ESVC_SLM_Visitor(pipeline=argdict['pipeline'], 146 | ctrl=argdict['ctrl'], 147 | data_view=argdict['data_view'], 148 | max_n_features=argdict['max_n_features'], 149 | memmap_name='%s_%i' % (__name__, os.getpid()), 150 | svm_crossvalid_max_evals=50, 151 | optimize_l2_reg=True, 152 | batched_lmap_speed_thresh=argdict[ 153 | 'batched_lmap_speed_thresh'], 154 | comparison_names=comparison_names, 155 | batchsize=argdict['batchsize'], 156 | ) 157 | # -- drive the visitor according to the protocol of the data set 158 | protocol_iter = argdict['data_view'].protocol_iter(visitor) 159 | msg, model = protocol_iter.next() 160 | assert msg == 'model validation complete' 161 | 162 | # -- save the loss, but don't save attachments yet. 163 | rdict = visitor.hyperopt_rval(save_grams=False) 164 | rdict['in_progress'] = True 165 | loss_fn(visitor, rdict, argdict['bagging_fraction']) 166 | argdict['ctrl'].checkpoint(rdict) 167 | 168 | if assume_promising: 169 | promising = True 170 | else: 171 | promising = view2_worth_calculating( 172 | loss=rdict['loss'], 173 | ctrl=argdict['ctrl'], 174 | thresh_loss=1.0, 175 | thresh_rank=1) 176 | 177 | 178 | info('Promising: %s' % promising) 179 | 180 | if maybe_test_view2: 181 | if promising: 182 | info('Disabling trace verification for view2') 183 | foobar.trace_verify = False 184 | msg = protocol_iter.next() 185 | assert msg == 'model testing complete' 186 | else: 187 | warn('Not testing unpromising model %s' % str(model)) 188 | else: 189 | warn('Skipping view2 stuff for model %s' % str(model)) 190 | rdict = visitor.hyperopt_rval(save_grams=promising) 191 | loss_fn(visitor, rdict, argdict['bagging_fraction']) 192 | return visitor, rdict 193 | 194 | try: 195 | visitor, rdict = call_catching_pipeline_errors(foo) 196 | except USLM_Exception, e: 197 | exc, rdict = e.args 198 | print ('job failed: %s: %s' % (type(e), exc)) 199 | rdict['git_versions'] = versions 200 | return dict(rdict, in_progres=False) 201 | 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Hyperparameter Optimization for Convolutional Vision Architectures 2 | ================================================================== 3 | 4 | This package provides a [Theano](http://www.deeplearning.net/software/theano)-based implementation of convolutional networks 5 | as described in (Bergstra, Yamins, and Cox, 2013), which exposes many 6 | architectural hyperparameters for optimization by 7 | [hyperopt](http://jaberg.github.com/hyperopt). 8 | 9 | # Installation 10 | 11 | 1. Requirements: 12 | 13 | 14 | * A Python/Numpy/Scipy stack. 15 | The Python package requirements are listed in 16 | requirements.txt. 17 | 18 | * Optional (but strongly recommended) is an NVIDIA GPU device at least as 19 | fast as, say, a GTX280, and CUDA. See Theano's documentation for setting 20 | up Theano to use a GPU device. 
21 | 22 | * Optional (but strongly recommended) is the MongoDB database software, 23 | which allows hyperopt to support parallel optimization. 24 | 25 | 2. Check out this project 26 | 27 | `git clone https://github.com/jaberg/hyperopt-convnet.git`. 28 | 29 | 3. Install it as a Python package. This installation makes the code files 30 | importable, which is required when running asynchronous hyperparameter 31 | optimization (i.e. with hyperopt-mongo-worker, as explained below). 32 | 33 | `python setup.py install` 34 | 35 | Consider installing this within your user account (`--user`) or within a 36 | virtualenv to avoid installing this package system-wide, and to avoid 37 | needing root privileges. 38 | 39 | Installing hyperopt-convnet will install a pile of Python packages, 40 | which are listed in requirements.txt. 41 | On my computer, I had to explicitly install a few packages, because 42 | whatever the setup.py script was doing wasn't working (I still don't 43 | understand python packaging...): 44 | * `pip install numpy`, 45 | * `pip install scipy`, 46 | * `pip install matplotlib` 47 | 48 | 4. Replace sklearn < 0.13 with git version (we need some new stuff in SVC). 49 | 50 | 51 | # Testing 52 | 53 | If installation goes well, then you will now be able to import the `hpconvnet` 54 | module. The easiest way to test your installation is 55 | 56 | 57 | ```bash 58 | THEANO_FLAGS=device=gpu shovel lfw.random_driver --max_n_per_class=20 59 | ``` 60 | 61 | This command should not crash, it should (i) download LFW if necessary and 62 | then (ii) loop indefinitely doing random search on a tiny subset of the LFW 63 | training data. 64 | 65 | 66 | 67 | # Running An Experiment in Parallel with MongoDB 68 | 69 | Running hyperparameter optimization on large convolutional networks for data 70 | sets such as [LFW](http://vis-www.cs.umass.edu/lfw/) 71 | and [CIFAR10](http://www.cs.toronto.edu/~kriz/cifar.html) takes a significant amount of time: 72 | expect a search of a few hundred points to take about a GPU-week. 73 | This cannot be completely parallelized (Bayesian optimization works on the 74 | basis of feedback about the fitness landscape after all), but in my experience 75 | it can easily be parallelized 5-fold to 10-fold. 76 | So if you have access to a small cluster you can see significant progress in 77 | an hour or two, and be done in a day. 78 | 79 | What follows here is a sketch of the unix commands you would need to do to 80 | make this happen. 81 | To get more of a sense about what's going on, read through 82 | [hyperopt documentation on using 83 | mongo](https://github.com/jaberg/hyperopt/wiki/Parallelizing-search). 84 | 85 | 86 | 1. Set up a mongodb process for inter-process communication. 87 | 88 | ` 89 | mongod --dbpath . --port PORT --directoryperdb --fork --journal --logpath log.log --nohttpinterface 90 | ` 91 | 92 | If this machine is visible to the internet, you should either bind mongod 93 | to the local loopback address and connect to the database via an ssh 94 | tunnel, or set up mongodb for password-protected access. 95 | 96 | 2. Start an asynchronous search process, that connects to the mongodb and 97 | polls a work queue created there. 98 | 99 | ` 100 | shovel cifar10.tpe_driver localhost PORT 0.0 101 | ` 102 | 103 | 3. 
Start one or more generic hyperopt worker processes to crank through the 104 | trials of the experiment, pointing at the database that's written into the 105 | shovel script, in this case: 106 | 107 | ` 108 | ssh WORKNODE hyperopt-mongo-worker --mongo=localhost:PORT/DBNAME 109 | ` 110 | 111 | The PORT should match the one used to launch mongodb. 112 | The DBNAME should match the one used in shovel/cifar10.py:make_trials, 113 | which is "dbname" by default. 114 | 115 | If you have a cluster with a queue system (e.g. Torque, PBS, etc.) then use 116 | that system to schedule a few hyperopt-mongo-worker processes. When they 117 | start, they will connect to the database and reserve an experiment trial. 118 | These processes will loop indefinitely by default, dequeueing/reserving trials 119 | and storing the results back to the database. They will stop when the 120 | search process no longer adds new trials to the database, or when several 121 | (4) consecutive trials fail to complete successfully (i.e. your trial 122 | evaluation code is faulty and you should either fix it or at least catch the 123 | terminating exceptions). 124 | 125 | # Rough Guide to the Code 126 | 127 | * `shovel/{cifar10,lfw,mnist}.py` driver code for various data sets. 128 | When you type `shovel lfw.foo` in bash, it will try to run the `foo` task in 129 | the lfw.py file. 130 | 131 | * `hpconvnet/lfw.py` describes the search space and the objective function 132 | that hyperopt.fmin requires to optimize LFW's view 1 data set. 133 | 134 | * `hpconvnet/cifar10.py` describes the search space and the objective function 135 | that hyperopt.fmin requires to optimize CIFAR10 validation performance. 136 | 137 | * `hpconvnet/slm_visitor_esvc.py` provides a LearningAlgo (skdata-style) derived 138 | from `SLM_Visitor` that does classification based on sklearn's SVC binary 139 | SVM and a precomputed kernel. This is generally a good choice for data sets 140 | without too many examples. The LFW experiments use this class. 141 | 142 | * `hpconvnet/slm_visitor_primal.py` has a LearningAlgo (skdata-style) derived 143 | from `SLM_Visitor` that does classification based on a primal SVM solver. 144 | This is generally a good choice for data sets with larger numbers of 145 | examples. The MNIST and CIFAR10 experiments use this class. 146 | 147 | * `hpconvnet/slm_visitor.py` provides `SLM_Visitor`, 148 | a LearningAlgo (skdata-style) base class 149 | with image feature extraction code and several LearningAlgo interface 150 | methods. 151 | 152 | * `hpconvnet/slm.py` - creates the "pipeline" part of the search space, which 153 | describes the full set of possibilities for image feature extraction (the 154 | full set of convolutional architectures). The `uslm_domain` function 155 | returns this search space as a pyll graph. 156 | Note also the `call_catching_pipeline_errors` function, which includes 157 | `except` clauses for all known errors which may arise in the course of 158 | evaluating that pyll graph. 159 | 160 | * `hpconvnet/pyll_slm.py` - defines many custom pyll.scope functions which 161 | serve to describe the `uslm_domain` search space. 162 | 163 | The basic idea of the code is that the driver code (e.g. in shovel/lfw.py) 164 | defines a search space and an objective function for hyperopt. 
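For orientation, here is a minimal sketch of that `fmin` pattern, using a toy
one-hyperparameter search space in place of the real pipeline spaces built by
`hpconvnet.lfw.build_search_space` and `hpconvnet.cifar10.build_search_space`
(the `objective` function and the `'lr'` hyperparameter below are illustrative
only, not part of this package):

```python
import hyperopt
from hyperopt import hp, fmin, tpe, Trials

# Toy stand-in for the real search space (a pyll graph over architectures).
space = {'lr': hp.lognormal('lr', -4.0, 1.0)}

def objective(config):
    # The real objective builds a feature-extraction pipeline and fits SVMs;
    # here we just return a dummy validation loss for the sampled point.
    loss = (config['lr'] - 0.01) ** 2
    return {'loss': loss, 'status': hyperopt.STATUS_OK}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
```

Swapping `tpe.suggest` for `hyperopt.rand.suggest` gives random search, and
swapping `Trials` for `MongoTrials` gives the parallel, MongoDB-backed setup
described above; the shovel drivers do exactly this.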
165 | 166 | The search space is relatively complex, not only in terms of its size (238 167 | hyperparameters) but also in its internal logic: a "sample" from the search 168 | space is a dictionary that alongside some some simpler key-value pairs, 169 | contains a "pipeline" key whose value is itself a pyll graph (seriously, pyll 170 | has support for lambda expressions), 171 | which evaluates to a theano function, which can process images. 172 | 173 | The objective function is implemented by e.g. lfw.slm_visitor_lfw which 174 | allocates a LearningAlgo (an SLM_Visitor_ESVC instance called `visitor`) 175 | to handle most of the work. 176 | The lfw.slm_visitor_lfw routine passes a LearningAlgo 177 | to the LFW data set's "protocol" function, which 178 | walks the LearningAlgo through the various steps of an LFW experiment: showing 179 | it the right data at the right time, asking it to compute various statistics, 180 | and so on. 181 | When that's all done, lfw.slm_visitor_lfw asks the LearningAlgo to make 182 | a report (`visitor.hyperopt_rval()`) in the form of a dictionary. 183 | That dictionary is augmented with what hyperopt needs to see (loss and status 184 | keys) and passed back to hyperopt. 185 | 186 | 187 | There are other files too in the hpconvnet folder, but these ones summarize 188 | the logic and control flow. 189 | 190 | 191 | # References 192 | 193 | * J. Bergstra, D. Yamins, D. D. Cox (2013). 194 | [Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures](http://jmlr.csail.mit.edu/proceedings/papers/v28/bergstra13.pdf), 195 | in Proc. ICML2013. -- This paper describes the convolutional architectures 196 | implemented in this software package, and the results you should expect from 197 | hyperparameter optimization. 198 | 199 | * J. Bergstra, R. Bardenet, Y. Bengio, B. Kegl (2011). 200 | [Algorithms for Hyper-parameter Optimization](http://books.nips.cc/papers/files/nips24/NIPS2011_1385.pdf) 201 | In Proc. NIPS2011. -- This paper introduces the TPE hyperparameter optimization algorithm. 202 | -------------------------------------------------------------------------------- /hpconvnet/isvm_binary.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file supports the incremental construction of an SVM classifier 3 | by partially-corrective boosting on the hinge loss. 4 | 5 | 6 | Each incremental solver minimizes 7 | 8 | hinge( dot(X, w) + b + alpha * prev_Wx) 9 | + lambda * (|w|^2 + |alpha * prev_W|^2) 10 | 11 | 12 | Each solver is designed to be run on a subset of all available features. 
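Here `prev_Wx` holds the decision-function contributions dot(X_k, w_k) of the
previously-fit rounds (carried forward via `xw_carry_forward` rather than
recomputed), and the learned scalars `alpha` rescale those contributions so
that old and new feature blocks are re-balanced jointly.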
13 | 14 | """ 15 | import copy 16 | 17 | import numpy as np 18 | import autodiff 19 | import theano # abstraction leak to pass mode for optimization 20 | 21 | from isvm_multi import hinge2 22 | from isvm_multi import _default_bfgs_kwargs 23 | from isvm_multi import IncrementalMultiSVM 24 | 25 | def binary_svm_hinge_loss(x, y, weights, bias, alpha, pxw, pw_l2_sqr, 26 | l2_regularization): 27 | """ 28 | x: (n_examples, n_features) feature matrix 29 | y: (n_examples,) label vector 30 | weights: (n_features,) new weights 31 | alpha: (n_prev,) multiplier on each vector of previous weights 32 | pw_l2_sqr: (n_prev,) squared l2-norm of existing weights 33 | pxw: (n_examples, n_prev) inner products of previous weights with `x` 34 | l2_regularization: multiplier on sum of all squared weights 35 | """ 36 | n_prev, = alpha.shape 37 | 38 | xw = np.dot(x, weights) 39 | if n_prev: 40 | if np.any(pw_l2_sqr < 0): 41 | raise ValueError('prev_w_l2_sqr may not be negative') 42 | prev_l2_sqr = (pw_l2_sqr * (alpha ** 2)).sum() 43 | xw += np.dot(pxw, alpha) 44 | else: 45 | prev_l2_sqr = 0.0 46 | 47 | margin = y * (xw + bias) 48 | losses = hinge2(margin) 49 | 50 | cur_l2_sqr = (weights * weights).sum() 51 | l2_reg = 0.5 * l2_regularization * (cur_l2_sqr + prev_l2_sqr) 52 | cost = losses.sum() + l2_reg 53 | return cost 54 | 55 | 56 | class IncrementalSVM(object): 57 | """ 58 | On each iteration of the incremental construction this class fits a new 59 | weight vector w to the features x, while adjusting the norm of the 60 | previously-fit weight vectors to balance the current model against the old 61 | ones. 62 | 63 | See test_hingeboost.py for an example of incremental SVM construction. 64 | 65 | """ 66 | def __init__(self, n_features, 67 | prev_w_l2_sqr=None, 68 | l2_regularization=1e-4, 69 | dtype='float64', 70 | scalar_bounds=(-1e3, 1e3), 71 | bfgs_kwargs=None, 72 | alpha=None, 73 | ): 74 | """ 75 | prev_w_l2_sqr: the (un-squared) l2-norm of each column of the existing weight vector 76 | """ 77 | self.n_features = n_features 78 | if prev_w_l2_sqr is None: 79 | self.prev_w_l2_sqr = np.empty((0,), dtype=dtype) 80 | else: 81 | self.prev_w_l2_sqr = np.asarray(prev_w_l2_sqr).astype(dtype) 82 | (self.n_prev,) = self.prev_w_l2_sqr.shape 83 | self.l2_regularization = l2_regularization 84 | self.dtype = dtype 85 | self.scalar_bounds = scalar_bounds 86 | if bfgs_kwargs is None: 87 | self.bfgs_kwargs = copy.deepcopy(_default_bfgs_kwargs) 88 | else: 89 | self.bfgs_kwargs = bfgs_kwargs 90 | 91 | self.weights = np.zeros((n_features,), dtype=dtype) 92 | self.bias = np.zeros((), dtype=dtype) 93 | if alpha is None: 94 | self.alpha = np.ones_like(self.prev_w_l2_sqr) 95 | else: 96 | self.alpha = np.array(alpha).astype(dtype) 97 | if self.alpha.shape != self.prev_w_l2_sqr.shape: 98 | raise ValueError('shape mismatch between alpha and prev_w_l2_sqr', 99 | self.alpha.shape, self.prev_w_l2_sqr.shape) 100 | 101 | @property 102 | def cumulative_alpha(self): 103 | rval = list(self.alpha) 104 | rval.append(1.0) 105 | return np.asarray(rval, dtype=self.dtype) 106 | 107 | @property 108 | def cumulative_w_l2_sqr(self): 109 | rval = list(self.prev_w_l2_sqr) 110 | rval.append(self.w_l2_sqr) 111 | return np.asarray(rval, dtype=self.dtype) 112 | 113 | @property 114 | def w_l2_sqr(self): 115 | return (self.weights * self.weights).sum() 116 | 117 | def continuation(self, n_features=None): 118 | if n_features is None: 119 | n_features = self.n_features 120 | rval = self.__class__( 121 | n_features=n_features, 122 | 
prev_w_l2_sqr=self.cumulative_w_l2_sqr, 123 | alpha=self.cumulative_alpha, 124 | l2_regularization=self.l2_regularization, 125 | dtype=self.dtype, 126 | scalar_bounds=self.scalar_bounds, 127 | bfgs_kwargs=self.bfgs_kwargs 128 | ) 129 | rval.bias = self.bias.copy() 130 | return rval 131 | 132 | def xw_carry_forward(self, x, pxw=None): 133 | """stack the current dot(x, weights) onto previous stack `pxw` 134 | """ 135 | pxw = self.as_xw(x, pxw) 136 | rval = np.hstack((pxw, np.dot(x, self.weights)[:, None])) 137 | print rval.shape 138 | return rval 139 | 140 | def decision_function(self, x, xw=None): 141 | xw = self.as_xw(x, xw) 142 | return np.dot(x, self.weights) + np.dot(xw, self.alpha) + self.bias 143 | 144 | def predict(self, x, xw=None): 145 | xw = self.as_xw(x, xw) 146 | return (self.decision_function(x, xw) > 0) * 2 - 1 147 | 148 | def loss(self, x, y, xw=None): 149 | # y_ind is all +-1, with 1 meaning a positive label for OvA classif 150 | assert set(y) <= set([-1, 1]) 151 | xw = self.as_xw(x, xw) 152 | 153 | return binary_svm_hinge_loss(x, y, 154 | self.weights, self.bias, self.alpha, 155 | xw, 156 | self.prev_w_l2_sqr, 157 | self.l2_regularization, 158 | ) 159 | 160 | def as_xw(self, x, xw): 161 | if xw is None: 162 | if self.n_prev == 0: 163 | return np.zeros((len(x), self.n_prev), dtype=x.dtype) 164 | else: 165 | raise TypeError('xw is required for previous models') 166 | else: 167 | xw = np.asarray(xw, dtype=self.dtype) 168 | if xw.shape != (len(x), self.n_prev): 169 | raise ValueError('xw has wrong shape', 170 | xw.shape, (len(x), self.n_prev)) 171 | return xw 172 | 173 | def fit(self, x, y, xw=None): 174 | """ 175 | x - n_examples x n_features design matrix. 176 | y - vector of integer labels 177 | xw - matrix of real-valued incoming biases obtained 178 | by multiplying the existing weight vectors by x 179 | """ 180 | assert set(y) <= set([-1, 1]) 181 | 182 | if x.shape[0] != y.shape[0]: 183 | raise ValueError('length mismatch between x and y') 184 | n_examples, n_features = x.shape 185 | if n_features != self.n_features: 186 | raise ValueError('n_feature mismatch', (n_features, 187 | self.n_features)) 188 | 189 | weights = self.weights 190 | bias = self.bias 191 | alpha = self.alpha 192 | 193 | x = x.astype(self.dtype) 194 | y = y.astype(self.dtype) 195 | 196 | xw = self.as_xw(x, xw) 197 | print 'WARNING: IncrementalSVM should use alpha0, n_sgd_iters' 198 | 199 | # -- warm up with some sgd 200 | weights, bias, alpha, = autodiff.fmin_sgd( 201 | lambda w, b, a, xi, yi, xwi: 202 | binary_svm_hinge_loss(xi, yi, w, b, a, None, 203 | None, 204 | self.l2_regularization), 205 | (weights, bias, alpha), 206 | streams={ 207 | 'xi': x.reshape((n_examples, 1, x.shape[1])), 208 | 'yi': y.reshape((n_examples, 1)), 209 | }, 210 | stepsize=0.01, 211 | loops=max(1, 100000 // len(x)), 212 | ) 213 | 214 | # -- fine-tune without alpha by L-BFGS 215 | weights, bias, alpha, = autodiff.fmin_l_bfgs_b( 216 | lambda w, b, a: 217 | binary_svm_hinge_loss(x, y, 218 | w, b, a, None, None, 219 | self.l2_regularization), 220 | (weights, bias, alpha), 221 | # -- the graph is tiny, time spent optimizing it is wasted. 
222 | theano_mode=theano.Mode(linker='cvm', optimizer='fast_run'), 223 | **self.bfgs_kwargs) 224 | 225 | 226 | self.weights = weights 227 | self.bias = bias 228 | self.alpha = alpha 229 | 230 | 231 | class IncrementalSVM_MultiHack(object): 232 | 233 | def __init__(self, l2_regularization): 234 | self.l2_regularization = l2_regularization 235 | 236 | def fit(self, x, y, history): 237 | self._svm = IncrementalMultiSVM( 238 | dtype=x.dtype, 239 | n_features=x.shape[1], 240 | n_classes=2, 241 | l2_regularization=self.l2_regularization, 242 | n_sgd_iters=0, 243 | bfgs_kwargs={ 244 | 'maxfun': 1000, 245 | 'iprint': 0, 246 | 'm': 32, 247 | 'factr': 100}, 248 | ) 249 | self._svm.fit(x, (y + 1) / 2, history) 250 | 251 | def predict(self, x, history): 252 | return self._svm.predict(x, history) * 2 - 1 253 | -------------------------------------------------------------------------------- /hpconvnet/isvm_boosting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experiment classes 3 | """ 4 | 5 | import copy 6 | import logging 7 | 8 | import numpy as np 9 | 10 | import hyperopt 11 | from hyperopt.base import trials_from_docs 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | class BoostHelper(object): 16 | """ 17 | Several helper-methods related to boosting that work on the basis of 18 | a dictionary doc_by_tid rather than the original trials object. 19 | 20 | _continuing: map `tid` -> list of documents that continue `tid`. 21 | Forward arrows in the exploration of ensembles. 22 | 23 | """ 24 | 25 | @staticmethod 26 | def query_MongoTrials(mt, fields=(), query=None): 27 | """ 28 | Retrieve `docs` for the constructor without downloading the 29 | entire experiment history, as would be done by mt.refresh(). 30 | """ 31 | exp_key = mt._exp_key 32 | if query is None: 33 | query = {} 34 | else: 35 | query = copy.deepcopy(dict(query)) 36 | if exp_key != None: 37 | query.setdefault('exp_key', exp_key) 38 | query.setdefault('state', {'$ne': hyperopt.JOB_STATE_ERROR}) 39 | rval = mt.handle.jobs.find( 40 | query, 41 | fields=[ 42 | 'tid', 43 | 'result.status', 44 | 'result.loss', 45 | 'misc.boosting.continues', 46 | 'misc.boosting.variant', # -- optional 47 | '_attachments', 48 | ] + list(fields), 49 | snapshot=True) 50 | if rval is None: 51 | return [] 52 | else: 53 | return list(rval) 54 | 55 | def __init__(self, docs, doc_by_tid=None): 56 | if doc_by_tid is None: 57 | self.doc_by_tid = dict([(d['tid'], d) for d in docs]) 58 | # -- assert that every document has a unique tid 59 | assert len(self.doc_by_tid) == len(docs) 60 | else: 61 | self.doc_by_tid = doc_by_tid 62 | 63 | self._continuing = {} 64 | for d in docs: 65 | pre_tid = self.continues(d) 66 | if pre_tid is not None: 67 | pre_tid = pre_tid['tid'] 68 | self._continuing.setdefault(pre_tid, []).append(d) 69 | 70 | 71 | def ok_tids(self): 72 | return [d['tid'] for d in self.doc_by_tid.values() 73 | if d['result']['status'] == hyperopt.STATUS_OK] 74 | 75 | def best_doc(self): 76 | # -- it is no longer the case that losses can be compared 77 | # between generations, only between siblings is ok. 78 | raise NotImplementedError('since hacking bagging support') 79 | ok_tids = self.ok_tids() 80 | losses = [self.doc_by_tid[tid]['result']['loss'] for tid in ok_tids] 81 | assert None not in losses 82 | best_idx = np.argmin(losses) 83 | rval = self.doc_by_tid[ok_tids[best_idx]] 84 | return rval 85 | 86 | def continues(self, doc): 87 | """Returns the (older-than-doc) trial whose decisions `doc` built on. 
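        (This follows the `misc['boosting']['continues']` back-pointer one
        step; it returns None for first-round trials.)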
88 | """ 89 | # -- hack to support `doc` that is a misc sub-doc 90 | doc = self.doc_by_tid[doc['tid']] 91 | if 'boosting' in doc['misc']: 92 | rval_tid = doc['misc']['boosting']['continues'] 93 | if rval_tid is None: 94 | return None 95 | else: 96 | return self.doc_by_tid[rval_tid] 97 | else: 98 | return None 99 | 100 | def continuing(self, doc): 101 | """Returns all docs whose decisions were built on `doc`. 102 | """ 103 | if doc is None: 104 | return self._continuing.get(None, []) 105 | else: 106 | return self._continuing.get(doc['tid'], []) 107 | 108 | def children(self, doc): 109 | return self.continuing(doc) 110 | 111 | def best_child(self, doc): 112 | losses = [(child['result']['loss'], child) 113 | for child in self.children(doc) 114 | if child['result']['status'] == hyperopt.STATUS_OK 115 | ] 116 | if losses: 117 | losses.sort() 118 | return losses[0][1] 119 | else: 120 | return None 121 | 122 | def ensemble_members(self): 123 | """Return all docs that are part of the best ensemble in order of 124 | addition to the ensemble. 125 | """ 126 | # function works by working backward through the 127 | # doc['misc']['boosting']['continues'] links 128 | reversed_members = [self.best_doc()] 129 | while self.continues(reversed_members[-1]) != None: 130 | reversed_members.append(self.continues(reversed_members[-1])) 131 | rval = list(reversed(reversed_members)) 132 | return rval 133 | 134 | def history(self, doc): 135 | """ 136 | Return a list of documents that continued previous ones, leading 137 | eventually to `doc`. `doc` itself is the last element. 138 | """ 139 | rval = [] 140 | try: 141 | doc_or_None = self.continues(doc) 142 | while doc_or_None is not None: 143 | rval.append(doc_or_None) 144 | doc_or_None = self.continues(doc_or_None) 145 | rval.reverse() 146 | rval.append(doc) 147 | return rval 148 | except KeyError, e: 149 | if 'boosting'in str(e): 150 | return [doc] 151 | raise 152 | 153 | 154 | def suggest( 155 | new_ids, domain, trials, sub_suggest, 156 | min_ok_per_round=1, 157 | min_valid_per_round=1, 158 | absolute_loss_thresh=1.0, 159 | relative_loss_thresh=None, 160 | ): 161 | """ 162 | 163 | Parameters 164 | ---------- 165 | 166 | min_ok_per_round - int 167 | A trial cannot be extended in the ensemble until it has this many 168 | siblings with status 'ok' and a loss <= absolute_loss_thresh. 169 | 170 | min_valid_per_round - int 171 | A trial cannot be extended in the ensemble until it has this many 172 | siblings whose job state is not ERROR. 173 | 174 | absolute_loss_thresh - float 175 | Jobs with loss greater than this are not counted as 'ok'. 176 | 177 | relative_loss_thresh - None or float 178 | A child cannot become a parent in the ensemble unless it improves on its 179 | parent with a loss <= relative_loss_thresh * parent_loss. 180 | 181 | This search algo works by injecting a ['misc']['boosting'] subdocument into 182 | every trial, with keys: 183 | * variant - identify the type of boosting at work 184 | * continues - the trial ID (tid) of the previously selected trial in the 185 | ensemble, or `None` for first-round trials 186 | 187 | In order for boosting to work properly, the 'loss' reported by trial must 188 | represent the CUMULATIVE ENSEMBLE LOSS if the ensemble were to be extended 189 | to include that particular trial. 190 | 191 | """ 192 | new_id, = new_ids 193 | 194 | valid_docs = [t for t in trials 195 | if t['state'] != hyperopt.JOB_STATE_ERROR] 196 | 197 | # -- ok_docs are those which are eligible to be a member of the 198 | # final ensemble. 
199 | ok_docs = [t for t in valid_docs 200 | if t['result']['status'] == hyperopt.STATUS_OK 201 | and t['result']['loss'] <= absolute_loss_thresh] 202 | 203 | logger.info('n_ok: %i n_valid: %i' % (len(ok_docs), len(valid_docs))) 204 | 205 | valid_helper = BoostHelper(valid_docs) 206 | ok_helper = BoostHelper(ok_docs) 207 | 208 | cur_parent = None 209 | cur_parent_tid = None 210 | while True: 211 | n_ok_children = len(ok_helper.children(cur_parent)) 212 | n_valid_children = len(valid_helper.children(cur_parent)) 213 | logger.info('cur_parent: %s n_ok_children: %i n_valid_children: %i' 214 | % (None if cur_parent is None else cur_parent['tid'], 215 | n_ok_children, 216 | n_valid_children)) 217 | if n_ok_children < min_ok_per_round: 218 | break 219 | if n_valid_children < min_valid_per_round: 220 | break 221 | 222 | best_child = ok_helper.best_child(cur_parent) 223 | assert best_child is not None # -- because ok_helper has some elements 224 | 225 | if None not in (cur_parent, relative_loss_thresh): 226 | rel_thresh = cur_parent['result']['loss'] * relative_loss_thresh 227 | if best_child['result']['loss'] >= rel_thresh: 228 | break 229 | 230 | logger.info('best_child: %i' % best_child['tid']) 231 | cur_parent = best_child 232 | cur_parent_tid = best_child['tid'] 233 | del best_child 234 | 235 | cur_siblings = valid_helper.children(cur_parent) 236 | 237 | current_trials = trials_from_docs( 238 | cur_siblings, 239 | exp_key=trials._exp_key, 240 | # -- validate=False is much faster 241 | validate=False) 242 | 243 | new_trial_docs = sub_suggest([new_id], domain, current_trials) 244 | 245 | for trial in new_trial_docs: 246 | misc = trial['misc'] 247 | # -- boosting cannot be nested with current data structure 248 | assert 'boosting' not in misc 249 | # -- I think the following was a debugging sanity check 250 | assert trial['tid'] == new_id 251 | misc['boosting'] = { 252 | 'variant': { 253 | 'name': 'async_suggest', 254 | 'min_ok_per_round': min_ok_per_round, 255 | 'min_valid_per_round': min_valid_per_round, 256 | 'relative_loss_thresh': relative_loss_thresh, 257 | 'absolute_loss_thresh': absolute_loss_thresh, 258 | }, 259 | 'continues': cur_parent_tid} 260 | 261 | return new_trial_docs 262 | 263 | -------------------------------------------------------------------------------- /hpconvnet/utils.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import functools 3 | import logging 4 | import sys 5 | 6 | import numpy as np 7 | from PIL import Image 8 | 9 | 10 | class ImgLoaderResizer(object): 11 | """ Load 250x250 greyscale images, return normalized 200x200 float32 ones. 
12 | """ 13 | def __init__(self, inshape, 14 | shape=None, ndim=None, dtype='float32', normalize=True, 15 | crop=None): 16 | # DELETE ME 17 | assert 0 18 | self.inshape = inshape 19 | assert len(shape) == 3 20 | assert shape[0] == 1 21 | shape = tuple(shape) 22 | assert len(crop) == 4 23 | crop = tuple(crop) 24 | l, t, r, b = crop 25 | assert 0 <= l < r <= self.inshape[0] 26 | assert 0 <= t < b <= self.inshape[1] 27 | self._crop = crop 28 | assert dtype == 'float32' 29 | self._shape = shape 30 | if ndim is None: 31 | self._ndim = None if (shape is None) else len(shape) 32 | else: 33 | self._ndim = ndim 34 | self._dtype = dtype 35 | self.normalize = normalize 36 | 37 | def rval_getattr(self, attr, objs): 38 | if attr == 'shape' and self._shape is not None: 39 | return self._shape 40 | if attr == 'ndim' and self._ndim is not None: 41 | return self._ndim 42 | if attr == 'dtype': 43 | return self._dtype 44 | raise AttributeError(attr) 45 | 46 | def __call__(self, file_path): 47 | im = Image.open(file_path) 48 | if im.mode != 'L': 49 | im = im.convert('L') 50 | assert im.size == self.inshape 51 | if self._crop != (0, 0,) + self.inshape: 52 | im = im.crop(self._crop) 53 | l, t, r, b = self._crop 54 | assert im.size == (r - l, b - t) 55 | if max(im.size) != self._shape[1]: 56 | m = self._shape[1]/float(max(im.size)) 57 | new_shape = (int(round(im.size[0]*m)), int(round(im.size[1]*m))) 58 | im = im.resize(new_shape, Image.ANTIALIAS) 59 | imval = np.asarray(im, dtype=self._dtype) 60 | rval = np.zeros(self._shape, dtype=self._dtype) 61 | ctr = self._shape[1]/2 62 | cxmin = ctr - imval.shape[0] / 2 63 | cxmax = ctr - imval.shape[0] / 2 + imval.shape[0] 64 | cymin = ctr - imval.shape[1] / 2 65 | cymax = ctr - imval.shape[1] / 2 + imval.shape[1] 66 | rval[cxmin:cxmax,cymin:cymax] = imval 67 | if self.normalize: 68 | rval -= rval.mean() 69 | rval /= max(rval.std(), 1e-3) 70 | else: 71 | rval /= 255.0 72 | assert rval.dtype == self._dtype, (rval.dtype, self._dtype) 73 | assert rval.shape == self._shape, (rval.shape, self._shape) 74 | return rval 75 | 76 | 77 | import theano 78 | # -- Define theano versions of dot because my numpy installation is screwed up 79 | # and does not use a good blas. 80 | _theano_fA = theano.tensor.fmatrix() 81 | _theano_fB = theano.tensor.fmatrix() 82 | dot_f32 = theano.function( 83 | [_theano_fA, _theano_fB], 84 | theano.tensor.dot(_theano_fA, _theano_fB), 85 | allow_input_downcast=True, 86 | mode=theano.compile.Mode(linker='vm', optimizer='fast_run').excluding( 87 | 'gpu')) 88 | 89 | _theano_dA = theano.tensor.dmatrix() 90 | _theano_dB = theano.tensor.dmatrix() 91 | dot_f64 = theano.function( 92 | [_theano_dA, _theano_dB], 93 | theano.tensor.dot(_theano_dA, _theano_dB), 94 | allow_input_downcast=True, 95 | mode=theano.compile.Mode(linker='vm', optimizer='fast_run').excluding( 96 | 'gpu')) 97 | 98 | 99 | DOT_MAX_NDIMS = 256 100 | MEAN_MAX_NPOINTS = 2000 101 | STD_MAX_NPOINTS = 2000 102 | 103 | if 1: 104 | def dot(A, B): 105 | _dot = dict(float32=dot_f32, float64=dot_f64)[str(A.dtype)] 106 | return _dot(A, B) 107 | dot.__theano_op__ = theano.tensor.dot # -- used by autodiff 108 | else: 109 | dot = np.dot 110 | 111 | 112 | def chunked_linear_kernel(Xs, Ys, symmetric): 113 | """Compute a linear kernel in blocks so that it can use a GPU with limited 114 | memory. 
115 | 116 | Xs is a list of feature matrices 117 | Ys ia list of feature matrices 118 | 119 | This function computes the kernel matrix with 120 | \sum_i len(Xs[i]) rows 121 | \sum_j len(Ys[j]) cols 122 | """ 123 | 124 | dtype = Xs[0].dtype 125 | 126 | def _dot(A, B): 127 | if K < DOT_MAX_NDIMS: 128 | return dot(A, B) 129 | else: 130 | out = dot(A[:,:DOT_MAX_NDIMS], B[:DOT_MAX_NDIMS]) 131 | ndims_done = DOT_MAX_NDIMS 132 | while ndims_done < K: 133 | out += dot( 134 | A[:,ndims_done : ndims_done + DOT_MAX_NDIMS], 135 | B[ndims_done : ndims_done + DOT_MAX_NDIMS]) 136 | ndims_done += DOT_MAX_NDIMS 137 | return out 138 | 139 | R = sum([len(X) for X in Xs]) 140 | C = sum([len(Y) for Y in Ys]) 141 | K = Xs[0].shape[1] 142 | 143 | rval = np.empty((R, C), dtype=dtype) 144 | 145 | if symmetric: 146 | assert R == C 147 | 148 | ii0 = 0 149 | for ii, X_i in enumerate(Xs): 150 | sys.stdout.write('.') 151 | sys.stdout.flush() 152 | ii1 = ii0 + len(X_i) # -- upper bound of X block 153 | 154 | jj0 = 0 155 | for jj, Y_j in enumerate(Ys): 156 | jj1 = jj0 + len(Y_j) # -- upper bound of Y block 157 | 158 | r_ij = rval[ii0:ii1, jj0:jj1] 159 | 160 | if symmetric and jj < ii: 161 | r_ji = rval[jj0:jj1, ii0:ii1] 162 | r_ij[:] = r_ji.T 163 | else: 164 | r_ij[:] = _dot(X_i, Y_j.T) 165 | 166 | jj0 = jj1 167 | 168 | ii0 = ii1 169 | 170 | return rval 171 | 172 | 173 | def linear_kernel(X, Y, block_size=10000): 174 | """Compute a linear kernel in blocks so that it can use a GPU with limited 175 | memory. 176 | 177 | Xs is a list of feature matrices 178 | Ys ia list of feature matrices 179 | 180 | This function computes the kernel matrix with 181 | \sum_i len(Xs[i]) rows 182 | \sum_j len(Ys[j]) cols 183 | """ 184 | 185 | def chunk(Z): 186 | Zs = [] 187 | ii = 0 188 | while len(Z[ii:ii + block_size]): 189 | Zs.append(Z[ii:ii + block_size]) 190 | ii += block_size 191 | return Zs 192 | 193 | Xs = chunk(X) 194 | Ys = chunk(Y) 195 | 196 | assert sum([len(xi) for xi in Xs]) == len(X) 197 | assert sum([len(yi) for yi in Ys]) == len(Y) 198 | return chunked_linear_kernel(Xs, Ys, symmetric=(X is Y)) 199 | 200 | 201 | def mean_and_std(X, remove_std0=False, unbiased=False, 202 | internal_dtype='float64', return_dtype=None): 203 | """Return the mean and standard deviation of each column of matrix `X` 204 | 205 | if `remove_std0` is True, then 0 elements of the std vector will be 206 | switched to 1. This is typically what you want for feature normalization. 
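    When `X` has more rows than MEAN_MAX_NPOINTS (resp. STD_MAX_NPOINTS), the
    mean (resp. variance) is accumulated block by block in `internal_dtype`
    to bound memory use and rounding error.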
207 | """ 208 | X = X.reshape(X.shape[0], -1) 209 | npoints, ndims = X.shape 210 | 211 | if npoints < MEAN_MAX_NPOINTS: 212 | fmean = X.mean(0, dtype=internal_dtype) 213 | else: 214 | sel = X[:MEAN_MAX_NPOINTS] 215 | fmean = np.empty_like(sel[0,:]).astype(internal_dtype) 216 | 217 | np.add.reduce(sel, axis=0, dtype=internal_dtype, out=fmean) 218 | 219 | # -- sum up the features in blocks to reduce rounding error 220 | curr = np.empty_like(fmean) 221 | npoints_done = MEAN_MAX_NPOINTS 222 | while npoints_done < npoints: 223 | sel = X[npoints_done : npoints_done + MEAN_MAX_NPOINTS] 224 | np.add.reduce(sel, axis=0, dtype=internal_dtype, out=curr) 225 | np.add(fmean, curr, fmean) 226 | npoints_done += MEAN_MAX_NPOINTS 227 | fmean /= npoints 228 | 229 | if npoints < STD_MAX_NPOINTS: 230 | fstd = X.std(0, dtype=internal_dtype) 231 | else: 232 | sel = X[:MEAN_MAX_NPOINTS] 233 | 234 | mem = np.empty_like(sel).astype(internal_dtype) 235 | curr = np.empty_like(mem[0,:]).astype(internal_dtype) 236 | 237 | seln = sel.shape[0] 238 | np.subtract(sel, fmean, mem[:seln]) 239 | np.multiply(mem[:seln], mem[:seln], mem[:seln]) 240 | fstd = np.add.reduce(mem[:seln], axis=0, dtype=internal_dtype) 241 | 242 | npoints_done = MEAN_MAX_NPOINTS 243 | # -- loop over by blocks for improved numerical accuracy 244 | while npoints_done < npoints: 245 | 246 | sel = X[npoints_done : npoints_done + MEAN_MAX_NPOINTS] 247 | seln = sel.shape[0] 248 | np.subtract(sel, fmean, mem[:seln]) 249 | np.multiply(mem[:seln], mem[:seln], mem[:seln]) 250 | np.add.reduce(mem[:seln], axis=0, dtype=internal_dtype, out=curr) 251 | np.add(fstd, curr, fstd) 252 | 253 | npoints_done += MEAN_MAX_NPOINTS 254 | 255 | if unbiased: 256 | fstd = np.sqrt(fstd / max(1, npoints - 1)) 257 | else: 258 | fstd = np.sqrt(fstd / max(1, npoints)) 259 | 260 | if remove_std0: 261 | fstd[fstd == 0] = 1 262 | 263 | if return_dtype is None: 264 | return_dtype = X.dtype 265 | 266 | return fmean.astype(return_dtype), fstd.astype(return_dtype) 267 | 268 | 269 | def assert_allclose(a, b, rtol=1e-05, atol=1e-08): 270 | if not np.allclose(a, b, rtol=rtol, atol=atol): 271 | adiff = abs(a - b).max(), 272 | rdiff = (abs(a - b) / (abs(a) + abs(b) + 1e-15)).max() 273 | raise ValueError('not close enough', (adiff, rdiff, { 274 | 'amax': a.max(), 275 | 'bmax': b.max(), 276 | 'amin': a.min(), 277 | 'bmin': b.min(), 278 | 'asum': a.sum(), 279 | 'bsum': b.sum(), 280 | })) 281 | 282 | 283 | dumps = functools.partial(cPickle.dumps, protocol=-1) 284 | loads = cPickle.loads 285 | 286 | 287 | def dumps_gram(gram): 288 | if gram.shape == gram.T.shape and np.allclose(gram, gram.T): 289 | vals = [] 290 | for i in range(gram.shape[0]): 291 | vals.extend(gram[i, i:]) 292 | return dumps(dict( 293 | shape=gram.shape, 294 | vals=np.asarray(vals, dtype=gram.dtype))) 295 | else: 296 | return dumps(gram) 297 | 298 | 299 | def loads_gram(msg): 300 | obj = loads(msg) 301 | if isinstance(obj, dict): 302 | shape = obj['shape'] 303 | vals = obj['vals'] 304 | rval = np.empty(shape, vals.dtype) 305 | jj = 0 306 | for ii in range(shape[0]): 307 | Ni = shape[0] - ii 308 | rval[ii, ii:] = vals[jj: jj + Ni] 309 | rval[ii:, ii] = vals[jj: jj + Ni] 310 | jj += Ni 311 | return rval 312 | else: 313 | return obj 314 | 315 | 316 | try: 317 | import git_head_history # -- autogenerated by fabfile.py 318 | except ImportError, e: 319 | if 'No module named git_head_history' in str(e): 320 | # -- if there was simply no file, then it sucks but its normal. 
321 | logging.getLogger(__name__).warn( 322 | "failed to import git_head_history") 323 | class git_head_history(object): 324 | pass 325 | else: 326 | # -- if the file exists but failed to import, we have a problem. 327 | raise 328 | def git_versions(): 329 | rval = {} 330 | for project in dir(git_head_history): 331 | if not project.startswith("_"): 332 | obj = getattr(git_head_history, project) 333 | rval[project] = obj 334 | return rval 335 | 336 | 337 | 338 | 339 | -------------------------------------------------------------------------------- /hpconvnet/isvm_precomputed.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import functools 4 | 5 | import numpy as np 6 | import hyperopt 7 | from sklearn.svm import SVC 8 | from .utils import linear_kernel 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class EnsembleSVC(object): 14 | """Fit and back-fit SVM ensemble. 15 | 16 | Without a `history` this solves cost function 17 | 18 | :math:`1/N \sum_i hinge(y_i max(1 - w \cdot x_i + b)) + \alpha ||w||^2` 19 | 20 | Parameters 21 | ---------- 22 | """ 23 | 24 | def __init__(self, train_sample='train'): 25 | self.train_sample = train_sample 26 | 27 | self._grams = {} 28 | self._weights = {} 29 | self._svm = None 30 | self._labels = {} 31 | self._features = {} 32 | self._compound_samples = {} 33 | self._kernels = {} 34 | 35 | def copy(self): 36 | rval = self.__class__(train_sample=self.train_sample) 37 | rval._grams = dict(self._grams) 38 | rval._weights = copy.deepcopy(self._weights) 39 | rval._svm = copy.deepcopy(self._svm) 40 | rval._labels = copy.deepcopy(self._labels) 41 | rval._features = dict(self._features) 42 | rval._compound_samples = copy.deepcopy(self._compound_samples) 43 | rval._kernels = copy.deepcopy(self._kernels) 44 | return rval 45 | 46 | @property 47 | def members(self): 48 | return self._weights.keys() 49 | 50 | def has_member(self, member): 51 | return member in self._weights 52 | 53 | def add_member(self, member, weight=1.0, kernel=linear_kernel): 54 | logger.debug('add_member: %s' % member) 55 | if member in self._weights: 56 | if (self._weights[member] == weight 57 | and self._kernels[member] == kernel): 58 | pass 59 | else: 60 | raise KeyError(member) 61 | else: 62 | self._weights[member] = weight 63 | self._kernels[member] = kernel 64 | 65 | def set_weight(self, member, weight): 66 | self._weights[member] = weight 67 | 68 | def add_sample(self, sample, labels=None): 69 | logger.debug('add_sample: %s' % sample) 70 | if sample in self._labels: 71 | if np.all(labels == self._labels[sample]): 72 | pass 73 | else: 74 | raise KeyError(sample) 75 | else: 76 | self._labels[sample] = labels 77 | 78 | def add_compound_sample(self, sample, subsamples): 79 | logger.debug('add_compound_sample: %s' % sample) 80 | if sample in self._compound_samples: 81 | raise KeyError(sample) 82 | else: 83 | if not isinstance(subsamples, (list, tuple)): 84 | raise TypeError(subsamples) 85 | self._compound_samples[sample] = subsamples 86 | 87 | def as_raw_samples(self, sample1): 88 | if isinstance(sample1, (tuple, list)): 89 | def add(a, b): 90 | return a + b 91 | return reduce(add, map(self.as_raw_samples, sample1)) 92 | elif sample1 in self._compound_samples: 93 | return self.as_raw_samples(self._compound_samples[sample1]) 94 | else: 95 | return [sample1] 96 | 97 | def add_features(self, member, sample, features): 98 | if member not in self._weights: 99 | raise KeyError(member) 100 | if sample not in self._labels: 
101 | raise KeyError(sample) 102 | self._features[(member, sample)] = features 103 | 104 | def del_features(self, member, sample): 105 | del self._features[(member, sample)] 106 | 107 | def has_gram(self, member, sample1, sample2): 108 | return (member, sample1, sample2) in self._grams 109 | 110 | def add_gram(self, member, sample1, sample2, gram): 111 | if member not in self._weights: 112 | raise KeyError(member) 113 | if sample1 not in self._labels: 114 | raise KeyError(sample1) 115 | if sample2 not in self._labels: 116 | raise KeyError(sample2) 117 | logger.debug('add_gram: (%s, %s, %s) -> (%i, %i) array of %s' % ( 118 | member, sample1, sample2, 119 | gram.shape[0], gram.shape[1], gram.dtype)) 120 | self._grams[(member, sample1, sample2)] = gram 121 | self._grams[(member, sample2, sample1)] = gram.T 122 | 123 | def del_gram(self, member, sample1, sample2): 124 | del self._grams[(member, sample1, sample2)] 125 | del self._grams[(member, sample2, sample1)] 126 | 127 | def compute_gram(self, member, sample1, sample2, dtype=np.float): 128 | if member not in self._weights: 129 | raise KeyError(member) 130 | if sample1 not in self._labels: 131 | raise KeyError(sample1) 132 | if sample2 not in self._labels: 133 | raise KeyError(sample2) 134 | f1 = self._features[(member, sample1)] 135 | f2 = self._features[(member, sample2)] 136 | gram = self._kernels[member](f1, f2).astype(dtype) 137 | if gram.shape != (len(f1), len(f2)): 138 | raise ValueError('kernel function returned wrong shape') 139 | self.add_gram(member, sample1, sample2, gram) 140 | 141 | def compute_all_grams(self, members, samples): 142 | for member in members: 143 | for sample1 in samples: 144 | for sample2 in samples: 145 | if (member, sample1, sample2) not in self._grams: 146 | self.compute_gram(member, sample1, sample2) 147 | 148 | def gram(self, member, sample1, sample2): 149 | to_vstack = [] 150 | row_samples = self.as_raw_samples(sample1) 151 | col_samples = self.as_raw_samples(sample2) 152 | for rs in row_samples: 153 | to_hstack = [self._grams[(member, rs, cs)] 154 | for cs in col_samples] 155 | to_vstack.append(np.hstack(to_hstack)) 156 | rval = np.vstack(to_vstack) 157 | return rval 158 | 159 | def labels(self, sample): 160 | raw_samples = self.as_raw_samples(sample) 161 | raw_labels = [self._labels[s] for s in raw_samples] 162 | return np.concatenate(raw_labels) 163 | 164 | def weighted_gram(self, sample1, sample2, weights=None): 165 | if weights is None: 166 | weights = self._weights 167 | members = weights.keys() 168 | # -- sorting not strictly necessary, but helps different processes to 169 | # -- perform the same calculation. 170 | members.sort() 171 | rval = None 172 | for m in members: 173 | # -- The weights represent squared importance coefficients, one 174 | # -- on each ensemble member. 
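            # -- i.e. (a sketch of the math, not new behaviour):
            #        K(s1, s2) = sum_m weights[m] * K_m(s1, s2)
            #    where K_m is the per-member gram returned by self.gram().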
175 | gg = weights[m] * self.gram(m, sample1, sample2) 176 | rval = gg if rval is None else gg + rval 177 | return rval 178 | 179 | def fit_svm(self, l2_regularization=None, train_sample=None): 180 | if train_sample is None: 181 | train_sample = self.train_sample 182 | 183 | g_trn = self.weighted_gram(train_sample, train_sample) 184 | 185 | if l2_regularization is None: 186 | l2_regularization = 1.0 / len(g_trn) 187 | 188 | C = 1.0 / (l2_regularization * len(g_trn)) 189 | 190 | if l2_regularization is None: 191 | assert np.allclose(C, 1.0) 192 | svm = SVC( 193 | C=C, 194 | kernel='precomputed', 195 | cache_size=1.1 * 4.0 * g_trn.size / (1 << 20), 196 | max_iter=0.5 * len(g_trn) ** 2, # COMPLETE HEURISTIC GUESS 197 | ) 198 | svm.fit(g_trn, self.labels(train_sample)) 199 | self._svm = svm 200 | 201 | def fit_weights_crossvalid(self, validation_sample, max_evals, 202 | algo=None, 203 | scales=100.0, 204 | members=None, 205 | ): 206 | """Fit an SVM and optimize [some of] the kernel weights. 207 | 208 | Parameters 209 | ---------- 210 | validation_sample : sample identifier 211 | Adjust hyperparameters to optimize performance on this set. 212 | 213 | max_evals : integer 214 | Try no more than this many hyperparameter settings. 215 | 216 | algo: hyperopt.algo 217 | A hyperopt optimization algorithm for hyperparameters. 218 | Default is currently hyperopt.tpe.suggest 219 | 220 | scales: float or dict: member -> float 221 | Multiplicative uncertainty around the current weight value 222 | for each member (larger for broader search). 223 | 224 | members : None or sequence of member names 225 | Members to combine as in MKL. `None` means to use all members. 226 | 227 | 228 | TODO 229 | ---- 230 | Look at literature for MKL and do something more efficient 231 | and accurate. 232 | """ 233 | 234 | # -- N.B. 235 | # -- We don't need to take l2-regularization into account because by 236 | # -- optimizing the norm of the weights, we are implicitly optimizing 237 | # -- the l2-regularization on the model. 238 | 239 | trn_sample = self.train_sample 240 | val_sample = validation_sample 241 | 242 | labels_trn = self.labels(trn_sample) 243 | labels_val = self.labels(val_sample) 244 | 245 | if algo is None: 246 | algo = functools.partial( 247 | hyperopt.tpe.suggest, 248 | n_startup_jobs=5) 249 | 250 | if isinstance(scales, (int, float, np.number)): 251 | #scales = {m: scales for m in self._weights} 252 | scales = dict([(m, scales) for m in self._weights]) 253 | else: 254 | if set(scales.keys()) != set(self._weights.keys()): 255 | raise ValueError('wrong number of search scales') 256 | 257 | if members is None: 258 | members = self._weights.keys() 259 | else: 260 | members = list(members) 261 | # -- sorting not strictly necessary, but helps different processes to 262 | # -- perform the same calculation by presenting the same `domain` below. 
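        # -- (for reference) the `domain` built further down places a
        #    lognormal prior on each member's weight, centred (in median) at
        #    its current value with multiplicative spread scales[m].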
263 | members.sort() 264 | 265 | def eval_weights(ws): 266 | ws = np.asarray(ws) 267 | ws_weights = copy.deepcopy(self._weights) 268 | ws_weights.update(dict(zip(members, ws))) 269 | g_trn = self.weighted_gram(trn_sample, trn_sample, ws_weights) 270 | g_val = self.weighted_gram(val_sample, trn_sample, ws_weights) 271 | 272 | logger.debug('fitting ws=%s' % str(ws)) 273 | svm = SVC( 274 | C=1.0, 275 | kernel='precomputed', 276 | cache_size=1.1 * 4.0 * g_trn.size / (1 << 20), 277 | max_iter=0.5 * len(g_trn) ** 2, # XXX: COMPLETE HEURISTIC GUESS 278 | ) 279 | 280 | TINY = 1e-8 281 | def nogood(msg): 282 | logger.debug('f(%s) -> "%s"' % (ws, msg)) 283 | return dict(loss=1.0, status='ok', svm=svm, fit=False) 284 | 285 | if not np.all(np.isfinite(g_trn)): 286 | return nogood('non-finite gram matrix (train)') 287 | if np.all(abs(g_trn) < TINY): 288 | return nogood('null gram matrix (train)') 289 | if not np.all(np.isfinite(g_val)): 290 | return nogood('non-finite gram matrix (valid)') 291 | if np.all(abs(g_val) < TINY): 292 | return nogood('null gram matrix (valid)') 293 | 294 | svm.fit(g_trn, labels_trn) 295 | logger.debug('done!') 296 | pred_val = svm.predict(g_val) 297 | assert labels_val.shape == pred_val.shape 298 | err_rate = np.mean(labels_val != pred_val) 299 | # XXX: to break ties, take smaller weights 300 | rval = err_rate + 1e-4 * np.log1p(np.sum(ws)) 301 | logger.debug('f(%s) -> %f -> %f' % (ws, err_rate, rval)) 302 | return dict(loss=rval, status='ok', svm=svm, fit=True) 303 | 304 | try: 305 | # -- This optimizer seems a little bit less finicky than the 306 | # scipy ones (!?) such as anneal (doesn't respect lower bound, 307 | # or maxeval) and slsqp (xvalid cost is not continuous)... but 308 | # -- TODO: when a GP-based optimizer is in hyperopt, that would 309 | # probably work even better. 
310 | # -- Note: I emailed Jasper about pushing his GP impl to sklearn 311 | level = logging.getLogger('hyperopt').level 312 | logging.getLogger('hyperopt').setLevel(logging.WARN) 313 | trials = hyperopt.Trials() 314 | domain = [ 315 | hyperopt.hp.lognormal( 316 | str(m), np.log(self._weights[m]), np.log(scales[m])) 317 | for m in members] 318 | hyperopt.fmin( 319 | eval_weights, 320 | space=domain, 321 | trials=trials, 322 | max_evals=max_evals, 323 | algo=algo, 324 | ) 325 | finally: 326 | logging.getLogger('hyperopt').setLevel(level) 327 | final_weights = trials.argmin 328 | 329 | if not trials.best_trial['result']['fit']: 330 | # -- meant to be caught by slm.py call_catching_pipeline_errors() 331 | raise ValueError('Failed to fit SVM (non-finite features)') 332 | self._svm = trials.best_trial['result']['svm'] 333 | self._weights = final_weights 334 | 335 | def predict(self, test_sample): 336 | g = self.weighted_gram(test_sample, self.train_sample) 337 | return self._svm.predict(g) 338 | 339 | def error_rate(self, test_sample): 340 | preds = self.predict(test_sample) 341 | rval = np.mean(preds != self._labels[test_sample]) 342 | return rval 343 | 344 | def print_summary(self): 345 | print 'EnsembleSVC.print_summary()' 346 | print ' weights', self._weights 347 | 348 | -------------------------------------------------------------------------------- /hpconvnet/slm_visitor.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import time 4 | 5 | import numpy as np 6 | 7 | import skdata.base 8 | from skdata.larray import cache_memmap 9 | from skdata.larray import lmap 10 | from skdata.larray import lmap_info 11 | from skdata.larray import reindex 12 | 13 | from hyperopt.pyll import scope # TODO remove-me 14 | import foobar 15 | 16 | from .pyll_slm import average_row_l2norm 17 | from .pyll_slm import pyll_theano_batched_lmap # TODO: CLEAN UP 18 | from .pyll_slm import flatten_elems 19 | 20 | from .isvm_boosting import BoostHelper 21 | 22 | from .utils import mean_and_std 23 | import comparisons 24 | 25 | info = logging.getLogger(__name__).info 26 | warn = logging.getLogger(__name__).warn 27 | 28 | 29 | class SLM_Visitor(skdata.base.LearningAlgo): 30 | """ 31 | This class takes an *evaluated* pipeline as a starting point, 32 | and applies it to a data set. Nothing symbolic here. 33 | """ 34 | def __init__(self, pipeline, ctrl, data_view, 35 | max_n_features, 36 | batchsize, 37 | memmap_name='', 38 | badfit_thresh=float('inf'), 39 | memmap_del_atexit=True, 40 | batched_lmap_speed_thresh=None, 41 | comparison_names=('mult', 'absdiff', 'sqrtabsdiff', 'sqdiff'), 42 | ): 43 | self.pipeline = pipeline 44 | self.ctrl = ctrl 45 | self.data_view = data_view 46 | self.memmap_name = memmap_name 47 | self.max_n_features = max_n_features 48 | self.badfit_thresh = badfit_thresh 49 | self.memmap_del_atexit = memmap_del_atexit 50 | self.batched_lmap_speed_thresh = batched_lmap_speed_thresh 51 | self.batchsize = batchsize 52 | 53 | self.image_features = {} 54 | self.comparison_names = comparison_names 55 | 56 | def get_image_features_lmap(self, images, batched_lmap_speed_thresh=None): 57 | N, H, W, C = images.shape 58 | assert C in (1, 3) 59 | # -- this loading must be simple, and match the unsup_images 60 | # function in lfw. 
Anything more elaborate must 61 | # be included in the pyll pipeline 62 | chmajor_fn = functools.partial(np.transpose, axes=(2, 0, 1)) 63 | chmajor_fn = lmap_info( 64 | shape=(C, H, W), 65 | dtype=images.dtype 66 | )(chmajor_fn) 67 | def chmajor_fn_f_map(X): 68 | return np.transpose(X, axes=(0, 3, 1, 2)) 69 | chmajor_fn.f_map = chmajor_fn_f_map 70 | 71 | rval = pyll_theano_batched_lmap( 72 | scope.partial(scope.callpipe1, self.pipeline['pipe']), 73 | lmap(chmajor_fn, images), 74 | batchsize=self.batchsize, 75 | print_progress_every=10, # -- seconds 76 | abort_on_rows_larger_than=self.max_n_features, 77 | speed_thresh=batched_lmap_speed_thresh, 78 | x_dtype='uint8', # HAS TO MATCH ./slm.py 79 | ) 80 | return rval 81 | 82 | # XXX ugly function, refactor with normalized_image_match_features 83 | # we don't even use the "cdict" anymore and I think I manually 84 | # clear the self.image_features dict after calling! 85 | def get_image_features(self, task, batched_lmap_speed_thresh=None): 86 | if batched_lmap_speed_thresh is None: 87 | batched_lmap_speed_thresh = self.batched_lmap_speed_thresh 88 | images = task.images 89 | try: 90 | rval, _images, cdict = self.image_features[images] 91 | # -- currently it is true that all tasks should be indexing into 92 | # -- the same set of images. Later when this is not the case, 93 | # -- delete this over-strict check. 94 | assert _images is images 95 | except KeyError: 96 | feature_lmap = self.get_image_features_lmap(task.images, 97 | batched_lmap_speed_thresh) 98 | 99 | rval = cache_memmap( 100 | feature_lmap, 101 | self.memmap_name + '_image_features_' + task.name, 102 | del_atexit=self.memmap_del_atexit) 103 | 104 | foobar.append_ndarray_signature(rval[0], 105 | 'get_image_features features 0', task.name) 106 | foobar.append_ndarray_signature(rval[100], 107 | 'get_image_features features 100', task.name) 108 | 109 | cdict = {} 110 | self.image_features[images] = rval, images, cdict 111 | return rval, cdict 112 | 113 | def normalized_image_features(self, images, xmean, xstd, avg_nrm, 114 | n_rows_to_estimate_stats=1000, 115 | flatten=True, 116 | batched_lmap_speed_thresh=None, 117 | ): 118 | """ 119 | svm_dct - dict 120 | dict of parameters for normalization: 121 | 'remove_std0' 122 | 'varthresh' 123 | 'divrowl2' 124 | write xmean, xstd if role is 'train' 125 | read xmean and xstd if role is 'test' 126 | role - str 127 | either 'train' or 'test' 128 | n_rows_to_estimate_stats - bool 129 | estimate xmean and xstd from the first N feature vectors 130 | flatten - bool 131 | return features flattened to vectors 132 | """ 133 | 134 | if not flatten: 135 | raise NotImplementedError('only flatten is implemented') 136 | 137 | pipeline = self.pipeline 138 | features_lmap = self.get_image_features_lmap(images) 139 | 140 | n_features = np.prod(features_lmap.shape[1:]) 141 | 142 | if xmean is None: 143 | # -- load enough training data into memory to estimate stats 144 | cache_train = flatten_elems( 145 | features_lmap[:n_rows_to_estimate_stats]) 146 | 147 | xmean, xstd = mean_and_std( 148 | cache_train, 149 | remove_std0=pipeline['remove_std0']) 150 | 151 | xstd = np.sqrt(xstd ** 2 + pipeline['varthresh']) 152 | 153 | if pipeline['divrowl2']: 154 | avg_nrm = 1e-7 + average_row_l2norm( 155 | (cache_train - xmean) / xstd) 156 | else: 157 | avg_nrm = 1 158 | 159 | def normalize(x): 160 | return (x.flatten() - xmean) / (xstd * avg_nrm) 161 | 162 | def normalize_many(x): 163 | return (x.reshape((len(x), -1)) - xmean) / (xstd * avg_nrm) 164 | 165 | normed_features = lmap( 
166 | lmap_info( 167 | shape=(n_features,), 168 | dtype=features_lmap.dtype)(normalize), 169 | features_lmap, 170 | ragged=False, 171 | f_map=normalize_many) 172 | 173 | return normed_features, xmean, xstd, avg_nrm 174 | 175 | 176 | 177 | def normalized_image_match_features(self, task, svm_dct, role, 178 | batched_lmap_speed_thresh=None): 179 | assert role in ('train', 'test') 180 | if batched_lmap_speed_thresh is None: 181 | batched_lmap_speed_thresh = self.batched_lmap_speed_thresh 182 | image_features, cdict = self.get_image_features(task, 183 | batched_lmap_speed_thresh=batched_lmap_speed_thresh) 184 | del cdict # -- no longer used (waste of memory) 185 | pipeline = self.pipeline 186 | info('Indexing into image_features of shape %s' % 187 | str(image_features.shape)) 188 | 189 | comps = [getattr(comparisons, cc) 190 | for cc in self.comparison_names] 191 | n_features = np.prod(image_features.shape[1:]) 192 | n_trn = len(task.lidx) 193 | 194 | x_trn_shp = (n_trn, len(comps), n_features) 195 | info('Allocating training ndarray of shape %s' % str(x_trn_shp)) 196 | x_trn = np.empty(x_trn_shp, dtype='float32') 197 | 198 | # -- pre-compute all of the image_features we will need 199 | all_l_features = reindex(image_features, task.lidx)[:] 200 | all_r_features = reindex(image_features, task.ridx)[:] 201 | 202 | all_l_features = all_l_features.reshape(len(all_l_features), -1) 203 | all_r_features = all_r_features.reshape(len(all_r_features), -1) 204 | 205 | foobar.append_ndarray_signature(all_l_features, 206 | 'normalized_image_match l_features', task.name) 207 | foobar.append_ndarray_signature(all_r_features, 208 | 'normalized_image_match r_features', task.name) 209 | 210 | if role == 'train': 211 | if np.allclose(all_l_features.var(axis=0), 0.0): 212 | raise ValueError( 213 | 'Homogeneous features (non-finite features)') 214 | 215 | xmean_l, xstd_l = mean_and_std(all_l_features, 216 | remove_std0=pipeline['remove_std0']) 217 | xmean_r, xstd_r = mean_and_std(all_r_features, 218 | remove_std0=pipeline['remove_std0']) 219 | xmean = (xmean_l + xmean_r) / 2.0 220 | # -- this is an ad-hoc way of blending the variances. 
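            # -- i.e. xstd = sqrt(max(xstd_l, xstd_r)**2 + varthresh); the
            #    varthresh term acts as a variance floor so near-constant
            #    features do not explode after standardization.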
221 | xstd = np.sqrt(np.maximum(xstd_l, xstd_r) ** 2 222 | + pipeline['varthresh']) 223 | 224 | foobar.append_ndarray_signature( 225 | xmean, 'normalized_image_match xmean', task.name) 226 | foobar.append_ndarray_signature( 227 | xstd, 'normalized_image_match xstd', task.name) 228 | 229 | 230 | svm_dct['xmean'] = xmean 231 | svm_dct['xstd'] = xstd 232 | else: 233 | xmean = svm_dct['xmean'] 234 | xstd = svm_dct['xstd'] 235 | 236 | info('Computing comparison features') 237 | 238 | # -- now compute the "comparison functions" into x_trn 239 | for jj, (lfeat, rfeat) in enumerate( 240 | zip(all_l_features, all_r_features)): 241 | lfeat_z = (lfeat - xmean) / xstd 242 | rfeat_z = (rfeat - xmean) / xstd 243 | for ci, comp in enumerate(comps): 244 | x_trn[jj, ci, :] = comp(lfeat_z, rfeat_z) 245 | 246 | if pipeline['divrowl2']: 247 | info('Dividing by feature norms') 248 | # -- now normalize by average feature norm because some 249 | # comparison functions come out smaller than others 250 | if role == 'train': 251 | svm_dct['divrowl2_avg_nrm'] = {} 252 | for ci, cname in enumerate(self.comparison_names): 253 | avg_nrm = average_row_l2norm(x_trn[:, ci, :]) + 1e-7 254 | svm_dct['divrowl2_avg_nrm'][cname] = avg_nrm 255 | 256 | avg_nrm_vec = [svm_dct['divrowl2_avg_nrm'][cname] 257 | for cname in self.comparison_names] 258 | x_trn /= np.asarray(avg_nrm_vec)[None, :, None] 259 | foobar.append_trace('get_normlized_features avg_nrm', avg_nrm_vec) 260 | 261 | # -- collapse comparison and feature dimensions 262 | x_trn.shape = (x_trn.shape[0], x_trn.shape[1] * x_trn.shape[2]) 263 | 264 | foobar.append_ndarray_signature( 265 | x_trn, 'normalized_image_match x_trn', task.name) 266 | info('normalized_image_match_features complete') 267 | return x_trn 268 | 269 | def loss(self, model, task): 270 | info('Score %s' % task.name) 271 | semantics = task.semantics 272 | methodname = 'loss_' + semantics 273 | method = getattr(self, methodname) 274 | loss = method(model, task) 275 | return loss 276 | 277 | def best_model(self, train, valid=None): 278 | semantics = train.semantics 279 | # -- train the svm 280 | info('BestModelByCrossValidation %s, %s' % ( 281 | train.name, getattr(valid, 'name', None))) 282 | model = getattr(self, 'train_' + semantics)(train, valid) 283 | return model 284 | 285 | def retrain_classifier(self, model, task): 286 | info('RetrainClassifier %s' % task.name) 287 | semantics = task.semantics 288 | methodname = 'retrain_classifier_' + semantics 289 | method = getattr(self, methodname) 290 | new_model = method(model, task) 291 | # -- measure the erate and compute the cur_xw values 292 | getattr(self, 'loss_' + semantics)(new_model, task) 293 | return new_model 294 | 295 | def _member_name(self, tid=None): 296 | if tid is None: 297 | if self.ctrl.current_trial is None: 298 | tid = 'debug' 299 | else: 300 | tid = self.ctrl.current_trial['tid'] 301 | member_name = 'member_%s' % tid 302 | return member_name 303 | 304 | def load_ensemble_history(self, fields): 305 | 306 | trials = self.ctrl.trials 307 | if hasattr(trials, 'handle'): 308 | # query mongodb directly to avoid transferring un-necessary fields 309 | docs_for_bh = BoostHelper.query_MongoTrials( 310 | trials, 311 | fields=fields) 312 | # download only those docs that are in the active history 313 | trials.refresh_tids([d['tid'] for d in docs_for_bh]) 314 | # -- XXX: relatively arbitrary assert to make sure we didn't 315 | # download a whole wack of documents... the point of 316 | # refresh_tids is to avoid this. 
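            # -- concretely: allow strictly fewer than 5 extra documents
            #    beyond those selected for the boosting history.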
317 | assert len(trials.trials) < len(docs_for_bh) + 5, ( 318 | len(trials.trials), len(docs_for_bh)) 319 | else: 320 | trials.refresh() 321 | docs_for_bh = trials.trials 322 | 323 | def helper(): 324 | bh = BoostHelper(docs_for_bh) 325 | 326 | if self.ctrl.current_trial is None: 327 | history = [] 328 | else: 329 | history = bh.history(self.ctrl.current_trial) 330 | assert history[-1] is self.ctrl.current_trial 331 | history.pop(-1) 332 | info('load_ensemble_history: %i previous model documents found' 333 | % len(history)) 334 | return history 335 | 336 | retries = 20 337 | while retries: 338 | history = helper() 339 | if any(trial['result'].get('in_progress') for trial in history): 340 | warn('Previous trial is still in progress, waiting 30s') 341 | time.sleep(30) 342 | retries -= 1 343 | else: 344 | break 345 | 346 | foobar.append_trace('load ensemble history len', len(history)) 347 | 348 | if retries: 349 | self.history = history 350 | else: 351 | raise Exception('Previous trial in progress, cannot continue') 352 | 353 | -------------------------------------------------------------------------------- /hpconvnet/slm_visitor_primal.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import cPickle 3 | import functools 4 | import logging 5 | import os 6 | 7 | import numpy as np 8 | 9 | from hyperopt import pyll 10 | import hyperopt 11 | from hyperopt.base import use_obj_for_literal_in_memo 12 | 13 | from skdata.larray import cache_memmap 14 | 15 | #from .isvm_boosting import BoostHelper 16 | from .isvm_multi import IncrementalMultiSVM 17 | from .pyll_slm import error_rate 18 | from .pyll_slm import view2_worth_calculating 19 | from .slm_visitor import SLM_Visitor 20 | from .slm import USLM_Exception 21 | from .slm import call_catching_pipeline_errors 22 | from .utils import git_versions 23 | 24 | logger = logging.getLogger(__name__) 25 | info = logger.info 26 | warn = logger.warn 27 | 28 | loads = cPickle.loads 29 | dumps = functools.partial(cPickle.dumps, protocol=-1) 30 | 31 | 32 | def recupdate(dct, path, payload): 33 | if path: 34 | if not isinstance(path[0], basestring): 35 | raise TypeError(path[0]) 36 | dct.setdefault(path[0], {}) 37 | return recupdate(dct[path[0]], path[1:], payload) 38 | else: 39 | dct.update(payload) 40 | return dct 41 | 42 | 43 | class PrimalVisitor(SLM_Visitor): 44 | """ 45 | An skdata-compatible learning algorithm that implements SVM with 46 | isvm_binary and isvm_multi. 47 | 48 | This class takes an *evaluated* pipeline as a starting point, 49 | and applies it to a data set. Nothing symbolic here. 
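    Ensemble members are fit as continuations: the decision values (`xw`) of
    previous trials are loaded from their attachments and handed to the
    incremental SVM as a fixed history (see load_prev_xw / load_svm below).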
50 | """ 51 | 52 | def __init__(self, 53 | optimize_l2_reg, 54 | thresh_loss=None, 55 | thresh_rank=10, 56 | badfit_thresh=float('inf'), 57 | svm_crossvalid_max_evals=20, 58 | **kwargs 59 | ): 60 | SLM_Visitor.__init__(self, **kwargs) 61 | 62 | self.optimize_l2_reg = optimize_l2_reg 63 | self.member_name = self._member_name() 64 | self.thresh_loss = thresh_loss 65 | self.thresh_rank = thresh_rank 66 | self.svm_crossvalid_max_evals = svm_crossvalid_max_evals 67 | self.badfit_thresh = badfit_thresh 68 | 69 | self._results = {} 70 | self._obj_results = {} 71 | 72 | if not self.optimize_l2_reg: 73 | raise NotImplementedError() 74 | 75 | def hyperopt_rval(self): 76 | return copy.deepcopy(self._results) 77 | 78 | def attach_obj_results(self): 79 | ctrl = self.ctrl 80 | if ctrl.current_trial is None: 81 | return 82 | att = ctrl.trials.trial_attachments(ctrl.current_trial) 83 | def helper(dct, prefix): 84 | for key, val in dct.items(): 85 | if isinstance(val, dict): 86 | helper(val, '%s/%s' % (prefix, key)) 87 | else: 88 | att['%s/%s' % (prefix, key)] = dumps(val) 89 | helper(self._obj_results, '') 90 | 91 | def trial_obj_attachment(self, trial, rpath): 92 | key = '/' + '/'.join(rpath) 93 | att = self.ctrl.trials.trial_attachments(trial) 94 | msg = att[key] 95 | rval = loads(msg) 96 | return rval 97 | 98 | def add_results(self, path, simple, objs): 99 | for key, val in objs.items(): 100 | if isinstance(val, dict): 101 | raise TypeError('cannot attach a dict', key) 102 | recupdate(self._results, path, simple) 103 | recupdate(self._obj_results, path, objs) 104 | 105 | def load_history(self): 106 | raise NotImplementedError() 107 | 108 | def load_svm(self, train_name, valid_name, 109 | n_features, n_classes, l2_reg): 110 | if self.history: 111 | prev_doc = self.history[-1] 112 | info('load_svm: %i previous model documents found' 113 | % len(self.history)) 114 | info('load_svm: Most-previous model document tid: %s' 115 | % prev_doc['tid']) 116 | att = self.ctrl.trials.trial_attachments(prev_doc) 117 | prev_svm = self.trial_obj_attachment(prev_doc, 118 | ['train_indexed_image_classification', train_name, 119 | valid_name, 'model']) 120 | svm = prev_svm.continuation(n_features, l2_reg) 121 | info('load_svm: alpha shape %s' 122 | % str(svm.alpha.shape)) 123 | info('load_svm: prev_w_l2_sqr shape %s' 124 | % str(svm.prev_w_l2_sqr.shape)) 125 | else: 126 | info('load_svm: No previous model document found') 127 | info('load_svm: Allocating SVM for %i x %i problem' 128 | % (n_features, n_classes)) 129 | svm = IncrementalMultiSVM(n_features, n_classes, 130 | l2_regularization=l2_reg, 131 | dtype='float32', 132 | # -- TODO consider maxfun, M, tolerances? 
133 | bfgs_kwargs={ 134 | 'maxfun': 1000, 135 | 'iprint': 0, 136 | 'm': 32, 137 | 'factr': 100}, 138 | print_interval=5000, 139 | n_sgd_iters=0, 140 | badfit_thresh=self.badfit_thresh, 141 | ) 142 | return svm 143 | 144 | def load_prev_xw(self, task_name, train_name, valid_name, use_history): 145 | assert use_history in ('using_history', 'not_using_history') 146 | if not self.history: 147 | info('load_prev_xw: No previous model documents for %s/%s/%s' 148 | % (task_name, train_name, valid_name)) 149 | return None 150 | prev_xw_list = [] 151 | for pm_doc in self.history: 152 | info('load_prev_xw doc %i loss %f' % ( 153 | pm_doc['tid'], pm_doc['result']['loss'])) 154 | xw = self.trial_obj_attachment(pm_doc, 155 | ['loss_indexed_image_classification', 156 | task_name, train_name, valid_name, use_history, 'xw']) 157 | prev_xw_list.append(xw.astype('float32')) 158 | 159 | info('load_prev_xw: %i previous model documents found' 160 | % len(prev_xw_list)) 161 | # -- put them into desired shape: (examples, classes, models) 162 | prev_xw = np.asarray(prev_xw_list).transpose(1, 2, 0).copy() 163 | return prev_xw 164 | 165 | def train_indexed_image_classification(self, train, valid=None): 166 | 167 | if valid is None: 168 | train_name = train.name 169 | valid_name = 'None' 170 | else: 171 | train_name = train.name 172 | valid_name = valid.name 173 | assert train.all_images is valid.all_images 174 | assert train.all_labels is valid.all_labels 175 | 176 | info('train_indexed_image_classification: %s/%s' % ( 177 | train_name, valid_name)) 178 | 179 | normed_features, xmean, xstd, avg_nrm = \ 180 | self.normalized_image_features( 181 | train.all_images, None, None, None, flatten=True) 182 | 183 | assert train.name is not None 184 | 185 | if hasattr(self, 'cmemmap'): 186 | assert train.all_images is self.cmemmap_all_images 187 | else: 188 | self.cmemmap_all_images = train.all_images 189 | self.cmemmap = cache_memmap( 190 | normed_features, 191 | self.memmap_name, 192 | del_atexit=True) 193 | 194 | if not hasattr(self, 'history'): 195 | self.load_ensemble_history(fields=[]) 196 | 197 | svm = self.load_svm( 198 | train_name, valid_name, self.cmemmap.shape[1], 199 | train.n_classes, self.pipeline['l2_reg']) 200 | svm.feature_xmean = xmean 201 | svm.feature_xstd = xstd 202 | svm.feature_avg_nrm = avg_nrm 203 | svm.train_name = train_name 204 | svm.valid_name = valid_name 205 | 206 | prev_xw_trn = self.load_prev_xw( 207 | train_name, train_name, valid_name, use_history='using_history') 208 | 209 | info('train_indexed_image_classification: Fitting SVM with prev_xw') 210 | svm.fit(self.cmemmap[train.idxs], 211 | train.all_labels[train.idxs], 212 | prev_xw_trn) 213 | 214 | info('-> loaded alpha %s' % str(svm.alpha)) 215 | info('-> loaded prvl2 %s' % str(svm.prev_l2_regularization)) 216 | info('-> loaded prvw2 %s' % str(svm.prev_w_l2_sqr)) 217 | 218 | if valid is None: 219 | # -- XXX: it is currently a hack to use the existence 220 | # of the validation set to decide when to compute 221 | # an svm without the history features... it currently 222 | # so happens that for the fit/val split we have a validation 223 | # set and we want to train both ways, and for the sel/test 224 | # split we do not have a validation set and we only want the 225 | # fit-with-history training. 
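            # -- In short: with a validation set we fit both `svm` (with
            #    history) and `svm0` (history zeroed out); on the sel/test
            #    split `svm0` stays None and only the with-history loss is
            #    reported.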
226 | assert train.name == 'sel' 227 | svm0 = None 228 | else: 229 | svm0 = copy.deepcopy(svm) 230 | if (prev_xw_trn is not None) and prev_xw_trn.size: 231 | info('Fitting SVM without prev_xw') 232 | svm0.fit(self.cmemmap[train.idxs], 233 | train.all_labels[train.idxs], 234 | np.zeros_like(prev_xw_trn)) 235 | self.add_results( 236 | [ 237 | 'train_indexed_image_classification', 238 | train_name, 239 | valid_name, 240 | ], 241 | { 242 | 'train_name': train_name, 243 | 'valid used': (valid is not None), 244 | 'valid_name': valid_name, 245 | }, 246 | { 247 | 'model0': svm0, 248 | 'model': svm, 249 | }) 250 | 251 | self.loss_indexed_image_classification(svm, train) 252 | if valid is not None: 253 | self.loss_indexed_image_classification(svm, valid) 254 | self.loss_indexed_image_classification(svm0, valid, 255 | use_history='not_using_history') 256 | 257 | return svm 258 | 259 | def loss_indexed_image_classification(self, model, task, 260 | use_history='using_history'): 261 | assert task.name 262 | 263 | # -- N.B. using_history here, because we want to build on the models 264 | # that *were* using history 265 | prev_xw = self.load_prev_xw(task.name, 266 | model.train_name, model.valid_name, 267 | use_history='using_history') 268 | 269 | x = self.cmemmap[task.idxs] 270 | 271 | if (use_history == 'using_history') or (prev_xw is None): 272 | preds = model.predict(x, prev_xw) 273 | else: 274 | preds = model.predict(x, np.zeros_like(prev_xw)) 275 | erate = error_rate(preds, task.all_labels[task.idxs]) 276 | xw = np.dot(x, model.weights) 277 | 278 | assert preds.min() >= 0 279 | if preds.max() < 256: 280 | preds = preds.astype('uint8') 281 | if '64' in str(xw.dtype): 282 | xw = xw.astype('float32') 283 | 284 | self.add_results( 285 | ['loss_indexed_image_classification', 286 | task.name, 287 | model.train_name, 288 | model.valid_name, 289 | use_history, 290 | ], 291 | {'erate': erate, 292 | 'task_name': task.name, 293 | 'train_name': model.train_name, 294 | 'valid_name': model.valid_name, 295 | 'use_history': use_history, 296 | }, 297 | { 298 | 'preds': preds, 299 | 'xw': xw, 300 | }) 301 | 302 | info('loss: ERR RATE %s = %f' % (task.name, erate)) 303 | info('loss: XW STATS %f %f %f %s' % 304 | (xw.min(), xw.mean(), xw.max(), xw.shape)) 305 | 306 | return erate 307 | 308 | 309 | # -- this helper is called by mnist and svhn as well 310 | def uslm_eval_helper( 311 | expr, 312 | memo, 313 | ctrl, 314 | data_fraction, 315 | assume_promising, 316 | data_view, 317 | memmap_name_template, 318 | DataView, 319 | loss_fn, 320 | true_loss_fn, 321 | ): 322 | 323 | use_obj_for_literal_in_memo(expr, data_view, DataView, memo) 324 | versions = git_versions() 325 | logger.info('GIT VERSIONS: %s' % str(versions)) 326 | 327 | def exception_thrower(): 328 | argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False) 329 | visitor = PrimalVisitor( 330 | pipeline=argdict['pipeline'], 331 | ctrl=argdict['ctrl'], 332 | data_view=argdict['data_view'], 333 | max_n_features=argdict['max_n_features'], 334 | # TODO: just pass memmap_name directly 335 | memmap_name=memmap_name_template % (os.getpid(), 336 | np.random.randint(10000)), 337 | thresh_rank=1, 338 | optimize_l2_reg=True, 339 | batched_lmap_speed_thresh=argdict[ 340 | 'batched_lmap_speed_thresh'], 341 | badfit_thresh=None, 342 | batchsize=argdict['batchsize'], 343 | ) 344 | 345 | protocol_iter = argdict['data_view'].protocol_iter(visitor) 346 | msg, model = protocol_iter.next() 347 | assert msg == 'model validation complete' 348 | 349 | # -- save the loss, but 
don't save attachments yet. 350 | rdict = visitor.hyperopt_rval() 351 | rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction']) 352 | rdict['in_progress'] = True 353 | rdict['status'] = hyperopt.STATUS_OK 354 | argdict['ctrl'].checkpoint(rdict) 355 | 356 | if assume_promising: 357 | promising = True 358 | else: 359 | promising = view2_worth_calculating( 360 | loss=rdict['loss'], 361 | ctrl=argdict['ctrl'], 362 | thresh_loss=1.0, 363 | thresh_rank=1) 364 | 365 | logger.info('Promising: %s' % promising) 366 | if promising: 367 | msg, model2 = protocol_iter.next() 368 | assert msg == 'model testing complete' 369 | rdict = visitor.hyperopt_rval() 370 | rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction']) 371 | rdict['true_loss'] = true_loss_fn(visitor) 372 | visitor.attach_obj_results() 373 | else: 374 | logger.warn('Not testing unpromising model %s' % str(model)) 375 | del rdict['in_progress'] 376 | return visitor, rdict 377 | 378 | try: 379 | visitor, rdict = call_catching_pipeline_errors(exception_thrower) 380 | except USLM_Exception, e: 381 | exc, rdict = e.args 382 | logger.info('job failed: %s: %s' % (type(e), exc)) 383 | rdict['git_versions'] = versions 384 | return rdict 385 | 386 | -------------------------------------------------------------------------------- /hpconvnet/slm_visitor_esvc.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import copy 3 | import logging 4 | import os 5 | import time 6 | 7 | from hyperopt.base import SONify 8 | 9 | from .isvm_precomputed import EnsembleSVC 10 | from .slm_visitor import SLM_Visitor 11 | from .utils import loads_gram, dumps_gram 12 | from .pyll_slm import error_rate 13 | import foobar 14 | 15 | import skdata.data_home 16 | 17 | debug = logging.getLogger(__name__).debug 18 | info = logging.getLogger(__name__).info 19 | warn = logging.getLogger(__name__).warn 20 | 21 | _curdb = 'curdb' # XXX: terrible hack :( 22 | # _curdb is an abstraction leak -- MongoTrials has failed here. 23 | # we set it from lfw.py 24 | 25 | 26 | def cached_gram_load(tid, att_key): 27 | data_home = skdata.data_home.get_data_home() 28 | datafilename = os.path.join(data_home, 29 | 'hpconvnet', 'slm_visitor_esvc', _curdb, str(tid), att_key) 30 | return open(datafilename).read() 31 | 32 | 33 | def cached_gram_save(tid, att_key, data): 34 | data_home = skdata.data_home.get_data_home() 35 | cachedir = os.path.join(data_home, 36 | 'hpconvnet', 'slm_visitor_esvc', _curdb, str(tid)) 37 | datafilename = os.path.join(cachedir, att_key) 38 | info('Caching gram data %i/%s' % (tid, att_key)) 39 | if not os.path.exists(cachedir): 40 | os.makedirs(cachedir) 41 | datafile = open(datafilename, 'w+') 42 | datafile.write(data) 43 | datafile.close() 44 | 45 | 46 | class ESVC_SLM_Visitor(SLM_Visitor): 47 | """ 48 | Use an EnsembleSVC classifier, suitable for datasets with not too many 49 | examples (< 20000) and binary labels. 
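    Each previous trial contributes a kernel ("member"); gram matrices are
    stored as trial attachments (and cached on disk via cached_gram_save) so
    later ensemble members can reuse them without recomputing features.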
50 | """ 51 | def __init__(self, 52 | optimize_l2_reg=False, 53 | svm_crossvalid_max_evals=20, 54 | **kwargs): 55 | SLM_Visitor.__init__(self, **kwargs) 56 | self.optimize_l2_reg = optimize_l2_reg 57 | self.member_name = self._member_name() 58 | self.svm_crossvalid_max_evals = svm_crossvalid_max_evals 59 | 60 | self._results = { 61 | 'train_image_match_indexed': {}, 62 | 'retrain_classifier_image_match_indexed': {}, 63 | 'loss_image_match_indexed': {}, 64 | } 65 | 66 | if not self.optimize_l2_reg: 67 | raise NotImplementedError() 68 | 69 | def norm_key(self, sample, tid=None): 70 | if tid is None: 71 | member_name = self.member_name 72 | else: 73 | member_name = self._member_name(tid) 74 | norm_key = 'nkey_%s_%s' % (member_name, sample) 75 | return norm_key 76 | 77 | def load_ensemble_weights(self, norm_sample, task_name, ens): 78 | # -- load the weights from the most recent ensemble, if there is one. 79 | for trial in self.history[-1:]: 80 | info('Loading weights from document %i' % trial['tid']) 81 | trial_norm_key = self.norm_key(norm_sample, tid=trial['tid']) 82 | trial_weights = trial['result']['weights'] 83 | norm_task_weights = trial_weights[trial_norm_key][task_name] 84 | for norm_key, weight in norm_task_weights.items(): 85 | if ens.has_member(norm_key): 86 | ens.set_weight(norm_key, weight) 87 | else: 88 | ens.add_member(norm_key, weight) 89 | info(' .. weight[%s] = %s' % (norm_key, weight)) 90 | foobar.append_trace('load ensemble weights', norm_key, weight) 91 | 92 | def load_ensemble_grams(self, norm_sample, ens, sample1, sample2): 93 | trial_attachments = self.ctrl.trials.trial_attachments 94 | 95 | # -- load the gram matrices saved by each ensemble member 96 | for trial in self.history: 97 | trial_norm_key = self.norm_key(norm_sample, tid=trial['tid']) 98 | info('Loading grams from document %i' % trial['tid']) 99 | debug(' .. saved_grams: %s' % 100 | str(trial['result']['grams'][trial_norm_key])) 101 | for (s1, s2) in trial['result']['grams'][trial_norm_key]: 102 | if set([sample1, sample2]) == set([s1, s2]): 103 | if not ens.has_gram(trial_norm_key, s1, s2): 104 | att_key = 'gram_%s_%s_%s.pkl' % (trial_norm_key, s1, s2) 105 | info('retrieving gram_data %i:%s' 106 | % (trial['tid'], att_key)) 107 | try: 108 | gram_data = cached_gram_load(trial['tid'], att_key) 109 | except IOError: 110 | gram_data = trial_attachments(trial)[att_key] 111 | cached_gram_save(trial['tid'], att_key, gram_data) 112 | info('retrieved %i bytes' % len(gram_data)) 113 | gram = loads_gram(gram_data) 114 | if s1 == sample1: 115 | ens.add_gram(trial_norm_key, sample1, sample2, gram) 116 | else: 117 | ens.add_gram(trial_norm_key, sample1, sample2, 118 | gram.T) 119 | foobar.append_ndarray_signature( 120 | gram, 121 | 'load gram', trial_norm_key, sample1, sample2) 122 | info('Loading grams done') 123 | 124 | def hyperopt_rval(self, save_grams): 125 | rval = copy.deepcopy(self._results) 126 | rval['attachments'] = {} 127 | rval['grams'] = {} 128 | rval['weights'] = {} 129 | rval['trace'] = copy.deepcopy(foobar._trace) 130 | 131 | saved = set() 132 | 133 | def jsonify_train_results(rkey): 134 | for norm_key in rval[rkey]: 135 | 136 | for task_name in rval[rkey][norm_key]: 137 | svm_dct = rval[rkey][norm_key][task_name] 138 | ens = svm_dct.pop('ens') 139 | 140 | rval['weights'].setdefault(norm_key, {}) 141 | rval['weights'][norm_key][task_name] = ens._weights 142 | 143 | # -- stash these as attachments because they fill up the db. 
144 | xmean = svm_dct.pop('xmean') 145 | xstd = svm_dct.pop('xstd') 146 | 147 | if save_grams: 148 | xmean_key = 'xmean_%s_%s_%s' % (rkey, norm_key, task_name) 149 | xstd_key = 'xstd_%s_%s_%s' % (rkey, norm_key, task_name) 150 | rval['attachments'][xmean_key] = cPickle.dumps(xmean, -1) 151 | rval['attachments'][xstd_key] = cPickle.dumps(xstd, -1) 152 | 153 | rval['grams'].setdefault(norm_key, []) 154 | for (inorm_key, sample1, sample2) in ens._grams: 155 | if inorm_key != norm_key: 156 | # -- we're only interested in saving the grams 157 | # calculated by this run. 158 | continue 159 | if (norm_key, sample1, sample2) in saved: 160 | # -- already saved this one 161 | continue 162 | 163 | att_key = 'gram_%s_%s_%s.pkl' % ( 164 | norm_key, sample1, sample2) 165 | 166 | info('saving %s' % att_key) 167 | 168 | gram = ens._grams[(norm_key, sample1, sample2)] 169 | rval['attachments'][att_key] = dumps_gram( 170 | gram.astype('float32')) 171 | 172 | rval['grams'][norm_key].append((sample1, sample2)) 173 | 174 | saved.add((norm_key, sample1, sample2)) 175 | saved.add((norm_key, sample2, sample1)) 176 | 177 | jsonify_train_results('train_image_match_indexed') 178 | jsonify_train_results('retrain_classifier_image_match_indexed') 179 | 180 | return SONify(rval) 181 | 182 | def forget_task(self, task_name): 183 | 184 | # free up RAM by deleting all features computed for task_name 185 | def delete_features(rkey): 186 | for norm_key in self._results[rkey]: 187 | if task_name in self._results[rkey][norm_key]: 188 | svm_dct = self._results[rkey][norm_key][task_name] 189 | svm_dct['ens'].del_features(norm_key, task_name) 190 | 191 | delete_features('train_image_match_indexed') 192 | delete_features('retrain_classifier_image_match_indexed') 193 | 194 | def train_image_match_indexed(self, task, valid=None): 195 | 196 | pipeline = self.pipeline 197 | 198 | info('training svm on %s' % task.name) 199 | ens = EnsembleSVC(task.name) 200 | 201 | norm_task = task.name 202 | norm_key = self.norm_key(norm_task) 203 | svm_dct = { 204 | 'ens': ens, 205 | 'norm_key': norm_key, 206 | 'norm_task': task.name, 207 | 'task_name': task.name, 208 | } 209 | 210 | ens.add_member(norm_key) 211 | ens.add_sample(task.name, task.y) 212 | x_trn = self.normalized_image_match_features(task, svm_dct, 213 | role='train') 214 | ens.add_features(norm_key, task.name, x_trn) 215 | 216 | foobar.append_ndarray_signature(x_trn, 217 | 'train_image x_trn', norm_key, task.name) 218 | 219 | info('computing gram: %s / %s / %s' % ( 220 | norm_key, task.name, task.name)) 221 | ens.compute_gram(norm_key, task.name, task.name, dtype='float32') 222 | 223 | foobar.append_ndarray_signature( 224 | ens._grams[(norm_key, task.name, task.name)], 225 | 'train_image train_gram', norm_key, task.name) 226 | 227 | if valid is not None: 228 | info('cross-validating svm on %s' % valid.name) 229 | x_val = self.normalized_image_match_features(valid, svm_dct, 230 | role='test', 231 | # -- assume that slow features were caught earlier 232 | batched_lmap_speed_thresh={'seconds': 30, 'elements': 1}, 233 | ) 234 | foobar.append_ndarray_signature( 235 | x_val, 236 | 'train_image x_val', norm_key, valid.name, task.name) 237 | 238 | ens.add_sample(valid.name, valid.y) 239 | ens.add_features(norm_key, valid.name, x_val) 240 | 241 | info('computing gram: %s / %s / %s' % ( 242 | norm_key, valid.name, task.name)) 243 | ens.compute_gram(norm_key, valid.name, task.name, dtype='float32') 244 | foobar.append_ndarray_signature( 245 | ens._grams[(norm_key, valid.name, task.name)], 246 | 
'train_image valid_gram', norm_key, valid.name, task.name) 247 | 248 | # -- re-fit the model using best weights on train + valid sets 249 | info('computing gram: %s / %s / %s' % ( 250 | norm_key, valid.name, valid.name)) 251 | ens.compute_gram(norm_key, valid.name, valid.name, dtype='float32') 252 | 253 | train_valid = '%s_%s' % (task.name, valid.name) 254 | ens.add_compound_sample(train_valid, [task.name, valid.name]) 255 | 256 | 257 | def load_history(): 258 | info('loading history') 259 | self.load_ensemble_history( 260 | fields=['result.weights','result.grams']) 261 | self.load_ensemble_weights(norm_task, task.name, ens) 262 | self.load_ensemble_grams(norm_task, ens, task.name, task.name) 263 | if valid is not None: 264 | self.load_ensemble_grams(norm_task, ens, valid.name, task.name) 265 | self.load_ensemble_grams(norm_task, ens, valid.name, valid.name) 266 | 267 | 268 | def train_main(): 269 | ens.train_sample = task.name 270 | 271 | t0 = time.time() 272 | if valid is None: 273 | svm_dct['l2_reg'] = pipeline['l2_reg'] 274 | ens.fit_svm(svm_dct['l2_reg']) 275 | svm_dct['train_error'] = ens.error_rate(task.name) 276 | svm_dct['loss'] = svm_dct['train_error'] 277 | else: 278 | 279 | #scales = {m: 3.0 for m in ens._weights} 280 | scales = dict([(m, 3.0) for m in ens._weights]) 281 | scales[norm_key] = 100.0 282 | 283 | info('fit_weights_crossvalid(%s, %i)' % ( 284 | valid.name, self.svm_crossvalid_max_evals)) 285 | ens.fit_weights_crossvalid(valid.name, 286 | max_evals=self.svm_crossvalid_max_evals, 287 | scales=scales) 288 | 289 | foobar.append_trace('xvalid weights', sorted(ens._weights.items())) 290 | 291 | svm_dct['task_error'] = ens.error_rate(task.name) 292 | foobar.append_trace('task_error', svm_dct['task_error']) 293 | 294 | svm_dct['valid_name'] = valid.name 295 | svm_dct['valid_error'] = ens.error_rate(valid.name) 296 | info('valid_error %f' % svm_dct['valid_error']) 297 | foobar.append_trace('valid_error', svm_dct['valid_error']) 298 | 299 | svm_dct['l2_reg'] = None # -- use default when retraining 300 | 301 | # -- re-fit the model using best weights on train + valid sets 302 | ens.train_sample = train_valid 303 | ens.fit_svm() 304 | 305 | fit_time = time.time() - t0 306 | svm_dct['fit_time'] = fit_time 307 | 308 | 309 | info('training with just the current features...') 310 | train_main() 311 | svm_dct['task_error_no_ensemble'] = svm_dct['task_error'] 312 | svm_dct['valid_error_no_ensemble'] = svm_dct['valid_error'] 313 | 314 | load_history() 315 | if self.history: 316 | info('training the full ensemble...') 317 | train_main() 318 | 319 | try: 320 | print_summary = ens.print_summary 321 | except AttributeError: 322 | print_summary = lambda : None 323 | 324 | print_summary() 325 | 326 | dct = self._results['train_image_match_indexed'] 327 | dct.setdefault(norm_key, {}) 328 | if task.name in dct[norm_key]: 329 | warn('Overwriting train_image_match_indexed result: %s' 330 | % task.name) 331 | dct[norm_key][task.name] = svm_dct 332 | 333 | return svm_dct 334 | 335 | def retrain_classifier_image_match_indexed(self, model, task): 336 | # We are making the decision that retraining a classifier means not 337 | # retraining the weights or the features, but just retraining the 338 | # libsvm part. 
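        # -- Concretely (see the code below): ens, xmean/xstd and l2_reg are
        #    copied from `model` unchanged; only the features and gram matrix
        #    for the new task are computed, and ens.fit_svm() is re-run.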
339 | 340 | ens = model['ens'].copy() 341 | ens.train_sample = task.name 342 | svm_dct = dict( 343 | ens=ens, 344 | norm_key=model['norm_key'], 345 | norm_task=model['norm_task'], 346 | task_name=task.name, 347 | xmean=model['xmean'], 348 | xstd=model['xstd'], 349 | l2_reg=model['l2_reg'], 350 | ) 351 | if 'divrowl2_avg_nrm' in model: 352 | svm_dct['divrowl2_avg_nrm'] = model['divrowl2_avg_nrm'] 353 | norm_key = svm_dct['norm_key'] 354 | norm_task = svm_dct['norm_task'] 355 | info('retraining on %s (norm_task=%s)' % (task.name, norm_task)) 356 | 357 | ens.add_sample(task.name, task.y) 358 | x_trn = self.normalized_image_match_features(task, svm_dct, 359 | # -- do not recompute mean and var 360 | role='test', 361 | # -- assume that slow features were caught earlier 362 | batched_lmap_speed_thresh={'seconds': 30, 'elements': 1}, 363 | ) 364 | ens.add_features(norm_key, task.name, x_trn) 365 | 366 | self.load_ensemble_grams(norm_task, ens, task.name, task.name) 367 | ens.compute_gram(norm_key, task.name, task.name, dtype='float32') 368 | 369 | ens.fit_svm(svm_dct['l2_reg']) 370 | svm_dct['task_error'] = ens.error_rate(task.name) 371 | 372 | info('retrain_classifier: %s -> %f' % ( 373 | (norm_key, task.name), svm_dct['task_error'])) 374 | 375 | dct = self._results['retrain_classifier_image_match_indexed'] 376 | dct.setdefault(norm_key, {}) 377 | if task.name in dct[norm_key]: 378 | warn('Overwriting retrain_classifier_image_match_indexed result: %s' 379 | % task.name) 380 | dct[norm_key][task.name] = svm_dct 381 | return svm_dct 382 | 383 | def loss_image_match_indexed(self, svm_dct, task): 384 | norm_task = svm_dct['norm_task'] 385 | norm_key = svm_dct['norm_key'] 386 | 387 | info('loss_image_match_indexed: %s, %s' % (norm_key, task.name) ) 388 | x = self.normalized_image_match_features(task, svm_dct, 'test', 389 | # -- assume that slow features were caught earlier 390 | batched_lmap_speed_thresh={'seconds': 30, 'elements': 1}, 391 | ) 392 | svm_dct['ens'].add_sample(task.name, task.y) 393 | svm_dct['ens'].add_features(norm_key, task.name, x) 394 | 395 | self.load_ensemble_grams(norm_task, svm_dct['ens'], task.name, 396 | svm_dct['ens'].train_sample) 397 | svm_dct['ens'].compute_gram(norm_key, task.name, 398 | svm_dct['ens'].train_sample, dtype='float32') 399 | 400 | preds = svm_dct['ens'].predict(task.name) 401 | erate = error_rate(preds, task.y) 402 | info('test_image_match_indexed error_rate %s -> %f' % ( 403 | task.name, erate)) 404 | 405 | # -- add summary information to self._results 406 | dct = self._results['loss_image_match_indexed'] 407 | dct.setdefault(norm_key, {}) 408 | if task.name in dct[norm_key]: 409 | warn('Overwriting loss_image_match_indexed result: %s' 410 | % task.name) 411 | dct[norm_key][task.name] = { 412 | 'error_rate': erate, 413 | 'norm_key': norm_key, 414 | 'task_name': task.name, 415 | 'preds_01': ''.join( 416 | ['0' if p == -1 else '1' for p in preds]), 417 | } 418 | return erate 419 | 420 | -------------------------------------------------------------------------------- /hpconvnet/slm.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import time 3 | import numpy as np 4 | import StringIO 5 | import traceback 6 | 7 | from hyperopt import pyll 8 | from hyperopt.pyll import scope 9 | 10 | import hyperopt 11 | from hyperopt.pyll_utils import hp_choice 12 | from hyperopt.pyll_utils import hp_uniform 13 | from hyperopt.pyll_utils import hp_quniform 14 | from hyperopt.pyll_utils import hp_loguniform 15 | from 
hyperopt.pyll_utils import hp_qloguniform 16 | from hyperopt.pyll_utils import hp_normal 17 | from hyperopt.pyll_utils import hp_lognormal 18 | 19 | import pyll_slm # adds the symbols to pyll.scope 20 | 21 | pyll.scope.import_(globals(), 22 | # -- from pyll 23 | 'partial', 24 | 'callpipe1', 25 | 'switch', 26 | 'sqrt', 27 | # 28 | # -- misc. from ./pyll_slm.py 29 | 'pyll_theano_batched_lmap', 30 | 'model_predict', 31 | 'model_decisions', 32 | 'error_rate', 33 | 'mean_and_std', 34 | 'flatten_elems', 35 | 'np_transpose', 36 | 'np_RandomState', 37 | 'print_ndarray_summary', 38 | 'pickle_dumps', 39 | # 40 | # -- filterbank allocators (./pyll.slm.py) 41 | 'random_patches', 42 | 'alloc_random_uniform_filterbank', 43 | 'patch_whitening_filterbank_X', 44 | 'fb_whitened_patches', 45 | 'fb_whitened_projections', 46 | 'slm_uniform_M_FB', 47 | # 48 | # -- pipeline elements (./pyll.slm.py) 49 | 'slm_affine_image_warp', 50 | 'slm_img_uint8_to_float32', 51 | 'slm_lpool', 52 | 'slm_lnorm', 53 | 'slm_lpool_alpha', 54 | 'slm_fbncc_chmaj', 55 | 'slm_quantize_gridpool', 56 | # 57 | # -- renamed symbols 58 | **{ 59 | # NEW NAME: ORIG NAME 60 | 's_int': 'int', 61 | 's_float': 'float', 62 | 'pyll_getattr': 'getattr', 63 | }) 64 | 65 | # -- where is this supposed to go? 66 | divide_by_avg_norm=False 67 | 68 | 69 | def stable_hash(s): 70 | if isinstance(s, basestring): 71 | return int(hashlib.sha224(s).hexdigest(), 16) 72 | else: 73 | raise TypeError(s) 74 | 75 | 76 | def hp_TF(label): 77 | return hp_choice(label, [0, 1]) 78 | 79 | 80 | def rfilter_size(label, smin, smax, q=1): 81 | """Return an integer size from smin to smax inclusive with equal prob 82 | """ 83 | return s_int(hp_quniform(label, smin - q / 2.0 + 1e-5, smax + q / 2.0, q)) 84 | 85 | 86 | def logu_range(label, lower, upper): 87 | """Return a continuous replacement for one_of(.1, 1, 10)""" 88 | return hp_loguniform(label, np.log(lower), np.log(upper)) 89 | 90 | 91 | def new_fbncc_layer(prefix, Xcm, n_patches, n_filters, size, 92 | memlimit=5e8, # -- limit patches array to 500MB 93 | ): 94 | def lab(msg): 95 | return '%s_fbncc_%s' % (prefix, msg) 96 | 97 | def get_rseed(name, N): 98 | fullname = lab(name) 99 | low = stable_hash(fullname) % (2 ** 31) 100 | rval = hp_choice(fullname, range(low, low + N)) 101 | return rval 102 | 103 | patches = random_patches( 104 | Xcm, n_patches, size, size, 105 | rng=np_RandomState(get_rseed('patch_rseed', 10)), 106 | channel_major=True, 107 | memlimit=memlimit) 108 | 109 | remove_mean = hp_TF(lab('remove_mean')) 110 | beta = hp_lognormal(lab('beta'), np.log(100), np.log(100)) 111 | hard_beta = hp_TF(lab('hard')) 112 | 113 | # TODO: use different nfilters, beta etc. 
for each algo 114 | 115 | # -- random projections filterbank allocation 116 | random_projections = partial(slm_fbncc_chmaj, 117 | m_fb=slm_uniform_M_FB( 118 | nfilters=n_filters, 119 | size=size, 120 | channels=pyll_getattr(Xcm, 'shape')[1], 121 | rseed=get_rseed('r_rseed', 10), 122 | normalize=hp_TF(lab('r_normalize')), 123 | dtype='float32', 124 | ret_cmajor=True, 125 | ), 126 | remove_mean=remove_mean, 127 | beta=beta, 128 | hard_beta=hard_beta) 129 | 130 | # -- random whitened projections filterbank allocation 131 | random_whitened_projections = partial(slm_fbncc_chmaj, 132 | m_fb=fb_whitened_projections(patches, 133 | patch_whitening_filterbank_X(patches, 134 | gamma=hp_lognormal(lab('wr_gamma'), 135 | np.log(1e-2), np.log(100)), 136 | o_ndim=2, 137 | remove_mean=remove_mean, 138 | beta=beta, 139 | hard_beta=hard_beta, 140 | ), 141 | n_filters=n_filters, 142 | rseed=get_rseed('wr_rseed', 10), 143 | dtype='float32', 144 | ), 145 | remove_mean=remove_mean, 146 | beta=beta, 147 | hard_beta=hard_beta) 148 | 149 | # -- whitened patches filterbank allocation 150 | whitened_patches = partial(slm_fbncc_chmaj, 151 | m_fb=fb_whitened_patches(patches, 152 | patch_whitening_filterbank_X(patches, 153 | gamma=hp_lognormal(lab('wp_gamma'), 154 | np.log(1e-2), np.log(100)), 155 | o_ndim=2, 156 | remove_mean=remove_mean, 157 | beta=beta, 158 | hard_beta=hard_beta, 159 | ), 160 | n_filters=n_filters, 161 | rseed=get_rseed('wp_rseed', 10), 162 | dtype='float32', 163 | ), 164 | remove_mean=remove_mean, 165 | beta=beta, 166 | hard_beta=hard_beta) 167 | 168 | # --> MORE FB LEARNING ALGOS HERE <-- 169 | # TODO: V1-like filterbank (incl. with whitening matrix) 170 | # TODO: sparse coding 171 | # TODO: OMP from Coates 2011 172 | # TODO: K-means 173 | # TODO: RBM 174 | # TODO: DAA 175 | # TODO: ssRBM 176 | rchoice = hp_choice(lab('algo'), [ 177 | random_projections, 178 | random_whitened_projections, 179 | whitened_patches, 180 | ]) 181 | return rchoice 182 | 183 | 184 | def pipeline_extension(prefix, X, n_patches, max_filters): 185 | assert max_filters > 16 186 | f_layer = new_fbncc_layer(prefix, X, n_patches, 187 | n_filters=s_int( 188 | hp_qloguniform('%sfb_nfilters' % prefix, 189 | np.log(8.01), np.log(max_filters), q=16)), 190 | size=rfilter_size('%sfb_size' % prefix, 3, 8), 191 | ) 192 | 193 | p_layer = partial(slm_lpool, 194 | stride=hp_choice('%sp_stride' % prefix, [1, 2]), 195 | order=hp_choice('%sp_order' % prefix, 196 | [1, 2, hp_lognormal('%sp_order_real' % prefix, 197 | mu=np.log(1), sigma=np.log(3))]), 198 | ker_size=rfilter_size('%sp_size' % prefix, 2, 8)) 199 | 200 | return [f_layer, p_layer] 201 | 202 | 203 | def new_exit(pipeline, prefix): 204 | def lab(msg): 205 | return prefix % msg 206 | return { 207 | 'pipe': pipeline, 208 | 'remove_std0': 209 | hp_TF(lab('remove_std0')), 210 | 'varthresh': 211 | hp_lognormal(lab('varthresh'), 212 | np.log(1e-4), np.log(1000)), 213 | 'l2_reg': hp_lognormal(lab('l2_reg'), 214 | np.log(1e-5), np.log(1e3)), 215 | 'divrowl2': hp_TF(lab('divrowl2')), 216 | } 217 | 218 | 219 | def exit_grid(pipeline, layer_num, Xcm, n_patches, max_n_features): 220 | def lab(msg): 221 | return 'l%ieg_%s' % (layer_num, msg) 222 | 223 | fsize = rfilter_size(lab('fsize'), 3, 8) 224 | 225 | grid_res = hp_choice(lab('res'), [2, 3]) 226 | grid_features_per_filter = 2 * (grid_res ** 2) 227 | grid_nfilters = max_n_features // grid_features_per_filter 228 | 229 | grid_filtering = new_fbncc_layer( 230 | prefix='l%ieg' % layer_num, 231 | Xcm=Xcm, 232 | n_patches=n_patches, 233 | 
n_filters=grid_nfilters, 234 | size=fsize, 235 | ) 236 | 237 | grid_pooling = partial(slm_quantize_gridpool, 238 | alpha=hp_normal(lab('alpha'), 0.0, 1.0), 239 | use_mid=False, 240 | grid_res=grid_res, 241 | order=hp_choice(lab('order'), [ 242 | 1.0, 2.0, logu_range(lab('order_real'), .1, 10.)])) 243 | 244 | return new_exit(pipeline + [grid_filtering, grid_pooling], lab('%s')) 245 | 246 | 247 | def exit_lpool_alpha(pipeline, layer_num, Xcm, n_patches, max_n_features): 248 | def lab(msg): 249 | return 'l%ielpa_%s' % (layer_num, msg) 250 | 251 | fsize = rfilter_size(lab('fsize'), 3, 8) 252 | filtering_res = pyll_getattr(Xcm, 'shape')[2] - fsize + 1 253 | # -- N.B. Xrows depends on other params, so we can't use it to set the 254 | # upper bound on lpsize. We can only sample independently, and 255 | # then fail below with non-positive number of features. 256 | size = rfilter_size(lab('lpsize'), 1, 5) 257 | stride = hp_choice(lab('stride'), [1, 2, 3]) 258 | res = scope.ceildiv(scope.max(filtering_res - size + 1, 0), stride) 259 | if 0: 260 | # XXX: This is a smarter way to pick the n_filters, but it triggers 261 | # a bug in hyperopt.vectorize_helper. The build_idxs_vals function 262 | # there needs to be smarter -- to recognize when wanted_idxs is a 263 | # necessarily subset of the all_idxs, and then not to append 264 | # wanted_idxs to the union defining all_idxs... because that creates a 265 | # cycle. The trouble is specifically that lpool_res is used in the 266 | # switch statement below both in the condition and the response. 267 | nfilters = switch(res > 0, 268 | max_n_features // (2 * (res ** 2)), 269 | scope.Raise(ValueError, 'Non-positive number of features')) 270 | else: 271 | # this is less good because it risks dividing by zero, 272 | # and forces the bandit to catch weirder errors from new_fbncc_layer 273 | # caused by negative nfilters 274 | nfilters = max_n_features // (2 * (res ** 2)) 275 | 276 | filtering = new_fbncc_layer( 277 | prefix='l%iel' % layer_num, 278 | Xcm=Xcm, 279 | n_patches=n_patches, 280 | n_filters=nfilters, 281 | size=fsize, 282 | ) 283 | 284 | pooling = partial(slm_lpool_alpha, 285 | ker_size=size, 286 | stride=stride, 287 | alpha=hp_normal(lab('alpha'), 0.0, 1.0), 288 | order=hp_choice(lab('order_choice'), [ 289 | 1.0, 2.0, logu_range(lab('order_real'), .1, 10.)])) 290 | 291 | return new_exit(pipeline + [filtering, pooling], lab('%s')) 292 | 293 | 294 | def exit_lpool(pipeline, layer_num, Xcm, n_patches, max_n_features): 295 | def lab(msg): 296 | return 'l%i_out_lp_%s' % (layer_num, msg) 297 | 298 | fsize = rfilter_size(lab('fsize'), 3, 8) 299 | filtering_res = pyll_getattr(Xcm, 'shape')[2] - fsize + 1 300 | # -- N.B. Xrows depends on other params, so we can't use it to set the 301 | # upper bound on lpsize. We can only sample independently, and 302 | # then fail below with non-positive number of features. 
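    #    A worked example of the bookkeeping below (illustrative numbers only,
    #    assuming a 32x32 input map and max_n_features=16000):
    #      fsize=5            -> filtering_res = 32 - 5 + 1 = 28
    #      psize=3, stride=2  -> pooling_res   = ceildiv(28 - 3 + 1, 2) = 13
    #      nsize=3            -> norm_res      = 13 - 3 + 1 = 11
    #      nfilters = 16000 // 11**2 = 132
    #    If norm_res comes out <= 0, the max(norm_res, 0) term makes the
    #    division raise at rec_eval time, as noted below.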
303 | psize = rfilter_size(lab('psize'), 1, 5) 304 | stride = hp_choice(lab('stride'), [1, 2, 3]) 305 | pooling_res = scope.ceildiv(filtering_res - psize + 1, stride) 306 | nsize = rfilter_size(lab('nsize'), 1, 5) 307 | norm_res = pooling_res - nsize + 1 308 | 309 | # -- raises exception at rec_eval if norm_res is 0 310 | nfilters = max_n_features // (scope.max(norm_res, 0) ** 2) 311 | 312 | filtering = new_fbncc_layer( 313 | prefix='l%ielp' % layer_num, 314 | Xcm=Xcm, 315 | n_patches=n_patches, 316 | n_filters=nfilters, 317 | size=fsize, 318 | ) 319 | 320 | pooling = partial(slm_lpool, 321 | ker_size=psize, 322 | stride=stride, 323 | order=hp_choice(lab('order_choice'), [ 324 | 1.0, 2.0, logu_range(lab('order_real'), .1, 10.)])) 325 | 326 | normalization = partial(slm_lnorm, 327 | ker_size=nsize, 328 | remove_mean=hp_TF(lab('norm_rmean')), 329 | threshold=hp_lognormal(lab('norm_thresh'), 330 | np.log(1.0), np.log(3)), 331 | ) 332 | 333 | seq = hp_choice(lab('use_norm'), [ 334 | [filtering, pooling], 335 | [filtering, pooling, normalization]]) 336 | 337 | return new_exit(pipeline + seq, lab('%s')) 338 | 339 | 340 | def pipeline_exits(pipeline, layer_num, Xcm, n_patches, max_n_features): 341 | grid = exit_grid(pipeline, layer_num, Xcm, n_patches, max_n_features) 342 | 343 | lpool_alpha = exit_lpool_alpha(pipeline, layer_num, Xcm, n_patches, 344 | max_n_features) 345 | 346 | lpool = exit_lpool(pipeline, layer_num, Xcm, n_patches, max_n_features) 347 | 348 | return [grid, lpool_alpha, lpool] 349 | 350 | 351 | def uslm_domain(Xcm, 352 | batchsize, 353 | chmjr_image_shape, 354 | output_sizes, 355 | n_patches=50000, 356 | max_n_features=16000, 357 | max_layer_sizes=(64, 128), 358 | batched_lmap_speed_thresh=None, 359 | permit_affine_warp=True, 360 | abort_on_rows_larger_than=None, 361 | ): 362 | """ 363 | This function works by creating a linear pipeline, with multiple exit 364 | points that could be the feature representation for classification. 365 | 366 | The function returns a switch among all of these exit points. 
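
    For example, with the default max_layer_sizes=(64, 128) there are three
    exit types (grid, lpool_alpha, lpool) at each of three depths, so the
    returned hp_choice("exit", ...) switches over nine candidate feature
    pipelines.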
367 | """ 368 | start_time = time.time() 369 | 370 | XC, XH, XW = chmjr_image_shape 371 | osize = hp_choice('warp_osize', output_sizes) 372 | 373 | assert XW > 3, chmjr_image_shape # -- make sure we don't screw up channel-major 374 | 375 | warp_options = [ 376 | # -- option 1: simple resize 377 | partial(slm_affine_image_warp, 378 | rot=0, 379 | shear=0, 380 | scale=[s_float(osize) / XH, s_float(osize) / XW], 381 | trans=[0, 0], 382 | oshape=[osize, osize]), 383 | ] 384 | if permit_affine_warp: 385 | # -- option 2: resize with rotation, shear, translation 386 | warp_options.append( 387 | partial(slm_affine_image_warp, 388 | rot=hp_uniform('warp_rot', low=-0.3, high=0.3), 389 | shear=hp_uniform('warp_shear', low=-0.3, high=0.3), 390 | # -- most of the scaling comes via osize 391 | scale=[ 392 | hp_uniform('warp_scale_h', low=0.8, high=1.2) * osize / XH, 393 | hp_uniform('warp_scale_v', low=0.8, high=1.2) * osize / XW, 394 | ], 395 | trans=[ 396 | hp_uniform('warp_trans_h', low=-0.2, high=0.2) * osize, 397 | hp_uniform('warp_trans_v', low=-0.2, high=0.2) * osize, 398 | ], 399 | oshape=[osize, osize] 400 | )) 401 | pipeline = [slm_img_uint8_to_float32, 402 | hp_choice('warp', warp_options)] 403 | Xcm = pyll_theano_batched_lmap( 404 | partial(callpipe1, pipeline), 405 | Xcm, 406 | batchsize=batchsize, 407 | print_progress_every=10, 408 | speed_thresh=batched_lmap_speed_thresh, 409 | abort_on_rows_larger_than=abort_on_rows_larger_than, 410 | x_dtype='uint8', 411 | )[:] 412 | 413 | exits = pipeline_exits( 414 | pipeline, 415 | layer_num=0, 416 | Xcm=Xcm, 417 | n_patches=n_patches, 418 | max_n_features=max_n_features) 419 | for layer_i, max_layer_size in enumerate(max_layer_sizes): 420 | extension = pipeline_extension( 421 | 'l%i' % layer_i, Xcm, n_patches, max_layer_size) 422 | 423 | pipeline.extend(extension) 424 | Xcm = pyll_theano_batched_lmap( 425 | partial(callpipe1, extension), 426 | Xcm, # scope.print_ndarray_summary('Xcm %i' % layer_i, Xcm), 427 | batchsize=batchsize, 428 | print_progress_every=10, 429 | speed_thresh=batched_lmap_speed_thresh, 430 | abort_on_rows_larger_than=abort_on_rows_larger_than, 431 | )[:] 432 | # -- indexing computes all the values (during rec_eval) 433 | exits.extend( 434 | pipeline_exits( 435 | pipeline=pipeline, 436 | layer_num=layer_i + 1, 437 | Xcm=Xcm, 438 | n_patches=n_patches, 439 | max_n_features=max_n_features)) 440 | 441 | return hp_choice("exit", exits) 442 | 443 | 444 | class USLM_Exception(Exception): 445 | pass 446 | 447 | 448 | def call_catching_pipeline_errors(fn): 449 | def raise_error(e): 450 | sio = StringIO.StringIO() 451 | traceback.print_exc(None, sio) 452 | tb = sio.getvalue() 453 | raise USLM_Exception(e, { 454 | 'loss': float(1.0), 455 | 'status': hyperopt.STATUS_FAIL, 456 | 'failure': { 457 | 'type': str(type(e)), 458 | 'exc': repr(e), 459 | 'tb': tb, 460 | }}) 461 | try: 462 | return fn() 463 | except pyll_slm.InvalidDescription, e: 464 | raise_error(e) 465 | except pyll_slm.EvalTimeout, e: 466 | raise_error(e) 467 | except ZeroDivisionError, e: 468 | raise_error(e) 469 | except MemoryError, e: 470 | raise_error(e) 471 | except OSError, e: 472 | if 'allocate memory' in str(e): 473 | raise_error(e) 474 | else: 475 | raise 476 | except ValueError, e: 477 | if (('rowlen' in str(e) and 'exceeds limit' in str(e)) 478 | or ('dimension mis-match' in str(e) and '= 0' in str(e)) 479 | or ('had size 0' in str(e)) 480 | or ('size on that axis is 0' in str(e)) 481 | or ('non-finite features' in str(e)) 482 | ): 483 | raise_error(e) 484 | else: 485 
| raise 486 | except RuntimeError, e: 487 | if (('taking too long' in str(e)) 488 | or ('allocate memory' in str(e)) 489 | or ('kernel_reduce_sum' in str(e) and 'block: 0 x' in str(e)) 490 | or ('CudaNdarray has dim 0' in str(e)) 491 | ): 492 | raise_error(e) 493 | else: 494 | raise 495 | 496 | -------------------------------------------------------------------------------- /hpconvnet/isvm_multi.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file supports the incremental construction of an SVM classifier 3 | by partially-corrective boosting on the hinge loss. 4 | 5 | 6 | Each incremental solver minimizes 7 | 8 | hinge( dot(X, w) + b + alpha * prev_Wx) 9 | + lambda * (|w|^2 + |alpha * prev_W|^2) 10 | 11 | 12 | Each solver is designed to be run on a subset of all available features. 13 | 14 | """ 15 | import copy 16 | import logging 17 | import gc 18 | import os 19 | import shutil 20 | import sys 21 | 22 | import numpy as np 23 | import autodiff 24 | import theano # abstraction leak to pass mode for optimization 25 | 26 | import tempfile 27 | import skdata.larray # for large tempfile creation (tempdir isn't always big enough) 28 | 29 | from .utils import dot 30 | 31 | logger = logging.getLogger(__name__) 32 | info = logger.info 33 | warn = logger.warn 34 | error = logger.error 35 | 36 | _default_bfgs_kwargs = {'factr': 100} 37 | 38 | def hinge(margin): 39 | """ 40 | Classic hinge loss 41 | """ 42 | return np.maximum(0, 1 - margin) 43 | 44 | 45 | def hinge2(margin): 46 | """ 47 | Looks pretty much like margin, but the corner is smoothed out 48 | """ 49 | return np.log1p(np.exp(10 * (0.9 - margin))) / 10 50 | 51 | 52 | 53 | def multi_svm_hinge_loss(x, y, weights, bias, alpha, pxw, pw_l2_sqr, 54 | l2_regularization, pl2_regularization): 55 | """ 56 | x: (n_examples, n_features) 57 | y: (n_examples, n_classes) 58 | weights: (n_feaures, n_classes) 59 | bias: (n_classes,) 60 | alpha: (n_prev, n_classes) 61 | pxw: (n_examples, n_classes, n_prev) 62 | pw_l2_sqr: (n_prev, n_classes) 63 | l2_regularization: () 64 | pl2_regularization: (n_prev,) 65 | """ 66 | 67 | n_prev, n_classes = alpha.shape 68 | xw = dot(x, weights) 69 | if n_prev: 70 | assert pw_l2_sqr.shape == alpha.shape, ( 71 | 'pw_l2_sqr shape', pw_l2_sqr.shape, 72 | 'alpha shape', alpha.shape) 73 | if np.any(pw_l2_sqr < 0): 74 | raise ValueError('prev_w_l2_sqr may not be negative') 75 | prev_l2_sqr = np.sum( 76 | pl2_regularization[:, None] * pw_l2_sqr * (alpha ** 2)) 77 | n_examples, n_classes2, n_prev2 = pxw.shape 78 | assert n_prev2 == n_prev, ('n_prev', n_prev, n_prev2) 79 | assert n_classes2 == n_classes, ('n_classes', n_classes, n_classes2) 80 | prev_xw = (pxw * alpha.T).sum(axis=2) 81 | assert prev_xw.shape == xw.shape, ('xw', xw.shape, prev_xw.shape) 82 | xw = xw + prev_xw 83 | else: 84 | prev_l2_sqr = 0.0 85 | 86 | margin = y * (xw + bias ) 87 | losses = hinge2(margin).mean(axis=0).sum() 88 | 89 | cur_l2_sqr = l2_regularization * (weights * weights).sum() 90 | l2_reg = 0.5 * (cur_l2_sqr + prev_l2_sqr) 91 | cost = losses + l2_reg 92 | return cost 93 | 94 | 95 | def append_xw(pxw, x, weights): 96 | """ 97 | Append dot(x, weights) to pxw 98 | """ 99 | n_features, n_classes = weights.shape 100 | 101 | if x.size == 0: 102 | my_xw = np.zeros((len(x), n_classes, 1), dtype=pxw.dtype) 103 | elif np.all(weights == 0): 104 | my_xw = np.zeros((len(x), n_classes, 1), dtype=pxw.dtype) 105 | else: 106 | my_xw = dot(x, weights)[:, :, None] 107 | rval = np.concatenate([pxw, my_xw], axis=2) 108 | 
return rval.astype(pxw.dtype) 109 | 110 | 111 | def append_alpha(alpha): 112 | n_prev, n_classes = alpha.shape 113 | ones = np.ones((1, n_classes), dtype=alpha.dtype) 114 | rval = np.vstack([alpha, ones]) 115 | return rval.astype(alpha.dtype) 116 | 117 | 118 | def append_w_l2_sqr(w_l2_sqr, weights): 119 | l2_sqr = (weights * weights).sum(axis=0) 120 | rval = np.vstack([w_l2_sqr, l2_sqr[None, :]]) 121 | return rval.astype(w_l2_sqr.dtype) 122 | 123 | 124 | def append_l2_regularization(pl2reg, l2reg): 125 | rval = np.hstack([pl2reg, [l2reg]]) 126 | return rval.astype(pl2reg.dtype) 127 | 128 | 129 | def fit_sgd_0(weights, bias, x, y, l2_regularization, n_iters, 130 | print_interval): 131 | """ 132 | Refine `weights` and `bias` by n_iters steps of SGD 133 | """ 134 | if n_iters <= 0: 135 | return weights, bias 136 | 137 | n_examples = len(x) 138 | n_features, n_classes = weights.shape 139 | alpha0 = np.empty((0, n_classes), dtype=weights.dtype) 140 | 141 | # -- use the first few elements of x to estimate the average 142 | # example norm 143 | # -- fixing these learning rates makes sense to me because the 144 | # hinge loss puts a bound on the slope of the function being 145 | # optimized, the only variable is the norm / magnitude of the 146 | # data. 147 | avg_w_norm = np.mean(np.sqrt((x[:200] ** 2).sum(axis=1))) 148 | step_size_w = 0.01 / (avg_w_norm + 1e-8) 149 | step_size_b = 0.01 150 | step_size_a = 0.0 151 | 152 | weights, bias, alpha0, = autodiff.fmin_sgd( 153 | lambda w, b, a, xx, yy1: 154 | multi_svm_hinge_loss(xx, yy1, w, b, a, 155 | None, # xwi, 156 | None, # prev_w_l2_sqr, 157 | l2_regularization, 158 | None), 159 | (weights, bias, alpha0), 160 | streams={ 161 | 'xx': x.reshape((n_examples, 1, n_features)), 162 | 'yy1': y.reshape((n_examples, 1, n_classes)), 163 | }, 164 | print_interval=print_interval, 165 | step_size=(step_size_w, step_size_b, step_size_a), 166 | step_size_backoff=0.1, 167 | loops=n_iters / float(len(x)), 168 | theano_mode=theano.Mode( 169 | linker='cvm_nogc', 170 | #linker='c|py', 171 | optimizer='fast_run').excluding('gpu'), 172 | theano_device='cpu', 173 | floatX=x.dtype, 174 | ) 175 | return weights, bias 176 | 177 | 178 | l_bfgs_b_debug_feature_limit = None 179 | 180 | def fit_l_bfgs_b(weights, bias, alpha, x, y, l2reg, 181 | pxw, pw_l2_sqr, pl2reg, bfgs_kwargs, 182 | return_after_one_fit=False): 183 | """ 184 | Refine `weights, bias, alpha` by l_bfgs_b 185 | """ 186 | n_features, n_classes = weights.shape 187 | n_prev, n_classes = alpha.shape 188 | 189 | alpha_orig = alpha 190 | # -- the inplace alpha2 scaling modifies not-yet-fit weights 191 | # as the while loop below works its way across 192 | weights = weights.copy() 193 | 194 | low = 0 195 | high = n_features 196 | 197 | # -- keep trying to train on less and less of the data until it works 198 | while True: 199 | x0 = x[:, low:high] 200 | 201 | x2 = x[:, high:] 202 | pxw2 = append_xw(pxw, x2, weights[high:]) 203 | pl2reg2 = append_l2_regularization(pl2reg, l2reg) 204 | alpha2 = append_alpha(alpha) 205 | pw_l2_sqr2 = append_w_l2_sqr(pw_l2_sqr, weights[high:]) 206 | 207 | def fn(w, b, a): 208 | return multi_svm_hinge_loss(x0, y, w, b, a, 209 | pxw2, pw_l2_sqr2, l2reg, pl2reg2) 210 | try: 211 | if l_bfgs_b_debug_feature_limit is not None: 212 | # -- this mechanism is used by unit tests 213 | if (high - low) > l_bfgs_b_debug_feature_limit: 214 | raise MemoryError() 215 | (weights_, bias, alpha2), info = autodiff.fmin_l_bfgs_b(fn, 216 | (weights[low:high], bias, alpha2), 217 | return_info=True, 218 | 
borrowable=[x0], 219 | floatX=x.dtype, 220 | **bfgs_kwargs) 221 | info['feature_high'] = high 222 | info['feature_low'] = low 223 | gc.collect() 224 | logger.info('fitting successful for %i features' % high) 225 | break 226 | except (MemoryError, RuntimeError), e: 227 | high /= 2 228 | if low == high: 229 | raise 230 | gc.collect() 231 | logger.info('fitting required too much memory, falling back to %i' % high) 232 | continue 233 | 234 | weights[low:high] = weights_ 235 | # -- pop off the alpha we just added 236 | weights[high:] *= alpha2[-1] 237 | alpha = alpha2[:-1].copy() 238 | 239 | if high == n_features or return_after_one_fit: 240 | return (weights, bias, alpha), [info] 241 | 242 | # -- now loop over all the features, and put the results together 243 | inc = high - low 244 | w0s = [weights_] 245 | costs = [info['fopt']] 246 | infos = [info] 247 | while high < n_features: 248 | high += inc 249 | low += inc 250 | 251 | x1 = x[:, low:high] 252 | pxw1 = append_xw(pxw, x0, weights_) 253 | pl2reg1 = append_l2_regularization(pl2reg, l2reg) 254 | alpha = append_alpha(alpha) 255 | pw_l2_sqr1 = append_w_l2_sqr(pw_l2_sqr, weights_) 256 | 257 | x2 = x[:, high:] 258 | pxw2 = append_xw(pxw1, x2, weights[high:]) 259 | pl2reg2 = append_l2_regularization(pl2reg1, l2reg) 260 | alpha2 = append_alpha(alpha) 261 | pw_l2_sqr2 = append_w_l2_sqr(pw_l2_sqr1, weights[high:]) 262 | 263 | def fn(w, b, a): 264 | return multi_svm_hinge_loss(x1, y, w, b, a, 265 | pxw2, pw_l2_sqr2, l2reg, pl2reg2) 266 | (weights_, bias, alpha2), info = autodiff.fmin_l_bfgs_b(fn, 267 | (weights[low:high], bias, alpha2), 268 | return_info=True, 269 | borrowable=[x1], 270 | floatX=x.dtype, 271 | **bfgs_kwargs) 272 | 273 | info['feature_high'] = high 274 | info['feature_low'] = low 275 | 276 | # -- pop off the alpha we just added 277 | weights[high:] *= alpha2[-1] 278 | alpha = alpha2[:-1].copy() 279 | 280 | w0s.append(weights_) 281 | costs.append(info['fopt']) 282 | infos.append(info) 283 | x0 = x1 284 | pxw = pxw1 285 | pl2reg = pl2reg1 286 | pw_l2_sqr = pw_l2_sqr1 287 | 288 | old_alpha = alpha[:n_prev] 289 | new_alpha = alpha[n_prev:] 290 | assert len(new_alpha) == len(w0s) - 1 291 | 292 | if np.any(old_alpha < 0) or np.any(old_alpha > 1): 293 | warn('Alpha naturally grew beyond 0-1 range: %s' % str(old_alpha)) 294 | 295 | for w, a in zip(w0s[:-1], new_alpha): 296 | w *= a 297 | weights = np.vstack(w0s) 298 | alpha_rval = old_alpha.copy() 299 | assert alpha_rval.shape == alpha_orig.shape 300 | return (weights, bias, alpha_rval), infos 301 | 302 | 303 | class IncrementalMultiSVM(object): 304 | """ 305 | On each iteration of the incremental construction this class fits a new 306 | weight vector w to the features x, while adjusting the norm of the 307 | previously-fit weight vectors to balance the current model against the old 308 | ones. 309 | 310 | See test_hingeboost.py for an example of incremental SVM construction. 
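
    A minimal usage sketch (hypothetical names and shapes; see
    test_hingeboost.py for the real thing). x_a and x_b are two feature
    blocks for the same examples, and y holds integer labels in
    {0, ..., n_classes - 1}:

        svm = IncrementalMultiSVM(n_features=x_a.shape[1], n_classes=10)
        svm.fit(x_a, y)                               # fit the first block
        svm2 = svm.continuation(n_features=x_b.shape[1])
        xw = svm.xw_carry_forward(x_a)                # dot(x_a, svm.weights)
        svm2.fit(x_b, y, xw=xw)                       # fit the next block
        preds = svm2.predict(x_b, xw=xw)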
311 | 312 | """ 313 | 314 | def __init__(self, n_features, n_classes, 315 | prev_w_l2_sqr=None, 316 | l2_regularization=1e-4, 317 | prev_l2_regularization=None, 318 | dtype='float64', 319 | scalar_bounds=(-1e3, 1e3), 320 | bfgs_kwargs=None, 321 | alpha=None, 322 | print_interval=sys.maxint, 323 | n_sgd_iters=3000, 324 | bias=None, 325 | assert_clip_ok=True, 326 | badfit_thresh=float('inf'), 327 | ): 328 | 329 | self.n_features = n_features 330 | if prev_w_l2_sqr is None: 331 | self.prev_w_l2_sqr = np.empty((0, n_classes), dtype=dtype) 332 | else: 333 | self.prev_w_l2_sqr = np.asarray(prev_w_l2_sqr).astype(dtype) 334 | (self.n_prev, self.n_classes) = self.prev_w_l2_sqr.shape 335 | if n_classes != self.n_classes: 336 | raise ValueError('n_classes does not match prev_w_l2_sqr.shape', 337 | n_classes, self.prev_w_l2_sqr.shape) 338 | self.l2_regularization = l2_regularization 339 | if prev_l2_regularization is None: 340 | self.prev_l2_regularization = np.empty((0,), dtype=dtype) 341 | else: 342 | self.prev_l2_regularization = prev_l2_regularization 343 | self.dtype = dtype 344 | self.scalar_bounds = scalar_bounds 345 | self.print_interval = print_interval 346 | if bfgs_kwargs is None: 347 | self.bfgs_kwargs = copy.deepcopy(_default_bfgs_kwargs) 348 | if print_interval < sys.maxint: 349 | self.bfgs_kwargs.setdefault('iprint', 1) 350 | else: 351 | self.bfgs_kwargs = bfgs_kwargs 352 | 353 | self.weights = np.zeros((n_features, n_classes), dtype=dtype) 354 | if bias is None: 355 | self.bias = np.zeros((n_classes,), dtype=dtype) 356 | else: 357 | self.bias = np.asarray(bias).astype(dtype) 358 | if (n_classes,) != self.bias.shape: 359 | raise ValueError('bad shape for bias', self.bias.shape) 360 | if alpha is None: 361 | self.alpha = np.ones_like(self.prev_w_l2_sqr) 362 | else: 363 | self.alpha = np.array(alpha).astype(dtype) 364 | if self.alpha.shape != self.prev_w_l2_sqr.shape: 365 | raise ValueError('shape mismatch between alpha and prev_w_l2_sqr', 366 | self.alpha.shape, self.prev_w_l2_sqr.shape) 367 | self.n_sgd_iters = n_sgd_iters 368 | self.assert_clip_ok = assert_clip_ok 369 | self.badfit_thresh = badfit_thresh 370 | 371 | def print_summary(self): 372 | print 'IncrementalMultiSVM', repr(self) 373 | print '-> alpha', self.alpha 374 | print '-> prvl2', self.prev_l2_regularization 375 | print '-> prvw2', self.prev_w_l2_sqr 376 | 377 | @property 378 | def cumulative_alpha(self): 379 | return append_alpha(self.alpha) 380 | 381 | @property 382 | def cumulative_w_l2_sqr(self): 383 | return append_w_l2_sqr(self.prev_w_l2_sqr, self.weights) 384 | 385 | @property 386 | def cumulative_l2_regularization(self): 387 | return append_l2_regularization(self.prev_l2_regularization, 388 | self.l2_regularization) 389 | 390 | def xw_carry_forward(self, x, pxw=None): 391 | return append_xw(self.as_xw(x, pxw), x, self.weights) 392 | 393 | def continuation(self, n_features=None, l2_regularization=None): 394 | if n_features is None: 395 | n_features = self.n_features 396 | if l2_regularization is None: 397 | l2_regularization = self.l2_regularization 398 | 399 | rval = self.__class__( 400 | n_features=n_features, 401 | n_classes=self.n_classes, 402 | prev_w_l2_sqr=self.cumulative_w_l2_sqr, 403 | alpha=self.cumulative_alpha, 404 | prev_l2_regularization=self.cumulative_l2_regularization, 405 | l2_regularization=l2_regularization, 406 | dtype=self.dtype, 407 | scalar_bounds=self.scalar_bounds, 408 | print_interval=self.print_interval, 409 | bfgs_kwargs=self.bfgs_kwargs, 410 | n_sgd_iters=self.n_sgd_iters, 411 | 
bias=self.bias.copy(), 412 | assert_clip_ok=self.assert_clip_ok, 413 | ) 414 | return rval 415 | 416 | def decision_function(self, x, xw=None): 417 | rval = dot(x, self.weights) + self.bias 418 | xw = self.as_xw(x, xw) 419 | if xw.size or self.alpha.size: 420 | # -- workaround Theano's no support for tensordot 421 | rval += (xw * self.alpha.T).sum(axis=2) 422 | return rval 423 | 424 | def as_xw(self, x, xw): 425 | if xw is None: 426 | if self.n_prev == 0: 427 | return np.zeros( 428 | (len(x), self.n_classes, self.n_prev), 429 | dtype=x.dtype) 430 | else: 431 | raise TypeError('xw is required for previous models') 432 | else: 433 | xw = np.asarray(xw, dtype=self.dtype, order='C') 434 | if xw.shape != (len(x), self.n_classes, self.n_prev): 435 | raise ValueError('xw has wrong shape', 436 | (xw.shape, (len(x), self.n_classes, self.n_prev))) 437 | return xw 438 | 439 | def predict(self, x, xw=None): 440 | xw = self.as_xw(x, xw) 441 | return self.decision_function(x, xw).argmax(axis=1) 442 | 443 | def y_ind(self, y): 444 | # y_ind is all +-1, with 1 meaning a positive label for OvA classif 445 | assert y.min() == 0 # fail for +-1 labels 446 | y_ind = -np.ones((len(y), self.n_classes)).astype(self.dtype) 447 | y_ind[np.arange(len(y)), y] = 1 448 | return y_ind 449 | 450 | def loss(self, x, y, xw=None): 451 | xw = self.as_xw(x, xw) 452 | y_ind = self.y_ind(y) 453 | assert self.l2_regularization is not None 454 | return multi_svm_hinge_loss(x, y_ind, 455 | self.weights, self.bias, self.alpha, 456 | xw, 457 | self.prev_w_l2_sqr, 458 | self.l2_regularization, 459 | self.prev_l2_regularization, 460 | ) 461 | 462 | def fit(self, x, y, xw=None): 463 | """ 464 | x - n_examples x n_features design matrix. 465 | y - vector of integer labels 466 | xw - matrix of real-valued incoming biases obtained 467 | by multiplying the existing weight vectors by x 468 | """ 469 | pxw = self.as_xw(x, xw) 470 | assert y.min() == 0 # fail for +-1 labels 471 | 472 | if x.shape[0] != y.shape[0]: 473 | raise ValueError('length mismatch between x and y') 474 | 475 | n_examples, n_classes, n_prev = pxw.shape 476 | if n_prev != self.n_prev: 477 | raise ValueError('n_prev mismatch', 478 | (n_prev, self.n_prev)) 479 | if n_examples != len(x): 480 | raise ValueError('n_examples mismatch', 481 | (n_examples, len(x))) 482 | if n_classes != self.weights.shape[1]: 483 | raise ValueError('n_classes mismatch', 484 | (n_classes, self.weights.shape[1])) 485 | 486 | weights = self.weights 487 | bias = self.bias 488 | alpha = self.alpha 489 | 490 | bias0 = np.zeros_like(bias) 491 | alpha0 = np.empty((0, self.n_classes), dtype=alpha.dtype) 492 | 493 | y_ind = self.y_ind(y) 494 | 495 | bfgs_kwargs = dict(self.bfgs_kwargs) 496 | bfgs_kwargs.setdefault('factr', 100) 497 | 498 | bfgs_kwargs_precise = dict(bfgs_kwargs) 499 | bfgs_kwargs_precise['factr'] /= 100 500 | 501 | # -- warm up with some pure-online sgd 502 | # don't train alpha yet, wait until the weights and bias 503 | # are somewhat initialized. 
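        #    (sketch of what follows: fit_sgd_0 warm-starts `weights`/`bias`
        #     with plain SGD, then alpha is shrunk by c0 = n_prev/(1+n_prev)
        #     and the warmed weights by c1 = 1/(1+n_prev), so the new feature
        #     block enters the L-BFGS refinement with roughly 1/(1+n_prev) of
        #     the total weight)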
504 | weights, bias = fit_sgd_0(weights, bias, x, y_ind, 505 | self.l2_regularization, 506 | self.n_sgd_iters, 507 | self.print_interval) 508 | 509 | c0 = n_prev / (1.0 + n_prev) 510 | c1 = 1 / (1.0 + n_prev) 511 | 512 | alpha *= c0 513 | weights *= c1 514 | bias = c0 * bias0 + c1 * bias 515 | 516 | (p_weights, p_bias, p_alpha), infos = fit_l_bfgs_b( 517 | weights, bias, alpha, 518 | x, y_ind, self.l2_regularization, 519 | pxw, 520 | self.prev_w_l2_sqr, 521 | self.prev_l2_regularization, 522 | self.bfgs_kwargs, 523 | return_after_one_fit=True) 524 | 525 | if infos[0]['feature_high'] == self.n_features: 526 | # -- the first fit did the whole feature set 527 | weights = p_weights 528 | bias = p_bias 529 | alpha = p_alpha 530 | elif infos[0]['fopt'] >= self.badfit_thresh: 531 | # -- the first fit was so bad that we're giving up 532 | weights = p_weights 533 | bias = p_bias 534 | alpha = p_alpha 535 | else: 536 | # -- we couldn't fit the whole feature set at once 537 | data_home = skdata.data_home.get_data_home() 538 | tempdirname = os.path.join(data_home, 'hpconvnet_isvm_features') 539 | if not os.path.exists(tempdirname): 540 | os.makedirs(tempdirname) 541 | dirname = tempfile.mkdtemp(dir=tempdirname) 542 | try: 543 | README = open(os.path.join(dirname, 'README'), 'w+') 544 | print >> README, ( 545 | "Feature cache created by hpconvnet/isvm_multi.py") 546 | README.close() 547 | p_x = np.memmap(os.path.join(dirname, 'p_x.npy'), 548 | dtype=x.dtype, 549 | mode='w+', 550 | shape=x.shape) 551 | 552 | for ii in range(2): 553 | # -- if there isn't enough GPU memory to fit the whole 554 | # problem at once, then use a block coordinate descent 555 | # strategy, with different blocks on each iteration. 556 | # I found that 2 passes of this kind were sufficient 557 | # for MNIST, when divided into 2 pieces. 558 | 559 | perm = np.random.RandomState(1234 + ii).permutation( 560 | self.n_features) 561 | 562 | p_weights = weights[perm] 563 | for ii in xrange(len(x)): 564 | x_ii = x[ii] * 1 # -- bring it into memory 565 | p_x[ii] = x_ii[perm] 566 | 567 | (p_weights, bias, alpha), infos2 = fit_l_bfgs_b( 568 | p_weights, bias, alpha, 569 | p_x, y_ind, self.l2_regularization, 570 | pxw, 571 | self.prev_w_l2_sqr, 572 | self.prev_l2_regularization, 573 | self.bfgs_kwargs) 574 | 575 | weights[perm] = p_weights 576 | infos.extend(infos2) 577 | 578 | finally: 579 | shutil.rmtree(dirname) 580 | 581 | self.weights = weights 582 | self.bias = bias 583 | self.alpha = alpha 584 | self.fit_infos = infos 585 | 586 | # -- in cases where the prev_l2_sqr or the prev_l2_regularization are 587 | # really tiny, alpha can do funny things, like grow greater than 1, 588 | # and/or even be slightly negative. 
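        #    (note: despite the name, assert_clip_ok does not assert -- when
        #     True it re-evaluates the loss before/after the clip below and
        #     logs an error() if clipping degraded the fit; when False the
        #     clipped alpha is accepted silently)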
589 | clipped_alpha = np.clip(alpha, 0, 1) 590 | if self.assert_clip_ok: 591 | final_loss = self.loss(x, y, xw) 592 | self.alpha = clipped_alpha 593 | clipped_final_loss = self.loss(x, y, xw) 594 | 595 | if not np.allclose(final_loss, clipped_final_loss, atol=1e-3, 596 | rtol=1e-2): 597 | error('fit is significantly degraded by alpha-clipping') 598 | error('-> orig loss %f' % final_loss) 599 | error('-> clipped loss %f' % clipped_final_loss) 600 | error('-> alpha %s' % str(alpha)) 601 | else: 602 | self.alpha = clipped_alpha 603 | 604 | 605 | 606 | -------------------------------------------------------------------------------- /hpconvnet/pyll_slm.py: -------------------------------------------------------------------------------- 1 | """ 2 | A library file for the design approach of cifar10.py 3 | 4 | It includes the slm components as well as a few other things that should 5 | migrate upstream. 6 | 7 | """ 8 | import cPickle 9 | import logging 10 | import time 11 | 12 | import numpy as np 13 | from skimage.transform import AffineTransform 14 | from skimage.transform._warps_cy import _warp_fast 15 | 16 | import theano 17 | import theano.tensor as tensor 18 | from theano.tensor.nnet import conv 19 | 20 | from hyperopt import pyll 21 | import hyperopt 22 | 23 | from skdata import larray 24 | 25 | from .utils import mean_and_std 26 | from .utils import dot_f32 27 | from .utils import dot_f64 28 | import foobar 29 | 30 | import isvm_boosting # for worth_calculating 31 | 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | pyll.scope.define_info(o_len=2)(mean_and_std) 37 | 38 | 39 | class InvalidDescription(Exception): 40 | """Model description was invalid""" 41 | 42 | 43 | class EvalTimeout(Exception): 44 | """Document could not be evaluated in time 45 | """ 46 | 47 | 48 | global_timeout = float('inf') 49 | 50 | 51 | def set_timeout(t): 52 | global global_timeout 53 | global_timeout = t 54 | 55 | 56 | def raise_on_timeout(): 57 | if time.time() > global_timeout: 58 | raise EvalTimeout() 59 | 60 | 61 | 62 | class AffineImageWarp(theano.Op): 63 | 64 | def __eq__(self, other): 65 | return type(self) == type(other) 66 | 67 | def __hash__(self): 68 | return hash((type(self),)) 69 | 70 | def make_node(self, x, rot, shear, scale, trans, oshape): 71 | inputs = map(theano.tensor.as_tensor_variable, 72 | [x, rot, shear, scale, trans, oshape]) 73 | if 'float' not in x.dtype: 74 | raise TypeError('warping int images is not supported') 75 | return theano.Apply(self, inputs, [x.type()]) 76 | 77 | def perform(self, node, inputs, out_storage): 78 | x, rot, shear, scale, trans, oshape = inputs 79 | #foobar.append_ndarray_signature(x, 'AffineImageWarp x') 80 | 81 | aff = AffineTransform(rotation=rot, shear=shear, scale=scale, 82 | translation=trans) 83 | 84 | if str(x.dtype) != node.inputs[0].dtype: 85 | raise TypeError("Wrong dtype argument to AffineImageWarp", x.dtype) 86 | 87 | if np.any(x < 0): 88 | raise ValueError('X should be positive') 89 | 90 | if np.any(x > 1.0): 91 | raise ValueError('X should be less than 1') 92 | 93 | N, C, H, W = x.shape 94 | rows, cols = oshape 95 | 96 | rval = out_storage[0][0] 97 | rval_shape = (N, C, rows, cols) 98 | 99 | if ((rval is None) 100 | or (rval.dtype != x.dtype) 101 | or rval.shape != rval_shape): 102 | rval3 = np.empty((N * C, rows, cols), dtype=x.dtype) 103 | bg_check = True 104 | else: 105 | rval3 = rval.reshape((N * C, rows, cols)) 106 | bg_check = False 107 | 108 | xx = x.reshape(N * C, H, W) 109 | 110 | # -- a small exactly-representable float for 
out-of-bounds pixels 111 | oob = -1.0 / 2 ** 16 112 | order = 1 # TODO: TRY ORDER=2 (WHY DOES RANGE GET LARGER?) 113 | 114 | tform = np.linalg.inv(aff._matrix) 115 | 116 | for i in xrange(N * C): 117 | if bg_check and i == 0: 118 | rval3[i] = _warp_fast(xx[i], tform, 119 | output_shape=oshape, order=order, 120 | cval=oob) 121 | oob_ratio = np.mean(rval3[i] == oob) 122 | if oob_ratio > 0.5: 123 | raise InvalidDescription('too much background', oob_ratio) 124 | rval3[i] = np.maximum(0, rval3[i]) 125 | else: 126 | rval3[i] = _warp_fast(xx[i], np.linalg.inv(aff._matrix), 127 | output_shape=oshape, order=order, 128 | cval=0) 129 | 130 | if 0 and i == 0: 131 | print 'Debugprint from AffineImageWarp...' 132 | for sym in 'rot', 'shear', 'scale', 'trans', 'oshape': 133 | print sym, ':', locals()[sym] 134 | import matplotlib.pyplot as pl 135 | pl.subplot(2, 1, 1) 136 | pl.imshow(xx[i], cmap=pl.cm.gray) 137 | pl.subplot(2, 1, 2) 138 | pl.imshow(rval3[i], cmap=pl.cm.gray) 139 | pl.show() 140 | time.sleep(2) # -- give some time to ctrl-C 141 | 142 | if np.any(rval3 > 1.001) or np.any(rval3 < 0.0): 143 | min3 = np.min(rval3) 144 | max3 = np.max(rval3) 145 | raise ValueError('interpolated pixel values out of range', 146 | (min3, max3)) 147 | 148 | out_storage[0][0] = rval3.reshape(rval_shape) 149 | #foobar.append_ndarray_signature(out_storage[0][0], 'AffineImageWarp y') 150 | 151 | 152 | affine_image_warp = AffineImageWarp() 153 | 154 | 155 | @pyll.scope.define 156 | def slm_affine_image_warp((x, x_shp), 157 | rot, shear, scale, trans, oshape): 158 | assert x_shp[2] == x_shp[3] 159 | z = affine_image_warp(x, 160 | rot, shear, np.asarray(scale), np.asarray(trans), np.asarray(oshape)) 161 | z_shp = (x_shp[0], x_shp[1]) + tuple(oshape) 162 | assert z_shp[2] == z_shp[3] 163 | return z, z_shp 164 | 165 | 166 | @pyll.scope.define 167 | def slm_img_uint8_to_float32((x, x_shp),): 168 | if str(x.dtype) != 'uint8': 169 | raise TypeError('x must be uint8', x.dtype) 170 | return (x.astype('float32') / 255, x_shp) 171 | 172 | 173 | @pyll.scope.define 174 | def alloc_random_uniform_filterbank(n_filters, height, width, 175 | channels, dtype, rseed, normalize=True): 176 | """ 177 | Generate the same weights as are generated by pythor3 178 | """ 179 | if height != width: 180 | raise ValueError('filters must be square') 181 | if channels is None: 182 | filter_shape = [n_filters, height, width] 183 | else: 184 | filter_shape = [n_filters, height, width, channels] 185 | 186 | rng = np.random.RandomState(rseed) 187 | foobar.append_randomstate('alloc_random_uniform_filterbank', rng) 188 | fb_data = rng.uniform(size=filter_shape) 189 | 190 | # normalize each filter in the bank if needed 191 | if normalize: 192 | # TODO: vectorize these computations, do all at once. 
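        #    A vectorized sketch of the loop below, left as a comment so the
        #    reference behaviour stays untouched (assumes fb_data is
        #    contiguous so the reshape is a view):
        #       flat = fb_data.reshape(len(fb_data), -1)
        #       flat -= flat.mean(axis=1, keepdims=True)
        #       flat /= np.sqrt((flat * flat).sum(axis=1, keepdims=True))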
193 | for fidx, filt in enumerate(fb_data): 194 | # normalization here means zero-mean, unit-L2norm 195 | filt -= filt.mean() 196 | filt_norm = np.sqrt((filt * filt).sum()) 197 | assert filt_norm != 0 198 | filt /= filt_norm 199 | fb_data[fidx] = filt 200 | 201 | foobar.append_ndarray_signature(fb_data, 'alloc_random_uniform_filterbank') 202 | return fb_data.astype(dtype) 203 | 204 | 205 | @pyll.scope.define_info(o_len=2) 206 | def boxconv((x, x_shp), kershp, channels=False): 207 | """ 208 | channels: sum over channels (T/F) 209 | """ 210 | kershp = tuple(kershp) 211 | if channels: 212 | rshp = ( x_shp[0], 213 | 1, 214 | x_shp[2] - kershp[0] + 1, 215 | x_shp[3] - kershp[1] + 1) 216 | kerns = np.ones((1, x_shp[1]) + kershp, dtype=x.dtype) 217 | else: 218 | rshp = ( x_shp[0], 219 | x_shp[1], 220 | x_shp[2] - kershp[0] + 1, 221 | x_shp[3] - kershp[1] + 1) 222 | kerns = np.ones((1, 1) + kershp, dtype=x.dtype) 223 | x_shp = (x_shp[0]*x_shp[1], 1, x_shp[2], x_shp[3]) 224 | x = x.reshape(x_shp) 225 | try: 226 | rval = tensor.reshape( 227 | conv.conv2d(x, 228 | theano.shared(kerns), 229 | image_shape=x_shp, 230 | filter_shape=kerns.shape, 231 | border_mode='valid'), 232 | rshp) 233 | except Exception, e: 234 | if "Bad size for the output shape" in str(e): 235 | raise InvalidDescription('boxconv', (x_shp, kershp, channels)) 236 | else: 237 | raise 238 | return rval, rshp 239 | 240 | 241 | @pyll.scope.define_info(o_len=2) 242 | def slm_fbcorr_chmaj((x, x_shp), kerns, stride=1, mode='valid'): 243 | """ 244 | Channel-major filterbank correlation 245 | 246 | kerns - filterbank with shape (n_filters, ker_size, ker_size, channels) 247 | 248 | """ 249 | assert x_shp[2] == x_shp[3] 250 | assert x.dtype == 'float32' 251 | # Reference implementation: 252 | # ../pythor3/pythor3/operation/fbcorr_/plugins/scipy_naive/scipy_naive.py 253 | if stride != 1: 254 | raise NotImplementedError('stride is not used in reference impl.') 255 | 256 | # -- flip the kernels so that convolution does correlation 257 | kerns = kerns[:, :, ::-1, ::-1] 258 | s_kerns = theano.shared(kerns.astype(x.dtype)) 259 | x = conv.conv2d( 260 | x, 261 | s_kerns, 262 | image_shape=x_shp, 263 | filter_shape=kerns.shape, 264 | border_mode=mode) 265 | 266 | n_filters, channels, krows, kcols = kerns.shape 267 | if mode == 'valid': 268 | x_shp = (x_shp[0], n_filters, 269 | x_shp[2] - krows + 1, 270 | x_shp[3] - kcols + 1) 271 | elif mode == 'full': 272 | x_shp = (x_shp[0], n_filters, 273 | x_shp[2] + krows - 1, 274 | x_shp[3] + kcols - 1) 275 | else: 276 | raise NotImplementedError('fbcorr mode', mode) 277 | 278 | assert x_shp[2] == x_shp[3] 279 | return x, x_shp 280 | 281 | 282 | @pyll.scope.define_info(o_len=2) 283 | def slm_clipout((x, x_shp), min_out, max_out): 284 | if min_out is None and max_out is None: 285 | return x, x_shp 286 | elif min_out is None: 287 | return tensor.minimum(x, max_out), x_shp 288 | elif max_out is None: 289 | return tensor.maximum(x, min_out), x_shp 290 | else: 291 | return tensor.clip(x, min_out, max_out), x_shp 292 | 293 | 294 | @pyll.scope.define_info(o_len=2) 295 | def slm_lpool((x, x_shp), 296 | ker_size=3, 297 | order=1, 298 | stride=1, 299 | mode='valid'): 300 | assert x.dtype == 'float32' 301 | assert x_shp[2] == x_shp[3] 302 | order=float(order) 303 | 304 | ker_shape = (ker_size, ker_size) 305 | if hasattr(order, '__iter__'): 306 | o1 = (order == 1).all() 307 | o2 = (order == order.astype(np.int)).all() 308 | else: 309 | o1 = order == 1 310 | o2 = (order == int(order)) 311 | 312 | if o1: 313 | r, r_shp = 
boxconv((x, x_shp), ker_shape) 314 | elif o2: 315 | r, r_shp = boxconv((x ** order, x_shp), ker_shape) 316 | r = tensor.maximum(r, 0) ** (1.0 / order) 317 | else: 318 | r, r_shp = boxconv((abs(x) ** order, x_shp), ker_shape) 319 | r = tensor.maximum(r, 0) ** (1.0 / order) 320 | 321 | if stride > 1: 322 | r = r[:, :, ::stride, ::stride] 323 | # intdiv is tricky... so just use numpy 324 | r_shp = np.empty(r_shp)[:, :, ::stride, ::stride].shape 325 | assert r_shp[2] == r_shp[3] 326 | return r, r_shp 327 | 328 | 329 | @pyll.scope.define_info(o_len=2) 330 | def slm_lnorm((x, x_shp), 331 | ker_size=3, 332 | remove_mean= False, 333 | div_method='euclidean', 334 | threshold=0.0, 335 | stretch=1.0, 336 | mode='valid', 337 | EPSILON=1e-4, 338 | ): 339 | # Reference implementation: 340 | # ../pythor3/pythor3/operation/lnorm_/plugins/scipy_naive/scipy_naive.py 341 | assert x.dtype == 'float32' 342 | assert x_shp[2] == x_shp[3] 343 | inker_shape=(ker_size, ker_size) 344 | outker_shape=(ker_size, ker_size) # (3, 3) 345 | if mode != 'valid': 346 | raise NotImplementedError('lnorm requires mode=valid', mode) 347 | 348 | threshold = float(threshold) 349 | stretch = float(stretch) 350 | 351 | if outker_shape == inker_shape: 352 | size = np.asarray(x_shp[1] * inker_shape[0] * inker_shape[1], 353 | dtype=x.dtype) 354 | ssq, ssqshp = boxconv((x ** 2, x_shp), inker_shape, 355 | channels=True) 356 | xs = inker_shape[0] // 2 357 | ys = inker_shape[1] // 2 358 | # --local contrast normalization in regions that are not symmetric 359 | # about the pixel being normalized feels weird, but we're 360 | # allowing it here. 361 | xs_inc = (inker_shape[0] + 1) % 2 362 | ys_inc = (inker_shape[1] + 1) % 2 363 | if div_method == 'euclidean': 364 | if remove_mean: 365 | arr_sum, _shp = boxconv((x, x_shp), inker_shape, 366 | channels=True) 367 | arr_num = (x[:, :, xs-xs_inc:-xs, ys-ys_inc:-ys] 368 | - arr_sum / size) 369 | arr_div = EPSILON + tensor.sqrt( 370 | tensor.maximum(0, 371 | ssq - (arr_sum ** 2) / size)) 372 | else: 373 | arr_num = x[:, :, xs-xs_inc:-xs, ys-ys_inc:-ys] 374 | arr_div = EPSILON + tensor.sqrt(ssq) 375 | else: 376 | raise NotImplementedError('div_method', div_method) 377 | else: 378 | raise NotImplementedError('outker_shape != inker_shape', 379 | outker_shape, inker_shape) 380 | 381 | if (hasattr(stretch, '__iter__') and (stretch != 1).any()) or stretch != 1: 382 | arr_num = arr_num * stretch 383 | arr_div = arr_div * stretch 384 | # XXX: IS THIS 1.0 supposed to be (threshold + EPSILON) ?? 385 | arr_div = tensor.switch(arr_div < (threshold + EPSILON), 1.0, arr_div) 386 | 387 | r = arr_num / arr_div 388 | r_shp = x_shp[0], x_shp[1], ssqshp[2], ssqshp[3] 389 | return r, r_shp 390 | 391 | 392 | @pyll.scope.define_info(o_len=2) 393 | def slm_fbncc_chmaj((x, x_shp), m_fb, remove_mean, beta, hard_beta): 394 | """ 395 | Channel-major filterbank normalized cross-correlation 396 | 397 | For each valid-mode patch (p) of the image (x), this transform computes 398 | 399 | p_c = (p - mean(p)) if (remove_mean) else (p) 400 | qA = p_c / sqrt(var(p_c) + beta) # -- Coates' sc_vq_demo 401 | qB = p_c / sqrt(max(sum(p_c ** 2), beta)) # -- Pinto's lnorm 402 | 403 | There are two differences between qA and qB: 404 | 405 | 1. the denominator contains either addition or max 406 | 407 | 2. the denominator contains either var or sum of squares 408 | 409 | The first difference corresponds to the hard_beta parameter. 
410 | The second difference amounts to a decision about the scaling of the 411 | output, because for every function qA(beta_A) there is a function 412 | qB(betaB) that is identical, except for a multiplicative factor of 413 | sqrt(N - 1). 414 | 415 | I think that in the context of stacked models, the factor of sqrt(N-1) is 416 | undesirable because we want the dynamic range of all outputs to be as 417 | similar as possible. So this function implements qB. 418 | 419 | Coates' denominator had var(p_c) + 10, so what should the equivalent here 420 | be? 421 | p_c / sqrt(var(p_c) + 10) 422 | = p_c / sqrt(sum(p_c ** 2) / (108 - 1) + 10) 423 | = p_c / sqrt((sum(p_c ** 2) + 107 * 10) / 107) 424 | = sqrt(107) * p_c / sqrt((sum(p_c ** 2) + 107 * 10)) 425 | 426 | So Coates' pre-processing has beta = 1070, hard_beta=False. This function 427 | returns a result that is sqrt(107) ~= 10 times smaller than the Coates 428 | whitening step. 429 | 430 | """ 431 | # -- just to make sure things will run on GPU 432 | assert x.dtype == 'float32' 433 | w_means, w_fb = m_fb 434 | 435 | beta = float(beta) 436 | 437 | # -- kernel Number, Features, Rows, Cols 438 | kN, kF, kR, kC = w_fb.shape 439 | 440 | # -- patch-wise sums and sums-of-squares 441 | p_sum, _shp = boxconv((x, x_shp), (kR, kC), channels=True) 442 | p_mean = 0 if remove_mean else p_sum / (kF * kR * kC) 443 | p_ssq, _shp = boxconv((x ** 2, x_shp), (kR, kC), channels=True) 444 | 445 | # -- this is an important variable in the math above, but 446 | # it is not directly used in the fused lnorm_fbcorr 447 | # p_c = x[:, :, xs - xs_inc:-xs, ys - ys_inc:-ys] - p_mean 448 | 449 | # -- adjust the sum of squares to reflect remove_mean 450 | p_c_sq = p_ssq - (p_mean ** 2) * (kF * kR * kC) 451 | if hard_beta: 452 | p_div2 = tensor.maximum(p_c_sq, beta) 453 | else: 454 | p_div2 = p_c_sq + beta 455 | 456 | p_scale = 1.0 / tensor.sqrt(p_div2) 457 | 458 | # -- 459 | # from whitening, we have a shift and linear transform (P) 460 | # for each patch (as vector). 461 | # 462 | # let m be the vector [m m m m] that replicates p_mean 463 | # let a be the scalar p_scale 464 | # let x be an image patch from s_imgs 465 | # 466 | # Whitening means applying the affine transformation 467 | # (c - M) P 468 | # to contrast-normalized patch c = a (x - m), 469 | # where a = p_scale and m = p_mean. 470 | # 471 | # We also want to extract features in dictionary 472 | # 473 | # (c - M) P 474 | # = (a (x - [m,m,m]) - M) P 475 | # = (a x - a [m,m,m] - M) P 476 | # = a x P - a [m,m,m] P - M P 477 | # 478 | 479 | P = theano.shared( 480 | np.asarray(w_fb[:, :, ::-1, ::-1], order='C')) 481 | 482 | Px = conv.conv2d(x, P, 483 | image_shape=x_shp, 484 | filter_shape=w_fb.shape, 485 | border_mode='valid') 486 | 487 | s_P_sum = theano.shared(w_fb.sum(3).sum(2).sum(1)) 488 | Pmmm = p_mean * s_P_sum.dimshuffle(0, 'x', 'x') 489 | s_PM = theano.shared((w_means * w_fb).sum(3).sum(2).sum(1)) 490 | z = p_scale * (Px - Pmmm) - s_PM.dimshuffle(0, 'x', 'x') 491 | 492 | assert z.dtype == x.dtype, (z.dtype, x.dtype) 493 | return z, (_shp[0], kN, _shp[2], _shp[3]) 494 | 495 | 496 | @pyll.scope.define 497 | def slm_flatten((x, x_shp),): 498 | r = tensor.flatten(x, 2) 499 | r_shp = x_shp[0], np.prod(x_shp[1:]) 500 | return r, r_shp 501 | 502 | 503 | @pyll.scope.define_info(o_len=2) 504 | def slm_lpool_smallgrid((x, x_shp), grid_res=2, order=1): 505 | """ 506 | Like lpool, but parametrized to produce a fixed size image as output. 
507 | The image is not rescaled, but rather single giant box filters are 508 | defined for each output pixel, and stored in a matrix. 509 | """ 510 | assert x.dtype == 'float32' 511 | order=float(order) 512 | 513 | if hasattr(order, '__iter__'): 514 | o1 = (order == 1).all() 515 | o2 = (order == order.astype(np.int)).all() 516 | else: 517 | o1 = order == 1 518 | o2 = (order == int(order)) 519 | 520 | # rather than convolving with a box, this function takes 521 | # a dot product with the entire image 522 | ngR = x_shp[2] // grid_res + int(x_shp[2] % grid_res > 0) 523 | ngC = x_shp[3] // grid_res + int(x_shp[3] % grid_res > 0) 524 | 525 | assert ngR * grid_res >= x_shp[2] 526 | assert ngC * grid_res >= x_shp[3] 527 | 528 | W = np.zeros((grid_res, grid_res,) + x_shp[2:], dtype=x.dtype) 529 | for rr in range(grid_res): 530 | for cc in range(grid_res): 531 | W[rr, cc, 532 | rr * ngR : (rr + 1) * ngR, 533 | cc * ngC : (cc + 1) * ngC] = 1.0 534 | sW = theano.shared(W.reshape((grid_res ** 2, -1))) 535 | 536 | xmat = x.reshape((x_shp[0] * x_shp[1], x_shp[2] * x_shp[3])) 537 | 538 | if o1: 539 | r = tensor.dot(xmat, sW.T) 540 | elif o2: 541 | r = tensor.sqrt(tensor.dot(xmat ** 2, sW.T)) 542 | else: 543 | r = tensor.dot(abs(xmat) ** order, sW.T) 544 | r = tensor.maximum(r, 0) ** (1.0 / order) 545 | 546 | r_shp = (x_shp[0], x_shp[1], grid_res, grid_res) 547 | r = r.reshape(r_shp) 548 | 549 | return r, r_shp 550 | 551 | 552 | @pyll.scope.define_info(o_len=2) 553 | def slm_quantize_gridpool((x, x_shp), alpha, 554 | use_mid=False, 555 | order=1.0, 556 | grid_res=2): 557 | hr = int(np.round(x_shp[2] / grid_res)) 558 | hc = int(np.round(x_shp[3] / grid_res)) 559 | alpha = tensor.cast(alpha, dtype=x.dtype) 560 | sXC_shp = (x_shp[0], x_shp[1], grid_res, grid_res, 3 if use_mid else 2) 561 | sXC = tensor.zeros(sXC_shp, dtype=x.dtype) 562 | 563 | for ri in range(grid_res): 564 | if ri == grid_res - 1: 565 | rslice = slice(ri * hr, None) 566 | else: 567 | rslice = slice(ri * hr, (ri + 1) * hr) 568 | for ci in range(grid_res): 569 | cslice = slice(ci * hc, (ci + 1) * hc) 570 | if ci == grid_res - 1: 571 | cslice = slice(ci * hc, None) 572 | else: 573 | cslice = slice(ci * hc, (ci + 1) * hc) 574 | xi = x[:, :, rslice, cslice] 575 | qs = [] 576 | qs.append(tensor.maximum(xi - alpha, 0)) 577 | qs.append(tensor.maximum(-xi - alpha, 0)) 578 | if use_mid: 579 | qs.append(tensor.maximum(alpha - abs(xi), 0)) 580 | 581 | for qi, q in enumerate(qs): 582 | inc = (q ** order).sum([2, 3]) ** (1. / order) 583 | assert inc.dtype == q.dtype 584 | sXC = tensor.set_subtensor(sXC[:, :, ri, ci, qi], inc) 585 | 586 | r_shp = sXC_shp[0], np.prod(sXC_shp[1:]) 587 | r = sXC.reshape(r_shp) 588 | return r, r_shp 589 | 590 | 591 | @pyll.scope.define_info(o_len=2) 592 | def slm_lpool_alpha((x, x_shp), 593 | ker_size=3, 594 | order=1, 595 | stride=1, 596 | alpha=0.0, 597 | ): 598 | """ 599 | lpool but with alpha-half-rectification 600 | """ 601 | assert x.dtype == 'float32' 602 | order=float(order) 603 | 604 | ker_shape = (ker_size, ker_size) 605 | 606 | xp = tensor.maximum(x - alpha, 0) 607 | xn = tensor.maximum(-x - alpha, 0) 608 | rp, r_shp = boxconv((xp ** order, x_shp), ker_shape) 609 | rn, r_shp = boxconv((xn ** order, x_shp), ker_shape) 610 | rp = rp ** (1. / order) 611 | rn = rn ** (1. / order) 612 | 613 | if stride > 1: 614 | # -- theano optimizations should turn this stride into conv2d 615 | # subsampling 616 | rp = rp[:, :, ::stride, ::stride] 617 | rn = rn[:, :, ::stride, ::stride] 618 | # intdiv is tricky... 
so just use numpy 619 | r_shp = np.empty(r_shp)[:, :, ::stride, ::stride].shape 620 | 621 | z_shp = (r_shp[0], 2 * r_shp[1], r_shp[2], r_shp[3]) 622 | z = tensor.zeros(z_shp, dtype=x.dtype) 623 | z = tensor.set_subtensor(z[:, :r_shp[1]], rp) 624 | z = tensor.set_subtensor(z[:, r_shp[1]:], rn) 625 | 626 | return z, z_shp 627 | 628 | 629 | @pyll.scope.define_info(o_len=2) 630 | def slm_gnorm((x, x_shp), 631 | remove_mean= False, 632 | div_method='euclidean', 633 | threshold=0.0, 634 | stretch=1.0, 635 | EPSILON=1e-4, 636 | across_channels=True, 637 | ): 638 | """ 639 | Global normalization, as opposed to local normalization 640 | """ 641 | 642 | threshold = float(threshold) 643 | stretch = float(stretch) 644 | 645 | if across_channels: 646 | size = x_shp[1] * x_shp[2] * x_shp[3] 647 | ssq = (x ** 2).sum(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x') 648 | else: 649 | size = x_shp[2] * x_shp[3] 650 | ssq = (x ** 2).sum(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x') 651 | 652 | if div_method == 'euclidean': 653 | if remove_mean: 654 | if across_channels: 655 | arr_sum = x.sum(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x') 656 | else: 657 | arr_sum = x.sum(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x') 658 | 659 | arr_num = x - arr_sum / size 660 | arr_div = EPSILON + tensor.sqrt( 661 | tensor.maximum(0, 662 | ssq - (arr_sum ** 2) / size)) 663 | else: 664 | arr_num = x 665 | arr_div = EPSILON + tensor.sqrt(ssq) 666 | else: 667 | raise NotImplementedError('div_method', div_method) 668 | 669 | if (hasattr(stretch, '__iter__') and (stretch != 1).any()) or stretch != 1: 670 | arr_num = arr_num * stretch 671 | arr_div = arr_div * stretch 672 | arr_div = tensor.switch(arr_div < (threshold + EPSILON), 1.0, arr_div) 673 | 674 | r = arr_num / arr_div 675 | r_shp = x_shp 676 | return r, r_shp 677 | 678 | 679 | @pyll.scope.define 680 | def contrast_normalize(patches, remove_mean, beta, hard_beta): 681 | X = patches 682 | if X.ndim != 2: 683 | raise TypeError('contrast_normalize requires flat patches') 684 | if remove_mean: 685 | xm = X.mean(1) 686 | else: 687 | xm = X[:,0] * 0 688 | Xc = X - xm[:, None] 689 | if 0: 690 | # -- for some reason the following sometimes uses gigs of RAM 691 | l2 = (Xc * Xc).sum(axis=1) 692 | else: 693 | l2 = np.zeros_like(Xc[:, 0]) 694 | for i in xrange(Xc.shape[1]): 695 | l2 += Xc[:, i] ** 2 696 | if hard_beta: 697 | div2 = np.maximum(l2, beta) 698 | else: 699 | div2 = l2 + beta 700 | Xc /= np.sqrt(div2[:, None]) 701 | foobar.append_ndarray_signature(Xc, 'contrast_normalize') 702 | return Xc 703 | 704 | 705 | @pyll.scope.define 706 | def random_patches(images, N, R, C, rng, channel_major=False, memlimit=None): 707 | """Return a stack of N image patches (channel major version)""" 708 | 709 | def N_with_memlimit(): 710 | if memlimit is not None: 711 | # -- memlimit in bytes 712 | sizelimit = memlimit / images.dtype.itemsize 713 | return min(N, sizelimit // (R * C * iF)) 714 | else: 715 | return N 716 | 717 | if channel_major: 718 | n_imgs, iF, iR, iC = images.shape 719 | N = N_with_memlimit() 720 | rval = np.empty((N, iF, R, C), dtype=images.dtype) 721 | else: 722 | n_imgs, iR, iC, iF = images.shape 723 | N = N_with_memlimit() 724 | rval = np.empty((N, R, C, iF), dtype=images.dtype) 725 | 726 | foobar.append_trace('random_patches dims', *rval.shape) 727 | foobar.append_randomstate('random_patches rng', rng) 728 | 729 | srcs = rng.randint(n_imgs, size=N) 730 | 731 | if R > iR or C > iC: 732 | raise InvalidDescription('cannot extract patches', (R, C)) 733 | roffsets = rng.randint(iR - R + 1, 
size=N) 734 | coffsets = rng.randint(iC - C + 1, size=N) 735 | # TODO: this can be done with one advanced index right? 736 | for rv_i, src_i, ro, co in zip(rval, srcs, roffsets, coffsets): 737 | if channel_major: 738 | rv_i[:] = images[src_i, :, ro: ro + R, co : co + C] 739 | else: 740 | rv_i[:] = images[src_i, ro: ro + R, co : co + C] 741 | foobar.append_ndarray_signature(rval, 'random_patches rval') 742 | return rval 743 | 744 | 745 | @pyll.scope.define_info(o_len=3) 746 | def patch_whitening_filterbank_X(patches, o_ndim, gamma, 747 | remove_mean, beta, hard_beta, 748 | ): 749 | """ 750 | patches - Image patches (can be uint8 pixels or floats) 751 | o_ndim - 2 to get matrix outputs, 4 to get image-stack outputs 752 | gamma - non-negative real to boost low-principle components 753 | 754 | remove_mean - see contrast_normalize 755 | beta - see contrast_normalize 756 | hard_beta - see contrast_normalize 757 | 758 | Returns: M, P, X 759 | M - mean of contrast-normalized patches 760 | P - whitening matrix / filterbank for contrast-normalized patches 761 | X - contrast-normalized patches 762 | 763 | """ 764 | # Algorithm from Coates' sc_vq_demo.m 765 | 766 | # -- patches -> column vectors 767 | X = patches.reshape(len(patches), -1).astype('float64') 768 | 769 | X = contrast_normalize(X, 770 | remove_mean=remove_mean, 771 | beta=beta, 772 | hard_beta=hard_beta) 773 | 774 | # -- ZCA whitening (with low-pass) 775 | logger.debug('patch_whitening_filterbank_X starting ZCA') 776 | M, _std = mean_and_std(X) 777 | Xm = X - M 778 | assert Xm.shape == X.shape 779 | logger.info('patch_whitening_filterbank_X starting ZCA: dot %s' % 780 | str(Xm.shape)) 781 | C = dot_f64(Xm.T, Xm) / (Xm.shape[0] - 1) 782 | logger.debug('patch_whitening_filterbank_X starting ZCA: eigh') 783 | D, V = np.linalg.eigh(C) 784 | logger.debug( 785 | 'patch_whitening_filterbank_X starting ZCA: dot %s' % str(V.shape)) 786 | P = dot_f32(np.sqrt(1.0 / (D + gamma)) * V, V.T) 787 | 788 | # -- return to image space 789 | if o_ndim == 4: 790 | M = M.reshape(patches.shape[1:]) 791 | P = P.reshape((P.shape[0],) + patches.shape[1:]) 792 | X = X.reshape((len(X),) + patches.shape[1:]) 793 | elif o_ndim == 2: 794 | pass 795 | else: 796 | raise ValueError('o_ndim not in (2, 4)', o_ndim) 797 | 798 | logger.debug('patch_whitening_filterbank_X -> done') 799 | 800 | foobar.append_ndarray_signature(M, 'patch_whitening_filterbank_X M') 801 | foobar.append_ndarray_signature(P, 'patch_whitening_filterbank_X P') 802 | foobar.append_ndarray_signature(X, 'patch_whitening_filterbank_X X') 803 | dtype = patches.dtype 804 | return M.astype(dtype), P.astype(dtype), X.astype(dtype) 805 | 806 | 807 | @pyll.scope.define_info(o_len=2) 808 | def fb_whitened_projections(patches, pwfX, n_filters, rseed, dtype): 809 | """ 810 | pwfX is the output of patch_whitening_filterbank_X with reshape=False 811 | 812 | M, and fb will be reshaped to match elements of patches 813 | """ 814 | M, P, patches_cn = pwfX 815 | if patches_cn.ndim != 2: 816 | raise TypeError('wrong shape for pwfX args, should be flattened', 817 | patches_cn.shape) 818 | rng = np.random.RandomState(rseed) 819 | foobar.append_randomstate('fb_whitened_projections', rng) 820 | 821 | D = rng.randn(n_filters, patches_cn.shape[1]) 822 | D = D / (np.sqrt((D ** 2).sum(axis=1))[:, None] + 1e-20) 823 | fb = dot_f32(D, P) 824 | fb.shape = (n_filters,) + patches.shape[1:] 825 | M.shape = patches.shape[1:] 826 | M = M.astype(dtype) 827 | fb = fb.astype(dtype) 828 | if fb.size == 0: 829 | raise ValueError('filterbank had 
size 0') 830 | foobar.append_ndarray_signature(M, 'fb_whitened_projections M') 831 | foobar.append_ndarray_signature(fb, 'fb_whitened_projections fb') 832 | return M, fb 833 | 834 | 835 | @pyll.scope.define_info(o_len=2) 836 | def fb_whitened_patches(patches, pwfX, n_filters, rseed, dtype): 837 | """ 838 | pwfX is the output of patch_whitening_filterbank_X with reshape=False 839 | 840 | M, and fb will be reshaped to match elements of patches 841 | 842 | """ 843 | M, P, patches_cn = pwfX 844 | rng = np.random.RandomState(rseed) 845 | foobar.append_randomstate('fb_whitened_patches', rng) 846 | d_elems = rng.randint(len(patches_cn), size=n_filters) 847 | D = dot_f64(patches_cn[d_elems] - M, P) 848 | D = D / (np.sqrt((D ** 2).sum(axis=1))[:, None] + 1e-20) 849 | fb = dot_f32(D, P) 850 | fb.shape = (n_filters,) + patches.shape[1:] 851 | M.shape = patches.shape[1:] 852 | M = M.astype(dtype) 853 | fb = fb.astype(dtype) 854 | if fb.size == 0: 855 | raise ValueError('filterbank had size 0') 856 | foobar.append_ndarray_signature(M, 'fb_whitened_patches M') 857 | foobar.append_ndarray_signature(fb, 'fb_whitened_patches fb') 858 | return M, fb 859 | 860 | 861 | @pyll.scope.define 862 | def pyll_theano_batched_lmap(pipeline, seq, batchsize, 863 | _debug_call_counts=None, 864 | print_progress_every=float('inf'), 865 | abort_on_rows_larger_than=None, 866 | speed_thresh=None, 867 | x_dtype='float32', 868 | ): 869 | """ 870 | This function returns a skdata.larray.lmap object whose function 871 | is defined by a theano expression. 872 | 873 | The theano expression will be built and compiled specifically for the 874 | dimensions of the given `seq`. Therefore, in_rows, and out_rows should 875 | actually be a *pyll* graph, that evaluates to a theano graph. 876 | """ 877 | 878 | in_shp = (batchsize,) + seq.shape[1:] 879 | batch = np.zeros(in_shp, dtype=x_dtype) 880 | s_ibatch = theano.shared(batch) 881 | s_xi = theano.tensor.as_tensor_variable(s_ibatch).type() 882 | s_N = s_xi.shape[0] 883 | s_X = theano.tensor.set_subtensor(s_ibatch[:s_N], s_xi) 884 | #print 'PIPELINE', pipeline 885 | thing = pipeline((s_X, in_shp)) 886 | #print 'THING' 887 | #print thing 888 | #print '===' 889 | s_obatch, oshp = pyll.rec_eval(thing) 890 | assert oshp[0] == batchsize 891 | logger.info('batched_lmap oshp %s' % str(oshp)) 892 | if abort_on_rows_larger_than: 893 | rowlen = np.prod(oshp[1:]) 894 | if rowlen > abort_on_rows_larger_than: 895 | raise ValueError('rowlen %i exceeds limit %i' % ( 896 | rowlen, abort_on_rows_larger_than)) 897 | 898 | # Compile a function that takes a variable number of elements in, 899 | # returns the same number of processed elements out, 900 | # but does all internal computations using a fixed number of elements, 901 | # because convolutions are fastest when they're hard-coded to a certain 902 | # size. 
903 | logger.debug('pyll_theano_batched_lmap compiling fn') 904 | _fn = theano.function([theano.Param(s_xi, strict=True)], 905 | s_obatch[:s_N], 906 | updates={ 907 | s_ibatch: s_X, # this allows the inc_subtensor to be in-place 908 | }) 909 | logger.debug('pyll_theano_batched_lmap compiling fn -> done') 910 | 911 | sums = {'elems': 0, 'times': 0.0} 912 | if speed_thresh is None: 913 | time_fn = _fn 914 | else: 915 | def time_fn(X): 916 | t0 = time.time() 917 | if str(X.dtype) != x_dtype: 918 | print 'time_fn dtype problem', X.dtype, x_dtype 919 | rval = _fn(X) 920 | dt = time.time() - t0 921 | #print 'DEBUG time_fn dt:', dt 922 | sums['elems'] += len(X) 923 | sums['times'] += dt 924 | return rval 925 | 926 | def raise_if_slow(): 927 | exc = EvalTimeout( 928 | 'batched_lmap failed to compute %i elements in %f secs' 929 | % (speed_thresh['elements'], speed_thresh['seconds'])) 930 | if sums['elems'] >= speed_thresh['elements']: 931 | observed_ratio = sums['elems'] / sums['times'] 932 | required_ratio = (speed_thresh['elements'] / 933 | speed_thresh['seconds']) 934 | if observed_ratio < required_ratio: 935 | raise exc 936 | else: 937 | sums['elems'] = 0 938 | sums['times'] = 0.0 939 | 940 | def fn_1(x): 941 | if _debug_call_counts: 942 | _debug_call_counts['fn_1'] += 1 943 | return time_fn(x[None, :, :, :])[0] 944 | 945 | attrs = { 946 | 'shape': oshp[1:], 947 | 'ndim': len(oshp) -1, 948 | 'dtype': s_obatch.dtype } 949 | def rval_getattr(attr, objs): 950 | # -- objs don't matter to the structure of the return value 951 | try: 952 | return attrs[attr] 953 | except KeyError: 954 | raise AttributeError(attr) 955 | 956 | fn_1.rval_getattr = rval_getattr 957 | 958 | last_print_time = [time.time()] 959 | 960 | def check_for_print(offset, X): 961 | curtime = time.time() 962 | if (curtime - last_print_time[0]) > print_progress_every: 963 | logger.info('pyll_theano_batched_lmap.f_map %i %i' % ( 964 | offset, len(X))) 965 | last_print_time[0] = curtime 966 | 967 | if speed_thresh is not None: 968 | raise_if_slow() 969 | 970 | def f_map(X): 971 | if _debug_call_counts: 972 | _debug_call_counts['f_map'] += 1 973 | 974 | if len(X) == batchsize: 975 | check_for_print(offset=0, X=X) 976 | return time_fn(X) 977 | 978 | rval = np.empty((len(X),) + oshp[1:], dtype=s_obatch.dtype) 979 | offset = 0 980 | while offset < len(X): 981 | check_for_print(offset, X) 982 | xi = X[offset: offset + batchsize] 983 | fn_i = time_fn(xi) 984 | if not np.all(np.isfinite(fn_i)): 985 | raise ValueError('non-finite features') 986 | rval[offset:offset + len(xi)] = fn_i 987 | offset += len(xi) 988 | return rval 989 | 990 | return larray.lmap(fn_1, seq, f_map=f_map) 991 | 992 | 993 | @pyll.scope.define 994 | def np_transpose(obj, arg): 995 | return obj.transpose(*arg) 996 | 997 | 998 | @pyll.scope.define 999 | def np_RandomState(rseed): 1000 | rval = np.random.RandomState(rseed) 1001 | return rval 1002 | 1003 | 1004 | @pyll.scope.define 1005 | def flatten_elems(obj): 1006 | return obj.reshape(len(obj), -1) 1007 | 1008 | 1009 | @pyll.scope.define 1010 | def model_predict(mdl, X): 1011 | return mdl.predict(X) 1012 | 1013 | 1014 | @pyll.scope.define 1015 | def model_decisions(mdl, X): 1016 | return mdl.decisions(X) 1017 | 1018 | 1019 | @pyll.scope.define 1020 | def pickle_dumps(obj, protocol=None): 1021 | if protocol is None: 1022 | return cPickle.dumps(obj) 1023 | else: 1024 | return cPickle.dumps(obj, protocol=protocol) 1025 | 1026 | 1027 | @pyll.scope.define 1028 | def error_rate(pred, y): 1029 | return np.mean(pred != y) 1030 | 
1031 | 1032 | @pyll.scope.define 1033 | def print_ndarray_summary(msg, X): 1034 | print msg, X.dtype, X.shape, X.min(), X.max(), X.mean() 1035 | return X 1036 | 1037 | 1038 | @pyll.scope.define_info(o_len=2) 1039 | def slm_uniform_M_FB(nfilters, size, channels, rseed, normalize, dtype, 1040 | ret_cmajor): 1041 | print 'Allocating uniform filterbank', nfilters, size, channels 1042 | M = np.asarray(0).reshape((1, 1, 1)).astype(dtype) 1043 | FB = alloc_random_uniform_filterbank( 1044 | nfilters, size, size, channels, 1045 | dtype=dtype, 1046 | rseed=rseed, 1047 | normalize=normalize) 1048 | if FB.size == 0: 1049 | raise ValueError('filterbank had size 0') 1050 | if ret_cmajor: 1051 | return M, FB.transpose(0, 3, 1, 2) 1052 | else: 1053 | return M, FB 1054 | 1055 | 1056 | @pyll.scope.define 1057 | def larray_cache_memory(obj): 1058 | return larray.cache_memory(obj) 1059 | 1060 | 1061 | @pyll.scope.define 1062 | def larray_cache_memmap(obj, name, basedir=None, msg=None): 1063 | return larray.cache_memmap(obj, name, basedir=basedir, msg=msg) 1064 | 1065 | 1066 | @pyll.scope.define 1067 | def ceildiv(a, b): 1068 | return int(np.ceil(float(a) / float(b))) 1069 | 1070 | 1071 | @pyll.scope.define 1072 | def view2_worth_calculating(loss, ctrl, thresh_loss, thresh_rank): 1073 | # 1074 | # Decide whether to bother calculating the view2 score, which is slow. 1075 | # 1076 | 1077 | if thresh_loss is not None and loss > thresh_loss: 1078 | logger.info('worth_calculating_view2: False (loss %f > thresh %f)' % ( 1079 | loss, thresh_loss)) 1080 | return False 1081 | elif ctrl is None: 1082 | logger.info('worth_calculating_view2: True (ctrl is None)') 1083 | return True 1084 | else: 1085 | trials = ctrl.trials 1086 | # -- old logic 1087 | if hasattr(trials, 'handle'): 1088 | # -- hack for mongodb 1089 | query = { 1090 | 'result.status': hyperopt.STATUS_OK, 1091 | 'exp_key': trials._exp_key, 1092 | } 1093 | #docs = list(trials.handle.jobs.find(query, {'result.loss': 1})) 1094 | docs = isvm_boosting.BoostHelper.query_MongoTrials(trials, 1095 | query={ 1096 | 'result.status': hyperopt.STATUS_OK, 1097 | #'misc.boosting.continues': {'$in': [None, parent_tid]}, 1098 | }) 1099 | else: 1100 | # -- impl for local trials object 1101 | trials.refresh() 1102 | docs = [d for d in trials.trials 1103 | if ( 1104 | 'result' in d 1105 | and d['result']['status'] == hyperopt.STATUS_OK 1106 | and d['exp_key'] == trials._exp_key 1107 | )] 1108 | bh = isvm_boosting.BoostHelper(docs) 1109 | cur_parent = bh.continues(ctrl.current_trial) 1110 | cur_parent_tid = cur_parent['tid'] if cur_parent else None 1111 | best_sibling = bh.best_child(cur_parent) 1112 | if best_sibling: 1113 | logger.info( 1114 | 'view2_worth_calculating cur_parent:%s best_sibling:%s(%s)' 1115 | % (cur_parent_tid, best_sibling['tid'], 1116 | best_sibling['result']['loss'])) 1117 | if loss <= best_sibling['result']['loss']: 1118 | return True 1119 | else: 1120 | if thresh_rank > 1: 1121 | raise NotImplementedError('thresh_rank') 1122 | return False 1123 | else: 1124 | logger.info( 1125 | 'view2_worth_calculating cur_parent:%s, best_child:None' 1126 | % cur_parent_tid) 1127 | return True 1128 | #losses = [d['result']['loss'] for d in docs] 1129 | #losses.sort() 1130 | #if len(losses) < thresh_rank: 1131 | #logger.info('worth_calculating_view2: True (small len(losses))') 1132 | #return True 1133 | #else: 1134 | #rank = np.searchsorted(losses, loss) 1135 | #rval = rank < thresh_rank 1136 | #logger.info('worth_calculating_view2: %s (rank %i / %i)' % ( 1137 | #rval, 
rank, len(losses))) 1138 | #return rval 1139 | 1140 | 1141 | @pyll.scope.define 1142 | def average_row_l2norm(X): 1143 | return np.sqrt((np.asarray(X) ** 2).sum(axis=1)).mean() 1144 | 1145 | --------------------------------------------------------------------------------