├── hpconvnet ├── __init__.py ├── comparisons.py ├── shovel_util.py ├── foobar.py ├── cifar10.py ├── lfw.py ├── isvm_binary.py ├── isvm_boosting.py ├── utils.py ├── isvm_precomputed.py ├── slm_visitor.py ├── slm_visitor_primal.py ├── slm_visitor_esvc.py ├── slm.py ├── isvm_multi.py └── pyll_slm.py ├── requirements.txt ├── .gitignore ├── shovel ├── cifar10.py └── lfw.py ├── setup.py └── README.md /hpconvnet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | cython 3 | scipy 4 | nose 5 | cgen 6 | codepy 7 | coverage 8 | decorator 9 | lockfile 10 | matplotlib 11 | pymongo 12 | pytools 13 | wsgiref 14 | fabric 15 | shovel 16 | joblib 17 | networkx 18 | pillow 19 | skdata 20 | hyperopt 21 | scikit-image 22 | theano 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /hpconvnet/comparisons.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparison operators for pairwise image tasks (e.g. lfw, pubfig). 
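Each comparison maps a pair of per-image feature arrays (x, y) to a single
flattened feature vector (concatenation, elementwise product, difference,
absolute difference, etc.); `get_num_features(shp)` reports the resulting
dimensionality for a 4-D batch shape `shp`.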
3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | class Comparison(object): 9 | def get_num_features(self,shp): 10 | return shp[1] * shp[2] * shp[3] 11 | 12 | 13 | class Concatenate(Comparison): 14 | def get_num_features(self, shp): 15 | return 2 * shp[1] * shp[2] * shp[3] 16 | def __call__(self, x, y): 17 | return np.concatenate([x.flatten(), y.flatten()]) 18 | concatenate = Concatenate() 19 | 20 | 21 | class Mult(Comparison): 22 | def __call__(self, x, y): 23 | return x.flatten() * y.flatten() 24 | mult = Mult() 25 | 26 | 27 | class Diff(Comparison): 28 | def __call__(self, x, y): 29 | return x.flatten() - y.flatten() 30 | diff = Diff() 31 | 32 | 33 | class Absdiff(Comparison): 34 | def __call__(self, x, y): 35 | return np.abs(x.flatten() - y.flatten()) 36 | absdiff = Absdiff() 37 | 38 | 39 | class Sqrtabsdiff(Comparison): 40 | def __call__(self, x, y): 41 | return np.sqrt(np.abs(x.flatten() - y.flatten())) 42 | sqrtabsdiff = Sqrtabsdiff() 43 | 44 | 45 | class Sqdiff(Comparison): 46 | def __call__(self, x, y): 47 | return (x.flatten() - y.flatten())**2 48 | sqdiff = Sqdiff() 49 | -------------------------------------------------------------------------------- /hpconvnet/shovel_util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess 4 | import sys 5 | import time 6 | 7 | def configure_logging(level=logging.INFO, stream=sys.stdout, prefix=''): 8 | if isinstance(level, basestring): 9 | level = getattr(logging, level) 10 | root_handler = logging.StreamHandler(stream) 11 | root_formatter = logging.Formatter( 12 | prefix + '%(levelname)s (%(name)s): %(message)s' 13 | ) 14 | root_handler.setFormatter(root_formatter) 15 | root_logger = logging.getLogger() 16 | root_logger.setLevel(level) 17 | root_logger.addHandler(root_handler) 18 | 19 | logging.getLogger('shovel').propagate = False 20 | logging.getLogger('skimage').propagate = False 21 | 22 | 23 | def launch_workers_helper(host, port, dbname, N, walltime, rsync_data_local, 24 | mem=None): 25 | text = """#!/bin/bash 26 | %(rsync_data_local)s 27 | . 
VENV/eccv12/bin/activate 28 | VENV/eccv12/src/eccv12/hyperopt/bin/hyperopt-mongo-worker \ 29 | --mongo=%(host)s:%(port)s/%(dbname)s \ 30 | --workdir=/scratch_local/eccv12.workdir \ 31 | --reserve-timeout=180.0 \ 32 | --max-consecutive-failures=4 33 | """ % locals() 34 | 35 | qsub_script_name = '.worker.sh.%.3f' % time.time() 36 | 37 | script = open(qsub_script_name, 'w') 38 | script.write(text) 39 | script.close() 40 | 41 | subprocess.check_call(['chmod', '+x', qsub_script_name]) 42 | qsub_cmd = ['qsub', '-lnodes=1:gpus=1', '-lwalltime=%s' % walltime] 43 | if mem is not None: 44 | qsub_cmd.append('-lmem=%s' % mem) 45 | qsub_cmd.extend( 46 | ['-e', os.path.expanduser('~/.qsub/%s.err' % qsub_script_name)]) 47 | qsub_cmd.extend( 48 | ['-o', os.path.expanduser('~/.qsub/%s.out' % qsub_script_name)]) 49 | if int(N) > 1: 50 | qsub_cmd.extend(['-t', '1-%s' % N]) 51 | qsub_cmd.append(qsub_script_name) 52 | print qsub_cmd 53 | subprocess.check_call(qsub_cmd) 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /hpconvnet/foobar.py: -------------------------------------------------------------------------------- 1 | """ 2 | foobar.py - numeric tracing utilites 3 | 4 | """ 5 | import copy 6 | import logging 7 | import numpy as np 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | _trace_target = [] 12 | 13 | _trace = [] 14 | 15 | trace_enabled = True 16 | trace_verify = False 17 | 18 | def trace_eq(a, b): 19 | if len(a) != len(b): 20 | raise ValueError('Length mismatch', (a, b)) 21 | rval = True 22 | for i, (ai, bi) in enumerate(zip(a, b)): 23 | if isinstance(ai, basestring): 24 | if i == 0: 25 | if ai != bi: 26 | logger.error('TRACE: %s != %s' % (ai, bi)) 27 | rval = False 28 | else: 29 | if ai != bi: 30 | logger.warn('TRACE: %s != %s' % (ai, bi)) 31 | # -- we're letting this go for now... 
32 | elif isinstance(ai, float): 33 | if not np.allclose(ai, bi, atol=1e-3, rtol=1e-3): 34 | logger.error('TRACE: %s != %s' % (ai, bi)) 35 | rval = False 36 | elif isinstance(ai, int): 37 | if not ai == bi: 38 | logger.error('TRACE: %s != %s' % (ai, bi)) 39 | rval = False 40 | elif isinstance(ai, (tuple, list)): 41 | if not trace_eq(ai, bi): 42 | pass # -- letting this go for now 43 | return rval 44 | 45 | 46 | 47 | def verify_last_trace(): 48 | if trace_verify: 49 | target = _trace_target[len(_trace)-1] 50 | logger.info("Verifying trace: %s" % str(target)) 51 | assert trace_eq(target, _trace[-1]) 52 | 53 | 54 | def append_trace(*args): 55 | if trace_enabled: 56 | logger.info("Appending trace: %s" % str(args)) 57 | _trace.append(args) 58 | verify_last_trace() 59 | 60 | 61 | def append_ndarray_signature(x, *args): 62 | assert isinstance(x, np.ndarray) 63 | sig = (str(x.dtype), x.shape) 64 | if x.size: 65 | sig = sig + (x.min(), x.max(), x.mean()) 66 | return append_trace(*(args + sig)) 67 | 68 | 69 | def append_randomstate(msg, rng, *args): 70 | sample = copy.deepcopy(rng).randn() 71 | return append_trace(msg, sample, *args) 72 | 73 | 74 | def reset_trace(): 75 | _trace[:] = [] 76 | 77 | 78 | def set_trace_target(trace_target): 79 | _trace_target[:] = trace_target 80 | 81 | -------------------------------------------------------------------------------- /hpconvnet/cifar10.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import functools 3 | import logging 4 | import os 5 | 6 | import numpy as np 7 | 8 | from skdata.cifar10.views import StratifiedImageClassification 9 | 10 | import hyperopt 11 | from hyperopt import pyll 12 | 13 | import pyll_slm # adds the symbols to pyll.scope 14 | 15 | from .slm_visitor_primal import uslm_eval_helper 16 | from .slm import uslm_domain 17 | 18 | 19 | dumps = functools.partial(cPickle.dumps, protocol=-1) 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class DataView(object): 24 | pass 25 | 26 | pyll_data_view = pyll.as_apply(DataView) 27 | 28 | 29 | @pyll.scope.define 30 | def cifar10_unsup_images(data_view, N): 31 | # -- extract training images for unsupervised learning, 32 | # and put them into channel-major format 33 | imgs = np.asarray( 34 | data_view.dataset._pixels[ 35 | data_view.fit_idxs[:N]]) 36 | assert str(imgs.dtype) == 'uint8' 37 | rval = imgs.transpose(0, 3, 1, 2).copy() 38 | assert rval.shape[1] in (1, 3) # -- channels 39 | return rval 40 | 41 | 42 | def build_search_space( 43 | max_n_features, 44 | bagging_fraction, 45 | n_unsup, 46 | abort_on_rows_larger_than, 47 | batched_lmap_speed_thresh=None, 48 | batchsize=20, 49 | output_sizes=(32, 64, 128, 200), 50 | permit_affine_warp=True, 51 | ): 52 | if batched_lmap_speed_thresh is None: 53 | batched_lmap_speed_thresh = {'seconds': 2.0, 'elements': 150} 54 | Xcm = pyll.scope.cifar10_unsup_images(pyll_data_view, n_unsup) 55 | # -- currently these sizes are in *elements* 56 | search_space = { 57 | 'data_view': pyll_data_view, 58 | 'pipeline': uslm_domain( 59 | Xcm=Xcm, 60 | chmjr_image_shape=(3, 32, 32), 61 | output_sizes=output_sizes, 62 | batchsize=batchsize, 63 | max_n_features=max_n_features, 64 | batched_lmap_speed_thresh=batched_lmap_speed_thresh, 65 | permit_affine_warp=permit_affine_warp, 66 | abort_on_rows_larger_than=abort_on_rows_larger_than, 67 | ), 68 | 'batchsize': batchsize, 69 | 'max_n_features': max_n_features, 70 | 'ctrl': hyperopt.Domain.pyll_ctrl, 71 | 'batched_lmap_speed_thresh': batched_lmap_speed_thresh, 72 | 
'bagging_fraction': bagging_fraction, 73 | } 74 | return search_space 75 | 76 | 77 | def hybrid_loss(visitor, bagging_fraction): 78 | lossres = visitor._results['loss_indexed_image_classification'] 79 | loss_ensemble = lossres['val']['fit']['val']['using_history']['erate'] 80 | loss_member = lossres['val']['fit']['val']['not_using_history']['erate'] 81 | loss = (bagging_fraction * loss_member 82 | + (1 - bagging_fraction) * loss_ensemble) 83 | return loss 84 | 85 | 86 | def true_loss_fn(visitor): 87 | lossres = visitor._results['loss_indexed_image_classification'] 88 | rval = lossres['tst']['sel']['None']['using_history']['erate'] 89 | return rval 90 | 91 | 92 | @hyperopt.fmin_pass_expr_memo_ctrl 93 | def uslm_eval( 94 | expr, memo, ctrl, 95 | data_fraction=1.0, 96 | assume_promising=False, 97 | ): 98 | if ctrl.current_trial is None: 99 | assume_promising = True 100 | data_view = StratifiedImageClassification( 101 | dtype='uint8', 102 | n_train=int(40000 * data_fraction), 103 | n_valid=int(10000 * data_fraction), 104 | n_test=int(10000 * data_fraction), 105 | channel_major=False) 106 | 107 | memmap_name_template = 'cifar10_%i_%i' 108 | 109 | return uslm_eval_helper(expr, memo, ctrl, data_fraction, assume_promising, 110 | data_view, memmap_name_template, DataView, 111 | hybrid_loss, true_loss_fn) 112 | -------------------------------------------------------------------------------- /shovel/cifar10.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import sys 4 | 5 | logger = logging.getLogger(__name__) 6 | logging.basicConfig(stream=sys.stdout, level=logging.INFO) 7 | 8 | import numpy as np 9 | 10 | from shovel import task 11 | 12 | import hyperopt 13 | from hyperopt import Trials 14 | from hyperopt.mongoexp import MongoTrials 15 | 16 | import hpconvnet.cifar10 17 | import hpconvnet.slm 18 | 19 | 20 | def make_trials(host, port, exp_key, refresh=True, dbname='dbname'): 21 | if (host, port) == (None, None): 22 | trials = Trials() 23 | else: 24 | if dbname == 'dbname': 25 | logger.warn('You probably want to override the default dbname') 26 | trials = MongoTrials( 27 | 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname), 28 | exp_key=exp_key, 29 | refresh=refresh) 30 | return trials 31 | 32 | 33 | def dindex(d, *keys): 34 | """ 35 | dindex(d, a, b, c) returns d[a][b][c] 36 | 37 | This function makes it easier to type long indexing sequences. 38 | """ 39 | if keys: 40 | return dindex(d[keys[0]], *keys[1:]) 41 | else: 42 | return d 43 | 44 | 45 | @task 46 | def small_random_run(): 47 | # -- This is a smoke test to make sure that a lot of code paths actually 48 | # run. Some of the jobs will fail, some should succeed, the data will be 49 | # loaded and some SVMs will be fit etc. Classifier performance is expected 50 | # to be poor (70% error?), because we're using just 10% of the data and 51 | # only trying a few random architectures. 
52 | # 53 | # Expected running time on CPU: ~10 mins 54 | 55 | search_space = hpconvnet.cifar10.build_search_space( 56 | max_n_features=4500, # -- smaller than normal 57 | bagging_fraction=0.5, # -- normal 58 | n_unsup=2000, # -- smaller than normal 59 | abort_on_rows_larger_than=50 * 1000, # -- smaller 60 | ) 61 | trials = Trials() 62 | hyperopt.fmin( 63 | fn=hyperopt.partial( 64 | hpconvnet.cifar10.uslm_eval, 65 | data_fraction=0.1, # -- smaller than normal 66 | ), 67 | space=search_space, 68 | algo=hyperopt.rand.suggest, 69 | max_evals=10, 70 | trials=trials) 71 | 72 | # -- Getting this far without crashing is a good indication that 73 | # everything has been installed and is probably running correctly. 74 | 75 | 76 | @task 77 | def tpe_driver( 78 | host, 79 | port, 80 | max_evals=10000, 81 | exp_key_base='cifar10_tpe' 82 | ): 83 | 84 | max_evals=int(max_evals) 85 | min_ok_per_round = int(min_ok_per_round) 86 | 87 | tpe_suggest=functools.partial( 88 | hyperopt.tpe.suggest, 89 | n_startup_jobs=50, # -- number of random jobs before optimization 90 | ) 91 | search_space = hpconvnet.cifar10.build_search_space( 92 | max_n_features=4000, 93 | bagging_fraction=1.0, 94 | n_unsup=7500, 95 | abort_on_rows_larger_than=500 * 1000, # -- elements 96 | output_sizes=(32, 64), 97 | ) 98 | hyperopt.fmin( 99 | fn=hpconvnet.cifar10.uslm_eval, 100 | space=search_space, 101 | algo=tpe_suggest, 102 | max_evals=max_evals, 103 | trials=make_trials( 104 | host, 105 | port, 106 | exp_key=exp_key, 107 | ), 108 | ) 109 | 110 | @task 111 | def best_trial(host, port, exp_key='cifar10_tpe'): 112 | trials = make_trials(host, port, exp_key=exp_key) 113 | print 'Number of trials so far', len(trials) 114 | if len(trials) == 0: 115 | print ("(Hint: did you provide with the right exp_key? Used: %s)" % 116 | exp_key) 117 | 118 | try: 119 | best_trial = trials.best_trial 120 | print 'Best trial validation error rate', best_trial['result']['loss'] 121 | print 'Best trial test error rate', best_trial['result']['true_loss'] 122 | except ValueError: 123 | pass 124 | 125 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ distribute- and pip-enabled setup.py """ 5 | 6 | import logging 7 | import os 8 | import re 9 | 10 | # ----- overrides ----- 11 | 12 | # set these to anything but None to override the automatic defaults 13 | packages = None 14 | package_name = None 15 | package_data = None 16 | scripts = None 17 | requirements_file = None 18 | requirements = None 19 | dependency_links = None 20 | 21 | # --------------------- 22 | 23 | 24 | # ----- control flags ----- 25 | 26 | # fallback to setuptools if distribute isn't found 27 | setup_tools_fallback = True 28 | 29 | # don't include subdir named 'tests' in package_data 30 | skip_tests = True 31 | 32 | # print some extra debugging info 33 | debug = True 34 | 35 | # ------------------------- 36 | 37 | if debug: logging.basicConfig(level=logging.DEBUG) 38 | # distribute import and testing 39 | try: 40 | import distribute_setup 41 | distribute_setup.use_setuptools() 42 | logging.debug("distribute_setup.py imported and used") 43 | except ImportError: 44 | # fallback to setuptools? 
45 | # distribute_setup.py was not in this directory 46 | if not (setup_tools_fallback): 47 | import setuptools 48 | if not (hasattr(setuptools,'_distribute') and \ 49 | setuptools._distribute): 50 | raise ImportError("distribute was not found and fallback to setuptools was not allowed") 51 | else: 52 | logging.debug("distribute_setup.py not found, defaulted to system distribute") 53 | else: 54 | logging.debug("distribute_setup.py not found, defaulting to system setuptools") 55 | 56 | import setuptools 57 | 58 | def find_scripts(): 59 | return [s for s in setuptools.findall('scripts/') if os.path.splitext(s)[1] != '.pyc'] 60 | 61 | def package_to_path(package): 62 | """ 63 | Convert a package (as found by setuptools.find_packages) 64 | e.g. "foo.bar" to usable path 65 | e.g. "foo/bar" 66 | 67 | No idea if this works on windows 68 | """ 69 | return package.replace('.','/') 70 | 71 | def find_subdirectories(package): 72 | """ 73 | Get the subdirectories within a package 74 | This will include resources (non-submodules) and submodules 75 | """ 76 | try: 77 | subdirectories = os.walk(package_to_path(package)).next()[1] 78 | except StopIteration: 79 | subdirectories = [] 80 | return subdirectories 81 | 82 | def subdir_findall(dir, subdir): 83 | """ 84 | Find all files in a subdirectory and return paths relative to dir 85 | 86 | This is similar to (and uses) setuptools.findall 87 | However, the paths returned are in the form needed for package_data 88 | """ 89 | strip_n = len(dir.split('/')) 90 | path = '/'.join((dir, subdir)) 91 | return ['/'.join(s.split('/')[strip_n:]) for s in setuptools.findall(path)] 92 | 93 | def find_package_data(packages): 94 | """ 95 | For a list of packages, find the package_data 96 | 97 | This function scans the subdirectories of a package and considers all 98 | non-submodule subdirectories as resources, including them in 99 | the package_data 100 | 101 | Returns a dictionary suitable for setup(package_data=) 102 | """ 103 | package_data = {} 104 | for package in packages: 105 | package_data[package] = [] 106 | for subdir in find_subdirectories(package): 107 | if '.'.join((package, subdir)) in packages: # skip submodules 108 | logging.debug("skipping submodule %s/%s" % (package, subdir)) 109 | continue 110 | if skip_tests and (subdir == 'tests'): # skip tests 111 | logging.debug("skipping tests %s/%s" % (package, subdir)) 112 | continue 113 | package_data[package] += subdir_findall(package_to_path(package), subdir) 114 | return package_data 115 | 116 | def parse_requirements(file_name): 117 | """ 118 | from: 119 | http://cburgmer.posterous.com/pip-requirementstxt-and-setuppy 120 | """ 121 | requirements = [] 122 | with open(file_name, 'r') as f: 123 | for line in f: 124 | if re.match(r'(\s*#)|(\s*$)', line): continue 125 | if re.match(r'\s*-e\s+', line): 126 | requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$',\ 127 | r'\1', line).strip()) 128 | elif re.match(r'\s*-f\s+', line): 129 | pass 130 | else: 131 | requirements.append(line.strip()) 132 | return requirements 133 | 134 | def parse_dependency_links(file_name): 135 | """ 136 | from: 137 | http://cburgmer.posterous.com/pip-requirementstxt-and-setuppy 138 | """ 139 | dependency_links = [] 140 | with open(file_name) as f: 141 | for line in f: 142 | if re.match(r'\s*-[ef]\s+', line): 143 | dependency_links.append(re.sub(r'\s*-[ef]\s+',\ 144 | '', line)) 145 | return dependency_links 146 | 147 | # ----------- Override defaults here ---------------- 148 | if packages is None: packages = setuptools.find_packages() 149 
| 150 | if len(packages) == 0: raise Exception("No valid packages found") 151 | 152 | if package_name is None: package_name = packages[0] 153 | 154 | if package_data is None: package_data = find_package_data(packages) 155 | 156 | if scripts is None: scripts = find_scripts() 157 | 158 | if requirements_file is None: 159 | requirements_file = 'requirements.txt' 160 | 161 | if os.path.exists(requirements_file): 162 | if requirements is None: 163 | requirements = parse_requirements(requirements_file) 164 | if dependency_links is None: 165 | dependency_links = parse_dependency_links(requirements_file) 166 | else: 167 | if requirements is None: 168 | requirements = [] 169 | if dependency_links is None: 170 | dependency_links = [] 171 | 172 | if debug: 173 | logging.debug("Module name: %s" % package_name) 174 | for package in packages: 175 | logging.debug("Package: %s" % package) 176 | logging.debug("\tData: %s" % str(package_data[package])) 177 | logging.debug("Scripts:") 178 | for script in scripts: 179 | logging.debug("\tScript: %s" % script) 180 | logging.debug("Requirements:") 181 | for req in requirements: 182 | logging.debug("\t%s" % req) 183 | logging.debug("Dependency links:") 184 | for dl in dependency_links: 185 | logging.debug("\t%s" % dl) 186 | 187 | setuptools.setup( 188 | name = package_name, 189 | version = 'dev', 190 | packages = packages, 191 | scripts = scripts, 192 | 193 | package_data = package_data, 194 | include_package_data = True, 195 | 196 | install_requires = requirements, 197 | dependency_links = dependency_links 198 | ) 199 | 200 | -------------------------------------------------------------------------------- /shovel/lfw.py: -------------------------------------------------------------------------------- 1 | """ 2 | Driver scripts for LFW experiments 3 | 4 | """ 5 | 6 | import numpy as np 7 | from functools import partial 8 | 9 | from shovel import task 10 | 11 | import hyperopt 12 | from hyperopt import Trials 13 | from hyperopt.mongoexp import MongoTrials, MongoCtrl 14 | 15 | from hpconvnet.shovel_util import configure_logging 16 | import hpconvnet.lfw 17 | 18 | configure_logging('INFO') 19 | dbname = hpconvnet.lfw.dbname 20 | 21 | 22 | def make_trials(host, port, exp_key, refresh=True): 23 | if (host, port) == (None, None): 24 | trials = Trials() 25 | else: 26 | trials = MongoTrials( 27 | 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname), 28 | exp_key=exp_key, 29 | refresh=refresh) 30 | return trials 31 | 32 | 33 | search_space = partial( 34 | hpconvnet.lfw.build_search_space, 35 | max_n_features=16000, 36 | trn='DevTrain', # -- split used for unsupervised images 37 | n_unsup=300, # -- number of images from which to draw patches 38 | ) 39 | 40 | 41 | def slm_visitor_lfw_partial( 42 | max_n_per_class, 43 | maybe_test_view2=False, # -- this still takes too much memory 44 | assume_promising=False, 45 | foobar_trace_target=None, 46 | ): 47 | # -- this curries and re-decorates hpconvnet.lfw.slm_visitor_lfw 48 | # so that we can pass it to fmin() 49 | if max_n_per_class is not None: 50 | max_n_per_class = int(max_n_per_class) 51 | return hyperopt.partial( 52 | hpconvnet.lfw.slm_visitor_lfw, 53 | max_n_per_class=max_n_per_class, 54 | maybe_test_view2=maybe_test_view2, 55 | assume_promising=assume_promising, 56 | foobar_trace_target=foobar_trace_target, 57 | ) 58 | 59 | 60 | @task 61 | def random_driver(host=None, port=None, max_evals=10000, max_n_per_class=None): 62 | hyperopt.fmin( 63 | fn=slm_visitor_lfw_partial(max_n_per_class), 64 | 
space=search_space(bagging_fraction=1.0), 65 | algo=hyperopt.rand.suggest, 66 | max_evals=max_evals, 67 | trials=make_trials(host, port, exp_key='random')) 68 | 69 | 70 | @task 71 | def tpe_driver(host=None, port=None, max_evals=10000, max_n_per_class=None): 72 | tpe_suggest=partial( 73 | hyperopt.tpe.suggest, 74 | n_startup_jobs=50, # -- number of random jobs before optimization 75 | ) 76 | hyperopt.fmin( 77 | fn=slm_visitor_lfw_partial(max_n_per_class), 78 | space=search_space(bagging_fraction=1.0), 79 | algo=tpe_suggest, 80 | max_evals=max_evals, 81 | trials=make_trials(host, port, exp_key='random')) 82 | 83 | 84 | @task 85 | def view2(host, port, exp_key, 86 | bagging_fraction, 87 | max_n_per_class=None, 88 | maybe_test_view2=True, 89 | assume_promising=True, 90 | tid=None, 91 | fake=False, 92 | ): 93 | fake = int(fake) 94 | real_trials = make_trials(host, port, exp_key) 95 | print 'n. real trials', len(real_trials) 96 | if tid is None: 97 | best_trial = real_trials.best_trial 98 | else: 99 | try: 100 | best_trial = [t for t in real_trials.trials if t['tid'] == int(tid)][0] 101 | except IndexError: 102 | print [t['tid'] for t in real_trials.trials] 103 | print 'Best trial' 104 | print ' ["tid"]', best_trial['tid'] 105 | best_result = best_trial['result'] 106 | print 'Best trial had loss', best_result['loss'] 107 | best_trace = best_result.get('trace') 108 | if 0: 109 | print ' ["Result trace"]', best_trace 110 | fake_trials = hyperopt.Trials() 111 | fn = slm_visitor_lfw_partial( 112 | max_n_per_class, 113 | maybe_test_view2=int(maybe_test_view2), 114 | assume_promising=int(assume_promising), 115 | foobar_trace_target=list(best_trace), 116 | ) 117 | #space = search_space 118 | space = hpconvnet.lfw.build_search_space( 119 | max_n_features=16000, 120 | trn='DevTrain', # -- split used for unsupervised images 121 | n_unsup=300, # -- number of images from which to draw patches 122 | batched_lmap_speed_thresh={'seconds': 60, 'elements': 1}, 123 | bagging_fraction=float(bagging_fraction), 124 | ) 125 | domain = hyperopt.Domain(fn, space, rseed=123) 126 | domain.rng = None # -- this rng is never to be used 127 | if fake: 128 | ctrl = hyperopt.Ctrl(trials=fake_trials, current_trial=None) 129 | print 'WARNING: running on fake ctrl object' 130 | else: 131 | ctrl = MongoCtrl( 132 | trials=real_trials, 133 | current_trial=best_trial, 134 | read_only=False) 135 | 136 | #real_trials.handle.update(best_result, msg) 137 | #ctrl.checkpoint(dict(best_trial['result'], foodebug='yes')) 138 | config = hyperopt.base.spec_from_misc(best_trial['misc']) 139 | #print 'Config', config 140 | r_dct = domain.evaluate(config, ctrl, attach_attachments=(not fake)) 141 | print 'r_dct' 142 | print r_dct 143 | if fake: 144 | print 'WARNING: running on fake ctrl object, not saving result' 145 | attachments = r_dct.pop('attachments', {}) 146 | print 'Attachments:', attachments.keys() 147 | print ' ["Best Result trace"]' 148 | def print_trace(r): 149 | trace = r['trace'] 150 | for t in trace: 151 | print ' ', t 152 | print_trace(best_result) 153 | print ' ["result trace"]' 154 | print_trace(r_dct) 155 | else: 156 | # -- the loss should have been re-computed identically 157 | r_dct['view2_recalculated_loss'] = r_dct['loss'] 158 | r_dct['loss'] = best_result['loss'] 159 | if (r_dct['view2_recalculated_loss'] > best_result['loss']): 160 | print 'WARNING: recalculated loss was worst than loss during search' 161 | print ' -> original loss', best_result['loss'] 162 | print ' -> recalculated loss', r_dct['view2_recalculated_loss'] 
163 | 164 | print 'Checkpointing back to db' 165 | # -- N.B. attachments should have been saved by Domain.evaluate, 166 | # since we called it with attach_attachments=True. So they should 167 | # not be here anymore. 168 | assert 'attachments' not in r_dct 169 | 170 | ctrl.checkpoint(r_dct) 171 | 172 | @task 173 | def list_view2_scores(host, port, key): 174 | """ 175 | List any view2 scores that have been calculated within an experiment 176 | (exp_key) 177 | """ 178 | import pymongo as pm 179 | conn = pm.Connection(host=host, port=int(port)) 180 | query = {'exp_key': key} 181 | 182 | for doc in conn[dbname]['jobs'].find(query, fields=['tid', 'result', 'state']): 183 | r = doc['result'].get('loss_image_match_indexed') 184 | if r: 185 | assert len(r) == 1 186 | for member in r: 187 | fold_errs = [] 188 | for view_train_k in r[member]: 189 | if 'test' in view_train_k: 190 | fold_errs.append(r[member][view_train_k]['error_rate']) 191 | print list(enumerate(fold_errs)) 192 | print key, doc['tid'], 193 | print 'view1', doc['result']['loss'], 194 | print 'view2', np.mean(fold_errs), 195 | print doc['state'], doc['result']['status'] 196 | 197 | -------------------------------------------------------------------------------- /hpconvnet/lfw.py: -------------------------------------------------------------------------------- 1 | """ 2 | lfw.py - entry point for experiments on the LFW data set. 3 | 4 | 5 | """ 6 | import logging 7 | import os 8 | 9 | import numpy as np 10 | 11 | import hyperopt 12 | from hyperopt.base import use_obj_for_literal_in_memo 13 | from hyperopt import STATUS_OK 14 | from hyperopt import pyll 15 | 16 | from skdata import lfw 17 | from skdata import larray 18 | 19 | from .pyll_slm import view2_worth_calculating 20 | from .utils import git_versions 21 | 22 | from .slm import call_catching_pipeline_errors 23 | from .slm import USLM_Exception 24 | from .slm import uslm_domain 25 | from .slm_visitor_esvc import ESVC_SLM_Visitor 26 | import slm_visitor_esvc 27 | 28 | import foobar 29 | 30 | warn = logging.getLogger(__name__).warn 31 | info = logging.getLogger(__name__).info 32 | 33 | # XXX: this is related to a hack for cacheing features to disk 34 | # see e.g. 
shovel/lfw.py, slm_visitor_esvc.py 35 | dbname = 'lfw_db' 36 | 37 | 38 | class DataViewPlaceHolder(object): 39 | pass 40 | 41 | pyll_data_view = pyll.as_apply(DataViewPlaceHolder) 42 | 43 | 44 | @pyll.scope.define 45 | def unsup_images(data_view, trn, N): 46 | """ 47 | Return a block of 48 | """ 49 | if trn == 'DevTrain': 50 | # -- extract training images, and put them into channel-major format 51 | imgs = larray.reindex(data_view.image_pixels, 52 | data_view.dev_train['lpathidx'][0, :N])[:] 53 | imgs = np.asarray(imgs) 54 | assert 'int' in str(imgs.dtype) 55 | foobar.append_ndarray_signature(imgs, 'unsup_images') 56 | foobar.append_trace('unsup_images N', N) 57 | return imgs.transpose(0, 3, 1, 2).copy() 58 | else: 59 | raise NotImplementedError() 60 | 61 | 62 | def build_search_space(max_n_features, trn, n_unsup, 63 | bagging_fraction, 64 | batched_lmap_speed_thresh=None, 65 | batchsize=2, 66 | output_sizes=(32, 64, 128, 200), 67 | permit_affine_warp=True, 68 | ): 69 | image_shape = (250, 250, 1) # -- we're using lfw.Aligned below 70 | 71 | # max_n_features should be 16000 for full run 72 | # trn should be a string recognized by unsup_images() 73 | # n_unsup should be 300 for full run 74 | if batched_lmap_speed_thresh is None: 75 | batched_lmap_speed_thresh = {'seconds': 2.0, 'elements': 8} 76 | 77 | Xcm = pyll.scope.unsup_images(pyll_data_view, trn, n_unsup) 78 | search_space = { 79 | 'data_view': pyll_data_view, 80 | 'pipeline': uslm_domain( 81 | Xcm=Xcm, 82 | chmjr_image_shape=( 83 | image_shape[2], image_shape[0], image_shape[1]), 84 | output_sizes=list(output_sizes), # -- is list required? 85 | batchsize=batchsize, 86 | max_n_features=max_n_features, 87 | batched_lmap_speed_thresh=batched_lmap_speed_thresh, 88 | permit_affine_warp=permit_affine_warp, 89 | ), 90 | 'batchsize': batchsize, 91 | 'max_n_features': max_n_features, 92 | 'ctrl': hyperopt.Bandit.pyll_ctrl, 93 | 'batched_lmap_speed_thresh': batched_lmap_speed_thresh, 94 | 'bagging_fraction': bagging_fraction, 95 | } 96 | return search_space 97 | 98 | 99 | @hyperopt.fmin_pass_expr_memo_ctrl 100 | def slm_visitor_lfw(expr, memo, ctrl, 101 | maybe_test_view2=True, 102 | max_n_per_class=None, 103 | comparison_names=('mult', 'absdiff', 'sqrtabsdiff', 'sqdiff'), 104 | assume_promising=False, 105 | foobar_trace=True, 106 | foobar_trace_target=None, 107 | ): 108 | # -- possibly enable computation tracing 109 | foobar.reset_trace() 110 | foobar.trace_enabled = foobar_trace 111 | if foobar_trace_target: 112 | foobar.trace_verify = True 113 | foobar.set_trace_target(foobar_trace_target) 114 | slm_visitor_esvc._curdb = dbname # XXX tids are only unique within db 115 | 116 | versions = git_versions() 117 | info('GIT VERSIONS: %s' % str(versions)) 118 | 119 | data_view = lfw.view.Aligned( 120 | x_dtype='uint8', 121 | max_n_per_class=max_n_per_class, 122 | ) 123 | 124 | use_obj_for_literal_in_memo(expr, data_view, DataViewPlaceHolder, memo) 125 | 126 | def loss_fn(s, rdct, bagging_fraction): 127 | """ 128 | bagging_fraction - float 129 | If the function measures the loss within the ensemble (loss) 130 | as well as the loss without the ensemble (loss_last_member) then 131 | this value interpolates between boosting (0.0) and bagging (1.0). 
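        Concretely (matching the computation below):

            loss = bagging_fraction * valid_error_no_ensemble
                   + (1 - bagging_fraction) * valid_error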
132 | 133 | """ 134 | # -- this is the criterion we minimize during model search 135 | norm_key = s.norm_key('devTrain') 136 | task_name = 'devTrain' 137 | dct = s._results['train_image_match_indexed'][norm_key][task_name] 138 | loss = (bagging_fraction * dct['valid_error_no_ensemble'] 139 | + (1 - bagging_fraction) * dct['valid_error']) 140 | rdct['loss'] = loss 141 | rdct['status'] = STATUS_OK 142 | 143 | def foo(): 144 | argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False) 145 | visitor = ESVC_SLM_Visitor(pipeline=argdict['pipeline'], 146 | ctrl=argdict['ctrl'], 147 | data_view=argdict['data_view'], 148 | max_n_features=argdict['max_n_features'], 149 | memmap_name='%s_%i' % (__name__, os.getpid()), 150 | svm_crossvalid_max_evals=50, 151 | optimize_l2_reg=True, 152 | batched_lmap_speed_thresh=argdict[ 153 | 'batched_lmap_speed_thresh'], 154 | comparison_names=comparison_names, 155 | batchsize=argdict['batchsize'], 156 | ) 157 | # -- drive the visitor according to the protocol of the data set 158 | protocol_iter = argdict['data_view'].protocol_iter(visitor) 159 | msg, model = protocol_iter.next() 160 | assert msg == 'model validation complete' 161 | 162 | # -- save the loss, but don't save attachments yet. 163 | rdict = visitor.hyperopt_rval(save_grams=False) 164 | rdict['in_progress'] = True 165 | loss_fn(visitor, rdict, argdict['bagging_fraction']) 166 | argdict['ctrl'].checkpoint(rdict) 167 | 168 | if assume_promising: 169 | promising = True 170 | else: 171 | promising = view2_worth_calculating( 172 | loss=rdict['loss'], 173 | ctrl=argdict['ctrl'], 174 | thresh_loss=1.0, 175 | thresh_rank=1) 176 | 177 | 178 | info('Promising: %s' % promising) 179 | 180 | if maybe_test_view2: 181 | if promising: 182 | info('Disabling trace verification for view2') 183 | foobar.trace_verify = False 184 | msg = protocol_iter.next() 185 | assert msg == 'model testing complete' 186 | else: 187 | warn('Not testing unpromising model %s' % str(model)) 188 | else: 189 | warn('Skipping view2 stuff for model %s' % str(model)) 190 | rdict = visitor.hyperopt_rval(save_grams=promising) 191 | loss_fn(visitor, rdict, argdict['bagging_fraction']) 192 | return visitor, rdict 193 | 194 | try: 195 | visitor, rdict = call_catching_pipeline_errors(foo) 196 | except USLM_Exception, e: 197 | exc, rdict = e.args 198 | print ('job failed: %s: %s' % (type(e), exc)) 199 | rdict['git_versions'] = versions 200 | return dict(rdict, in_progres=False) 201 | 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Hyperparameter Optimization for Convolutional Vision Architectures 2 | ================================================================== 3 | 4 | This package provides a [Theano](http://www.deeplearning.net/software/theano)-based implementation of convolutional networks 5 | as described in (Bergstra, Yamins, and Cox, 2013), which exposes many 6 | architectural hyperparameters for optimization by 7 | [hyperopt](http://jaberg.github.com/hyperopt). 8 | 9 | # Installation 10 | 11 | 1. Requirements: 12 | 13 | 14 | * A Python/Numpy/Scipy stack. 15 | The Python package requirements are listed in 16 | requirements.txt. 17 | 18 | * Optional (but strongly recommended) is an NVIDIA GPU device at least as 19 | fast as, say, a GTX280, and CUDA. See Theano's documentation for setting 20 | up Theano to use a GPU device. 
21 | 22 | * Optional (but strongly recommended) is the MongoDB database software, 23 | which allows hyperopt to support parallel optimization. 24 | 25 | 2. Check out this project 26 | 27 | `git clone https://github.com/jaberg/hyperopt-convnet.git`. 28 | 29 | 3. Install it as a Python package. This installation makes the code files 30 | importable, which is required when running asynchronous hyperparameter 31 | optimization (i.e. with hyperopt-mongo-worker, as explained below). 32 | 33 | `python setup.py install` 34 | 35 | Consider installing this within your user account (`--user`) or within a 36 | virtualenv to avoid installing this package system-wide, and to avoid 37 | needing root privileges. 38 | 39 | Installing hyperopt-convnet will install a pile of Python packages, 40 | which are listed in requirements.txt. 41 | On my computer, I had to explicitly install a few packages, because 42 | whatever the setup.py script was doing wasn't working (I still don't 43 | understand python packaging...): 44 | * `pip install numpy`, 45 | * `pip install scipy`, 46 | * `pip install matplotlib` 47 | 48 | 4. Replace sklearn < 0.13 with git version (we need some new stuff in SVC). 49 | 50 | 51 | # Testing 52 | 53 | If installation goes well, then you will now be able to import the `hpconvnet` 54 | module. The easiest way to test your installation is 55 | 56 | 57 | ```bash 58 | THEANO_FLAGS=device=gpu shovel lfw.random_driver --max_n_per_class=20 59 | ``` 60 | 61 | This command should not crash, it should (i) download LFW if necessary and 62 | then (ii) loop indefinitely doing random search on a tiny subset of the LFW 63 | training data. 64 | 65 | 66 | 67 | # Running An Experiment in Parallel with MongoDB 68 | 69 | Running hyperparameter optimization on large convolutional networks for data 70 | sets such as [LFW](http://vis-www.cs.umass.edu/lfw/) 71 | and [CIFAR10](http://www.cs.toronto.edu/~kriz/cifar.html) takes a significant amount of time: 72 | expect a search of a few hundred points to take about a GPU-week. 73 | This cannot be completely parallelized (Bayesian optimization works on the 74 | basis of feedback about the fitness landscape after all), but in my experience 75 | it can easily be parallelized 5-fold to 10-fold. 76 | So if you have access to a small cluster you can see significant progress in 77 | an hour or two, and be done in a day. 78 | 79 | What follows here is a sketch of the unix commands you would need to do to 80 | make this happen. 81 | To get more of a sense about what's going on, read through 82 | [hyperopt documentation on using 83 | mongo](https://github.com/jaberg/hyperopt/wiki/Parallelizing-search). 84 | 85 | 86 | 1. Set up a mongodb process for inter-process communication. 87 | 88 | ` 89 | mongod --dbpath . --port PORT --directoryperdb --fork --journal --logpath log.log --nohttpinterface 90 | ` 91 | 92 | If this machine is visible to the internet, you should either bind mongod 93 | to the local loopback address and connect to the database via an ssh 94 | tunnel, or set up mongodb for password-protected access. 95 | 96 | 2. Start an asynchronous search process, that connects to the mongodb and 97 | polls a work queue created there. 98 | 99 | ` 100 | shovel cifar10.tpe_driver localhost PORT 0.0 101 | ` 102 | 103 | 3. 
Start one or more generic hyperopt worker processes to crank through the 104 | trials of the experiment, pointing at the database that's written into the 105 | shovel script, in this case: 106 | 107 | ` 108 | ssh WORKNODE hyperopt-mongo-worker --mongo=localhost:PORT/DBNAME 109 | ` 110 | 111 | The PORT should match the one used to launch mongodb. 112 | The DBNAME should match the one used in shovel/cifar10.py:make_trials, 113 | which is "dbname" by default. 114 | 115 | If you have a cluster with a queue system (e.g. Torque, PBS, etc.) then use 116 | that system to schedule a few hyperopt-mongo-worker processes. When they 117 | start, they will connect to the database and reserve an experiment trial. 118 | These processes will loop indefinitely by default, dequeueing/reserving trials 119 | and storing the results back to the database. They will stop when the 120 | search process no longer adds new trials to the database, or when several 121 | (4) consecutive trials fail to complete successfully (i.e. your trial 122 | evaluation code is faulty and you should either fix it or at least catch the 123 | terminating exceptions). 124 | 125 | # Rough Guide to the Code 126 | 127 | * `shovel/{cifar10,lfw,mnist}.py` driver code for various data sets. 128 | When you type `shovel lfw.foo` in bash, it will try to run the `foo` task in 129 | the lfw.py file. 130 | 131 | * `hpconvnet/lfw.py` describes the search space and the objective function 132 | that hyperopt.fmin requires to optimize LFW's view 1 data set. 133 | 134 | * `hpconvnet/cifar10.py` describes the search space and the objective function 135 | that hyperopt.fmin requires to optimize CIFAR10 validation performance. 136 | 137 | * `hpconvnet/slm_visitor_esvc.py` provides a LearningAlgo (skdata-style) derived 138 | from `SLM_Visitor` that does classification based on sklearn's SVC binary 139 | SVM and a precomputed kernel. This is generally a good choice for data sets 140 | without too many examples. The LFW experiments use this class. 141 | 142 | * `hpconvnet/slm_visitor_primal.py` has a LearningAlgo (skdata-style) derived 143 | from `SLM_Visitor` that does classification based on a primal SVM solver. 144 | This is generally a good choice for data sets with larger numbers of 145 | examples. The MNIST and CIFAR10 experiments use this class. 146 | 147 | * `hpconvnet/slm_visitor.py` provides `SLM_Visitor`, 148 | a LearningAlgo (skdata-style) base class 149 | with image feature extraction code and several LearningAlgo interface 150 | methods. 151 | 152 | * `hpconvnet/slm.py` - creates the "pipeline" part of the search space, which 153 | describes the full set of possibilities for image feature extraction (the 154 | full set of convolutional architectures). The `uslm_domain` function 155 | returns this search space as a pyll graph. 156 | Note also the `call_catching_pipeline_errors` function, which includes 157 | `except` clauses for all known errors which may arise in the course of 158 | evaluating that pyll graph. 159 | 160 | * `hpconvnet/pyll_slm.py` - defines many custom pyll.scope functions which 161 | serve to describe the `uslm_domain` search space. 162 | 163 | The basic idea of the code is that the driver code (e.g. in shovel/lfw.py) 164 | defines a search space and an objective function for hyperopt. 
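For orientation, here is a minimal sketch of that `fmin` pattern, using a toy
one-hyperparameter search space in place of the real pipeline spaces built by
`hpconvnet.lfw.build_search_space` and `hpconvnet.cifar10.build_search_space`
(the `objective` function and the `'lr'` hyperparameter below are illustrative
only, not part of this package):

```python
import hyperopt
from hyperopt import hp, fmin, tpe, Trials

# Toy stand-in for the real search space (a pyll graph over architectures).
space = {'lr': hp.lognormal('lr', -4.0, 1.0)}

def objective(config):
    # The real objective builds a feature-extraction pipeline and fits SVMs;
    # here we just return a dummy validation loss for the sampled point.
    loss = (config['lr'] - 0.01) ** 2
    return {'loss': loss, 'status': hyperopt.STATUS_OK}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
```

Swapping `tpe.suggest` for `hyperopt.rand.suggest` gives random search, and
swapping `Trials` for `MongoTrials` gives the parallel, MongoDB-backed setup
described above; the shovel drivers do exactly this.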
165 | 166 | The search space is relatively complex, not only in terms of its size (238 167 | hyperparameters) but also in its internal logic: a "sample" from the search 168 | space is a dictionary that alongside some some simpler key-value pairs, 169 | contains a "pipeline" key whose value is itself a pyll graph (seriously, pyll 170 | has support for lambda expressions), 171 | which evaluates to a theano function, which can process images. 172 | 173 | The objective function is implemented by e.g. lfw.slm_visitor_lfw which 174 | allocates a LearningAlgo (an SLM_Visitor_ESVC instance called `visitor`) 175 | to handle most of the work. 176 | The lfw.slm_visitor_lfw routine passes a LearningAlgo 177 | to the LFW data set's "protocol" function, which 178 | walks the LearningAlgo through the various steps of an LFW experiment: showing 179 | it the right data at the right time, asking it to compute various statistics, 180 | and so on. 181 | When that's all done, lfw.slm_visitor_lfw asks the LearningAlgo to make 182 | a report (`visitor.hyperopt_rval()`) in the form of a dictionary. 183 | That dictionary is augmented with what hyperopt needs to see (loss and status 184 | keys) and passed back to hyperopt. 185 | 186 | 187 | There are other files too in the hpconvnet folder, but these ones summarize 188 | the logic and control flow. 189 | 190 | 191 | # References 192 | 193 | * J. Bergstra, D. Yamins, D. D. Cox (2013). 194 | [Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures](http://jmlr.csail.mit.edu/proceedings/papers/v28/bergstra13.pdf), 195 | in Proc. ICML2013. -- This paper describes the convolutional architectures 196 | implemented in this software package, and the results you should expect from 197 | hyperparameter optimization. 198 | 199 | * J. Bergstra, R. Bardenet, Y. Bengio, B. Kegl (2011). 200 | [Algorithms for Hyper-parameter Optimization](http://books.nips.cc/papers/files/nips24/NIPS2011_1385.pdf) 201 | In Proc. NIPS2011. -- This paper introduces the TPE hyperparameter optimization algorithm. 202 | -------------------------------------------------------------------------------- /hpconvnet/isvm_binary.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file supports the incremental construction of an SVM classifier 3 | by partially-corrective boosting on the hinge loss. 4 | 5 | 6 | Each incremental solver minimizes 7 | 8 | hinge( dot(X, w) + b + alpha * prev_Wx) 9 | + lambda * (|w|^2 + |alpha * prev_W|^2) 10 | 11 | 12 | Each solver is designed to be run on a subset of all available features. 
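Here `prev_Wx` holds the decision-function contributions dot(X_k, w_k) of the
previously-fit rounds (carried forward via `xw_carry_forward` rather than
recomputed), and the learned scalars `alpha` rescale those contributions so
that old and new feature blocks are re-balanced jointly.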
13 | 14 | """ 15 | import copy 16 | 17 | import numpy as np 18 | import autodiff 19 | import theano # abstraction leak to pass mode for optimization 20 | 21 | from isvm_multi import hinge2 22 | from isvm_multi import _default_bfgs_kwargs 23 | from isvm_multi import IncrementalMultiSVM 24 | 25 | def binary_svm_hinge_loss(x, y, weights, bias, alpha, pxw, pw_l2_sqr, 26 | l2_regularization): 27 | """ 28 | x: (n_examples, n_features) feature matrix 29 | y: (n_examples,) label vector 30 | weights: (n_features,) new weights 31 | alpha: (n_prev,) multiplier on each vector of previous weights 32 | pw_l2_sqr: (n_prev,) squared l2-norm of existing weights 33 | pxw: (n_examples, n_prev) inner products of previous weights with `x` 34 | l2_regularization: multiplier on sum of all squared weights 35 | """ 36 | n_prev, = alpha.shape 37 | 38 | xw = np.dot(x, weights) 39 | if n_prev: 40 | if np.any(pw_l2_sqr < 0): 41 | raise ValueError('prev_w_l2_sqr may not be negative') 42 | prev_l2_sqr = (pw_l2_sqr * (alpha ** 2)).sum() 43 | xw += np.dot(pxw, alpha) 44 | else: 45 | prev_l2_sqr = 0.0 46 | 47 | margin = y * (xw + bias) 48 | losses = hinge2(margin) 49 | 50 | cur_l2_sqr = (weights * weights).sum() 51 | l2_reg = 0.5 * l2_regularization * (cur_l2_sqr + prev_l2_sqr) 52 | cost = losses.sum() + l2_reg 53 | return cost 54 | 55 | 56 | class IncrementalSVM(object): 57 | """ 58 | On each iteration of the incremental construction this class fits a new 59 | weight vector w to the features x, while adjusting the norm of the 60 | previously-fit weight vectors to balance the current model against the old 61 | ones. 62 | 63 | See test_hingeboost.py for an example of incremental SVM construction. 64 | 65 | """ 66 | def __init__(self, n_features, 67 | prev_w_l2_sqr=None, 68 | l2_regularization=1e-4, 69 | dtype='float64', 70 | scalar_bounds=(-1e3, 1e3), 71 | bfgs_kwargs=None, 72 | alpha=None, 73 | ): 74 | """ 75 | prev_w_l2_sqr: the (un-squared) l2-norm of each column of the existing weight vector 76 | """ 77 | self.n_features = n_features 78 | if prev_w_l2_sqr is None: 79 | self.prev_w_l2_sqr = np.empty((0,), dtype=dtype) 80 | else: 81 | self.prev_w_l2_sqr = np.asarray(prev_w_l2_sqr).astype(dtype) 82 | (self.n_prev,) = self.prev_w_l2_sqr.shape 83 | self.l2_regularization = l2_regularization 84 | self.dtype = dtype 85 | self.scalar_bounds = scalar_bounds 86 | if bfgs_kwargs is None: 87 | self.bfgs_kwargs = copy.deepcopy(_default_bfgs_kwargs) 88 | else: 89 | self.bfgs_kwargs = bfgs_kwargs 90 | 91 | self.weights = np.zeros((n_features,), dtype=dtype) 92 | self.bias = np.zeros((), dtype=dtype) 93 | if alpha is None: 94 | self.alpha = np.ones_like(self.prev_w_l2_sqr) 95 | else: 96 | self.alpha = np.array(alpha).astype(dtype) 97 | if self.alpha.shape != self.prev_w_l2_sqr.shape: 98 | raise ValueError('shape mismatch between alpha and prev_w_l2_sqr', 99 | self.alpha.shape, self.prev_w_l2_sqr.shape) 100 | 101 | @property 102 | def cumulative_alpha(self): 103 | rval = list(self.alpha) 104 | rval.append(1.0) 105 | return np.asarray(rval, dtype=self.dtype) 106 | 107 | @property 108 | def cumulative_w_l2_sqr(self): 109 | rval = list(self.prev_w_l2_sqr) 110 | rval.append(self.w_l2_sqr) 111 | return np.asarray(rval, dtype=self.dtype) 112 | 113 | @property 114 | def w_l2_sqr(self): 115 | return (self.weights * self.weights).sum() 116 | 117 | def continuation(self, n_features=None): 118 | if n_features is None: 119 | n_features = self.n_features 120 | rval = self.__class__( 121 | n_features=n_features, 122 | 
prev_w_l2_sqr=self.cumulative_w_l2_sqr, 123 | alpha=self.cumulative_alpha, 124 | l2_regularization=self.l2_regularization, 125 | dtype=self.dtype, 126 | scalar_bounds=self.scalar_bounds, 127 | bfgs_kwargs=self.bfgs_kwargs 128 | ) 129 | rval.bias = self.bias.copy() 130 | return rval 131 | 132 | def xw_carry_forward(self, x, pxw=None): 133 | """stack the current dot(x, weights) onto previous stack `pxw` 134 | """ 135 | pxw = self.as_xw(x, pxw) 136 | rval = np.hstack((pxw, np.dot(x, self.weights)[:, None])) 137 | print rval.shape 138 | return rval 139 | 140 | def decision_function(self, x, xw=None): 141 | xw = self.as_xw(x, xw) 142 | return np.dot(x, self.weights) + np.dot(xw, self.alpha) + self.bias 143 | 144 | def predict(self, x, xw=None): 145 | xw = self.as_xw(x, xw) 146 | return (self.decision_function(x, xw) > 0) * 2 - 1 147 | 148 | def loss(self, x, y, xw=None): 149 | # y_ind is all +-1, with 1 meaning a positive label for OvA classif 150 | assert set(y) <= set([-1, 1]) 151 | xw = self.as_xw(x, xw) 152 | 153 | return binary_svm_hinge_loss(x, y, 154 | self.weights, self.bias, self.alpha, 155 | xw, 156 | self.prev_w_l2_sqr, 157 | self.l2_regularization, 158 | ) 159 | 160 | def as_xw(self, x, xw): 161 | if xw is None: 162 | if self.n_prev == 0: 163 | return np.zeros((len(x), self.n_prev), dtype=x.dtype) 164 | else: 165 | raise TypeError('xw is required for previous models') 166 | else: 167 | xw = np.asarray(xw, dtype=self.dtype) 168 | if xw.shape != (len(x), self.n_prev): 169 | raise ValueError('xw has wrong shape', 170 | xw.shape, (len(x), self.n_prev)) 171 | return xw 172 | 173 | def fit(self, x, y, xw=None): 174 | """ 175 | x - n_examples x n_features design matrix. 176 | y - vector of integer labels 177 | xw - matrix of real-valued incoming biases obtained 178 | by multiplying the existing weight vectors by x 179 | """ 180 | assert set(y) <= set([-1, 1]) 181 | 182 | if x.shape[0] != y.shape[0]: 183 | raise ValueError('length mismatch between x and y') 184 | n_examples, n_features = x.shape 185 | if n_features != self.n_features: 186 | raise ValueError('n_feature mismatch', (n_features, 187 | self.n_features)) 188 | 189 | weights = self.weights 190 | bias = self.bias 191 | alpha = self.alpha 192 | 193 | x = x.astype(self.dtype) 194 | y = y.astype(self.dtype) 195 | 196 | xw = self.as_xw(x, xw) 197 | print 'WARNING: IncrementalSVM should use alpha0, n_sgd_iters' 198 | 199 | # -- warm up with some sgd 200 | weights, bias, alpha, = autodiff.fmin_sgd( 201 | lambda w, b, a, xi, yi, xwi: 202 | binary_svm_hinge_loss(xi, yi, w, b, a, None, 203 | None, 204 | self.l2_regularization), 205 | (weights, bias, alpha), 206 | streams={ 207 | 'xi': x.reshape((n_examples, 1, x.shape[1])), 208 | 'yi': y.reshape((n_examples, 1)), 209 | }, 210 | stepsize=0.01, 211 | loops=max(1, 100000 // len(x)), 212 | ) 213 | 214 | # -- fine-tune without alpha by L-BFGS 215 | weights, bias, alpha, = autodiff.fmin_l_bfgs_b( 216 | lambda w, b, a: 217 | binary_svm_hinge_loss(x, y, 218 | w, b, a, None, None, 219 | self.l2_regularization), 220 | (weights, bias, alpha), 221 | # -- the graph is tiny, time spent optimizing it is wasted. 
222 | theano_mode=theano.Mode(linker='cvm', optimizer='fast_run'), 223 | **self.bfgs_kwargs) 224 | 225 | 226 | self.weights = weights 227 | self.bias = bias 228 | self.alpha = alpha 229 | 230 | 231 | class IncrementalSVM_MultiHack(object): 232 | 233 | def __init__(self, l2_regularization): 234 | self.l2_regularization = l2_regularization 235 | 236 | def fit(self, x, y, history): 237 | self._svm = IncrementalMultiSVM( 238 | dtype=x.dtype, 239 | n_features=x.shape[1], 240 | n_classes=2, 241 | l2_regularization=self.l2_regularization, 242 | n_sgd_iters=0, 243 | bfgs_kwargs={ 244 | 'maxfun': 1000, 245 | 'iprint': 0, 246 | 'm': 32, 247 | 'factr': 100}, 248 | ) 249 | self._svm.fit(x, (y + 1) / 2, history) 250 | 251 | def predict(self, x, history): 252 | return self._svm.predict(x, history) * 2 - 1 253 | -------------------------------------------------------------------------------- /hpconvnet/isvm_boosting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experiment classes 3 | """ 4 | 5 | import copy 6 | import logging 7 | 8 | import numpy as np 9 | 10 | import hyperopt 11 | from hyperopt.base import trials_from_docs 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | class BoostHelper(object): 16 | """ 17 | Several helper-methods related to boosting that work on the basis of 18 | a dictionary doc_by_tid rather than the original trials object. 19 | 20 | _continuing: map `tid` -> list of documents that continue `tid`. 21 | Forward arrows in the exploration of ensembles. 22 | 23 | """ 24 | 25 | @staticmethod 26 | def query_MongoTrials(mt, fields=(), query=None): 27 | """ 28 | Retrieve `docs` for the constructor without downloading the 29 | entire experiment history, as would be done by mt.refresh(). 30 | """ 31 | exp_key = mt._exp_key 32 | if query is None: 33 | query = {} 34 | else: 35 | query = copy.deepcopy(dict(query)) 36 | if exp_key != None: 37 | query.setdefault('exp_key', exp_key) 38 | query.setdefault('state', {'$ne': hyperopt.JOB_STATE_ERROR}) 39 | rval = mt.handle.jobs.find( 40 | query, 41 | fields=[ 42 | 'tid', 43 | 'result.status', 44 | 'result.loss', 45 | 'misc.boosting.continues', 46 | 'misc.boosting.variant', # -- optional 47 | '_attachments', 48 | ] + list(fields), 49 | snapshot=True) 50 | if rval is None: 51 | return [] 52 | else: 53 | return list(rval) 54 | 55 | def __init__(self, docs, doc_by_tid=None): 56 | if doc_by_tid is None: 57 | self.doc_by_tid = dict([(d['tid'], d) for d in docs]) 58 | # -- assert that every document has a unique tid 59 | assert len(self.doc_by_tid) == len(docs) 60 | else: 61 | self.doc_by_tid = doc_by_tid 62 | 63 | self._continuing = {} 64 | for d in docs: 65 | pre_tid = self.continues(d) 66 | if pre_tid is not None: 67 | pre_tid = pre_tid['tid'] 68 | self._continuing.setdefault(pre_tid, []).append(d) 69 | 70 | 71 | def ok_tids(self): 72 | return [d['tid'] for d in self.doc_by_tid.values() 73 | if d['result']['status'] == hyperopt.STATUS_OK] 74 | 75 | def best_doc(self): 76 | # -- it is no longer the case that losses can be compared 77 | # between generations, only between siblings is ok. 78 | raise NotImplementedError('since hacking bagging support') 79 | ok_tids = self.ok_tids() 80 | losses = [self.doc_by_tid[tid]['result']['loss'] for tid in ok_tids] 81 | assert None not in losses 82 | best_idx = np.argmin(losses) 83 | rval = self.doc_by_tid[ok_tids[best_idx]] 84 | return rval 85 | 86 | def continues(self, doc): 87 | """Returns the (older-than-doc) trial whose decisions `doc` built on. 
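        (This follows the `misc['boosting']['continues']` back-pointer one
        step; it returns None for first-round trials.)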
88 | """ 89 | # -- hack to support `doc` that is a misc sub-doc 90 | doc = self.doc_by_tid[doc['tid']] 91 | if 'boosting' in doc['misc']: 92 | rval_tid = doc['misc']['boosting']['continues'] 93 | if rval_tid is None: 94 | return None 95 | else: 96 | return self.doc_by_tid[rval_tid] 97 | else: 98 | return None 99 | 100 | def continuing(self, doc): 101 | """Returns all docs whose decisions were built on `doc`. 102 | """ 103 | if doc is None: 104 | return self._continuing.get(None, []) 105 | else: 106 | return self._continuing.get(doc['tid'], []) 107 | 108 | def children(self, doc): 109 | return self.continuing(doc) 110 | 111 | def best_child(self, doc): 112 | losses = [(child['result']['loss'], child) 113 | for child in self.children(doc) 114 | if child['result']['status'] == hyperopt.STATUS_OK 115 | ] 116 | if losses: 117 | losses.sort() 118 | return losses[0][1] 119 | else: 120 | return None 121 | 122 | def ensemble_members(self): 123 | """Return all docs that are part of the best ensemble in order of 124 | addition to the ensemble. 125 | """ 126 | # function works by working backward through the 127 | # doc['misc']['boosting']['continues'] links 128 | reversed_members = [self.best_doc()] 129 | while self.continues(reversed_members[-1]) != None: 130 | reversed_members.append(self.continues(reversed_members[-1])) 131 | rval = list(reversed(reversed_members)) 132 | return rval 133 | 134 | def history(self, doc): 135 | """ 136 | Return a list of documents that continued previous ones, leading 137 | eventually to `doc`. `doc` itself is the last element. 138 | """ 139 | rval = [] 140 | try: 141 | doc_or_None = self.continues(doc) 142 | while doc_or_None is not None: 143 | rval.append(doc_or_None) 144 | doc_or_None = self.continues(doc_or_None) 145 | rval.reverse() 146 | rval.append(doc) 147 | return rval 148 | except KeyError, e: 149 | if 'boosting'in str(e): 150 | return [doc] 151 | raise 152 | 153 | 154 | def suggest( 155 | new_ids, domain, trials, sub_suggest, 156 | min_ok_per_round=1, 157 | min_valid_per_round=1, 158 | absolute_loss_thresh=1.0, 159 | relative_loss_thresh=None, 160 | ): 161 | """ 162 | 163 | Parameters 164 | ---------- 165 | 166 | min_ok_per_round - int 167 | A trial cannot be extended in the ensemble until it has this many 168 | siblings with status 'ok' and a loss <= absolute_loss_thresh. 169 | 170 | min_valid_per_round - int 171 | A trial cannot be extended in the ensemble until it has this many 172 | siblings whose job state is not ERROR. 173 | 174 | absolute_loss_thresh - float 175 | Jobs with loss greater than this are not counted as 'ok'. 176 | 177 | relative_loss_thresh - None or float 178 | A child cannot become a parent in the ensemble unless it improves on its 179 | parent with a loss <= relative_loss_thresh * parent_loss. 180 | 181 | This search algo works by injecting a ['misc']['boosting'] subdocument into 182 | every trial, with keys: 183 | * variant - identify the type of boosting at work 184 | * continues - the trial ID (tid) of the previously selected trial in the 185 | ensemble, or `None` for first-round trials 186 | 187 | In order for boosting to work properly, the 'loss' reported by trial must 188 | represent the CUMULATIVE ENSEMBLE LOSS if the ensemble were to be extended 189 | to include that particular trial. 190 | 191 | """ 192 | new_id, = new_ids 193 | 194 | valid_docs = [t for t in trials 195 | if t['state'] != hyperopt.JOB_STATE_ERROR] 196 | 197 | # -- ok_docs are those which are eligible to be a member of the 198 | # final ensemble. 
199 | ok_docs = [t for t in valid_docs 200 | if t['result']['status'] == hyperopt.STATUS_OK 201 | and t['result']['loss'] <= absolute_loss_thresh] 202 | 203 | logger.info('n_ok: %i n_valid: %i' % (len(ok_docs), len(valid_docs))) 204 | 205 | valid_helper = BoostHelper(valid_docs) 206 | ok_helper = BoostHelper(ok_docs) 207 | 208 | cur_parent = None 209 | cur_parent_tid = None 210 | while True: 211 | n_ok_children = len(ok_helper.children(cur_parent)) 212 | n_valid_children = len(valid_helper.children(cur_parent)) 213 | logger.info('cur_parent: %s n_ok_children: %i n_valid_children: %i' 214 | % (None if cur_parent is None else cur_parent['tid'], 215 | n_ok_children, 216 | n_valid_children)) 217 | if n_ok_children < min_ok_per_round: 218 | break 219 | if n_valid_children < min_valid_per_round: 220 | break 221 | 222 | best_child = ok_helper.best_child(cur_parent) 223 | assert best_child is not None # -- because ok_helper has some elements 224 | 225 | if None not in (cur_parent, relative_loss_thresh): 226 | rel_thresh = cur_parent['result']['loss'] * relative_loss_thresh 227 | if best_child['result']['loss'] >= rel_thresh: 228 | break 229 | 230 | logger.info('best_child: %i' % best_child['tid']) 231 | cur_parent = best_child 232 | cur_parent_tid = best_child['tid'] 233 | del best_child 234 | 235 | cur_siblings = valid_helper.children(cur_parent) 236 | 237 | current_trials = trials_from_docs( 238 | cur_siblings, 239 | exp_key=trials._exp_key, 240 | # -- validate=False is much faster 241 | validate=False) 242 | 243 | new_trial_docs = sub_suggest([new_id], domain, current_trials) 244 | 245 | for trial in new_trial_docs: 246 | misc = trial['misc'] 247 | # -- boosting cannot be nested with current data structure 248 | assert 'boosting' not in misc 249 | # -- I think the following was a debugging sanity check 250 | assert trial['tid'] == new_id 251 | misc['boosting'] = { 252 | 'variant': { 253 | 'name': 'async_suggest', 254 | 'min_ok_per_round': min_ok_per_round, 255 | 'min_valid_per_round': min_valid_per_round, 256 | 'relative_loss_thresh': relative_loss_thresh, 257 | 'absolute_loss_thresh': absolute_loss_thresh, 258 | }, 259 | 'continues': cur_parent_tid} 260 | 261 | return new_trial_docs 262 | 263 | -------------------------------------------------------------------------------- /hpconvnet/utils.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import functools 3 | import logging 4 | import sys 5 | 6 | import numpy as np 7 | from PIL import Image 8 | 9 | 10 | class ImgLoaderResizer(object): 11 | """ Load 250x250 greyscale images, return normalized 200x200 float32 ones. 
12 | """ 13 | def __init__(self, inshape, 14 | shape=None, ndim=None, dtype='float32', normalize=True, 15 | crop=None): 16 | # DELETE ME 17 | assert 0 18 | self.inshape = inshape 19 | assert len(shape) == 3 20 | assert shape[0] == 1 21 | shape = tuple(shape) 22 | assert len(crop) == 4 23 | crop = tuple(crop) 24 | l, t, r, b = crop 25 | assert 0 <= l < r <= self.inshape[0] 26 | assert 0 <= t < b <= self.inshape[1] 27 | self._crop = crop 28 | assert dtype == 'float32' 29 | self._shape = shape 30 | if ndim is None: 31 | self._ndim = None if (shape is None) else len(shape) 32 | else: 33 | self._ndim = ndim 34 | self._dtype = dtype 35 | self.normalize = normalize 36 | 37 | def rval_getattr(self, attr, objs): 38 | if attr == 'shape' and self._shape is not None: 39 | return self._shape 40 | if attr == 'ndim' and self._ndim is not None: 41 | return self._ndim 42 | if attr == 'dtype': 43 | return self._dtype 44 | raise AttributeError(attr) 45 | 46 | def __call__(self, file_path): 47 | im = Image.open(file_path) 48 | if im.mode != 'L': 49 | im = im.convert('L') 50 | assert im.size == self.inshape 51 | if self._crop != (0, 0,) + self.inshape: 52 | im = im.crop(self._crop) 53 | l, t, r, b = self._crop 54 | assert im.size == (r - l, b - t) 55 | if max(im.size) != self._shape[1]: 56 | m = self._shape[1]/float(max(im.size)) 57 | new_shape = (int(round(im.size[0]*m)), int(round(im.size[1]*m))) 58 | im = im.resize(new_shape, Image.ANTIALIAS) 59 | imval = np.asarray(im, dtype=self._dtype) 60 | rval = np.zeros(self._shape, dtype=self._dtype) 61 | ctr = self._shape[1]/2 62 | cxmin = ctr - imval.shape[0] / 2 63 | cxmax = ctr - imval.shape[0] / 2 + imval.shape[0] 64 | cymin = ctr - imval.shape[1] / 2 65 | cymax = ctr - imval.shape[1] / 2 + imval.shape[1] 66 | rval[cxmin:cxmax,cymin:cymax] = imval 67 | if self.normalize: 68 | rval -= rval.mean() 69 | rval /= max(rval.std(), 1e-3) 70 | else: 71 | rval /= 255.0 72 | assert rval.dtype == self._dtype, (rval.dtype, self._dtype) 73 | assert rval.shape == self._shape, (rval.shape, self._shape) 74 | return rval 75 | 76 | 77 | import theano 78 | # -- Define theano versions of dot because my numpy installation is screwed up 79 | # and does not use a good blas. 80 | _theano_fA = theano.tensor.fmatrix() 81 | _theano_fB = theano.tensor.fmatrix() 82 | dot_f32 = theano.function( 83 | [_theano_fA, _theano_fB], 84 | theano.tensor.dot(_theano_fA, _theano_fB), 85 | allow_input_downcast=True, 86 | mode=theano.compile.Mode(linker='vm', optimizer='fast_run').excluding( 87 | 'gpu')) 88 | 89 | _theano_dA = theano.tensor.dmatrix() 90 | _theano_dB = theano.tensor.dmatrix() 91 | dot_f64 = theano.function( 92 | [_theano_dA, _theano_dB], 93 | theano.tensor.dot(_theano_dA, _theano_dB), 94 | allow_input_downcast=True, 95 | mode=theano.compile.Mode(linker='vm', optimizer='fast_run').excluding( 96 | 'gpu')) 97 | 98 | 99 | DOT_MAX_NDIMS = 256 100 | MEAN_MAX_NPOINTS = 2000 101 | STD_MAX_NPOINTS = 2000 102 | 103 | if 1: 104 | def dot(A, B): 105 | _dot = dict(float32=dot_f32, float64=dot_f64)[str(A.dtype)] 106 | return _dot(A, B) 107 | dot.__theano_op__ = theano.tensor.dot # -- used by autodiff 108 | else: 109 | dot = np.dot 110 | 111 | 112 | def chunked_linear_kernel(Xs, Ys, symmetric): 113 | """Compute a linear kernel in blocks so that it can use a GPU with limited 114 | memory. 
115 | 116 | Xs is a list of feature matrices 117 | Ys ia list of feature matrices 118 | 119 | This function computes the kernel matrix with 120 | \sum_i len(Xs[i]) rows 121 | \sum_j len(Ys[j]) cols 122 | """ 123 | 124 | dtype = Xs[0].dtype 125 | 126 | def _dot(A, B): 127 | if K < DOT_MAX_NDIMS: 128 | return dot(A, B) 129 | else: 130 | out = dot(A[:,:DOT_MAX_NDIMS], B[:DOT_MAX_NDIMS]) 131 | ndims_done = DOT_MAX_NDIMS 132 | while ndims_done < K: 133 | out += dot( 134 | A[:,ndims_done : ndims_done + DOT_MAX_NDIMS], 135 | B[ndims_done : ndims_done + DOT_MAX_NDIMS]) 136 | ndims_done += DOT_MAX_NDIMS 137 | return out 138 | 139 | R = sum([len(X) for X in Xs]) 140 | C = sum([len(Y) for Y in Ys]) 141 | K = Xs[0].shape[1] 142 | 143 | rval = np.empty((R, C), dtype=dtype) 144 | 145 | if symmetric: 146 | assert R == C 147 | 148 | ii0 = 0 149 | for ii, X_i in enumerate(Xs): 150 | sys.stdout.write('.') 151 | sys.stdout.flush() 152 | ii1 = ii0 + len(X_i) # -- upper bound of X block 153 | 154 | jj0 = 0 155 | for jj, Y_j in enumerate(Ys): 156 | jj1 = jj0 + len(Y_j) # -- upper bound of Y block 157 | 158 | r_ij = rval[ii0:ii1, jj0:jj1] 159 | 160 | if symmetric and jj < ii: 161 | r_ji = rval[jj0:jj1, ii0:ii1] 162 | r_ij[:] = r_ji.T 163 | else: 164 | r_ij[:] = _dot(X_i, Y_j.T) 165 | 166 | jj0 = jj1 167 | 168 | ii0 = ii1 169 | 170 | return rval 171 | 172 | 173 | def linear_kernel(X, Y, block_size=10000): 174 | """Compute a linear kernel in blocks so that it can use a GPU with limited 175 | memory. 176 | 177 | Xs is a list of feature matrices 178 | Ys ia list of feature matrices 179 | 180 | This function computes the kernel matrix with 181 | \sum_i len(Xs[i]) rows 182 | \sum_j len(Ys[j]) cols 183 | """ 184 | 185 | def chunk(Z): 186 | Zs = [] 187 | ii = 0 188 | while len(Z[ii:ii + block_size]): 189 | Zs.append(Z[ii:ii + block_size]) 190 | ii += block_size 191 | return Zs 192 | 193 | Xs = chunk(X) 194 | Ys = chunk(Y) 195 | 196 | assert sum([len(xi) for xi in Xs]) == len(X) 197 | assert sum([len(yi) for yi in Ys]) == len(Y) 198 | return chunked_linear_kernel(Xs, Ys, symmetric=(X is Y)) 199 | 200 | 201 | def mean_and_std(X, remove_std0=False, unbiased=False, 202 | internal_dtype='float64', return_dtype=None): 203 | """Return the mean and standard deviation of each column of matrix `X` 204 | 205 | if `remove_std0` is True, then 0 elements of the std vector will be 206 | switched to 1. This is typically what you want for feature normalization. 
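    When `X` has more rows than MEAN_MAX_NPOINTS (resp. STD_MAX_NPOINTS), the
    mean (resp. variance) is accumulated block by block in `internal_dtype`
    to bound memory use and rounding error.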
207 | """ 208 | X = X.reshape(X.shape[0], -1) 209 | npoints, ndims = X.shape 210 | 211 | if npoints < MEAN_MAX_NPOINTS: 212 | fmean = X.mean(0, dtype=internal_dtype) 213 | else: 214 | sel = X[:MEAN_MAX_NPOINTS] 215 | fmean = np.empty_like(sel[0,:]).astype(internal_dtype) 216 | 217 | np.add.reduce(sel, axis=0, dtype=internal_dtype, out=fmean) 218 | 219 | # -- sum up the features in blocks to reduce rounding error 220 | curr = np.empty_like(fmean) 221 | npoints_done = MEAN_MAX_NPOINTS 222 | while npoints_done < npoints: 223 | sel = X[npoints_done : npoints_done + MEAN_MAX_NPOINTS] 224 | np.add.reduce(sel, axis=0, dtype=internal_dtype, out=curr) 225 | np.add(fmean, curr, fmean) 226 | npoints_done += MEAN_MAX_NPOINTS 227 | fmean /= npoints 228 | 229 | if npoints < STD_MAX_NPOINTS: 230 | fstd = X.std(0, dtype=internal_dtype) 231 | else: 232 | sel = X[:MEAN_MAX_NPOINTS] 233 | 234 | mem = np.empty_like(sel).astype(internal_dtype) 235 | curr = np.empty_like(mem[0,:]).astype(internal_dtype) 236 | 237 | seln = sel.shape[0] 238 | np.subtract(sel, fmean, mem[:seln]) 239 | np.multiply(mem[:seln], mem[:seln], mem[:seln]) 240 | fstd = np.add.reduce(mem[:seln], axis=0, dtype=internal_dtype) 241 | 242 | npoints_done = MEAN_MAX_NPOINTS 243 | # -- loop over by blocks for improved numerical accuracy 244 | while npoints_done < npoints: 245 | 246 | sel = X[npoints_done : npoints_done + MEAN_MAX_NPOINTS] 247 | seln = sel.shape[0] 248 | np.subtract(sel, fmean, mem[:seln]) 249 | np.multiply(mem[:seln], mem[:seln], mem[:seln]) 250 | np.add.reduce(mem[:seln], axis=0, dtype=internal_dtype, out=curr) 251 | np.add(fstd, curr, fstd) 252 | 253 | npoints_done += MEAN_MAX_NPOINTS 254 | 255 | if unbiased: 256 | fstd = np.sqrt(fstd / max(1, npoints - 1)) 257 | else: 258 | fstd = np.sqrt(fstd / max(1, npoints)) 259 | 260 | if remove_std0: 261 | fstd[fstd == 0] = 1 262 | 263 | if return_dtype is None: 264 | return_dtype = X.dtype 265 | 266 | return fmean.astype(return_dtype), fstd.astype(return_dtype) 267 | 268 | 269 | def assert_allclose(a, b, rtol=1e-05, atol=1e-08): 270 | if not np.allclose(a, b, rtol=rtol, atol=atol): 271 | adiff = abs(a - b).max(), 272 | rdiff = (abs(a - b) / (abs(a) + abs(b) + 1e-15)).max() 273 | raise ValueError('not close enough', (adiff, rdiff, { 274 | 'amax': a.max(), 275 | 'bmax': b.max(), 276 | 'amin': a.min(), 277 | 'bmin': b.min(), 278 | 'asum': a.sum(), 279 | 'bsum': b.sum(), 280 | })) 281 | 282 | 283 | dumps = functools.partial(cPickle.dumps, protocol=-1) 284 | loads = cPickle.loads 285 | 286 | 287 | def dumps_gram(gram): 288 | if gram.shape == gram.T.shape and np.allclose(gram, gram.T): 289 | vals = [] 290 | for i in range(gram.shape[0]): 291 | vals.extend(gram[i, i:]) 292 | return dumps(dict( 293 | shape=gram.shape, 294 | vals=np.asarray(vals, dtype=gram.dtype))) 295 | else: 296 | return dumps(gram) 297 | 298 | 299 | def loads_gram(msg): 300 | obj = loads(msg) 301 | if isinstance(obj, dict): 302 | shape = obj['shape'] 303 | vals = obj['vals'] 304 | rval = np.empty(shape, vals.dtype) 305 | jj = 0 306 | for ii in range(shape[0]): 307 | Ni = shape[0] - ii 308 | rval[ii, ii:] = vals[jj: jj + Ni] 309 | rval[ii:, ii] = vals[jj: jj + Ni] 310 | jj += Ni 311 | return rval 312 | else: 313 | return obj 314 | 315 | 316 | try: 317 | import git_head_history # -- autogenerated by fabfile.py 318 | except ImportError, e: 319 | if 'No module named git_head_history' in str(e): 320 | # -- if there was simply no file, then it sucks but its normal. 
321 | logging.getLogger(__name__).warn( 322 | "failed to import git_head_history") 323 | class git_head_history(object): 324 | pass 325 | else: 326 | # -- if the file exists but failed to import, we have a problem. 327 | raise 328 | def git_versions(): 329 | rval = {} 330 | for project in dir(git_head_history): 331 | if not project.startswith("_"): 332 | obj = getattr(git_head_history, project) 333 | rval[project] = obj 334 | return rval 335 | 336 | 337 | 338 | 339 | -------------------------------------------------------------------------------- /hpconvnet/isvm_precomputed.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import functools 4 | 5 | import numpy as np 6 | import hyperopt 7 | from sklearn.svm import SVC 8 | from .utils import linear_kernel 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class EnsembleSVC(object): 14 | """Fit and back-fit SVM ensemble. 15 | 16 | Without a `history` this solves cost function 17 | 18 | :math:`1/N \sum_i hinge(y_i max(1 - w \cdot x_i + b)) + \alpha ||w||^2` 19 | 20 | Parameters 21 | ---------- 22 | """ 23 | 24 | def __init__(self, train_sample='train'): 25 | self.train_sample = train_sample 26 | 27 | self._grams = {} 28 | self._weights = {} 29 | self._svm = None 30 | self._labels = {} 31 | self._features = {} 32 | self._compound_samples = {} 33 | self._kernels = {} 34 | 35 | def copy(self): 36 | rval = self.__class__(train_sample=self.train_sample) 37 | rval._grams = dict(self._grams) 38 | rval._weights = copy.deepcopy(self._weights) 39 | rval._svm = copy.deepcopy(self._svm) 40 | rval._labels = copy.deepcopy(self._labels) 41 | rval._features = dict(self._features) 42 | rval._compound_samples = copy.deepcopy(self._compound_samples) 43 | rval._kernels = copy.deepcopy(self._kernels) 44 | return rval 45 | 46 | @property 47 | def members(self): 48 | return self._weights.keys() 49 | 50 | def has_member(self, member): 51 | return member in self._weights 52 | 53 | def add_member(self, member, weight=1.0, kernel=linear_kernel): 54 | logger.debug('add_member: %s' % member) 55 | if member in self._weights: 56 | if (self._weights[member] == weight 57 | and self._kernels[member] == kernel): 58 | pass 59 | else: 60 | raise KeyError(member) 61 | else: 62 | self._weights[member] = weight 63 | self._kernels[member] = kernel 64 | 65 | def set_weight(self, member, weight): 66 | self._weights[member] = weight 67 | 68 | def add_sample(self, sample, labels=None): 69 | logger.debug('add_sample: %s' % sample) 70 | if sample in self._labels: 71 | if np.all(labels == self._labels[sample]): 72 | pass 73 | else: 74 | raise KeyError(sample) 75 | else: 76 | self._labels[sample] = labels 77 | 78 | def add_compound_sample(self, sample, subsamples): 79 | logger.debug('add_compound_sample: %s' % sample) 80 | if sample in self._compound_samples: 81 | raise KeyError(sample) 82 | else: 83 | if not isinstance(subsamples, (list, tuple)): 84 | raise TypeError(subsamples) 85 | self._compound_samples[sample] = subsamples 86 | 87 | def as_raw_samples(self, sample1): 88 | if isinstance(sample1, (tuple, list)): 89 | def add(a, b): 90 | return a + b 91 | return reduce(add, map(self.as_raw_samples, sample1)) 92 | elif sample1 in self._compound_samples: 93 | return self.as_raw_samples(self._compound_samples[sample1]) 94 | else: 95 | return [sample1] 96 | 97 | def add_features(self, member, sample, features): 98 | if member not in self._weights: 99 | raise KeyError(member) 100 | if sample not in self._labels: 
101 | raise KeyError(sample) 102 | self._features[(member, sample)] = features 103 | 104 | def del_features(self, member, sample): 105 | del self._features[(member, sample)] 106 | 107 | def has_gram(self, member, sample1, sample2): 108 | return (member, sample1, sample2) in self._grams 109 | 110 | def add_gram(self, member, sample1, sample2, gram): 111 | if member not in self._weights: 112 | raise KeyError(member) 113 | if sample1 not in self._labels: 114 | raise KeyError(sample1) 115 | if sample2 not in self._labels: 116 | raise KeyError(sample2) 117 | logger.debug('add_gram: (%s, %s, %s) -> (%i, %i) array of %s' % ( 118 | member, sample1, sample2, 119 | gram.shape[0], gram.shape[1], gram.dtype)) 120 | self._grams[(member, sample1, sample2)] = gram 121 | self._grams[(member, sample2, sample1)] = gram.T 122 | 123 | def del_gram(self, member, sample1, sample2): 124 | del self._grams[(member, sample1, sample2)] 125 | del self._grams[(member, sample2, sample1)] 126 | 127 | def compute_gram(self, member, sample1, sample2, dtype=np.float): 128 | if member not in self._weights: 129 | raise KeyError(member) 130 | if sample1 not in self._labels: 131 | raise KeyError(sample1) 132 | if sample2 not in self._labels: 133 | raise KeyError(sample2) 134 | f1 = self._features[(member, sample1)] 135 | f2 = self._features[(member, sample2)] 136 | gram = self._kernels[member](f1, f2).astype(dtype) 137 | if gram.shape != (len(f1), len(f2)): 138 | raise ValueError('kernel function returned wrong shape') 139 | self.add_gram(member, sample1, sample2, gram) 140 | 141 | def compute_all_grams(self, members, samples): 142 | for member in members: 143 | for sample1 in samples: 144 | for sample2 in samples: 145 | if (member, sample1, sample2) not in self._grams: 146 | self.compute_gram(member, sample1, sample2) 147 | 148 | def gram(self, member, sample1, sample2): 149 | to_vstack = [] 150 | row_samples = self.as_raw_samples(sample1) 151 | col_samples = self.as_raw_samples(sample2) 152 | for rs in row_samples: 153 | to_hstack = [self._grams[(member, rs, cs)] 154 | for cs in col_samples] 155 | to_vstack.append(np.hstack(to_hstack)) 156 | rval = np.vstack(to_vstack) 157 | return rval 158 | 159 | def labels(self, sample): 160 | raw_samples = self.as_raw_samples(sample) 161 | raw_labels = [self._labels[s] for s in raw_samples] 162 | return np.concatenate(raw_labels) 163 | 164 | def weighted_gram(self, sample1, sample2, weights=None): 165 | if weights is None: 166 | weights = self._weights 167 | members = weights.keys() 168 | # -- sorting not strictly necessary, but helps different processes to 169 | # -- perform the same calculation. 170 | members.sort() 171 | rval = None 172 | for m in members: 173 | # -- The weights represent squared importance coefficients, one 174 | # -- on each ensemble member. 
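            # -- i.e. (a sketch of the math, not new behaviour):
            #        K(s1, s2) = sum_m weights[m] * K_m(s1, s2)
            #    where K_m is the per-member gram returned by self.gram().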
175 | gg = weights[m] * self.gram(m, sample1, sample2) 176 | rval = gg if rval is None else gg + rval 177 | return rval 178 | 179 | def fit_svm(self, l2_regularization=None, train_sample=None): 180 | if train_sample is None: 181 | train_sample = self.train_sample 182 | 183 | g_trn = self.weighted_gram(train_sample, train_sample) 184 | 185 | if l2_regularization is None: 186 | l2_regularization = 1.0 / len(g_trn) 187 | 188 | C = 1.0 / (l2_regularization * len(g_trn)) 189 | 190 | if l2_regularization is None: 191 | assert np.allclose(C, 1.0) 192 | svm = SVC( 193 | C=C, 194 | kernel='precomputed', 195 | cache_size=1.1 * 4.0 * g_trn.size / (1 << 20), 196 | max_iter=0.5 * len(g_trn) ** 2, # COMPLETE HEURISTIC GUESS 197 | ) 198 | svm.fit(g_trn, self.labels(train_sample)) 199 | self._svm = svm 200 | 201 | def fit_weights_crossvalid(self, validation_sample, max_evals, 202 | algo=None, 203 | scales=100.0, 204 | members=None, 205 | ): 206 | """Fit an SVM and optimize [some of] the kernel weights. 207 | 208 | Parameters 209 | ---------- 210 | validation_sample : sample identifier 211 | Adjust hyperparameters to optimize performance on this set. 212 | 213 | max_evals : integer 214 | Try no more than this many hyperparameter settings. 215 | 216 | algo: hyperopt.algo 217 | A hyperopt optimization algorithm for hyperparameters. 218 | Default is currently hyperopt.tpe.suggest 219 | 220 | scales: float or dict: member -> float 221 | Multiplicative uncertainty around the current weight value 222 | for each member (larger for broader search). 223 | 224 | members : None or sequence of member names 225 | Members to combine as in MKL. `None` means to use all members. 226 | 227 | 228 | TODO 229 | ---- 230 | Look at literature for MKL and do something more efficient 231 | and accurate. 232 | """ 233 | 234 | # -- N.B. 235 | # -- We don't need to take l2-regularization into account because by 236 | # -- optimizing the norm of the weights, we are implicitly optimizing 237 | # -- the l2-regularization on the model. 238 | 239 | trn_sample = self.train_sample 240 | val_sample = validation_sample 241 | 242 | labels_trn = self.labels(trn_sample) 243 | labels_val = self.labels(val_sample) 244 | 245 | if algo is None: 246 | algo = functools.partial( 247 | hyperopt.tpe.suggest, 248 | n_startup_jobs=5) 249 | 250 | if isinstance(scales, (int, float, np.number)): 251 | #scales = {m: scales for m in self._weights} 252 | scales = dict([(m, scales) for m in self._weights]) 253 | else: 254 | if set(scales.keys()) != set(self._weights.keys()): 255 | raise ValueError('wrong number of search scales') 256 | 257 | if members is None: 258 | members = self._weights.keys() 259 | else: 260 | members = list(members) 261 | # -- sorting not strictly necessary, but helps different processes to 262 | # -- perform the same calculation by presenting the same `domain` below. 
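        # -- (for reference) the `domain` built further down places a
        #    lognormal prior on each member's weight, centred (in median) at
        #    its current value with multiplicative spread scales[m].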
263 | members.sort() 264 | 265 | def eval_weights(ws): 266 | ws = np.asarray(ws) 267 | ws_weights = copy.deepcopy(self._weights) 268 | ws_weights.update(dict(zip(members, ws))) 269 | g_trn = self.weighted_gram(trn_sample, trn_sample, ws_weights) 270 | g_val = self.weighted_gram(val_sample, trn_sample, ws_weights) 271 | 272 | logger.debug('fitting ws=%s' % str(ws)) 273 | svm = SVC( 274 | C=1.0, 275 | kernel='precomputed', 276 | cache_size=1.1 * 4.0 * g_trn.size / (1 << 20), 277 | max_iter=0.5 * len(g_trn) ** 2, # XXX: COMPLETE HEURISTIC GUESS 278 | ) 279 | 280 | TINY = 1e-8 281 | def nogood(msg): 282 | logger.debug('f(%s) -> "%s"' % (ws, msg)) 283 | return dict(loss=1.0, status='ok', svm=svm, fit=False) 284 | 285 | if not np.all(np.isfinite(g_trn)): 286 | return nogood('non-finite gram matrix (train)') 287 | if np.all(abs(g_trn) < TINY): 288 | return nogood('null gram matrix (train)') 289 | if not np.all(np.isfinite(g_val)): 290 | return nogood('non-finite gram matrix (valid)') 291 | if np.all(abs(g_val) < TINY): 292 | return nogood('null gram matrix (valid)') 293 | 294 | svm.fit(g_trn, labels_trn) 295 | logger.debug('done!') 296 | pred_val = svm.predict(g_val) 297 | assert labels_val.shape == pred_val.shape 298 | err_rate = np.mean(labels_val != pred_val) 299 | # XXX: to break ties, take smaller weights 300 | rval = err_rate + 1e-4 * np.log1p(np.sum(ws)) 301 | logger.debug('f(%s) -> %f -> %f' % (ws, err_rate, rval)) 302 | return dict(loss=rval, status='ok', svm=svm, fit=True) 303 | 304 | try: 305 | # -- This optimizer seems a little bit less finicky than the 306 | # scipy ones (!?) such as anneal (doesn't respect lower bound, 307 | # or maxeval) and slsqp (xvalid cost is not continuous)... but 308 | # -- TODO: when a GP-based optimizer is in hyperopt, that would 309 | # probably work even better. 
310 | # -- Note: I emailed Jasper about pushing his GP impl to sklearn 311 | level = logging.getLogger('hyperopt').level 312 | logging.getLogger('hyperopt').setLevel(logging.WARN) 313 | trials = hyperopt.Trials() 314 | domain = [ 315 | hyperopt.hp.lognormal( 316 | str(m), np.log(self._weights[m]), np.log(scales[m])) 317 | for m in members] 318 | hyperopt.fmin( 319 | eval_weights, 320 | space=domain, 321 | trials=trials, 322 | max_evals=max_evals, 323 | algo=algo, 324 | ) 325 | finally: 326 | logging.getLogger('hyperopt').setLevel(level) 327 | final_weights = trials.argmin 328 | 329 | if not trials.best_trial['result']['fit']: 330 | # -- meant to be caught by slm.py call_catching_pipeline_errors() 331 | raise ValueError('Failed to fit SVM (non-finite features)') 332 | self._svm = trials.best_trial['result']['svm'] 333 | self._weights = final_weights 334 | 335 | def predict(self, test_sample): 336 | g = self.weighted_gram(test_sample, self.train_sample) 337 | return self._svm.predict(g) 338 | 339 | def error_rate(self, test_sample): 340 | preds = self.predict(test_sample) 341 | rval = np.mean(preds != self._labels[test_sample]) 342 | return rval 343 | 344 | def print_summary(self): 345 | print 'EnsembleSVC.print_summary()' 346 | print ' weights', self._weights 347 | 348 | -------------------------------------------------------------------------------- /hpconvnet/slm_visitor.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import time 4 | 5 | import numpy as np 6 | 7 | import skdata.base 8 | from skdata.larray import cache_memmap 9 | from skdata.larray import lmap 10 | from skdata.larray import lmap_info 11 | from skdata.larray import reindex 12 | 13 | from hyperopt.pyll import scope # TODO remove-me 14 | import foobar 15 | 16 | from .pyll_slm import average_row_l2norm 17 | from .pyll_slm import pyll_theano_batched_lmap # TODO: CLEAN UP 18 | from .pyll_slm import flatten_elems 19 | 20 | from .isvm_boosting import BoostHelper 21 | 22 | from .utils import mean_and_std 23 | import comparisons 24 | 25 | info = logging.getLogger(__name__).info 26 | warn = logging.getLogger(__name__).warn 27 | 28 | 29 | class SLM_Visitor(skdata.base.LearningAlgo): 30 | """ 31 | This class takes an *evaluated* pipeline as a starting point, 32 | and applies it to a data set. Nothing symbolic here. 33 | """ 34 | def __init__(self, pipeline, ctrl, data_view, 35 | max_n_features, 36 | batchsize, 37 | memmap_name='', 38 | badfit_thresh=float('inf'), 39 | memmap_del_atexit=True, 40 | batched_lmap_speed_thresh=None, 41 | comparison_names=('mult', 'absdiff', 'sqrtabsdiff', 'sqdiff'), 42 | ): 43 | self.pipeline = pipeline 44 | self.ctrl = ctrl 45 | self.data_view = data_view 46 | self.memmap_name = memmap_name 47 | self.max_n_features = max_n_features 48 | self.badfit_thresh = badfit_thresh 49 | self.memmap_del_atexit = memmap_del_atexit 50 | self.batched_lmap_speed_thresh = batched_lmap_speed_thresh 51 | self.batchsize = batchsize 52 | 53 | self.image_features = {} 54 | self.comparison_names = comparison_names 55 | 56 | def get_image_features_lmap(self, images, batched_lmap_speed_thresh=None): 57 | N, H, W, C = images.shape 58 | assert C in (1, 3) 59 | # -- this loading must be simple, and match the unsup_images 60 | # function in lfw. 
Anything more elaborate must 61 | # be included in the pyll pipeline 62 | chmajor_fn = functools.partial(np.transpose, axes=(2, 0, 1)) 63 | chmajor_fn = lmap_info( 64 | shape=(C, H, W), 65 | dtype=images.dtype 66 | )(chmajor_fn) 67 | def chmajor_fn_f_map(X): 68 | return np.transpose(X, axes=(0, 3, 1, 2)) 69 | chmajor_fn.f_map = chmajor_fn_f_map 70 | 71 | rval = pyll_theano_batched_lmap( 72 | scope.partial(scope.callpipe1, self.pipeline['pipe']), 73 | lmap(chmajor_fn, images), 74 | batchsize=self.batchsize, 75 | print_progress_every=10, # -- seconds 76 | abort_on_rows_larger_than=self.max_n_features, 77 | speed_thresh=batched_lmap_speed_thresh, 78 | x_dtype='uint8', # HAS TO MATCH ./slm.py 79 | ) 80 | return rval 81 | 82 | # XXX ugly function, refactor with normalized_image_match_features 83 | # we don't even use the "cdict" anymore and I think I manually 84 | # clear the self.image_features dict after calling! 85 | def get_image_features(self, task, batched_lmap_speed_thresh=None): 86 | if batched_lmap_speed_thresh is None: 87 | batched_lmap_speed_thresh = self.batched_lmap_speed_thresh 88 | images = task.images 89 | try: 90 | rval, _images, cdict = self.image_features[images] 91 | # -- currently it is true that all tasks should be indexing into 92 | # -- the same set of images. Later when this is not the case, 93 | # -- delete this over-strict check. 94 | assert _images is images 95 | except KeyError: 96 | feature_lmap = self.get_image_features_lmap(task.images, 97 | batched_lmap_speed_thresh) 98 | 99 | rval = cache_memmap( 100 | feature_lmap, 101 | self.memmap_name + '_image_features_' + task.name, 102 | del_atexit=self.memmap_del_atexit) 103 | 104 | foobar.append_ndarray_signature(rval[0], 105 | 'get_image_features features 0', task.name) 106 | foobar.append_ndarray_signature(rval[100], 107 | 'get_image_features features 100', task.name) 108 | 109 | cdict = {} 110 | self.image_features[images] = rval, images, cdict 111 | return rval, cdict 112 | 113 | def normalized_image_features(self, images, xmean, xstd, avg_nrm, 114 | n_rows_to_estimate_stats=1000, 115 | flatten=True, 116 | batched_lmap_speed_thresh=None, 117 | ): 118 | """ 119 | svm_dct - dict 120 | dict of parameters for normalization: 121 | 'remove_std0' 122 | 'varthresh' 123 | 'divrowl2' 124 | write xmean, xstd if role is 'train' 125 | read xmean and xstd if role is 'test' 126 | role - str 127 | either 'train' or 'test' 128 | n_rows_to_estimate_stats - bool 129 | estimate xmean and xstd from the first N feature vectors 130 | flatten - bool 131 | return features flattened to vectors 132 | """ 133 | 134 | if not flatten: 135 | raise NotImplementedError('only flatten is implemented') 136 | 137 | pipeline = self.pipeline 138 | features_lmap = self.get_image_features_lmap(images) 139 | 140 | n_features = np.prod(features_lmap.shape[1:]) 141 | 142 | if xmean is None: 143 | # -- load enough training data into memory to estimate stats 144 | cache_train = flatten_elems( 145 | features_lmap[:n_rows_to_estimate_stats]) 146 | 147 | xmean, xstd = mean_and_std( 148 | cache_train, 149 | remove_std0=pipeline['remove_std0']) 150 | 151 | xstd = np.sqrt(xstd ** 2 + pipeline['varthresh']) 152 | 153 | if pipeline['divrowl2']: 154 | avg_nrm = 1e-7 + average_row_l2norm( 155 | (cache_train - xmean) / xstd) 156 | else: 157 | avg_nrm = 1 158 | 159 | def normalize(x): 160 | return (x.flatten() - xmean) / (xstd * avg_nrm) 161 | 162 | def normalize_many(x): 163 | return (x.reshape((len(x), -1)) - xmean) / (xstd * avg_nrm) 164 | 165 | normed_features = lmap( 
166 | lmap_info( 167 | shape=(n_features,), 168 | dtype=features_lmap.dtype)(normalize), 169 | features_lmap, 170 | ragged=False, 171 | f_map=normalize_many) 172 | 173 | return normed_features, xmean, xstd, avg_nrm 174 | 175 | 176 | 177 | def normalized_image_match_features(self, task, svm_dct, role, 178 | batched_lmap_speed_thresh=None): 179 | assert role in ('train', 'test') 180 | if batched_lmap_speed_thresh is None: 181 | batched_lmap_speed_thresh = self.batched_lmap_speed_thresh 182 | image_features, cdict = self.get_image_features(task, 183 | batched_lmap_speed_thresh=batched_lmap_speed_thresh) 184 | del cdict # -- no longer used (waste of memory) 185 | pipeline = self.pipeline 186 | info('Indexing into image_features of shape %s' % 187 | str(image_features.shape)) 188 | 189 | comps = [getattr(comparisons, cc) 190 | for cc in self.comparison_names] 191 | n_features = np.prod(image_features.shape[1:]) 192 | n_trn = len(task.lidx) 193 | 194 | x_trn_shp = (n_trn, len(comps), n_features) 195 | info('Allocating training ndarray of shape %s' % str(x_trn_shp)) 196 | x_trn = np.empty(x_trn_shp, dtype='float32') 197 | 198 | # -- pre-compute all of the image_features we will need 199 | all_l_features = reindex(image_features, task.lidx)[:] 200 | all_r_features = reindex(image_features, task.ridx)[:] 201 | 202 | all_l_features = all_l_features.reshape(len(all_l_features), -1) 203 | all_r_features = all_r_features.reshape(len(all_r_features), -1) 204 | 205 | foobar.append_ndarray_signature(all_l_features, 206 | 'normalized_image_match l_features', task.name) 207 | foobar.append_ndarray_signature(all_r_features, 208 | 'normalized_image_match r_features', task.name) 209 | 210 | if role == 'train': 211 | if np.allclose(all_l_features.var(axis=0), 0.0): 212 | raise ValueError( 213 | 'Homogeneous features (non-finite features)') 214 | 215 | xmean_l, xstd_l = mean_and_std(all_l_features, 216 | remove_std0=pipeline['remove_std0']) 217 | xmean_r, xstd_r = mean_and_std(all_r_features, 218 | remove_std0=pipeline['remove_std0']) 219 | xmean = (xmean_l + xmean_r) / 2.0 220 | # -- this is an ad-hoc way of blending the variances. 
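            # -- i.e. xstd = sqrt(max(xstd_l, xstd_r)**2 + varthresh); the
            #    varthresh term acts as a variance floor so near-constant
            #    features do not explode after standardization.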
221 | xstd = np.sqrt(np.maximum(xstd_l, xstd_r) ** 2 222 | + pipeline['varthresh']) 223 | 224 | foobar.append_ndarray_signature( 225 | xmean, 'normalized_image_match xmean', task.name) 226 | foobar.append_ndarray_signature( 227 | xstd, 'normalized_image_match xstd', task.name) 228 | 229 | 230 | svm_dct['xmean'] = xmean 231 | svm_dct['xstd'] = xstd 232 | else: 233 | xmean = svm_dct['xmean'] 234 | xstd = svm_dct['xstd'] 235 | 236 | info('Computing comparison features') 237 | 238 | # -- now compute the "comparison functions" into x_trn 239 | for jj, (lfeat, rfeat) in enumerate( 240 | zip(all_l_features, all_r_features)): 241 | lfeat_z = (lfeat - xmean) / xstd 242 | rfeat_z = (rfeat - xmean) / xstd 243 | for ci, comp in enumerate(comps): 244 | x_trn[jj, ci, :] = comp(lfeat_z, rfeat_z) 245 | 246 | if pipeline['divrowl2']: 247 | info('Dividing by feature norms') 248 | # -- now normalize by average feature norm because some 249 | # comparison functions come out smaller than others 250 | if role == 'train': 251 | svm_dct['divrowl2_avg_nrm'] = {} 252 | for ci, cname in enumerate(self.comparison_names): 253 | avg_nrm = average_row_l2norm(x_trn[:, ci, :]) + 1e-7 254 | svm_dct['divrowl2_avg_nrm'][cname] = avg_nrm 255 | 256 | avg_nrm_vec = [svm_dct['divrowl2_avg_nrm'][cname] 257 | for cname in self.comparison_names] 258 | x_trn /= np.asarray(avg_nrm_vec)[None, :, None] 259 | foobar.append_trace('get_normlized_features avg_nrm', avg_nrm_vec) 260 | 261 | # -- collapse comparison and feature dimensions 262 | x_trn.shape = (x_trn.shape[0], x_trn.shape[1] * x_trn.shape[2]) 263 | 264 | foobar.append_ndarray_signature( 265 | x_trn, 'normalized_image_match x_trn', task.name) 266 | info('normalized_image_match_features complete') 267 | return x_trn 268 | 269 | def loss(self, model, task): 270 | info('Score %s' % task.name) 271 | semantics = task.semantics 272 | methodname = 'loss_' + semantics 273 | method = getattr(self, methodname) 274 | loss = method(model, task) 275 | return loss 276 | 277 | def best_model(self, train, valid=None): 278 | semantics = train.semantics 279 | # -- train the svm 280 | info('BestModelByCrossValidation %s, %s' % ( 281 | train.name, getattr(valid, 'name', None))) 282 | model = getattr(self, 'train_' + semantics)(train, valid) 283 | return model 284 | 285 | def retrain_classifier(self, model, task): 286 | info('RetrainClassifier %s' % task.name) 287 | semantics = task.semantics 288 | methodname = 'retrain_classifier_' + semantics 289 | method = getattr(self, methodname) 290 | new_model = method(model, task) 291 | # -- measure the erate and compute the cur_xw values 292 | getattr(self, 'loss_' + semantics)(new_model, task) 293 | return new_model 294 | 295 | def _member_name(self, tid=None): 296 | if tid is None: 297 | if self.ctrl.current_trial is None: 298 | tid = 'debug' 299 | else: 300 | tid = self.ctrl.current_trial['tid'] 301 | member_name = 'member_%s' % tid 302 | return member_name 303 | 304 | def load_ensemble_history(self, fields): 305 | 306 | trials = self.ctrl.trials 307 | if hasattr(trials, 'handle'): 308 | # query mongodb directly to avoid transferring un-necessary fields 309 | docs_for_bh = BoostHelper.query_MongoTrials( 310 | trials, 311 | fields=fields) 312 | # download only those docs that are in the active history 313 | trials.refresh_tids([d['tid'] for d in docs_for_bh]) 314 | # -- XXX: relatively arbitrary assert to make sure we didn't 315 | # download a whole wack of documents... the point of 316 | # refresh_tids is to avoid this. 
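            # -- concretely: allow strictly fewer than 5 extra documents
            #    beyond those selected for the boosting history.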
317 | assert len(trials.trials) < len(docs_for_bh) + 5, ( 318 | len(trials.trials), len(docs_for_bh)) 319 | else: 320 | trials.refresh() 321 | docs_for_bh = trials.trials 322 | 323 | def helper(): 324 | bh = BoostHelper(docs_for_bh) 325 | 326 | if self.ctrl.current_trial is None: 327 | history = [] 328 | else: 329 | history = bh.history(self.ctrl.current_trial) 330 | assert history[-1] is self.ctrl.current_trial 331 | history.pop(-1) 332 | info('load_ensemble_history: %i previous model documents found' 333 | % len(history)) 334 | return history 335 | 336 | retries = 20 337 | while retries: 338 | history = helper() 339 | if any(trial['result'].get('in_progress') for trial in history): 340 | warn('Previous trial is still in progress, waiting 30s') 341 | time.sleep(30) 342 | retries -= 1 343 | else: 344 | break 345 | 346 | foobar.append_trace('load ensemble history len', len(history)) 347 | 348 | if retries: 349 | self.history = history 350 | else: 351 | raise Exception('Previous trial in progress, cannot continue') 352 | 353 | -------------------------------------------------------------------------------- /hpconvnet/slm_visitor_primal.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import cPickle 3 | import functools 4 | import logging 5 | import os 6 | 7 | import numpy as np 8 | 9 | from hyperopt import pyll 10 | import hyperopt 11 | from hyperopt.base import use_obj_for_literal_in_memo 12 | 13 | from skdata.larray import cache_memmap 14 | 15 | #from .isvm_boosting import BoostHelper 16 | from .isvm_multi import IncrementalMultiSVM 17 | from .pyll_slm import error_rate 18 | from .pyll_slm import view2_worth_calculating 19 | from .slm_visitor import SLM_Visitor 20 | from .slm import USLM_Exception 21 | from .slm import call_catching_pipeline_errors 22 | from .utils import git_versions 23 | 24 | logger = logging.getLogger(__name__) 25 | info = logger.info 26 | warn = logger.warn 27 | 28 | loads = cPickle.loads 29 | dumps = functools.partial(cPickle.dumps, protocol=-1) 30 | 31 | 32 | def recupdate(dct, path, payload): 33 | if path: 34 | if not isinstance(path[0], basestring): 35 | raise TypeError(path[0]) 36 | dct.setdefault(path[0], {}) 37 | return recupdate(dct[path[0]], path[1:], payload) 38 | else: 39 | dct.update(payload) 40 | return dct 41 | 42 | 43 | class PrimalVisitor(SLM_Visitor): 44 | """ 45 | An skdata-compatible learning algorithm that implements SVM with 46 | isvm_binary and isvm_multi. 47 | 48 | This class takes an *evaluated* pipeline as a starting point, 49 | and applies it to a data set. Nothing symbolic here. 
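    Ensemble members are fit as continuations: the decision values (`xw`) of
    previous trials are loaded from their attachments and handed to the
    incremental SVM as a fixed history (see load_prev_xw / load_svm below).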
50 | """ 51 | 52 | def __init__(self, 53 | optimize_l2_reg, 54 | thresh_loss=None, 55 | thresh_rank=10, 56 | badfit_thresh=float('inf'), 57 | svm_crossvalid_max_evals=20, 58 | **kwargs 59 | ): 60 | SLM_Visitor.__init__(self, **kwargs) 61 | 62 | self.optimize_l2_reg = optimize_l2_reg 63 | self.member_name = self._member_name() 64 | self.thresh_loss = thresh_loss 65 | self.thresh_rank = thresh_rank 66 | self.svm_crossvalid_max_evals = svm_crossvalid_max_evals 67 | self.badfit_thresh = badfit_thresh 68 | 69 | self._results = {} 70 | self._obj_results = {} 71 | 72 | if not self.optimize_l2_reg: 73 | raise NotImplementedError() 74 | 75 | def hyperopt_rval(self): 76 | return copy.deepcopy(self._results) 77 | 78 | def attach_obj_results(self): 79 | ctrl = self.ctrl 80 | if ctrl.current_trial is None: 81 | return 82 | att = ctrl.trials.trial_attachments(ctrl.current_trial) 83 | def helper(dct, prefix): 84 | for key, val in dct.items(): 85 | if isinstance(val, dict): 86 | helper(val, '%s/%s' % (prefix, key)) 87 | else: 88 | att['%s/%s' % (prefix, key)] = dumps(val) 89 | helper(self._obj_results, '') 90 | 91 | def trial_obj_attachment(self, trial, rpath): 92 | key = '/' + '/'.join(rpath) 93 | att = self.ctrl.trials.trial_attachments(trial) 94 | msg = att[key] 95 | rval = loads(msg) 96 | return rval 97 | 98 | def add_results(self, path, simple, objs): 99 | for key, val in objs.items(): 100 | if isinstance(val, dict): 101 | raise TypeError('cannot attach a dict', key) 102 | recupdate(self._results, path, simple) 103 | recupdate(self._obj_results, path, objs) 104 | 105 | def load_history(self): 106 | raise NotImplementedError() 107 | 108 | def load_svm(self, train_name, valid_name, 109 | n_features, n_classes, l2_reg): 110 | if self.history: 111 | prev_doc = self.history[-1] 112 | info('load_svm: %i previous model documents found' 113 | % len(self.history)) 114 | info('load_svm: Most-previous model document tid: %s' 115 | % prev_doc['tid']) 116 | att = self.ctrl.trials.trial_attachments(prev_doc) 117 | prev_svm = self.trial_obj_attachment(prev_doc, 118 | ['train_indexed_image_classification', train_name, 119 | valid_name, 'model']) 120 | svm = prev_svm.continuation(n_features, l2_reg) 121 | info('load_svm: alpha shape %s' 122 | % str(svm.alpha.shape)) 123 | info('load_svm: prev_w_l2_sqr shape %s' 124 | % str(svm.prev_w_l2_sqr.shape)) 125 | else: 126 | info('load_svm: No previous model document found') 127 | info('load_svm: Allocating SVM for %i x %i problem' 128 | % (n_features, n_classes)) 129 | svm = IncrementalMultiSVM(n_features, n_classes, 130 | l2_regularization=l2_reg, 131 | dtype='float32', 132 | # -- TODO consider maxfun, M, tolerances? 
133 | bfgs_kwargs={ 134 | 'maxfun': 1000, 135 | 'iprint': 0, 136 | 'm': 32, 137 | 'factr': 100}, 138 | print_interval=5000, 139 | n_sgd_iters=0, 140 | badfit_thresh=self.badfit_thresh, 141 | ) 142 | return svm 143 | 144 | def load_prev_xw(self, task_name, train_name, valid_name, use_history): 145 | assert use_history in ('using_history', 'not_using_history') 146 | if not self.history: 147 | info('load_prev_xw: No previous model documents for %s/%s/%s' 148 | % (task_name, train_name, valid_name)) 149 | return None 150 | prev_xw_list = [] 151 | for pm_doc in self.history: 152 | info('load_prev_xw doc %i loss %f' % ( 153 | pm_doc['tid'], pm_doc['result']['loss'])) 154 | xw = self.trial_obj_attachment(pm_doc, 155 | ['loss_indexed_image_classification', 156 | task_name, train_name, valid_name, use_history, 'xw']) 157 | prev_xw_list.append(xw.astype('float32')) 158 | 159 | info('load_prev_xw: %i previous model documents found' 160 | % len(prev_xw_list)) 161 | # -- put them into desired shape: (examples, classes, models) 162 | prev_xw = np.asarray(prev_xw_list).transpose(1, 2, 0).copy() 163 | return prev_xw 164 | 165 | def train_indexed_image_classification(self, train, valid=None): 166 | 167 | if valid is None: 168 | train_name = train.name 169 | valid_name = 'None' 170 | else: 171 | train_name = train.name 172 | valid_name = valid.name 173 | assert train.all_images is valid.all_images 174 | assert train.all_labels is valid.all_labels 175 | 176 | info('train_indexed_image_classification: %s/%s' % ( 177 | train_name, valid_name)) 178 | 179 | normed_features, xmean, xstd, avg_nrm = \ 180 | self.normalized_image_features( 181 | train.all_images, None, None, None, flatten=True) 182 | 183 | assert train.name is not None 184 | 185 | if hasattr(self, 'cmemmap'): 186 | assert train.all_images is self.cmemmap_all_images 187 | else: 188 | self.cmemmap_all_images = train.all_images 189 | self.cmemmap = cache_memmap( 190 | normed_features, 191 | self.memmap_name, 192 | del_atexit=True) 193 | 194 | if not hasattr(self, 'history'): 195 | self.load_ensemble_history(fields=[]) 196 | 197 | svm = self.load_svm( 198 | train_name, valid_name, self.cmemmap.shape[1], 199 | train.n_classes, self.pipeline['l2_reg']) 200 | svm.feature_xmean = xmean 201 | svm.feature_xstd = xstd 202 | svm.feature_avg_nrm = avg_nrm 203 | svm.train_name = train_name 204 | svm.valid_name = valid_name 205 | 206 | prev_xw_trn = self.load_prev_xw( 207 | train_name, train_name, valid_name, use_history='using_history') 208 | 209 | info('train_indexed_image_classification: Fitting SVM with prev_xw') 210 | svm.fit(self.cmemmap[train.idxs], 211 | train.all_labels[train.idxs], 212 | prev_xw_trn) 213 | 214 | info('-> loaded alpha %s' % str(svm.alpha)) 215 | info('-> loaded prvl2 %s' % str(svm.prev_l2_regularization)) 216 | info('-> loaded prvw2 %s' % str(svm.prev_w_l2_sqr)) 217 | 218 | if valid is None: 219 | # -- XXX: it is currently a hack to use the existence 220 | # of the validation set to decide when to compute 221 | # an svm without the history features... it currently 222 | # so happens that for the fit/val split we have a validation 223 | # set and we want to train both ways, and for the sel/test 224 | # split we do not have a validation set and we only want the 225 | # fit-with-history training. 
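            # -- In short: with a validation set we fit both `svm` (with
            #    history) and `svm0` (history zeroed out); on the sel/test
            #    split `svm0` stays None and only the with-history loss is
            #    reported.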
226 | assert train.name == 'sel' 227 | svm0 = None 228 | else: 229 | svm0 = copy.deepcopy(svm) 230 | if (prev_xw_trn is not None) and prev_xw_trn.size: 231 | info('Fitting SVM without prev_xw') 232 | svm0.fit(self.cmemmap[train.idxs], 233 | train.all_labels[train.idxs], 234 | np.zeros_like(prev_xw_trn)) 235 | self.add_results( 236 | [ 237 | 'train_indexed_image_classification', 238 | train_name, 239 | valid_name, 240 | ], 241 | { 242 | 'train_name': train_name, 243 | 'valid used': (valid is not None), 244 | 'valid_name': valid_name, 245 | }, 246 | { 247 | 'model0': svm0, 248 | 'model': svm, 249 | }) 250 | 251 | self.loss_indexed_image_classification(svm, train) 252 | if valid is not None: 253 | self.loss_indexed_image_classification(svm, valid) 254 | self.loss_indexed_image_classification(svm0, valid, 255 | use_history='not_using_history') 256 | 257 | return svm 258 | 259 | def loss_indexed_image_classification(self, model, task, 260 | use_history='using_history'): 261 | assert task.name 262 | 263 | # -- N.B. using_history here, because we want to build on the models 264 | # that *were* using history 265 | prev_xw = self.load_prev_xw(task.name, 266 | model.train_name, model.valid_name, 267 | use_history='using_history') 268 | 269 | x = self.cmemmap[task.idxs] 270 | 271 | if (use_history == 'using_history') or (prev_xw is None): 272 | preds = model.predict(x, prev_xw) 273 | else: 274 | preds = model.predict(x, np.zeros_like(prev_xw)) 275 | erate = error_rate(preds, task.all_labels[task.idxs]) 276 | xw = np.dot(x, model.weights) 277 | 278 | assert preds.min() >= 0 279 | if preds.max() < 256: 280 | preds = preds.astype('uint8') 281 | if '64' in str(xw.dtype): 282 | xw = xw.astype('float32') 283 | 284 | self.add_results( 285 | ['loss_indexed_image_classification', 286 | task.name, 287 | model.train_name, 288 | model.valid_name, 289 | use_history, 290 | ], 291 | {'erate': erate, 292 | 'task_name': task.name, 293 | 'train_name': model.train_name, 294 | 'valid_name': model.valid_name, 295 | 'use_history': use_history, 296 | }, 297 | { 298 | 'preds': preds, 299 | 'xw': xw, 300 | }) 301 | 302 | info('loss: ERR RATE %s = %f' % (task.name, erate)) 303 | info('loss: XW STATS %f %f %f %s' % 304 | (xw.min(), xw.mean(), xw.max(), xw.shape)) 305 | 306 | return erate 307 | 308 | 309 | # -- this helper is called by mnist and svhn as well 310 | def uslm_eval_helper( 311 | expr, 312 | memo, 313 | ctrl, 314 | data_fraction, 315 | assume_promising, 316 | data_view, 317 | memmap_name_template, 318 | DataView, 319 | loss_fn, 320 | true_loss_fn, 321 | ): 322 | 323 | use_obj_for_literal_in_memo(expr, data_view, DataView, memo) 324 | versions = git_versions() 325 | logger.info('GIT VERSIONS: %s' % str(versions)) 326 | 327 | def exception_thrower(): 328 | argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False) 329 | visitor = PrimalVisitor( 330 | pipeline=argdict['pipeline'], 331 | ctrl=argdict['ctrl'], 332 | data_view=argdict['data_view'], 333 | max_n_features=argdict['max_n_features'], 334 | # TODO: just pass memmap_name directly 335 | memmap_name=memmap_name_template % (os.getpid(), 336 | np.random.randint(10000)), 337 | thresh_rank=1, 338 | optimize_l2_reg=True, 339 | batched_lmap_speed_thresh=argdict[ 340 | 'batched_lmap_speed_thresh'], 341 | badfit_thresh=None, 342 | batchsize=argdict['batchsize'], 343 | ) 344 | 345 | protocol_iter = argdict['data_view'].protocol_iter(visitor) 346 | msg, model = protocol_iter.next() 347 | assert msg == 'model validation complete' 348 | 349 | # -- save the loss, but 
don't save attachments yet. 350 | rdict = visitor.hyperopt_rval() 351 | rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction']) 352 | rdict['in_progress'] = True 353 | rdict['status'] = hyperopt.STATUS_OK 354 | argdict['ctrl'].checkpoint(rdict) 355 | 356 | if assume_promising: 357 | promising = True 358 | else: 359 | promising = view2_worth_calculating( 360 | loss=rdict['loss'], 361 | ctrl=argdict['ctrl'], 362 | thresh_loss=1.0, 363 | thresh_rank=1) 364 | 365 | logger.info('Promising: %s' % promising) 366 | if promising: 367 | msg, model2 = protocol_iter.next() 368 | assert msg == 'model testing complete' 369 | rdict = visitor.hyperopt_rval() 370 | rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction']) 371 | rdict['true_loss'] = true_loss_fn(visitor) 372 | visitor.attach_obj_results() 373 | else: 374 | logger.warn('Not testing unpromising model %s' % str(model)) 375 | del rdict['in_progress'] 376 | return visitor, rdict 377 | 378 | try: 379 | visitor, rdict = call_catching_pipeline_errors(exception_thrower) 380 | except USLM_Exception, e: 381 | exc, rdict = e.args 382 | logger.info('job failed: %s: %s' % (type(e), exc)) 383 | rdict['git_versions'] = versions 384 | return rdict 385 | 386 | -------------------------------------------------------------------------------- /hpconvnet/slm_visitor_esvc.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import copy 3 | import logging 4 | import os 5 | import time 6 | 7 | from hyperopt.base import SONify 8 | 9 | from .isvm_precomputed import EnsembleSVC 10 | from .slm_visitor import SLM_Visitor 11 | from .utils import loads_gram, dumps_gram 12 | from .pyll_slm import error_rate 13 | import foobar 14 | 15 | import skdata.data_home 16 | 17 | debug = logging.getLogger(__name__).debug 18 | info = logging.getLogger(__name__).info 19 | warn = logging.getLogger(__name__).warn 20 | 21 | _curdb = 'curdb' # XXX: terrible hack :( 22 | # _curdb is an abstraction leak -- MongoTrials has failed here. 23 | # we set it from lfw.py 24 | 25 | 26 | def cached_gram_load(tid, att_key): 27 | data_home = skdata.data_home.get_data_home() 28 | datafilename = os.path.join(data_home, 29 | 'hpconvnet', 'slm_visitor_esvc', _curdb, str(tid), att_key) 30 | return open(datafilename).read() 31 | 32 | 33 | def cached_gram_save(tid, att_key, data): 34 | data_home = skdata.data_home.get_data_home() 35 | cachedir = os.path.join(data_home, 36 | 'hpconvnet', 'slm_visitor_esvc', _curdb, str(tid)) 37 | datafilename = os.path.join(cachedir, att_key) 38 | info('Caching gram data %i/%s' % (tid, att_key)) 39 | if not os.path.exists(cachedir): 40 | os.makedirs(cachedir) 41 | datafile = open(datafilename, 'w+') 42 | datafile.write(data) 43 | datafile.close() 44 | 45 | 46 | class ESVC_SLM_Visitor(SLM_Visitor): 47 | """ 48 | Use an EnsembleSVC classifier, suitable for datasets with not too many 49 | examples (< 20000) and binary labels. 
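    Each previous trial contributes a kernel ("member"); gram matrices are
    stored as trial attachments (and cached on disk via cached_gram_save) so
    later ensemble members can reuse them without recomputing features.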
50 | """ 51 | def __init__(self, 52 | optimize_l2_reg=False, 53 | svm_crossvalid_max_evals=20, 54 | **kwargs): 55 | SLM_Visitor.__init__(self, **kwargs) 56 | self.optimize_l2_reg = optimize_l2_reg 57 | self.member_name = self._member_name() 58 | self.svm_crossvalid_max_evals = svm_crossvalid_max_evals 59 | 60 | self._results = { 61 | 'train_image_match_indexed': {}, 62 | 'retrain_classifier_image_match_indexed': {}, 63 | 'loss_image_match_indexed': {}, 64 | } 65 | 66 | if not self.optimize_l2_reg: 67 | raise NotImplementedError() 68 | 69 | def norm_key(self, sample, tid=None): 70 | if tid is None: 71 | member_name = self.member_name 72 | else: 73 | member_name = self._member_name(tid) 74 | norm_key = 'nkey_%s_%s' % (member_name, sample) 75 | return norm_key 76 | 77 | def load_ensemble_weights(self, norm_sample, task_name, ens): 78 | # -- load the weights from the most recent ensemble, if there is one. 79 | for trial in self.history[-1:]: 80 | info('Loading weights from document %i' % trial['tid']) 81 | trial_norm_key = self.norm_key(norm_sample, tid=trial['tid']) 82 | trial_weights = trial['result']['weights'] 83 | norm_task_weights = trial_weights[trial_norm_key][task_name] 84 | for norm_key, weight in norm_task_weights.items(): 85 | if ens.has_member(norm_key): 86 | ens.set_weight(norm_key, weight) 87 | else: 88 | ens.add_member(norm_key, weight) 89 | info(' .. weight[%s] = %s' % (norm_key, weight)) 90 | foobar.append_trace('load ensemble weights', norm_key, weight) 91 | 92 | def load_ensemble_grams(self, norm_sample, ens, sample1, sample2): 93 | trial_attachments = self.ctrl.trials.trial_attachments 94 | 95 | # -- load the gram matrices saved by each ensemble member 96 | for trial in self.history: 97 | trial_norm_key = self.norm_key(norm_sample, tid=trial['tid']) 98 | info('Loading grams from document %i' % trial['tid']) 99 | debug(' .. saved_grams: %s' % 100 | str(trial['result']['grams'][trial_norm_key])) 101 | for (s1, s2) in trial['result']['grams'][trial_norm_key]: 102 | if set([sample1, sample2]) == set([s1, s2]): 103 | if not ens.has_gram(trial_norm_key, s1, s2): 104 | att_key = 'gram_%s_%s_%s.pkl' % (trial_norm_key, s1, s2) 105 | info('retrieving gram_data %i:%s' 106 | % (trial['tid'], att_key)) 107 | try: 108 | gram_data = cached_gram_load(trial['tid'], att_key) 109 | except IOError: 110 | gram_data = trial_attachments(trial)[att_key] 111 | cached_gram_save(trial['tid'], att_key, gram_data) 112 | info('retrieved %i bytes' % len(gram_data)) 113 | gram = loads_gram(gram_data) 114 | if s1 == sample1: 115 | ens.add_gram(trial_norm_key, sample1, sample2, gram) 116 | else: 117 | ens.add_gram(trial_norm_key, sample1, sample2, 118 | gram.T) 119 | foobar.append_ndarray_signature( 120 | gram, 121 | 'load gram', trial_norm_key, sample1, sample2) 122 | info('Loading grams done') 123 | 124 | def hyperopt_rval(self, save_grams): 125 | rval = copy.deepcopy(self._results) 126 | rval['attachments'] = {} 127 | rval['grams'] = {} 128 | rval['weights'] = {} 129 | rval['trace'] = copy.deepcopy(foobar._trace) 130 | 131 | saved = set() 132 | 133 | def jsonify_train_results(rkey): 134 | for norm_key in rval[rkey]: 135 | 136 | for task_name in rval[rkey][norm_key]: 137 | svm_dct = rval[rkey][norm_key][task_name] 138 | ens = svm_dct.pop('ens') 139 | 140 | rval['weights'].setdefault(norm_key, {}) 141 | rval['weights'][norm_key][task_name] = ens._weights 142 | 143 | # -- stash these as attachments because they fill up the db. 
144 | xmean = svm_dct.pop('xmean') 145 | xstd = svm_dct.pop('xstd') 146 | 147 | if save_grams: 148 | xmean_key = 'xmean_%s_%s_%s' % (rkey, norm_key, task_name) 149 | xstd_key = 'xstd_%s_%s_%s' % (rkey, norm_key, task_name) 150 | rval['attachments'][xmean_key] = cPickle.dumps(xmean, -1) 151 | rval['attachments'][xstd_key] = cPickle.dumps(xstd, -1) 152 | 153 | rval['grams'].setdefault(norm_key, []) 154 | for (inorm_key, sample1, sample2) in ens._grams: 155 | if inorm_key != norm_key: 156 | # -- we're only interested in saving the grams 157 | # calculated by this run. 158 | continue 159 | if (norm_key, sample1, sample2) in saved: 160 | # -- already saved this one 161 | continue 162 | 163 | att_key = 'gram_%s_%s_%s.pkl' % ( 164 | norm_key, sample1, sample2) 165 | 166 | info('saving %s' % att_key) 167 | 168 | gram = ens._grams[(norm_key, sample1, sample2)] 169 | rval['attachments'][att_key] = dumps_gram( 170 | gram.astype('float32')) 171 | 172 | rval['grams'][norm_key].append((sample1, sample2)) 173 | 174 | saved.add((norm_key, sample1, sample2)) 175 | saved.add((norm_key, sample2, sample1)) 176 | 177 | jsonify_train_results('train_image_match_indexed') 178 | jsonify_train_results('retrain_classifier_image_match_indexed') 179 | 180 | return SONify(rval) 181 | 182 | def forget_task(self, task_name): 183 | 184 | # free up RAM by deleting all features computed for task_name 185 | def delete_features(rkey): 186 | for norm_key in self._results[rkey]: 187 | if task_name in self._results[rkey][norm_key]: 188 | svm_dct = self._results[rkey][norm_key][task_name] 189 | svm_dct['ens'].del_features(norm_key, task_name) 190 | 191 | delete_features('train_image_match_indexed') 192 | delete_features('retrain_classifier_image_match_indexed') 193 | 194 | def train_image_match_indexed(self, task, valid=None): 195 | 196 | pipeline = self.pipeline 197 | 198 | info('training svm on %s' % task.name) 199 | ens = EnsembleSVC(task.name) 200 | 201 | norm_task = task.name 202 | norm_key = self.norm_key(norm_task) 203 | svm_dct = { 204 | 'ens': ens, 205 | 'norm_key': norm_key, 206 | 'norm_task': task.name, 207 | 'task_name': task.name, 208 | } 209 | 210 | ens.add_member(norm_key) 211 | ens.add_sample(task.name, task.y) 212 | x_trn = self.normalized_image_match_features(task, svm_dct, 213 | role='train') 214 | ens.add_features(norm_key, task.name, x_trn) 215 | 216 | foobar.append_ndarray_signature(x_trn, 217 | 'train_image x_trn', norm_key, task.name) 218 | 219 | info('computing gram: %s / %s / %s' % ( 220 | norm_key, task.name, task.name)) 221 | ens.compute_gram(norm_key, task.name, task.name, dtype='float32') 222 | 223 | foobar.append_ndarray_signature( 224 | ens._grams[(norm_key, task.name, task.name)], 225 | 'train_image train_gram', norm_key, task.name) 226 | 227 | if valid is not None: 228 | info('cross-validating svm on %s' % valid.name) 229 | x_val = self.normalized_image_match_features(valid, svm_dct, 230 | role='test', 231 | # -- assume that slow features were caught earlier 232 | batched_lmap_speed_thresh={'seconds': 30, 'elements': 1}, 233 | ) 234 | foobar.append_ndarray_signature( 235 | x_val, 236 | 'train_image x_val', norm_key, valid.name, task.name) 237 | 238 | ens.add_sample(valid.name, valid.y) 239 | ens.add_features(norm_key, valid.name, x_val) 240 | 241 | info('computing gram: %s / %s / %s' % ( 242 | norm_key, valid.name, task.name)) 243 | ens.compute_gram(norm_key, valid.name, task.name, dtype='float32') 244 | foobar.append_ndarray_signature( 245 | ens._grams[(norm_key, valid.name, task.name)], 246 | 
'train_image valid_gram', norm_key, valid.name, task.name) 247 | 248 | # -- re-fit the model using best weights on train + valid sets 249 | info('computing gram: %s / %s / %s' % ( 250 | norm_key, valid.name, valid.name)) 251 | ens.compute_gram(norm_key, valid.name, valid.name, dtype='float32') 252 | 253 | train_valid = '%s_%s' % (task.name, valid.name) 254 | ens.add_compound_sample(train_valid, [task.name, valid.name]) 255 | 256 | 257 | def load_history(): 258 | info('loading history') 259 | self.load_ensemble_history( 260 | fields=['result.weights','result.grams']) 261 | self.load_ensemble_weights(norm_task, task.name, ens) 262 | self.load_ensemble_grams(norm_task, ens, task.name, task.name) 263 | if valid is not None: 264 | self.load_ensemble_grams(norm_task, ens, valid.name, task.name) 265 | self.load_ensemble_grams(norm_task, ens, valid.name, valid.name) 266 | 267 | 268 | def train_main(): 269 | ens.train_sample = task.name 270 | 271 | t0 = time.time() 272 | if valid is None: 273 | svm_dct['l2_reg'] = pipeline['l2_reg'] 274 | ens.fit_svm(svm_dct['l2_reg']) 275 | svm_dct['train_error'] = ens.error_rate(task.name) 276 | svm_dct['loss'] = svm_dct['train_error'] 277 | else: 278 | 279 | #scales = {m: 3.0 for m in ens._weights} 280 | scales = dict([(m, 3.0) for m in ens._weights]) 281 | scales[norm_key] = 100.0 282 | 283 | info('fit_weights_crossvalid(%s, %i)' % ( 284 | valid.name, self.svm_crossvalid_max_evals)) 285 | ens.fit_weights_crossvalid(valid.name, 286 | max_evals=self.svm_crossvalid_max_evals, 287 | scales=scales) 288 | 289 | foobar.append_trace('xvalid weights', sorted(ens._weights.items())) 290 | 291 | svm_dct['task_error'] = ens.error_rate(task.name) 292 | foobar.append_trace('task_error', svm_dct['task_error']) 293 | 294 | svm_dct['valid_name'] = valid.name 295 | svm_dct['valid_error'] = ens.error_rate(valid.name) 296 | info('valid_error %f' % svm_dct['valid_error']) 297 | foobar.append_trace('valid_error', svm_dct['valid_error']) 298 | 299 | svm_dct['l2_reg'] = None # -- use default when retraining 300 | 301 | # -- re-fit the model using best weights on train + valid sets 302 | ens.train_sample = train_valid 303 | ens.fit_svm() 304 | 305 | fit_time = time.time() - t0 306 | svm_dct['fit_time'] = fit_time 307 | 308 | 309 | info('training with just the current features...') 310 | train_main() 311 | svm_dct['task_error_no_ensemble'] = svm_dct['task_error'] 312 | svm_dct['valid_error_no_ensemble'] = svm_dct['valid_error'] 313 | 314 | load_history() 315 | if self.history: 316 | info('training the full ensemble...') 317 | train_main() 318 | 319 | try: 320 | print_summary = ens.print_summary 321 | except AttributeError: 322 | print_summary = lambda : None 323 | 324 | print_summary() 325 | 326 | dct = self._results['train_image_match_indexed'] 327 | dct.setdefault(norm_key, {}) 328 | if task.name in dct[norm_key]: 329 | warn('Overwriting train_image_match_indexed result: %s' 330 | % task.name) 331 | dct[norm_key][task.name] = svm_dct 332 | 333 | return svm_dct 334 | 335 | def retrain_classifier_image_match_indexed(self, model, task): 336 | # We are making the decision that retraining a classifier means not 337 | # retraining the weights or the features, but just retraining the 338 | # libsvm part. 
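        # -- Concretely (see the code below): ens, xmean/xstd and l2_reg are
        #    copied from `model` unchanged; only the features and gram matrix
        #    for the new task are computed, and ens.fit_svm() is re-run.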
339 | 340 | ens = model['ens'].copy() 341 | ens.train_sample = task.name 342 | svm_dct = dict( 343 | ens=ens, 344 | norm_key=model['norm_key'], 345 | norm_task=model['norm_task'], 346 | task_name=task.name, 347 | xmean=model['xmean'], 348 | xstd=model['xstd'], 349 | l2_reg=model['l2_reg'], 350 | ) 351 | if 'divrowl2_avg_nrm' in model: 352 | svm_dct['divrowl2_avg_nrm'] = model['divrowl2_avg_nrm'] 353 | norm_key = svm_dct['norm_key'] 354 | norm_task = svm_dct['norm_task'] 355 | info('retraining on %s (norm_task=%s)' % (task.name, norm_task)) 356 | 357 | ens.add_sample(task.name, task.y) 358 | x_trn = self.normalized_image_match_features(task, svm_dct, 359 | # -- do not recompute mean and var 360 | role='test', 361 | # -- assume that slow features were caught earlier 362 | batched_lmap_speed_thresh={'seconds': 30, 'elements': 1}, 363 | ) 364 | ens.add_features(norm_key, task.name, x_trn) 365 | 366 | self.load_ensemble_grams(norm_task, ens, task.name, task.name) 367 | ens.compute_gram(norm_key, task.name, task.name, dtype='float32') 368 | 369 | ens.fit_svm(svm_dct['l2_reg']) 370 | svm_dct['task_error'] = ens.error_rate(task.name) 371 | 372 | info('retrain_classifier: %s -> %f' % ( 373 | (norm_key, task.name), svm_dct['task_error'])) 374 | 375 | dct = self._results['retrain_classifier_image_match_indexed'] 376 | dct.setdefault(norm_key, {}) 377 | if task.name in dct[norm_key]: 378 | warn('Overwriting retrain_classifier_image_match_indexed result: %s' 379 | % task.name) 380 | dct[norm_key][task.name] = svm_dct 381 | return svm_dct 382 | 383 | def loss_image_match_indexed(self, svm_dct, task): 384 | norm_task = svm_dct['norm_task'] 385 | norm_key = svm_dct['norm_key'] 386 | 387 | info('loss_image_match_indexed: %s, %s' % (norm_key, task.name) ) 388 | x = self.normalized_image_match_features(task, svm_dct, 'test', 389 | # -- assume that slow features were caught earlier 390 | batched_lmap_speed_thresh={'seconds': 30, 'elements': 1}, 391 | ) 392 | svm_dct['ens'].add_sample(task.name, task.y) 393 | svm_dct['ens'].add_features(norm_key, task.name, x) 394 | 395 | self.load_ensemble_grams(norm_task, svm_dct['ens'], task.name, 396 | svm_dct['ens'].train_sample) 397 | svm_dct['ens'].compute_gram(norm_key, task.name, 398 | svm_dct['ens'].train_sample, dtype='float32') 399 | 400 | preds = svm_dct['ens'].predict(task.name) 401 | erate = error_rate(preds, task.y) 402 | info('test_image_match_indexed error_rate %s -> %f' % ( 403 | task.name, erate)) 404 | 405 | # -- add summary information to self._results 406 | dct = self._results['loss_image_match_indexed'] 407 | dct.setdefault(norm_key, {}) 408 | if task.name in dct[norm_key]: 409 | warn('Overwriting loss_image_match_indexed result: %s' 410 | % task.name) 411 | dct[norm_key][task.name] = { 412 | 'error_rate': erate, 413 | 'norm_key': norm_key, 414 | 'task_name': task.name, 415 | 'preds_01': ''.join( 416 | ['0' if p == -1 else '1' for p in preds]), 417 | } 418 | return erate 419 | 420 | -------------------------------------------------------------------------------- /hpconvnet/slm.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import time 3 | import numpy as np 4 | import StringIO 5 | import traceback 6 | 7 | from hyperopt import pyll 8 | from hyperopt.pyll import scope 9 | 10 | import hyperopt 11 | from hyperopt.pyll_utils import hp_choice 12 | from hyperopt.pyll_utils import hp_uniform 13 | from hyperopt.pyll_utils import hp_quniform 14 | from hyperopt.pyll_utils import hp_loguniform 15 | from 
hyperopt.pyll_utils import hp_qloguniform 16 | from hyperopt.pyll_utils import hp_normal 17 | from hyperopt.pyll_utils import hp_lognormal 18 | 19 | import pyll_slm # adds the symbols to pyll.scope 20 | 21 | pyll.scope.import_(globals(), 22 | # -- from pyll 23 | 'partial', 24 | 'callpipe1', 25 | 'switch', 26 | 'sqrt', 27 | # 28 | # -- misc. from ./pyll_slm.py 29 | 'pyll_theano_batched_lmap', 30 | 'model_predict', 31 | 'model_decisions', 32 | 'error_rate', 33 | 'mean_and_std', 34 | 'flatten_elems', 35 | 'np_transpose', 36 | 'np_RandomState', 37 | 'print_ndarray_summary', 38 | 'pickle_dumps', 39 | # 40 | # -- filterbank allocators (./pyll.slm.py) 41 | 'random_patches', 42 | 'alloc_random_uniform_filterbank', 43 | 'patch_whitening_filterbank_X', 44 | 'fb_whitened_patches', 45 | 'fb_whitened_projections', 46 | 'slm_uniform_M_FB', 47 | # 48 | # -- pipeline elements (./pyll.slm.py) 49 | 'slm_affine_image_warp', 50 | 'slm_img_uint8_to_float32', 51 | 'slm_lpool', 52 | 'slm_lnorm', 53 | 'slm_lpool_alpha', 54 | 'slm_fbncc_chmaj', 55 | 'slm_quantize_gridpool', 56 | # 57 | # -- renamed symbols 58 | **{ 59 | # NEW NAME: ORIG NAME 60 | 's_int': 'int', 61 | 's_float': 'float', 62 | 'pyll_getattr': 'getattr', 63 | }) 64 | 65 | # -- where is this supposed to go? 66 | divide_by_avg_norm=False 67 | 68 | 69 | def stable_hash(s): 70 | if isinstance(s, basestring): 71 | return int(hashlib.sha224(s).hexdigest(), 16) 72 | else: 73 | raise TypeError(s) 74 | 75 | 76 | def hp_TF(label): 77 | return hp_choice(label, [0, 1]) 78 | 79 | 80 | def rfilter_size(label, smin, smax, q=1): 81 | """Return an integer size from smin to smax inclusive with equal prob 82 | """ 83 | return s_int(hp_quniform(label, smin - q / 2.0 + 1e-5, smax + q / 2.0, q)) 84 | 85 | 86 | def logu_range(label, lower, upper): 87 | """Return a continuous replacement for one_of(.1, 1, 10)""" 88 | return hp_loguniform(label, np.log(lower), np.log(upper)) 89 | 90 | 91 | def new_fbncc_layer(prefix, Xcm, n_patches, n_filters, size, 92 | memlimit=5e8, # -- limit patches array to 500MB 93 | ): 94 | def lab(msg): 95 | return '%s_fbncc_%s' % (prefix, msg) 96 | 97 | def get_rseed(name, N): 98 | fullname = lab(name) 99 | low = stable_hash(fullname) % (2 ** 31) 100 | rval = hp_choice(fullname, range(low, low + N)) 101 | return rval 102 | 103 | patches = random_patches( 104 | Xcm, n_patches, size, size, 105 | rng=np_RandomState(get_rseed('patch_rseed', 10)), 106 | channel_major=True, 107 | memlimit=memlimit) 108 | 109 | remove_mean = hp_TF(lab('remove_mean')) 110 | beta = hp_lognormal(lab('beta'), np.log(100), np.log(100)) 111 | hard_beta = hp_TF(lab('hard')) 112 | 113 | # TODO: use different nfilters, beta etc. 
for each algo 114 | 115 | # -- random projections filterbank allocation 116 | random_projections = partial(slm_fbncc_chmaj, 117 | m_fb=slm_uniform_M_FB( 118 | nfilters=n_filters, 119 | size=size, 120 | channels=pyll_getattr(Xcm, 'shape')[1], 121 | rseed=get_rseed('r_rseed', 10), 122 | normalize=hp_TF(lab('r_normalize')), 123 | dtype='float32', 124 | ret_cmajor=True, 125 | ), 126 | remove_mean=remove_mean, 127 | beta=beta, 128 | hard_beta=hard_beta) 129 | 130 | # -- random whitened projections filterbank allocation 131 | random_whitened_projections = partial(slm_fbncc_chmaj, 132 | m_fb=fb_whitened_projections(patches, 133 | patch_whitening_filterbank_X(patches, 134 | gamma=hp_lognormal(lab('wr_gamma'), 135 | np.log(1e-2), np.log(100)), 136 | o_ndim=2, 137 | remove_mean=remove_mean, 138 | beta=beta, 139 | hard_beta=hard_beta, 140 | ), 141 | n_filters=n_filters, 142 | rseed=get_rseed('wr_rseed', 10), 143 | dtype='float32', 144 | ), 145 | remove_mean=remove_mean, 146 | beta=beta, 147 | hard_beta=hard_beta) 148 | 149 | # -- whitened patches filterbank allocation 150 | whitened_patches = partial(slm_fbncc_chmaj, 151 | m_fb=fb_whitened_patches(patches, 152 | patch_whitening_filterbank_X(patches, 153 | gamma=hp_lognormal(lab('wp_gamma'), 154 | np.log(1e-2), np.log(100)), 155 | o_ndim=2, 156 | remove_mean=remove_mean, 157 | beta=beta, 158 | hard_beta=hard_beta, 159 | ), 160 | n_filters=n_filters, 161 | rseed=get_rseed('wp_rseed', 10), 162 | dtype='float32', 163 | ), 164 | remove_mean=remove_mean, 165 | beta=beta, 166 | hard_beta=hard_beta) 167 | 168 | # --> MORE FB LEARNING ALGOS HERE <-- 169 | # TODO: V1-like filterbank (incl. with whitening matrix) 170 | # TODO: sparse coding 171 | # TODO: OMP from Coates 2011 172 | # TODO: K-means 173 | # TODO: RBM 174 | # TODO: DAA 175 | # TODO: ssRBM 176 | rchoice = hp_choice(lab('algo'), [ 177 | random_projections, 178 | random_whitened_projections, 179 | whitened_patches, 180 | ]) 181 | return rchoice 182 | 183 | 184 | def pipeline_extension(prefix, X, n_patches, max_filters): 185 | assert max_filters > 16 186 | f_layer = new_fbncc_layer(prefix, X, n_patches, 187 | n_filters=s_int( 188 | hp_qloguniform('%sfb_nfilters' % prefix, 189 | np.log(8.01), np.log(max_filters), q=16)), 190 | size=rfilter_size('%sfb_size' % prefix, 3, 8), 191 | ) 192 | 193 | p_layer = partial(slm_lpool, 194 | stride=hp_choice('%sp_stride' % prefix, [1, 2]), 195 | order=hp_choice('%sp_order' % prefix, 196 | [1, 2, hp_lognormal('%sp_order_real' % prefix, 197 | mu=np.log(1), sigma=np.log(3))]), 198 | ker_size=rfilter_size('%sp_size' % prefix, 2, 8)) 199 | 200 | return [f_layer, p_layer] 201 | 202 | 203 | def new_exit(pipeline, prefix): 204 | def lab(msg): 205 | return prefix % msg 206 | return { 207 | 'pipe': pipeline, 208 | 'remove_std0': 209 | hp_TF(lab('remove_std0')), 210 | 'varthresh': 211 | hp_lognormal(lab('varthresh'), 212 | np.log(1e-4), np.log(1000)), 213 | 'l2_reg': hp_lognormal(lab('l2_reg'), 214 | np.log(1e-5), np.log(1e3)), 215 | 'divrowl2': hp_TF(lab('divrowl2')), 216 | } 217 | 218 | 219 | def exit_grid(pipeline, layer_num, Xcm, n_patches, max_n_features): 220 | def lab(msg): 221 | return 'l%ieg_%s' % (layer_num, msg) 222 | 223 | fsize = rfilter_size(lab('fsize'), 3, 8) 224 | 225 | grid_res = hp_choice(lab('res'), [2, 3]) 226 | grid_features_per_filter = 2 * (grid_res ** 2) 227 | grid_nfilters = max_n_features // grid_features_per_filter 228 | 229 | grid_filtering = new_fbncc_layer( 230 | prefix='l%ieg' % layer_num, 231 | Xcm=Xcm, 232 | n_patches=n_patches, 233 | 
n_filters=grid_nfilters, 234 | size=fsize, 235 | ) 236 | 237 | grid_pooling = partial(slm_quantize_gridpool, 238 | alpha=hp_normal(lab('alpha'), 0.0, 1.0), 239 | use_mid=False, 240 | grid_res=grid_res, 241 | order=hp_choice(lab('order'), [ 242 | 1.0, 2.0, logu_range(lab('order_real'), .1, 10.)])) 243 | 244 | return new_exit(pipeline + [grid_filtering, grid_pooling], lab('%s')) 245 | 246 | 247 | def exit_lpool_alpha(pipeline, layer_num, Xcm, n_patches, max_n_features): 248 | def lab(msg): 249 | return 'l%ielpa_%s' % (layer_num, msg) 250 | 251 | fsize = rfilter_size(lab('fsize'), 3, 8) 252 | filtering_res = pyll_getattr(Xcm, 'shape')[2] - fsize + 1 253 | # -- N.B. Xrows depends on other params, so we can't use it to set the 254 | # upper bound on lpsize. We can only sample independently, and 255 | # then fail below with non-positive number of features. 256 | size = rfilter_size(lab('lpsize'), 1, 5) 257 | stride = hp_choice(lab('stride'), [1, 2, 3]) 258 | res = scope.ceildiv(scope.max(filtering_res - size + 1, 0), stride) 259 | if 0: 260 | # XXX: This is a smarter way to pick the n_filters, but it triggers 261 | # a bug in hyperopt.vectorize_helper. The build_idxs_vals function 262 | # there needs to be smarter -- to recognize when wanted_idxs is a 263 | # necessarily subset of the all_idxs, and then not to append 264 | # wanted_idxs to the union defining all_idxs... because that creates a 265 | # cycle. The trouble is specifically that lpool_res is used in the 266 | # switch statement below both in the condition and the response. 267 | nfilters = switch(res > 0, 268 | max_n_features // (2 * (res ** 2)), 269 | scope.Raise(ValueError, 'Non-positive number of features')) 270 | else: 271 | # this is less good because it risks dividing by zero, 272 | # and forces the bandit to catch weirder errors from new_fbncc_layer 273 | # caused by negative nfilters 274 | nfilters = max_n_features // (2 * (res ** 2)) 275 | 276 | filtering = new_fbncc_layer( 277 | prefix='l%iel' % layer_num, 278 | Xcm=Xcm, 279 | n_patches=n_patches, 280 | n_filters=nfilters, 281 | size=fsize, 282 | ) 283 | 284 | pooling = partial(slm_lpool_alpha, 285 | ker_size=size, 286 | stride=stride, 287 | alpha=hp_normal(lab('alpha'), 0.0, 1.0), 288 | order=hp_choice(lab('order_choice'), [ 289 | 1.0, 2.0, logu_range(lab('order_real'), .1, 10.)])) 290 | 291 | return new_exit(pipeline + [filtering, pooling], lab('%s')) 292 | 293 | 294 | def exit_lpool(pipeline, layer_num, Xcm, n_patches, max_n_features): 295 | def lab(msg): 296 | return 'l%i_out_lp_%s' % (layer_num, msg) 297 | 298 | fsize = rfilter_size(lab('fsize'), 3, 8) 299 | filtering_res = pyll_getattr(Xcm, 'shape')[2] - fsize + 1 300 | # -- N.B. Xrows depends on other params, so we can't use it to set the 301 | # upper bound on lpsize. We can only sample independently, and 302 | # then fail below with non-positive number of features. 
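    #    A worked example of the bookkeeping below (illustrative numbers only,
    #    assuming a 32x32 input map and max_n_features=16000):
    #      fsize=5            -> filtering_res = 32 - 5 + 1 = 28
    #      psize=3, stride=2  -> pooling_res   = ceildiv(28 - 3 + 1, 2) = 13
    #      nsize=3            -> norm_res      = 13 - 3 + 1 = 11
    #      nfilters = 16000 // 11**2 = 132
    #    If norm_res comes out <= 0, the max(norm_res, 0) term makes the
    #    division raise at rec_eval time, as noted below.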
303 | psize = rfilter_size(lab('psize'), 1, 5) 304 | stride = hp_choice(lab('stride'), [1, 2, 3]) 305 | pooling_res = scope.ceildiv(filtering_res - psize + 1, stride) 306 | nsize = rfilter_size(lab('nsize'), 1, 5) 307 | norm_res = pooling_res - nsize + 1 308 | 309 | # -- raises exception at rec_eval if norm_res is 0 310 | nfilters = max_n_features // (scope.max(norm_res, 0) ** 2) 311 | 312 | filtering = new_fbncc_layer( 313 | prefix='l%ielp' % layer_num, 314 | Xcm=Xcm, 315 | n_patches=n_patches, 316 | n_filters=nfilters, 317 | size=fsize, 318 | ) 319 | 320 | pooling = partial(slm_lpool, 321 | ker_size=psize, 322 | stride=stride, 323 | order=hp_choice(lab('order_choice'), [ 324 | 1.0, 2.0, logu_range(lab('order_real'), .1, 10.)])) 325 | 326 | normalization = partial(slm_lnorm, 327 | ker_size=nsize, 328 | remove_mean=hp_TF(lab('norm_rmean')), 329 | threshold=hp_lognormal(lab('norm_thresh'), 330 | np.log(1.0), np.log(3)), 331 | ) 332 | 333 | seq = hp_choice(lab('use_norm'), [ 334 | [filtering, pooling], 335 | [filtering, pooling, normalization]]) 336 | 337 | return new_exit(pipeline + seq, lab('%s')) 338 | 339 | 340 | def pipeline_exits(pipeline, layer_num, Xcm, n_patches, max_n_features): 341 | grid = exit_grid(pipeline, layer_num, Xcm, n_patches, max_n_features) 342 | 343 | lpool_alpha = exit_lpool_alpha(pipeline, layer_num, Xcm, n_patches, 344 | max_n_features) 345 | 346 | lpool = exit_lpool(pipeline, layer_num, Xcm, n_patches, max_n_features) 347 | 348 | return [grid, lpool_alpha, lpool] 349 | 350 | 351 | def uslm_domain(Xcm, 352 | batchsize, 353 | chmjr_image_shape, 354 | output_sizes, 355 | n_patches=50000, 356 | max_n_features=16000, 357 | max_layer_sizes=(64, 128), 358 | batched_lmap_speed_thresh=None, 359 | permit_affine_warp=True, 360 | abort_on_rows_larger_than=None, 361 | ): 362 | """ 363 | This function works by creating a linear pipeline, with multiple exit 364 | points that could be the feature representation for classification. 365 | 366 | The function returns a switch among all of these exit points. 
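
    For example, with the default max_layer_sizes=(64, 128) there are three
    exit types (grid, lpool_alpha, lpool) at each of three depths, so the
    returned hp_choice("exit", ...) switches over nine candidate feature
    pipelines.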
367 | """ 368 | start_time = time.time() 369 | 370 | XC, XH, XW = chmjr_image_shape 371 | osize = hp_choice('warp_osize', output_sizes) 372 | 373 | assert XW > 3, chmjr_image_shape # -- make sure we don't screw up channel-major 374 | 375 | warp_options = [ 376 | # -- option 1: simple resize 377 | partial(slm_affine_image_warp, 378 | rot=0, 379 | shear=0, 380 | scale=[s_float(osize) / XH, s_float(osize) / XW], 381 | trans=[0, 0], 382 | oshape=[osize, osize]), 383 | ] 384 | if permit_affine_warp: 385 | # -- option 2: resize with rotation, shear, translation 386 | warp_options.append( 387 | partial(slm_affine_image_warp, 388 | rot=hp_uniform('warp_rot', low=-0.3, high=0.3), 389 | shear=hp_uniform('warp_shear', low=-0.3, high=0.3), 390 | # -- most of the scaling comes via osize 391 | scale=[ 392 | hp_uniform('warp_scale_h', low=0.8, high=1.2) * osize / XH, 393 | hp_uniform('warp_scale_v', low=0.8, high=1.2) * osize / XW, 394 | ], 395 | trans=[ 396 | hp_uniform('warp_trans_h', low=-0.2, high=0.2) * osize, 397 | hp_uniform('warp_trans_v', low=-0.2, high=0.2) * osize, 398 | ], 399 | oshape=[osize, osize] 400 | )) 401 | pipeline = [slm_img_uint8_to_float32, 402 | hp_choice('warp', warp_options)] 403 | Xcm = pyll_theano_batched_lmap( 404 | partial(callpipe1, pipeline), 405 | Xcm, 406 | batchsize=batchsize, 407 | print_progress_every=10, 408 | speed_thresh=batched_lmap_speed_thresh, 409 | abort_on_rows_larger_than=abort_on_rows_larger_than, 410 | x_dtype='uint8', 411 | )[:] 412 | 413 | exits = pipeline_exits( 414 | pipeline, 415 | layer_num=0, 416 | Xcm=Xcm, 417 | n_patches=n_patches, 418 | max_n_features=max_n_features) 419 | for layer_i, max_layer_size in enumerate(max_layer_sizes): 420 | extension = pipeline_extension( 421 | 'l%i' % layer_i, Xcm, n_patches, max_layer_size) 422 | 423 | pipeline.extend(extension) 424 | Xcm = pyll_theano_batched_lmap( 425 | partial(callpipe1, extension), 426 | Xcm, # scope.print_ndarray_summary('Xcm %i' % layer_i, Xcm), 427 | batchsize=batchsize, 428 | print_progress_every=10, 429 | speed_thresh=batched_lmap_speed_thresh, 430 | abort_on_rows_larger_than=abort_on_rows_larger_than, 431 | )[:] 432 | # -- indexing computes all the values (during rec_eval) 433 | exits.extend( 434 | pipeline_exits( 435 | pipeline=pipeline, 436 | layer_num=layer_i + 1, 437 | Xcm=Xcm, 438 | n_patches=n_patches, 439 | max_n_features=max_n_features)) 440 | 441 | return hp_choice("exit", exits) 442 | 443 | 444 | class USLM_Exception(Exception): 445 | pass 446 | 447 | 448 | def call_catching_pipeline_errors(fn): 449 | def raise_error(e): 450 | sio = StringIO.StringIO() 451 | traceback.print_exc(None, sio) 452 | tb = sio.getvalue() 453 | raise USLM_Exception(e, { 454 | 'loss': float(1.0), 455 | 'status': hyperopt.STATUS_FAIL, 456 | 'failure': { 457 | 'type': str(type(e)), 458 | 'exc': repr(e), 459 | 'tb': tb, 460 | }}) 461 | try: 462 | return fn() 463 | except pyll_slm.InvalidDescription, e: 464 | raise_error(e) 465 | except pyll_slm.EvalTimeout, e: 466 | raise_error(e) 467 | except ZeroDivisionError, e: 468 | raise_error(e) 469 | except MemoryError, e: 470 | raise_error(e) 471 | except OSError, e: 472 | if 'allocate memory' in str(e): 473 | raise_error(e) 474 | else: 475 | raise 476 | except ValueError, e: 477 | if (('rowlen' in str(e) and 'exceeds limit' in str(e)) 478 | or ('dimension mis-match' in str(e) and '= 0' in str(e)) 479 | or ('had size 0' in str(e)) 480 | or ('size on that axis is 0' in str(e)) 481 | or ('non-finite features' in str(e)) 482 | ): 483 | raise_error(e) 484 | else: 485 
| raise 486 | except RuntimeError, e: 487 | if (('taking too long' in str(e)) 488 | or ('allocate memory' in str(e)) 489 | or ('kernel_reduce_sum' in str(e) and 'block: 0 x' in str(e)) 490 | or ('CudaNdarray has dim 0' in str(e)) 491 | ): 492 | raise_error(e) 493 | else: 494 | raise 495 | 496 | -------------------------------------------------------------------------------- /hpconvnet/isvm_multi.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file supports the incremental construction of an SVM classifier 3 | by partially-corrective boosting on the hinge loss. 4 | 5 | 6 | Each incremental solver minimizes 7 | 8 | hinge( dot(X, w) + b + alpha * prev_Wx) 9 | + lambda * (|w|^2 + |alpha * prev_W|^2) 10 | 11 | 12 | Each solver is designed to be run on a subset of all available features. 13 | 14 | """ 15 | import copy 16 | import logging 17 | import gc 18 | import os 19 | import shutil 20 | import sys 21 | 22 | import numpy as np 23 | import autodiff 24 | import theano # abstraction leak to pass mode for optimization 25 | 26 | import tempfile 27 | import skdata.larray # for large tempfile creation (tempdir isn't always big enough) 28 | 29 | from .utils import dot 30 | 31 | logger = logging.getLogger(__name__) 32 | info = logger.info 33 | warn = logger.warn 34 | error = logger.error 35 | 36 | _default_bfgs_kwargs = {'factr': 100} 37 | 38 | def hinge(margin): 39 | """ 40 | Classic hinge loss 41 | """ 42 | return np.maximum(0, 1 - margin) 43 | 44 | 45 | def hinge2(margin): 46 | """ 47 | Looks pretty much like margin, but the corner is smoothed out 48 | """ 49 | return np.log1p(np.exp(10 * (0.9 - margin))) / 10 50 | 51 | 52 | 53 | def multi_svm_hinge_loss(x, y, weights, bias, alpha, pxw, pw_l2_sqr, 54 | l2_regularization, pl2_regularization): 55 | """ 56 | x: (n_examples, n_features) 57 | y: (n_examples, n_classes) 58 | weights: (n_feaures, n_classes) 59 | bias: (n_classes,) 60 | alpha: (n_prev, n_classes) 61 | pxw: (n_examples, n_classes, n_prev) 62 | pw_l2_sqr: (n_prev, n_classes) 63 | l2_regularization: () 64 | pl2_regularization: (n_prev,) 65 | """ 66 | 67 | n_prev, n_classes = alpha.shape 68 | xw = dot(x, weights) 69 | if n_prev: 70 | assert pw_l2_sqr.shape == alpha.shape, ( 71 | 'pw_l2_sqr shape', pw_l2_sqr.shape, 72 | 'alpha shape', alpha.shape) 73 | if np.any(pw_l2_sqr < 0): 74 | raise ValueError('prev_w_l2_sqr may not be negative') 75 | prev_l2_sqr = np.sum( 76 | pl2_regularization[:, None] * pw_l2_sqr * (alpha ** 2)) 77 | n_examples, n_classes2, n_prev2 = pxw.shape 78 | assert n_prev2 == n_prev, ('n_prev', n_prev, n_prev2) 79 | assert n_classes2 == n_classes, ('n_classes', n_classes, n_classes2) 80 | prev_xw = (pxw * alpha.T).sum(axis=2) 81 | assert prev_xw.shape == xw.shape, ('xw', xw.shape, prev_xw.shape) 82 | xw = xw + prev_xw 83 | else: 84 | prev_l2_sqr = 0.0 85 | 86 | margin = y * (xw + bias ) 87 | losses = hinge2(margin).mean(axis=0).sum() 88 | 89 | cur_l2_sqr = l2_regularization * (weights * weights).sum() 90 | l2_reg = 0.5 * (cur_l2_sqr + prev_l2_sqr) 91 | cost = losses + l2_reg 92 | return cost 93 | 94 | 95 | def append_xw(pxw, x, weights): 96 | """ 97 | Append dot(x, weights) to pxw 98 | """ 99 | n_features, n_classes = weights.shape 100 | 101 | if x.size == 0: 102 | my_xw = np.zeros((len(x), n_classes, 1), dtype=pxw.dtype) 103 | elif np.all(weights == 0): 104 | my_xw = np.zeros((len(x), n_classes, 1), dtype=pxw.dtype) 105 | else: 106 | my_xw = dot(x, weights)[:, :, None] 107 | rval = np.concatenate([pxw, my_xw], axis=2) 108 | 
return rval.astype(pxw.dtype) 109 | 110 | 111 | def append_alpha(alpha): 112 | n_prev, n_classes = alpha.shape 113 | ones = np.ones((1, n_classes), dtype=alpha.dtype) 114 | rval = np.vstack([alpha, ones]) 115 | return rval.astype(alpha.dtype) 116 | 117 | 118 | def append_w_l2_sqr(w_l2_sqr, weights): 119 | l2_sqr = (weights * weights).sum(axis=0) 120 | rval = np.vstack([w_l2_sqr, l2_sqr[None, :]]) 121 | return rval.astype(w_l2_sqr.dtype) 122 | 123 | 124 | def append_l2_regularization(pl2reg, l2reg): 125 | rval = np.hstack([pl2reg, [l2reg]]) 126 | return rval.astype(pl2reg.dtype) 127 | 128 | 129 | def fit_sgd_0(weights, bias, x, y, l2_regularization, n_iters, 130 | print_interval): 131 | """ 132 | Refine `weights` and `bias` by n_iters steps of SGD 133 | """ 134 | if n_iters <= 0: 135 | return weights, bias 136 | 137 | n_examples = len(x) 138 | n_features, n_classes = weights.shape 139 | alpha0 = np.empty((0, n_classes), dtype=weights.dtype) 140 | 141 | # -- use the first few elements of x to estimate the average 142 | # example norm 143 | # -- fixing these learning rates makes sense to me because the 144 | # hinge loss puts a bound on the slope of the function being 145 | # optimized, the only variable is the norm / magnitude of the 146 | # data. 147 | avg_w_norm = np.mean(np.sqrt((x[:200] ** 2).sum(axis=1))) 148 | step_size_w = 0.01 / (avg_w_norm + 1e-8) 149 | step_size_b = 0.01 150 | step_size_a = 0.0 151 | 152 | weights, bias, alpha0, = autodiff.fmin_sgd( 153 | lambda w, b, a, xx, yy1: 154 | multi_svm_hinge_loss(xx, yy1, w, b, a, 155 | None, # xwi, 156 | None, # prev_w_l2_sqr, 157 | l2_regularization, 158 | None), 159 | (weights, bias, alpha0), 160 | streams={ 161 | 'xx': x.reshape((n_examples, 1, n_features)), 162 | 'yy1': y.reshape((n_examples, 1, n_classes)), 163 | }, 164 | print_interval=print_interval, 165 | step_size=(step_size_w, step_size_b, step_size_a), 166 | step_size_backoff=0.1, 167 | loops=n_iters / float(len(x)), 168 | theano_mode=theano.Mode( 169 | linker='cvm_nogc', 170 | #linker='c|py', 171 | optimizer='fast_run').excluding('gpu'), 172 | theano_device='cpu', 173 | floatX=x.dtype, 174 | ) 175 | return weights, bias 176 | 177 | 178 | l_bfgs_b_debug_feature_limit = None 179 | 180 | def fit_l_bfgs_b(weights, bias, alpha, x, y, l2reg, 181 | pxw, pw_l2_sqr, pl2reg, bfgs_kwargs, 182 | return_after_one_fit=False): 183 | """ 184 | Refine `weights, bias, alpha` by l_bfgs_b 185 | """ 186 | n_features, n_classes = weights.shape 187 | n_prev, n_classes = alpha.shape 188 | 189 | alpha_orig = alpha 190 | # -- the inplace alpha2 scaling modifies not-yet-fit weights 191 | # as the while loop below works its way across 192 | weights = weights.copy() 193 | 194 | low = 0 195 | high = n_features 196 | 197 | # -- keep trying to train on less and less of the data until it works 198 | while True: 199 | x0 = x[:, low:high] 200 | 201 | x2 = x[:, high:] 202 | pxw2 = append_xw(pxw, x2, weights[high:]) 203 | pl2reg2 = append_l2_regularization(pl2reg, l2reg) 204 | alpha2 = append_alpha(alpha) 205 | pw_l2_sqr2 = append_w_l2_sqr(pw_l2_sqr, weights[high:]) 206 | 207 | def fn(w, b, a): 208 | return multi_svm_hinge_loss(x0, y, w, b, a, 209 | pxw2, pw_l2_sqr2, l2reg, pl2reg2) 210 | try: 211 | if l_bfgs_b_debug_feature_limit is not None: 212 | # -- this mechanism is used by unit tests 213 | if (high - low) > l_bfgs_b_debug_feature_limit: 214 | raise MemoryError() 215 | (weights_, bias, alpha2), info = autodiff.fmin_l_bfgs_b(fn, 216 | (weights[low:high], bias, alpha2), 217 | return_info=True, 218 | 
borrowable=[x0], 219 | floatX=x.dtype, 220 | **bfgs_kwargs) 221 | info['feature_high'] = high 222 | info['feature_low'] = low 223 | gc.collect() 224 | logger.info('fitting successful for %i features' % high) 225 | break 226 | except (MemoryError, RuntimeError), e: 227 | high /= 2 228 | if low == high: 229 | raise 230 | gc.collect() 231 | logger.info('fitting required too much memory, falling back to %i' % high) 232 | continue 233 | 234 | weights[low:high] = weights_ 235 | # -- pop off the alpha we just added 236 | weights[high:] *= alpha2[-1] 237 | alpha = alpha2[:-1].copy() 238 | 239 | if high == n_features or return_after_one_fit: 240 | return (weights, bias, alpha), [info] 241 | 242 | # -- now loop over all the features, and put the results together 243 | inc = high - low 244 | w0s = [weights_] 245 | costs = [info['fopt']] 246 | infos = [info] 247 | while high < n_features: 248 | high += inc 249 | low += inc 250 | 251 | x1 = x[:, low:high] 252 | pxw1 = append_xw(pxw, x0, weights_) 253 | pl2reg1 = append_l2_regularization(pl2reg, l2reg) 254 | alpha = append_alpha(alpha) 255 | pw_l2_sqr1 = append_w_l2_sqr(pw_l2_sqr, weights_) 256 | 257 | x2 = x[:, high:] 258 | pxw2 = append_xw(pxw1, x2, weights[high:]) 259 | pl2reg2 = append_l2_regularization(pl2reg1, l2reg) 260 | alpha2 = append_alpha(alpha) 261 | pw_l2_sqr2 = append_w_l2_sqr(pw_l2_sqr1, weights[high:]) 262 | 263 | def fn(w, b, a): 264 | return multi_svm_hinge_loss(x1, y, w, b, a, 265 | pxw2, pw_l2_sqr2, l2reg, pl2reg2) 266 | (weights_, bias, alpha2), info = autodiff.fmin_l_bfgs_b(fn, 267 | (weights[low:high], bias, alpha2), 268 | return_info=True, 269 | borrowable=[x1], 270 | floatX=x.dtype, 271 | **bfgs_kwargs) 272 | 273 | info['feature_high'] = high 274 | info['feature_low'] = low 275 | 276 | # -- pop off the alpha we just added 277 | weights[high:] *= alpha2[-1] 278 | alpha = alpha2[:-1].copy() 279 | 280 | w0s.append(weights_) 281 | costs.append(info['fopt']) 282 | infos.append(info) 283 | x0 = x1 284 | pxw = pxw1 285 | pl2reg = pl2reg1 286 | pw_l2_sqr = pw_l2_sqr1 287 | 288 | old_alpha = alpha[:n_prev] 289 | new_alpha = alpha[n_prev:] 290 | assert len(new_alpha) == len(w0s) - 1 291 | 292 | if np.any(old_alpha < 0) or np.any(old_alpha > 1): 293 | warn('Alpha naturally grew beyond 0-1 range: %s' % str(old_alpha)) 294 | 295 | for w, a in zip(w0s[:-1], new_alpha): 296 | w *= a 297 | weights = np.vstack(w0s) 298 | alpha_rval = old_alpha.copy() 299 | assert alpha_rval.shape == alpha_orig.shape 300 | return (weights, bias, alpha_rval), infos 301 | 302 | 303 | class IncrementalMultiSVM(object): 304 | """ 305 | On each iteration of the incremental construction this class fits a new 306 | weight vector w to the features x, while adjusting the norm of the 307 | previously-fit weight vectors to balance the current model against the old 308 | ones. 309 | 310 | See test_hingeboost.py for an example of incremental SVM construction. 
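
    A minimal usage sketch (hypothetical names and shapes; see
    test_hingeboost.py for the real thing). x_a and x_b are two feature
    blocks for the same examples, and y holds integer labels in
    {0, ..., n_classes - 1}:

        svm = IncrementalMultiSVM(n_features=x_a.shape[1], n_classes=10)
        svm.fit(x_a, y)                               # fit the first block
        svm2 = svm.continuation(n_features=x_b.shape[1])
        xw = svm.xw_carry_forward(x_a)                # dot(x_a, svm.weights)
        svm2.fit(x_b, y, xw=xw)                       # fit the next block
        preds = svm2.predict(x_b, xw=xw)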
311 | 312 | """ 313 | 314 | def __init__(self, n_features, n_classes, 315 | prev_w_l2_sqr=None, 316 | l2_regularization=1e-4, 317 | prev_l2_regularization=None, 318 | dtype='float64', 319 | scalar_bounds=(-1e3, 1e3), 320 | bfgs_kwargs=None, 321 | alpha=None, 322 | print_interval=sys.maxint, 323 | n_sgd_iters=3000, 324 | bias=None, 325 | assert_clip_ok=True, 326 | badfit_thresh=float('inf'), 327 | ): 328 | 329 | self.n_features = n_features 330 | if prev_w_l2_sqr is None: 331 | self.prev_w_l2_sqr = np.empty((0, n_classes), dtype=dtype) 332 | else: 333 | self.prev_w_l2_sqr = np.asarray(prev_w_l2_sqr).astype(dtype) 334 | (self.n_prev, self.n_classes) = self.prev_w_l2_sqr.shape 335 | if n_classes != self.n_classes: 336 | raise ValueError('n_classes does not match prev_w_l2_sqr.shape', 337 | n_classes, self.prev_w_l2_sqr.shape) 338 | self.l2_regularization = l2_regularization 339 | if prev_l2_regularization is None: 340 | self.prev_l2_regularization = np.empty((0,), dtype=dtype) 341 | else: 342 | self.prev_l2_regularization = prev_l2_regularization 343 | self.dtype = dtype 344 | self.scalar_bounds = scalar_bounds 345 | self.print_interval = print_interval 346 | if bfgs_kwargs is None: 347 | self.bfgs_kwargs = copy.deepcopy(_default_bfgs_kwargs) 348 | if print_interval < sys.maxint: 349 | self.bfgs_kwargs.setdefault('iprint', 1) 350 | else: 351 | self.bfgs_kwargs = bfgs_kwargs 352 | 353 | self.weights = np.zeros((n_features, n_classes), dtype=dtype) 354 | if bias is None: 355 | self.bias = np.zeros((n_classes,), dtype=dtype) 356 | else: 357 | self.bias = np.asarray(bias).astype(dtype) 358 | if (n_classes,) != self.bias.shape: 359 | raise ValueError('bad shape for bias', self.bias.shape) 360 | if alpha is None: 361 | self.alpha = np.ones_like(self.prev_w_l2_sqr) 362 | else: 363 | self.alpha = np.array(alpha).astype(dtype) 364 | if self.alpha.shape != self.prev_w_l2_sqr.shape: 365 | raise ValueError('shape mismatch between alpha and prev_w_l2_sqr', 366 | self.alpha.shape, self.prev_w_l2_sqr.shape) 367 | self.n_sgd_iters = n_sgd_iters 368 | self.assert_clip_ok = assert_clip_ok 369 | self.badfit_thresh = badfit_thresh 370 | 371 | def print_summary(self): 372 | print 'IncrementalMultiSVM', repr(self) 373 | print '-> alpha', self.alpha 374 | print '-> prvl2', self.prev_l2_regularization 375 | print '-> prvw2', self.prev_w_l2_sqr 376 | 377 | @property 378 | def cumulative_alpha(self): 379 | return append_alpha(self.alpha) 380 | 381 | @property 382 | def cumulative_w_l2_sqr(self): 383 | return append_w_l2_sqr(self.prev_w_l2_sqr, self.weights) 384 | 385 | @property 386 | def cumulative_l2_regularization(self): 387 | return append_l2_regularization(self.prev_l2_regularization, 388 | self.l2_regularization) 389 | 390 | def xw_carry_forward(self, x, pxw=None): 391 | return append_xw(self.as_xw(x, pxw), x, self.weights) 392 | 393 | def continuation(self, n_features=None, l2_regularization=None): 394 | if n_features is None: 395 | n_features = self.n_features 396 | if l2_regularization is None: 397 | l2_regularization = self.l2_regularization 398 | 399 | rval = self.__class__( 400 | n_features=n_features, 401 | n_classes=self.n_classes, 402 | prev_w_l2_sqr=self.cumulative_w_l2_sqr, 403 | alpha=self.cumulative_alpha, 404 | prev_l2_regularization=self.cumulative_l2_regularization, 405 | l2_regularization=l2_regularization, 406 | dtype=self.dtype, 407 | scalar_bounds=self.scalar_bounds, 408 | print_interval=self.print_interval, 409 | bfgs_kwargs=self.bfgs_kwargs, 410 | n_sgd_iters=self.n_sgd_iters, 411 | 
bias=self.bias.copy(), 412 | assert_clip_ok=self.assert_clip_ok, 413 | ) 414 | return rval 415 | 416 | def decision_function(self, x, xw=None): 417 | rval = dot(x, self.weights) + self.bias 418 | xw = self.as_xw(x, xw) 419 | if xw.size or self.alpha.size: 420 | # -- workaround Theano's no support for tensordot 421 | rval += (xw * self.alpha.T).sum(axis=2) 422 | return rval 423 | 424 | def as_xw(self, x, xw): 425 | if xw is None: 426 | if self.n_prev == 0: 427 | return np.zeros( 428 | (len(x), self.n_classes, self.n_prev), 429 | dtype=x.dtype) 430 | else: 431 | raise TypeError('xw is required for previous models') 432 | else: 433 | xw = np.asarray(xw, dtype=self.dtype, order='C') 434 | if xw.shape != (len(x), self.n_classes, self.n_prev): 435 | raise ValueError('xw has wrong shape', 436 | (xw.shape, (len(x), self.n_classes, self.n_prev))) 437 | return xw 438 | 439 | def predict(self, x, xw=None): 440 | xw = self.as_xw(x, xw) 441 | return self.decision_function(x, xw).argmax(axis=1) 442 | 443 | def y_ind(self, y): 444 | # y_ind is all +-1, with 1 meaning a positive label for OvA classif 445 | assert y.min() == 0 # fail for +-1 labels 446 | y_ind = -np.ones((len(y), self.n_classes)).astype(self.dtype) 447 | y_ind[np.arange(len(y)), y] = 1 448 | return y_ind 449 | 450 | def loss(self, x, y, xw=None): 451 | xw = self.as_xw(x, xw) 452 | y_ind = self.y_ind(y) 453 | assert self.l2_regularization is not None 454 | return multi_svm_hinge_loss(x, y_ind, 455 | self.weights, self.bias, self.alpha, 456 | xw, 457 | self.prev_w_l2_sqr, 458 | self.l2_regularization, 459 | self.prev_l2_regularization, 460 | ) 461 | 462 | def fit(self, x, y, xw=None): 463 | """ 464 | x - n_examples x n_features design matrix. 465 | y - vector of integer labels 466 | xw - matrix of real-valued incoming biases obtained 467 | by multiplying the existing weight vectors by x 468 | """ 469 | pxw = self.as_xw(x, xw) 470 | assert y.min() == 0 # fail for +-1 labels 471 | 472 | if x.shape[0] != y.shape[0]: 473 | raise ValueError('length mismatch between x and y') 474 | 475 | n_examples, n_classes, n_prev = pxw.shape 476 | if n_prev != self.n_prev: 477 | raise ValueError('n_prev mismatch', 478 | (n_prev, self.n_prev)) 479 | if n_examples != len(x): 480 | raise ValueError('n_examples mismatch', 481 | (n_examples, len(x))) 482 | if n_classes != self.weights.shape[1]: 483 | raise ValueError('n_classes mismatch', 484 | (n_classes, self.weights.shape[1])) 485 | 486 | weights = self.weights 487 | bias = self.bias 488 | alpha = self.alpha 489 | 490 | bias0 = np.zeros_like(bias) 491 | alpha0 = np.empty((0, self.n_classes), dtype=alpha.dtype) 492 | 493 | y_ind = self.y_ind(y) 494 | 495 | bfgs_kwargs = dict(self.bfgs_kwargs) 496 | bfgs_kwargs.setdefault('factr', 100) 497 | 498 | bfgs_kwargs_precise = dict(bfgs_kwargs) 499 | bfgs_kwargs_precise['factr'] /= 100 500 | 501 | # -- warm up with some pure-online sgd 502 | # don't train alpha yet, wait until the weights and bias 503 | # are somewhat initialized. 
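        #    (sketch of what follows: fit_sgd_0 warm-starts `weights`/`bias`
        #     with plain SGD, then alpha is shrunk by c0 = n_prev/(1+n_prev)
        #     and the warmed weights by c1 = 1/(1+n_prev), so the new feature
        #     block enters the L-BFGS refinement with roughly 1/(1+n_prev) of
        #     the total weight)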
504 | weights, bias = fit_sgd_0(weights, bias, x, y_ind, 505 | self.l2_regularization, 506 | self.n_sgd_iters, 507 | self.print_interval) 508 | 509 | c0 = n_prev / (1.0 + n_prev) 510 | c1 = 1 / (1.0 + n_prev) 511 | 512 | alpha *= c0 513 | weights *= c1 514 | bias = c0 * bias0 + c1 * bias 515 | 516 | (p_weights, p_bias, p_alpha), infos = fit_l_bfgs_b( 517 | weights, bias, alpha, 518 | x, y_ind, self.l2_regularization, 519 | pxw, 520 | self.prev_w_l2_sqr, 521 | self.prev_l2_regularization, 522 | self.bfgs_kwargs, 523 | return_after_one_fit=True) 524 | 525 | if infos[0]['feature_high'] == self.n_features: 526 | # -- the first fit did the whole feature set 527 | weights = p_weights 528 | bias = p_bias 529 | alpha = p_alpha 530 | elif infos[0]['fopt'] >= self.badfit_thresh: 531 | # -- the first fit was so bad that we're giving up 532 | weights = p_weights 533 | bias = p_bias 534 | alpha = p_alpha 535 | else: 536 | # -- we couldn't fit the whole feature set at once 537 | data_home = skdata.data_home.get_data_home() 538 | tempdirname = os.path.join(data_home, 'hpconvnet_isvm_features') 539 | if not os.path.exists(tempdirname): 540 | os.makedirs(tempdirname) 541 | dirname = tempfile.mkdtemp(dir=tempdirname) 542 | try: 543 | README = open(os.path.join(dirname, 'README'), 'w+') 544 | print >> README, ( 545 | "Feature cache created by hpconvnet/isvm_multi.py") 546 | README.close() 547 | p_x = np.memmap(os.path.join(dirname, 'p_x.npy'), 548 | dtype=x.dtype, 549 | mode='w+', 550 | shape=x.shape) 551 | 552 | for ii in range(2): 553 | # -- if there isn't enough GPU memory to fit the whole 554 | # problem at once, then use a block coordinate descent 555 | # strategy, with different blocks on each iteration. 556 | # I found that 2 passes of this kind were sufficient 557 | # for MNIST, when divided into 2 pieces. 558 | 559 | perm = np.random.RandomState(1234 + ii).permutation( 560 | self.n_features) 561 | 562 | p_weights = weights[perm] 563 | for ii in xrange(len(x)): 564 | x_ii = x[ii] * 1 # -- bring it into memory 565 | p_x[ii] = x_ii[perm] 566 | 567 | (p_weights, bias, alpha), infos2 = fit_l_bfgs_b( 568 | p_weights, bias, alpha, 569 | p_x, y_ind, self.l2_regularization, 570 | pxw, 571 | self.prev_w_l2_sqr, 572 | self.prev_l2_regularization, 573 | self.bfgs_kwargs) 574 | 575 | weights[perm] = p_weights 576 | infos.extend(infos2) 577 | 578 | finally: 579 | shutil.rmtree(dirname) 580 | 581 | self.weights = weights 582 | self.bias = bias 583 | self.alpha = alpha 584 | self.fit_infos = infos 585 | 586 | # -- in cases where the prev_l2_sqr or the prev_l2_regularization are 587 | # really tiny, alpha can do funny things, like grow greater than 1, 588 | # and/or even be slightly negative. 
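        #    (note: despite the name, assert_clip_ok does not assert -- when
        #     True it re-evaluates the loss before/after the clip below and
        #     logs an error() if clipping degraded the fit; when False the
        #     clipped alpha is accepted silently)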
589 | clipped_alpha = np.clip(alpha, 0, 1) 590 | if self.assert_clip_ok: 591 | final_loss = self.loss(x, y, xw) 592 | self.alpha = clipped_alpha 593 | clipped_final_loss = self.loss(x, y, xw) 594 | 595 | if not np.allclose(final_loss, clipped_final_loss, atol=1e-3, 596 | rtol=1e-2): 597 | error('fit is significantly degraded by alpha-clipping') 598 | error('-> orig loss %f' % final_loss) 599 | error('-> clipped loss %f' % clipped_final_loss) 600 | error('-> alpha %s' % str(alpha)) 601 | else: 602 | self.alpha = clipped_alpha 603 | 604 | 605 | 606 | -------------------------------------------------------------------------------- /hpconvnet/pyll_slm.py: -------------------------------------------------------------------------------- 1 | """ 2 | A library file for the design approach of cifar10.py 3 | 4 | It includes the slm components as well as a few other things that should 5 | migrate upstream. 6 | 7 | """ 8 | import cPickle 9 | import logging 10 | import time 11 | 12 | import numpy as np 13 | from skimage.transform import AffineTransform 14 | from skimage.transform._warps_cy import _warp_fast 15 | 16 | import theano 17 | import theano.tensor as tensor 18 | from theano.tensor.nnet import conv 19 | 20 | from hyperopt import pyll 21 | import hyperopt 22 | 23 | from skdata import larray 24 | 25 | from .utils import mean_and_std 26 | from .utils import dot_f32 27 | from .utils import dot_f64 28 | import foobar 29 | 30 | import isvm_boosting # for worth_calculating 31 | 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | pyll.scope.define_info(o_len=2)(mean_and_std) 37 | 38 | 39 | class InvalidDescription(Exception): 40 | """Model description was invalid""" 41 | 42 | 43 | class EvalTimeout(Exception): 44 | """Document could not be evaluated in time 45 | """ 46 | 47 | 48 | global_timeout = float('inf') 49 | 50 | 51 | def set_timeout(t): 52 | global global_timeout 53 | global_timeout = t 54 | 55 | 56 | def raise_on_timeout(): 57 | if time.time() > global_timeout: 58 | raise EvalTimeout() 59 | 60 | 61 | 62 | class AffineImageWarp(theano.Op): 63 | 64 | def __eq__(self, other): 65 | return type(self) == type(other) 66 | 67 | def __hash__(self): 68 | return hash((type(self),)) 69 | 70 | def make_node(self, x, rot, shear, scale, trans, oshape): 71 | inputs = map(theano.tensor.as_tensor_variable, 72 | [x, rot, shear, scale, trans, oshape]) 73 | if 'float' not in x.dtype: 74 | raise TypeError('warping int images is not supported') 75 | return theano.Apply(self, inputs, [x.type()]) 76 | 77 | def perform(self, node, inputs, out_storage): 78 | x, rot, shear, scale, trans, oshape = inputs 79 | #foobar.append_ndarray_signature(x, 'AffineImageWarp x') 80 | 81 | aff = AffineTransform(rotation=rot, shear=shear, scale=scale, 82 | translation=trans) 83 | 84 | if str(x.dtype) != node.inputs[0].dtype: 85 | raise TypeError("Wrong dtype argument to AffineImageWarp", x.dtype) 86 | 87 | if np.any(x < 0): 88 | raise ValueError('X should be positive') 89 | 90 | if np.any(x > 1.0): 91 | raise ValueError('X should be less than 1') 92 | 93 | N, C, H, W = x.shape 94 | rows, cols = oshape 95 | 96 | rval = out_storage[0][0] 97 | rval_shape = (N, C, rows, cols) 98 | 99 | if ((rval is None) 100 | or (rval.dtype != x.dtype) 101 | or rval.shape != rval_shape): 102 | rval3 = np.empty((N * C, rows, cols), dtype=x.dtype) 103 | bg_check = True 104 | else: 105 | rval3 = rval.reshape((N * C, rows, cols)) 106 | bg_check = False 107 | 108 | xx = x.reshape(N * C, H, W) 109 | 110 | # -- a small exactly-representable float for 
out-of-bounds pixels 111 | oob = -1.0 / 2 ** 16 112 | order = 1 # TODO: TRY ORDER=2 (WHY DOES RANGE GET LARGER?) 113 | 114 | tform = np.linalg.inv(aff._matrix) 115 | 116 | for i in xrange(N * C): 117 | if bg_check and i == 0: 118 | rval3[i] = _warp_fast(xx[i], tform, 119 | output_shape=oshape, order=order, 120 | cval=oob) 121 | oob_ratio = np.mean(rval3[i] == oob) 122 | if oob_ratio > 0.5: 123 | raise InvalidDescription('too much background', oob_ratio) 124 | rval3[i] = np.maximum(0, rval3[i]) 125 | else: 126 | rval3[i] = _warp_fast(xx[i], np.linalg.inv(aff._matrix), 127 | output_shape=oshape, order=order, 128 | cval=0) 129 | 130 | if 0 and i == 0: 131 | print 'Debugprint from AffineImageWarp...' 132 | for sym in 'rot', 'shear', 'scale', 'trans', 'oshape': 133 | print sym, ':', locals()[sym] 134 | import matplotlib.pyplot as pl 135 | pl.subplot(2, 1, 1) 136 | pl.imshow(xx[i], cmap=pl.cm.gray) 137 | pl.subplot(2, 1, 2) 138 | pl.imshow(rval3[i], cmap=pl.cm.gray) 139 | pl.show() 140 | time.sleep(2) # -- give some time to ctrl-C 141 | 142 | if np.any(rval3 > 1.001) or np.any(rval3 < 0.0): 143 | min3 = np.min(rval3) 144 | max3 = np.max(rval3) 145 | raise ValueError('interpolated pixel values out of range', 146 | (min3, max3)) 147 | 148 | out_storage[0][0] = rval3.reshape(rval_shape) 149 | #foobar.append_ndarray_signature(out_storage[0][0], 'AffineImageWarp y') 150 | 151 | 152 | affine_image_warp = AffineImageWarp() 153 | 154 | 155 | @pyll.scope.define 156 | def slm_affine_image_warp((x, x_shp), 157 | rot, shear, scale, trans, oshape): 158 | assert x_shp[2] == x_shp[3] 159 | z = affine_image_warp(x, 160 | rot, shear, np.asarray(scale), np.asarray(trans), np.asarray(oshape)) 161 | z_shp = (x_shp[0], x_shp[1]) + tuple(oshape) 162 | assert z_shp[2] == z_shp[3] 163 | return z, z_shp 164 | 165 | 166 | @pyll.scope.define 167 | def slm_img_uint8_to_float32((x, x_shp),): 168 | if str(x.dtype) != 'uint8': 169 | raise TypeError('x must be uint8', x.dtype) 170 | return (x.astype('float32') / 255, x_shp) 171 | 172 | 173 | @pyll.scope.define 174 | def alloc_random_uniform_filterbank(n_filters, height, width, 175 | channels, dtype, rseed, normalize=True): 176 | """ 177 | Generate the same weights as are generated by pythor3 178 | """ 179 | if height != width: 180 | raise ValueError('filters must be square') 181 | if channels is None: 182 | filter_shape = [n_filters, height, width] 183 | else: 184 | filter_shape = [n_filters, height, width, channels] 185 | 186 | rng = np.random.RandomState(rseed) 187 | foobar.append_randomstate('alloc_random_uniform_filterbank', rng) 188 | fb_data = rng.uniform(size=filter_shape) 189 | 190 | # normalize each filter in the bank if needed 191 | if normalize: 192 | # TODO: vectorize these computations, do all at once. 
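        #    A vectorized sketch of the loop below, left as a comment so the
        #    reference behaviour stays untouched (assumes fb_data is
        #    contiguous so the reshape is a view):
        #       flat = fb_data.reshape(len(fb_data), -1)
        #       flat -= flat.mean(axis=1, keepdims=True)
        #       flat /= np.sqrt((flat * flat).sum(axis=1, keepdims=True))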
193 | for fidx, filt in enumerate(fb_data): 194 | # normalization here means zero-mean, unit-L2norm 195 | filt -= filt.mean() 196 | filt_norm = np.sqrt((filt * filt).sum()) 197 | assert filt_norm != 0 198 | filt /= filt_norm 199 | fb_data[fidx] = filt 200 | 201 | foobar.append_ndarray_signature(fb_data, 'alloc_random_uniform_filterbank') 202 | return fb_data.astype(dtype) 203 | 204 | 205 | @pyll.scope.define_info(o_len=2) 206 | def boxconv((x, x_shp), kershp, channels=False): 207 | """ 208 | channels: sum over channels (T/F) 209 | """ 210 | kershp = tuple(kershp) 211 | if channels: 212 | rshp = ( x_shp[0], 213 | 1, 214 | x_shp[2] - kershp[0] + 1, 215 | x_shp[3] - kershp[1] + 1) 216 | kerns = np.ones((1, x_shp[1]) + kershp, dtype=x.dtype) 217 | else: 218 | rshp = ( x_shp[0], 219 | x_shp[1], 220 | x_shp[2] - kershp[0] + 1, 221 | x_shp[3] - kershp[1] + 1) 222 | kerns = np.ones((1, 1) + kershp, dtype=x.dtype) 223 | x_shp = (x_shp[0]*x_shp[1], 1, x_shp[2], x_shp[3]) 224 | x = x.reshape(x_shp) 225 | try: 226 | rval = tensor.reshape( 227 | conv.conv2d(x, 228 | theano.shared(kerns), 229 | image_shape=x_shp, 230 | filter_shape=kerns.shape, 231 | border_mode='valid'), 232 | rshp) 233 | except Exception, e: 234 | if "Bad size for the output shape" in str(e): 235 | raise InvalidDescription('boxconv', (x_shp, kershp, channels)) 236 | else: 237 | raise 238 | return rval, rshp 239 | 240 | 241 | @pyll.scope.define_info(o_len=2) 242 | def slm_fbcorr_chmaj((x, x_shp), kerns, stride=1, mode='valid'): 243 | """ 244 | Channel-major filterbank correlation 245 | 246 | kerns - filterbank with shape (n_filters, ker_size, ker_size, channels) 247 | 248 | """ 249 | assert x_shp[2] == x_shp[3] 250 | assert x.dtype == 'float32' 251 | # Reference implementation: 252 | # ../pythor3/pythor3/operation/fbcorr_/plugins/scipy_naive/scipy_naive.py 253 | if stride != 1: 254 | raise NotImplementedError('stride is not used in reference impl.') 255 | 256 | # -- flip the kernels so that convolution does correlation 257 | kerns = kerns[:, :, ::-1, ::-1] 258 | s_kerns = theano.shared(kerns.astype(x.dtype)) 259 | x = conv.conv2d( 260 | x, 261 | s_kerns, 262 | image_shape=x_shp, 263 | filter_shape=kerns.shape, 264 | border_mode=mode) 265 | 266 | n_filters, channels, krows, kcols = kerns.shape 267 | if mode == 'valid': 268 | x_shp = (x_shp[0], n_filters, 269 | x_shp[2] - krows + 1, 270 | x_shp[3] - kcols + 1) 271 | elif mode == 'full': 272 | x_shp = (x_shp[0], n_filters, 273 | x_shp[2] + krows - 1, 274 | x_shp[3] + kcols - 1) 275 | else: 276 | raise NotImplementedError('fbcorr mode', mode) 277 | 278 | assert x_shp[2] == x_shp[3] 279 | return x, x_shp 280 | 281 | 282 | @pyll.scope.define_info(o_len=2) 283 | def slm_clipout((x, x_shp), min_out, max_out): 284 | if min_out is None and max_out is None: 285 | return x, x_shp 286 | elif min_out is None: 287 | return tensor.minimum(x, max_out), x_shp 288 | elif max_out is None: 289 | return tensor.maximum(x, min_out), x_shp 290 | else: 291 | return tensor.clip(x, min_out, max_out), x_shp 292 | 293 | 294 | @pyll.scope.define_info(o_len=2) 295 | def slm_lpool((x, x_shp), 296 | ker_size=3, 297 | order=1, 298 | stride=1, 299 | mode='valid'): 300 | assert x.dtype == 'float32' 301 | assert x_shp[2] == x_shp[3] 302 | order=float(order) 303 | 304 | ker_shape = (ker_size, ker_size) 305 | if hasattr(order, '__iter__'): 306 | o1 = (order == 1).all() 307 | o2 = (order == order.astype(np.int)).all() 308 | else: 309 | o1 = order == 1 310 | o2 = (order == int(order)) 311 | 312 | if o1: 313 | r, r_shp = 
boxconv((x, x_shp), ker_shape) 314 | elif o2: 315 | r, r_shp = boxconv((x ** order, x_shp), ker_shape) 316 | r = tensor.maximum(r, 0) ** (1.0 / order) 317 | else: 318 | r, r_shp = boxconv((abs(x) ** order, x_shp), ker_shape) 319 | r = tensor.maximum(r, 0) ** (1.0 / order) 320 | 321 | if stride > 1: 322 | r = r[:, :, ::stride, ::stride] 323 | # intdiv is tricky... so just use numpy 324 | r_shp = np.empty(r_shp)[:, :, ::stride, ::stride].shape 325 | assert r_shp[2] == r_shp[3] 326 | return r, r_shp 327 | 328 | 329 | @pyll.scope.define_info(o_len=2) 330 | def slm_lnorm((x, x_shp), 331 | ker_size=3, 332 | remove_mean= False, 333 | div_method='euclidean', 334 | threshold=0.0, 335 | stretch=1.0, 336 | mode='valid', 337 | EPSILON=1e-4, 338 | ): 339 | # Reference implementation: 340 | # ../pythor3/pythor3/operation/lnorm_/plugins/scipy_naive/scipy_naive.py 341 | assert x.dtype == 'float32' 342 | assert x_shp[2] == x_shp[3] 343 | inker_shape=(ker_size, ker_size) 344 | outker_shape=(ker_size, ker_size) # (3, 3) 345 | if mode != 'valid': 346 | raise NotImplementedError('lnorm requires mode=valid', mode) 347 | 348 | threshold = float(threshold) 349 | stretch = float(stretch) 350 | 351 | if outker_shape == inker_shape: 352 | size = np.asarray(x_shp[1] * inker_shape[0] * inker_shape[1], 353 | dtype=x.dtype) 354 | ssq, ssqshp = boxconv((x ** 2, x_shp), inker_shape, 355 | channels=True) 356 | xs = inker_shape[0] // 2 357 | ys = inker_shape[1] // 2 358 | # --local contrast normalization in regions that are not symmetric 359 | # about the pixel being normalized feels weird, but we're 360 | # allowing it here. 361 | xs_inc = (inker_shape[0] + 1) % 2 362 | ys_inc = (inker_shape[1] + 1) % 2 363 | if div_method == 'euclidean': 364 | if remove_mean: 365 | arr_sum, _shp = boxconv((x, x_shp), inker_shape, 366 | channels=True) 367 | arr_num = (x[:, :, xs-xs_inc:-xs, ys-ys_inc:-ys] 368 | - arr_sum / size) 369 | arr_div = EPSILON + tensor.sqrt( 370 | tensor.maximum(0, 371 | ssq - (arr_sum ** 2) / size)) 372 | else: 373 | arr_num = x[:, :, xs-xs_inc:-xs, ys-ys_inc:-ys] 374 | arr_div = EPSILON + tensor.sqrt(ssq) 375 | else: 376 | raise NotImplementedError('div_method', div_method) 377 | else: 378 | raise NotImplementedError('outker_shape != inker_shape', 379 | outker_shape, inker_shape) 380 | 381 | if (hasattr(stretch, '__iter__') and (stretch != 1).any()) or stretch != 1: 382 | arr_num = arr_num * stretch 383 | arr_div = arr_div * stretch 384 | # XXX: IS THIS 1.0 supposed to be (threshold + EPSILON) ?? 385 | arr_div = tensor.switch(arr_div < (threshold + EPSILON), 1.0, arr_div) 386 | 387 | r = arr_num / arr_div 388 | r_shp = x_shp[0], x_shp[1], ssqshp[2], ssqshp[3] 389 | return r, r_shp 390 | 391 | 392 | @pyll.scope.define_info(o_len=2) 393 | def slm_fbncc_chmaj((x, x_shp), m_fb, remove_mean, beta, hard_beta): 394 | """ 395 | Channel-major filterbank normalized cross-correlation 396 | 397 | For each valid-mode patch (p) of the image (x), this transform computes 398 | 399 | p_c = (p - mean(p)) if (remove_mean) else (p) 400 | qA = p_c / sqrt(var(p_c) + beta) # -- Coates' sc_vq_demo 401 | qB = p_c / sqrt(max(sum(p_c ** 2), beta)) # -- Pinto's lnorm 402 | 403 | There are two differences between qA and qB: 404 | 405 | 1. the denominator contains either addition or max 406 | 407 | 2. the denominator contains either var or sum of squares 408 | 409 | The first difference corresponds to the hard_beta parameter. 
410 | The second difference amounts to a decision about the scaling of the 411 | output, because for every function qA(beta_A) there is a function 412 | qB(betaB) that is identical, except for a multiplicative factor of 413 | sqrt(N - 1). 414 | 415 | I think that in the context of stacked models, the factor of sqrt(N-1) is 416 | undesirable because we want the dynamic range of all outputs to be as 417 | similar as possible. So this function implements qB. 418 | 419 | Coates' denominator had var(p_c) + 10, so what should the equivalent here 420 | be? 421 | p_c / sqrt(var(p_c) + 10) 422 | = p_c / sqrt(sum(p_c ** 2) / (108 - 1) + 10) 423 | = p_c / sqrt((sum(p_c ** 2) + 107 * 10) / 107) 424 | = sqrt(107) * p_c / sqrt((sum(p_c ** 2) + 107 * 10)) 425 | 426 | So Coates' pre-processing has beta = 1070, hard_beta=False. This function 427 | returns a result that is sqrt(107) ~= 10 times smaller than the Coates 428 | whitening step. 429 | 430 | """ 431 | # -- just to make sure things will run on GPU 432 | assert x.dtype == 'float32' 433 | w_means, w_fb = m_fb 434 | 435 | beta = float(beta) 436 | 437 | # -- kernel Number, Features, Rows, Cols 438 | kN, kF, kR, kC = w_fb.shape 439 | 440 | # -- patch-wise sums and sums-of-squares 441 | p_sum, _shp = boxconv((x, x_shp), (kR, kC), channels=True) 442 | p_mean = 0 if remove_mean else p_sum / (kF * kR * kC) 443 | p_ssq, _shp = boxconv((x ** 2, x_shp), (kR, kC), channels=True) 444 | 445 | # -- this is an important variable in the math above, but 446 | # it is not directly used in the fused lnorm_fbcorr 447 | # p_c = x[:, :, xs - xs_inc:-xs, ys - ys_inc:-ys] - p_mean 448 | 449 | # -- adjust the sum of squares to reflect remove_mean 450 | p_c_sq = p_ssq - (p_mean ** 2) * (kF * kR * kC) 451 | if hard_beta: 452 | p_div2 = tensor.maximum(p_c_sq, beta) 453 | else: 454 | p_div2 = p_c_sq + beta 455 | 456 | p_scale = 1.0 / tensor.sqrt(p_div2) 457 | 458 | # -- 459 | # from whitening, we have a shift and linear transform (P) 460 | # for each patch (as vector). 461 | # 462 | # let m be the vector [m m m m] that replicates p_mean 463 | # let a be the scalar p_scale 464 | # let x be an image patch from s_imgs 465 | # 466 | # Whitening means applying the affine transformation 467 | # (c - M) P 468 | # to contrast-normalized patch c = a (x - m), 469 | # where a = p_scale and m = p_mean. 470 | # 471 | # We also want to extract features in dictionary 472 | # 473 | # (c - M) P 474 | # = (a (x - [m,m,m]) - M) P 475 | # = (a x - a [m,m,m] - M) P 476 | # = a x P - a [m,m,m] P - M P 477 | # 478 | 479 | P = theano.shared( 480 | np.asarray(w_fb[:, :, ::-1, ::-1], order='C')) 481 | 482 | Px = conv.conv2d(x, P, 483 | image_shape=x_shp, 484 | filter_shape=w_fb.shape, 485 | border_mode='valid') 486 | 487 | s_P_sum = theano.shared(w_fb.sum(3).sum(2).sum(1)) 488 | Pmmm = p_mean * s_P_sum.dimshuffle(0, 'x', 'x') 489 | s_PM = theano.shared((w_means * w_fb).sum(3).sum(2).sum(1)) 490 | z = p_scale * (Px - Pmmm) - s_PM.dimshuffle(0, 'x', 'x') 491 | 492 | assert z.dtype == x.dtype, (z.dtype, x.dtype) 493 | return z, (_shp[0], kN, _shp[2], _shp[3]) 494 | 495 | 496 | @pyll.scope.define 497 | def slm_flatten((x, x_shp),): 498 | r = tensor.flatten(x, 2) 499 | r_shp = x_shp[0], np.prod(x_shp[1:]) 500 | return r, r_shp 501 | 502 | 503 | @pyll.scope.define_info(o_len=2) 504 | def slm_lpool_smallgrid((x, x_shp), grid_res=2, order=1): 505 | """ 506 | Like lpool, but parametrized to produce a fixed size image as output. 
507 | The image is not rescaled, but rather single giant box filters are 508 | defined for each output pixel, and stored in a matrix. 509 | """ 510 | assert x.dtype == 'float32' 511 | order=float(order) 512 | 513 | if hasattr(order, '__iter__'): 514 | o1 = (order == 1).all() 515 | o2 = (order == order.astype(np.int)).all() 516 | else: 517 | o1 = order == 1 518 | o2 = (order == int(order)) 519 | 520 | # rather than convolving with a box, this function takes 521 | # a dot product with the entire image 522 | ngR = x_shp[2] // grid_res + int(x_shp[2] % grid_res > 0) 523 | ngC = x_shp[3] // grid_res + int(x_shp[3] % grid_res > 0) 524 | 525 | assert ngR * grid_res >= x_shp[2] 526 | assert ngC * grid_res >= x_shp[3] 527 | 528 | W = np.zeros((grid_res, grid_res,) + x_shp[2:], dtype=x.dtype) 529 | for rr in range(grid_res): 530 | for cc in range(grid_res): 531 | W[rr, cc, 532 | rr * ngR : (rr + 1) * ngR, 533 | cc * ngC : (cc + 1) * ngC] = 1.0 534 | sW = theano.shared(W.reshape((grid_res ** 2, -1))) 535 | 536 | xmat = x.reshape((x_shp[0] * x_shp[1], x_shp[2] * x_shp[3])) 537 | 538 | if o1: 539 | r = tensor.dot(xmat, sW.T) 540 | elif o2: 541 | r = tensor.sqrt(tensor.dot(xmat ** 2, sW.T)) 542 | else: 543 | r = tensor.dot(abs(xmat) ** order, sW.T) 544 | r = tensor.maximum(r, 0) ** (1.0 / order) 545 | 546 | r_shp = (x_shp[0], x_shp[1], grid_res, grid_res) 547 | r = r.reshape(r_shp) 548 | 549 | return r, r_shp 550 | 551 | 552 | @pyll.scope.define_info(o_len=2) 553 | def slm_quantize_gridpool((x, x_shp), alpha, 554 | use_mid=False, 555 | order=1.0, 556 | grid_res=2): 557 | hr = int(np.round(x_shp[2] / grid_res)) 558 | hc = int(np.round(x_shp[3] / grid_res)) 559 | alpha = tensor.cast(alpha, dtype=x.dtype) 560 | sXC_shp = (x_shp[0], x_shp[1], grid_res, grid_res, 3 if use_mid else 2) 561 | sXC = tensor.zeros(sXC_shp, dtype=x.dtype) 562 | 563 | for ri in range(grid_res): 564 | if ri == grid_res - 1: 565 | rslice = slice(ri * hr, None) 566 | else: 567 | rslice = slice(ri * hr, (ri + 1) * hr) 568 | for ci in range(grid_res): 569 | cslice = slice(ci * hc, (ci + 1) * hc) 570 | if ci == grid_res - 1: 571 | cslice = slice(ci * hc, None) 572 | else: 573 | cslice = slice(ci * hc, (ci + 1) * hc) 574 | xi = x[:, :, rslice, cslice] 575 | qs = [] 576 | qs.append(tensor.maximum(xi - alpha, 0)) 577 | qs.append(tensor.maximum(-xi - alpha, 0)) 578 | if use_mid: 579 | qs.append(tensor.maximum(alpha - abs(xi), 0)) 580 | 581 | for qi, q in enumerate(qs): 582 | inc = (q ** order).sum([2, 3]) ** (1. / order) 583 | assert inc.dtype == q.dtype 584 | sXC = tensor.set_subtensor(sXC[:, :, ri, ci, qi], inc) 585 | 586 | r_shp = sXC_shp[0], np.prod(sXC_shp[1:]) 587 | r = sXC.reshape(r_shp) 588 | return r, r_shp 589 | 590 | 591 | @pyll.scope.define_info(o_len=2) 592 | def slm_lpool_alpha((x, x_shp), 593 | ker_size=3, 594 | order=1, 595 | stride=1, 596 | alpha=0.0, 597 | ): 598 | """ 599 | lpool but with alpha-half-rectification 600 | """ 601 | assert x.dtype == 'float32' 602 | order=float(order) 603 | 604 | ker_shape = (ker_size, ker_size) 605 | 606 | xp = tensor.maximum(x - alpha, 0) 607 | xn = tensor.maximum(-x - alpha, 0) 608 | rp, r_shp = boxconv((xp ** order, x_shp), ker_shape) 609 | rn, r_shp = boxconv((xn ** order, x_shp), ker_shape) 610 | rp = rp ** (1. / order) 611 | rn = rn ** (1. / order) 612 | 613 | if stride > 1: 614 | # -- theano optimizations should turn this stride into conv2d 615 | # subsampling 616 | rp = rp[:, :, ::stride, ::stride] 617 | rn = rn[:, :, ::stride, ::stride] 618 | # intdiv is tricky... 
so just use numpy 619 | r_shp = np.empty(r_shp)[:, :, ::stride, ::stride].shape 620 | 621 | z_shp = (r_shp[0], 2 * r_shp[1], r_shp[2], r_shp[3]) 622 | z = tensor.zeros(z_shp, dtype=x.dtype) 623 | z = tensor.set_subtensor(z[:, :r_shp[1]], rp) 624 | z = tensor.set_subtensor(z[:, r_shp[1]:], rn) 625 | 626 | return z, z_shp 627 | 628 | 629 | @pyll.scope.define_info(o_len=2) 630 | def slm_gnorm((x, x_shp), 631 | remove_mean= False, 632 | div_method='euclidean', 633 | threshold=0.0, 634 | stretch=1.0, 635 | EPSILON=1e-4, 636 | across_channels=True, 637 | ): 638 | """ 639 | Global normalization, as opposed to local normalization 640 | """ 641 | 642 | threshold = float(threshold) 643 | stretch = float(stretch) 644 | 645 | if across_channels: 646 | size = x_shp[1] * x_shp[2] * x_shp[3] 647 | ssq = (x ** 2).sum(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x') 648 | else: 649 | size = x_shp[2] * x_shp[3] 650 | ssq = (x ** 2).sum(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x') 651 | 652 | if div_method == 'euclidean': 653 | if remove_mean: 654 | if across_channels: 655 | arr_sum = x.sum(axis=[1, 2, 3]).dimshuffle(0, 'x', 'x', 'x') 656 | else: 657 | arr_sum = x.sum(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x') 658 | 659 | arr_num = x - arr_sum / size 660 | arr_div = EPSILON + tensor.sqrt( 661 | tensor.maximum(0, 662 | ssq - (arr_sum ** 2) / size)) 663 | else: 664 | arr_num = x 665 | arr_div = EPSILON + tensor.sqrt(ssq) 666 | else: 667 | raise NotImplementedError('div_method', div_method) 668 | 669 | if (hasattr(stretch, '__iter__') and (stretch != 1).any()) or stretch != 1: 670 | arr_num = arr_num * stretch 671 | arr_div = arr_div * stretch 672 | arr_div = tensor.switch(arr_div < (threshold + EPSILON), 1.0, arr_div) 673 | 674 | r = arr_num / arr_div 675 | r_shp = x_shp 676 | return r, r_shp 677 | 678 | 679 | @pyll.scope.define 680 | def contrast_normalize(patches, remove_mean, beta, hard_beta): 681 | X = patches 682 | if X.ndim != 2: 683 | raise TypeError('contrast_normalize requires flat patches') 684 | if remove_mean: 685 | xm = X.mean(1) 686 | else: 687 | xm = X[:,0] * 0 688 | Xc = X - xm[:, None] 689 | if 0: 690 | # -- for some reason the following sometimes uses gigs of RAM 691 | l2 = (Xc * Xc).sum(axis=1) 692 | else: 693 | l2 = np.zeros_like(Xc[:, 0]) 694 | for i in xrange(Xc.shape[1]): 695 | l2 += Xc[:, i] ** 2 696 | if hard_beta: 697 | div2 = np.maximum(l2, beta) 698 | else: 699 | div2 = l2 + beta 700 | Xc /= np.sqrt(div2[:, None]) 701 | foobar.append_ndarray_signature(Xc, 'contrast_normalize') 702 | return Xc 703 | 704 | 705 | @pyll.scope.define 706 | def random_patches(images, N, R, C, rng, channel_major=False, memlimit=None): 707 | """Return a stack of N image patches (channel major version)""" 708 | 709 | def N_with_memlimit(): 710 | if memlimit is not None: 711 | # -- memlimit in bytes 712 | sizelimit = memlimit / images.dtype.itemsize 713 | return min(N, sizelimit // (R * C * iF)) 714 | else: 715 | return N 716 | 717 | if channel_major: 718 | n_imgs, iF, iR, iC = images.shape 719 | N = N_with_memlimit() 720 | rval = np.empty((N, iF, R, C), dtype=images.dtype) 721 | else: 722 | n_imgs, iR, iC, iF = images.shape 723 | N = N_with_memlimit() 724 | rval = np.empty((N, R, C, iF), dtype=images.dtype) 725 | 726 | foobar.append_trace('random_patches dims', *rval.shape) 727 | foobar.append_randomstate('random_patches rng', rng) 728 | 729 | srcs = rng.randint(n_imgs, size=N) 730 | 731 | if R > iR or C > iC: 732 | raise InvalidDescription('cannot extract patches', (R, C)) 733 | roffsets = rng.randint(iR - R + 1, 
size=N) 734 | coffsets = rng.randint(iC - C + 1, size=N) 735 | # TODO: this can be done with one advanced index right? 736 | for rv_i, src_i, ro, co in zip(rval, srcs, roffsets, coffsets): 737 | if channel_major: 738 | rv_i[:] = images[src_i, :, ro: ro + R, co : co + C] 739 | else: 740 | rv_i[:] = images[src_i, ro: ro + R, co : co + C] 741 | foobar.append_ndarray_signature(rval, 'random_patches rval') 742 | return rval 743 | 744 | 745 | @pyll.scope.define_info(o_len=3) 746 | def patch_whitening_filterbank_X(patches, o_ndim, gamma, 747 | remove_mean, beta, hard_beta, 748 | ): 749 | """ 750 | patches - Image patches (can be uint8 pixels or floats) 751 | o_ndim - 2 to get matrix outputs, 4 to get image-stack outputs 752 | gamma - non-negative real to boost low-principle components 753 | 754 | remove_mean - see contrast_normalize 755 | beta - see contrast_normalize 756 | hard_beta - see contrast_normalize 757 | 758 | Returns: M, P, X 759 | M - mean of contrast-normalized patches 760 | P - whitening matrix / filterbank for contrast-normalized patches 761 | X - contrast-normalized patches 762 | 763 | """ 764 | # Algorithm from Coates' sc_vq_demo.m 765 | 766 | # -- patches -> column vectors 767 | X = patches.reshape(len(patches), -1).astype('float64') 768 | 769 | X = contrast_normalize(X, 770 | remove_mean=remove_mean, 771 | beta=beta, 772 | hard_beta=hard_beta) 773 | 774 | # -- ZCA whitening (with low-pass) 775 | logger.debug('patch_whitening_filterbank_X starting ZCA') 776 | M, _std = mean_and_std(X) 777 | Xm = X - M 778 | assert Xm.shape == X.shape 779 | logger.info('patch_whitening_filterbank_X starting ZCA: dot %s' % 780 | str(Xm.shape)) 781 | C = dot_f64(Xm.T, Xm) / (Xm.shape[0] - 1) 782 | logger.debug('patch_whitening_filterbank_X starting ZCA: eigh') 783 | D, V = np.linalg.eigh(C) 784 | logger.debug( 785 | 'patch_whitening_filterbank_X starting ZCA: dot %s' % str(V.shape)) 786 | P = dot_f32(np.sqrt(1.0 / (D + gamma)) * V, V.T) 787 | 788 | # -- return to image space 789 | if o_ndim == 4: 790 | M = M.reshape(patches.shape[1:]) 791 | P = P.reshape((P.shape[0],) + patches.shape[1:]) 792 | X = X.reshape((len(X),) + patches.shape[1:]) 793 | elif o_ndim == 2: 794 | pass 795 | else: 796 | raise ValueError('o_ndim not in (2, 4)', o_ndim) 797 | 798 | logger.debug('patch_whitening_filterbank_X -> done') 799 | 800 | foobar.append_ndarray_signature(M, 'patch_whitening_filterbank_X M') 801 | foobar.append_ndarray_signature(P, 'patch_whitening_filterbank_X P') 802 | foobar.append_ndarray_signature(X, 'patch_whitening_filterbank_X X') 803 | dtype = patches.dtype 804 | return M.astype(dtype), P.astype(dtype), X.astype(dtype) 805 | 806 | 807 | @pyll.scope.define_info(o_len=2) 808 | def fb_whitened_projections(patches, pwfX, n_filters, rseed, dtype): 809 | """ 810 | pwfX is the output of patch_whitening_filterbank_X with reshape=False 811 | 812 | M, and fb will be reshaped to match elements of patches 813 | """ 814 | M, P, patches_cn = pwfX 815 | if patches_cn.ndim != 2: 816 | raise TypeError('wrong shape for pwfX args, should be flattened', 817 | patches_cn.shape) 818 | rng = np.random.RandomState(rseed) 819 | foobar.append_randomstate('fb_whitened_projections', rng) 820 | 821 | D = rng.randn(n_filters, patches_cn.shape[1]) 822 | D = D / (np.sqrt((D ** 2).sum(axis=1))[:, None] + 1e-20) 823 | fb = dot_f32(D, P) 824 | fb.shape = (n_filters,) + patches.shape[1:] 825 | M.shape = patches.shape[1:] 826 | M = M.astype(dtype) 827 | fb = fb.astype(dtype) 828 | if fb.size == 0: 829 | raise ValueError('filterbank had 
size 0') 830 | foobar.append_ndarray_signature(M, 'fb_whitened_projections M') 831 | foobar.append_ndarray_signature(fb, 'fb_whitened_projections fb') 832 | return M, fb 833 | 834 | 835 | @pyll.scope.define_info(o_len=2) 836 | def fb_whitened_patches(patches, pwfX, n_filters, rseed, dtype): 837 | """ 838 | pwfX is the output of patch_whitening_filterbank_X with reshape=False 839 | 840 | M, and fb will be reshaped to match elements of patches 841 | 842 | """ 843 | M, P, patches_cn = pwfX 844 | rng = np.random.RandomState(rseed) 845 | foobar.append_randomstate('fb_whitened_patches', rng) 846 | d_elems = rng.randint(len(patches_cn), size=n_filters) 847 | D = dot_f64(patches_cn[d_elems] - M, P) 848 | D = D / (np.sqrt((D ** 2).sum(axis=1))[:, None] + 1e-20) 849 | fb = dot_f32(D, P) 850 | fb.shape = (n_filters,) + patches.shape[1:] 851 | M.shape = patches.shape[1:] 852 | M = M.astype(dtype) 853 | fb = fb.astype(dtype) 854 | if fb.size == 0: 855 | raise ValueError('filterbank had size 0') 856 | foobar.append_ndarray_signature(M, 'fb_whitened_patches M') 857 | foobar.append_ndarray_signature(fb, 'fb_whitened_patches fb') 858 | return M, fb 859 | 860 | 861 | @pyll.scope.define 862 | def pyll_theano_batched_lmap(pipeline, seq, batchsize, 863 | _debug_call_counts=None, 864 | print_progress_every=float('inf'), 865 | abort_on_rows_larger_than=None, 866 | speed_thresh=None, 867 | x_dtype='float32', 868 | ): 869 | """ 870 | This function returns a skdata.larray.lmap object whose function 871 | is defined by a theano expression. 872 | 873 | The theano expression will be built and compiled specifically for the 874 | dimensions of the given `seq`. Therefore, in_rows, and out_rows should 875 | actually be a *pyll* graph, that evaluates to a theano graph. 876 | """ 877 | 878 | in_shp = (batchsize,) + seq.shape[1:] 879 | batch = np.zeros(in_shp, dtype=x_dtype) 880 | s_ibatch = theano.shared(batch) 881 | s_xi = theano.tensor.as_tensor_variable(s_ibatch).type() 882 | s_N = s_xi.shape[0] 883 | s_X = theano.tensor.set_subtensor(s_ibatch[:s_N], s_xi) 884 | #print 'PIPELINE', pipeline 885 | thing = pipeline((s_X, in_shp)) 886 | #print 'THING' 887 | #print thing 888 | #print '===' 889 | s_obatch, oshp = pyll.rec_eval(thing) 890 | assert oshp[0] == batchsize 891 | logger.info('batched_lmap oshp %s' % str(oshp)) 892 | if abort_on_rows_larger_than: 893 | rowlen = np.prod(oshp[1:]) 894 | if rowlen > abort_on_rows_larger_than: 895 | raise ValueError('rowlen %i exceeds limit %i' % ( 896 | rowlen, abort_on_rows_larger_than)) 897 | 898 | # Compile a function that takes a variable number of elements in, 899 | # returns the same number of processed elements out, 900 | # but does all internal computations using a fixed number of elements, 901 | # because convolutions are fastest when they're hard-coded to a certain 902 | # size. 
903 | logger.debug('pyll_theano_batched_lmap compiling fn') 904 | _fn = theano.function([theano.Param(s_xi, strict=True)], 905 | s_obatch[:s_N], 906 | updates={ 907 | s_ibatch: s_X, # this allows the inc_subtensor to be in-place 908 | }) 909 | logger.debug('pyll_theano_batched_lmap compiling fn -> done') 910 | 911 | sums = {'elems': 0, 'times': 0.0} 912 | if speed_thresh is None: 913 | time_fn = _fn 914 | else: 915 | def time_fn(X): 916 | t0 = time.time() 917 | if str(X.dtype) != x_dtype: 918 | print 'time_fn dtype problem', X.dtype, x_dtype 919 | rval = _fn(X) 920 | dt = time.time() - t0 921 | #print 'DEBUG time_fn dt:', dt 922 | sums['elems'] += len(X) 923 | sums['times'] += dt 924 | return rval 925 | 926 | def raise_if_slow(): 927 | exc = EvalTimeout( 928 | 'batched_lmap failed to compute %i elements in %f secs' 929 | % (speed_thresh['elements'], speed_thresh['seconds'])) 930 | if sums['elems'] >= speed_thresh['elements']: 931 | observed_ratio = sums['elems'] / sums['times'] 932 | required_ratio = (speed_thresh['elements'] / 933 | speed_thresh['seconds']) 934 | if observed_ratio < required_ratio: 935 | raise exc 936 | else: 937 | sums['elems'] = 0 938 | sums['times'] = 0.0 939 | 940 | def fn_1(x): 941 | if _debug_call_counts: 942 | _debug_call_counts['fn_1'] += 1 943 | return time_fn(x[None, :, :, :])[0] 944 | 945 | attrs = { 946 | 'shape': oshp[1:], 947 | 'ndim': len(oshp) -1, 948 | 'dtype': s_obatch.dtype } 949 | def rval_getattr(attr, objs): 950 | # -- objs don't matter to the structure of the return value 951 | try: 952 | return attrs[attr] 953 | except KeyError: 954 | raise AttributeError(attr) 955 | 956 | fn_1.rval_getattr = rval_getattr 957 | 958 | last_print_time = [time.time()] 959 | 960 | def check_for_print(offset, X): 961 | curtime = time.time() 962 | if (curtime - last_print_time[0]) > print_progress_every: 963 | logger.info('pyll_theano_batched_lmap.f_map %i %i' % ( 964 | offset, len(X))) 965 | last_print_time[0] = curtime 966 | 967 | if speed_thresh is not None: 968 | raise_if_slow() 969 | 970 | def f_map(X): 971 | if _debug_call_counts: 972 | _debug_call_counts['f_map'] += 1 973 | 974 | if len(X) == batchsize: 975 | check_for_print(offset=0, X=X) 976 | return time_fn(X) 977 | 978 | rval = np.empty((len(X),) + oshp[1:], dtype=s_obatch.dtype) 979 | offset = 0 980 | while offset < len(X): 981 | check_for_print(offset, X) 982 | xi = X[offset: offset + batchsize] 983 | fn_i = time_fn(xi) 984 | if not np.all(np.isfinite(fn_i)): 985 | raise ValueError('non-finite features') 986 | rval[offset:offset + len(xi)] = fn_i 987 | offset += len(xi) 988 | return rval 989 | 990 | return larray.lmap(fn_1, seq, f_map=f_map) 991 | 992 | 993 | @pyll.scope.define 994 | def np_transpose(obj, arg): 995 | return obj.transpose(*arg) 996 | 997 | 998 | @pyll.scope.define 999 | def np_RandomState(rseed): 1000 | rval = np.random.RandomState(rseed) 1001 | return rval 1002 | 1003 | 1004 | @pyll.scope.define 1005 | def flatten_elems(obj): 1006 | return obj.reshape(len(obj), -1) 1007 | 1008 | 1009 | @pyll.scope.define 1010 | def model_predict(mdl, X): 1011 | return mdl.predict(X) 1012 | 1013 | 1014 | @pyll.scope.define 1015 | def model_decisions(mdl, X): 1016 | return mdl.decisions(X) 1017 | 1018 | 1019 | @pyll.scope.define 1020 | def pickle_dumps(obj, protocol=None): 1021 | if protocol is None: 1022 | return cPickle.dumps(obj) 1023 | else: 1024 | return cPickle.dumps(obj, protocol=protocol) 1025 | 1026 | 1027 | @pyll.scope.define 1028 | def error_rate(pred, y): 1029 | return np.mean(pred != y) 1030 | 
1031 | 1032 | @pyll.scope.define 1033 | def print_ndarray_summary(msg, X): 1034 | print msg, X.dtype, X.shape, X.min(), X.max(), X.mean() 1035 | return X 1036 | 1037 | 1038 | @pyll.scope.define_info(o_len=2) 1039 | def slm_uniform_M_FB(nfilters, size, channels, rseed, normalize, dtype, 1040 | ret_cmajor): 1041 | print 'Allocating uniform filterbank', nfilters, size, channels 1042 | M = np.asarray(0).reshape((1, 1, 1)).astype(dtype) 1043 | FB = alloc_random_uniform_filterbank( 1044 | nfilters, size, size, channels, 1045 | dtype=dtype, 1046 | rseed=rseed, 1047 | normalize=normalize) 1048 | if FB.size == 0: 1049 | raise ValueError('filterbank had size 0') 1050 | if ret_cmajor: 1051 | return M, FB.transpose(0, 3, 1, 2) 1052 | else: 1053 | return M, FB 1054 | 1055 | 1056 | @pyll.scope.define 1057 | def larray_cache_memory(obj): 1058 | return larray.cache_memory(obj) 1059 | 1060 | 1061 | @pyll.scope.define 1062 | def larray_cache_memmap(obj, name, basedir=None, msg=None): 1063 | return larray.cache_memmap(obj, name, basedir=basedir, msg=msg) 1064 | 1065 | 1066 | @pyll.scope.define 1067 | def ceildiv(a, b): 1068 | return int(np.ceil(float(a) / float(b))) 1069 | 1070 | 1071 | @pyll.scope.define 1072 | def view2_worth_calculating(loss, ctrl, thresh_loss, thresh_rank): 1073 | # 1074 | # Decide whether to bother calculating the view2 score, which is slow. 1075 | # 1076 | 1077 | if thresh_loss is not None and loss > thresh_loss: 1078 | logger.info('worth_calculating_view2: False (loss %f > thresh %f)' % ( 1079 | loss, thresh_loss)) 1080 | return False 1081 | elif ctrl is None: 1082 | logger.info('worth_calculating_view2: True (ctrl is None)') 1083 | return True 1084 | else: 1085 | trials = ctrl.trials 1086 | # -- old logic 1087 | if hasattr(trials, 'handle'): 1088 | # -- hack for mongodb 1089 | query = { 1090 | 'result.status': hyperopt.STATUS_OK, 1091 | 'exp_key': trials._exp_key, 1092 | } 1093 | #docs = list(trials.handle.jobs.find(query, {'result.loss': 1})) 1094 | docs = isvm_boosting.BoostHelper.query_MongoTrials(trials, 1095 | query={ 1096 | 'result.status': hyperopt.STATUS_OK, 1097 | #'misc.boosting.continues': {'$in': [None, parent_tid]}, 1098 | }) 1099 | else: 1100 | # -- impl for local trials object 1101 | trials.refresh() 1102 | docs = [d for d in trials.trials 1103 | if ( 1104 | 'result' in d 1105 | and d['result']['status'] == hyperopt.STATUS_OK 1106 | and d['exp_key'] == trials._exp_key 1107 | )] 1108 | bh = isvm_boosting.BoostHelper(docs) 1109 | cur_parent = bh.continues(ctrl.current_trial) 1110 | cur_parent_tid = cur_parent['tid'] if cur_parent else None 1111 | best_sibling = bh.best_child(cur_parent) 1112 | if best_sibling: 1113 | logger.info( 1114 | 'view2_worth_calculating cur_parent:%s best_sibling:%s(%s)' 1115 | % (cur_parent_tid, best_sibling['tid'], 1116 | best_sibling['result']['loss'])) 1117 | if loss <= best_sibling['result']['loss']: 1118 | return True 1119 | else: 1120 | if thresh_rank > 1: 1121 | raise NotImplementedError('thresh_rank') 1122 | return False 1123 | else: 1124 | logger.info( 1125 | 'view2_worth_calculating cur_parent:%s, best_child:None' 1126 | % cur_parent_tid) 1127 | return True 1128 | #losses = [d['result']['loss'] for d in docs] 1129 | #losses.sort() 1130 | #if len(losses) < thresh_rank: 1131 | #logger.info('worth_calculating_view2: True (small len(losses))') 1132 | #return True 1133 | #else: 1134 | #rank = np.searchsorted(losses, loss) 1135 | #rval = rank < thresh_rank 1136 | #logger.info('worth_calculating_view2: %s (rank %i / %i)' % ( 1137 | #rval, 
rank, len(losses))) 1138 | #return rval 1139 | 1140 | 1141 | @pyll.scope.define 1142 | def average_row_l2norm(X): 1143 | return np.sqrt((np.asarray(X) ** 2).sum(axis=1)).mean() 1144 | 1145 | --------------------------------------------------------------------------------