├── __init__.py ├── README.md ├── LICENSE.md └── multiisotonic.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # multiisotonic 2 | An interface for multidimensional isotonic regression consistent with scikit-learn. 3 | 4 | In one dimension, points are completely ordered (i.e. every point is either greater than, less than, or equal to every other point). This case is handled by the sklearn.isotonic module. In the multidimensional case, a complete ordering of points is no longer generally possible, but points may still be partially ordered: If all of the coordinates of the first point are less than or equal to the coordinates of the second point, then the first point can be deemed less than or equal to the second. For example, in two dimensions, the points (1,3) and (2,2) are not ordered, but the point (1,3) is less than the point (2,4), which is less than the point (2,5). A multidimensional isotonic function is guaranteed to yield values that are nondecreasing when evaluated at a series of nondecreasing points; for example, f(1,3) <= f(2,4) <= f(2,5). 5 | 6 | A multidimensional isotonic regression takes a set of multidimensional points X, with corresponding values y, and returns an isotonic function with minimum squared distance from y. Algorithmically, this turns out to be mappable to a network flow problem (see [Picard 1976](http://dx.doi.org/10.1287/mnsc.22.11.1268) or [Spouge, Wan, and Wilbur 2003](http://dx.doi.org/10.1023/A:1023901806339)). This procedure is sensitive only to feature ranks, not values. 7 | 8 | This package requires [scikit-learn](https://github.com/scikit-learn/scikit-learn) and [python-igraph](https://github.com/igraph/python-igraph), and all sub-dependencies. 
9 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Alexander P. Fields 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of multiisotonic nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Author: Alex Fields (github.com/alexfields)

import numpy as np
from scipy import sparse
import igraph
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_X_y, check_array


class MultiIsotonicRegressor(BaseEstimator, RegressorMixin):
    """Regress a target value as a non-decreasing function of each input attribute,
    when the other attributes are non-decreasing.

    The fit maps the multidimensional isotonic regression onto a min-cut/max-flow
    problem (Picard 1976; Spouge, Wan & Wilbur 2003) solved with python-igraph.

    Parameters
    ----------
    min_partition_size : int, default=1
        The minimum allowable size to which to partition the training set,
        to avoid overfitting. Partition steps that would produce a smaller
        set are not performed.
    """

    def __init__(self, min_partition_size=1):
        self.min_partition_size = min_partition_size

    def fit(self, X, y):
        """Fit a multidimensional isotonic regression model.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training data.

        y : array-like, shape=(n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        # In principle, Infs would be OK, but better to complain and let the user handle it
        X, y = check_X_y(X, y, y_numeric=True)

        # Order along the first axis so any candidate dominator of row i can only
        # appear at index > i; this avoids roughly half of the pairwise comparisons.
        myorder = np.argsort(X[:, 0])
        self._training_set = X[myorder, :]
        ysort = np.asarray(y, dtype=np.float64)[myorder]

        # Build the partial-order adjacency matrix: adj[i, j] = 1 iff point i <= point j
        # componentwise (only j > i in sorted order need be checked; the first
        # coordinate is already ordered by the argsort above).
        indices = []
        indptr = [0]
        for (i, Xrow) in enumerate(self._training_set[:, 1:]):
            indices.append(np.flatnonzero((Xrow <= self._training_set[i+1:, 1:]).all(1)) + i + 1)
            indptr.append(indptr[-1] + len(indices[-1]))
        # int8 counts (not np.bool, removed in NumPy 1.24; boolean sparse
        # subtraction is also unsupported on modern NumPy/SciPy).
        adj = sparse.csr_matrix((np.ones(indptr[-1], dtype=np.int8), np.concatenate(indices), indptr),
                                shape=(X.shape[0], X.shape[0]), dtype=np.int8)
        # Transitive reduction: keep edge (i, j) only when no intermediate k exists
        # with i <= k <= j. adj.dot(adj)[i, j] counts two-step paths, so
        # adj > adj.dot(adj) is True exactly where adj == 1 and the path count is 0.
        direct = adj > adj.dot(adj)
        edges_to_add = list(zip(*direct.nonzero()))
        mygraph = igraph.Graph(n=y.size, edges=edges_to_add, directed=True, vertex_attrs={'y': ysort})

        def _add_source_sink(graph_part):
            """Add in the edges connecting the source and sink vertices to the internal nodes of the graph"""
            y_part = np.array(graph_part.vs['y'])
            y_part -= y_part.mean()
            # Capacity larger than any possible cut of source/sink edges, so the
            # mincut never severs an internal (partial-order) edge.
            maxval = np.abs(y_part).sum() + 1
            vsrc = graph_part.vcount()
            vsnk = vsrc + 1
            graph_part.add_vertices(2)
            # Above-mean nodes attach to the source, the rest to the sink,
            # each weighted by its deviation from the partition mean.
            src_snk_edges = [(vsrc, curr_v) if curr_y > 0 else (curr_v, vsnk)
                             for (curr_v, curr_y) in enumerate(y_part)]
            n_internal_edges = graph_part.ecount()
            graph_part.add_edges(src_snk_edges)
            graph_part.es['c'] = ([maxval] * n_internal_edges) + list(np.abs(y_part))

        def _partition_graph(origV):
            """Recursively partition a subgraph (indexed by origV) according to the mincut algorithm

            Parameters
            ----------
            origV : list-like
                A list of indices of mygraph corresponding to the subgraph to partition

            Returns
            -------
            partition : list of lists
                A list of lists of indices indicating the final partitioning of the graph
            """
            currgraph = mygraph.subgraph(origV)
            _add_source_sink(currgraph)
            # Source and sink were appended last, so they are vcount()-2 and vcount()-1.
            currpart = currgraph.mincut(currgraph.vcount() - 2, currgraph.vcount() - 1, 'c').partition
            # Each side includes its source/sink vertex, hence the -1 in the size check
            # and the [:-1] slices below.
            if len(currpart[0]) - 1 < self.min_partition_size or len(currpart[1]) - 1 < self.min_partition_size:
                # this partitioning would result in one of the sets being too small - so don't do it!
                return [origV]
            else:
                return (_partition_graph([origV[idx] for idx in currpart[0][:-1]]) +
                        _partition_graph([origV[idx] for idx in currpart[1][:-1]]))

        # Each final partition is predicted as the mean of its members' targets.
        nodes_to_cover = y.size
        self._training_set_scores = np.empty(y.size)
        for part in _partition_graph(list(range(y.size))):
            self._training_set_scores[part] = ysort[part].mean()
            nodes_to_cover -= len(part)
        assert nodes_to_cover == 0  # every training point must land in exactly one partition

        return self

    def predict(self, X):
        """Predict according to the isotonic fit.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)

        Returns
        -------
        C : array, shape=(n_samples,)
            Predicted values
        """
        if not hasattr(self, '_training_set'):
            raise NotFittedError("This MultiIsotonicRegressor instance is not fitted yet.")
        X = check_array(X)
        res = np.empty(X.shape[0])
        # When the features are below the entire training set, fall back to the
        # minimum training set value (the smallest isotonic-consistent prediction).
        minval = self._training_set_scores.min()
        for (i, Xrow) in enumerate(X):
            lower_training_set = (self._training_set <= Xrow).all(1)
            if lower_training_set.any():
                # Isotonicity: the prediction is the max score over dominated points.
                res[i] = self._training_set_scores[lower_training_set].max()
            else:
                res[i] = minval
        return res