├── .gitignore ├── LICENSE ├── README.md ├── envelopment.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | # IPython Notebook 66 | .ipynb_checkpoints 67 | 68 | # pyenv 69 | .python-version 70 | 71 | # dotenv 72 | .env 73 | 74 | # pycharm 75 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Matus Luptak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to 
permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Envelopment Analysis 2 | 3 | This repository implements the DEA (Data Envelopment Analysis) algorithm in Python, based on `numpy` (`scipy` stack). 4 | The model is built on the specifications and formulas described and derived in Sherman and Zhu (2006, pp. 49-89). 5 | 6 | ## Brief description 7 | The model is estimated by minimizing the `theta` expression for each unit, where theta is `uy/vx`, subject to constraints as defined in 8 | equation (2.2) in Sherman and Zhu. This dual linear program is optimized using *Sequential Least Squares Programming*, as implemented in `scipy` with the 9 | `scipy.optimize.fmin_slsqp` method. 10 | 11 | The model is optimized unit-by-unit, with weights being initialized randomly, with a uniform distribution between `-0.5` and `0.5`. 12 | After each unit's weights are optimized and the algorithm converges, `theta` (efficiency) of that unit is calculated and saved. 13 | 14 | After all units are fit, the `fit()` function prints `thetas` for each unit. 
"""
Data Envelopment Analysis implementation

Sources:
Sherman & Zhu (2006) Service Productivity Management, Improving Service Performance using Data Envelopment Analysis (DEA) [Chapter 2]
ISBN: 978-0-387-33211-6
http://deazone.com/en/resources/tutorial

"""

import numpy as np
from scipy.optimize import fmin_slsqp


class DEA(object):
    """
    Data Envelopment Analysis model fitted unit-by-unit with SLSQP.

    For every unit a combined vector ``[input weights, output weights, lambdas]``
    is optimized with ``scipy.optimize.fmin_slsqp``; the resulting ratio of
    weighted outputs to weighted inputs is stored as that unit's theta.
    """

    def __init__(self, inputs, outputs):
        """
        Initialize the DEA object with input data
        n = number of entities (observations)
        m = number of inputs (variables, features)
        r = number of outputs
        :param inputs: inputs, n x m numpy array (any 2-D array-like is accepted)
        :param outputs: outputs, n x r numpy array (any 2-D array-like is accepted)
        :raises ValueError: if either argument is not 2-D or the row counts differ
        """

        # supplied data; asarray leaves float ndarrays untouched and lets
        # callers pass nested lists or integer arrays as well
        self.inputs = np.asarray(inputs, dtype=float)
        self.outputs = np.asarray(outputs, dtype=float)

        if self.inputs.ndim != 2 or self.outputs.ndim != 2:
            raise ValueError("inputs and outputs must both be 2-D (units x variables)")
        if self.inputs.shape[0] != self.outputs.shape[0]:
            raise ValueError("inputs and outputs must describe the same number of units")

        # parameters
        self.n = self.inputs.shape[0]
        self.m = self.inputs.shape[1]
        self.r = self.outputs.shape[1]

        # iterators
        self.unit_ = range(self.n)
        self.input_ = range(self.m)
        self.output_ = range(self.r)

        # result arrays
        # NOTE: the builtin ``float`` replaces ``np.float``, an alias that was
        # removed from NumPy in 1.24 and raises AttributeError there.
        self.output_w = np.zeros((self.r, 1), dtype=float)  # output weights
        self.input_w = np.zeros((self.m, 1), dtype=float)  # input weights
        self.lambdas = np.zeros((self.n, 1), dtype=float)  # per-unit lambda multipliers
        self.efficiency = np.zeros_like(self.lambdas)  # thetas, one row per unit

        # names
        self.names = []

    def __efficiency(self, unit):
        """
        Efficiency function with already computed weights
        :param unit: which unit to compute for
        :return: efficiency (weighted outputs / weighted inputs) of that unit,
                 using the weights currently stored on the object
        """

        # only the requested unit's row is needed, so skip the other units
        denominator = np.dot(self.inputs[unit], self.input_w)
        numerator = np.dot(self.outputs[unit], self.output_w)

        return numerator/denominator

    def __target(self, x, unit):
        """
        Theta target function for one unit
        :param x: combined weights, laid out as [m input weights, r output weights, n lambdas]
        :param unit: which production unit to compute
        :return: theta
        """
        # unroll the weights; the lambdas at the tail of x do not enter theta
        in_w, out_w = x[:self.m], x[self.m:(self.m+self.r)]
        denominator = np.dot(self.inputs[unit], in_w)
        numerator = np.dot(self.outputs[unit], out_w)

        return numerator/denominator

    def __constraints(self, x, unit):
        """
        Constraints for optimization for one unit
        :param x: combined weights, laid out as [m input weights, r output weights, n lambdas]
        :param unit: which production unit to compute
        :return: array of m + r + n constraint values, passed to the
                 optimizer as inequality constraints (each must be >= 0)
        """

        lambdas = x[(self.m+self.r):]  # only the lambdas are used directly here
        theta = self.__target(x, unit)  # identical for every input, so computed once

        # for each input: theta * own input - lambda-weighted sum of that input
        input_cons = theta*self.inputs[unit] - np.dot(self.inputs.T, lambdas)

        # for each output: lambda-weighted sum of that output - own output
        output_cons = np.dot(self.outputs.T, lambdas) - self.outputs[unit]

        # for each unit: the lambda itself
        return np.concatenate((input_cons, output_cons, lambdas))

    def __optimize(self):
        """
        Optimization of the DEA model
        Uses scipy.optimize.fmin_slsqp, once per unit, on the combined vector
        [input weights, output weights, lambdas] with __target as the objective
        and __constraints as the inequality constraints.
        Weights are re-initialized uniformly in [-0.5, 0.5) for every unit, so
        results may differ between runs.
        :return: nothing; fills self.efficiency and leaves the last unit's
                 weights in self.input_w, self.output_w and self.lambdas
        """
        d0 = self.m + self.r + self.n
        # iterate over units
        for unit in self.unit_:
            # weights
            x0 = np.random.rand(d0) - 0.5
            x0 = fmin_slsqp(self.__target, x0, f_ieqcons=self.__constraints, args=(unit,))
            # unroll weights
            self.input_w, self.output_w, self.lambdas = x0[:self.m], x0[self.m:(self.m+self.r)], x0[(self.m+self.r):]
            self.efficiency[unit] = self.__efficiency(unit)

    def name_units(self, names):
        """
        Provide names for units for presentation purposes
        :param names: a list of names, equal in length to the number of units
        :return: nothing
        """

        assert self.n == len(names), "expected one name per unit"

        self.names = names

    def fit(self):
        """
        Optimize the dataset and print the theta of every unit
        :return: nothing; the thetas stay available in self.efficiency
        """

        self.__optimize()  # optimize

        print("Final thetas for each unit:\n")
        print("---------------------------\n")
        # ravel() yields scalars; formatting the 1-element rows of the n x 1
        # array with %f relies on an array-to-scalar conversion that NumPy
        # has deprecated
        for n, eff in enumerate(self.efficiency.ravel()):
            if len(self.names) > 0:
                name = "Unit %s" % self.names[n]
            else:
                name = "Unit %d" % (n+1)
            print("%s theta: %.4f" % (name, eff))
            print("\n")
        print("---------------------------\n")


if __name__ == "__main__":
    X = np.array([
        [20., 300.],
        [30., 200.],
        [40., 100.],
        [20., 200.],
        [10., 400.]
    ])
    y = np.array([
        [1000.],
        [1000.],
        [1000.],
        [1000.],
        [1000.]
    ])
    names = [
        'Bratislava',
        'Zilina',
        'Kosice',
        'Presov',
        'Poprad'
    ]
    dea = DEA(X, y)
    dea.name_units(names)
    dea.fit()