├── .gitignore
├── LICENSE
├── README.md
├── envelopment.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask instance folder
57 | instance/
58 | 
59 | # Sphinx documentation
60 | docs/_build/
61 | 
62 | # PyBuilder
63 | target/
64 | 
65 | # IPython Notebook
66 | .ipynb_checkpoints
67 | 
68 | # pyenv
69 | .python-version
70 | 
71 | # dotenv
72 | .env
73 | 
74 | # pycharm
75 | .idea/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Matus Luptak
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Data Envelopment Analysis
 2 | 
 3 | This repository implements the DEA (Data Envelopment Analysis) algorithm in Python, based on `numpy` (`scipy` stack).
 4 | The model is built on the specifications and formulas described and derived in Sherman and Zhu (2006, pp. 49-89).
 5 | 
 6 | ## Brief description
 7 | The model is estimated by minimizing the `theta` expression for each unit, where theta is `uy/vx`, subject to constraints as defined in
 8 | equation (2.2) in Sherman and Zhu. This dual linear program is optimized using *Sequential Least Squares Programming*, as implemented in `scipy` with the 
 9 | `scipy.optimize.fmin_slsqp` method. 
10 | 
11 | The model is optimized unit by unit; for each unit the weights are initialized randomly from a uniform distribution between `-0.5` and `0.5`.
12 | After each unit's weights are optimized and the algorithm converges, `theta` (efficiency) of that unit is calculated and saved.
13 | 
14 | After all units have been fitted, the `fit()` method prints the `theta` of each unit.
15 | 
16 | ## Sources
17 | Sherman and Zhu (2006) *Service Productivity Management*, Improving Service Performance using Data Envelopment Analysis (DEA) [Chapter 2]
18 | ISBN: 978-0-387-33211-6
19 | 


--------------------------------------------------------------------------------
/envelopment.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Data Envelopment Analysis implementation
  3 | 
  4 | Sources:
  5 | Sherman & Zhu (2006) Service Productivity Management, Improving Service Performance using Data Envelopment Analysis (DEA) [Chapter 2]
  6 | ISBN: 978-0-387-33211-6
  7 | http://deazone.com/en/resources/tutorial
  8 | 
  9 | """
 10 | 
 11 | import numpy as np
 12 | from scipy.optimize import fmin_slsqp
 13 | 
 14 | 
 15 | class DEA(object):
 16 | 
 17 |     def __init__(self, inputs, outputs):
 18 |         """
 19 |         Initialize the DEA object with input data
 20 |         n = number of entities (observations)
 21 |         m = number of inputs (variables, features)
 22 |         r = number of outputs
 23 |         :param inputs: inputs, n x m numpy array
 24 |         :param outputs: outputs, n x r numpy array
 25 |         :return: self
 26 |         """
 27 | 
 28 |         # supplied data
 29 |         self.inputs = inputs
 30 |         self.outputs = outputs
 31 | 
 32 |         # parameters
 33 |         self.n = inputs.shape[0]
 34 |         self.m = inputs.shape[1]
 35 |         self.r = outputs.shape[1]
 36 | 
 37 |         # iterators
 38 |         self.unit_ = range(self.n)
 39 |         self.input_ = range(self.m)
 40 |         self.output_ = range(self.r)
 41 | 
 42 |         # result arrays
 43 |         self.output_w = np.zeros((self.r, 1), dtype=np.float)  # output weights
 44 |         self.input_w = np.zeros((self.m, 1), dtype=np.float)  # input weights
 45 |         self.lambdas = np.zeros((self.n, 1), dtype=np.float)  # unit efficiencies
 46 |         self.efficiency = np.zeros_like(self.lambdas)  # thetas
 47 | 
 48 |         # names
 49 |         self.names = []
 50 | 
 51 |     def __efficiency(self, unit):
 52 |         """
 53 |         Efficiency function with already computed weights
 54 |         :param unit: which unit to compute for
 55 |         :return: efficiency
 56 |         """
 57 | 
 58 |         # compute efficiency
 59 |         denominator = np.dot(self.inputs, self.input_w)
 60 |         numerator = np.dot(self.outputs, self.output_w)
 61 | 
 62 |         return (numerator/denominator)[unit]
 63 | 
 64 |     def __target(self, x, unit):
 65 |         """
 66 |         Theta target function for one unit
 67 |         :param x: combined weights
 68 |         :param unit: which production unit to compute
 69 |         :return: theta
 70 |         """
 71 |         in_w, out_w, lambdas = x[:self.m], x[self.m:(self.m+self.r)], x[(self.m+self.r):]  # unroll the weights
 72 |         denominator = np.dot(self.inputs[unit], in_w)
 73 |         numerator = np.dot(self.outputs[unit], out_w)
 74 | 
 75 |         return numerator/denominator
 76 | 
 77 |     def __constraints(self, x, unit):
 78 |         """
 79 |         Constraints for optimization for one unit
 80 |         :param x: combined weights
 81 |         :param unit: which production unit to compute
 82 |         :return: array of constraints
 83 |         """
 84 | 
 85 |         in_w, out_w, lambdas = x[:self.m], x[self.m:(self.m+self.r)], x[(self.m+self.r):]  # unroll the weights
 86 |         constr = []  # init the constraint array
 87 | 
 88 |         # for each input, lambdas with inputs
 89 |         for input in self.input_:
 90 |             t = self.__target(x, unit)
 91 |             lhs = np.dot(self.inputs[:, input], lambdas)
 92 |             cons = t*self.inputs[unit, input] - lhs
 93 |             constr.append(cons)
 94 | 
 95 |         # for each output, lambdas with outputs
 96 |         for output in self.output_:
 97 |             lhs = np.dot(self.outputs[:, output], lambdas)
 98 |             cons = lhs - self.outputs[unit, output]
 99 |             constr.append(cons)
100 | 
101 |         # for each unit
102 |         for u in self.unit_:
103 |             constr.append(lambdas[u])
104 | 
105 |         return np.array(constr)
106 | 
107 |     def __optimize(self):
108 |         """
109 |         Optimization of the DEA model
110 |         Use: http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.optimize.linprog.html
111 |         A = coefficients in the constraints
112 |         b = rhs of constraints
113 |         c = coefficients of the target function
114 |         :return:
115 |         """
116 |         d0 = self.m + self.r + self.n
117 |         # iterate over units
118 |         for unit in self.unit_:
119 |             # weights
120 |             x0 = np.random.rand(d0) - 0.5
121 |             x0 = fmin_slsqp(self.__target, x0, f_ieqcons=self.__constraints, args=(unit,))
122 |             # unroll weights
123 |             self.input_w, self.output_w, self.lambdas = x0[:self.m], x0[self.m:(self.m+self.r)], x0[(self.m+self.r):]
124 |             self.efficiency[unit] = self.__efficiency(unit)
125 | 
126 |     def name_units(self, names):
127 |         """
128 |         Provide names for units for presentation purposes
129 |         :param names: a list of names, equal in length to the number of units
130 |         :return: nothing
131 |         """
132 | 
133 |         assert(self.n == len(names))
134 | 
135 |         self.names = names
136 | 
137 |     def fit(self):
138 |         """
139 |         Optimize the dataset, generate basic table
140 |         :return: table
141 |         """
142 | 
143 |         self.__optimize()  # optimize
144 | 
145 |         print("Final thetas for each unit:\n")
146 |         print("---------------------------\n")
147 |         for n, eff in enumerate(self.efficiency):
148 |             if len(self.names) > 0:
149 |                 name = "Unit %s" % self.names[n]
150 |             else:
151 |                 name = "Unit %d" % (n+1)
152 |             print("%s theta: %.4f" % (name, eff))
153 |             print("\n")
154 |         print("---------------------------\n")
155 | 
156 | 
if __name__ == "__main__":
    # Demo run: five named units, two inputs per unit, one output that is
    # the same (1000) for every unit.
    unit_rows = [
        ('Bratislava', 20., 300.),
        ('Zilina', 30., 200.),
        ('Kosice', 40., 100.),
        ('Presov', 20., 200.),
        ('Poprad', 10., 400.),
    ]

    names = [row[0] for row in unit_rows]
    X = np.array([row[1:] for row in unit_rows])
    y = np.full((len(unit_rows), 1), 1000.)

    # build the model, attach the names and print the fitted thetas
    dea = DEA(X, y)
    dea.name_units(names)
    dea.fit()
182 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.11.0
2 | scipy>=0.16.1


--------------------------------------------------------------------------------