├── pyforma
    ├── __init__.py
    ├── tests
    │   ├── __init__.py
    │   └── test_pyforma.py
    └── pyforma.py
├── .gitignore
├── setup.py
├── .travis.yml
├── LICENSE
└── README.md


/pyforma/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pyforma/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name='pyforma',
 5 |     version='0.1dev',
 6 |     description='Installs and runs pyforma.',
 7 |     author='Oakland Analytics',
 8 |     author_email='oaklandanalytics@gmail.com',
 9 |     license='BSD',
10 |     url='https://github.com/fscottfoti/pyforma',
11 |     classifiers=[
12 |         'Development Status :: 4 - Beta',
13 |         'Programming Language :: Python :: 2.7',
14 |         'License :: OSI Approved :: BSD License'
15 |     ],
16 |     packages=find_packages(exclude=['*.tests'])
17 | )
18 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "2.7"
 4 | install:
 5 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh
 6 |   -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
 7 |   -O miniconda.sh; fi
 8 | - bash miniconda.sh -b -p $HOME/miniconda
 9 | - export PATH="$HOME/miniconda/bin:$PATH"
10 | - hash -r
11 | - conda config --set always_yes yes --set changeps1 no
12 | - conda update -q conda
13 | - conda info -a
14 | - |
15 |   conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pandas pip pytables pytest
16 | - source activate test-environment
17 | - pip install pytest-cov coveralls pep8
18 | script:
19 | - pep8 .
20 | - py.test --cov pyforma --cov-report term-missing
21 | after_success:
22 | - coveralls
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2016, Fletcher Foti
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of pyforma nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/pyforma/pyforma.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | 
  4 | 
  5 | def describe_cartesian_product(*args):
  6 |     """
  7 |     Returns a string to describe
  8 |     """
  9 |     n = reduce(lambda x, y: x * y, [len(s) for s in args])
 10 |     s = reduce(lambda x, y: str(x) + " x " + str(y),
 11 |                [len(s) for s in args]) + " = " + str(n)
 12 | 
 13 |     return s
 14 | 
 15 | 
 16 | def cartesian_product(*args):
 17 |     """
 18 |     Return the cartesion product of multiple series as a dataframe -
 19 |     just pass in the series as arguments (see the test)
 20 |     """
 21 | 
 22 |     dfs = [pd.DataFrame({s.name: s, "key": 0}) for s in args]
 23 |     df = reduce(
 24 |         lambda df1, df2: df1.merge(df2, how='left', on='key'),
 25 |         dfs)
 26 |     df.drop('key', 1, inplace=True)   # drop temp key
 27 |     return df
 28 | 
 29 | 
 30 | def price_per_sqft_with_affordable_housing(
 31 |     price_per_sqft,
 32 |     sqft_per_unit,
 33 |     AMI,
 34 |     depth_of_affordability,
 35 |     price_multiplier,
 36 |     cap_rate,
 37 |     pct_affordable_units
 38 | ):
 39 | 
 40 |     AMI *= depth_of_affordability
 41 | 
 42 |     monthly_payment = AMI * .33 / 12 * price_multiplier
 43 | 
 44 |     value_of_payment = monthly_payment * 12 / cap_rate
 45 | 
 46 |     affordable_price_per_sqft = value_of_payment / sqft_per_unit
 47 | 
 48 |     blended_price_per_sqft = \
 49 |         pct_affordable_units * affordable_price_per_sqft + \
 50 |         (1-pct_affordable_units) * price_per_sqft
 51 | 
 52 |     return blended_price_per_sqft
 53 | 
 54 | 
 55 | def average_unit_size(cfg):
 56 |     """
 57 |     Compute the overall average unit size, combining the unit mix
 58 |     and sizes per unit
 59 |     """
 60 | 
 61 |     sizes = 0
 62 |     for use_type, mix in \
 63 |             zip(cfg["use_mix"]["use_types"], cfg["use_mix"]["mix"]):
 64 | 
 65 |         sizes += cfg["use_types"][use_type]["size"] * mix
 66 | 
 67 |     return sizes
 68 | 
 69 | 
 70 | def spot_residential_sales_proforma(cfg):
 71 |     """
 72 |     This takes a hierarchical Python object of a certain form and
 73 |     passes back another Python object.  Documenting the structure
 74 |     is not well suited to pydocs - see the Readme instead.
 75 |     """
 76 | 
 77 |     parcel_acres = cfg["parcel_size"] / 43560.0
 78 | 
 79 |     num_units_by_type = {"residential_units": 0}
 80 | 
 81 |     # compute basic measures for floor area in units
 82 |     usable_floor_area = 0
 83 |     revenue = 0
 84 |     parking_spaces = 0
 85 |     for use_type, mix in \
 86 |             zip(cfg["use_mix"]["use_types"], cfg["use_mix"]["mix"]):
 87 | 
 88 |         # this allow non-int numbers of units
 89 |         num_units = mix * cfg["built_dua"] * parcel_acres
 90 | 
 91 |         num_units_by_type["residential_units"] += num_units
 92 | 
 93 |         num_units_by_type[use_type + "_num_units"] = num_units
 94 | 
 95 |         use_cfg = cfg["use_types"][use_type]
 96 | 
 97 |         usable_floor_area += use_cfg["size"] * num_units
 98 | 
 99 |         if "affordable_housing" in cfg:
100 | 
101 |             aff_cfg = cfg["affordable_housing"]
102 |             price_per_sqft = price_per_sqft_with_affordable_housing(
103 |                 use_cfg["price_per_sqft"],
104 |                 use_cfg["size"],
105 |                 aff_cfg["AMI"],
106 |                 aff_cfg.get("depth_of_affordability", 1.0),
107 |                 aff_cfg["price_multiplier_by_type"][use_type],
108 |                 cfg["cap_rate"],
109 |                 aff_cfg["pct_affordable_units"]
110 |             )
111 | 
112 |         else:
113 | 
114 |             price_per_sqft = use_cfg["price_per_sqft"]
115 | 
116 |         revenue += use_cfg["size"] * price_per_sqft * num_units
117 |         parking_spaces += use_cfg["parking_ratio"] * num_units
118 | 
119 |     # add in ground floor measures
120 |     if "ground_floor" in cfg["use_mix"]:
121 | 
122 |         # this assumes ground floor is non-res - is that ok?
123 |         ground_floor = cfg["use_mix"]["ground_floor"]
124 |         ground_floor_type = ground_floor["use"]
125 |         ground_floor_size = ground_floor["size"]
126 | 
127 |         usable_floor_area += ground_floor_size
128 |         use_cfg = cfg["use_types"][ground_floor_type]
129 |         revenue_from_ground_floor = \
130 |             use_cfg["rent_per_sqft"] / cfg["cap_rate"] * ground_floor_size
131 |         revenue += revenue_from_ground_floor
132 |         parking_spaces += ground_floor_size / cfg["non_res_parking_denom"] * \
133 |             use_cfg["parking_ratio"]
134 | 
135 |     # now compute parking attributes for the building so far
136 |     parking_type = cfg["parking_type"]
137 |     parking_cfg = cfg["parking_types"][parking_type]
138 |     parking_area = parking_spaces * parking_cfg["space_size"]
139 |     parking_cost = parking_area * parking_cfg["space_cost_sqft"]
140 | 
141 |     floor_area_including_common_space = \
142 |         usable_floor_area / cfg["building_efficiency"]
143 | 
144 |     # compute the building footprint
145 | 
146 |     max_footprint = cfg["parcel_size"] * cfg["parcel_efficiency"]
147 | 
148 |     if parking_type == "surface":
149 |         if max_footprint - parking_area < .1 * cfg["parcel_size"]:
150 |             # building has to be 10% of the parcel
151 |             raise Error("Parking covers >90%% of the parcel")
152 |         max_footprint -= parking_area
153 |         total_floor_area = floor_area_including_common_space
154 | 
155 |     elif parking_type == "deck":
156 |         total_floor_area = floor_area_including_common_space + parking_area
157 | 
158 |     elif parking_type == "underground":
159 |         total_floor_area = floor_area_including_common_space
160 | 
161 |     stories = np.ceil(total_floor_area / max_footprint)
162 |     footprint_size = total_floor_area / stories
163 | 
164 |     # now compute costs
165 |     building_type = cfg["building_types"][cfg["building_type"]]
166 |     cost = floor_area_including_common_space * \
167 |         building_type["cost_per_sqft"] * cfg["cost_shifter"] + \
168 |         parking_cost + cfg["parcel_acquisition_cost"]
169 | 
170 |     profit = revenue - cost
171 | 
172 |     # check against max_dua
173 |     failure_dua = cfg["built_dua"] > cfg["max_dua"] \
174 |         if "max_dua" in cfg else False
175 | 
176 |     # check against max_far
177 |     built_far = total_floor_area / cfg["parcel_size"]
178 |     if "max_far" in cfg:
179 |         failure_far = built_far > cfg["max_far"]
180 | 
181 |     # check against max_height
182 |     height = stories * cfg["height_per_story"]
183 |     if "max_height" in cfg:
184 |         failure_height = height > cfg["max_height"]
185 | 
186 |     # check against buiding type densities
187 |     failure_btype = \
188 |         (cfg["built_dua"] < building_type["allowable_densities"][0]) | \
189 |         (cfg["built_dua"] > building_type["allowable_densities"][1])
190 | 
191 |     out = {
192 |         "built_far": built_far,
193 |         "height": height,
194 |         "usable_floor_area": usable_floor_area,
195 |         "floor_area_including_common_space": floor_area_including_common_space,
196 |         "ground_floor_type": ground_floor_type,
197 |         "ground_floor_size": ground_floor_size,
198 |         "footprint_size": footprint_size,
199 |         "revenue_from_ground_floor": revenue_from_ground_floor,
200 |         "parking_type": parking_type,
201 |         "parking_spaces": parking_spaces,
202 |         "parking_area": parking_area,
203 |         "parking_cost": parking_cost,
204 |         "total_floor_area": total_floor_area,
205 |         "revenue": revenue,
206 |         "cost": cost,
207 |         "profit": profit,
208 |         "stories": stories,
209 |         "failure_dua": failure_dua,
210 |         "failure_far": failure_far,
211 |         "failure_height": failure_height,
212 |         "failure_btype": failure_btype,
213 |         "building_type": cfg["building_type"]
214 |     }
215 | 
216 |     for k, v in num_units_by_type.iteritems():
217 |         out[k] = v
218 | 
219 |     if "affordable_housing" in cfg:
220 |         out["affordable_units"] = out["residential_units"] * \
221 |             cfg["affordable_housing"]["pct_affordable_units"]
222 | 
223 |     return out
224 | 


--------------------------------------------------------------------------------
/pyforma/tests/test_pyforma.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | 
  4 | import pandas as pd
  5 | import numpy as np
  6 | import pytest
  7 | import pprint
  8 | 
  9 | from .. import pyforma
 10 | 
 11 | pp = pprint.PrettyPrinter(indent=4)
 12 | 
 13 | 
 14 | def assert_almost_equal(*args):
 15 |     for i in range(1, len(args)):
 16 |         assert abs(args[0] - args[i]) < .001
 17 | 
 18 | 
 19 | @pytest.fixture
 20 | def pro_forma_config_basic():
 21 |     return {
 22 |         "use_types": {
 23 |             "0br": {
 24 |                 "price_per_sqft": 600,
 25 |                 "size": 600,
 26 |                 "parking_ratio": .3
 27 |             },
 28 |             "1br": {
 29 |                 "price_per_sqft": 650,
 30 |                 "size": 750,
 31 |                 "parking_ratio": 1.0
 32 |             },
 33 |             "2br": {
 34 |                 "price_per_sqft": 700,
 35 |                 "size": 850,
 36 |                 "parking_ratio": 1.5
 37 |             },
 38 |             "3br+": {
 39 |                 "price_per_sqft": 750,
 40 |                 "size": 1000,
 41 |                 "parking_ratio": 2
 42 |             },
 43 |             "retail": {
 44 |                 "rent_per_sqft": 3,
 45 |                 "parking_ratio": 2
 46 |             }
 47 |         },
 48 |         "parking_types": {
 49 |             "surface": {
 50 |                 "space_size": 300,
 51 |                 "space_cost_sqft": 30
 52 |             },
 53 |             "deck": {
 54 |                 "space_size": 250,
 55 |                 "space_cost_sqft": 90
 56 |             },
 57 |             "underground": {
 58 |                 "space_size": 250,
 59 |                 "space_cost_sqft": 110
 60 |             }
 61 |         },
 62 |         "building_types": {
 63 |             "garden_apartments": {
 64 |                 "cost_per_sqft": 400,
 65 |                 "allowable_densities": [5, 15]
 66 |             }, "fancy_condos": {
 67 |                 "cost_per_sqft": 800,
 68 |                 "allowable_densities": [10, 20]
 69 |             }, "ground_floor_retail": {
 70 |                 "cost_per_sqft": 600
 71 |             }
 72 |         },
 73 |         "parcel_size": 43560,
 74 |         "cap_rate": .06,
 75 |         "max_far": 1.2,
 76 |         "max_height": 20,
 77 |         "height_per_story": 12,
 78 |         "parcel_efficiency": .8,
 79 |         "building_efficiency": .8,
 80 |         "cost_shifter": 1.2,
 81 |         "parcel_acquisition_cost": 1000000,
 82 |         "non_res_parking_denom": 1000,
 83 |         "use_mix": {
 84 |             "use_types": ["0br", "1br", "2br"],
 85 |             "mix": [.3, .3, .4],
 86 |             "ground_floor": {
 87 |                 "use": "retail",
 88 |                 "size": 3000
 89 |             }
 90 |         },
 91 |         "absorption_in_months": 20,  # XXX not used yet
 92 |         "parking_type": "deck",
 93 |         "building_type": "garden_apartments",
 94 |         "built_dua": 10
 95 |     }
 96 | 
 97 | 
 98 | def test_cartesian_product():
 99 | 
100 |     df = pyforma.cartesian_product(
101 |         pd.Series([5, 10, 30], name="dua"),
102 |         pd.Series([1, 1.5, 2], name="far"),
103 |         pd.Series([1000, 2000, 3000], name="parcel_sizes"),
104 |         pd.Series([500, 600], name="price_per_sqft")
105 |     )
106 | 
107 |     assert len(df) == 3 * 3 * 3 * 2
108 | 
109 |     assert df.price_per_sqft.value_counts().loc[500] == 3 * 3 * 3
110 | 
111 |     assert df.dua.value_counts().loc[5] == 3 * 3 * 2
112 | 
113 |     assert len(df.query("dua == 5 and far == 1.5 and parcel_sizes == 1000" +
114 |                         " and price_per_sqft == 600")) == 1
115 | 
116 | 
def test_performance_of_vectorized(pro_forma_config_basic):
    """
    Time one vectorized run over a whole cartesian product of inputs
    against running the same scenarios one row at a time, and assert
    the vectorized run is more than 100x faster.
    """
    cfg = pro_forma_config_basic

    scenarios = pyforma.cartesian_product(
        pd.Series(np.arange(1, 300, 5), name="dua"),
        pd.Series(np.arange(.25, 8, .5), name="far"),
        pd.Series(np.arange(1000, 100000, 50000), name="parcel_size"),
        pd.Series(np.arange(500, 2000, 500), name="price_per_sqft")
    )

    # vectorized: each varying input is passed as a whole column
    cfg["parcel_size"] = scenarios.parcel_size
    cfg["max_dua"] = scenarios.dua
    cfg["max_far"] = scenarios.far
    cfg["use_types"]["2br"]["price_per_sqft"] = scenarios.price_per_sqft

    start = time.time()
    pyforma.spot_residential_sales_proforma(cfg)
    vectorized_secs = time.time() - start

    # scalar: the same inputs, one scenario per call
    start = time.time()
    for _, scenario in scenarios.iterrows():
        cfg["parcel_size"] = scenario.parcel_size
        cfg["max_dua"] = scenario.dua
        cfg["max_far"] = scenario.far
        cfg["use_types"]["2br"]["price_per_sqft"] = scenario.price_per_sqft
        pyforma.spot_residential_sales_proforma(cfg)
    looped_secs = time.time() - start

    speedup = looped_secs / vectorized_secs

    # the original author reports roughly 900x for large batches and
    # less for a batch as small as this one; only 100x is required here
    assert speedup > 100
154 | 
155 | 
def test_different_parking_types(pro_forma_config_basic):
    """
    Run the same config with surface, deck and underground parking and
    check how spaces, area, stories, FAR and profit relate across the
    three results.
    """
    cfg = pro_forma_config_basic

    results = {}
    for parking_type in ("surface", "deck", "underground"):
        cfg["parking_type"] = parking_type
        results[parking_type] = pyforma.spot_residential_sales_proforma(cfg)

    surface = results["surface"]
    deck = results["deck"]
    underground = results["underground"]
    parking_types = cfg["parking_types"]

    # the number of spaces does not depend on how they are provided
    assert surface["parking_spaces"] == deck["parking_spaces"] == \
        underground["parking_spaces"]

    assert surface["parking_area"] == \
        surface["parking_spaces"] * parking_types["surface"]["space_size"]

    # surface pushes the building up, underground keeps it low
    assert surface["stories"] >= deck["stories"] >= underground["stories"]

    parking_far = deck["parking_area"] / cfg["parcel_size"]
    # these don't perfectly equal so compare the differences to a tolerance
    assert deck["built_far"] - parking_far - underground["built_far"] < .01
    assert deck["built_far"] - parking_far - surface["built_far"] < .01

    # the profit gap between deck and surface is the parking cost gap
    deck_parking_cost = deck["parking_area"] * \
        parking_types["deck"]["space_cost_sqft"]
    surface_parking_cost = surface["parking_area"] * \
        parking_types["surface"]["space_cost_sqft"]
    assert -1 * (deck["profit"] - surface["profit"]) == \
        deck_parking_cost - surface_parking_cost
190 | 
191 | 
def test_pyforma_basic_vectorized(pro_forma_config_basic):
    """
    Run the pro forma with parcel size and 2br price passed as series,
    check it finishes in under a second, and check that rows differing
    only in price differ in revenue and profit by exactly the price
    change applied to the 2br floor area.
    """
    cfg = pro_forma_config_basic

    series = [
        pd.Series(np.arange(1, 300, 5), name="dua"),
        pd.Series(np.arange(.25, 8, .5), name="far"),
        pd.Series(np.arange(1000, 100000, 10000), name="parcel_size"),
        pd.Series(np.arange(500, 2000, 250), name="price_per_sqft")
    ]
    df = pyforma.cartesian_product(*series)
    # print() with a single argument behaves the same on Python 2 and 3;
    # the print statement is a SyntaxError on Python 3
    print(pyforma.describe_cartesian_product(*series))

    pro_forma_config_basic["parcel_size"] = df.parcel_size
    pro_forma_config_basic["use_types"]["2br"]["price_per_sqft"] = \
        df.price_per_sqft

    t1 = time.time()
    ret = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)
    t2 = time.time()
    assert t2 - t1 < 1.0

    print("Ran {} pro forma in {:.2f}s".format(len(df), t2-t1))

    ret = pd.DataFrame(ret)

    one, two, three = df.loc[0], df.loc[1], df.loc[2]
    # only thing in the assumptions that's different is the price
    assert one.dua == two.dua == three.dua
    assert one.far == two.far == three.far
    assert one.parcel_size == two.parcel_size == three.parcel_size
    assert one.price_per_sqft + 500 == two.price_per_sqft + 250 == \
        three.price_per_sqft

    # since the only thing that's different is the price, the revenue
    # and profit should be different by the number of 2brs, the size of
    # the 2 brs, and the difference in the price per size
    one, two, three = ret.iloc[0], ret.iloc[1], ret.iloc[2]
    assert_almost_equal(5.52, one.built_far, two.built_far, three.built_far)
    assert_almost_equal(
        three.revenue - two.revenue,
        one["2br_num_units"] * cfg["use_types"]["2br"]["size"] * 250)
    assert_almost_equal(
        two.profit - one.profit,
        one["2br_num_units"] * cfg["use_types"]["2br"]["size"] * 250)
237 | 
238 | 
def test_pyforma_basic(pro_forma_config_basic):
    """
    Run the scalar pro forma on the baseline config and check every
    output against a hand computation.
    """
    out = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)

    # 10 units split 3 / 3 / 4 across 0br / 1br / 2br, plus the retail
    residential_area = 3 * 600 + 3 * 750 + 4 * 850
    assert out["usable_floor_area"] == residential_area + 3000

    assert out["stories"] == 1

    common = out["floor_area_including_common_space"]
    assert common == out["usable_floor_area"] / .8

    expected_spaces = 3 * .3 + 3 * 1 + 4 * 1.5 + 3000 / 1000.0 * 2
    assert out["parking_spaces"] == expected_spaces

    ground_floor_revenue = out["revenue_from_ground_floor"]
    assert ground_floor_revenue == 3000 * 3 / .06

    residential_revenue = 3 * 600 * 600 + 3 * 750 * 650 + 4 * 850 * 700
    assert out["revenue"] == residential_revenue + ground_floor_revenue

    assert out["ground_floor_type"] == "retail"

    assert out["profit"] == out["revenue"] - out["cost"]

    assert out["parking_type"] == "deck"

    assert out["parking_area"] == out["parking_spaces"] * 250

    # deck parking counts towards the built floor area
    assert out["total_floor_area"] == common + out["parking_area"]

    assert out["footprint_size"] == out["total_floor_area"] / out["stories"]

    assert out["parking_cost"] == out["parking_area"] * 90

    assert out["cost"] == out["parking_cost"] + \
        common * 400 * 1.2 + 1000000

    assert out["building_type"] == "garden_apartments"

    assert round(out["built_far"], 2) == 0.39

    assert out["height"] == 12

    # the zoning flags are present because the config sets both limits
    for flag in ("failure_height", "failure_far"):
        assert flag in out
285 | 
286 | 
def test_average_unit_size(pro_forma_config_basic):
    """
    The average unit size is each unit size weighted by its mix share.
    """
    expected = 600 * .3 + 750 * .3 + 850 * .4

    assert pyforma.average_unit_size(pro_forma_config_basic) == expected
292 | 
293 | 
def test_affordable_housing(pro_forma_config_basic):
    """
    Check the blended price helper at 0%, 100% and 25% affordable
    units, then check the affordable unit count coming out of the full
    pro forma.
    """
    market_prices = pd.Series([400, 500, 600])

    def blended_price(pct_affordable_units):
        # every input except the affordable share is held fixed
        return pyforma.price_per_sqft_with_affordable_housing(
            market_prices,           # price per sqft
            1000,                    # sqft per unit
            125000,                  # AMI
            .8,                      # pct of AMI
            .75,                     # price multiplier for 1BR
            .05,                     # interest rate
            pct_affordable_units
        )

    # with no affordable units the market price passes straight through
    np.testing.assert_array_equal(market_prices, blended_price(0))

    # with only affordable units the market price drops out entirely -
    # for these numbers the affordable price per sqft is 495
    np.testing.assert_allclose(blended_price(1.0), [495]*3)

    # now a blended version, a quarter affordable
    np.testing.assert_allclose(
        blended_price(.25), [423.75,  498.75,  573.75])

    # test running through the json api too
    pro_forma_config_basic["affordable_housing"] = {
        "AMI": 80000,
        "depth_of_affordability": .8,
        "pct_affordable_units": .2,
        "price_multiplier_by_type": {
            "0br": .7,
            "1br": .75,
            "2br": .9,
            "3br+": 1.04
        }
    }
    out = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)

    assert out["affordable_units"] == .2 * out["residential_units"]
353 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pyforma
  2 | 
  3 | Real estate pro formas in Python
  4 | 
  5 | [![Build Status](https://travis-ci.org/oaklandanalytics/pyforma.svg?branch=master)](https://travis-ci.org/oaklandanalytics/pyforma) [![Coverage Status](https://coveralls.io/repos/github/oaklandanalytics/pyforma/badge.svg?branch=master)](https://coveralls.io/github/oaklandanalytics/pyforma?branch=master)
  6 | 
  7 | 
  8 | The pro formas contained in this project are taught in real estate analysis classes and predict which kinds of building(s) can be built on a plot of land by summing inflows and outflows of cash that will be gained and lost while constructing and selling a building.
  9 | 
 10 | Pro formas range from exceptionally simple to extraordinarily complex.  Most of the pro formas contained in this project are relatively simple as the purpose of this project is to run pro formas over large parts of a city to learn, for instance, the impact of an affordable housing policy on the number of housing units generated across the entire city.
 11 | 
 12 | From a programming perspective, this project hopes to write a reasonable API to execute pro formas in code, rather than the de facto standard for pro formas, which is Excel.  The API we chose is a hierarchical object, which clearly takes some inspiration from JavaScript (this is the de rigueur approach in JS), but the first implementation is written in Python as we think Python is a simpler language to get started running basic data science applications.  A JavaScript version of many of these pro formas will eventually be written so as to perform analysis in an interactive fashion directly in the browser.
 13 | 
 14 | Additionally, after many years of working with Python Pandas, I've determined that running vectorized financial analysis is challenging to both read and write.  The typical real estate analyst comes from an Excel background, and the code he or she reads and writes should not be complicated vector and matrix operations (e.g. argmax or dot product or matrix multiply).  On the other hand, Pandas runs roughly 900 times faster than a simple Python "for" loop.
 15 | 
 16 | The decision we made was thus to make the API first operate on scalars (i.e. numbers) so that the simple logic and intent of the API can become clear, and to allow the substitution of a pd.Series (a vector) for each scalar in almost all places in order to gain performance improvements over large datasets.
 17 | 
 18 | This has two additional benefits.  First, sometimes a value is not known for *every* parcel, in this case a scalar best guess can be substituted for having a specific value associated with every parcel.  Second, we can take advantage of the readability of the standard Pandas operation that `pd.Series([1, 2, 3]) * 2 == pd.Series([2, 4, 6])` so that the code is the same in almost every place whether a parameter is scalar or vector.
 19 | 
 20 | ## Spot Pro Forma
 21 | 
 22 | The simplest pro forma we call a `spot` pro forma because it does not consider cash flows over time.  In this case most inflows and outflows are in costs per square foot and sales prices per sqft.  This pro forma is mainly an accounting of policies like parking ratios, ground floor retail, unit mixes and the like.  Even though it's so simple, it's extraordinarily powerful as the level of detail in data necessary to run most parcel-scale pro formas is simply not available across the scope of a city, and the effect of such specific analysis on a city-wide scale would be modest.
 23 | 
 24 | The steps for computing a spot pro forma are roughly:
 25 | 
 26 | * Take a `built_dua` (the dwelling units per acre that is the density at which a building will be constructed) and multiply times a unit mix to get the number of units of each type (1BR, 2BR and so forth - the unit types are specified by the user as well).
 27 | 
 28 | * Use price per sqft, average unit size, and parking ratios per unit to compute the total revenue, built space, and parking spaces for the building.
 29 | 
 30 | * If ground floor uses are specified (e.g. retail), add revenue, built area, and parking spaces for the non-residential portion of the building.
 31 | 
 32 | * Take the number of parking spaces and the parking type specified by the user to compute the total built area and cost of parking.
 33 | 
 34 | * Apply a net to gross factor for common spaces.
 35 | 
 36 | * Based on the parking type (surface, deck, or underground), configure the building on the parcel to compute the area of the building footprint and number of stories.
 37 | 
 38 | * Compute profit as revenues minus costs.
 39 | 
 40 | * Check for constraint failures if the user passes a building configuration that conflicts with the building that gets constructed (e.g. garden apartments can't be more than 3 stories tall).  Also check for zoning violations such as maximum FAR and maximum height limits.
 41 | 
 42 | ### The API
 43 | 
 44 | The spot pro forma API looks like this (a good place to start to learn how to use the API is to explore the thorough unit tests in the `tests` directory - this example comes directly from the tests).
 45 | 
 46 | ```json
 47 | {
 48 |     "use_types": {
 49 |         "0br": {
 50 |             "price_per_sqft": 600,
 51 |             "size": 600,
 52 |             "parking_ratio": 0.3
 53 |         },
 54 |         "1br": {
 55 |             "price_per_sqft": 650,
 56 |             "size": 750,
 57 |             "parking_ratio": 1.0
 58 |         },
 59 |         "2br": {
 60 |             "price_per_sqft": 700,
 61 |             "size": 850,
 62 |             "parking_ratio": 1.5
 63 |         },
 64 |         "3br+": {
 65 |             "price_per_sqft": 750,
 66 |             "size": 1000,
 67 |             "parking_ratio": 2
 68 |         },
 69 |         "retail": {
 70 |             "rent_per_sqft": 30,
 71 |             "parking_ratio": 2
 72 |         }
 73 |     },
 74 |     "parking_types": {
 75 |         "surface": {
 76 |             "space_size": 300,
 77 |             "space_cost_sqft": 30
 78 |         },
 79 |         "deck": {
 80 |             "space_size": 250,
 81 |             "space_cost_sqft": 90
 82 |         },
 83 |         "underground": {
 84 |             "space_size": 250,
 85 |             "space_cost_sqft": 110
 86 |         }
 87 |     },
 88 |     "building_types": {
 89 |         "garden_apartments": {
 90 |             "cost_per_sqft": 400,
 91 |             "allowable_densities": [5, 15]
 92 |         }, "fancy_condos": {
 93 |             "cost_per_sqft": 800,
 94 |             "allowable_densities": [10, 20]
 95 |         }, "ground_floor_retail": {
 96 |             "cost_per_sqft": 600
 97 |         }
 98 |     },
 99 |     "parcel_size": 10000,
100 |     "cap_rate": 0.06,
101 |     "max_far": 1.2,
102 |     "max_height": 20,
103 |     "height_per_story": 12,
104 |     "parcel_efficiency": 0.8,
105 |     "building_efficiency": 0.8,
106 |     "cost_shifter": 1.2,
107 |     "parcel_acquisition_cost": 1000000,
108 |     "non_res_parking_denom": 1000,
109 |     "use_mix": {
110 |         "use_types": ["0br", "1br", "2br"],
111 |         "mix": [0.3, 0.3, 0.4],
112 |         "ground_floor": {
113 |             "use": "retail",
114 |             "size": 3000
115 |         }
116 |     },
117 |     "absorption_in_months": 20,
118 |     "parking_type": "deck",
119 |     "building_type": "garden_apartments",
120 |     "built_dua": 10
121 | }
122 | ```
123 | 
124 | Hopefully if you've followed most of the discussion so far, this API will be fairly easy to pick up on.  We'll parallel the logic described above with a discussion of the parameters in the API.
125 | 
126 | For starters there is a `use_types` object which has parameters for each of the use types.  Each residential unit type has a price per sqft, size, and parking ratio as described in the previous section.
127 | 
128 | Non-residential uses, which are the ground floor uses (e.g. retail), have a rent per sqft, as this is standard, which gets converted to a price per sqft using the cap rate also specified in the object.  They also have a parking ratio which uses square feet rather than number of units, together with the `non_res_parking_denom` which gives the denominator for non-residential parking ratios.
129 | 
130 | Next comes a `parking_types` object which contains keys of surface, deck, and underground and have parking space sizes and costs per sqft.
131 | 
132 | Next there is a `building_types` object which contains all *possible* building types even though only one building type will actually be used for each pro forma (this will come in handy when vectorizing the operation).  Think of this as the data that comes out of the RSMeans handbook.  Right now, a building type gets a descriptive name, a value for cost per sqft, and reasonable limits on density (`allowable_densities`).
133 | 
134 | Finally comes a `use_mix` object which has two lists of `use_types` and their `mix` which should be of the same length and the floats in the mix list should add up to 1.0.  This is the ratio of different unit types in the building (e.g. 30% 1BR and 70% 2BR).  There can also be a `ground_floor` object which gives the type and size of any non-residential space in the building.
135 | 
136 | Various scalar parameters are as follows:
137 | 
138 | * parcel_size is the size of the parcel in square feet
139 | * cap_rate converts yearly rent to price, so a cap_rate of .05 means a rent of $30/sqft/year is equivalent to a sales price of $600/sqft
140 | * max_height and max_far give density limits that will be tested after the building is configured
141 | * height_per_story converts number of stories to building height
142 | * the parcel_efficiency gives the maximum building footprint size based on the size of the parcel, and building_efficiency gives the ratio of sellable area to total area (accounts for common space)
143 | * cost_shifter is optional and can be used to specify the RSMeans area cost shifter
144 | * parcel_acquisition_cost is the cost of buying the parcel and building - this number typically comes out of some sort of statistical model
145 | * finally, parking_type, building_type, and built_dua are three of the most important parameters as they specify exactly what form the current computations will take.  Although there are many building types, a few parking types, and many different densities at which a building can be built, each pro forma only uses one.
146 | 
147 | ## Settings for affordable / inclusionary housing
148 | 
149 | **pyforma** has support to calculate the impact of affordable / inclusionary housing.  To enable affordable housing, include a sub-dictionary with the key affordable_housing and keys like the following (include as part of the larger config object described above).  Keys include
150 | 
151 | * AMI - the area median income, which is usually specified by HUD for current affordable housing policy, but which could be forecast median incomes for future years.  As before, this can be a scalar value or a Series of values per parcel.
152 | 
153 | * depth_of_affordability is the percent of AMI at which the housing should be affordable.  A value of 1.0 would be equivalent to AMI, and values are usually less than 1.0 in current housing policy.
154 | 
155 | * pct_affordable_units is the percentage of affordable units which are required for a development to be built.  A value of .2 would mean 20% of the units built would be affordable at this percentage of AMI.  This value can, and probably should, be varied by jurisdiction and in fact can be varied by parcel for complete flexibility.
156 | 
157 | * price_multiplier_by_type is a dictionary where keys are unit types as are specified elsewhere in the config object.  These are also multipliers which are usually set by policy such that different size units should be affordable at different levels of AMI - obviously smaller units are usually set to be affordable at smaller multiples of AMI, while larger units should be set higher.  Note that setting 
158 | 
159 | ```
160 | ...
161 | "affordable_housing": {
162 |     "AMI": 80000,
163 |     "depth_of_affordability": .8,
164 |     "pct_affordable_units": .2,
165 |     "price_multiplier_by_type": {
166 |         "0br": .7,
167 |         "1br": .75,
168 |         "2br": .9,
169 |         "3br+": 1.04
170 |     }
171 | }
172 | ...
173 | ```
174 | 
175 | If "affordable_housing" is set as an input, "affordable_units" will be set as a key in the output, which will be a Series providing the number of affordable units per development.
176 | 
177 | Note that the purpose of these parameters is to adjust the profitability of developments, which necessarily reduces the probability of a development being built relative to developments which have no inclusionary housing.  Thus an increase in affordable housing in an urban county and strong market like San Francisco will probably work, and create potentially large numbers of affordable units, while at the same time providing a suburbanizing force to development region-wide.  This is in fact the whole purpose of running analyses like these.  Also note that at some level of inclusionary housing, depending on market conditions, a development can go from profitable to unprofitable, which is why inclusionary rates are often linked to market cycles.
178 | 
179 | 
180 | ## Running pyforma far and wide
181 | 
182 | The real power of this API is not to call the API once with scalar values, but to pass in a Pandas Series of values (a vector of values) and perform the computation more efficiently.  Python is notoriously slow at performing "for loop" operations, and in fact in this case **using a Pandas Series and letting pyforma do the computation for you is *900* times faster than calling this API with scalars in a for loop**.  The use of scalars in the API is not for large numbers of operations, say 100k calls or more.
183 | 
184 | It's also clear that there are two main use cases for using pyforma:
185 | 
186 | * The "far" in the heading, which would be to explore many (potentially millions) of pro formas run on a single parcel to optimize the return on that parcel
187 | 
188 | * The "wide" in the heading, which would be to explore a pro forma on a large number of parcels (potentially millions) at the max zoning allowed or similar
189 | 
190 | In fact, the API is general and simple enough that you can make any calls you want and aggregate them however you want.  For instance, the user of the API could run 20 pro formas per parcel for 2 million parcels, or about 40 million pro formas, in only a few seconds.  The 20 pro formas per parcel could test various inflection points, or parking types, etc, and then maximize the return per parcel before doing an aggregation across all parcels like summing feasible units in an area.
191 | 
192 | ## A vectorized example, including use of the cartesian_product helper
193 | 
194 | Here is an example of using pyforma in a vectorized manner (again drawn from the unit tests).  First imagine you have an object called `cfg` which is set to the configuration object from the previous example.  In this example we want to test a series of DUA values, a series of FAR values, a series of parcel sizes, and a series of price per square foot numbers, and we want to test *all* combinations of those Series.
195 | 
196 | pyforma has a helper method to assist with this use case, called `cartesian_product`, which will perform the cross product for you - just pass the Series as arguments to the method like shown below.  The method will create a DataFrame which has columns that are named the same as each series, and will create a row in the DataFrame with every combination of values of the passed Series (and do so efficiently).  So if you pass four Series, with lengths 2, 3, 4, and 3 respectively, the length of the output DataFrame will be 2 * 3 * 4 * 3 = 72.  This is obviously polynomial expansion so use judiciously.
197 | 
198 | Once you have the set of values you want to test, simply substitute the Pandas Series for the previous scalar values (in this case the output of `cartesian_product` but could also be the actual values taken from parcels throughout a city), and finally call the appropriate method to run the pro formas.
199 | 
200 | ```python 
201 | df = pyforma.cartesian_product([
202 |     pd.Series(np.arange(1, 300, 5), name="dua"),
203 |     pd.Series(np.arange(.25, 8, .5), name="far"),
204 |     pd.Series(np.arange(1000, 100000, 50000), name="parcel_size"),
205 |     pd.Series(np.arange(500, 2000, 500), name="price_per_sqft")
206 | ])
207 | 
208 | # cfg is initially set to the object from the previous example
209 | cfg["parcel_size"] = df.parcel_size
210 | cfg["max_dua"] = df.dua
211 | cfg["max_far"] = df.far
212 | cfg["use_types"]["2br"]["price_per_sqft"] = df.price_per_sqft
213 | 
214 | ret = pyforma.spot_residential_sales_proforma(cfg)
215 | ```
216 | 
217 | ## A note on parking types and vectorization
218 | 
219 | At this point there are three parking types, and thus the scalar passed takes one of the values "surface", "deck", and "underground".  For now, these can't be vectorized in a Series which mixes these values.  This keeps the code simple internally, but will probably change at a future date.  For now, simply call the method 3 times if you want to test multiple parking types.
220 | 
221 | ## Benchmarks
222 | 
223 | Current benchmarks for the style of pro forma that pyforma currently supports show it running 18 million pro formas per second.
224 | 
225 | ## Outputs (what the API returns)
226 | 
227 | Similar to the object passed as input, the `spot_residential_sales_proforma` returns a Python dictionary with key-value pairs.  If the values passed are scalars, the values returned will be scalars.  If any of the values passed are Series, most of the values returned will be Series as well.  Below is a list and description of the keys returned and a sample return object.
228 | 
229 | * built_far - the actual floor area ratio for the building
230 | * height - the height for this building
231 | * num_units_by_type - a list of the number of each type of unit in the mix array passed in (in units rather than in proportions).  For now these values can be partial units (floats).
232 | * usable_floor_area - The amount of floor area (can be spread among floors) that is inside a unit or non-residential area
233 | * floor_area_including_common_space - The usable floor area plus the shared space
234 | * ground_floor_type - this is passed in by the user and returned to the user for convenience
235 | * ground_floor_size - this is passed in by the user and returned to the user for convenience
236 | * footprint_size - the area of the building footprint
237 | * revenue_from_ground_floor - if there is ground floor non-residential space, this is the revenue that space generates (a full price, not a yearly rent)
238 | * parking_type - this is passed in by the user and returned to the user for convenience
239 | * parking_spaces - the total number of parking spaces this building will require
240 | * parking_area - the area of said parking spaces
241 | * parking_cost - the cost of said parking spaces
242 | * total_floor_area - the floor area plus common spaces plus parking (if it's not surface parking)
243 | * revenue - the total revenue the building generates - i.e. an estimate of the NPV
244 | * cost - the total cost to construct the building, which includes usable space, common space, and parking
245 | * profit - revenue minus cost, duh (includes acquisition cost for the parcel in addition to construction cost)
246 | * stories - the number of stories of the building
247 | * failure_dua - a True/False value as to whether the building has a zoning failure where it exceeds the max DUA value
248 | * failure_far - a True/False value as to whether the building has a zoning failure where it exceeds the max FAR value
249 | * failure_height - a True/False value as to whether the building has a zoning failure where it exceeds the max height value
250 | * failure_btype - a True/False value as to whether the density of this building exceeds the range specified as allowable for a given building type - e.g. no townhome is 5 stories; the building will be analyzed as requested but this is considered a "building type failure"
251 | * building_type - this is passed in by the user and returned to the user for convenience
252 | 
253 | ```
254 | {
255 |     "built_far": built_far,
256 |     "height": height,
257 |     "num_units_by_type": num_units_by_type,
258 |     "usable_floor_area": usable_floor_area,
259 |     "floor_area_including_common_space": floor_area_including_common_space,
260 |     "ground_floor_type": ground_floor_type,
261 |     "ground_floor_size": ground_floor_size,
262 |     "footprint_size": footprint_size,
263 |     "revenue_from_ground_floor": revenue_from_ground_floor,
264 |     "parking_type": parking_type,
265 |     "parking_spaces": parking_spaces,
266 |     "parking_area": parking_area,
267 |     "parking_cost": parking_cost,
268 |     "total_floor_area": total_floor_area,
269 |     "revenue": revenue,
270 |     "cost": cost,
271 |     "profit": profit,
272 |     "stories": stories,
273 |     "failure_dua": failure_dua,
274 |     "failure_far": failure_far,
275 |     "failure_height": failure_height,
276 |     "failure_btype": failure_btype,
277 |     "building_type": building_type
278 | }
279 | ```
280 | 
281 | ## Zoning failures
282 | 
283 | There are four zoning failures that are described in detail above - they are DUA, FAR, height, and building type.  At first it is not obvious why the API should even allow zoning to be violated, but this API is written to be as flexible as possible, and the user is free to pass in many different kinds of buildings.  They might be taller than the max height, they might be 10 story townhomes or 1 story condos, but when the results don't make sense this will be flagged as a constraint failure.  Make sure to check the constraint failures if your use case requires it.
284 | 


--------------------------------------------------------------------------------