├── pyforma ├── __init__.py ├── tests │ ├── __init__.py │ └── test_pyforma.py └── pyforma.py ├── .gitignore ├── setup.py ├── .travis.yml ├── LICENSE └── README.md /pyforma/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyforma/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='pyforma', 5 | version='0.1dev', 6 | description='Installs and runs pyforma.', 7 | author='Oakland Analytics', 8 | author_email='oaklandanalytics@gmail.com', 9 | license='BSD', 10 | url='https://github.com/fscottfoti/pyforma', 11 | classifiers=[ 12 | 'Development Status :: 4 - Beta', 13 | 'Programming Language :: Python :: 2.7', 14 | 'License :: OSI Approved :: BSD License' 15 | ], 16 | packages=find_packages(exclude=['*.tests']) 17 | ) 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | install: 5 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh 6 | -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 7 | -O miniconda.sh; fi 8 | - bash miniconda.sh -b -p $HOME/miniconda 9 | - export PATH="$HOME/miniconda/bin:$PATH" 10 | - hash -r 11 | - conda config --set always_yes yes --set 
changeps1 no 12 | - conda update -q conda 13 | - conda info -a 14 | - | 15 | conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pandas pip pytables pytest 16 | - source activate test-environment 17 | - pip install pytest-cov coveralls pep8 18 | script: 19 | - pep8 . 20 | - py.test --cov pyforma --cov-report term-missing 21 | after_success: 22 | - coveralls 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Fletcher Foti 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of pyforma nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /pyforma/pyforma.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def describe_cartesian_product(*args): 6 | """ 7 | Returns a string to describe 8 | """ 9 | n = reduce(lambda x, y: x * y, [len(s) for s in args]) 10 | s = reduce(lambda x, y: str(x) + " x " + str(y), 11 | [len(s) for s in args]) + " = " + str(n) 12 | 13 | return s 14 | 15 | 16 | def cartesian_product(*args): 17 | """ 18 | Return the cartesion product of multiple series as a dataframe - 19 | just pass in the series as arguments (see the test) 20 | """ 21 | 22 | dfs = [pd.DataFrame({s.name: s, "key": 0}) for s in args] 23 | df = reduce( 24 | lambda df1, df2: df1.merge(df2, how='left', on='key'), 25 | dfs) 26 | df.drop('key', 1, inplace=True) # drop temp key 27 | return df 28 | 29 | 30 | def price_per_sqft_with_affordable_housing( 31 | price_per_sqft, 32 | sqft_per_unit, 33 | AMI, 34 | depth_of_affordability, 35 | price_multiplier, 36 | cap_rate, 37 | pct_affordable_units 38 | ): 39 | 40 | AMI *= depth_of_affordability 41 | 42 | monthly_payment = AMI * .33 / 12 * price_multiplier 43 | 44 | value_of_payment = monthly_payment * 12 / cap_rate 45 | 46 | affordable_price_per_sqft = value_of_payment / sqft_per_unit 47 | 48 | blended_price_per_sqft = \ 49 | pct_affordable_units * 
affordable_price_per_sqft + \ 50 | (1-pct_affordable_units) * price_per_sqft 51 | 52 | return blended_price_per_sqft 53 | 54 | 55 | def average_unit_size(cfg): 56 | """ 57 | Compute the overall average unit size, combining the unit mix 58 | and sizes per unit 59 | """ 60 | 61 | sizes = 0 62 | for use_type, mix in \ 63 | zip(cfg["use_mix"]["use_types"], cfg["use_mix"]["mix"]): 64 | 65 | sizes += cfg["use_types"][use_type]["size"] * mix 66 | 67 | return sizes 68 | 69 | 70 | def spot_residential_sales_proforma(cfg): 71 | """ 72 | This takes a hierarchical Python object of a certain form and 73 | passes back another Python object. Documenting the structure 74 | is not well suited to pydocs - see the Readme instead. 75 | """ 76 | 77 | parcel_acres = cfg["parcel_size"] / 43560.0 78 | 79 | num_units_by_type = {"residential_units": 0} 80 | 81 | # compute basic measures for floor area in units 82 | usable_floor_area = 0 83 | revenue = 0 84 | parking_spaces = 0 85 | for use_type, mix in \ 86 | zip(cfg["use_mix"]["use_types"], cfg["use_mix"]["mix"]): 87 | 88 | # this allow non-int numbers of units 89 | num_units = mix * cfg["built_dua"] * parcel_acres 90 | 91 | num_units_by_type["residential_units"] += num_units 92 | 93 | num_units_by_type[use_type + "_num_units"] = num_units 94 | 95 | use_cfg = cfg["use_types"][use_type] 96 | 97 | usable_floor_area += use_cfg["size"] * num_units 98 | 99 | if "affordable_housing" in cfg: 100 | 101 | aff_cfg = cfg["affordable_housing"] 102 | price_per_sqft = price_per_sqft_with_affordable_housing( 103 | use_cfg["price_per_sqft"], 104 | use_cfg["size"], 105 | aff_cfg["AMI"], 106 | aff_cfg.get("depth_of_affordability", 1.0), 107 | aff_cfg["price_multiplier_by_type"][use_type], 108 | cfg["cap_rate"], 109 | aff_cfg["pct_affordable_units"] 110 | ) 111 | 112 | else: 113 | 114 | price_per_sqft = use_cfg["price_per_sqft"] 115 | 116 | revenue += use_cfg["size"] * price_per_sqft * num_units 117 | parking_spaces += use_cfg["parking_ratio"] * num_units 
118 | 119 | # add in ground floor measures 120 | if "ground_floor" in cfg["use_mix"]: 121 | 122 | # this assumes ground floor is non-res - is that ok? 123 | ground_floor = cfg["use_mix"]["ground_floor"] 124 | ground_floor_type = ground_floor["use"] 125 | ground_floor_size = ground_floor["size"] 126 | 127 | usable_floor_area += ground_floor_size 128 | use_cfg = cfg["use_types"][ground_floor_type] 129 | revenue_from_ground_floor = \ 130 | use_cfg["rent_per_sqft"] / cfg["cap_rate"] * ground_floor_size 131 | revenue += revenue_from_ground_floor 132 | parking_spaces += ground_floor_size / cfg["non_res_parking_denom"] * \ 133 | use_cfg["parking_ratio"] 134 | 135 | # now compute parking attributes for the building so far 136 | parking_type = cfg["parking_type"] 137 | parking_cfg = cfg["parking_types"][parking_type] 138 | parking_area = parking_spaces * parking_cfg["space_size"] 139 | parking_cost = parking_area * parking_cfg["space_cost_sqft"] 140 | 141 | floor_area_including_common_space = \ 142 | usable_floor_area / cfg["building_efficiency"] 143 | 144 | # compute the building footprint 145 | 146 | max_footprint = cfg["parcel_size"] * cfg["parcel_efficiency"] 147 | 148 | if parking_type == "surface": 149 | if max_footprint - parking_area < .1 * cfg["parcel_size"]: 150 | # building has to be 10% of the parcel 151 | raise Error("Parking covers >90%% of the parcel") 152 | max_footprint -= parking_area 153 | total_floor_area = floor_area_including_common_space 154 | 155 | elif parking_type == "deck": 156 | total_floor_area = floor_area_including_common_space + parking_area 157 | 158 | elif parking_type == "underground": 159 | total_floor_area = floor_area_including_common_space 160 | 161 | stories = np.ceil(total_floor_area / max_footprint) 162 | footprint_size = total_floor_area / stories 163 | 164 | # now compute costs 165 | building_type = cfg["building_types"][cfg["building_type"]] 166 | cost = floor_area_including_common_space * \ 167 | building_type["cost_per_sqft"] * 
cfg["cost_shifter"] + \ 168 | parking_cost + cfg["parcel_acquisition_cost"] 169 | 170 | profit = revenue - cost 171 | 172 | # check against max_dua 173 | failure_dua = cfg["built_dua"] > cfg["max_dua"] \ 174 | if "max_dua" in cfg else False 175 | 176 | # check against max_far 177 | built_far = total_floor_area / cfg["parcel_size"] 178 | if "max_far" in cfg: 179 | failure_far = built_far > cfg["max_far"] 180 | 181 | # check against max_height 182 | height = stories * cfg["height_per_story"] 183 | if "max_height" in cfg: 184 | failure_height = height > cfg["max_height"] 185 | 186 | # check against buiding type densities 187 | failure_btype = \ 188 | (cfg["built_dua"] < building_type["allowable_densities"][0]) | \ 189 | (cfg["built_dua"] > building_type["allowable_densities"][1]) 190 | 191 | out = { 192 | "built_far": built_far, 193 | "height": height, 194 | "usable_floor_area": usable_floor_area, 195 | "floor_area_including_common_space": floor_area_including_common_space, 196 | "ground_floor_type": ground_floor_type, 197 | "ground_floor_size": ground_floor_size, 198 | "footprint_size": footprint_size, 199 | "revenue_from_ground_floor": revenue_from_ground_floor, 200 | "parking_type": parking_type, 201 | "parking_spaces": parking_spaces, 202 | "parking_area": parking_area, 203 | "parking_cost": parking_cost, 204 | "total_floor_area": total_floor_area, 205 | "revenue": revenue, 206 | "cost": cost, 207 | "profit": profit, 208 | "stories": stories, 209 | "failure_dua": failure_dua, 210 | "failure_far": failure_far, 211 | "failure_height": failure_height, 212 | "failure_btype": failure_btype, 213 | "building_type": cfg["building_type"] 214 | } 215 | 216 | for k, v in num_units_by_type.iteritems(): 217 | out[k] = v 218 | 219 | if "affordable_housing" in cfg: 220 | out["affordable_units"] = out["residential_units"] * \ 221 | cfg["affordable_housing"]["pct_affordable_units"] 222 | 223 | return out 224 | 
import os
import time

import pandas as pd
import numpy as np
import pytest
import pprint

from .. import pyforma

pp = pprint.PrettyPrinter(indent=4)


def assert_almost_equal(*args):
    """Assert that every argument is within .001 of the first argument."""
    for i in range(1, len(args)):
        assert abs(args[0] - args[i]) < .001


@pytest.fixture
def pro_forma_config_basic():
    """A complete scalar config for the spot pro forma, rebuilt per test."""
    return {
        "use_types": {
            "0br": {
                "price_per_sqft": 600,
                "size": 600,
                "parking_ratio": .3
            },
            "1br": {
                "price_per_sqft": 650,
                "size": 750,
                "parking_ratio": 1.0
            },
            "2br": {
                "price_per_sqft": 700,
                "size": 850,
                "parking_ratio": 1.5
            },
            "3br+": {
                "price_per_sqft": 750,
                "size": 1000,
                "parking_ratio": 2
            },
            "retail": {
                "rent_per_sqft": 3,
                "parking_ratio": 2
            }
        },
        "parking_types": {
            "surface": {
                "space_size": 300,
                "space_cost_sqft": 30
            },
            "deck": {
                "space_size": 250,
                "space_cost_sqft": 90
            },
            "underground": {
                "space_size": 250,
                "space_cost_sqft": 110
            }
        },
        "building_types": {
            "garden_apartments": {
                "cost_per_sqft": 400,
                "allowable_densities": [5, 15]
            }, "fancy_condos": {
                "cost_per_sqft": 800,
                "allowable_densities": [10, 20]
            }, "ground_floor_retail": {
                "cost_per_sqft": 600
            }
        },
        "parcel_size": 43560,
        "cap_rate": .06,
        "max_far": 1.2,
        "max_height": 20,
        "height_per_story": 12,
        "parcel_efficiency": .8,
        "building_efficiency": .8,
        "cost_shifter": 1.2,
        "parcel_acquisition_cost": 1000000,
        "non_res_parking_denom": 1000,
        "use_mix": {
            "use_types": ["0br", "1br", "2br"],
            "mix": [.3, .3, .4],
            "ground_floor": {
                "use": "retail",
                "size": 3000
            }
        },
        "absorption_in_months": 20,  # XXX not used yet
        "parking_type": "deck",
        "building_type": "garden_apartments",
        "built_dua": 10
    }


def test_cartesian_product():
    """Every combination of the input series appears exactly once."""

    df = pyforma.cartesian_product(
        pd.Series([5, 10, 30], name="dua"),
        pd.Series([1, 1.5, 2], name="far"),
        pd.Series([1000, 2000, 3000], name="parcel_sizes"),
        pd.Series([500, 600], name="price_per_sqft")
    )

    assert len(df) == 3 * 3 * 3 * 2

    assert df.price_per_sqft.value_counts().loc[500] == 3 * 3 * 3

    assert df.dua.value_counts().loc[5] == 3 * 3 * 2

    assert len(df.query("dua == 5 and far == 1.5 and parcel_sizes == 1000" +
                        " and price_per_sqft == 600")) == 1


def test_performance_of_vectorized(pro_forma_config_basic):
    """Compare one vectorized call against a scalar call per row."""

    cfg = pro_forma_config_basic

    series = [
        pd.Series(np.arange(1, 300, 5), name="dua"),
        pd.Series(np.arange(.25, 8, .5), name="far"),
        pd.Series(np.arange(1000, 100000, 50000), name="parcel_size"),
        pd.Series(np.arange(500, 2000, 500), name="price_per_sqft")
    ]
    df = pyforma.cartesian_product(*series)

    cfg["parcel_size"] = df.parcel_size
    cfg["max_dua"] = df.dua
    cfg["max_far"] = df.far
    cfg["use_types"]["2br"]["price_per_sqft"] = df.price_per_sqft

    t1 = time.time()
    ret = pyforma.spot_residential_sales_proforma(cfg)
    elapsed1 = time.time() - t1

    t1 = time.time()
    for index, row in df.iterrows():
        cfg["parcel_size"] = row.parcel_size
        cfg["max_dua"] = row.dua
        cfg["max_far"] = row.far
        cfg["use_types"]["2br"]["price_per_sqft"] = row.price_per_sqft
        ret = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)
    elapsed2 = time.time() - t1

    factor = elapsed2 / elapsed1

    # if you run enough pro formas in a batch, it's 900x faster to run
    # the pandas version than to run them one by one - when you run
    # fewer pro formas, like you kind of have to do in a unit test, it
    # will only be 300x faster as is asserted here
    assert factor > 100


def test_different_parking_types(pro_forma_config_basic):
    """Run the same config with each parking type and compare results."""

    cfg = pro_forma_config_basic

    d = {}
    for parking in ["surface", "deck", "underground"]:

        cfg["parking_type"] = parking
        d[parking] = \
            pyforma.spot_residential_sales_proforma(pro_forma_config_basic)

    assert d["surface"]["parking_spaces"] == d["deck"]["parking_spaces"] == \
        d["underground"]["parking_spaces"]

    spaces = d["surface"]["parking_spaces"]
    assert d["surface"]["parking_area"] == \
        spaces * cfg["parking_types"]["surface"]["space_size"]

    # surface pushes building up, underground keeps is low
    assert d["surface"]["stories"] >= d["deck"]["stories"] >= \
        d["underground"]["stories"]

    parking_far = d["deck"]["parking_area"] / cfg["parcel_size"]
    # these don't perfectly equal so do this weird subtraction
    assert d["deck"]["built_far"] - \
        parking_far - d["underground"]["built_far"] < .01
    assert d["deck"]["built_far"] - \
        parking_far - d["surface"]["built_far"] < .01

    assert -1 * (d["deck"]["profit"] - d["surface"]["profit"]) == \
        d["deck"]["parking_area"] * \
        cfg["parking_types"]["deck"]["space_cost_sqft"] - \
        d["surface"]["parking_area"] * \
        cfg["parking_types"]["surface"]["space_cost_sqft"]


def test_pyforma_basic_vectorized(pro_forma_config_basic):
    """Pass pd.Series for parcel size and 2br price and check the output."""

    cfg = pro_forma_config_basic

    series = [
        pd.Series(np.arange(1, 300, 5), name="dua"),
        pd.Series(np.arange(.25, 8, .5), name="far"),
        pd.Series(np.arange(1000, 100000, 10000), name="parcel_size"),
        pd.Series(np.arange(500, 2000, 250), name="price_per_sqft")
    ]
    df = pyforma.cartesian_product(*series)
    # print as a function call so the module also parses on Python 3
    print(pyforma.describe_cartesian_product(*series))

    pro_forma_config_basic["parcel_size"] = df.parcel_size
    pro_forma_config_basic["use_types"]["2br"]["price_per_sqft"] = \
        df.price_per_sqft

    t1 = time.time()
    ret = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)
    t2 = time.time()
    assert t2 - t1 < 1.0

    print("Ran {} pro forma in {:.2f}s".format(len(df), t2 - t1))

    ret = pd.DataFrame(ret)

    one, two, three = df.loc[0], df.loc[1], df.loc[2]
    # only thing in the assumptions that's different is the price
    assert one.dua == two.dua == three.dua
    assert one.far == two.far == three.far
    assert one.parcel_size == two.parcel_size == three.parcel_size
    assert one.price_per_sqft + 500 == two.price_per_sqft + 250 == \
        three.price_per_sqft

    # since the only thing that's different is the price, the revenue
    # and profit should be different by the number of 2brs, the size of
    # the 2 brs, and the difference in the price per size
    one, two, three = ret.iloc[0], ret.iloc[1], ret.iloc[2]
    assert_almost_equal(5.52, one.built_far, two.built_far, three.built_far)
    assert_almost_equal(
        three.revenue - two.revenue,
        one["2br_num_units"] * cfg["use_types"]["2br"]["size"] * 250)
    assert_almost_equal(
        two.profit - one.profit,
        one["2br_num_units"] * cfg["use_types"]["2br"]["size"] * 250)


def test_pyforma_basic(pro_forma_config_basic):
    """Check every output of a scalar run against hand-computed values."""

    ret = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)

    assert ret["usable_floor_area"] == 3 * 600 + 3 * 750 + 4 * 850 + 3000

    assert ret["stories"] == 1

    assert ret["floor_area_including_common_space"] == \
        ret["usable_floor_area"] / .8

    assert ret["parking_spaces"] == \
        3 * .3 + 3 * 1 + 4 * 1.5 + 3000 / 1000.0 * 2

    assert ret["revenue_from_ground_floor"] == 3000 * 3 / .06

    assert ret["revenue"] == 3 * 600 * 600 + 3 * 750 * 650 + 4 * 850 * 700 + \
        ret["revenue_from_ground_floor"]

    assert ret["ground_floor_type"] == "retail"

    assert ret["profit"] == ret["revenue"] - ret["cost"]

    assert ret["parking_type"] == "deck"

    assert ret["parking_area"] == ret["parking_spaces"] * 250

    assert ret["total_floor_area"] == \
        ret["floor_area_including_common_space"] + ret["parking_area"]

    assert ret["footprint_size"] == ret["total_floor_area"] / ret["stories"]

    assert ret["parking_cost"] == ret["parking_area"] * 90

    assert ret["cost"] == ret["parking_cost"] + \
        ret["floor_area_including_common_space"] * 400 * 1.2 + 1000000

    assert ret["building_type"] == "garden_apartments"

    assert round(ret["built_far"], 2) == 0.39

    assert ret["height"] == 12

    assert "failure_height" in ret

    assert "failure_far" in ret


def test_average_unit_size(pro_forma_config_basic):
    """The average unit size is the mix-weighted sum of the unit sizes."""

    ret = pyforma.average_unit_size(pro_forma_config_basic)

    assert ret == 600 * .3 + 750 * .3 + 850 * .4


def test_affordable_housing(pro_forma_config_basic):
    """Blend market and affordable prices at 0%, 100% and 25% affordable."""

    price_per_sqft_in = pd.Series([400, 500, 600])

    price_per_sqft = pyforma.price_per_sqft_with_affordable_housing(
        price_per_sqft_in,  # price per sqft
        1000,  # sqft per unit
        125000,  # AMI
        .8,  # pct of AMI
        .75,  # price multiplier for 1BR
        .05,  # interest rate
        0  # pct of affordable units
    )

    # should be equal if it's 100% not affordable passed in
    np.testing.assert_array_equal(price_per_sqft_in, price_per_sqft)

    price_per_sqft = pyforma.price_per_sqft_with_affordable_housing(
        price_per_sqft_in,  # price per sqft
        1000,  # sqft per unit
        125000,  # AMI
        .8,  # pct of AMI
        .75,  # price multiplier for 1BR
        .05,  # interest rate
        1.0  # pct of affordable units
    )

    # for these numbers, this is the price per sqft affordable
    np.testing.assert_allclose(price_per_sqft, [495]*3)

    # now test a blended version
    price_per_sqft = pyforma.price_per_sqft_with_affordable_housing(
        price_per_sqft_in,  # price per sqft
        1000,  # sqft per unit
        125000,  # AMI
        .8,  # pct of AMI
        .75,  # price multiplier for 1BR
        .05,  # interest rate
        .25  # pct of affordable units
    )

    # for these numbers, this is the price per sqft affordable
    np.testing.assert_allclose(price_per_sqft, [423.75, 498.75, 573.75])

    # test running through the json api too
    pro_forma_config_basic["affordable_housing"] = {
        "AMI": 80000,
        "depth_of_affordability": .8,
        "pct_affordable_units": .2,
        "price_multiplier_by_type": {
            "0br": .7,
            "1br": .75,
            "2br": .9,
            "3br+": 1.04
        }
    }
    ret = pyforma.spot_residential_sales_proforma(pro_forma_config_basic)

    assert ret["affordable_units"] == .2 * ret["residential_units"]
while constructing and selling a building. 9 | 10 | Pro formas range from exceptionally simple to extraordinarily complex. Most of the pro formas contained in this project are relatively simple as the purpose of this project is to run pro formas over large parts of a city to learn, for instance, the impact of an affordable housing policy on the number of housing units generated across the entire city. 11 | 12 | From a programming perspective, this project hopes to write a reasonable API to execute pro formas in code, rather than the de facto standard for pro formas, which is Excel. The API we chose is a hierarchical object, which clearly takes some inspiration from JavaScript (this is the de rigueur approach in JS), but the first implementation is written in Python as we think Python is a simpler language to get started running basic data science applications. A JavaScript version of many of these pro formas will eventually be written so as to perform analysis in an interactive fashion directly in the browser. 13 | 14 | Additionally, after many years of working with Python Pandas, I've determined that running vectorized financial analysis is challenging to both read and write. The typical real estate analyst comes from an Excel background, and the code he or she reads and writes should not be complicated vector and matrix operations (e.g. argmax or dot product or matrix multiply). On the other hand, Pandas runs roughly 900 times faster than a simple Python "for" loop. 15 | 16 | The decision we made was thus to make the API first operate on scalars (i.e. numbers) so that the simple logic and intent of the API can become clear, and to allow the substitution of a pd.Series (a vector) for each scalar in almost all places in order to gain performance improvements over large datasets. 17 | 18 | This has two additional benefits. 
First, sometimes a value is not known for *every* parcel; in this case a scalar best guess can be substituted for having a specific value associated with every parcel. Second, we can take advantage of the readability of the standard Pandas operation that `pd.Series([1, 2, 3]) * 2 == pd.Series([2, 4, 6])` so that the code is the same in almost every place whether a parameter is scalar or vector. 19 | 20 | ## Spot Pro Forma 21 | 22 | The simplest pro forma we call a `spot` pro forma because it does not consider cash flows over time. In this case most inflows and outflows are in costs per square foot and sales prices per sqft. This pro forma is mainly an accounting of policies like parking ratios, ground floor retail, unit mixes and the like. Even though it's so simple it's extraordinarily powerful as the level of detail in data necessary to run most parcel-scale pro formas is simply not available across the scope of a city, and the effect of such specific analysis on a city-wide scale would be modest. 23 | 24 | The steps for computing a spot pro forma are roughly: 25 | 26 | * Take a `built_dua` (the dwelling units per acre that is the density at which a building will be constructed) and multiply times a unit mix to get the number of units of each type (1BR, 2BR and so forth - the unit types are specified by the user as well). 27 | 28 | * Use price per sqft, average unit size, and parking ratios per unit to compute the total revenue, built space, and parking spaces for the building. 29 | 30 | * If ground floor uses are specified (e.g. retail), add revenue, built area, and parking spaces for the non-residential portion of the building. 31 | 32 | * Take the number of parking spaces and the parking type specified by the user to compute the total built area and cost of parking. 33 | 34 | * Apply a net to gross factor for common spaces. 
35 | 36 | * Based on the parking type (surface, deck, or underground), configure the building on the parcel to compute the area of the building footprint and number of stories. 37 | 38 | * Compute profit as revenues minus costs. 39 | 40 | * Check for constraint failures if the user passes a building configuration that conflicts with the building that gets constructed (e.g. garden apartments can't be more than 3 stories tall). Also check for zoning violations such as maximum FAR and maximum height limits. 41 | 42 | ### The API 43 | 44 | The spot pro forma API looks like this (a good place to start to learn how to use the API is to explore the thorough unit tests in the `tests` directory - this example comes directly from the tests). 45 | 46 | ```json 47 | { 48 | "use_types": { 49 | "0br": { 50 | "price_per_sqft": 600, 51 | "size": 600, 52 | "parking_ratio": 0.3 53 | }, 54 | "1br": { 55 | "price_per_sqft": 650, 56 | "size": 750, 57 | "parking_ratio": 1.0 58 | }, 59 | "2br": { 60 | "price_per_sqft": 700, 61 | "size": 850, 62 | "parking_ratio": 1.5 63 | }, 64 | "3br+": { 65 | "price_per_sqft": 750, 66 | "size": 1000, 67 | "parking_ratio": 2 68 | }, 69 | "retail": { 70 | "rent_per_sqft": 30, 71 | "parking_ratio": 2 72 | } 73 | }, 74 | "parking_types": { 75 | "surface": { 76 | "space_size": 300, 77 | "space_cost_sqft": 30 78 | }, 79 | "deck": { 80 | "space_size": 250, 81 | "space_cost_sqft": 90 82 | }, 83 | "underground": { 84 | "space_size": 250, 85 | "space_cost_sqft": 110 86 | } 87 | }, 88 | "building_types": { 89 | "garden_apartments": { 90 | "cost_per_sqft": 400, 91 | "allowable_densities": [5, 15] 92 | }, "fancy_condos": { 93 | "cost_per_sqft": 800, 94 | "allowable_densities": [10, 20] 95 | }, "ground_floor_retail": { 96 | "cost_per_sqft": 600 97 | } 98 | }, 99 | "parcel_size": 10000, 100 | "cap_rate": 0.06, 101 | "max_far": 1.2, 102 | "max_height": 20, 103 | "height_per_story": 12, 104 | "parcel_efficiency": 0.8, 105 | "building_efficiency": 0.8, 106 | 
"cost_shifter": 1.2, 107 | "parcel_acquisition_cost": 1000000, 108 | "non_res_parking_denom": 1000, 109 | "use_mix": { 110 | "use_types": ["0br", "1br", "2br"], 111 | "mix": [0.3, 0.3, 0.4], 112 | "ground_floor": { 113 | "use": "retail", 114 | "size": 3000 115 | } 116 | }, 117 | "absorption_in_months": 20, 118 | "parking_type": "deck", 119 | "building_type": "garden_apartments", 120 | "built_dua": 10 121 | } 122 | ``` 123 | 124 | Hopefully if you've followed most of the discussion so far, this API will be fairly easy to pick up on. We'll parallel the logic described above with a discussion of the parameters in the API. 125 | 126 | For starters there is a `use_types` object which has parameters for each of the unit types. Each unit type has a price per sqft, size, and parking ratio as described in the previous section. 127 | 128 | Non-residential uses, which are the ground floor uses (e.g. retail), have rent per sqft as this is standard and gets converted to a price per sqft using the cap rate also specified in the object, as well as a parking ratio which uses square feet rather than number of units and the `non_res_parking_denom` which gives the denominator for non-residential parking ratios. 129 | 130 | Next comes a `parking_types` object which contains keys of surface, deck, and underground and have parking space sizes and costs per sqft. 131 | 132 | Next there is a `building_types` object which contains all *possible* building types even though only one building type will actually be used for each pro forma (this will come in handy when vectorizing the operation). Think of this as the data that comes out of the RSMeans handbook. Right now, a building type gets a descriptive name, and values of cost per sqft and the range of allowable densities (`allowable_densities`). 133 | 134 | Finally comes a `use_mix` object which has two lists of `use_types` and their `mix` which should be of the same length and the floats in the mix list should add up to 1.0. 
This is the ratio of different unit types in the building (e.g. 30% 1BR and 70% 2BR). There can also be a `ground_floor` object which gives the type and size of any non-residential space in the building. 135 | 136 | Various scalar parameters are as follows: 137 | 138 | * parcel_size is the size of the parcel in square feet 139 | * cap_rate converts yearly rent to price, so a cap_rate of .05 means a rent of $30/sqft/year is equivalent to a sales price of $600/sqft 140 | * max_height and max_far give density limits that will be tested after the building is configured 141 | * height_per_story converts number of stories to building height 142 | * the parcel_efficiency gives the maximum building footprint size based on the size of the parcel, and building_efficiency gives the ratio of sellable area to total area (accounts for common space) 143 | * cost_shifter is optional and can be used to specify the RSMeans area cost shifter 144 | * parcel_acquisition_cost is the cost of buying the parcel and building - this number typically comes out of some sort of statistical model 145 | * finally, parking_type, building_type, and built_dua are three of the most important parameters as they specify exactly what form the current computations will take. Although there are many building types, a few parking types, and many different densities at which a building can be built, each pro forma only uses one. 146 | 147 | ## Settings for affordable / inclusionary housing 148 | 149 | **pyforma** has support to calculate the impact of affordable / inclusionary housing. To enable affordable housing, include a sub-dictionary with the key affordable_housing and keys like the following (include as part of the larger config object described above). Keys include 150 | 151 | * AMI - the area median income, which is usually specified by HUD for current affordable housing policy, but which could be forecast median incomes for future years. 
As before, this can be a scalar value or a Series of values per parcel. 152 | 153 | * depth_of_affordability is the percent of AMI at which the housing should be affordable. A value of 1.0 would be equivalent to AMI, and values are usually less than 1.0 in current housing policy. 154 | 155 | * pct_affordable_units is the percentage of affordable units which are required for a development to be built. A value of .2 would mean 20% of the units built would be affordable at this percentage of AMI. This value can, and probably should, be varied by jurisdiction and in fact can be varied by parcel for complete flexibility. 156 | 157 | * price_multiplier_by_type is a dictionary where keys are unit types as are specified elsewhere in the config object. These are also multipliers which are usually set by policy such that different size units should be affordable at different levels of AMI - obviously smaller units are usually set to be affordable at smaller multiples of AMI, while larger units should be set higher. Note that setting 158 | 159 | ``` 160 | ... 161 | "affordable_housing": { 162 | "AMI": 80000, 163 | "depth_of_affordability": .8, 164 | "pct_affordable_units": .2, 165 | "price_multiplier_by_type": { 166 | "0br": .7, 167 | "1br": .75, 168 | "2br": .9, 169 | "3br+": 1.04 170 | } 171 | } 172 | ... 173 | ``` 174 | 175 | If "affordable_housing" is set as an input, "affordable_units" will be set as a key in the output, which will be a Series providing the number of affordable units per development. 176 | 177 | Note that the purpose of these parameters is to adjust the profitability of developments, which necessarily reduces the probability of a development being built relative to developments which have no inclusionary housing. 
Thus an increase in affordable housing in an urban county and strong market like San Francisco will probably work, and create potentially large numbers of affordable units, while at the same time providing a suburbanizing force to development region-wide. This is in fact the whole purpose of running analyses like these. Also note that at some level of inclusionary housing, depending on market conditions, a development can go from profitable to unprofitable, which is why inclusionary rates are often linked to market cycles. 178 | 179 | 180 | ## Running pyforma far and wide 181 | 182 | The real power of this API is not to call the API once with scalar values, but to pass in a Pandas Series of values (a vector of values) and perform the computation more efficiently. Python is notoriously slow at performing "for loop" operations, and in fact in this case **using a Pandas Series and letting pyforma do the computation for you is *900* times faster than calling this API with scalars in a for loop**. The use of scalars in the API is not for large numbers of operations, say 100k calls or more. 183 | 184 | It's also clear that there are two main use cases for using pyforma: 185 | 186 | * The "far" in the heading, which would be to explore many (potentially millions) of pro formas run on a single parcel to optimize the return on that parcel 187 | 188 | * The "wide" in the heading, which would be to explore a pro forma on a large number of parcels (potentially millions) at the max zoning allowed or similar 189 | 190 | In fact, the API is generally simple enough that you can make any calls you want and aggregate them however you want. For instance, the user of the API could run 20 pro formas per parcel for 2 million parcels, or about 40 million pro formas in only a few seconds. 
The 20 pro formas per parcel could test various inflection points, or parking types, etc, and then maximize the return per parcel before doing an aggregation across all parcels like summing feasible units in an area. 191 | 192 | ## A vectorized example, including use of the cartesian_product helper 193 | 194 | Here is an example of using pyforma in a vectorized manner (again drawn from the unit tests). First imagine you have an object called `cfg` which is set to the configuration object from the previous example. In this example we want to test a series of DUA values, a series of FAR values, a series of parcel sizes, and a series of price per square foot numbers, and we want to test *all* combinations of those Series. 195 | 196 | pyforma has a helper method to assist with this use case, called `cartesian_product`, which will perform the cross product for you - just pass the Series as arguments to the method like shown below. The method will create a DataFrame which has columns that are named the same as each series, and will create a row in the DataFrame with every combination of values of the passed Series (and do so efficiently). So if you pass four Series, with lengths 2, 3, 4, and 3 respectively, the length of the output DataFrame will be 2 * 3 * 4 * 3 = 72. This is obviously polynomial expansion so use judiciously. 197 | 198 | Once you have the set of values you want to test, simply substitute the Pandas Series for the previous scalar values (in this case the output of `cartesian_product` but could also be the actual values taken from parcels throughout a city), and finally call the appropriate method to run the pro formas. 
199 | 200 | ```python 201 | df = pyforma.cartesian_product([ 202 | pd.Series(np.arange(1, 300, 5), name="dua"), 203 | pd.Series(np.arange(.25, 8, .5), name="far"), 204 | pd.Series(np.arange(1000, 100000, 50000), name="parcel_size"), 205 | pd.Series(np.arange(500, 2000, 500), name="price_per_sqft") 206 | ]) 207 | 208 | # cfg is initially set to the object from the previous example 209 | cfg["parcel_size"] = df.parcel_size 210 | cfg["max_dua"] = df.dua 211 | cfg["max_far"] = df.far 212 | cfg["use_types"]["2br"]["price_per_sqft"] = df.price_per_sqft 213 | 214 | ret = pyforma.spot_residential_sales_proforma(cfg) 215 | ``` 216 | 217 | ## A note on parking types and vectorization 218 | 219 | At this point there are three parking types, and thus the scalar passed takes one of the values "surface", "deck", and "underground". For now, these can't be vectorized in a Series which mixes these values. This keeps the code simple internally, but will probably change at a future date. For now, simply call the method 3 times if you want to test multiple parking types. 220 | 221 | ## Benchmarks 222 | 223 | Current benchmarks show that the style of pro forma that pyforma currently supports will run at 18 million pro formas per second. 224 | 225 | ## Outputs (what the API returns) 226 | 227 | Similar to the object passed as input, the `spot_residential_sales_proforma` method returns a Python dictionary with key-value pairs. If the values passed are scalars, the values returned will be scalars. If any of the values passed are Series, most of the values returned will be Series as well. Below is a list and description of the keys returned and a sample return object. 228 | 229 | * built_far - the actual floor area ratio for the building 230 | * height - the height for this building 231 | * num_units_by_type - a list of the number of each type of unit in the mix array passed in (in units rather than in proportions). For now these values can be partial units (floats). 
232 | * usable_floor_area - The amount of floor area (can be spread among floors) that is inside a unit or non-residential area 233 | * floor_area_including_common_space - The usable floor area plus the shared space 234 | * ground_floor_type - this is passed in by the user and returned to the user for convenience 235 | * ground_floor_size - this is passed in by the user and returned to the user for convenience 236 | * footprint_size - the area of the building footprint 237 | * revenue_from_ground_floor - if there is ground floor non-residential space, this is the revenue that space generates (a full price, not a yearly rent) 238 | * parking_type - this is passed in by the user and returned to the user for convenience 239 | * parking_spaces - the total number of parking spaces this building will require 240 | * parking_area - the area of said parking spaces 241 | * parking_cost - the cost of said parking spaces 242 | * total_floor_area - the floor area plus common spaces plus parking (if it's not surface parking) 243 | * revenue - the total revenue the building generates - i.e. an estimate of the NPV 244 | * cost - the total cost to construct the building, which includes usable space, common space, and parking 245 | * profit - revenue minus cost, duh (includes acquisition cost for the parcel in addition to construction cost) 246 | * stories - the number of stories of the building 247 | * failure_dua - a True/False value as to whether the building has a zoning failure where it exceeds the max DUA value 248 | * failure_far - a True/False value as to whether the building has a zoning failure where it exceeds the max FAR value 249 | * failure_height - a True/False value as to whether the building has a zoning failure where it exceeds the max height value 250 | * failure_btype - a True/False value as to whether the density of this building exceeds the range specified as allowable for a given building type - e.g. 
no townhome is 5 stories; the building will be analyzed as requested but this is considered a "building type failure" 251 | * building_type - this is passed in by the user and returned to the user for convenience 252 | 253 | ``` 254 | { 255 | "built_far": built_far, 256 | "height": height, 257 | "num_units_by_type": num_units_by_type, 258 | "usable_floor_area": usable_floor_area, 259 | "floor_area_including_common_space": floor_area_including_common_space, 260 | "ground_floor_type": ground_floor_type, 261 | "ground_floor_size": ground_floor_size, 262 | "footprint_size": footprint_size, 263 | "revenue_from_ground_floor": revenue_from_ground_floor, 264 | "parking_type": parking_type, 265 | "parking_spaces": parking_spaces, 266 | "parking_area": parking_area, 267 | "parking_cost": parking_cost, 268 | "total_floor_area": total_floor_area, 269 | "revenue": revenue, 270 | "cost": cost, 271 | "profit": profit, 272 | "stories": stories, 273 | "failure_dua": failure_dua, 274 | "failure_far": failure_far, 275 | "failure_height": failure_height, 276 | "failure_btype": failure_btype, 277 | "building_type": building_type 278 | } 279 | ``` 280 | 281 | ## Zoning failures 282 | 283 | There are four zoning failures that are described in detail above - they are DUA, FAR, height, and building type. At first it is not obvious why the API should even allow zoning to be violated, but this API is written to be as flexible as possible, and the user is free to pass in many different kinds of buildings. They might be taller than the max height, they might be 10 story townhomes or 1 story condos, but when the results don't make sense this will be flagged as a constraint failure. Make sure to check the constraint failures if your use case requires it. 284 | --------------------------------------------------------------------------------