├── .gitignore ├── LICENSE ├── README.md ├── assets └── sprf.png ├── data ├── atlantic.csv ├── california_housing.csv ├── deforestation.csv ├── meuse.csv └── plants.csv ├── figure.ipynb ├── requirements.txt ├── scripts ├── benchmarks.py ├── models.py ├── plotting.py └── synthetic_tests.py ├── setup.py ├── sprf ├── __init__.py ├── geographical_random_forest.py ├── spatial_random_forest.py └── tuning.py ├── sprf_demo.ipynb └── tests └── test_sprf.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # vscode and mac 10 | .DS_Store 11 | *.code-workspace 12 | *.drawio 13 | 14 | # directories 15 | data_orig/ 16 | private_notebooks/ 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 104 | __pypackages__/ 105 | 106 | # Celery stuff 107 | celerybeat-schedule 108 | celerybeat.pid 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Mobility Information Engineering Lab at ETH Zürich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking regression models under spatial heterogeneity 2 | 3 | This repository accompanies our GIScience publication "Benchmarking regression models under spatial heterogeneity" (see reference below). In the code base, we provide 1) the script for reproducing our experiments on synthetic data, 2) the script for reproducing our benchmarking experiments on several real datasets and 3) an open-source Python implementation of spatial Random Forests. Each part is described in the following. 4 | 5 | #### Installation 6 | 7 | The required packages and our sprf package can be installed via pip in editable mode in a virtual environment with the following commands: 8 | ``` 9 | git clone https://github.com/mie-lab/spatial_rf_python.git 10 | cd spatial_rf_python 11 | python -m venv env 12 | source env/bin/activate 13 | pip install -e . 14 | ``` 15 | 16 | ### 1) Experiments on synthetic datasets 17 | 18 | To reproduce our analysis on synthetic data, run: 19 | ``` 20 | python scripts/synthetic_tests.py 21 | ``` 22 | All results will be saved in a single csv file named `synthetic_data_results.csv`. 23 | 24 | ### 2) Benchmarking on real datasets 25 | 26 | We use five public data sets to validate our results and to benchmark different algorithms. The datasets are provided as csv files in the [data](data) folder. They include 27 | * A [plants](https://github.com/BlasBenito/spatialRF/blob/main/data/plant_richness_df.rda) dataset 28 | * A [deforestation](https://github.com/FSantosCodes/GWRFC/tree/master/data) dataset 29 | * A [mortality rate](https://www.dropbox.com/s/lrz6og0ld2m64df/Data_GWR.7z?dl=0) dataset from [here](https://zia207.github.io/geospatial-r-github.io/geographically-wighted-random-forest.html) 30 | 31 | Please cite these sources if reusing their data. 
32 | 33 | Our code for benchmarking is provided as a [notebook](benchmarks.ipynb) and as a [script](scripts/benchmarks.py). To reproduce our experiments from the paper, run 34 | ``` 35 | python scripts/benchmarks.py 36 | ``` 37 | The results will be saved as csv files in a folder named `outputs`. 38 | 39 | ### 3) Spatial Random Forest implementation in Python 40 | 41 | This repository further provides Python implementations of Spatial Random Forests. Different approaches have been proposed in the literature, but here, we focus on the one by Georganos et al. termed *Geographical Random Forests*. We implement their approach, but since it is very inefficient to train one random forest per sample, we additionally implement a more efficient variant (which we simply call *Spatial Random Forests*): Instead of training one Random Forest per sample, we train a fixed number of random forests on spatially distinct sets of points. The prediction is then a weighted average of the tree-wise predictions, weighted by the distance of the test sample from the centers of each tree (see figure below). 42 | 43 | 44 | 45 | #### Usage 46 | 47 | We demonstrate the usage of the spatial Random Forests in the [demonstration notebook](sprf_demo.ipynb). 48 | 49 | The usage is analogous to other scikit-learn models, except that the coordinates must also be given as input. 
50 | ``` 51 | from sprf import SpatialRandomForest 52 | spatial_rf = SpatialRandomForest() 53 | spatial_rf.fit(train_x, train_y, train_coords) 54 | test_pred = spatial_rf.predict(test_x, test_coords) 55 | ``` 56 | 57 | To train a Geographical Random Forest as proposed by Georganos et al., we provide the corresponding class which can be used in the same way: 58 | 59 | ``` 60 | from sprf import GeographicalRandomForest 61 | geo_rf = GeographicalRandomForest() 62 | geo_rf.fit(train_x, train_y, train_coords) 63 | test_pred = geo_rf.predict(test_x, test_coords) 64 | ``` 65 | 66 | 67 | 68 | ### Citation 69 | 70 | If you use our work, please cite our paper with the following bibtex entry: 71 | 72 | ```bib 73 | @inproceedings{wiedemann2023benchmarking, 74 | title={Benchmarking regression models under spatial heterogeneity}, 75 | author={Wiedemann, Nina and Martin, Henry and Westerholt, René}, 76 | booktitle={12th International Conference on Geographic Information Science (GIScience 2023)}, 77 | year={2023}, 78 | } 79 | ``` 80 | -------------------------------------------------------------------------------- /assets/sprf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mie-lab/spatial_rf_python/20d88253883492d81e62ae550a01ff2b8aaeed26/assets/sprf.png -------------------------------------------------------------------------------- /data/atlantic.csv: -------------------------------------------------------------------------------- 1 | FIPS,x,y,Rate,POV,SMOK,PM25,NO2,SO2 2 | 13111,1056523.936,1376613.19,72,15.92,27.93333333,11.75533333,0.977666667,0.064184954 3 | 42115,1653442.064,2267301.476,59,12.22,26.96666667,9.026,1.499933333,0.03321098 4 | 42075,1633708.275,2096683.059,61,8.986666667,25.27333333,11.96333333,3.616466667,0.120281334 5 | 51683,1584048.852,1901443.401,62,7.86,22.9,12.73133333,3.621933333,0.118371127 6 | 36057,1735811.102,2409536.493,59,14.74666667,27.18,8.302,1.632733333,0.006404368 7 | 
13149,1003646.751,1193901.75,86,17.50666667,30.21333333,12.27133333,1.6258,0.138780485 8 | 37153,1463896.202,1452940.213,79,21.93333333,29.48,12.71066667,1.487333333,0.022304498 9 | 51735,1716542.533,1743450.717,61,4.646666667,21.23333333,11.84333333,2.520266667,0.057279707 10 | 37003,1320295.845,1533038.997,64,12.67333333,27.77333333,11.55666667,1.7928,0.090705483 11 | 37063,1520852.145,1580156.875,65,14.81333333,19.58,12.35666667,1.937466667,0.06037865 12 | 36093,1768117.621,2407434.527,62,11.24666667,23.78666667,8.767333333,2.0944,0.006131673 13 | 51590,1463355.628,1632109.561,72,21.37333333,29.71333333,11.47266667,2.113266667,0.126120542 14 | 51600,1597510.11,1916180.732,41,5.573333333,15.58,13.132,4.147,0.13029202 15 | 51530,1445710.523,1759556.916,61,13.16666667,31.7,10.12466667,1.666666667,0.028030571 16 | 37181,1555147.596,1623916.995,79,22.22,29.66,11.73133333,1.738866667,0.064398226 17 | 13003,1240686.99,999295.872,93,24.16,28.28666667,11.87,0.636,0.007735251 18 | 51081,1622534.834,1672340.575,78,20.35333333,28.42,11.92733333,1.584866667,0.043300666 19 | 36015,1568845.615,2286060.403,73,14.36,26.11333333,9.176666667,1.640066667,0.041286954 20 | 51790,1462544.758,1811357.668,63,13.55333333,25.05333333,10.766,1.5146,0.033238452 21 | 37157,1434932.939,1605912.888,78,14.52666667,31.1,11.486,2.297333333,0.1052822 22 | 13119,1161855.577,1335144.383,72,16.56666667,27.51333333,12.73133333,1.394466667,0.030687529 23 | 42001,1581875.064,2029400.655,56,7.6,24.32,12.94466667,3.292466667,0.113010656 24 | 51540,1514326.824,1806985.039,53,19.77333333,23.18666667,11.30533333,1.984533333,0.033495497 25 | 24510,1644880.387,1977469.45,87,20.96,28.99333333,14.09,5.1886,0.146629183 26 | 42105,1485798.53,2224296.623,68,13.48,27,9.63,1.641466667,0.056870652 27 | 13173,1227378.725,968358.8725,89,21.00666667,28.51333333,11.602,0.651733333,0.007598222 28 | 13169,1151573.414,1181611.119,62,11.68666667,24.82,12.27866667,1.468666667,0.139995631 29 | 
36109,1584749.021,2324857.448,52,15.81333333,16.21333333,9.181333333,1.777533333,0.032778433 30 | 36053,1638111.937,2388843.96,67,10.93333333,24.82666667,8.768,1.846866667,0.013151125 31 | 37007,1432790.709,1443984.99,66,20.93333333,28.37333333,12.72266667,1.685533333,0.029624066 32 | 37195,1611858.913,1559545.423,71,19.19333333,25.69333333,13.01866667,1.695066667,0.017671372 33 | 54077,1385413.786,1946823.68,62,16.44,28.66,10.95533333,2.3982,0.250782854 34 | 51089,1420848.109,1636040.467,65,14.69333333,30.15333333,10.89533333,1.855,0.091236621 35 | 51099,1622210.598,1854839.183,74,6.793333333,21.59333333,12.08266667,2.948666667,0.125150723 36 | 42047,1431155.363,2177641.106,64,8.88,27.58666667,10.53466667,1.742266667,0.115719854 37 | 42053,1381551.31,2178527.873,74,17.62666667,30,10.91066667,2.315866667,0.122241604 38 | 13247,1099839.163,1245707.827,62,11.68666667,22.44,13.13333333,3.7826,0.135322848 39 | 42079,1653351.821,2193500.035,65,12.32666667,28.84666667,9.622666667,2.4324,0.075286871 40 | 36005,1834509.187,2196946.254,45,27.97333333,23.25333333,11.748,11.64453333,0.039877724 41 | 42113,1604588.651,2214031.262,63,12.98,26.98666667,9.344,1.614866667,0.086849193 42 | 42021,1444689.53,2074193.143,57,13.27333333,26.94666667,11.18733333,3.099533333,0.263529537 43 | 45037,1285877.942,1284646.665,58,18.08,26.04,12.18666667,1.290266667,0.024738322 44 | 42037,1622394.822,2172315.89,56,12.44666667,25.78,10.06,2.702,0.115677531 45 | 51003,1508276.716,1804173.952,53,7.726666667,16.52666667,11.18733333,1.7494,0.033366864 46 | 37151,1445280.416,1529734.553,71,13.18,28.74666667,12.40466667,2.2968,0.057280666 47 | 36021,1816659.884,2353655.649,64,10.28,23.51333333,8.988,2.0608,0.010004799 48 | 34023,1800141.087,2141559.363,53,7.126666667,18.71333333,11.63666667,6.869533333,0.050961628 49 | 13175,1218893.132,1127443.529,70,20.53333333,25.94666667,12.17733333,0.846866667,0.030264103 50 | 13133,1179492.3,1247682.041,60,20.29333333,25.23333333,12.434,1.344666667,0.101512916 51 
| 13017,1201807.39,1045952.93,96,23.05333333,28.68666667,12.47533333,0.646466667,0.01071437 52 | 13277,1177578.936,1008758.602,70,21.71333333,26.18,12.75466667,0.709266667,0.008984232 53 | 13065,1262954.262,959490.1139,104,23.44,29.1,11.29533333,0.5954,0.010513754 54 | 51570,1623811.322,1739869.115,72,7.573333333,28.4,11.83066667,2.799066667,0.079022377 55 | 37081,1439791.529,1570775.945,61,14.43333333,23.26666667,12.17266667,2.711333333,0.088706873 56 | 51640,1330087.373,1619263.778,79,19.74666667,31.58,9.587333333,1.330133333,0.042831727 57 | 51670,1631867.742,1744490.814,101,16.50666667,32.39333333,11.836,3.2466,0.08019077 58 | 13023,1181765.025,1119115.174,85,17.57333333,27.9,12.23266667,0.881733333,0.038834418 59 | 13029,1363361.497,1097965.828,92,10.56,23.74666667,11.306,0.949533333,0.010313017 60 | 13109,1319776.971,1107445.306,75,23.98,28.52666667,11.85,0.826466667,0.011627792 61 | 13171,1097548.858,1180372.835,80,16.28666667,28.16666667,12.32066667,1.6782,0.130991601 62 | 13195,1167461.434,1307970.612,85,14.76,27.47333333,12.796,1.343933333,0.043572274 63 | 51720,1176211.402,1625932.365,87,19.52666667,31.21333333,9.728666667,2.209,0.057382373 64 | 51830,1683120.473,1752365.459,48,18.90666667,17.66666667,11.94733333,2.523466667,0.06950066 65 | 36043,1681804.141,2456217.556,60,13.16,25.12,7.756666667,1.204733333,0.005746315 66 | 36007,1643936.98,2304050.692,58,14.11333333,23.94666667,8.944,1.687333333,0.024850571 67 | 51750,1353126.639,1674729.088,78,23.04,22.76666667,9.702,1.917866667,0.034742502 68 | 54047,1253566.396,1687978.492,106,34.46666667,34.96,9.408666667,1.168266667,0.03828383 69 | 54033,1329015.139,1916161.457,84,17.36666667,28,11.68333333,2.949733333,0.210726373 70 | 54007,1310840.99,1846926.185,78,21.61333333,30.92666667,10.63066667,1.652,0.102185634 71 | 13021,1142177.553,1155786.935,75,21.44,25.35333333,12.65533333,1.3396,0.100164584 72 | 51550,1731568.728,1694759.535,72,8.193333333,21.58666667,10.29,2.3896,0.043557652 73 | 
36049,1634724.239,2487703.529,61,13.64,26.76666667,7.936666667,1.520533333,0.006190014 74 | 37029,1746713.975,1664623.636,66,9.473333333,23.39333333,9.948,1.659933333,0.024237535 75 | 42007,1305889.304,2071193.128,65,10.99333333,27.33333333,13.158,5.270933333,0.312501187 76 | 13009,1179525.219,1190286.64,68,20.94666667,26.47333333,12.322,1.3204,0.1200595 77 | 42025,1682297.897,2170140.233,68,10.06,29.44666667,9.887333333,3.412066667,0.072730595 78 | 13177,1115419.382,1036903.959,77,9.926666667,24.68,12.45266667,0.884333333,0.012986966 79 | 24011,1720531.915,1943797.185,80,11.59333333,25.52,12.92866667,4.189066667,0.08052073 80 | 13083,949876.9767,1363028.669,90,14.02666667,26.56,12.678,2.0276,0.097772433 81 | 13131,1119076.387,935895.365,77,22.2,27.09333333,11.876,0.645266667,0.00789093 82 | 54089,1317582.555,1729447.166,70,24.83333333,30.33333333,9.448666667,1.3772,0.037281857 83 | 13143,989780.5921,1248202.62,89,17,30.24666667,12.64266667,1.5934,0.204562146 84 | 13227,1048750.481,1330599.247,74,11.52666667,26.40666667,12.84866667,1.5288,0.122754787 85 | 42109,1574275.135,2130545.407,52,10.56,25.57333333,11.046,3.1288,0.144884251 86 | 51690,1421799.935,1636136.484,80,20.12666667,28.96,10.94466667,1.848133333,0.090804627 87 | 36017,1652705.228,2344047.232,63,14.2,26.08666667,8.732666667,1.443333333,0.016552842 88 | 13275,1149153.54,938437.0275,78,20.18,25.81333333,11.886,0.669133333,0.007555336 89 | 37041,1717104.377,1631166.367,69,18.54666667,26.00666667,10.55933333,1.296266667,0.020002733 90 | 37169,1394188.124,1599626.409,77,11.90666667,30.93333333,11.198,1.993266667,0.087910654 91 | 54069,1294167.135,2002786.432,76,15.28,28.36666667,13.43333333,4.545066667,0.406559049 92 | 51775,1393922.749,1700093.235,71,8.753333333,26.52666667,10.322,2.206066667,0.034263399 93 | 51630,1593702.528,1852059.96,69,14.9,24.49333333,12.024,2.721933333,0.106019507 94 | 13097,1031382.337,1242709.45,73,10.76,24.77333333,13.25333333,2.909266667,0.206280792 95 | 
37069,1571748.504,1594889.666,72,14.1,26.08,12.15733333,1.711933333,0.038521283 96 | 13139,1109248.869,1321624.535,62,13.34,24.87333333,13.088,1.556533333,0.054985529 97 | 37149,1242895.468,1448685.554,53,12.36,23.83333333,11.25866667,1.763133333,0.059930607 98 | 13243,1058282.459,1027981.069,69,27.06,27.09333333,11.98533333,0.720533333,0.013834109 99 | 13283,1252655.898,1125410.716,86,26.58666667,29.05333333,12.074,0.699733333,0.019636071 100 | 37109,1323677.525,1484314.988,66,11.66666667,26.34666667,12.20333333,2.459466667,0.115437795 101 | 36035,1731906.788,2432891.232,65,14.28,27.58666667,8.024,1.350866667,0.005243562 102 | 36097,1553949.123,2311832.582,67,12.36666667,25.81333333,9.096666667,1.772,0.039672887 103 | 37077,1535005.921,1613210.202,76,13.83333333,27.14666667,11.71133333,1.764733333,0.075356169 104 | 51730,1625527.449,1733268.28,97,20.62,29.52666667,11.636,2.496133333,0.074446439 105 | 13101,1248348.27,934440.3294,73,23.71333333,28.58666667,11.18266667,0.6676,0.009702606 106 | 42009,1473184.117,2023798.399,52,11.92666667,26.82666667,11.02666667,2.842666667,0.158886417 107 | 13189,1243553.429,1245820.702,94,19.44666667,28.29333333,12.42133333,1.410066667,0.045470302 108 | 13081,1148323.713,1057203.733,78,27.50666667,27.83333333,12.63066667,0.806333333,0.014370813 109 | 51710,1730261.754,1719321.773,78,19.39333333,25.38666667,11.14066667,2.617333333,0.049621813 110 | 51065,1535867.509,1788758.043,55,6.973333333,23.19333333,11.23133333,1.841466667,0.041039072 111 | 37001,1474728.089,1572762.401,69,13.5,27,11.9,2.298466667,0.078810443 112 | 13179,1361780.107,1076726.915,76,17.19333333,25.43333333,11.11933333,0.8934,0.009774712 113 | 13239,1032302.684,1036603.891,93,23.98666667,28.82666667,12.018,0.760133333,0.015273258 114 | 51137,1549999.877,1837565.678,68,9.053333333,25.82666667,11.49733333,2.199933333,0.057854525 115 | 54017,1301653.502,1910050.185,76,19.32,31.52666667,11.968,3.158266667,0.223853046 116 | 
13201,1068274.984,962213.1331,78,21.99333333,26.76666667,12.52266667,0.661933333,0.010339054 117 | 34011,1768774.33,2011919.092,70,15.23333333,28.11333333,12.91466667,3.487733333,0.045609201 118 | 13135,1095759.988,1279796.436,53,9.273333333,19.22666667,13.40666667,3.243666667,0.104596959 119 | 42019,1337590.339,2102567.407,59,8.333333333,23.4,12.604,4.383266667,0.286625125 120 | 36091,1776076.011,2442465.765,63,6.593333333,21.13333333,8.527333333,2.015266667,0.004390669 121 | 37107,1646302.997,1513043.27,73,20.23333333,27.35333333,12.118,1.4862,0.012877913 122 | 13215,1037083.188,1109400.014,74,17.92,24.95333333,12.68866667,0.9664,0.036799276 123 | 42077,1699112.311,2138770.953,56,10.36666667,24.28666667,11.43933333,4.451666667,0.072346667 124 | 13089,1079948.559,1256389.654,52,15.00666667,18.88,13.422,4.4912,0.140996438 125 | 51183,1642946.115,1704445.424,84,19.40666667,27.91333333,11.49266667,1.908266667,0.058643347 126 | 51840,1517023.255,1936981.425,78,13.45333333,27.10666667,11.83933333,2.989666667,0.081773593 127 | 37093,1509167.122,1462165.108,82,17.87333333,27.16666667,12.834,1.599666667,0.017354063 128 | 42083,1429885.045,2220922.964,73,14.33333333,28.52666667,10.05,1.466866667,0.071557137 129 | 45061,1434148.183,1352336.958,81,26.19333333,29.28666667,12.572,1.3402,0.038363087 130 | 45051,1552375.47,1345424.088,70,15.16,29.98666667,10.71666667,1.072,0.027848615 131 | 37031,1742993.493,1485548.212,75,12.74666667,26.84,8.127333333,0.82,0.007016134 132 | 45005,1354654.169,1206555.333,72,35.26,27.74666667,11.34533333,0.9478,0.02473639 133 | 37039,1075668.387,1409410.991,69,17.08666667,27.97333333,11.29533333,0.9582,0.047794317 134 | 36027,1820406.179,2298754.25,60,8.1,21.64,9.426,2.402666667,0.01841173 135 | 13223,1019493.25,1265907.639,74,7.593333333,25.84,13.292,2.622066667,0.220265044 136 | 13117,1082914.224,1307948.074,52,5.6,18.55333333,13.46266667,1.844266667,0.090544611 137 | 
42131,1643088.775,2230410.228,64,10.86,26.07333333,9.138666667,2.1882,0.055272458 138 | 54075,1378205.378,1816237.449,74,17.68,28.20666667,9.215333333,0.9346,0.044613198 139 | 13053,1047514.925,1092319.577,88,18.64,20.62666667,12.27533333,0.788733333,0.028402561 140 | 54019,1291852.933,1767655.85,95,21.61333333,31.48666667,9.795333333,1.633333333,0.070849713 141 | 54101,1339894.375,1828331.821,89,27.11333333,31.50666667,9.648,1.1206,0.062105957 142 | 37185,1580830.177,1632431.425,61,22.88,27.20666667,11.71466667,1.617666667,0.045477709 143 | 13191,1375560.829,1041347.409,69,19.58666667,27.81333333,10.686,0.7158,0.01113753 144 | 13311,1111168.399,1358919.711,61,13.81333333,24.90666667,11.89466667,1.216533333,0.036456967 145 | 51153,1585461.893,1896563.543,62,5.58,20.53333333,12.45733333,3.242533333,0.109487893 146 | 37085,1534801.605,1506770.297,76,16.22,27.13333333,13.02066667,1.676866667,0.018872703 147 | 45067,1516215.834,1357000.88,79,24.65333333,28.66,11.63733333,1.219466667,0.028252022 148 | 36123,1530166.579,2334378.468,64,14.02,24.79333333,9.015333333,1.724466667,0.039520381 149 | 13267,1305712.571,1092823.631,92,25.6,28.5,11.97,0.7536,0.010500177 150 | 13321,1145798.959,1015128.398,71,19.61333333,27.99333333,12.538,0.7988,0.010234082 151 | 45035,1440375.919,1230614.934,64,11.23333333,24.40666667,10.73666667,1.241466667,0.044484932 152 | 36069,1509820.646,2355302.693,64,8.846666667,21.54,9.15,1.925866667,0.038785772 153 | 13057,1050761.13,1306115.374,61,6.9,22.27333333,13.41933333,2.4658,0.152438517 154 | 13161,1255596.036,1058523.507,99,20.23333333,27.54,12.16466667,0.671133333,0.009300602 155 | 13069,1239676.066,1027479.846,74,22.30666667,27.84666667,12.26266667,0.6426,0.008462469 156 | 36073,1427249.188,2384624.13,69,12.96,28.18,10.236,3.3304,0.044723983 157 | 36101,1515917.072,2289805.219,73,14.04,25.55333333,8.947333333,1.434266667,0.040500091 158 | 34021,1780222.646,2119053.231,54,9.12,19.90666667,11.80266667,5.281866667,0.055527646 159 | 
13255,1081737.822,1199158.137,78,18.62,28.1,12.452,1.939,0.149703155 160 | 54051,1294707.942,1976028.343,74,15.62,30.70666667,13.07266667,4.182133333,0.386921674 161 | 54029,1290225.113,2050386.631,77,12.44,30.7,12.98533333,4.887533333,0.334911688 162 | 42133,1621663.153,2043017.974,59,8.046666667,24.26666667,12.976,3.753866667,0.127608038 163 | 37115,1186006.548,1505916.697,64,16.96666667,27.21333333,10.14666667,1.2564,0.045084359 164 | 37133,1676336.57,1460888.579,80,15.22,26.54,9.95,0.993533333,0.013750033 165 | 54013,1274478.73,1857390.209,87,22.74,32.95333333,11.51933333,2.135666667,0.184781843 166 | 51125,1484760.238,1773042.453,70,11.76,24.68,10.714,1.6068,0.029505235 167 | 51680,1466332.778,1725627.038,73,18.90666667,25.04,11.02066667,1.818466667,0.035563315 168 | 13281,1108099.219,1389069.294,61,13.9,24.78666667,11.24733333,0.8272,0.03193818 169 | 13225,1133468.486,1127832.389,90,21.42,25.57333333,12.34,1.1298,0.062417203 170 | 51063,1373699.002,1656374.609,49,12.24,26.48,9.65,1.364,0.049349769 171 | 36029,1397300.328,2323592.893,67,13.28,24.30666667,10.39933333,2.8934,0.05711464 172 | 13151,1091082.441,1221936.754,68,7.786666667,22.52666667,12.828,2.972,0.155174237 173 | 51520,1222120.615,1597141.253,94,18.8,28.81333333,10.36266667,2.103133333,0.045761823 174 | 54065,1501884.099,1978349.929,80,12.14,29.8,11.454,3.229733333,0.09698803 175 | 51103,1693664.731,1807551.068,66,13.46,23.46,11.41333333,1.771333333,0.056949109 176 | 36121,1439265.332,2324433.93,60,10.36,27.28666667,9.487333333,1.900466667,0.048157697 177 | 42081,1561950.648,2193635.395,61,12.75333333,26.18666667,9.878666667,2.324466667,0.097097508 178 | 51017,1406203.336,1789944.572,69,9.013333333,27.05333333,9.51,1.360866667,0.031966773 179 | 51131,1752689.964,1775435.961,94,19.72666667,25.65333333,10.22733333,2.277266667,0.035814662 180 | 37057,1407800.143,1532761.676,73,13.04666667,28.97333333,12.45466667,2.689266667,0.076664917 181 | 
37155,1528493.411,1422787.013,78,27.67333333,31.03333333,12.62733333,1.4948,0.01660844 182 | 42097,1602057.96,2145493.006,63,12.42,28.08666667,10.73533333,2.723666667,0.147714748 183 | 42121,1341070.721,2158752.11,72,14.47333333,29.29333333,11.614,3.263333333,0.145555821 184 | 51197,1311583.651,1644628.34,72,13.60666667,29.03333333,9.462666667,1.188666667,0.030809531 185 | 54045,1221701.597,1734554.266,120,22.09333333,33.81333333,10.21066667,2.0086,0.088614108 186 | 13213,1018911.294,1363530.392,97,15.28,31.04666667,12.724,1.828133333,0.102093803 187 | 37013,1709880.255,1554745.801,77,18.38666667,25.44666667,10.264,1.271133333,0.009340984 188 | 37113,1132960.345,1418734.132,58,14.99333333,26.43333333,10.33933333,0.743533333,0.027987447 189 | 36009,1412014.758,2267732.697,64,14.83333333,27.32,9.815333333,1.8274,0.062639113 190 | 36055,1471985.02,2381307.728,58,13.00666667,21.5,9.776,3.0206,0.040782406 191 | 34015,1757950.477,2048967.108,74,6.8,24.17333333,12.68266667,5.456533333,0.053715548 192 | 13037,1073561.977,1003649.305,81,30.94,26.66,12.17266667,0.751066667,0.011407108 193 | 54039,1248201.79,1795739.978,87,14.71333333,28.16,11.28733333,2.838533333,0.173502213 194 | 11001,1620145.774,1926595.621,62,17.8,22.08,13.512,5.445133333,0.149480163 195 | 45085,1427140.308,1323124.199,68,17.94,26.52,12.34666667,1.3676,0.047499713 196 | 51043,1533134.273,1933160.16,68,7.1,23.06666667,12.00733333,3.052133333,0.084447852 197 | 54021,1295165.507,1869902.771,71,23.96666667,29.19333333,11.466,2.0594,0.154926879 198 | 34033,1743771.149,2031005.452,73,9.773333333,25.06666667,13.34266667,4.574466667,0.057401127 199 | 13193,1116496.715,1102091.278,73,27.00666667,28.44666667,12.25333333,1.0106,0.036778614 200 | 13263,1066361.408,1134167.661,84,20.86,26.7,12.35666667,0.9238,0.062907262 201 | 13093,1142014.884,1082724.612,54,25.5,28.05333333,12.43866667,0.8602,0.022368158 202 | 51133,1693700.158,1825328.974,63,12.88,24.08666667,11.51333333,1.921,0.070302289 203 | 
37105,1505940.861,1513658.565,73,14.4,26.29333333,12.83333333,1.705,0.02582737 204 | 45027,1446754.817,1298002.946,71,24.02,26.77333333,11.794,1.274266667,0.053875999 205 | 37127,1600385.578,1587248.999,70,15.25333333,25.6,12.644,1.635666667,0.024683401 206 | 51620,1676303.202,1683927.061,73,18.78,26.34666667,11.38333333,1.793933333,0.04932482 207 | 51049,1545565.804,1752895.876,67,15.27333333,27.97333333,11.28133333,1.7732,0.049144742 208 | 51037,1519860.206,1691095.398,77,16.97333333,27.1,11.13466667,1.7392,0.063611694 209 | 51115,1710730.906,1777230.966,60,8.986666667,24.13333333,11.85933333,1.879933333,0.054094095 210 | 51119,1686849.317,1794380.822,75,12.44,24.02,11.59866667,2.022266667,0.070397411 211 | 51015,1456078.917,1810843.496,63,8.226666667,25.95333333,10.19066667,1.487066667,0.0339999 212 | 54003,1523121.264,1971372.329,85,11.32,29.97333333,12.18866667,3.4102,0.090112598 213 | 37163,1586724.023,1473472.829,71,19.37333333,27.38,12.51066667,1.355133333,0.016449593 214 | 51117,1552612.154,1659453.181,74,16.14666667,28.27333333,11.50466667,1.677533333,0.071183425 215 | 37035,1321432.026,1503977.361,69,12.14,25.78,11.99,2.247333333,0.108931048 216 | 13049,1318756.006,952508.0788,83,23.02666667,30.75333333,10.73533333,0.753,0.023360332 217 | 37191,1611551.667,1520555.882,76,17.38666667,26.9,12.89266667,1.736933333,0.014442434 218 | 36023,1613685.107,2347274.963,65,14.45333333,24.92,8.912,1.686666667,0.02249919 219 | 37059,1376002.495,1542989.916,60,10.05333333,25.97333333,12.12933333,2.326533333,0.09012095 220 | 13291,1086530.75,1376942.086,62,14.79333333,25.21333333,11.34733333,0.900933333,0.043418764 221 | 36037,1435433.15,2357644.153,67,10.15333333,25.44,10.012,2.558533333,0.045821095 222 | 51159,1666241.838,1825977.972,68,17.26666667,28.79333333,11.484,2.175866667,0.08299332 223 | 13115,983511.4296,1300252.571,81,17.04,26.82666667,13.256,2.2278,0.189847276 224 | 45009,1378521.094,1235921.641,65,25.44,25.5,11.29066667,1.0986,0.034552977 225 | 
42023,1467229.207,2185659.851,63,11.69333333,28.05333333,10.09733333,1.740466667,0.091801556 226 | 24009,1666038.768,1894678.827,68,5.46,22.88666667,12.58533333,2.8886,0.133364234 227 | 51127,1652922.898,1773161.116,80,5.626666667,24.48,11.34733333,2.578533333,0.083857622 228 | 13055,968986.5888,1322457.969,87,17.49333333,30.50666667,13.202,1.927666667,0.15694681 229 | 13241,1138683.737,1389143.35,68,14.74,24.92,11.10266667,0.7984,0.026398006 230 | 42117,1537131.813,2237598.109,60,13.80666667,25.39333333,9.308666667,1.612666667,0.05207866 231 | 45021,1295852.481,1430438.159,79,17.1,30.26666667,12.254,1.971,0.074208924 232 | 51149,1640404.662,1734227.045,67,8.906666667,24.88666667,11.55133333,2.593866667,0.074335136 233 | 45089,1492001.499,1300584.526,73,28.44666667,26.08666667,10.87133333,1.212133333,0.050556954 234 | 45003,1319815.773,1263983.766,68,15.08,25.48666667,11.71933333,1.271333333,0.026713313 235 | 13145,1031170.742,1134100.571,60,9.393333333,21.38,12.39666667,0.952266667,0.057190997 236 | 42049,1307535.891,2220156.836,69,14.02666667,27.22666667,11.41266667,2.5386,0.078515267 237 | 36067,1594290.542,2390104.004,66,12.77333333,23.46,9.342,2.217066667,0.019415654 238 | 37131,1642325.495,1646805.024,64,23.24666667,27.13333333,11.928,1.4642,0.0309088 239 | 51036,1650666.498,1755747.35,73,10.53333333,27.4,11.52333333,2.8706,0.080230738 240 | 51067,1414401.142,1670082.516,55,11.74666667,26.65333333,10.43333333,1.821,0.06122508 241 | 34019,1755884.159,2146097.44,46,3.413333333,16.98,10.99133333,4.996733333,0.063308726 242 | 13159,1135652.349,1212270.721,68,15.75333333,26.81333333,12.39266667,1.7292,0.167261261 243 | 37173,1121743.842,1455205.271,75,17.30666667,30.68,10.22066667,0.779933333,0.038159153 244 | 37187,1727246.941,1596012.316,69,22.26,27.06,9.89,1.518933333,0.012975895 245 | 13015,1017773.77,1301385.213,86,12.5,27.63333333,13.48733333,2.540066667,0.196600637 246 | 51071,1337018.762,1693779.243,83,11.37333333,27.88,9.333333333,1.865133333,0.03055773 
247 | 51740,1723227.177,1712308.886,81,16.80666667,26.14666667,11.14866667,2.5748,0.051780903 248 | 24005,1639584.197,1994032.373,63,7.753333333,22.46,13.484,4.4376,0.135596137 249 | 24015,1694933.488,2018400.3,79,8.46,26.07333333,13.76,4.755933333,0.089461949 250 | 37079,1637921.407,1539543.085,68,20.49333333,26.16,12.72733333,1.618466667,0.013556089 251 | 24019,1713827.547,1896169.761,77,14.64,26.37333333,12.348,3.3664,0.071502067 252 | 37111,1247170.21,1494736.273,70,14.94,29.22666667,10.408,1.6632,0.06043457 253 | 13163,1256171.786,1199257.299,77,23.84,27.06,12.14933333,1.0764,0.040545905 254 | 13289,1169114.232,1143614.233,73,19.92,28.86,12.18733333,1.0758,0.070710838 255 | 51107,1563959.503,1936625.552,50,3.26,14.90666667,12.896,3.701533333,0.102731287 256 | 37139,1742060.758,1653103.173,74,18.08666667,26.09333333,10.12533333,1.4822,0.024836003 257 | 37005,1314686.836,1596855.37,63,17.64,28.08666667,9.848666667,1.2924,0.044066831 258 | 54009,1294492.84,2022929.839,76,12.44666667,30.58666667,13.258,4.1472,0.391901706 259 | 37067,1397694.609,1569407.773,67,14.14666667,25.45333333,12.11466667,2.743333333,0.090139443 260 | 45063,1346738.572,1308601.146,66,11.12666667,24.78666667,12.044,1.4006,0.032624567 261 | 10001,1737530.538,1972072.594,74,11.93333333,26.07333333,13.42266667,3.421533333,0.06929497 262 | 24027,1619605.459,1966057.211,42,4.446666667,13.88,13.384,4.7924,0.140965119 263 | 24029,1694052.145,1981950.218,65,11.78,22.96,13.53266667,4.581533333,0.102078797 264 | 24031,1599218.37,1948856.135,44,5.82,13.51333333,13.38066667,4.832066667,0.130191437 265 | 37091,1679783.717,1647579.887,70,23.35333333,26.52,11.362,1.4432,0.030431981 266 | 36003,1464342.964,2278548.815,67,16.50666667,24.69333333,9.233333333,1.309066667,0.044734191 267 | 37061,1626897.935,1474774.311,65,20.34,27.73333333,11.69,1.2014,0.016240834 268 | 13121,1057680.928,1255772.741,58,16.11333333,18.88666667,13.46466667,3.747733333,0.166282138 269 | 
54001,1363554.584,1904822.789,72,20.64,29.07333333,10.59466667,2.035666667,0.16523128 270 | 51111,1557688.929,1690682.635,68,19.68666667,29.47333333,11.22666667,1.625133333,0.057253665 271 | 51169,1181659.323,1602179.406,84,16.82,29.69333333,10.52666667,2.512133333,0.059581884 272 | 24037,1668389.554,1867466.358,70,7.686666667,22.06666667,12.39,2.421866667,0.101043297 273 | 45077,1199434.591,1398170.842,64,14.14,24.58666667,11.932,1.527933333,0.027727574 274 | 45087,1301978.779,1390787.374,79,16.78,30.08666667,12.28133333,1.3432,0.041707734 275 | 45083,1264596.645,1412493.179,73,14.05333333,26.68666667,12.328,1.836066667,0.051226487 276 | 45013,1422969.875,1149284.841,50,11.61333333,21.56666667,10.86,0.9116,0.015457588 277 | 45053,1394211.904,1150416.926,60,23.18666667,27.96,10.896,0.962333333,0.016498578 278 | 37051,1545014.124,1472239.09,76,16.4,25.54,13.02733333,1.6628,0.016146991 279 | 10003,1719077.989,2024828.341,66,9.393333333,23.24,14.13333333,4.9514,0.068458668 280 | 13077,1036448.997,1204099.492,66,9.92,23.62666667,12.62466667,1.9452,0.160924834 281 | 13233,989224.3021,1271527.442,100,17.14666667,29.36,12.866,1.945133333,0.211033087 282 | 24003,1652614.186,1944776.741,68,5.713333333,21.74,13.526,4.5748,0.141986298 283 | 37087,1166148.013,1468980.246,61,14.2,28.46666667,9.826666667,1.024666667,0.036620953 284 | 54087,1257108.312,1839934.89,82,21.40666667,33.21333333,11.66733333,2.575266667,0.21716079 285 | 37197,1361162.478,1566766.534,69,12.86666667,29.06,11.572,2.0944,0.083312892 286 | 10005,1762733.34,1928788.66,69,11.62666667,27.42,12.29266667,3.601333333,0.057635935 287 | 37089,1214308.512,1450981.49,56,12.14666667,24.26666667,10.62333333,1.5424,0.045088568 288 | 13157,1134916.515,1304291.6,83,13.32,26.22,13.05333333,1.7394,0.057007757 289 | 37161,1263152.818,1465512.044,69,16.99333333,28.60666667,11.21066667,1.900533333,0.075897961 290 | 51141,1385083.588,1629473.103,65,15.12666667,29.87333333,10.398,1.783533333,0.070270986 291 | 
42067,1552118.755,2098795.276,51,9.493333333,26.25333333,11.24066667,3.056866667,0.106863053 292 | 42071,1658497.002,2064716.084,52,8.76,23.22666667,12.81133333,4.389,0.108056667 293 | 45019,1486274.334,1210584.557,62,15.84666667,23.36666667,9.590666667,1.022133333,0.027919816 294 | 37193,1316547.473,1564807.212,68,16.15333333,28.67333333,10.69733333,1.5656,0.06409672 295 | 13141,1199327.046,1215604.883,72,28.94,27.47333333,12.25066667,1.3462,0.101570204 296 | 13011,1137907.672,1329501.639,60,14.30666667,27.9,12.83866667,1.4372,0.037617526 297 | 37177,1759744.963,1602276.313,70,26.16,29.27333333,8.868,1.401666667,0.011368369 298 | 42085,1302019.114,2140915.863,62,13,27.64,12.19066667,3.375133333,0.148678507 299 | 42087,1532755.461,2104065.458,63,13.76,28.11333333,11.04066667,2.893466667,0.10175133 300 | 42027,1509494.377,2134790.064,49,14.48666667,18.35333333,10.54133333,2.778333333,0.115801529 301 | 42029,1701412.133,2065940.824,54,5.72,18.22666667,13.038,5.0668,0.075305733 302 | 45033,1508707.785,1391005.584,94,25.06666667,30.48,12.386,1.3276,0.019207342 303 | 42031,1372665.672,2140579.475,58,14.4,25.1,11.74466667,3.210266667,0.211933187 304 | 45039,1351833.839,1365215.886,76,19.30666667,27.66,12.394,1.2808,0.03338413 305 | 13013,1123623.574,1286900.033,82,11.34,27.31333333,13.074,2.222866667,0.079718502 306 | 51147,1534615.893,1718046.143,70,20.8,25.88,11.206,1.9544,0.047593932 307 | 42093,1602149.321,2165666.682,54,9.88,27.02666667,10.764,3.075933333,0.140087713 308 | 42095,1718996.522,2159377.207,58,8.12,24.1,11.07466667,4.218066667,0.066797708 309 | 37175,1187967.651,1432054.99,50,13.48,24.61333333,10.37066667,1.1282,0.031857878 310 | 51077,1303279.472,1613812.512,74,15.95333333,29.81333333,9.492,1.106466667,0.035797871 311 | 24033,1636048.467,1921250.675,44,8.4,19.88666667,12.94933333,5.0034,0.153599393 312 | 34007,1770779.48,2061835.737,67,11.20666667,24.21333333,12.352,6.069333333,0.051329366 313 | 
37199,1220636.087,1515441.143,59,17.25333333,27.62,9.689333333,1.147133333,0.045491152 314 | 13245,1282819.849,1237590.181,83,21.76,26.13333333,12.23933333,1.5142,0.02978399 315 | 13265,1205964.13,1249909.645,68,26.33333333,29.1,12.34733333,1.231133333,0.072741572 316 | 13279,1279101,1097466.725,96,24.2,26.5,12.12866667,0.727733333,0.011724619 317 | 36013,1356925.681,2255464.747,60,16.2,26.81333333,10.80266667,2.7216,0.07880145 318 | 37171,1354669.092,1594547.909,77,15.75333333,29.58,10.79533333,1.801666667,0.068460218 319 | 42043,1606103.319,2096485.218,61,10.52,23.40666667,11.74733333,3.305466667,0.133408198 320 | 51011,1499577.028,1728467.534,65,12.92666667,27.3,10.992,1.869533333,0.038460886 321 | 37009,1282966.394,1585422.594,60,16.16666667,27.18,9.619333333,1.268333333,0.038737395 322 | 51033,1611502.058,1824508.555,80,10.58,26.24,11.56733333,2.698,0.099649543 323 | 36047,1833913.467,2172024.905,42,22.83333333,21.98,12.294,9.603133333,0.038231252 324 | 51173,1272812.077,1630310.37,88,15.79333333,29.8,9.416,1.043533333,0.032801523 325 | 45059,1270662.378,1362776.656,75,17.32666667,28.74,12.48733333,1.290666667,0.025618834 326 | 51101,1640681.282,1793703.591,77,7.086666667,25.39333333,11.57666667,2.6488,0.091064661 327 | 45075,1397734.609,1264367.994,63,22.82666667,25.24,11.50133333,1.3372,0.045034246 328 | 51059,1599741.123,1914646.415,41,5.206666667,15.58,13.124,4.197266667,0.132658801 329 | 37023,1276514.134,1506834.895,71,15.09333333,29.36666667,10.76,1.639733333,0.076762768 330 | 51177,1581708.461,1836635.824,69,6.286666667,22.98666667,11.66866667,2.361,0.084119819 331 | 24041,1700070.455,1927775.068,52,8.58,21.46,13.02,3.826066667,0.091707227 332 | 42057,1506294.912,2020785.076,61,10.59333333,26.62666667,11.404,3.006933333,0.107416161 333 | 37159,1382960.571,1511317.321,72,14.22666667,27.73333333,12.72866667,2.4164,0.085937188 334 | 37117,1680413.528,1588520.683,79,20.81333333,25.64666667,11.318,1.585666667,0.014973045 335 | 
13293,1085492.191,1156919.191,73,18.34,29.43333333,12.21066667,1.129666667,0.094517932 336 | 13313,998958.9498,1363198.725,79,14.55333333,27.76,13.13866667,2.328133333,0.111123155 337 | 51179,1593460.956,1865910.319,68,4.833333333,20.14,12.20866667,2.85,0.112266356 338 | 37027,1287060,1531518.525,80,14.56666667,29.48,10.84733333,1.598333333,0.069409705 339 | 42119,1570613.582,2151895.609,43,11.78,24.94666667,10.69266667,2.865866667,0.139606984 340 | 36083,1814336.67,2406093.9,74,10.66,23.66,8.708666667,2.2682,0.005980841 341 | 37045,1296774.667,1462918.922,72,17,27.89333333,12.01733333,2.031333333,0.098139093 342 | 24045,1750040.409,1892975.815,81,13.31333333,24.7,11.58533333,3.467466667,0.056606171 343 | 37119,1362697.348,1463374.166,56,12.08,19.47333333,13.03333333,3.1472,0.086533074 344 | 13297,1124773.134,1263220.213,68,11.93333333,26.28666667,12.866,2.137866667,0.108604907 345 | 42123,1372531.962,2211179.994,62,11.65333333,27.86,10.67733333,2.4872,0.087274143 346 | 34039,1803087.26,2167525.559,50,9.12,20.5,12.02733333,9.225,0.049706539 347 | 13307,1073563.458,1061689.581,66,19.30666667,26.8,12.05666667,0.7182,0.017903297 348 | 13309,1242618.537,1091702.72,67,31.72666667,28.05333333,12.11666667,0.746933333,0.013975894 349 | 42003,1340289.628,2052713.595,67,11.39333333,25.3,13.14933333,5.3952,0.405722665 350 | 42129,1385974.15,2042662.979,60,9.586666667,25.29333333,12.426,3.954466667,0.390517557 351 | 13085,1075715.83,1331660.446,74,10.51333333,25.73333333,12.78533333,1.252933333,0.081379445 352 | 13063,1071151.413,1229322.893,69,16.13333333,24.11333333,13.236,3.692933333,0.1628903 353 | 37141,1638010.356,1430021.552,69,15.14,27.39333333,10.21266667,0.896866667,0.020219009 354 | 37129,1646097.352,1398296.877,66,14.06666667,24.50666667,9.52,0.9528,0.0197437 355 | 24039,1744615.768,1862073.288,87,21.86,27.29333333,11.44866667,2.336866667,0.04860733 356 | 13167,1239454.726,1157124.13,68,26.93333333,28.34,12.08333333,0.851466667,0.036318168 357 | 
36117,1524638.913,2392871.668,64,10.37333333,25.34666667,9.677333333,2.417866667,0.032075248 358 | 34005,1793128.135,2075366.679,60,5.54,21.64,11.64,4.755933333,0.045815574 359 | 37053,1763878.691,1670135.135,85,10.44666667,26.93333333,9.660666667,1.611266667,0.022068544 360 | 45091,1336110.314,1428342.555,71,11.68,25.60666667,12.576,1.9606,0.070648188 361 | 45065,1252619.994,1294113.473,57,18.9,26.20666667,12.47066667,1.146533333,0.02755893 362 | 45081,1303698.653,1313763.029,57,16.90666667,26.86666667,12.21933333,1.081466667,0.023179284 363 | 37123,1443588.974,1486516.397,63,18.66666667,27.22,12.53733333,1.7368,0.035944271 364 | 36039,1776833.617,2347254.627,75,13.5,26.41333333,8.798,2.173866667,0.011101784 365 | 45073,1170904.671,1378993.2,61,13.52,25.98,12.01666667,1.133533333,0.024534528 366 | 34041,1741874.528,2176288.673,61,6.246666667,22.55333333,10.152,4.036666667,0.060948017 367 | 42005,1376413.78,2098078.46,61,12.14666667,27.58666667,12.25,4.1228,0.345467193 368 | 42035,1517690.196,2172273.438,69,13.89333333,26.25333333,10.08266667,2.2874,0.092003654 369 | 13095,1111782.166,1008830.392,76,25.54,24.98,12.48666667,0.8886,0.01055159 370 | 36115,1804774.034,2472917.923,71,11.82,26.26666667,8.425333333,1.563866667,0.003323526 371 | 13269,1094442.998,1121507.842,72,24.24666667,28.86,12.34,0.960533333,0.052555854 372 | 13067,1045720.264,1271398.865,56,9.793333333,18.76666667,13.56533333,3.912933333,0.192186332 373 | 54037,1540247.38,1956911.497,79,9.606666667,25.58666667,12.47266667,3.5824,0.090802669 374 | 51035,1344969.201,1628917.501,66,15.07333333,29.30666667,9.627333333,1.3884,0.047610201 375 | 13251,1335318.349,1176789.814,81,21.18666667,26.57333333,11.52666667,0.740666667,0.019671816 376 | 36033,1703903.787,2595627.355,70,16.21333333,27.16666667,7.441333333,0.791,0.001725461 377 | 36051,1474589.851,2334175.807,67,11.57333333,23.93333333,9.173333333,1.775866667,0.041916752 378 | 
54099,1174285.609,1763155.466,87,18.98666667,31.59333333,10.934,2.315333333,0.156098409 379 | 51027,1222296.597,1670564.647,77,22.90666667,31.51333333,9.558,1.386933333,0.046099452 380 | 45031,1457757.858,1375458.332,80,20.59333333,27.84,12.722,1.509866667,0.028191081 381 | 45079,1378047.721,1327144.98,65,14.46666667,21.80666667,12.47133333,1.593133333,0.040118978 382 | 51061,1556868.479,1895059.369,64,5.873333333,21.62666667,11.91466667,2.565466667,0.088261504 383 | 54057,1447341.374,1951614.004,73,14.74,27.72,10.71733333,2.6718,0.144306704 384 | 54059,1206178.921,1720396.176,117,24.92,33.39333333,10.17466667,2.023866667,0.074718423 385 | 54109,1258731,1714945.02,96,22.81333333,33.22,9.568666667,1.483133333,0.047587327 386 | 24047,1778263.316,1880783.857,68,10.22,23.66666667,11.25266667,2.643733333,0.039584709 387 | 45069,1477747.456,1409462.874,111,25.05333333,31.22,12.71,1.441866667,0.019776288 388 | 51075,1569406.987,1781445.46,60,7.006666667,22.2,11.478,2.084866667,0.064823311 389 | 51135,1569996.505,1715378.669,79,18.91333333,30.18666667,11.32,1.9602,0.058551801 390 | 54031,1462694.697,1908032.105,59,13.50666667,29.97333333,10.206,1.935066667,0.090330269 391 | 37143,1730519.803,1640428.514,61,17.11333333,26.19333333,10.14466667,1.243466667,0.02090575 392 | 45045,1231184.545,1403328.199,63,12.87333333,23.2,12.33666667,1.697266667,0.035648661 393 | 13087,1086388.993,932261.3744,87,24.13333333,26.85333333,11.91733333,0.6314,0.009050791 394 | 13205,1118028.341,975040.5711,79,25.48,28.46,12.46266667,0.745933333,0.008809128 395 | 13235,1171003.423,1094923.931,66,19.76,28.52666667,12.42933333,0.813333333,0.025316279 396 | 54027,1476981.123,1945927.229,77,15.80666667,30.44666667,10.56866667,2.5572,0.103569945 397 | 54055,1300125.813,1698264.065,82,20.34666667,30.86,9.208666667,1.259333333,0.032377277 398 | 54063,1345865.977,1723197.083,72,15.92666667,28.76666667,9.285333333,1.546933333,0.031847434 399 | 
54067,1311268.885,1800627.417,82,19.15333333,29.52,9.838,1.2936,0.074269695 400 | 54079,1212726.84,1809882.281,73,9.846666667,25.23333333,11.83666667,3.134333333,0.271460418 401 | 51193,1656055.089,1843192.977,81,14.44,27.42,11.804,2.3918,0.113077794 402 | 51005,1388452.804,1756142.049,76,11.42666667,26.75333333,9.616,1.575733333,0.030462425 403 | 34013,1804735.869,2182643.518,55,14.73333333,21.47333333,11.86933333,10.5624,0.046986706 404 | 13033,1294398.008,1205638.256,81,25.21333333,27.76666667,11.84,1.0434,0.026772439 405 | 51001,1763272.516,1826266.62,89,17.81333333,26.00666667,10.694,2.126,0.033603465 406 | 13031,1329138.939,1135899.882,67,23.32666667,22.45333333,11.72133333,0.881133333,0.014463077 407 | 13165,1302225.403,1176479.14,80,26.43333333,27.55333333,11.828,0.768333333,0.021431232 408 | 37101,1576397.465,1531400.34,75,13.52666667,25.89333333,13.13266667,2.077133333,0.018123846 409 | 51105,1135752.782,1594803.167,98,23.91333333,30,10.47066667,1.666066667,0.063224455 410 | 36103,1916558.965,2219152.491,60,6.493333333,21.82666667,10.46666667,5.666066667,0.023758802 411 | 51019,1439176.423,1711113.406,61,7.98,25.4,10.64733333,1.9292,0.040129713 412 | 51047,1549846.101,1864900.94,69,9.38,24.64666667,11.658,2.259266667,0.068956104 413 | 13103,1366794.276,1138324.102,81,10.22,25.72,11.286,0.987066667,0.014068462 414 | 54083,1381105.214,1867124.342,64,18.00666667,28.47333333,9.473333333,1.143666667,0.08417722 415 | 51085,1604974.703,1792754.047,69,4.873333333,21.92,11.78133333,2.726933333,0.084289716 416 | 51069,1508841.421,1939047.688,72,7.12,25.38,11.472,2.8908,0.083405897 417 | 51195,1175972.341,1630930.847,102,20.78,30.29333333,9.794666667,2.1212,0.055951526 418 | 24017,1631028.737,1883315.978,70,6.526666667,21.62,12.286,3.227333333,0.126469912 419 | 36031,1757119.24,2553752.167,63,12.54666667,22.38,7.366,1.0304,0.001890423 420 | 51145,1573084.275,1762556.11,68,5.98,24.41333333,11.46666667,2.052466667,0.069999768 421 | 
51760,1611344.221,1767745.306,84,21.63333333,24.86,12.282,3.314333333,0.086532211 422 | 36099,1549405.809,2355026.711,63,12.32,26.28666667,9.496666667,1.980933333,0.034136129 423 | 13045,1004456.194,1226100.569,77,15.37333333,26.64666667,12.57666667,1.8114,0.186850758 424 | 51073,1694092.154,1771191.17,84,9.066666667,25.02666667,11.782,1.877066667,0.063584232 425 | 13059,1155637.238,1286419.257,61,26.07333333,19.08666667,12.91666667,1.7166,0.069270951 426 | 51083,1500908.066,1659731.3,71,17.28,27.89333333,11.26266667,1.963,0.105735036 427 | 13061,1039034.904,1010413.014,66,30.28666667,26.92,11.95466667,0.6786,0.012870123 428 | 42125,1323351.94,2018108.583,70,10.11333333,26.47333333,13.104,4.348266667,0.420846544 429 | 45055,1400663.565,1366623.446,79,13.8,27.02666667,12.73333333,1.365133333,0.036224219 430 | 34009,1800201.815,1993222.583,71,9.826666667,25.06,12.22666667,2.242533333,0.039603257 431 | 54081,1281918.487,1736878.053,79,17.97333333,30.80666667,9.546666667,1.510933333,0.053551753 432 | 13073,1262487.034,1255436.349,63,7.14,20.92666667,12.438,1.550733333,0.033392771 433 | 13091,1200452.766,1092122.291,81,21.46,27.88666667,12.30533333,0.786866667,0.020215714 434 | 54053,1198413.316,1837359.461,93,18.16,31.96666667,12.19533333,3.346466667,0.328547367 435 | 37075,1093298.55,1436078.013,67,18.91333333,29.88,10.6,0.8758,0.044359369 436 | 42051,1378511.3,1996883.489,71,17.74,28.8,12.122,3.289533333,0.363875318 437 | 51041,1605034.702,1749188.255,64,6.053333333,21.03333333,11.846,2.791733333,0.08104605 438 | 51139,1502186.462,1871116.694,66,12.76666667,29.88,10.93133333,1.3806,0.052210197 439 | 51161,1393012.024,1697984.566,54,6.186666667,21.98,10.02266667,2.14,0.035611585 440 | 13099,1049875.392,977774.6751,76,27.16,26.52,12.31,0.650466667,0.011437491 441 | 51155,1340728.098,1665907.142,74,13.79333333,29.42,9.498666667,1.443733333,0.03471435 442 | 54073,1261607.333,1915191.247,81,13.41333333,31.09333333,12.59733333,3.822,0.309699136 443 | 
51171,1489940.523,1896026.698,68,9.493333333,26.19333333,10.92333333,1.608,0.067379625 444 | 37167,1413285.618,1479056.188,70,13.29333333,26.78,12.71933333,2.1168,0.049407271 445 | 51185,1265973.084,1661117.007,85,16.86,29.04,9.216666667,1.150133333,0.033583226 446 | 13187,1089047.823,1347799.341,72,14.44666667,25.31333333,12.28666667,1.1558,0.054792182 447 | 13147,1186214.801,1335649.717,64,17.22666667,26.82666667,12.77933333,1.248866667,0.027080393 448 | 54107,1234436.881,1892845.955,77,15.5,30.1,12.86466667,4.1584,0.334966519 449 | 54041,1323938.678,1882650.824,87,18.65333333,30.56666667,11.062,2.1718,0.135115377 450 | 51175,1660955.711,1685061.662,73,14.96666667,26.34666667,11.58933333,1.6544,0.048127204 451 | 13027,1181516.343,940200.0498,76,23.58666667,26.68,11.74533333,0.6898,0.007454007 452 | 13127,1367612.98,1010378.056,75,16.02,24.20666667,10.648,0.675533333,0.016681435 453 | 13153,1150012.808,1117688.327,70,12.15333333,24.45333333,12.38533333,0.939,0.047824388 454 | 13181,1241432.538,1280516.511,76,18.13333333,27.74666667,12.44933333,1.265266667,0.034317774 455 | 54005,1237814.756,1758586.578,117,19.40666667,33.33333333,10.39666667,2.077866667,0.114510997 456 | 54035,1227296.869,1849010.747,79,15.84666667,28.86666667,12.198,3.2112,0.307647429 457 | 24023,1417357.194,1959178.546,49,13.27333333,22.78666667,10.73266667,2.2226,0.208542187 458 | 13207,1118726.876,1176047.921,62,12.60666667,25.11333333,12.358,1.692866667,0.136457909 459 | 13219,1151027.256,1272737.377,50,7.32,17.78666667,12.76333333,1.8114,0.088119263 460 | 13221,1182710.86,1282146.612,68,14.14666667,26.14666667,12.57066667,1.296066667,0.06162576 461 | 13319,1190786.057,1161735.547,71,18.15333333,26.35333333,12.34666667,1.0366,0.076514197 462 | 54023,1432264.469,1913616.709,51,15.18666667,28.70666667,10.108,1.8462,0.127781155 463 | 54093,1400892.182,1909096.143,64,16.08,27.87333333,9.789333333,1.548333333,0.151042948 464 | 
36059,1859870.436,2189947.365,47,5.726666667,18.82,11.05133333,13.0592,0.034188713 465 | 36075,1588690.728,2436988.083,79,14.70666667,28.18,9.054,2.1862,0.014027634 466 | 13259,1046643.494,1062076.282,71,28.12,27.22,12.11466667,0.7876,0.01841575 467 | 37073,1702334.911,1662006.436,71,15.38666667,25.74,10.83866667,1.512466667,0.035770505 468 | 51199,1696239.433,1752034.183,68,4.8,18.16,12.08266667,2.2666,0.064840601 469 | 36079,1828676.407,2261806.233,61,5,19.14,9.526,3.034666667,0.027107157 470 | 51820,1477903.628,1803796.554,63,13.74666667,25.95333333,10.62266667,1.556733333,0.03125429 471 | 36063,1387460.155,2371359.136,72,12.09333333,27.27333333,10.92933333,3.77,0.051687158 472 | 13007,1093091.067,983304.1776,63,24.60666667,26.10666667,12.48733333,0.764533333,0.009830176 473 | 24021,1575637.407,1982508.773,55,5.346666667,19.72666667,13.25266667,3.657733333,0.108542513 474 | 36089,1647135.996,2571722.277,77,16.67333333,25.84,7.974666667,0.986066667,0.00309673 475 | 13271,1225504.006,1068331.624,73,28.98666667,28.64666667,12.23133333,0.654,0.011615129 476 | 13229,1302309.007,1014963.543,92,18.44666667,28.42,11.52666667,0.6454,0.010584248 477 | 45041,1486637.477,1345492.052,77,18.07333333,26.13333333,11.896,1.3086,0.035607105 478 | 37125,1481683.846,1490671.681,61,12.48,23.59333333,12.69466667,1.642733333,0.025624934 479 | 42065,1408466.268,2139659.345,58,13.13333333,27.06666667,11.41466667,2.764733333,0.210871695 480 | 34027,1778629.855,2185127.613,49,4.006666667,16.96,10.40866667,6.314666667,0.052155983 481 | 37019,1617731.315,1374464.226,68,14.13333333,29.45333333,9.852666667,0.791066667,0.019846569 482 | 37083,1623563.726,1624783.652,71,24.41333333,27.96,12.17866667,1.655333333,0.027546562 483 | 13075,1191026.254,976480.7218,81,21.28,28.88,12.46533333,0.680466667,0.007717036 484 | 45029,1420276.636,1202878.044,81,21.50666667,27.52,10.67533333,1.0124,0.032097859 485 | 45043,1531529.213,1286308.015,64,17.56,26.1,9.911333333,1.066133333,0.04238067 486 | 
37033,1473661.73,1612333.892,73,16.92,29.27333333,11.45733333,2.2072,0.114894069 487 | 37049,1697494.859,1510019.832,72,14.31333333,24.64,9.945333333,1.350933333,0.009910888 488 | 13211,1149571.168,1245032.103,60,13.48666667,23.52666667,12.62866667,1.632733333,0.128666985 489 | 42101,1751441.689,2080937.922,80,22.82,29.42,13.31,6.893,0.058032971 490 | 36085,1817579.991,2161994.379,58,10.34666667,23.70666667,11.70666667,9.3702,0.032904442 491 | 34003,1814401.264,2204752.455,48,5.886666667,18.34,11.37533333,9.018333333,0.04112128 492 | 36011,1567737.518,2374439.526,61,12.3,27.77333333,9.520666667,2.130266667,0.027034536 493 | 37145,1505394.763,1617645.776,74,13.60666667,27.37333333,11.62533333,2.132466667,0.104696358 494 | 51007,1572483.12,1737932.54,82,10.14666667,28.56666667,11.296,1.994733333,0.064431327 495 | 51660,1472787.865,1845021.532,51,24.03333333,20.66666667,11.05466667,1.5598,0.04264868 496 | 54097,1348482.639,1875559.148,74,19.86,28.52666667,10.222,1.7296,0.106872143 497 | 36025,1711335.093,2323192.08,57,14.04666667,25.22,8.61,1.116,0.015069066 498 | 34017,1819905.966,2180131.941,53,15.05333333,22.00666667,12.13533333,10.94493333,0.044451956 499 | 13305,1326934.884,1040349.836,83,20.16,27.84666667,11.46266667,0.695666667,0.010446505 500 | 34029,1824854.704,2083480.234,65,8.42,24.91333333,11.718,3.2172,0.037615359 501 | 51181,1670078.514,1731435.217,66,11.72,25.02,11.464,2.4372,0.068553031 502 | 51191,1237850.245,1611510.929,72,12.75333333,26.56,9.882,1.6892,0.041376635 503 | 37043,1104919.49,1404573.347,57,15.26,26.17333333,10.86866667,0.783466667,0.033007594 504 | 36077,1695430.803,2369555.448,59,13.91333333,23.34666667,8.384666667,1.2666,0.010474389 505 | 42089,1709056.974,2192003.055,69,9.773333333,24.96666667,9.652,2.6352,0.060264661 506 | 36041,1712275.97,2490987.62,73,10.17333333,22.35333333,7.322666667,0.886666667,0.003557744 507 | 37147,1662198.209,1556593.377,63,20.43333333,23.14,12.17,1.583533333,0.013127509 508 | 
13019,1208146.786,992422.7947,88,19.59333333,29.06666667,12.24733333,0.6612,0.007774275 509 | 36071,1784395.906,2248556.365,64,10.91333333,23.10666667,9.442,2.7786,0.031406659 510 | 36001,1780443.169,2385249.289,64,11.42666667,21.46,8.835333333,2.475333333,0.007572167 511 | 13303,1222799.465,1184983.217,73,22.82,27.57333333,12.23533333,1.018133333,0.065402509 512 | 51113,1523847.656,1851669.482,57,10.34666667,24.45333333,11.29333333,1.636866667,0.047605457 513 | 51187,1519532.581,1907327.622,80,9.506666667,26.86666667,11.37266667,1.966666667,0.07172545 514 | 37065,1635788.182,1587878.711,72,22.82666667,28.27333333,12.63733333,1.6888,0.018609596 515 | 13051,1392412.033,1101415.414,65,17.26,24.16666667,11.10933333,1.0518,0.009944617 516 | 51053,1607599.901,1715108.814,78,10.75333333,28.22,11.47733333,2.143133333,0.064553301 517 | 13043,1298399.5,1132052.351,77,23.82,27.38,12.06666667,0.7762,0.015079075 518 | 51009,1466190.928,1748958.493,74,11.94666667,27.19333333,10.70066667,1.6734,0.029867792 519 | 51770,1402346.68,1700671.94,82,17.51333333,28.96,10.36,2.145066667,0.035279193 520 | 37179,1394272.731,1439225.335,60,9.286666667,22.08666667,12.874,2.0826,0.04897361 521 | 37189,1269211.479,1560293.27,47,18.84666667,20.79333333,9.731333333,1.244933333,0.042434137 522 | 51157,1528372.041,1883439.688,56,8.286666667,21.96,11.38333333,1.718133333,0.063077793 523 | 54085,1273217.53,1895127.718,80,17.58,31.44,12.086,3.053266667,0.24868401 524 | 51013,1613596.309,1922262.73,43,6.766666667,13.98,13.568,5.005933333,0.144993707 525 | 13285,1016197.213,1165749.052,74,17.27333333,26.14,12.26266667,1.258666667,0.097151262 526 | 13039,1360362.387,975480.8504,72,11.96,25.05333333,10.468,0.752133333,0.027253299 527 | 36019,1748150.048,2623675.29,69,13.74666667,23.7,7.742,1.1768,0.001208892 528 | 45011,1342753.031,1236098.031,73,21.40666667,27.20666667,11.50266667,1.095666667,0.027567092 529 | 45057,1384086.392,1403329.781,68,15.94666667,29.13333333,12.678,1.591466667,0.041551825 530 | 
37011,1251796.433,1540204.588,66,17.58,26.47333333,9.703333333,1.1974,0.041331988 531 | 24001,1463675.83,1978141.473,64,15.16,26.38,11.03733333,2.706333333,0.140489244 532 | 37165,1490869.031,1438927.834,79,23.49333333,28.08666667,12.85466667,1.426133333,0.01773952 533 | 42073,1301449.84,2105533.834,62,13.28,27.28666667,12.96466667,4.101933333,0.218065512 534 | 42039,1307257.316,2185193.456,66,14.26,27.56666667,11.51466667,2.923066667,0.097478561 535 | 42015,1596515.267,2251514.685,65,12.86,26.88,9.106666667,1.471466667,0.053094296 536 | 13035,1111388.857,1205843.94,91,14.74,29.11333333,12.38133333,2.014466667,0.160238001 537 | 13001,1288990.75,1057012.927,77,19.44,27.43333333,12.026,0.7092,0.008886769 538 | 36107,1604638.584,2296816.053,61,9.806666667,23.31333333,9.08,1.734533333,0.034471747 539 | 37121,1231540.194,1530025.882,72,15.86,27.71333333,9.962,1.087666667,0.042747608 540 | 45001,1233904.427,1327933.576,70,16.4,26.92666667,12.66666667,1.252533333,0.02427076 541 | 54095,1283161.73,1929358.59,86,16.5,30.61333333,12.554,3.479333333,0.298923313 542 | 51800,1701943.473,1690601.23,74,11.63333333,23.46666667,10.886,1.933466667,0.049455466 543 | 45023,1343252.819,1397436.613,81,18.55333333,30.46666667,12.39533333,1.4288,0.04506228 544 | 13317,1214993.687,1275513.359,76,19.96666667,28.10666667,12.48133333,1.184533333,0.048553454 545 | 13155,1198918.04,1027877.281,77,21.77333333,29.3,12.55666667,0.678866667,0.009601194 546 | 36045,1590775.698,2508226.523,72,14.85333333,26,8.586,1.803866667,0.007207704 547 | 34001,1803710.599,2031834.035,69,10.92666667,25.98666667,11.76333333,3.114133333,0.040341525 548 | 42017,1745633.551,2117385.439,60,5.286666667,21.84,11.75133333,5.077333333,0.062835561 549 | 37037,1494044.085,1537413.373,51,10.59333333,21.08666667,12.54733333,2.254,0.040195966 550 | 37097,1349138.009,1524687.672,65,10.98666667,25.21333333,12.08733333,2.315333333,0.097859031 551 | 
42061,1506866.668,2076919.913,61,12.58666667,26.34,10.99733333,2.758066667,0.114331818 552 | 37017,1577416.075,1428761.544,70,21.81333333,27.52,11.878,1.133066667,0.019236577 553 | 13025,1326661.912,1000420.533,99,18.22666667,30.06,11.04466667,0.677133333,0.015539183 554 | 13315,1178880.849,1066819.615,74,27.22666667,28.12,12.58733333,0.729133333,0.015056365 555 | 51700,1702968.965,1737731.396,68,13.80666667,24.94666667,12.11533333,2.6156,0.050765587 556 | 51091,1415121.011,1826079.888,56,12.74666667,25.20666667,9.149333333,1.113466667,0.041548653 557 | 51810,1752695.578,1705589.886,63,7.513333333,22.45333333,10.156,2.119733333,0.03788188 558 | 51165,1471173.589,1853131.421,51,8.92,23.8,10.72066667,1.485933333,0.049190178 559 | 13183,1339648.617,1064862.147,84,20.88666667,27.95333333,11.35,0.814733333,0.009522017 560 | 13231,1074360.616,1179283.593,85,11.23333333,25.39333333,12.31266667,1.468933333,0.127035045 561 | 13113,1060350.192,1213666.982,48,5.18,18.26,12.96466667,2.4054,0.163848439 562 | 13005,1276775.865,1033243.564,97,20.66,29.77333333,11.92533333,0.6524,0.008562619 563 | 37183,1545650.8,1556677.834,53,9.106666667,17.72666667,12.79,2.105933333,0.031941021 564 | 45071,1310463.886,1346837.147,69,16.20666667,27.22,12.446,1.220533333,0.024817823 565 | 13071,1158669.438,976083.7313,80,22.59333333,27.98,12.49333333,0.695,0.008105214 566 | 13199,1047465.29,1170294.889,66,19.15333333,28.18666667,12.24066667,1.247066667,0.109732141 567 | 24025,1663852.67,2010730.911,64,6.066666667,21.71333333,13.44133333,4.541066667,0.119126544 568 | 51051,1197155.374,1651004.36,91,20.36,31.32666667,9.614,1.8648,0.05338612 569 | 34035,1780207.58,2151254.925,47,4.273333333,15.79333333,11.512,6.028333333,0.056940368 570 | 36113,1765770.902,2492083.498,63,10.66,24.27333333,7.889333333,1.2494,0.002861461 571 | 42107,1645709.504,2137970.773,67,11.43333333,28.67333333,10.458,3.339333333,0.106499101 572 | 42041,1571533.317,2060710.058,52,6.646666667,20.99333333,12.386,3.9012,0.107308048 
573 | 54071,1427489.357,1864438.437,46,13.79333333,27.12666667,9.411333333,1.290066667,0.068158849 574 | 51021,1303204.702,1667921.155,71,12.98666667,28.03333333,9.201333333,1.154,0.028963157 575 | 24043,1538083.918,1990247.173,66,10.17333333,25.11333333,12.51466667,3.615933333,0.096786915 576 | 13079,1116766.6,1142102.125,75,16.65333333,27.51333333,12.22533333,1.2422,0.082118091 577 | 54025,1347254.648,1767353.557,80,17.39333333,28.57333333,9.438,1.296133333,0.040138342 578 | 42045,1731682.361,2066067.558,67,9.146666667,24.60666667,13.42733333,6.200533333,0.061369367 579 | 13273,1087754.095,1033206.976,85,27.55333333,27.72,12.16266667,0.795333333,0.01352705 580 | 42127,1697727.853,2257139.2,64,11.91333333,26.90666667,8.940666667,1.246066667,0.031406852 581 | 51031,1478293.438,1705612.137,68,11.79333333,26.74,10.88533333,1.837933333,0.051762803 582 | 13129,1011067.176,1330535.353,90,14.01333333,28.56666667,13.30933333,2.062133333,0.157855786 583 | 51121,1367109.715,1682899.993,55,17.4,19.98,9.554,1.964066667,0.036665624 584 | 51029,1519909.161,1754957.03,69,18.9,28.81333333,11.11733333,1.6644,0.037051356 585 | 37055,1798253.644,1604192.673,67,9.18,25.10666667,8.216666667,1.1462,0.008347594 586 | 13137,1130864.957,1359803.544,60,13.65333333,25.58666667,12.17,1.1376,0.030868917 587 | 54043,1204390.574,1770898.492,108,23.90666667,33.2,10.938,2.4316,0.171496282 588 | 42033,1454142.899,2133464.921,61,13.96666667,27.39333333,10.77533333,2.168066667,0.175473521 589 | 36105,1739211.263,2274248.013,71,15.28,25.80666667,8.680666667,1.484133333,0.023338405 590 | 42111,1429094.609,2011809.21,49,12.92666667,27.22,11.344,2.639533333,0.257661742 591 | 51023,1409669.854,1733520.052,53,6.28,23.20666667,10.06533333,1.777333333,0.028794215 592 | 51109,1559975.42,1808855.188,76,10.14,26.84,11.40266667,2.0466,0.057913201 593 | 42055,1538800.032,2027269.531,55,8.533333333,24.38666667,12.33933333,3.279733333,0.095794648 594 | 
13253,1058244.949,935725.1685,81,23.44666667,27.06,12.06266667,0.6452,0.010910305 595 | 37095,1763204.221,1568816.331,69,22.05333333,28.37333333,8.826666667,1.182133333,0.007774045 596 | 42103,1727250.264,2227452.264,58,8.246666667,25.24666667,8.897333333,1.825666667,0.041719105 597 | 13105,1201057.911,1311274.263,74,18.64666667,28.79333333,12.79066667,1.111533333,0.033088462 598 | 45007,1213040.652,1358418.683,70,13.92,26.8,12.72666667,1.501466667,0.024158228 599 | 36061,1827575.68,2186232.241,43,17.91333333,18.74,12.272,10.6904,0.042456716 600 | 36095,1743539.37,2375273.679,67,12.16666667,25.10666667,8.224666667,1.495333333,0.008576691 601 | 13301,1226878.056,1235056.745,84,23.9,28.78,12.29,1.332466667,0.062790517 602 | 13261,1106655.278,1065071.709,70,25.01333333,25.10666667,12.22466667,0.893733333,0.018143691 603 | 51045,1376536.195,1719306.873,63,10.04666667,25.97333333,9.652,1.887,0.030335087 604 | 13107,1274318.468,1149501.496,84,25.76,27.90666667,11.99266667,0.749666667,0.021429513 605 | 54015,1284743.833,1815780.539,103,24.78,32.10666667,10.682,2.301733333,0.122965278 606 | 42059,1331673.938,1981403.637,74,16.40666667,27.34666667,12.764,4.152733333,0.394057632 607 | 36081,1841804.183,2182258.761,39,14.54,20,12.01066667,14.68953333,0.037377968 608 | 36065,1648770.996,2428756.763,63,14.28,26.18666667,8.445333333,1.834,0.008933147 609 | 51079,1510291.406,1835864.464,61,8.126666667,24.67333333,11.17866667,1.533066667,0.038746902 610 | 54011,1185759.528,1795938.607,83,18.78666667,28.82666667,11.58866667,2.789466667,0.23816696 611 | 54061,1350699.787,1959197.776,61,17.48,22.56,12.19933333,3.297,0.326752058 612 | 51510,1616258.352,1915795.151,48,8.166666667,16.33333333,13.338,4.856866667,0.147191167 613 | 42063,1410676.886,2085864.825,53,15.92666667,24.21333333,11.798,3.693533333,0.339120644 614 | 24013,1604981.209,1998805.915,61,4.866666667,21.18666667,13.15266667,3.2682,0.127370906 615 | 
54103,1301405.283,1948184.975,83,17.38666667,30.04666667,12.572,3.599866667,0.328060327 616 | 13299,1287458.284,978514.5473,78,21.82,28.40666667,11.30066667,0.639466667,0.01326026 617 | 54091,1356170.287,1926659.922,78,18.14,28.22666667,11.38066667,2.714933333,0.229157977 618 | 37103,1676368.487,1494220.466,70,17.85333333,27.88,10.46933333,1.345333333,0.011981824 619 | 45049,1378190.662,1186307.961,74,22.88,26.22666667,11.01733333,0.8428,0.023250973 620 | 51650,1717605.117,1735088.898,73,13.46,23.96,11.932,2.729333333,0.056447223 621 | 51095,1675785.756,1757564.59,48,6.693333333,18.48,11.70733333,2.332533333,0.074459277 622 | 42069,1678063.195,2228579.147,64,12.03333333,26.96,9.420666667,1.9836,0.04698714 623 | 42013,1475160.903,2078209.205,65,13.15333333,26.76666667,11.01933333,2.9708,0.174829692 624 | 13257,1153423.385,1354138.051,83,17.77333333,27.32666667,12.522,1.286733333,0.027933233 625 | 37047,1576210.284,1388925.415,80,22.56,29.26666667,11.02666667,0.963466667,0.020479309 626 | 37071,1330908.701,1463722.596,82,14.53333333,28.59333333,12.63066667,2.612666667,0.104389145 627 | 45025,1433973.062,1406304.158,81,20.36666667,29.28666667,12.806,1.384066667,0.027277384 628 | 36111,1775867.93,2302403.31,61,11.6,23.75333333,9.09,2.032466667,0.017977323 629 | 37021,1205489.367,1480787.544,62,14.03333333,25.36,10.22,1.417466667,0.046396954 630 | 37015,1686744.852,1615496.926,70,23.18,26.56,11.17933333,1.533866667,0.019561572 631 | 34031,1794089.083,2208556.382,52,13.49333333,21.47333333,10.50133333,5.992066667,0.042913013 632 | 45017,1395339.658,1290613.603,53,16.70666667,24.62666667,11.85933333,1.471933333,0.047343003 633 | 36087,1813564.819,2226668.669,44,10.77333333,18.28,10.42866667,5.0918,0.036711558 634 | 51057,1647038.652,1822048.171,74,13,25.64666667,11.38933333,2.438,0.103948363 635 | 37135,1498854.676,1579049.044,58,13.19333333,17.04666667,12.08933333,1.981266667,0.072613316 636 | 
34037,1759786.533,2212712.261,63,4.686666667,21.71333333,9.342666667,3.157,0.045925872 637 | 13123,1046569.812,1355924.329,63,15.96666667,28.58,12.278,1.247066667,0.092880033 638 | 13295,969740.2733,1351864.506,95,15.14666667,29.5,12.974,2.117466667,0.120574462 639 | 45047,1265105.223,1324705.806,65,16.17333333,26.52666667,12.34666667,1.125133333,0.022022008 640 | 51097,1656798.071,1798464.495,76,12.48666667,27.42666667,11.33266667,2.389266667,0.090404004 641 | 42099,1566640.722,2086593.725,70,8.56,25.40666667,11.64533333,3.324266667,0.113054116 642 | 13047,982225.8689,1372225.108,73,11.64,27.54666667,13.1,2.697666667,0.097585145 643 | 37099,1156077.938,1437274.96,58,17.27333333,25.83333333,10.082,0.866,0.028709487 644 | 13209,1259366.381,1100453.583,87,22.35333333,24.56666667,12.062,0.6928,0.013618591 645 | 13237,1164434.823,1216682.797,67,15.64,24.84,12.45666667,1.533333333,0.148870224 646 | 51163,1436054.549,1767464.309,61,10.61333333,24.94666667,9.969333333,1.712733333,0.028223625 647 | 37137,1729079.643,1519154.323,64,16.42666667,26.17333333,8.904666667,1.135466667,0.007762193 648 | 13197,1071814.999,1095905.649,64,22.07333333,29.11333333,12.17733333,0.763733333,0.031503506 649 | 34025,1820299.025,2125684.596,61,6.18,20.81333333,11.73533333,5.0214,0.044226238 650 | 13217,1117382.249,1236773.733,75,12.46666667,25.47333333,12.752,2.468133333,0.144379531 651 | 13249,1092478.705,1088370.428,77,17.74666667,27.5,12.15666667,0.828933333,0.027058017 652 | 51143,1459738.423,1658545.822,72,12.98,28.16,11.10733333,1.933733333,0.102060102 653 | 51167,1222483.585,1632905.456,83,18.16666667,29.48666667,9.679333333,1.765666667,0.04744314 654 | 13287,1164606.542,1036137.608,75,26.06666667,28.28666667,12.63,0.7356,0.011126866 655 | 54105,1249192.815,1873699.746,91,18.85333333,30.52666667,12.25533333,2.759333333,0.27650887 656 | 51025,1594645.856,1677031.064,74,20.12,28.42666667,11.48533333,1.5654,0.049614259 657 | 
51093,1690124.337,1710628.896,70,9.013333333,22.83333333,11.3,2.228533333,0.058830963 658 | 24035,1699831.3,1961649.538,67,6.633333333,21.7,13.27066667,4.305866667,0.096557021 659 | 13125,1235765.269,1216094.351,70,16.13333333,28.77333333,12.21333333,1.259333333,0.059530334 660 | 42091,1727248.17,2098844.026,53,5.426666667,20.43333333,12.528,5.060933333,0.063949792 661 | 37025,1385195.632,1483013.976,66,10.22666667,24.38,12.91733333,2.677466667,0.078309834 662 | 42011,1676333.766,2111342.483,57,10.77333333,24.4,11.80733333,4.2978,0.086217084 663 | 45015,1479695.44,1250590.851,73,12.89333333,26.22666667,10.42333333,1.198666667,0.049416007 664 | 54049,1336346.909,1943122.147,80,16.3,26.5,12.28733333,3.2686,0.288285121 665 | 51087,1617200.64,1769762.462,64,8.18,20.61333333,12.01866667,3.250133333,0.08572237 666 | 36119,1835028.388,2232826.671,46,8.546666667,18.12666667,10.35333333,5.469333333,0.032412807 667 | 13185,1211163.789,943251.3662,79,20.2,24.98,11.708,0.710666667,0.007496417 668 | -------------------------------------------------------------------------------- /data/meuse.csv: -------------------------------------------------------------------------------- 1 | x,y,cadmium,copper,lead,zinc,elev,dist,om,ffreq,soil,lime,dist_to_meuse 2 | 181072,333611,11.7,85,299,1022,7.909,0.00135803,13.6,1,1,1,50 3 | 181025,333558,8.6,81,277,1141,6.983,0.0122243,14.0,1,1,1,30 4 | 181165,333537,6.5,68,199,640,7.8,0.103029,13.0,1,1,1,150 5 | 181298,333484,2.6,81,116,257,7.655,0.190094,8.0,1,2,0,270 6 | 181307,333330,2.8,48,117,269,7.48,0.27709,8.7,1,2,0,380 7 | 181390,333260,3.0,61,137,281,7.791,0.364067,7.8,1,2,0,470 8 | 181165,333370,3.2,31,132,346,8.217,0.190094,9.2,1,2,0,240 9 | 181027,333363,2.8,29,150,406,8.49,0.0921516,9.5,1,1,0,120 10 | 181060,333231,2.4,37,133,347,8.668,0.184614,10.6,1,1,0,240 11 | 181232,333168,1.6,24,80,183,9.049,0.309702,6.3,1,2,0,420 12 | 181191,333115,1.4,25,86,189,9.015,0.315116,6.4,1,2,0,400 13 | 
181032,333031,1.8,25,97,251,9.073,0.228123,9.0,1,1,0,300 14 | 180874,333339,11.2,93,285,1096,7.32,0.0,15.4,1,1,1,20 15 | 180969,333252,2.5,31,183,504,8.815,0.113932,8.4,1,1,0,130 16 | 181011,333161,2.0,27,130,326,8.937,0.168336,9.1,1,1,0,220 17 | 180830,333246,9.5,86,240,1032,7.702,0.0,16.2,1,1,1,10 18 | 180763,333104,7.0,74,133,606,7.16,0.0122243,16.0,1,1,1,10 19 | 180694,332972,7.1,69,148,711,7.1,0.0122243,16.0,1,1,1,10 20 | 180625,332847,8.7,69,207,735,7.02,0.0,13.7,1,1,1,10 21 | 180555,332707,12.9,95,284,1052,6.86,0.0,14.8,1,1,1,10 22 | 180642,332708,5.5,53,194,673,8.908,0.0703468,10.2,1,1,1,80 23 | 180704,332717,2.8,35,123,402,8.99,0.0975136,7.2,1,1,1,140 24 | 180704,332664,2.9,35,110,343,8.83,0.113932,7.2,1,1,1,160 25 | 181153,332925,1.7,24,85,218,9.02,0.342321,7.0,1,2,0,440 26 | 181147,332823,1.4,26,75,200,8.976,0.385804,6.9,1,2,0,490 27 | 181167,332778,1.5,22,76,194,8.973,0.429289,6.3,1,2,0,530 28 | 181008,332777,1.3,27,73,207,8.507,0.315116,5.6,1,2,0,400 29 | 180973,332687,1.3,24,67,180,8.743,0.320574,4.4,1,2,0,400 30 | 180916,332753,1.8,22,87,240,8.973,0.249863,5.3,1,2,0,330 31 | 181352,332946,1.5,21,65,180,9.043,0.489064,4.8,1,2,0,630 32 | 181133,332570,1.3,29,78,208,8.688,0.472778,2.6,1,2,0,570 33 | 180878,332489,1.3,21,64,198,8.727,0.287957,1.0,1,2,0,390 34 | 180829,332450,2.1,27,77,250,8.328,0.271622,2.4,1,2,0,360 35 | 180954,332399,1.2,26,80,192,7.971,0.385807,1.9,1,2,0,500 36 | 180956,332318,1.6,27,82,213,7.809,0.418417,3.1,1,2,0,550 37 | 180710,332330,3.0,32,97,321,6.986,0.244474,1.6,1,2,0,340 38 | 180632,332445,5.8,50,166,569,7.756,0.135709,3.5,1,2,0,210 39 | 180530,332538,7.9,67,217,833,7.784,0.0484965,8.1,1,1,1,60 40 | 180478,332578,8.1,77,219,906,7.0,0.0,7.9,1,1,1,10 41 | 180383,332476,14.1,108,405,1454,6.92,0.00135803,9.5,1,1,1,20 42 | 180494,332330,2.4,32,102,298,7.516,0.135709,1.4,1,2,0,170 43 | 180410,332031,1.3,21,62,258,9.28,0.320572,2.0,1,2,0,360 44 | 180355,332299,4.2,51,281,746,7.94,0.081222,5.1,1,2,0,100 45 | 
180292,332157,4.3,50,294,746,6.36,0.190086,5.3,1,2,0,200 46 | 180283,332014,3.1,38,211,464,7.78,0.287941,4.5,1,2,0,320 47 | 180282,331861,1.7,26,135,365,8.18,0.423826,4.9,1,2,0,480 48 | 180270,331707,1.7,24,112,282,9.42,0.554289,4.5,1,2,0,660 49 | 180199,331591,2.1,32,162,375,8.867,0.603225,5.5,1,2,0,690 50 | 180135,331552,1.7,24,94,222,8.292,0.614071,3.4,1,2,0,710 51 | 180237,332351,8.2,47,191,812,8.06,0.00135803,11.1,1,1,1,10 52 | 180103,332297,17.0,128,405,1548,7.98,0.0,12.3,1,1,1,10 53 | 179973,332255,12.0,117,654,1839,7.9,0.0054321,16.5,1,1,1,10 54 | 179826,332217,9.4,104,482,1528,7.74,0.0054321,13.9,1,1,1,10 55 | 179687,332161,8.2,76,276,933,7.552,0.0054321,8.1,1,1,1,20 56 | 179792,332035,2.6,36,180,432,7.76,0.146578,3.1,1,1,0,200 57 | 179902,332113,3.5,34,207,550,6.74,0.135684,5.8,1,1,0,140 58 | 180100,332213,10.9,90,541,1571,6.68,0.0703333,10.2,1,1,1,70 59 | 179604,332059,7.3,80,310,1190,7.4,0.0484831,12.0,1,1,1,20 60 | 179526,331936,9.4,78,210,907,7.44,0.0054321,14.1,1,1,1,10 61 | 179495,331770,8.3,77,158,761,7.36,0.0054321,14.5,1,1,1,10 62 | 179489,331633,7.0,65,141,659,7.2,0.0316663,14.8,1,1,1,20 63 | 179414,331494,6.8,66,144,643,7.22,0.0122243,13.3,1,1,1,10 64 | 179334,331366,7.4,72,181,801,7.36,0.0122243,15.2,1,1,1,20 65 | 179255,331264,6.6,75,173,784,5.18,0.0373395,11.4,1,1,1,20 66 | 179470,331125,7.8,75,399,1060,5.8,0.211846,9.0,1,1,0,270 67 | 179692,330933,0.7,22,45,119,7.64,0.451037,3.6,1,1,1,560 68 | 179852,330801,3.4,55,325,778,6.32,0.575877,6.9,1,1,0,750 69 | 179140,330955,3.9,47,268,703,5.76,0.0756869,7.0,1,1,1,80 70 | 179128,330867,3.5,46,252,676,6.48,0.12481,6.2,1,1,1,130 71 | 179065,330864,4.7,55,315,793,6.48,0.103024,6.5,1,1,0,110 72 | 179007,330727,3.9,49,260,685,6.32,0.157469,5.7,1,1,0,200 73 | 179110,330758,3.1,39,237,593,6.32,0.200976,7.0,1,1,1,260 74 | 179032,330645,2.9,45,228,549,6.16,0.200976,7.3,1,1,0,270 75 | 179095,330636,3.9,48,241,680,6.56,0.26622,8.2,1,1,0,320 76 | 179058,330510,2.7,36,201,539,6.9,0.298835,4.3,1,1,0,360 77 | 
178810,330666,2.5,36,204,560,7.54,0.0812247,4.4,1,1,1,80 78 | 178912,330779,5.6,68,429,1136,6.42,0.070355,8.2,1,1,1,100 79 | 178981,330924,9.4,88,462,1383,6.28,0.0122243,8.5,1,1,1,70 80 | 179076,331005,10.8,85,333,1161,6.34,0.0,9.6,1,1,1,20 81 | 180151,330353,18.1,76,464,1672,7.307,0.0537723,17.0,1,1,1,50 82 | 179211,331175,6.3,63,159,765,5.7,0.0593662,12.8,1,1,1,80 83 | 181118,333214,2.1,32,116,279,7.72,0.211843,5.9,1,2,0,290 84 | 179474,331304,1.8,25,81,241,7.932,0.12481,2.9,2,2,1,160 85 | 179559,331423,2.2,27,131,317,7.82,0.12481,4.5,2,1,0,160 86 | 179022,330873,2.8,36,216,545,8.575,0.0921516,10.7,2,1,0,140 87 | 178953,330742,2.4,41,145,505,8.536,0.113941,9.4,2,1,0,150 88 | 178875,330516,2.6,33,163,420,8.504,0.179216,9.0,2,1,0,220 89 | 178803,330349,1.8,27,129,332,8.659,0.233596,7.0,2,1,0,280 90 | 179029,330394,2.0,38,148,400,7.633,0.336861,6.5,2,1,1,450 91 | 178605,330406,2.7,37,214,553,8.538,0.070355,9.4,2,1,1,70 92 | 178701,330557,2.7,34,226,577,7.68,0.0593662,10.2,2,1,0,70 93 | 179547,330245,0.9,19,54,155,7.564,0.255341,6.4,2,1,0,340 94 | 179301,330179,0.9,22,70,224,7.76,0.364067,7.6,2,1,0,470 95 | 179405,330567,0.4,26,73,180,7.653,0.429295,7.0,2,1,0,630 96 | 179462,330766,0.8,25,87,226,7.951,0.380328,5.6,2,1,0,460 97 | 179293,330797,0.4,22,76,186,8.176,0.249874,6.5,2,1,0,320 98 | 179180,330710,0.4,24,81,198,8.468,0.266212,6.6,2,1,0,320 99 | 179206,330398,0.4,18,68,187,8.41,0.451037,5.9,2,1,0,540 100 | 179618,330458,0.8,23,66,199,7.61,0.30971,6.5,2,1,0,420 101 | 179782,330540,0.4,22,49,157,7.792,0.293359,6.4,2,1,0,380 102 | 179980,330773,0.4,23,63,203,8.76,0.532351,7.2,2,2,0,500 103 | 180067,331185,0.4,23,48,143,9.879,0.619513,6.6,2,3,0,760 104 | 180162,331387,0.2,23,51,136,9.097,0.684725,4.3,2,2,0,750 105 | 180451,331473,0.2,18,50,117,9.095,0.809742,5.3,2,3,0,1000 106 | 180328,331158,0.4,20,39,113,9.717,0.880389,4.1,2,3,0,860 107 | 180276,330963,0.2,22,48,130,9.924,0.749591,6.1,2,3,0,680 108 | 180114,330803,0.2,27,64,192,9.404,0.575752,7.5,2,3,0,500 109 | 
179881,330912,0.4,25,84,240,10.52,0.581484,8.8,2,3,0,650 110 | 179774,330921,0.2,30,67,221,8.84,0.49452,5.7,2,3,0,630 111 | 179657,331150,0.2,23,49,140,8.472,0.32058,6.1,2,3,0,410 112 | 179731,331245,0.2,24,48,128,9.634,0.336851,7.1,2,3,0,390 113 | 179717,331441,0.2,21,56,166,9.206,0.249852,4.1,2,2,0,310 114 | 179446,331422,0.2,24,65,191,8.47,0.0756869,6.0,2,1,0,70 115 | 179524,331565,0.2,21,84,232,8.463,0.0756869,6.6,2,1,0,70 116 | 179644,331730,0.2,23,75,203,9.691,0.162853,6.8,2,1,1,150 117 | 180321,330366,3.7,53,250,722,8.704,0.0974916,9.1,2,2,0,80 118 | 180162,331837,0.2,33,81,210,9.42,0.440142,5.9,2,2,0,450 119 | 180029,331720,0.2,22,72,198,9.573,0.4619,4.9,2,2,0,530 120 | 179797,331919,0.2,23,86,139,9.555,0.222701,7.1,2,1,0,240 121 | 179642,331955,0.2,25,94,253,8.779,0.103024,8.1,2,1,1,70 122 | 179849,332142,1.2,30,244,703,8.54,0.0921353,8.3,2,1,0,70 123 | 180265,332297,2.4,47,297,832,8.809,0.0484884,10.0,2,1,0,60 124 | 180107,332101,0.2,31,96,262,9.523,0.168331,5.9,2,1,0,190 125 | 180462,331947,0.2,20,56,142,9.811,0.38581,5.0,2,2,0,450 126 | 180478,331822,0.2,16,49,119,9.604,0.489064,4.5,2,2,0,550 127 | 180347,331700,0.2,17,50,152,9.732,0.57602,5.4,2,2,0,650 128 | 180862,333116,0.4,26,148,415,9.518,0.0812194,2.3,2,1,0,100 129 | 180700,332882,1.6,34,162,474,9.72,0.0373369,7.5,2,1,0,170 130 | 180201,331160,0.8,18,37,126,9.036,0.771698,4.6,2,3,1,860 131 | 180173,331923,1.2,23,80,210,9.528,0.336829,5.8,2,2,0,410 132 | 180923,332874,0.2,20,80,220,9.155,0.228123,4.4,3,1,0,290 133 | 180467,331694,0.2,14,49,133,10.08,0.597761,4.4,3,2,0,680 134 | 179917,331325,0.8,46,42,141,9.97,0.44558,4.5,3,2,0,540 135 | 179822,331242,1.0,29,48,158,10.136,0.396675,5.2,3,2,0,480 136 | 179991,331069,0.8,19,41,129,10.32,0.581478,4.6,3,3,0,720 137 | 179120,330578,1.2,31,73,206,9.041,0.287966,6.9,3,1,0,380 138 | 179034,330561,2.0,27,146,451,7.86,0.233596,7.0,3,1,0,310 139 | 179085,330433,1.5,29,95,296,8.741,0.364067,5.4,3,1,0,430 140 | 
179236,330046,1.1,22,72,189,7.822,0.331454,6.2,3,1,0,370 141 | 179456,330072,0.8,20,51,154,7.78,0.211846,5.0,3,1,0,290 142 | 179550,329940,0.8,20,54,169,8.121,0.103029,5.1,3,1,0,150 143 | 179445,329807,2.1,29,136,403,8.231,0.070355,8.1,3,1,0,70 144 | 179337,329870,2.5,38,170,471,8.351,0.146576,8.0,3,1,0,220 145 | 179245,329714,3.8,39,179,612,7.3,0.0537723,8.8,3,1,0,80 146 | 179024,329733,3.2,35,200,601,7.536,0.119286,9.3,3,1,0,120 147 | 178786,329822,3.1,42,258,783,7.706,0.0921435,8.4,3,1,0,120 148 | 179135,329890,1.5,24,93,258,8.07,0.249863,7.7,3,1,0,260 149 | 179030,330082,1.2,20,68,214,8.226,0.37494,5.7,3,1,0,440 150 | 179184,330182,0.8,20,49,166,8.128,0.423837,4.7,3,1,0,540 151 | 179085,330292,3.1,39,173,496,8.577,0.423837,9.1,3,1,0,520 152 | 178875,330311,2.1,31,119,342,8.429,0.27709,6.5,3,1,0,350 153 | 179466,330381,0.8,21,51,162,9.406,0.358606,5.7,3,1,0,460 154 | 180627,330190,2.7,27,124,375,8.261,0.0122243,5.5,3,3,0,40 155 | -------------------------------------------------------------------------------- /figure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "119ab91e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Standard and GIS Modules\n", 11 | "import os\n", 12 | "import numpy as np\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import pandas as pd\n", 15 | "import time\n", 16 | "import scipy\n", 17 | "\n", 18 | "import warnings\n", 19 | "import seaborn as sns\n", 20 | "warnings.filterwarnings(\"ignore\")\n", 21 | "plt.rcParams.update({\"font.size\":20})" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "3712c602", 27 | "metadata": {}, 28 | "source": [ 29 | "### Real data" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "5b5f28a9", 35 | "metadata": {}, 36 | "source": [ 37 | "##### Compute LOSH" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 
43 | "id": "6eba7823", 44 | "metadata": { 45 | "scrolled": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "from esda.losh import LOSH\n", 50 | "import libpysal\n", 51 | "# ls = LOSH(connectivity=w, inference=\"chi-square\").fit(boston_ds['NOX'])\n", 52 | "\n", 53 | "dataset_target = {\n", 54 | " \"plants\": \"richness_species_vascular\",\n", 55 | " \"meuse\": \"zinc\",\n", 56 | " \"atlantic\": \"Rate\",\n", 57 | " \"deforestation\": \"deforestation_quantile\",\n", 58 | " \"california_housing\": \"median_house_value\",\n", 59 | "}\n", 60 | "\n", 61 | "dataset_losh = {}\n", 62 | "for d in os.listdir(\"data\"):\n", 63 | "# if \"meuse\" in d: # or \"deforestation\" in d:\n", 64 | "# continue\n", 65 | "# print(\"------------- \", d)\n", 66 | " f = pd.read_csv(os.path.join(\"data\", d))\n", 67 | " coords = f[[\"x\", \"y\"]]\n", 68 | " \n", 69 | " # with KNN:\n", 70 | " w = libpysal.weights.KNN(coords, k=20)\n", 71 | " \n", 72 | " # extract target var\n", 73 | " target_var = dataset_target[d.split(\".\")[0]]\n", 74 | " \n", 75 | " ls = LOSH(connectivity=w, inference=\"chi-square\").fit(f[target_var].values)\n", 76 | " \n", 77 | " dataset_losh[(d.split(\".\")[0]).replace(\"_\", \" \")] = np.mean(ls.Hi)\n", 78 | "# f.drop([\"x\", \"y\", ]], axis=1, inplace=True)\n", 79 | "# w_cutoff = (np.max(coords, axis=0) - np.min(coords, axis=0)).sum() / 10\n", 80 | "# print(w_cutoff)\n", 81 | "# w = get_weights_as_array(np.array(coords), w_cutoff)\n", 82 | "dataset_losh" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "id": "116756e9", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# 100\n", 93 | "{'plants': 0.9909525898042943,\n", 94 | " 'california_housing': 0.9199100144573773,\n", 95 | " 'deforestation': 0.9968442529301278,\n", 96 | " 'atlantic': 0.9620820900732183}\n", 97 | "# 10\n", 98 | "{'plants': 1.0165645863370958,\n", 99 | " 'california_housing': 0.8750507024465561,\n", 100 | " 'deforestation': 
0.9969507834341067,\n", 101 | " 'atlantic': 1.0031870804023555}\n", 102 | "# 20\n", 103 | "{'plants': 1.0609760745783343,\n", 104 | " 'california_housing': 0.8773202806724744,\n", 105 | " 'deforestation': 0.9978335532280674,\n", 106 | " 'atlantic': 1.004762023375391}" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "id": "5109aacb", 112 | "metadata": {}, 113 | "source": [ 114 | "##### Compute sample size" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "c75396c3", 121 | "metadata": { 122 | "scrolled": true 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "sample_dict = {}\n", 127 | "feature_dict = {}\n", 128 | "for out_file in os.listdir(\"data\"):\n", 129 | " if \"folds\" in out_file or \"synthetic\" in out_file or out_file[0] == \".\" or \"deprecated\" in out_file:\n", 130 | " continue\n", 131 | " dataset_name = out_file[:-4].replace(\"_\", \" \")\n", 132 | " data = pd.read_csv(os.path.join(\"data\", out_file))\n", 133 | " sample_dict[dataset_name] = len(data)\n", 134 | " feature_dict[dataset_name] = data.shape[1] - 2\n", 135 | "sample_dict" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "id": "e7de9f24", 141 | "metadata": {}, 142 | "source": [ 143 | "#### Model recommendation" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "e38ab3ec", 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "recommendation_dict = {\"california housing\": \"RF\", \"meuse\": \"GWR\", \"plants\": \"GWR / Kriging\", \"atlantic\": \"RF\", \"deforestation\": \"RF\"}" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "id": "980e47e2", 159 | "metadata": {}, 160 | "source": [ 161 | "#### Read results" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "54c21e36", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "res_path = \"outputs/real_feb_23\"" 172 | ] 173 | }, 174 | { 175 | 
"cell_type": "code", 176 | "execution_count": null, 177 | "id": "a1de3c1d", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "all_res = []\n", 182 | "for out_file in os.listdir(res_path):\n", 183 | " if \"folds\" in out_file or \"synthetic\" in out_file or out_file[0] == \".\" or \"deprecated\" in out_file:\n", 184 | " continue\n", 185 | " dataset_name = \" \".join(out_file.split(\"_\")[1:])[:-4]\n", 186 | " res = pd.read_csv(os.path.join(res_path, out_file))\n", 187 | " res[\"Dataset\"] = dataset_name\n", 188 | " raw_data = pd.read_csv(os.path.join(\"data\", out_file[8:]))\n", 189 | "# res[\"Samples\"] = len(raw_data)\n", 190 | "# res[\"LOSH\"] = round(dataset_losh[out_file[8:-4]], 2)\n", 191 | "# res.sort_values(\"Method\", ascending=False, inplace=True)\n", 192 | " all_res.append(res)\n", 193 | "all_res = pd.concat(all_res)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "id": "fed1e3b4", 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "all_res.loc[all_res[\"Method\"] == \"linear regression\", \"Method\"] = \"OLS\"" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "1a1f56cb", 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "# remove SAR\n", 214 | "all_res = all_res[all_res[\"Method\"] != \"SAR\"]" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "id": "fc2625c7", 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "# check if all the metrics align in terms of ranking\n", 225 | "all(all_res.sort_values([\"Dataset\", \"R-Squared\"]).reset_index() == all_res.sort_values([\"Dataset\", \"MAE\"]).reset_index())" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "ff6f1f14", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "pivoted = all_res.pivot(index='Dataset', columns='Method', values='RMSE')\n", 236 | 
"pivoted.loc[\"california housing\"] = pivoted.loc[\"california housing\"].round()\n", 237 | "# sorted_columns\n", 238 | "pivoted[\"LOSH\"] = pd.Series(dataset_losh)\n", 239 | "pivoted[\"Samples\"] = pd.Series(sample_dict)\n", 240 | "pivoted[\"k\"] = pd.Series(feature_dict)\n", 241 | "pivoted[\"Recommended model\"] = pd.Series(recommendation_dict)\n", 242 | "col_order = ['Dataset', \"Samples\", \"k\", \"LOSH\", \"Recommended model\", 'OLS', 'SLX', 'GWR', 'RF', 'RF (coordinates)', 'spatial RF', 'Kriging']\n", 243 | "\n", 244 | "final = pivoted.reset_index().reset_index(drop=True)[col_order] #.drop_index(\"Method\", axis=1)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "id": "b1e9c0c9", 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "final" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "id": "1db0cddf", 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "print(final.to_latex(index=False, float_format=\"%.2f\"))" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "75e44ce3", 270 | "metadata": {}, 271 | "source": [ 272 | "#### Make table" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "id": "587f6a77", 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "print(all_res.groupby([\"Dataset\", \"Samples\", \"LOSH\", \"Method\"]).mean().round(5).to_latex(float_format=\"%.2f\"))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "ae6a910f", 288 | "metadata": {}, 289 | "source": [ 290 | "#### Make plot" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "id": "a936cde1", 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "# how big is each real dataset:\n", 301 | "data_path = \"data/\"\n", 302 | "data_len = {}\n", 303 | "for dataset in os.listdir(data_path):\n", 304 | " test = pd.read_csv(data_path+dataset)\n", 
305 | "# print(dataset, len(test))\n", 306 | " dataset_name = \" \".join(dataset[:-4].split(\"_\"))\n", 307 | " data_len[dataset_name] = f\"{dataset_name}\\n({len(test)})\"\n", 308 | "all_res[\"Dataset\"] = all_res[\"Dataset\"].map(data_len)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "id": "c02ffaf7", 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "plt.rcParams.update({\"font.size\":20})\n", 319 | "plt.figure(figsize=(12,6))\n", 320 | "sns.barplot(data=all_res.reset_index(), x=\"Dataset\", y=\"R-Squared\", hue=\"Method\")\n", 321 | "# plt.xlabel(\"Number of samples\")\n", 322 | "plt.legend(ncol=4, fontsize=15.5)\n", 323 | "plt.ylim(0., 1.35)\n", 324 | "plt.tight_layout()\n", 325 | "plt.xlabel(\"Dataset\", weight=\"bold\")\n", 326 | "plt.ylabel(\"R-Squared score\", weight=\"bold\")\n", 327 | "plt.savefig(\"outputs/real_dataset_barplot.pdf\")\n", 328 | "plt.show()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "id": "d312e4c2", 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "results.columns" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "id": "907e595b", 344 | "metadata": {}, 345 | "source": [ 346 | "## Explanatory plots" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "id": "9f20540b", 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "weights = np.array([-0.95, 0.38, 0.66, -0.43, 0.22])\n", 357 | "\n", 358 | "nr_data = 10000\n", 359 | "nr_feats = 5\n", 360 | "feat_cols = [\"feat_\" + str(i) for i in range(nr_feats)]\n", 361 | "\n", 362 | "coords = np.array([[i, j] for i in range(int(np.sqrt(nr_data))) for j in range(int(np.sqrt(nr_data)))])\n", 363 | "coords = coords / np.max(coords) * 2 - 1\n", 364 | "\n", 365 | "spatial_variation = np.zeros((nr_data, nr_feats))\n", 366 | "for i in range(nr_feats):\n", 367 | " spatial_variation[:, i] = 0.5 * (\n", 368 | " 
np.sin(coords[:, 0] * np.pi * 2 + i)\n", 369 | " + np.cos(coords[:, 1] * np.pi * 2 + i)\n", 370 | " )" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "8d003bf7", 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "cmap = plt.cm.Spectral\n", 381 | "fig = plt.figure(figsize=(20,4)) # TODO\n", 382 | "\n", 383 | "########## Number 1\n", 384 | "for i in range(5):\n", 385 | " ax = fig.add_subplot(1, 5, i+1)\n", 386 | "# print(weights[i] + spatial_variation[:, i])\n", 387 | " cols = [cmap((val+1) / 2) for val in (weights[i] + 0.5 * spatial_variation[:, i])]\n", 388 | " im = ax.scatter(coords[:, 0], coords[:, 1], c=cols)\n", 389 | " ax.set_xlabel(rf\"Coefficient $\\beta_{i+1}$\")\n", 390 | " ax.set_xticks([])\n", 391 | " ax.set_yticks([])\n", 392 | " ax.spines['bottom'].set_color('white')\n", 393 | " ax.spines['top'].set_color('white') \n", 394 | " ax.spines['right'].set_color('white')\n", 395 | " ax.spines['left'].set_color('white')\n", 396 | "\n", 397 | "import matplotlib as mpl\n", 398 | "cmap = mpl.cm.viridis\n", 399 | "bounds = [-1, 2, 5, 7, 12, 15]\n", 400 | "cmap = mpl.cm.Spectral\n", 401 | "norm = mpl.colors.Normalize(vmin=-1, vmax=1)\n", 402 | "\n", 403 | "fig.subplots_adjust(right=0.88)\n", 404 | "cbar_ax = fig.add_axes([0.9, 0.15, 0.02, 0.7])\n", 405 | "cbar = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),\n", 406 | " cax=cbar_ax)\n", 407 | "\n", 408 | "plt.savefig(\"outputs/coefficient_figure_5.png\")\n", 409 | "plt.show()\n", 410 | "\n", 411 | "\n", 412 | "# ########## Number 2\n", 413 | "# for i in range(5):\n", 414 | "# ax = fig.add_subplot(2, 5, i+1 + 5)\n", 415 | "# # print(weights[i] + spatial_variation[:, i])\n", 416 | "# cols = [cmap((val+1) / 2) for val in (weights[i] + 0.4 * spatial_variation[:, i])]\n", 417 | "# im = ax.scatter(coords[:, 0], coords[:, 1], c=cols)\n", 418 | "# ax.set_xlabel(\"Coefficient \"+str(i+1))\n", 419 | "# ax.set_xticks([])\n", 420 | "# 
ax.set_yticks([])\n", 421 | "# ax.spines['bottom'].set_color('white')\n", 422 | "# ax.spines['top'].set_color('white') \n", 423 | "# ax.spines['right'].set_color('white')\n", 424 | "# ax.spines['left'].set_color('white')\n", 425 | "\n", 426 | "# import matplotlib as mpl\n", 427 | "# cmap = mpl.cm.viridis\n", 428 | "# bounds = [-1, 2, 5, 7, 12, 15]\n", 429 | "# cmap = mpl.cm.Spectral\n", 430 | "# norm = mpl.colors.Normalize(vmin=-1, vmax=1)\n", 431 | "\n", 432 | "# fig.subplots_adjust(right=0.825)\n", 433 | "# cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])\n", 434 | "\n", 435 | "# cbar = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),\n", 436 | "# cax=cbar_ax)\n", 437 | " \n", 438 | "# plt.tight_layout(w_pad=5, h_pad = 5)\n" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "id": "5bffe3f3", 444 | "metadata": {}, 445 | "source": [ 446 | "### Noise figure\n", 447 | "\n", 448 | "To generate the noise plots, run the above code with the three different types of noise, and execute the plot below with each type" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "id": "7b4531e5", 455 | "metadata": { 456 | "scrolled": true 457 | }, 458 | "outputs": [], 459 | "source": [ 460 | "# CODE FOR ALL THREE NOISE TYPES\n", 461 | "noise_level = 0.5\n", 462 | "\n", 463 | "nr_data = 10000\n", 464 | "nr_feats = 5\n", 465 | "feat_cols = [\"feat_\" + str(i) for i in range(nr_feats)]\n", 466 | "\n", 467 | "coords = np.array([[i, j] for i in range(int(np.sqrt(nr_data))) for j in range(int(np.sqrt(nr_data)))])\n", 468 | "coords = coords / np.max(coords) * 2 - 1\n", 469 | "synthetic_data = pd.DataFrame(coords, columns=[\"x_coord\", \"y_coord\"])\n", 470 | "\n", 471 | "spatial_variation = np.zeros((nr_data, nr_feats))\n", 472 | "for i in range(nr_feats):\n", 473 | " spatial_variation[:, i] = 0.5 * (\n", 474 | " np.sin(coords[:, 0] * np.pi * 2 + i)\n", 475 | " + np.cos(coords[:, 1] * np.pi * 2 + i)\n", 476 | " )\n", 477 | "\n", 478 | 
" \n", 479 | "for noise_type in [\"constant\", \"heterogeneous - same\", \"heterogeneous - different\"]:\n", 480 | "\n", 481 | " if noise_type == \"constant\":\n", 482 | " noise = np.random.normal(0, noise_level, nr_data)\n", 483 | " elif noise_type == \"heterogeneous - different\":\n", 484 | " spatial_variation_different = noise_level * (\n", 485 | " 0.5\n", 486 | " * (\n", 487 | " synthetic_data[\"x_coord\"].values\n", 488 | " + synthetic_data[\"y_coord\"].values\n", 489 | " )\n", 490 | " + 1\n", 491 | " )\n", 492 | " noise = np.random.normal(\n", 493 | " 0,\n", 494 | " spatial_variation_different,\n", 495 | " len(spatial_variation_different),\n", 496 | " )\n", 497 | " elif noise_type == \"heterogeneous - same\":\n", 498 | " # e.g. high noise level (0.5), spatial variation is from\n", 499 | " # sin and cos so it's between -1 and 1, so we make + 1\n", 500 | " # so on average we multiply by 1, but varying variance\n", 501 | " # between 0.5 * 0 and 0.5 * 2\n", 502 | " spatially_dependent_noise = noise_level * (\n", 503 | " spatial_variation[:, 0] + 1 # without locality level!\n", 504 | " )\n", 505 | " noise = np.random.normal(\n", 506 | " 0, spatially_dependent_noise, nr_data\n", 507 | " )\n", 508 | " else:\n", 509 | " raise RuntimeError(\"Noise must be one of above\")\n", 510 | "\n", 511 | " plt.figure(figsize=(6,4))\n", 512 | " plt.scatter(coords[:, 0], coords[:, 1], c=noise, vmin=-3, vmax=3)\n", 513 | " plt.colorbar()\n", 514 | " # plt.title(f\"Distribution $\\epsilon$ ({noise_type})\", fontsize=15)\n", 515 | " plt.axis(\"off\")\n", 516 | " plt.tight_layout()\n", 517 | " plt.savefig(f\"outputs/noise_{noise_type}.png\")\n", 518 | " plt.show()" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "id": "bbd0f97b", 524 | "metadata": {}, 525 | "source": [ 526 | "# Synthetic experiment - load results" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "id": "f6db9914", 533 | "metadata": {}, 534 | "outputs": [], 535 | 
"source": [ 536 | "def add_discrete_noise(results):\n", 537 | " results[\"noise_discrete\"] = pd.NA\n", 538 | " results[\"locality_discrete\"] = pd.NA\n", 539 | " results.loc[results[\"noise\"] < 0.3, \"noise_discrete\"] = \"low\"\n", 540 | " results.loc[results[\"noise\"] >= 0.3, \"noise_discrete\"] = \"high\"\n", 541 | " results.loc[results[\"locality\"] < 0.3, \"locality_discrete\"] = \"low\"\n", 542 | " results.loc[results[\"locality\"] >= 0.3, \"locality_discrete\"] = \"high\"\n", 543 | " return results" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "id": "68ae2751", 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [ 553 | "path = \"outputs/syn_feb_23\"" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "id": "d905af50", 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "# # merge the three files\n", 564 | "# results = []\n", 565 | "# for noise in [\"uniformly_distributed\", \"heterogeneous_-_same\", \"heterogeneous_-_different\"]:\n", 566 | "# results.append(pd.read_csv(os.path.join(path, \"synthetic_data_results_\"+ noise+\".csv\")))\n", 567 | "# results = pd.concat(results)\n", 568 | "# results.loc[results[\"model\"] == \"linear regression\", \"model\"] = \"OLS\"\n", 569 | "# results = add_discrete_noise(results)" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "id": "e87187e0", 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "noise_level_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]\n", 580 | "locality_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": null, 586 | "id": "5d6923fb", 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "use_function = \"non-linear 2\"" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "id": "79662156", 597 | "metadata": {}, 598 | "outputs": [], 599 | 
"source": [ 600 | "# take files from the testing-directory and merge them with the linear results\n", 601 | "non_linear_dir = \"new_non_linear\"\n", 602 | "test_dir = \"multiple_nonlinear_tests\"\n", 603 | "\n", 604 | "# merge the three files\n", 605 | "results = []\n", 606 | "for noise in [\"uniformly_distributed\", \"heterogeneous_-_same\", \"heterogeneous_-_different\", \"train\"]:\n", 607 | " results2 = pd.read_csv(os.path.join(path, non_linear_dir, test_dir, \"synthetic_data_results_\"+ noise+\".csv\"))\n", 608 | " results1 = pd.read_csv(os.path.join(path, \"synthetic_data_results_\"+ noise+\".csv\"))\n", 609 | " # only use non-linear results from results2 and linear ones from results1\n", 610 | " results2 = results2[results2[\"data mode\"] == use_function]\n", 611 | " results1 = results1[results1[\"data mode\"] == \"linear\"]\n", 612 | " # concat\n", 613 | " results_one = pd.concat((results1, results2))\n", 614 | " # post-processing\n", 615 | " results_one.loc[results_one[\"model\"] == \"linear regression\", \"model\"] = \"OLS\"\n", 616 | " results_one.loc[results_one[\"data mode\"] == use_function, \"data mode\"] = \"non-linear\"\n", 617 | " results_one.to_csv(os.path.join(path, non_linear_dir, \"synthetic_data_results_\"+ noise+\".csv\"))\n", 618 | " if noise != \"train\":\n", 619 | " results.append(results_one)\n", 620 | "\n", 621 | "results = pd.concat(results)\n", 622 | "# general preprocessing steps\n", 623 | "results = add_discrete_noise(results)\n", 624 | "print(len(results)//2)" 625 | ] 626 | }, 627 | { 628 | "cell_type": "markdown", 629 | "id": "f24e27eb", 630 | "metadata": {}, 631 | "source": [ 632 | "### Main plot" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": null, 638 | "id": "1ea6447f", 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [ 642 | "include_models = ['OLS', 'SLX', 'GWR', 'RF', 'RF (coordinates)', 'spatial RF', 'Kriging']\n" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | 
"execution_count": null, 648 | "id": "05a3385e", 649 | "metadata": {}, 650 | "outputs": [], 651 | "source": [ 652 | "def main_plot(results, nr_data=500, noise_type=\"uniformly distributed\", save_path=\"outputs/main_plot.pdf\", score_col=\"RMSE\"):\n", 653 | " include_models = [m for m in include_models if m in results[\"model\"].unique()]\n", 654 | " include_function = [\"linear\", \"non-linear\"]\n", 655 | " # [model for model in results[\"model\"].unique() if \"geo\" not in model]\n", 656 | " nr_models = len(include_models)\n", 657 | " fig = plt.figure(figsize=(15, 4))\n", 658 | " #fig = plt.figure(figsize=(16, 6.5))\n", 659 | " for mode_ind, mode in enumerate(include_function):\n", 660 | " # print(\"----------------\")\n", 661 | " for model_ind, model in enumerate(include_models):\n", 662 | " # print(mode, \"data, --> model:\", model)\n", 663 | " results_filter = results[\n", 664 | " (results[\"data mode\"] == mode) & \n", 665 | " (results[\"model\"] == model) & \n", 666 | " (results[\"nr_data\"] == nr_data) & \n", 667 | " (results[\"noise_type\"] == noise_type)\n", 668 | " ]\n", 669 | " results_filter.set_index([\"noise\", \"locality\"], inplace=True)\n", 670 | " visualize_scores = np.zeros((len(noise_level_range), len(locality_range)))\n", 671 | " for i, noise in enumerate(noise_level_range):\n", 672 | " for j, locality in enumerate(locality_range):\n", 673 | " score = results_filter.loc[noise, locality][score_col].mean()\n", 674 | " visualize_scores[i, j] = score\n", 675 | "\n", 676 | " ax1 = fig.add_subplot(len(include_function), nr_models+1, ((nr_models+1) * mode_ind) + model_ind+1)\n", 677 | " imshow_plot = ax1.imshow(visualize_scores, vmin=0, vmax=0.9)\n", 678 | " # plt.axis(\"off\")\n", 679 | "# if model_ind==0:\n", 680 | "# ax1.set_ylabel(\"$\\longleftarrow$ Increasing \\n noise\", fontsize=15)\n", 681 | "# ax1.yaxis.set_label_position(\"right\")\n", 682 | "# ax1.yaxis.tick_right()\n", 683 | " plt.xticks([])\n", 684 | " plt.yticks([])\n", 685 | "# 
ax1.set_xlabel(\"$\\longrightarrow$ decreasing \\n stationarity\", fontsize=10)\n", 686 | " if model_ind == 0:\n", 687 | " # ax2 = ax1.twinx()\n", 688 | " # ax2.set_ylabel(mode)\n", 689 | " # ax2.yaxis.set_label_position(\"right\")\n", 690 | "# pad = 2\n", 691 | " mode_new = \"non-linear\\n(simple) \" if mode == \"non-linear (simple)\" else mode\n", 692 | " ax1.annotate(mode_new, xy=(0, 0.5), xytext=(-50, 0), # ax1.yaxis.labelpad - pad\n", 693 | " xycoords=ax1.yaxis.label, textcoords='offset points',\n", 694 | " size=18, ha='right', va='center', rotation=90, weight=\"bold\")\n", 695 | " if mode_ind == 0:\n", 696 | " ax1.set_title(model, weight=\"bold\", fontsize=15)\n", 697 | " \n", 698 | " fig.text(0.5, 0.0, \"$\\longrightarrow$ decreasing stationarity\", ha='center')\n", 699 | "# fig.text(0.5, 0.36, \"$\\longrightarrow$ decreasing stationarity\", ha='center')\n", 700 | "# fig.text(0.5, 0.7, \"$\\longrightarrow$ decreasing stationarity\", ha='center')\n", 701 | " \n", 702 | " fig.text(0.06, 0.45, \"$\\longleftarrow$ Increasing noise\", va='center', rotation='vertical')\n", 703 | " # make colorbar\n", 704 | " # fig.subplots_adjust(right=0.95)\n", 705 | " cbar_ax = fig.add_axes([0.88, 0.05, 0.02, 0.9])\n", 706 | " fig.colorbar(imshow_plot, cax=cbar_ax, label=score_col)\n", 707 | " plt.tight_layout()\n", 708 | " if save_path is not None:\n", 709 | " plt.savefig(save_path)\n", 710 | " plt.show()" 711 | ] 712 | }, 713 | { 714 | "cell_type": "code", 715 | "execution_count": null, 716 | "id": "f45f6842", 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "main_plot(results, nr_data=1000, save_path =\"outputs/main_plot.pdf\")" 721 | ] 722 | }, 723 | { 724 | "cell_type": "markdown", 725 | "id": "39700ebb", 726 | "metadata": {}, 727 | "source": [ 728 | "### Barplot - low noise, 0.3 non-stationarity, over samples\n" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "id": "5e76ad28", 735 | "metadata": {}, 736 | "outputs": 
# --- Barplot - low noise, high non-stationarity, over samples ---------------

# Score column shown on the y axis of the bar plot.
score_col = "RMSE"

# Drop the SAR baseline; every other model stays in the comparison.
subset = results[results["model"] != "SAR"]

plt.rcParams.update({"font.size":22})

plt.figure(figsize=(18,6))
modes = ["linear", "non-linear"]
for panel, mode in enumerate(modes, start=1):
    plt.subplot(1, len(modes), panel)
    subset_2 = subset[
        (subset["data mode"] == mode) &
        (subset["noise_discrete"] == "low") &
        (subset["locality_discrete"] == "high") &
        (subset["noise_type"] == "uniformly distributed")
    ]
    subset_2 = subset_2.groupby(["nr_data", "model"]).agg({score_col: "mean"})
    per_model = subset_2.reset_index().set_index("model")
    # Bug fix: .loc with a list of labels raises KeyError as soon as one of
    # the listed models is missing, so only models that are present are
    # requested (their order still follows include_models).
    models_present = [m for m in include_models if m in per_model.index]

    ax = sns.barplot(data=per_model.loc[models_present].reset_index(), x="nr_data", y=score_col, hue="model")
    plt.ylim(0, 0.7)
    plt.xlabel("Number of samples")
    # Hide the per-panel legend; one shared legend is added below.
    plt.legend([],[], frameon=False)
    plt.title(mode+" DGP")

handles, labels = ax.get_legend_handles_labels()
plt.tight_layout()
plt.figlegend(handles, labels, loc = 'upper center', ncol=7, labelspacing=0., bbox_to_anchor=(0.5,1.09))
plt.savefig("outputs/barplot_main.pdf", bbox_inches="tight")
plt.show()


# --- Noise type analysis -----------------------------------------------------

fontsize=18
plt.rcParams.update({"font.size":fontsize})

# .copy(): the noise_type column is overwritten below, which must not write
# through a filtered view of `results` (SettingWithCopyWarning).
subset = results[
    results["model"].isin(["OLS", "GWR", "RF (coordinates)", "Kriging"])
].copy()
subset["noise_type"] = subset["noise_type"].map({
    'uniformly distributed':'uniformly distributed noise', 'heterogeneous - same': 'heterogeneous (trigonometric)',
    'heterogeneous - different': "heterogeneous (linear)"
})

fig = plt.figure(figsize=(13, 4))
counter = 1
for i, mode in enumerate(["linear", "non-linear"]):
    for j, model in enumerate(["GWR", "Kriging"]):
        subset2 = subset[
            (subset["model"] == model) &
            (subset["data mode"] == mode) &
            (subset["locality_discrete"] == "high") &
            (subset["nr_data"] == 500)
        ]
        ax = fig.add_subplot(1, 4, counter)
        sns.lineplot(ax=ax, data=subset2.reset_index(), x ="noise", y="RMSE", hue="noise_type")
        plt.legend([], [], frameon=False)
        plt.xlabel(r"Noise level $\sigma$")

        plt.title(f"{model}\n{mode} DGP", fontsize=fontsize)
        if counter > 1:
            # Only the left-most panel keeps its y label and ticks.
            # NOTE(review): nesting of the yticks call under this `if` is
            # inferred; confirm against the notebook.
            plt.ylabel("")
            plt.yticks([], [])
        ax.set_ylim(0, 0.8)
        counter += 1

handles, labels = ax.get_legend_handles_labels()
plt.tight_layout()
plt.figlegend(handles, labels, loc = 'upper center', ncol=3, labelspacing=0., bbox_to_anchor=(0.5,1.09))
plt.savefig("outputs/noise_analysis.pdf", bbox_inches="tight")
plt.show()
"### Runtime" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "id": "acd774aa", 908 | "metadata": {}, 909 | "outputs": [], 910 | "source": [ 911 | "time_plot_data = results.groupby([\"model\", \"nr_data\"])[\"time\"].mean()\n", 912 | "time_plot_data = time_plot_data.loc[['OLS', 'SLX', 'GWR', 'RF', 'RF (coordinates)', 'spatial RF', 'Kriging']]" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": null, 918 | "id": "bd508470", 919 | "metadata": {}, 920 | "outputs": [], 921 | "source": [ 922 | "plt.figure(figsize=(10,4))\n", 923 | "sns.barplot(data=time_plot_data.reset_index(), x=\"nr_data\", y=\"time\", hue=\"model\")\n", 924 | "plt.yscale(\"log\")\n", 925 | "plt.ylabel(\"Runtime (seconds)\")\n", 926 | "plt.xlabel(\"Number of samples\")\n", 927 | "plt.legend(ncol=3, loc=\"upper left\")\n", 928 | "plt.ylim(0, 800)\n", 929 | "plt.tight_layout()\n", 930 | "plt.savefig(\"outputs/runtime_plot.pdf\")\n", 931 | "plt.show()" 932 | ] 933 | }, 934 | { 935 | "cell_type": "markdown", 936 | "id": "de3f6fc3", 937 | "metadata": {}, 938 | "source": [ 939 | "### Train vs test score" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "id": "831a9c34", 946 | "metadata": {}, 947 | "outputs": [], 948 | "source": [ 949 | "train_results = pd.read_csv(os.path.join(path, 'new_non_linear', \"synthetic_data_results_train.csv\"))\n", 950 | "train_results.loc[train_results[\"model\"] == \"linear regression\", \"model\"] = \"OLS\"\n", 951 | "test_results = results[results[\"noise_type\"] == \"uniformly distributed\"]# .drop(\"Unnamed: 0\", axis=1)\n", 952 | "train_results[\"evaluation\"] = \"training error\"\n", 953 | "test_results[\"evaluation\"] = \"testing error\"\n", 954 | "print(len(train_results), len(test_results))\n", 955 | "traintest = pd.concat((train_results, test_results))\n", 956 | "# traintest[\"R2 score\"] = traintest[\"R2 score\"].clip(-5, 1)" 957 | ] 958 | }, 959 | { 960 | 
"cell_type": "code", 961 | "execution_count": null, 962 | "id": "05d12297", 963 | "metadata": {}, 964 | "outputs": [], 965 | "source": [ 966 | "fig = plt.figure(figsize=(15, 4)) # 10\n", 967 | "subset = traintest[\n", 968 | " (traintest[\"model\"].isin([\"SLX\", \"GWR\"])) # & # \"spatial RF\",\n", 969 | " & (traintest[\"nr_data\"] == 1000)\n", 970 | "].sort_values(\"model\", ascending=False)\n", 971 | "counter = 1\n", 972 | "modes_considered = [\"linear\"] # [\"linear\", \"non-linear\"]\n", 973 | "signal_to_noise_modes = [\n", 974 | " \"Weak non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\",\n", 975 | " \"Strong non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\",\n", 976 | " \"Weak non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\",\n", 977 | " \"Strong non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\",\n", 978 | "]\n", 979 | "for i, mode in enumerate(modes_considered): # \"non-linear (simple)\" # linear\", \n", 980 | " for j, greatersmaller in enumerate(signal_to_noise_modes):\n", 981 | " #\"Low $\\lambda$ and low $\\sigma$\",\n", 982 | " # \"High $\\lambda$ and low $\\sigma$\", \n", 983 | " # \"Low $\\lambda$ and high $\\sigma$\"]):\n", 984 | " if greatersmaller == \"Weak non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\":\n", 985 | " subset2 = subset[\n", 986 | " (subset[\"noise\"] < 0.4) & \n", 987 | " (subset[\"locality\"] < 0.4)\n", 988 | " ]\n", 989 | " elif greatersmaller == \"Strong non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\":\n", 990 | " subset2 = subset[\n", 991 | " (subset[\"noise\"] < 0.4) & \n", 992 | " (subset[\"locality\"] >= 0.4)\n", 993 | " ]\n", 994 | " elif greatersmaller == \"Strong non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\":\n", 995 | " subset2 = subset[\n", 996 | " (subset[\"noise\"] >= 0.4) & \n", 997 | " (subset[\"locality\"] >= 0.4)\n", 998 | " ]\n", 999 | " elif greatersmaller == \"Weak non-stationarity ($\\lambda$) \\n and strong 
noise ($\\sigma$)\":\n", 1000 | " subset2 = subset[\n", 1001 | " (subset[\"noise\"] >= 0.4) & \n", 1002 | " (subset[\"locality\"] < 0.4)\n", 1003 | " ]\n", 1004 | " subset2 = subset2[subset2[\"data mode\"] == mode]\n", 1005 | " \n", 1006 | " if i==0 and j==1: \n", 1007 | " print(subset2.groupby([\"model\", \"evaluation\"]).agg({\"RMSE\": \"mean\"}))\n", 1008 | " ax = fig.add_subplot(len(modes_considered), len(signal_to_noise_modes), counter)\n", 1009 | " sns.barplot(ax=ax, data=subset2, x=\"model\", y=\"RMSE\", hue=\"evaluation\")\n", 1010 | "# if counter == 2:\n", 1011 | "# plt.legend(title=\"Evaluation data\", ncol=1, framealpha=1, loc=\"lower center\")\n", 1012 | "# else:\n", 1013 | " ymax = 0.59\n", 1014 | " plt.legend([], [], frameon=False)\n", 1015 | " if j == 0:\n", 1016 | " ax.annotate(mode, xy=(0, 0.5), xytext=(-20, 0), # ax1.yaxis.labelpad - pad\n", 1017 | " xycoords=ax.yaxis.label, textcoords='offset points',\n", 1018 | " size=19, ha='right', va='center', rotation=90, weight=\"bold\")\n", 1019 | " if i == len(modes_considered)-2:\n", 1020 | " plt.xticks([], [])\n", 1021 | " if j > 0:\n", 1022 | " plt.yticks([], [])\n", 1023 | " else:\n", 1024 | " plt.yticks(np.arange(0, ymax, 0.1), np.around(np.arange(0, ymax, 0.1), 1))\n", 1025 | "# ax.set_xlabel(\"Noise level ($\\sigma$)\")\n", 1026 | " ax.set_ylim(0, ymax)\n", 1027 | " ax.set_xlabel(\"\")\n", 1028 | " if counter in [2, 3, 4]:\n", 1029 | " plt.ylabel(\"\")\n", 1030 | " if counter <= len(signal_to_noise_modes):\n", 1031 | " plt.title(greatersmaller, weight=\"bold\", fontsize=15)\n", 1032 | " counter += 1\n", 1033 | "plt.tight_layout()\n", 1034 | "\n", 1035 | "handles, labels = ax.get_legend_handles_labels()\n", 1036 | "plt.tight_layout()\n", 1037 | "plt.figlegend(handles, labels, loc = 'upper center', ncol=2, labelspacing=0., bbox_to_anchor=(0.5,1.1))\n", 1038 | "plt.savefig(\"outputs/train_analysis.pdf\", bbox_inches=\"tight\")\n", 1039 | "plt.show()" 1040 | ] 1041 | }, 1042 | { 1043 | "cell_type": 
"code", 1044 | "execution_count": null, 1045 | "id": "592a99ca", 1046 | "metadata": {}, 1047 | "outputs": [], 1048 | "source": [ 1049 | " 0.198287 / 0.153864, 0.22/0.189239, 0.195645 / 0.149066" 1050 | ] 1051 | }, 1052 | { 1053 | "cell_type": "markdown", 1054 | "id": "a027c8f8", 1055 | "metadata": {}, 1056 | "source": [ 1057 | "### Geo rf vs spatial rf (with old results)" 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "code", 1062 | "execution_count": null, 1063 | "id": "3bfd2ea4", 1064 | "metadata": {}, 1065 | "outputs": [], 1066 | "source": [ 1067 | "res_old = pd.read_csv(\"outputs/synthetic_results_nov_22/synthetic_results.csv\")\n", 1068 | "res_old = res_old[res_old[\"model\"].isin(['geographical RF', 'spatial RF']) & (res_old[\"nr_data\"] < 1000)]" 1069 | ] 1070 | }, 1071 | { 1072 | "cell_type": "code", 1073 | "execution_count": null, 1074 | "id": "cb1177f1", 1075 | "metadata": {}, 1076 | "outputs": [], 1077 | "source": [ 1078 | "spatial_rf_better = res_old.set_index(\"model\")\n", 1079 | "print(\"Spatial RF has better R2 score in percent cases:\",\n", 1080 | " sum(spatial_rf_better.loc[\"spatial RF\"][\"R2 score\"].values >= spatial_rf_better.loc[\"geographical RF\"][\"R2 score\"].values) / (len(spatial_rf_better) / 2)\n", 1081 | " )" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "code", 1086 | "execution_count": null, 1087 | "id": "8120f832", 1088 | "metadata": {}, 1089 | "outputs": [], 1090 | "source": [ 1091 | "# # First try: model on x axis and function mode on y axis\n", 1092 | "# plt.figure(figsize=(10, 4))\n", 1093 | "# res_old.loc[res_old[\"R2 score\"] < 0, \"R2 score\"] = 0\n", 1094 | "# sns.barplot(data=res_old, x=\"model\", y=\"R2 score\", hue=\"data mode\")\n", 1095 | "# plt.legend(ncol=3, fontsize=17, loc=\"upper center\")\n", 1096 | "# plt.ylim(0, 1)\n", 1097 | "\n", 1098 | "plt.rcParams.update({\"font.size\":18})\n", 1099 | "plt.figure(figsize=(7, 4.4))\n", 1100 | "res_old.loc[res_old[\"R2 score\"] < 0, \"R2 score\"] = 0\n", 1101 | 
"sns.barplot(data=res_old, x=\"data mode\", y=\"R2 score\", hue=\"model\")\n", 1102 | "plt.legend(ncol=2, fontsize=18, loc=\"upper center\") # , title=\"Model\")\n", 1103 | "plt.xlabel(\"Function mode\")\n", 1104 | "plt.ylim(0, 1)\n", 1105 | "plt.tight_layout()\n", 1106 | "plt.savefig(\"outputs/geo_vs_spatial_rf.pdf\")\n", 1107 | "plt.show()" 1108 | ] 1109 | }, 1110 | { 1111 | "cell_type": "markdown", 1112 | "id": "8259dff5", 1113 | "metadata": {}, 1114 | "source": [ 1115 | "## GWR coefficient analysis" 1116 | ] 1117 | }, 1118 | { 1119 | "cell_type": "code", 1120 | "execution_count": null, 1121 | "id": "248253fc", 1122 | "metadata": {}, 1123 | "outputs": [], 1124 | "source": [ 1125 | "import os\n", 1126 | "import numpy as np\n", 1127 | "import matplotlib.pyplot as plt\n", 1128 | "import pandas as pd\n", 1129 | "import time\n", 1130 | "import scipy\n", 1131 | "import warnings\n", 1132 | "\n", 1133 | "from sklearn.metrics import r2_score\n", 1134 | "from mgwr.gwr import GWR\n", 1135 | "from mgwr.sel_bw import Sel_BW\n", 1136 | "\n", 1137 | "np.random.seed(42)\n", 1138 | "\n", 1139 | "\n", 1140 | "\n", 1141 | "def non_linear_function_simple(feat_arr, weights):\n", 1142 | " if len(weights.shape) == 1:\n", 1143 | " weights = np.expand_dims(weights, 0)\n", 1144 | " function_zoo = [\n", 1145 | " np.sin,\n", 1146 | " np.exp,\n", 1147 | " lambda x: x ** 2,\n", 1148 | " lambda x: x,\n", 1149 | " np.cos,\n", 1150 | " lambda x: np.log(x ** 2),\n", 1151 | " ]\n", 1152 | " feature_transformed = np.zeros(feat_arr.shape)\n", 1153 | " for i in range(feat_arr.shape[1]):\n", 1154 | " feature_transformed[:, i] = (\n", 1155 | " function_zoo[i](feat_arr[:, i]) * weights[:, i]\n", 1156 | " )\n", 1157 | " return np.sum(feature_transformed, axis=1)\n", 1158 | "\n", 1159 | "nr_feats = 5\n", 1160 | "max_depth = 30\n", 1161 | "noise_type = \"constant\"\n", 1162 | "\n", 1163 | "locality = 0.3\n", 1164 | "\n", 1165 | "weights = np.array([-0.95, 0.38, 0.66, -0.43, 0.22])\n", 1166 | "\n", 1167 | 
"nr_data = 1000\n", 1168 | "\n", 1169 | "# MAKE MAIN DATA\n", 1170 | "train_cutoff = int(nr_data * 0.9)\n", 1171 | "feat_cols = [\"feat_\" + str(i) for i in range(nr_feats)]\n", 1172 | "synthetic_data = pd.DataFrame(\n", 1173 | " np.random.rand(nr_data, 2 + nr_feats) * 2 - 1,\n", 1174 | " columns=[\"x_coord\", \"y_coord\"] + feat_cols,\n", 1175 | ")\n", 1176 | "\n", 1177 | "# simulate spatial variation of features (varying per weight)\n", 1178 | "spatial_variation = np.zeros((nr_data, nr_feats))\n", 1179 | "for i in range(nr_feats):\n", 1180 | " spatial_variation[:, i] = 0.5 * (\n", 1181 | " np.sin(synthetic_data[\"x_coord\"].values * np.pi * 2 + i)\n", 1182 | " + np.cos(synthetic_data[\"y_coord\"].values * np.pi * 2 + i)\n", 1183 | " )\n", 1184 | " \n", 1185 | "spatially_dependent_weights = weights + locality * spatial_variation\n", 1186 | "\n", 1187 | "synthetic_data[\"label\"] = non_linear_function_simple(\n", 1188 | " synthetic_data[feat_cols].values,\n", 1189 | " spatially_dependent_weights,\n", 1190 | " )\n", 1191 | "\n", 1192 | "param_arr = [spatially_dependent_weights[:train_cutoff, 0]]\n", 1193 | "for noise_level in [0, 0.3, 0.5]:\n", 1194 | " noise = np.random.normal(0, noise_level, nr_data)\n", 1195 | " synthetic_data[\"label\"] = synthetic_data[\"label\"] + noise\n", 1196 | "\n", 1197 | " train_data, test_data = (\n", 1198 | " synthetic_data[:train_cutoff],\n", 1199 | " synthetic_data[train_cutoff:],\n", 1200 | " )\n", 1201 | "\n", 1202 | " train_coords = np.array(train_data[[\"x_coord\", \"y_coord\"]])\n", 1203 | " train_y = np.expand_dims(train_data[\"label\"].values, 1)\n", 1204 | " train_x = np.array(train_data[feat_cols])\n", 1205 | " # bandwidth selection\n", 1206 | " # import pickle\n", 1207 | " gwr_selector = Sel_BW(\n", 1208 | " train_coords, train_y, train_x, fixed=True, kernel=\"exponential\"\n", 1209 | " )\n", 1210 | " gwr_bw = gwr_selector.search(criterion=\"AICc\")\n", 1211 | " # create and train model\n", 1212 | " gwr_model = GWR(\n", 
1213 | " train_coords,\n", 1214 | " train_y,\n", 1215 | " train_x,\n", 1216 | " gwr_bw,\n", 1217 | " kernel=\"exponential\",\n", 1218 | " fixed=True,\n", 1219 | " )\n", 1220 | " gwr_results = gwr_model.fit()\n", 1221 | "\n", 1222 | " test_coords = np.array(test_data[[\"x_coord\", \"y_coord\"]])\n", 1223 | " test_x = np.array(test_data[feat_cols])\n", 1224 | " # predict\n", 1225 | " test_pred = gwr_model.predict(\n", 1226 | " test_coords, test_x, gwr_results.scale, gwr_results.resid_response\n", 1227 | " ).predictions\n", 1228 | "\n", 1229 | "\n", 1230 | " score = r2_score(test_pred, test_data[\"label\"])\n", 1231 | " print(score)\n", 1232 | " param_arr.append(gwr_results.params[:, 1])" 1233 | ] 1234 | }, 1235 | { 1236 | "cell_type": "code", 1237 | "execution_count": null, 1238 | "id": "583a60ec", 1239 | "metadata": {}, 1240 | "outputs": [], 1241 | "source": [ 1242 | "plt.figure(figsize=(16,4))\n", 1243 | "names = [r\"Real $\\beta_1$\", r\"GWR $\\beta_1$ ($\\sigma=0$)\", r\"GWR $\\beta_1$ ($\\sigma=0.3$)\",r\"GWR $\\beta_1$ ($\\sigma=0.5$)\"]\n", 1244 | "for i in range(4):\n", 1245 | " plt.subplot(1,4,i+1)\n", 1246 | " plt.scatter(train_data[\"x_coord\"], train_data[\"y_coord\"], c=param_arr[i])\n", 1247 | " plt.title(names[i], fontsize=18)\n", 1248 | " plt.axis(\"off\")\n", 1249 | "plt.tight_layout()\n", 1250 | "plt.savefig(\"outputs/gwr_beta_comparison.png\")\n", 1251 | "plt.show()" 1252 | ] 1253 | }, 1254 | { 1255 | "cell_type": "code", 1256 | "execution_count": null, 1257 | "id": "45fb53a2", 1258 | "metadata": {}, 1259 | "outputs": [], 1260 | "source": [] 1261 | } 1262 | ], 1263 | "metadata": { 1264 | "kernelspec": { 1265 | "display_name": "Python 3 (ipykernel)", 1266 | "language": "python", 1267 | "name": "python3" 1268 | }, 1269 | "language_info": { 1270 | "codemirror_mode": { 1271 | "name": "ipython", 1272 | "version": 3 1273 | }, 1274 | "file_extension": ".py", 1275 | "mimetype": "text/x-python", 1276 | "name": "python", 1277 | "nbconvert_exporter": 
"python", 1278 | "pygments_lexer": "ipython3", 1279 | "version": "3.9.5" 1280 | } 1281 | }, 1282 | "nbformat": 4, 1283 | "nbformat_minor": 5 1284 | } 1285 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | matplotlib 4 | pandas 5 | scikit-learn -------------------------------------------------------------------------------- /scripts/benchmarks.py: -------------------------------------------------------------------------------- 1 | # Standard and GIS Modules 2 | import os 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import time 7 | 8 | # ignore linalg warnings from MGWR package 9 | import warnings 10 | 11 | warnings.filterwarnings("ignore") 12 | 13 | # gwr: 14 | from mgwr.gwr import GWR 15 | from mgwr.sel_bw import Sel_BW 16 | 17 | from sklearn.ensemble import RandomForestRegressor 18 | from sklearn.linear_model import LinearRegression 19 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 20 | from sprf.spatial_random_forest import SpatialRandomForest 21 | from sprf.geographical_random_forest import GeographicalRandomForest 22 | 23 | from models import * 24 | 25 | 26 | def get_folds(nr_samples, nr_folds=10): 27 | fold_inds = np.random.permutation(nr_samples) 28 | num_per_fold = nr_samples // nr_folds 29 | train_inds, test_inds = [], [] 30 | for i in range(nr_folds): 31 | # print("start, end", i*num_per_fold) 32 | if i < nr_folds - 1: 33 | test_inds_fold = np.arange( 34 | i * num_per_fold, (i + 1) * num_per_fold, 1 35 | ) 36 | else: 37 | test_inds_fold = np.arange(i * num_per_fold, nr_samples) 38 | test_inds.append(fold_inds[test_inds_fold]) 39 | train_inds.append(np.delete(fold_inds, test_inds_fold)) 40 | return train_inds, test_inds 41 | 42 | 43 | def prepare_data(data, target, lon="x", lat="y"): 44 | """Assumes that all other columns are used as covariates""" 45 
| # covariates = [col for col in data.columns if col not in [lon, lat, target]] 46 | # return data[covariates], data[target], data[[lon, lat]] 47 | return data.rename( 48 | columns={target: "label", lon: "x_coord", lat: "y_coord"} 49 | ) 50 | 51 | 52 | def add_metrics(test_pred, test_y, res_dict_init, method, runtime): 53 | res_dict = res_dict_init.copy() 54 | res_dict["Method"] = method 55 | res_dict["RMSE"] = mean_squared_error(test_pred, test_y, squared=False) 56 | res_dict["MAE"] = mean_absolute_error(test_pred, test_y) 57 | res_dict["R-Squared"] = r2_score(test_y, test_pred) 58 | res_dict["Runtime"] = runtime 59 | return res_dict 60 | 61 | 62 | def cross_validation(data): 63 | nr_folds = 5 64 | train_inds, test_inds = get_folds(len(data), nr_folds=nr_folds) 65 | res_df = [] 66 | 67 | # dataset specific information 68 | target = dataset_target[DATASET] 69 | x_coord_name = dataset_x.get(DATASET, "x") 70 | y_coord_name = dataset_y.get(DATASET, "y") 71 | 72 | # model params --> TODO: grid search 73 | max_depth = 10 74 | spatial_neighbors = len(data) // 5 # one fifth of the dataset 75 | print("Number of neighbors considered for spatial RF:", spatial_neighbors) 76 | 77 | data_renamed = prepare_data(data.copy(), target, x_coord_name, y_coord_name) 78 | 79 | for fold in range(nr_folds): 80 | res_dict_init = {"fold": fold, "max_depth": max_depth} 81 | train_data = data_renamed.iloc[train_inds[fold]] 82 | test_data = data_renamed.iloc[test_inds[fold]] 83 | feat_cols = [ 84 | col 85 | for col in train_data.columns 86 | if "coord" not in col and col != "label" 87 | ] 88 | # print( 89 | # train_x.shape, train_y.shape, train_coords.shape, test_x.shape, 90 | # test_y.shape, test_coords.shape 91 | # ) 92 | for model_function, name in zip(model_function_names, model_names): 93 | tic = time.time() 94 | test_pred = model_function( 95 | train_data.copy(), test_data.copy(), feat_cols=feat_cols, 96 | ) 97 | runtime = time.time() - tic 98 | res_df.append( 99 | add_metrics( 100 | 
test_pred, test_data["label"], res_dict_init, name, runtime, 101 | ) 102 | ) 103 | print(name, res_df[-1]["R-Squared"]) 104 | 105 | # Finalize results 106 | res_df = pd.DataFrame(res_df) 107 | return res_df 108 | 109 | 110 | os.makedirs("outputs", exist_ok=True) 111 | 112 | dataset_target = { 113 | "plants": "richness_species_vascular", 114 | "meuse": "zinc", 115 | "atlantic": "Rate", 116 | "deforestation": "deforestation_quantile", 117 | "california_housing": "median_house_value", 118 | } 119 | 120 | model_function_names = [ 121 | linear_regression, 122 | rf_coordinates, 123 | rf_global, 124 | rf_spatial, 125 | my_gwr, 126 | kriging, 127 | sarm, 128 | slx 129 | # rf_geographical, 130 | ] 131 | model_names = [ 132 | "linear regression", 133 | "RF (coordinates)", 134 | "RF", 135 | "spatial RF", 136 | "GWR", 137 | "Kriging", 138 | "SAR", 139 | "SLX" 140 | # "geographical RF", 141 | ] 142 | 143 | datasets = [ 144 | "meuse", 145 | "plants", 146 | "atlantic", 147 | "deforestation", 148 | "california_housing", 149 | ] 150 | 151 | np.random.seed(42) 152 | 153 | for DATASET in datasets: 154 | print("\nDATASET", DATASET, "\n") 155 | 156 | dataset_x = {} # per default: x 157 | dataset_y = {} # per default: y 158 | data_path = os.path.join("data", DATASET + ".csv") 159 | 160 | data = pd.read_csv(data_path) 161 | print("Number of samples", len(data)) 162 | 163 | results = cross_validation(data) 164 | results.to_csv( 165 | os.path.join("outputs", f"results_{DATASET}_folds.csv"), index=False 166 | ) 167 | 168 | results_grouped = ( 169 | results.groupby(["Method"]) 170 | .mean() 171 | .drop(["fold", "max_depth"], axis=1) 172 | .sort_values("RMSE") 173 | ) 174 | results_grouped.to_csv(os.path.join("outputs", f"results_{DATASET}.csv")) 175 | 176 | print(results_grouped) 177 | print("--------------") 178 | -------------------------------------------------------------------------------- /scripts/models.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.linear_model import LinearRegression 5 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 6 | from sprf.spatial_random_forest import SpatialRandomForest 7 | from sprf.geographical_random_forest import GeographicalRandomForest 8 | from scipy.spatial import distance_matrix 9 | from mgwr.gwr import GWR 10 | from mgwr.sel_bw import Sel_BW 11 | from pykrige.rk import RegressionKriging 12 | import spreg 13 | import libpysal 14 | 15 | 16 | def linear_regression(train_data, test_data, feat_cols=[], **kwargs): 17 | regr = LinearRegression() 18 | regr.fit(train_data[feat_cols], train_data["label"]) 19 | test_pred = regr.predict(test_data[feat_cols]) 20 | return test_pred 21 | 22 | 23 | def rf_coordinates( 24 | train_data, test_data, feat_cols=[], max_depth=30, n_estim=100, **kwargs 25 | ): 26 | regr = RandomForestRegressor(n_estimators=n_estim, max_depth=max_depth) 27 | regr.fit( 28 | train_data[["x_coord", "y_coord"] + feat_cols], train_data["label"] 29 | ) 30 | test_pred = regr.predict(test_data[["x_coord", "y_coord"] + feat_cols]) 31 | return test_pred 32 | 33 | 34 | def rf_global( 35 | train_data, test_data, feat_cols=[], max_depth=30, n_estim=100, **kwargs 36 | ): 37 | regr = RandomForestRegressor(n_estimators=n_estim, max_depth=max_depth) 38 | regr.fit(train_data[feat_cols], train_data["label"]) 39 | test_pred = regr.predict(test_data[feat_cols]) 40 | return test_pred 41 | 42 | 43 | def rf_spatial( 44 | train_data, test_data, feat_cols=[], max_depth=30, nr_data=500, **kwargs 45 | ): 46 | n_estim = 100 if nr_data > 200 else 50 47 | regr = SpatialRandomForest( 48 | n_estimators=n_estim, neighbors=500, max_depth=max_depth 49 | ) 50 | regr.tune_neighbors( 51 | train_data[feat_cols], 52 | train_data["label"], 53 | train_data[["x_coord", "y_coord"]], 
def get_weights_as_array(points, max_points):
    """
    Build a row-standardised inverse-distance spatial weight matrix.

    Each pair of points gets the weight ``1 / distance``. Per row, only the
    entries that are at least as large as the ``max_points``-th largest
    inverse distance are kept; afterwards every row is divided by its sum.

    Parameters
    ----------
    points : array-like of shape (n, d)
        Coordinates of the points.
    max_points : int
        Rank of the per-row threshold, i.e. how many of the strongest
        (closest) connections survive. Values larger than ``n`` are clamped
        to ``n`` instead of raising an IndexError.

    Returns
    -------
    ndarray of shape (n, n)
        Weight matrix. Pairs at distance zero (the diagonal and duplicated
        points) have weight zero. A row without any positive-distance
        neighbour is all zero rather than NaN.
    """
    dist_matrix = distance_matrix(points, points)
    # zero distances would trigger a divide-by-zero warning; the resulting
    # infinities are replaced by a weight of zero right below
    with np.errstate(divide="ignore"):
        weights = 1 / dist_matrix
    weights[np.isinf(weights)] = 0
    # per-row threshold: the max_points-th largest inverse distance
    n_keep = min(max_points, len(weights))
    row_threshold = np.sort(weights, axis=1)[:, -n_keep]
    weights[weights < row_threshold[:, np.newaxis]] = 0
    # row-standardise; rows that sum to zero stay zero
    row_sums = weights.sum(axis=1, keepdims=True)
    return np.divide(
        weights, row_sums, out=np.zeros_like(weights), where=row_sums > 0
    )


def morans_i(y, w):
    """
    Compute Moran's I, a measure of spatial autocorrelation.

    Uses ``I = (n / sum(w)) * (z^T w z) / (z^T z)`` with ``z = y - mean(y)``,
    evaluated with matrix products instead of a Python double loop.

    Parameters
    ----------
    y : array-like of shape (n,)
        Observed values.
    w : array-like of shape (n, n)
        Spatial weight matrix.

    Returns
    -------
    float
        Moran's I. The result is NaN if all values of ``y`` are equal,
        because the denominator is zero in that case.
    """
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    deviations = y - np.mean(y)
    # quadratic form == sum_ij w[i, j] * z[i] * z[j]
    sum_numerator = deviations @ w @ deviations
    sum_denominator = np.sum(deviations ** 2)
    return (len(y) / np.sum(w)) * (sum_numerator / sum_denominator)
def my_gwr(train_data, test_data, feat_cols=[], **kwargs):
    """
    Fit a geographically weighted regression (GWR) and predict test labels.

    The bandwidth is selected with ``Sel_BW`` (AICc criterion, fixed
    exponential kernel), then a ``GWR`` model is fitted on the training data
    and evaluated at the test coordinates.

    Parameters
    ----------
    train_data, test_data : pandas.DataFrame
        Must contain the columns ``x_coord``, ``y_coord`` and ``feat_cols``;
        ``train_data`` additionally needs ``label``.
    feat_cols : list of str
        Feature columns used as regressors (the default list is never
        mutated here).
    **kwargs
        Ignored; accepted so that all benchmark models share one signature.

    Returns
    -------
    ndarray
        The GWR predictions, or an array of zeros with one entry per test
        sample if any step of the GWR pipeline fails.
    """
    try:
        train_coords = np.array(train_data[["x_coord", "y_coord"]])
        train_y = np.expand_dims(train_data["label"].values, 1)
        train_x = np.array(train_data[feat_cols])
        # bandwidth selection
        gwr_selector = Sel_BW(
            train_coords, train_y, train_x, fixed=True, kernel="exponential"
        )
        gwr_bw = gwr_selector.search(criterion="AICc")
        # create and train model
        gwr_model = GWR(
            train_coords,
            train_y,
            train_x,
            gwr_bw,
            kernel="exponential",
            fixed=True,
        )
        gwr_results = gwr_model.fit()

        test_coords = np.array(test_data[["x_coord", "y_coord"]])
        test_x = np.array(test_data[feat_cols])
        # predict
        test_pred = gwr_model.predict(
            test_coords, test_x, gwr_results.scale, gwr_results.resid_response
        ).predictions
        return test_pred
    except Exception as err:
        # Best-effort fallback: keep the benchmark running, but do not
        # swallow KeyboardInterrupt/SystemExit and do report the cause.
        print("GWR not possible:", repr(err))
        return np.zeros(len(test_data))
def main_plot(
    results,
    nr_data=500,
    noise_type="uniformly distributed",
    save_path="outputs/main_plot.pdf",
    score_col="RMSE",
):
    """
    Plot one noise-by-locality heatmap of the mean score per model and mode.

    Rows of the figure are the data modes ("linear", "non-linear (simple)",
    "non-linear"), columns are the models that occur in ``results``.

    Parameters
    ----------
    results : pandas.DataFrame
        Needs the columns ``model``, ``data mode``, ``nr_data``,
        ``noise_type``, ``noise``, ``locality`` and ``score_col``.
    nr_data : int
        Only rows with this number of samples are plotted.
    noise_type : str
        Only rows with this noise type are plotted.
    save_path : str or None
        Where to save the figure; missing parent directories are created.
        Nothing is saved if None.
    score_col : str
        Column that is averaged and colour-coded.

    Raises
    ------
    ValueError
        If none of the supported models occurs in ``results``.
    """
    noise_level_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
    locality_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
    available_models = set(results["model"].unique())
    include_models = [
        m
        for m in [
            "OLS",
            "SLX",
            "GWR",
            "RF",
            "RF (coordinates)",
            "spatial RF",
            "Kriging",
        ]
        if m in available_models
    ]
    if not include_models:
        # without any heatmap there is nothing the colorbar could refer to
        raise ValueError("results contains none of the models to plot")
    nr_models = len(include_models)
    fig = plt.figure(figsize=(16, 6.5))
    for mode_ind, mode in enumerate(
        ["linear", "non-linear (simple)", "non-linear"]
    ):
        for model_ind, model in enumerate(include_models):
            # set_index returns a new frame, so the filtered slice of
            # `results` is never modified in place
            results_filter = results[
                (results["data mode"] == mode)
                & (results["model"] == model)
                & (results["nr_data"] == nr_data)
                & (results["noise_type"] == noise_type)
            ].set_index(["noise", "locality"])
            visualize_scores = np.zeros(
                (len(noise_level_range), len(locality_range))
            )
            for i, noise in enumerate(noise_level_range):
                for j, locality in enumerate(locality_range):
                    score = results_filter.loc[noise, locality][
                        score_col
                    ].mean()
                    visualize_scores[i, j] = score

            # one extra grid column is left free for the colorbar
            ax1 = fig.add_subplot(
                3, nr_models + 1, ((nr_models + 1) * mode_ind) + model_ind + 1
            )
            imshow_plot = ax1.imshow(visualize_scores, vmin=0, vmax=0.6)
            ax1.set_xticks([])
            ax1.set_yticks([])
            if model_ind == 0:
                # label each row with its data mode, left of the first plot
                mode_new = (
                    "non-linear\n(simple) "
                    if mode == "non-linear (simple)"
                    else mode
                )
                ax1.annotate(
                    mode_new,
                    xy=(0, 0.5),
                    xytext=(-50, 0),
                    xycoords=ax1.yaxis.label,
                    textcoords="offset points",
                    size=18,
                    ha="right",
                    va="center",
                    rotation=90,
                    weight="bold",
                )
            if mode_ind == 0:
                ax1.set_title(model, weight="bold", fontsize=15)

    # raw strings: "\l" is not a valid Python escape sequence
    fig.text(
        0.5, 0.0, r"$\longrightarrow$ decreasing stationarity", ha="center"
    )
    fig.text(
        0.07,
        0.5,
        r"$\longleftarrow$ Increasing noise",
        va="center",
        rotation="vertical",
    )
    # make colorbar
    cbar_ax = fig.add_axes([0.88, 0.05, 0.02, 0.9])
    fig.colorbar(imshow_plot, cax=cbar_ax, label=score_col)
    plt.tight_layout()
    if save_path is not None:
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.savefig(save_path)
    plt.show()
(subset["locality_discrete"] == "high") 146 | & (subset["noise_type"] == "uniformly distributed") 147 | ] 148 | subset_2 = subset_2.groupby(["nr_data", "model"]).agg( 149 | {score_col: "mean"} 150 | ) 151 | 152 | ax = sns.barplot( 153 | data=subset_2.reset_index(), x="nr_data", y=score_col, hue="model" 154 | ) 155 | # plt.ylim(0, 1) 156 | plt.xlabel("Number of samples") 157 | # if mode == "non-linear (simple)": 158 | # plt.legend(title="Model", loc="lower right", framealpha=1, ncol=2) 159 | # else: 160 | plt.legend([], [], frameon=False) 161 | plt.title(mode + " DGP", fontsize=18) 162 | 163 | handles, labels = ax.get_legend_handles_labels() 164 | plt.tight_layout() 165 | plt.figlegend( 166 | handles, 167 | labels, 168 | loc="upper center", 169 | ncol=5, 170 | labelspacing=0.0, 171 | bbox_to_anchor=(0.5, 1.09), 172 | ) 173 | plt.savefig(f"outputs/barplot_main.pdf", bbox_inches="tight") 174 | plt.show() 175 | 176 | 177 | def noise_analysis(results): 178 | fig = plt.figure(figsize=(12, 9)) 179 | subset = results[ 180 | results["model"].isin( 181 | ["OLS", "GWR", "RF (coordinates)", "Kriging"] 182 | ) # "spatial RF", 183 | ] 184 | counter = 1 185 | for i, mode in enumerate( 186 | ["linear", "non-linear (simple)"] 187 | ): # "non-linear (simple)" # linear", 188 | for j, model in enumerate(["GWR", "Kriging"]): 189 | subset2 = subset[ 190 | (subset["model"] == model) 191 | & (subset["data mode"] == mode) 192 | & 193 | # (subset["noise_discrete"] == "low") & 194 | (subset["locality_discrete"] == "high") 195 | & 196 | # (subset["noise"] == 0.3) & 197 | # (subset["locality"] == 0.4) & 198 | # (subset["noise_type"] == "constant") & 199 | (subset["nr_data"] == 500) 200 | ] 201 | subset2["noise_type"] = subset2["noise_type"] + " noise" 202 | ax = fig.add_subplot(2, 2, counter) 203 | sns.lineplot( 204 | ax=ax, 205 | data=subset2.reset_index(), 206 | x="noise", 207 | y="RMSE", 208 | hue="noise_type", 209 | ) 210 | # if counter == 1: 211 | # plt.legend(title="Noise (spatial 
def create_data(nr_data, nr_feats=5, rho=0.6, weight_matrix_cutoff=20):
    """
    Sample random 2D coordinates and spatially autocorrelated features.

    Coordinates are drawn uniformly from [-1, 1]^2. Every feature starts as
    uniform noise ``x`` in [-1, 1], is transformed with the spatial lag
    filter ``(I - rho * W)^-1 x`` and is finally min-max scaled to [-1, 1].

    Parameters
    ----------
    nr_data : int
        Number of samples.
    nr_feats : int
        Number of features per sample.
    rho : float
        Factor applied to the weight matrix ``W`` in the lag filter.
    weight_matrix_cutoff : int
        Passed to ``get_weights_as_array`` as ``max_points``.

    Returns
    -------
    ndarray of shape (nr_data, 2 + nr_feats)
        The two coordinate columns followed by the feature columns.
    """
    x_coords = np.random.rand(nr_data, 2) * 2 - 1

    # The weight matrix and the inverse of (I - rho * W) depend only on the
    # coordinates, so they are computed once instead of once per feature.
    # No random numbers are consumed here, so the sampled data is unchanged.
    w = get_weights_as_array(x_coords, weight_matrix_cutoff)
    lag_filter = np.linalg.inv(np.identity(len(x_coords)) - rho * w)

    all_feats = np.zeros((nr_data, nr_feats))
    for feat in range(nr_feats):
        att = np.random.uniform(-1, 1, nr_data)
        # apply the spatial lag filter to the raw attribute
        att_hat = np.matmul(lag_filter, att)
        # scale to -1 to 1
        lowest, highest = np.min(att_hat), np.max(att_hat)
        all_feats[:, feat] = (att_hat - lowest) / (highest - lowest) * 2 - 1
    return np.hstack([x_coords, all_feats])
+ nr_feats) * 2 - 1 106 | # V2: with spatial lag 107 | synthetic_data_array = create_data( 108 | nr_data, nr_feats=nr_feats, rho=rho, weight_matrix_cutoff=w_cutoff 109 | ) 110 | print(synthetic_data_array.shape) 111 | 112 | synthetic_data = pd.DataFrame( 113 | synthetic_data_array, columns=["x_coord", "y_coord"] + feat_cols, 114 | ) 115 | print(synthetic_data.head(5)) 116 | # Double check Moran's I 117 | w = get_weights_as_array(synthetic_data_array[:, :2], w_cutoff) 118 | for t in range(5): 119 | print( 120 | "Moran's I of coefficient", 121 | t, 122 | morans_i(synthetic_data_array[:, t + 2], w), 123 | ) 124 | 125 | # simulate spatial variation of features (varying per weight) 126 | spatial_variation = np.zeros((nr_data, nr_feats)) 127 | for i in range(nr_feats): 128 | spatial_variation[:, i] = 0.5 * ( 129 | np.sin(synthetic_data["x_coord"].values * np.pi * 2 + i) 130 | + np.cos(synthetic_data["y_coord"].values * np.pi * 2 + i) 131 | ) 132 | 133 | for noise_level in noise_level_range: 134 | for locality in locality_range: 135 | # spatially dependent but linear 136 | spatially_dependent_weights = weights + locality * spatial_variation 137 | 138 | for mode in ["linear", "non-linear"]: 139 | print("--------", noise_level, locality, mode) 140 | # apply linear or non_linear function 141 | if mode == "linear": 142 | synthetic_data["label"] = np.sum( 143 | spatially_dependent_weights 144 | * synthetic_data[feat_cols].values, 145 | axis=1, 146 | ) 147 | else: 148 | synthetic_data["label"] = non_linear( 149 | synthetic_data[feat_cols].values, 150 | spatially_dependent_weights, 151 | ) 152 | 153 | if noise_type == "uniformly distributed": 154 | noise = np.random.normal(0, noise_level, nr_data) 155 | elif noise_type == "heterogeneous - different": 156 | spatial_variation_different = noise_level * ( 157 | 0.5 158 | * ( 159 | synthetic_data["x_coord"].values 160 | + synthetic_data["y_coord"].values 161 | ) 162 | + 1 163 | ) 164 | noise = np.random.normal( 165 | 0, 166 | 
spatial_variation_different, 167 | len(spatial_variation_different), 168 | ) 169 | elif noise_type == "heterogeneous - same": 170 | # e.g. high noise level (0.5), spatial variation is from 171 | # sin and cos so it's between -1 and 1, so we make + 1 172 | # so on average we multiply by 1, but varying variance 173 | # between 0.5 * 0 and 0.5 * 2 174 | spatially_dependent_noise = noise_level * ( 175 | spatial_variation[:, 0] + 1 # without locality level! 176 | ) 177 | noise = np.random.normal( 178 | 0, spatially_dependent_noise, nr_data 179 | ) 180 | else: 181 | raise RuntimeError("Noise must be one of above") 182 | 183 | synthetic_data["label"] = synthetic_data["label"] + noise 184 | 185 | train_data, test_data = ( 186 | synthetic_data[:train_cutoff], 187 | synthetic_data[train_cutoff:], 188 | ) 189 | 190 | for model_function, name in zip( 191 | model_function_names, model_names 192 | ): 193 | tic = time.time() 194 | test_pred = model_function( 195 | train_data.copy(), 196 | test_data.copy(), 197 | # train_data.copy(), # for overfitting test 198 | feat_cols=feat_cols, 199 | max_depth=max_depth, 200 | nr_data=nr_data, 201 | n_estim=n_estim, 202 | w_cutoff=w_cutoff, 203 | ) 204 | # compute metrics 205 | score = r2_score(test_data["label"], test_pred) 206 | rmse = mean_squared_error( 207 | test_data["label"], test_pred, squared=False 208 | ) 209 | # train_data["label"]) # for overfitting test 210 | time_diff = time.time() - tic 211 | # add to results 212 | results_list.append( 213 | { 214 | "nr_data": nr_data, 215 | "noise": noise_level, 216 | "locality": locality, 217 | "data mode": mode, 218 | "model": name, 219 | "time": time_diff, 220 | "R2 score": score, 221 | "RMSE": rmse, 222 | } 223 | ) 224 | print(name, round(rmse, 3)) 225 | 226 | results = pd.DataFrame(results_list) 227 | results["noise_type"] = noise_type 228 | noise_name = "_".join(noise_type.split(" ")) 229 | results.to_csv(f"synthetic_data_results_{noise_name}.csv", index=False) 230 | print("Saved 
"""Package installer."""
import os
from setuptools import setup
from setuptools import find_packages

# Use the README as long description when it is available (e.g. it may be
# missing in a stripped-down source tree).
LONG_DESCRIPTION = ""
if os.path.exists("README.md"):
    # explicit encoding: the platform default may not be able to decode it
    with open("README.md", encoding="utf-8") as fp:
        LONG_DESCRIPTION = fp.read()

scripts = []

setup(
    name="sprf",
    version="0.0.1",
    description="Spatial Random Forests",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    author="MIE Lab",
    author_email=("nwiedemann@ethz.ch"),
    license="GPLv3",
    url="https://github.com/mie-lab/spatial_rf_python",
    install_requires=["numpy", "scipy", "pandas", "scikit-learn"],
    classifiers=[
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Intended Audience :: Science/Research",
        # kept in line with python_requires below
        "Programming Language :: Python :: 3.8",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    packages=find_packages("."),
    python_requires=">=3.8",
)
Random Forest according to Georganos et al. 11 | 12 | Parameters 13 | ---------- 14 | sample_by : str, optional {neighbors, distance} 15 | Sampling strategy. The spatial random forest consists of trees that are 16 | fitted on a spatial subset of samples. These spatial subsets can either 17 | be sampled by defining a distance-radius, or by specifying a fixed 18 | number of spatial neighbors. By default "neighbors", see notes below. 19 | neighbors : int, optional 20 | Number of neighbors to use for spatial fit, by default 500 samples. 21 | Only relevant if sample_by=neighbors. 22 | max_distance : int, optional 23 | Maximum distance of samples to belong to the same decision tree. Only 24 | relevant if sample_by=distance. By default 150000m 25 | """ 26 | 27 | def __init__( 28 | self, 29 | sample_by: str = "neighbors", 30 | neighbors: int = 500, 31 | max_distance: float = 150000, 32 | **random_forest_arguments 33 | ): 34 | self.sample_by = sample_by 35 | if sample_by == "distance" and max_distance == 150000: 36 | warnings.warn( 37 | "It seems that you have selected the 'distance'-sampling mode,\ 38 | but the parameter max_distance is still the default. Make\ 39 | sure to adapt the max_distance parameter to your dataset." 
40 | ) 41 | self.max_distance = max_distance 42 | self.neighbors = neighbors 43 | self.random_forest_arguments = random_forest_arguments 44 | 45 | def fit(self, x_train, y_train, coords_train): 46 | # convert to arrays 47 | x_train, y_train, coords_train = ( 48 | np.array(x_train), 49 | np.array(y_train), 50 | np.array(coords_train), 51 | ) 52 | assert ( 53 | len(coords_train.shape) == 2 and coords_train.shape[1] == 2 54 | ), "coords test must have len 2 in dimension 1" 55 | 56 | # init RFs 57 | self.random_forests = [ 58 | RandomForestRegressor(**self.random_forest_arguments) 59 | for _ in range(len(x_train)) 60 | ] 61 | 62 | # make distance matrix n x n 63 | dist = np.zeros((len(coords_train), len(coords_train))) 64 | for i, coord1 in enumerate(coords_train): 65 | for j, coord2 in enumerate(coords_train[i:]): 66 | dist[i, j + i] = np.linalg.norm(coord1 - coord2) 67 | # mirror distance matrix 68 | dist = dist + dist.T 69 | 70 | # save the train coordinates because they are needed for prediction 71 | self.rf_coords_train = coords_train 72 | 73 | # fit one random forest per sample 74 | for core_ind in range(len(x_train)): 75 | dist_to_others = dist[core_ind] 76 | if self.sample_by == "distance": 77 | samples_to_fit = np.where(dist_to_others < self.max_distance 78 | )[0] 79 | elif self.sample_by == "neighbors": 80 | sorted_inds = np.argsort(dist_to_others) 81 | samples_to_fit = sorted_inds[:self.neighbors] 82 | else: 83 | raise NotImplementedError( 84 | "sample mode must be one of 'neighbors', 'distance'!" 
class SpatialRandomForest:
    """
    Spatial Random Forest implementation, following the sklearn style

    Parameters
    ----------
    n_estimators : int, optional
        Number of base estimators (decision trees), by default 100
    sample_mode : str, optional {cluster, random}
        Trees are rooted either in the centers of clusters of the dataset, or
        in random locations, by default "cluster"
    sample_by : str, optional {neighbors, distance}
        Sampling strategy. The spatial random forest consists of trees that
        are fitted on a spatial subset of samples. These spatial subsets can
        either be sampled by defining a distance-radius, or by specifying a
        fixed number of spatial neighbors. By default "neighbors".
    neighbors : int, optional
        Number of neighbors to use for spatial fit, by default 500 samples.
        Only relevant if sample_by=neighbors.
    max_distance : float, optional
        Maximum distance of samples to belong to the same decision tree. Only
        relevant if sample_by=distance. By default 150000
    min_points_distance : int, optional
        Minimum points for fitting a decision tree, i.e. if the distance is
        set too low, decision trees would be fit on an insufficient number of
        points. Only relevant if sample_by=distance, by default 100
    **decision_tree_args : dict
        Any arguments that are passed to the sklearn DecisionTreeRegressor

    Notes
    ----------
    - Only regression is implemented so far.
    - In contrast to other spatial RF papers, we do not build one tree per
      sample, but rather a fixed set of trees on spatial subsets
      of the data.
    - The spatial subsets are either chosen by a fixed number of neighbors or
      by a radius (spatial distance)
    - Projected coordinates are assumed!

    Example
    ----------
    sp = SpatialRandomForest(max_depth=20, neighbors=50)
    sp.fit(train_x, train_y, train_coords)
    pred_y = sp.predict(test_x, test_coords)
    """

    def __init__(
        self,
        n_estimators: int = 100,
        sample_mode: str = "cluster",
        sample_by: str = "neighbors",
        neighbors: int = 500,
        max_distance: float = 150000,
        min_points_distance: int = 100,
        **decision_tree_args,
    ):
        self.estimators = [
            DecisionTreeRegressor(**decision_tree_args)
            for _ in range(n_estimators)
        ]
        if sample_by == "distance" and max_distance == 150000:
            # implicit string concatenation keeps the message free of the
            # indentation whitespace a backslash continuation would embed
            warnings.warn(
                "It seems that you have selected the 'distance'-sampling "
                "mode, but the parameter max_distance is still the default. "
                "Make sure to adapt the max_distance parameter to your "
                "dataset."
            )
        self.n_estimators = n_estimators
        self.sample_mode = sample_mode
        self.sample_by = sample_by
        # only relevant if sample_by == distance
        self.max_distance = max_distance
        self.min_points_distance = min_points_distance
        # only relevant if sample_by == "neighbors"
        self.neighbors = neighbors
        # init core points
        self.estimator_core_points = []

    def _sample_core_points(self, coords):
        """
        Sample the points that form the centers of the spatial trees.
        coords: 2D Array of shape (N, 2) where N is the number of samples
        Returns: 2D Array with one row of 2 coordinates per core point (the
            KMeans cluster centers, or randomly picked rows of coords)
        """
        if self.sample_mode == "cluster":
            # cluster coordinates with kmeans use centers as core points
            kmeans = KMeans(self.n_estimators)
            kmeans.fit(coords)
            core_points = kmeans.cluster_centers_
        # TODO: elif sample_mode == "grid":
        elif self.sample_mode == "random":
            # select random coordinates from the train data as core points
            shuffled_inds = np.random.permutation(len(coords))
            core_points = coords[shuffled_inds[:self.n_estimators]]
        else:
            raise NotImplementedError(
                "sample mode must be one of cluster, random"
            )
        return core_points

    def _sample_point_clouds(self, coords):
        """
        Assign samples to their spatial decision tree.
        coords: 2D Array of shape (N, 2) where N is the number of samples
        Returns: List of index arrays, one per tree. In distance mode, core
            points with too few samples in their radius get no entry.
        """
        point_clouds = []
        for core_point in self.estimator_core_points:
            # Compute distance of the core point to all coordinates
            dist_to_others = np.sqrt(np.sum((coords - core_point)**2, axis=1))
            if self.sample_by == "neighbors":
                # add fixed number of closest samples
                point_clouds.append(
                    np.argsort(dist_to_others)[:self.neighbors]
                )
            elif self.sample_by == "distance":
                # filter by distance
                point_with_lower_dist = np.where(
                    dist_to_others < self.max_distance
                )[0]
                # only use point clouds that are large enough! --> cannot
                # fit a decision tree on 5 points
                if len(point_with_lower_dist) > self.min_points_distance:
                    point_clouds.append(point_with_lower_dist)
            else:
                raise NotImplementedError(
                    "sample mode must be one of 'neighbors', 'distance'!"
                )
        return point_clouds

    def fit(self, x_train, y_train, coords_train):
        """
        Fit spatial random forest to a dataset.

        Parameters
        ----------
        x_train : array-like of shape (n_samples, n_features)
            The training input samples.
        y_train : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (real numbers in regression).
        coords_train: array-like of shape (n_samples, 2) with spatial
            coordinates of each sample. Geographic coordinates are assumed
            to be projected!

        Raises
        ------
        ValueError
            If no point cloud is large enough to fit a single tree.
        """
        # convert to arrays
        x_train, y_train, coords_train = (
            np.array(x_train),
            np.array(y_train),
            np.array(coords_train),
        )
        assert (
            len(coords_train.shape) == 2 and coords_train.shape[1] == 2
        ), "coords_train must have len 2 in dimension 1"

        # sample core points
        self.estimator_core_points = self._sample_core_points(coords_train)
        # assign samples to their core points
        # (one sample can be in several point clouds!)
        point_clouds = self._sample_point_clouds(coords_train)
        if len(point_clouds) == 0:
            raise ValueError(
                "No point cloud contains more than "
                f"{self.min_points_distance} points, so no tree can be "
                "fitted. Increase 'max_distance' or decrease "
                "'min_points_distance'."
            )
        if len(point_clouds) < self.n_estimators:
            warnings.warn(
                f"Some point clouds had less than {self.min_points_distance}"
                " points and are therefore ignored. Consider increasing the"
                " parameter 'max_distance' to include more points"
                " (recommended), or decrease 'min_points_distance'"
            )
        # correct number of estimators
        self.n_estimators = len(point_clouds)
        self.estimators = self.estimators[:self.n_estimators]
        # correct core points: Use center of gravity of each point clouds
        self.estimator_core_points = np.array(
            [
                np.mean(coords_train[cloud_inds], axis=0)
                for cloud_inds in point_clouds
            ]
        )
        # fit each point cloud to an estimator
        for i, sample_inds in enumerate(point_clouds):
            self.estimators[i].fit(x_train[sample_inds], y_train[sample_inds])

    def tune_neighbors(self, *args, **kwargs):
        """Set ``neighbors`` to the value returned by ``tune_neighbors``."""
        self.neighbors = tune_neighbors(self, *args, **kwargs)

    def predict(self, x_test, coords_test=None, weighted=True, closest=False):
        """
        Predict regression targets for x_test.

        Every tree predicts every sample. The tree-wise outputs are then
        combined either by a plain mean, by inverse-distance weighting
        relative to the trees' core points, or by using only the tree with
        the closest core point.

        Parameters
        ----------
        x_test : array-like of shape (n_samples, n_features)
            The input samples.
        coords_test: array-like of shape (n_samples, 2), optional
            Coordinates of the samples. Required if weighted=True or
            closest=True.
        weighted: bool, optional
            Whether the tree-wise predictions should be aggregated based on
            their spatial distance to the test sample (similar to inverse
            distance weighting).
        closest: bool, optional
            If True, only the tree whose core point is closest to the sample
            is used. Takes precedence over ``weighted``.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values.
        """
        # convert to arrays
        x_test = np.array(x_test)
        needs_coords = weighted or closest
        assert coords_test is not None or not needs_coords, (
            "If weighted=True or closest=True, then coords_test is required."
        )
        # predict output with each base estimator
        y_pred = np.zeros((len(x_test), self.n_estimators))
        for i, estimator in enumerate(self.estimators):
            y_pred[:, i] = estimator.predict(x_test)
        # If no spatial weighting: Simply return average of estimators
        if not needs_coords:
            return np.mean(y_pred, axis=1)
        # check that test coords are alright
        coords_test = np.array(coords_test)
        assert (
            len(coords_test.shape) == 2 and coords_test.shape[1] == 2
        ), "coords test must have len 2 in dimension 1"
        # distance of test samples to all core points: (n_samples, n_trees)
        dist_to_core_points = np.array(
            [
                np.sqrt(np.sum((coords_test - core_point)**2, axis=1))
                for core_point in self.estimator_core_points
            ]
        ).swapaxes(1, 0)
        if closest:
            use_tree = np.argmin(dist_to_core_points, axis=1)
            return y_pred[np.arange(len(y_pred)), use_tree]

        # inverse distance weights; a sample lying exactly on a core point
        # would get an infinite weight, so such samples use only the tree(s)
        # rooted at that point, while all other samples are unaffected
        on_core_point = dist_to_core_points == 0
        has_exact_hit = on_core_point.any(axis=1)
        with np.errstate(divide="ignore"):
            weights = 1 / dist_to_core_points
        weights[has_exact_hit] = on_core_point[has_exact_hit].astype(float)
        # turn into probabilities
        weights = weights / np.sum(weights, axis=1, keepdims=True)

        # prediction is weighted sum
        return np.sum(y_pred * weights, axis=1)

    @staticmethod
    def _sample_by_distance_old(
        coords_train, nr_clouds=20, radius=150000, min_points=400
    ):
        """Deprecated"""
        # make distance matrix n x n
        dist = np.zeros((len(coords_train), len(coords_train)))
        for i, coord1 in enumerate(coords_train):
            for j, coord2 in enumerate(coords_train[i:]):
                dist[i, j + i] = np.linalg.norm(coord1 - coord2)
        # mirror distance matrix
        dist = dist + dist.T
        # make point clouds
        point_clouds = []
        for core_ind in np.random.permutation(len(dist)):
            dist_to_others = dist[core_ind]
            inds = np.where(dist_to_others < radius)[0]
            if len(inds) > min_points:
                point_clouds.append(inds)
            if len(point_clouds) > nr_clouds:
                break
        return point_clouds
def tune_neighbors(
    model, x_train, y_train, coords_train, nr_check=10, eval_criterium=r2_score,
):
    """
    Tune ``model.neighbors`` on a random 90/10 train/validation split.

    ``nr_check`` candidate values, evenly spaced between 0 and the number of
    samples (both excluded), are tried. For every candidate the model is
    refitted on the training part and scored on the validation part; the
    candidate with the highest score wins.

    Parameters
    ----------
    model : object
        Model exposing a ``neighbors`` attribute, ``fit(x, y, coords)`` and
        ``predict(x, coords)``. It is refitted several times and left fitted
        on the last evaluated candidate; ``model.neighbors`` is set to the
        best value found.
    x_train, y_train, coords_train : array-like
        Samples, targets and coordinates, all with the same first dimension.
    nr_check : int, optional
        Number of candidate values to evaluate.
    eval_criterium : callable, optional
        Score function called as ``eval_criterium(y_true, y_pred)``; higher
        is better.

    Returns
    -------
    int
        The best number of neighbors (the previous ``model.neighbors`` if no
        candidate could be evaluated).
    """
    x_train, y_train, coords_train = (
        np.asarray(x_train),
        np.asarray(y_train),
        np.asarray(coords_train),
    )
    max_neighbors = len(x_train)
    # split in train and val
    cutoff = int(max_neighbors * 0.9)
    rand_inds = np.random.permutation(max_neighbors)
    train_i, val_i = rand_inds[:cutoff], rand_inds[cutoff:]
    x_val, x_train = x_train[val_i], x_train[train_i]
    y_val, y_train = y_train[val_i], y_train[train_i]
    coords_val, coords_train = coords_train[val_i], coords_train[train_i]

    # NOTE(review): the grid is spaced over the full sample count, so the
    # largest candidates can exceed the size of the 90% training split -
    # confirm that the model handles neighbors > n_train.
    steps_to_check = np.linspace(0, max_neighbors, nr_check + 2).astype(int)
    # drop both end points; on small datasets the integer grid can contain
    # duplicates or zeros, which are not worth (re-)evaluating
    candidates = np.unique(steps_to_check[1:-1])
    candidates = candidates[candidates > 0]

    best_neighbors = model.neighbors
    best_performance = -np.inf
    for neighbors in candidates:
        model.neighbors = int(neighbors)
        model.fit(x_train, y_train, coords_train)
        y_pred = model.predict(x_val, coords_val)
        # ground truth first: scores such as r2_score are not symmetric
        performance = eval_criterium(y_val, y_pred)
        if performance > best_performance:
            best_neighbors = int(neighbors)
            best_performance = performance

    model.neighbors = best_neighbors
    return best_neighbors
'deforestation', 'california_housing']\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "print(f\"Set dataset here: Must be one of {list(dataset_target.keys())}\")\n", 49 | "dataset = \"plants\"" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "dcb25046", 55 | "metadata": {}, 56 | "source": [ 57 | "### Load data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "id": "28692dce", 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "samples: 227\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "data = pd.read_csv(os.path.join(\"data\", dataset+\".csv\"))\n", 76 | "print(\"samples: \", len(data))\n", 77 | "target = dataset_target[dataset]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "73366dc4", 83 | "metadata": {}, 84 | "source": [ 85 | "### Split into train and test" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "id": "29cd834b", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "def prepare_data(data, target, lon=\"x\", lat=\"y\"):\n", 96 | " \"\"\"Assumes that all other columns are used as covariates\"\"\"\n", 97 | " covariates = [col for col in data.columns if col not in [lon, lat, target]]\n", 98 | " return data[covariates], data[target], data[[lon, lat]]" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "id": "9102cda2", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "inds = np.random.permutation(len(data))\n", 109 | "split = int(0.9* len(data))\n", 110 | "train_data = data.iloc[inds[:split]]\n", 111 | "test_data = data.iloc[inds[split:]]\n", 112 | "\n", 113 | "# split into x, y and coordinates\n", 114 | "train_x, train_y, train_coords = prepare_data(\n", 115 | " train_data, target\n", 116 | ")\n", 117 | "test_x, test_y, test_coords = prepare_data(\n", 118 | " test_data, target\n", 119 | ")" 120 | ] 121 | }, 122 | { 123 | "cell_type": 
"markdown", 124 | "id": "9f6f9ac8", 125 | "metadata": {}, 126 | "source": [ 127 | "### Train and test basic random forest" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "id": "821a8ecf", 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "n_estimators = 100 # can take quite long for Geographical RF\n", 138 | "max_depth = 10\n", 139 | "spatial_neighbors = len(data) // 5" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 7, 145 | "id": "9b29767c", 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "Error of basic Random Forest: 1233.113304527673\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "rf = RandomForestRegressor(max_depth=max_depth)\n", 158 | "rf.fit(train_x, train_y)\n", 159 | "test_pred = rf.predict(test_x)\n", 160 | "\n", 161 | "rmse_rf = mean_squared_error(test_pred, test_y, squared=False)\n", 162 | "print(\"Error of basic Random Forest: \", rmse_rf)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "id": "3b8a6329", 168 | "metadata": {}, 169 | "source": [ 170 | "### Train and test spatial random forest" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 8, 176 | "id": "55dc25b8", 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "Error of spatial Random Forest: 1494.09398242311\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "sp = SpatialRandomForest(\n", 189 | " max_depth=max_depth, neighbors=spatial_neighbors\n", 190 | ")\n", 191 | "sp.fit(train_x, train_y, train_coords)\n", 192 | "test_pred = sp.predict(test_x, test_coords)\n", 193 | "\n", 194 | "rmse_spatial_rf = mean_squared_error(test_pred, test_y, squared=False)\n", 195 | "print(\"Error of spatial Random Forest: \", rmse_spatial_rf)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "id": "560c3a66", 201 | 
"metadata": {}, 202 | "source": [ 203 | "### Train and test geographical random forest" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "id": "8bcf6b0e", 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | "Error of spatial Random Forest: 1326.3920442520464\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "geo_rf = GeographicalRandomForest(\n", 222 | " n_estimators=n_estimators, neighbors=spatial_neighbors, max_depth=max_depth\n", 223 | ")\n", 224 | "geo_rf.fit(train_x, train_y, train_coords)\n", 225 | "test_pred = geo_rf.predict(test_x, test_coords)\n", 226 | "\n", 227 | "rmse_geo_rf = mean_squared_error(test_pred, test_y, squared=False)\n", 228 | "print(\"Error of spatial Random Forest: \", rmse_geo_rf)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "id": "0f5a1644", 234 | "metadata": {}, 235 | "source": [ 236 | "### Tune number of neighbors" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 10, 242 | "id": "05ce2fd3", 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "spatial rf tuned: 74\n", 250 | "Error of tuned spatial Random Forest: 1452.5026591622952\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "regr = SpatialRandomForest(\n", 256 | " n_estimators=n_estimators, neighbors=500, max_depth=max_depth\n", 257 | " )\n", 258 | "regr.tune_neighbors(train_x, train_y, train_coords)\n", 259 | "print(\"spatial rf tuned:\", regr.neighbors)\n", 260 | "regr.fit(train_x, train_y, train_coords)\n", 261 | "test_pred = regr.predict(test_x, test_coords)\n", 262 | "rmse_spatial_rf_tuned = mean_squared_error(test_pred, test_y, squared=False)\n", 263 | "print(\"Error of tuned spatial Random Forest: \", rmse_spatial_rf_tuned)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "id": "9f78a087", 270 | 
"metadata": {}, 271 | "outputs": [], 272 | "source": [] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "Python 3 (ipykernel)", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "codemirror_mode": { 283 | "name": "ipython", 284 | "version": 3 285 | }, 286 | "file_extension": ".py", 287 | "mimetype": "text/x-python", 288 | "name": "python", 289 | "nbconvert_exporter": "python", 290 | "pygments_lexer": "ipython3", 291 | "version": "3.9.5" 292 | } 293 | }, 294 | "nbformat": 4, 295 | "nbformat_minor": 5 296 | } 297 | -------------------------------------------------------------------------------- /tests/test_sprf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import pandas as pd 4 | from sprf import SpatialRandomForest 5 | 6 | 7 | class TestSpatialRandomForest: 8 | """Test spatial random forest class""" 9 | 10 | x_train = np.random.rand(500, 10) 11 | y_train = np.random.rand(500) 12 | coords_train = np.random.rand(500, 2) 13 | x_test = np.random.rand(50, 10) 14 | y_test = np.random.rand(50) 15 | coords_test = np.random.rand(50, 2) 16 | 17 | def test_init_warning(self): 18 | with pytest.warns(UserWarning): 19 | sp = SpatialRandomForest(sample_by="distance") 20 | 21 | def test_fit(self): 22 | sp = SpatialRandomForest() 23 | sp.fit(self.x_train, self.y_train, self.coords_train) 24 | assert sp.n_estimators == 20 25 | 26 | def test_fit_equal(self): 27 | np.random.seed(42) 28 | sp1 = SpatialRandomForest() 29 | x_df = pd.DataFrame(self.x_train) 30 | sp1.fit(x_df, self.y_train, self.coords_train) 31 | y_pred_1 = sp1.predict(self.x_test, self.coords_test) 32 | 33 | np.random.seed(42) 34 | sp2 = SpatialRandomForest() 35 | sp2.fit(self.x_train, self.y_train, self.coords_train) 36 | y_pred_2 = sp2.predict(self.x_test, self.coords_test) 37 | assert np.all(y_pred_1 == y_pred_2) 38 | 39 | def test_predict_error(self): 40 | sp = 
class TestSpatialRandomForest:
    """Test spatial random forest class"""

    # random data shared by all tests, drawn once when the class is created
    x_train = np.random.rand(500, 10)
    y_train = np.random.rand(500)
    coords_train = np.random.rand(500, 2)
    x_test = np.random.rand(50, 10)
    y_test = np.random.rand(50)
    coords_test = np.random.rand(50, 2)

    def test_init_warning(self):
        """Constructing with distance-based sampling emits a UserWarning."""
        with pytest.warns(UserWarning):
            SpatialRandomForest(sample_by="distance")

    def test_fit(self):
        """Fitting with default settings leaves 20 estimators."""
        forest = SpatialRandomForest()
        forest.fit(self.x_train, self.y_train, self.coords_train)
        assert forest.n_estimators == 20

    def test_fit_equal(self):
        """DataFrame and ndarray inputs give identical predictions."""

        def seeded_prediction(features):
            # same seed for both runs so that only the input type differs
            np.random.seed(42)
            forest = SpatialRandomForest()
            forest.fit(features, self.y_train, self.coords_train)
            return forest.predict(self.x_test, self.coords_test)

        from_frame = seeded_prediction(pd.DataFrame(self.x_train))
        from_array = seeded_prediction(self.x_train)
        assert (from_frame == from_array).all()

    def test_predict_error(self):
        """Weighted prediction without coordinates raises AssertionError."""
        forest = SpatialRandomForest()
        forest.fit(self.x_train, self.y_train, self.coords_train)
        with pytest.raises(AssertionError):
            forest.predict(self.x_test)