├── .gitignore
├── LICENSE
├── README.md
├── assets
    └── sprf.png
├── data
    ├── atlantic.csv
    ├── california_housing.csv
    ├── deforestation.csv
    ├── meuse.csv
    └── plants.csv
├── figure.ipynb
├── requirements.txt
├── scripts
    ├── benchmarks.py
    ├── models.py
    ├── plotting.py
    └── synthetic_tests.py
├── setup.py
├── sprf
    ├── __init__.py
    ├── geographical_random_forest.py
    ├── spatial_random_forest.py
    └── tuning.py
├── sprf_demo.ipynb
└── tests
    └── test_sprf.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # vscode and mac
 10 | .DS_Store
 11 | *.code-workspace
 12 | *.drawio
 13 | 
 14 | # directories
 15 | data_orig/
 16 | private_notebooks/
 17 | 
 18 | # Distribution / packaging
 19 | .Python
 20 | build/
 21 | develop-eggs/
 22 | dist/
 23 | downloads/
 24 | eggs/
 25 | .eggs/
 26 | lib/
 27 | lib64/
 28 | parts/
 29 | sdist/
 30 | var/
 31 | wheels/
 32 | pip-wheel-metadata/
 33 | share/python-wheels/
 34 | *.egg-info/
 35 | .installed.cfg
 36 | *.egg
 37 | MANIFEST
 38 | 
 39 | # PyInstaller
 40 | #  Usually these files are written by a python script from a template
 41 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 42 | *.manifest
 43 | *.spec
 44 | 
 45 | # Installer logs
 46 | pip-log.txt
 47 | pip-delete-this-directory.txt
 48 | 
 49 | # Unit test / coverage reports
 50 | htmlcov/
 51 | .tox/
 52 | .nox/
 53 | .coverage
 54 | .coverage.*
 55 | .cache
 56 | nosetests.xml
 57 | coverage.xml
 58 | *.cover
 59 | *.py,cover
 60 | .hypothesis/
 61 | .pytest_cache/
 62 | 
 63 | # Translations
 64 | *.mo
 65 | *.pot
 66 | 
 67 | # Django stuff:
 68 | *.log
 69 | local_settings.py
 70 | db.sqlite3
 71 | db.sqlite3-journal
 72 | 
 73 | # Flask stuff:
 74 | instance/
 75 | .webassets-cache
 76 | 
 77 | # Scrapy stuff:
 78 | .scrapy
 79 | 
 80 | # Sphinx documentation
 81 | docs/_build/
 82 | 
 83 | # PyBuilder
 84 | target/
 85 | 
 86 | # Jupyter Notebook
 87 | .ipynb_checkpoints
 88 | 
 89 | # IPython
 90 | profile_default/
 91 | ipython_config.py
 92 | 
 93 | # pyenv
 94 | .python-version
 95 | 
 96 | # pipenv
 97 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 98 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 99 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | #   install all needed dependencies.
101 | #Pipfile.lock
102 | 
103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 | __pypackages__/
105 | 
106 | # Celery stuff
107 | celerybeat-schedule
108 | celerybeat.pid
109 | 
110 | # SageMath parsed files
111 | *.sage.py
112 | 
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 | 
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 | 
126 | # Rope project settings
127 | .ropeproject
128 | 
129 | # mkdocs documentation
130 | /site
131 | 
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 | 
137 | # Pyre type checker
138 | .pyre/
139 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Mobility Information Engineering Lab at ETH Zürich
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmarking regression models under spatial heterogeneity 
 2 | 
 3 | This repository accompanies our GIScience publication "Benchmarking regression models under spatial heterogeneity" (see reference below). In the code base, we provide 1) the script for reproducing our experiments on synthetic data, 2) the script for reproducing our benchmarking experiments on several real datasets and 3) an open-source Python implementation of spatial Random Forests. Each part is described in the following.
 4 | 
 5 | #### Installation
 6 | 
 7 | The required packages and our sprf package can be installed via pip in editable mode in a virtual environment with the following commands:
 8 | ```
 9 | git clone https://github.com/mie-lab/spatial_rf_python.git
10 | cd spatial_rf_python
11 | python -m venv env
12 | source env/bin/activate
13 | pip install -e .
14 | ```
15 | 
16 | ### 1) Experiments on synthetic datasets
17 | 
18 | To reproduce our analysis on synthetic data, run:
19 | ```
20 | python scripts/synthetic_tests.py
21 | ```
22 | All results will be saved in a single csv file named `synthetic_data_results.csv`.
23 | 
24 | ### 2) Benchmarking on real datasets
25 | 
26 | We use five public data sets to validate our results and to benchmark different algorithms. The datasets are provided as CSV files in the [data](data) folder. They include:
27 | * A [plants](https://github.com/BlasBenito/spatialRF/blob/main/data/plant_richness_df.rda) dataset
28 | * A [deforestation](https://github.com/FSantosCodes/GWRFC/tree/master/data) dataset
29 | * A [mortality rate](https://www.dropbox.com/s/lrz6og0ld2m64df/Data_GWR.7z?dl=0) dataset from [here](https://zia207.github.io/geospatial-r-github.io/geographically-wighted-random-forest.html)
30 | 
31 | Please cite these sources if reusing their data.
32 | 
33 | Our code for benchmarking is provided as a [notebook](benchmarks.ipynb) and as a [script](scripts/benchmarks.py). To reproduce our experiments from the paper, run
34 | ```
35 | python scripts/benchmarks.py
36 | ```
37 | The results will be saved as csv files in a folder named `outputs`.
38 | 
39 | ### 3) Spatial Random Forest implementation in Python
40 | 
41 | This repository further provides Python implementations of Spatial Random Forests. Different approaches have been proposed in the literature, but here, we focus on the one by Georganos et al. termed *Geographical Random Forests*. We implement their approach, but since it is very inefficient to train one random forest per sample, we additionally implement a more efficient variant (which we simply call *Spatial Random Forests*): Instead of training one Random Forest per sample, we train a fixed number of random forests on spatially distinct sets of points. The prediction is then a weighted average of the tree-wise predictions, weighted by the distance of the test sample from the center of each tree (see figure below).
42 | 
43 | <img src="assets/sprf.png" width="250" />
44 | 
45 | #### Usage
46 | 
47 | We demonstrate the usage of the spatial Random Forests in the [demonstration notebook](sprf_demo.ipynb).
48 | 
49 | The usage is analogous to other scikit-learn models, except that the coordinates must also be given as input.
50 | ```
51 | from sprf import SpatialRandomForest
52 | spatial_rf = SpatialRandomForest()
53 | spatial_rf.fit(train_x, train_y, train_coords)
54 | test_pred = spatial_rf.predict(test_x, test_coords)
55 | ```
56 | 
57 | To train a Geographical Random Forest as proposed by Georganos et al., we provide the corresponding class which can be used in the same way:
58 | 
59 | ```
60 | from sprf import GeographicalRandomForest
61 | geo_rf = GeographicalRandomForest()
62 | geo_rf.fit(train_x, train_y, train_coords)
63 | test_pred = geo_rf.predict(test_x, test_coords)
64 | ```
65 | 
66 | 
67 | 
68 | ### Citation
69 | 
70 | If you use our work, please cite our paper with the following bibtex entry:
71 | 
72 | ```bib
73 | @inproceedings{wiedemann2023benchmarking,
74 |   title={Benchmarking regression models under spatial heterogeneity},
75 |   author={Wiedemann, Nina and Martin, Henry and Westerholt, René},
76 |   booktitle={12th International Conference on Geographic Information Science (GIScience 2023)},
77 |   year={2023},
78 | }
79 | ```
80 | 


--------------------------------------------------------------------------------
/assets/sprf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mie-lab/spatial_rf_python/20d88253883492d81e62ae550a01ff2b8aaeed26/assets/sprf.png


--------------------------------------------------------------------------------
/data/atlantic.csv:
--------------------------------------------------------------------------------
  1 | FIPS,x,y,Rate,POV,SMOK,PM25,NO2,SO2
  2 | 13111,1056523.936,1376613.19,72,15.92,27.93333333,11.75533333,0.977666667,0.064184954
  3 | 42115,1653442.064,2267301.476,59,12.22,26.96666667,9.026,1.499933333,0.03321098
  4 | 42075,1633708.275,2096683.059,61,8.986666667,25.27333333,11.96333333,3.616466667,0.120281334
  5 | 51683,1584048.852,1901443.401,62,7.86,22.9,12.73133333,3.621933333,0.118371127
  6 | 36057,1735811.102,2409536.493,59,14.74666667,27.18,8.302,1.632733333,0.006404368
  7 | 13149,1003646.751,1193901.75,86,17.50666667,30.21333333,12.27133333,1.6258,0.138780485
  8 | 37153,1463896.202,1452940.213,79,21.93333333,29.48,12.71066667,1.487333333,0.022304498
  9 | 51735,1716542.533,1743450.717,61,4.646666667,21.23333333,11.84333333,2.520266667,0.057279707
 10 | 37003,1320295.845,1533038.997,64,12.67333333,27.77333333,11.55666667,1.7928,0.090705483
 11 | 37063,1520852.145,1580156.875,65,14.81333333,19.58,12.35666667,1.937466667,0.06037865
 12 | 36093,1768117.621,2407434.527,62,11.24666667,23.78666667,8.767333333,2.0944,0.006131673
 13 | 51590,1463355.628,1632109.561,72,21.37333333,29.71333333,11.47266667,2.113266667,0.126120542
 14 | 51600,1597510.11,1916180.732,41,5.573333333,15.58,13.132,4.147,0.13029202
 15 | 51530,1445710.523,1759556.916,61,13.16666667,31.7,10.12466667,1.666666667,0.028030571
 16 | 37181,1555147.596,1623916.995,79,22.22,29.66,11.73133333,1.738866667,0.064398226
 17 | 13003,1240686.99,999295.872,93,24.16,28.28666667,11.87,0.636,0.007735251
 18 | 51081,1622534.834,1672340.575,78,20.35333333,28.42,11.92733333,1.584866667,0.043300666
 19 | 36015,1568845.615,2286060.403,73,14.36,26.11333333,9.176666667,1.640066667,0.041286954
 20 | 51790,1462544.758,1811357.668,63,13.55333333,25.05333333,10.766,1.5146,0.033238452
 21 | 37157,1434932.939,1605912.888,78,14.52666667,31.1,11.486,2.297333333,0.1052822
 22 | 13119,1161855.577,1335144.383,72,16.56666667,27.51333333,12.73133333,1.394466667,0.030687529
 23 | 42001,1581875.064,2029400.655,56,7.6,24.32,12.94466667,3.292466667,0.113010656
 24 | 51540,1514326.824,1806985.039,53,19.77333333,23.18666667,11.30533333,1.984533333,0.033495497
 25 | 24510,1644880.387,1977469.45,87,20.96,28.99333333,14.09,5.1886,0.146629183
 26 | 42105,1485798.53,2224296.623,68,13.48,27,9.63,1.641466667,0.056870652
 27 | 13173,1227378.725,968358.8725,89,21.00666667,28.51333333,11.602,0.651733333,0.007598222
 28 | 13169,1151573.414,1181611.119,62,11.68666667,24.82,12.27866667,1.468666667,0.139995631
 29 | 36109,1584749.021,2324857.448,52,15.81333333,16.21333333,9.181333333,1.777533333,0.032778433
 30 | 36053,1638111.937,2388843.96,67,10.93333333,24.82666667,8.768,1.846866667,0.013151125
 31 | 37007,1432790.709,1443984.99,66,20.93333333,28.37333333,12.72266667,1.685533333,0.029624066
 32 | 37195,1611858.913,1559545.423,71,19.19333333,25.69333333,13.01866667,1.695066667,0.017671372
 33 | 54077,1385413.786,1946823.68,62,16.44,28.66,10.95533333,2.3982,0.250782854
 34 | 51089,1420848.109,1636040.467,65,14.69333333,30.15333333,10.89533333,1.855,0.091236621
 35 | 51099,1622210.598,1854839.183,74,6.793333333,21.59333333,12.08266667,2.948666667,0.125150723
 36 | 42047,1431155.363,2177641.106,64,8.88,27.58666667,10.53466667,1.742266667,0.115719854
 37 | 42053,1381551.31,2178527.873,74,17.62666667,30,10.91066667,2.315866667,0.122241604
 38 | 13247,1099839.163,1245707.827,62,11.68666667,22.44,13.13333333,3.7826,0.135322848
 39 | 42079,1653351.821,2193500.035,65,12.32666667,28.84666667,9.622666667,2.4324,0.075286871
 40 | 36005,1834509.187,2196946.254,45,27.97333333,23.25333333,11.748,11.64453333,0.039877724
 41 | 42113,1604588.651,2214031.262,63,12.98,26.98666667,9.344,1.614866667,0.086849193
 42 | 42021,1444689.53,2074193.143,57,13.27333333,26.94666667,11.18733333,3.099533333,0.263529537
 43 | 45037,1285877.942,1284646.665,58,18.08,26.04,12.18666667,1.290266667,0.024738322
 44 | 42037,1622394.822,2172315.89,56,12.44666667,25.78,10.06,2.702,0.115677531
 45 | 51003,1508276.716,1804173.952,53,7.726666667,16.52666667,11.18733333,1.7494,0.033366864
 46 | 37151,1445280.416,1529734.553,71,13.18,28.74666667,12.40466667,2.2968,0.057280666
 47 | 36021,1816659.884,2353655.649,64,10.28,23.51333333,8.988,2.0608,0.010004799
 48 | 34023,1800141.087,2141559.363,53,7.126666667,18.71333333,11.63666667,6.869533333,0.050961628
 49 | 13175,1218893.132,1127443.529,70,20.53333333,25.94666667,12.17733333,0.846866667,0.030264103
 50 | 13133,1179492.3,1247682.041,60,20.29333333,25.23333333,12.434,1.344666667,0.101512916
 51 | 13017,1201807.39,1045952.93,96,23.05333333,28.68666667,12.47533333,0.646466667,0.01071437
 52 | 13277,1177578.936,1008758.602,70,21.71333333,26.18,12.75466667,0.709266667,0.008984232
 53 | 13065,1262954.262,959490.1139,104,23.44,29.1,11.29533333,0.5954,0.010513754
 54 | 51570,1623811.322,1739869.115,72,7.573333333,28.4,11.83066667,2.799066667,0.079022377
 55 | 37081,1439791.529,1570775.945,61,14.43333333,23.26666667,12.17266667,2.711333333,0.088706873
 56 | 51640,1330087.373,1619263.778,79,19.74666667,31.58,9.587333333,1.330133333,0.042831727
 57 | 51670,1631867.742,1744490.814,101,16.50666667,32.39333333,11.836,3.2466,0.08019077
 58 | 13023,1181765.025,1119115.174,85,17.57333333,27.9,12.23266667,0.881733333,0.038834418
 59 | 13029,1363361.497,1097965.828,92,10.56,23.74666667,11.306,0.949533333,0.010313017
 60 | 13109,1319776.971,1107445.306,75,23.98,28.52666667,11.85,0.826466667,0.011627792
 61 | 13171,1097548.858,1180372.835,80,16.28666667,28.16666667,12.32066667,1.6782,0.130991601
 62 | 13195,1167461.434,1307970.612,85,14.76,27.47333333,12.796,1.343933333,0.043572274
 63 | 51720,1176211.402,1625932.365,87,19.52666667,31.21333333,9.728666667,2.209,0.057382373
 64 | 51830,1683120.473,1752365.459,48,18.90666667,17.66666667,11.94733333,2.523466667,0.06950066
 65 | 36043,1681804.141,2456217.556,60,13.16,25.12,7.756666667,1.204733333,0.005746315
 66 | 36007,1643936.98,2304050.692,58,14.11333333,23.94666667,8.944,1.687333333,0.024850571
 67 | 51750,1353126.639,1674729.088,78,23.04,22.76666667,9.702,1.917866667,0.034742502
 68 | 54047,1253566.396,1687978.492,106,34.46666667,34.96,9.408666667,1.168266667,0.03828383
 69 | 54033,1329015.139,1916161.457,84,17.36666667,28,11.68333333,2.949733333,0.210726373
 70 | 54007,1310840.99,1846926.185,78,21.61333333,30.92666667,10.63066667,1.652,0.102185634
 71 | 13021,1142177.553,1155786.935,75,21.44,25.35333333,12.65533333,1.3396,0.100164584
 72 | 51550,1731568.728,1694759.535,72,8.193333333,21.58666667,10.29,2.3896,0.043557652
 73 | 36049,1634724.239,2487703.529,61,13.64,26.76666667,7.936666667,1.520533333,0.006190014
 74 | 37029,1746713.975,1664623.636,66,9.473333333,23.39333333,9.948,1.659933333,0.024237535
 75 | 42007,1305889.304,2071193.128,65,10.99333333,27.33333333,13.158,5.270933333,0.312501187
 76 | 13009,1179525.219,1190286.64,68,20.94666667,26.47333333,12.322,1.3204,0.1200595
 77 | 42025,1682297.897,2170140.233,68,10.06,29.44666667,9.887333333,3.412066667,0.072730595
 78 | 13177,1115419.382,1036903.959,77,9.926666667,24.68,12.45266667,0.884333333,0.012986966
 79 | 24011,1720531.915,1943797.185,80,11.59333333,25.52,12.92866667,4.189066667,0.08052073
 80 | 13083,949876.9767,1363028.669,90,14.02666667,26.56,12.678,2.0276,0.097772433
 81 | 13131,1119076.387,935895.365,77,22.2,27.09333333,11.876,0.645266667,0.00789093
 82 | 54089,1317582.555,1729447.166,70,24.83333333,30.33333333,9.448666667,1.3772,0.037281857
 83 | 13143,989780.5921,1248202.62,89,17,30.24666667,12.64266667,1.5934,0.204562146
 84 | 13227,1048750.481,1330599.247,74,11.52666667,26.40666667,12.84866667,1.5288,0.122754787
 85 | 42109,1574275.135,2130545.407,52,10.56,25.57333333,11.046,3.1288,0.144884251
 86 | 51690,1421799.935,1636136.484,80,20.12666667,28.96,10.94466667,1.848133333,0.090804627
 87 | 36017,1652705.228,2344047.232,63,14.2,26.08666667,8.732666667,1.443333333,0.016552842
 88 | 13275,1149153.54,938437.0275,78,20.18,25.81333333,11.886,0.669133333,0.007555336
 89 | 37041,1717104.377,1631166.367,69,18.54666667,26.00666667,10.55933333,1.296266667,0.020002733
 90 | 37169,1394188.124,1599626.409,77,11.90666667,30.93333333,11.198,1.993266667,0.087910654
 91 | 54069,1294167.135,2002786.432,76,15.28,28.36666667,13.43333333,4.545066667,0.406559049
 92 | 51775,1393922.749,1700093.235,71,8.753333333,26.52666667,10.322,2.206066667,0.034263399
 93 | 51630,1593702.528,1852059.96,69,14.9,24.49333333,12.024,2.721933333,0.106019507
 94 | 13097,1031382.337,1242709.45,73,10.76,24.77333333,13.25333333,2.909266667,0.206280792
 95 | 37069,1571748.504,1594889.666,72,14.1,26.08,12.15733333,1.711933333,0.038521283
 96 | 13139,1109248.869,1321624.535,62,13.34,24.87333333,13.088,1.556533333,0.054985529
 97 | 37149,1242895.468,1448685.554,53,12.36,23.83333333,11.25866667,1.763133333,0.059930607
 98 | 13243,1058282.459,1027981.069,69,27.06,27.09333333,11.98533333,0.720533333,0.013834109
 99 | 13283,1252655.898,1125410.716,86,26.58666667,29.05333333,12.074,0.699733333,0.019636071
100 | 37109,1323677.525,1484314.988,66,11.66666667,26.34666667,12.20333333,2.459466667,0.115437795
101 | 36035,1731906.788,2432891.232,65,14.28,27.58666667,8.024,1.350866667,0.005243562
102 | 36097,1553949.123,2311832.582,67,12.36666667,25.81333333,9.096666667,1.772,0.039672887
103 | 37077,1535005.921,1613210.202,76,13.83333333,27.14666667,11.71133333,1.764733333,0.075356169
104 | 51730,1625527.449,1733268.28,97,20.62,29.52666667,11.636,2.496133333,0.074446439
105 | 13101,1248348.27,934440.3294,73,23.71333333,28.58666667,11.18266667,0.6676,0.009702606
106 | 42009,1473184.117,2023798.399,52,11.92666667,26.82666667,11.02666667,2.842666667,0.158886417
107 | 13189,1243553.429,1245820.702,94,19.44666667,28.29333333,12.42133333,1.410066667,0.045470302
108 | 13081,1148323.713,1057203.733,78,27.50666667,27.83333333,12.63066667,0.806333333,0.014370813
109 | 51710,1730261.754,1719321.773,78,19.39333333,25.38666667,11.14066667,2.617333333,0.049621813
110 | 51065,1535867.509,1788758.043,55,6.973333333,23.19333333,11.23133333,1.841466667,0.041039072
111 | 37001,1474728.089,1572762.401,69,13.5,27,11.9,2.298466667,0.078810443
112 | 13179,1361780.107,1076726.915,76,17.19333333,25.43333333,11.11933333,0.8934,0.009774712
113 | 13239,1032302.684,1036603.891,93,23.98666667,28.82666667,12.018,0.760133333,0.015273258
114 | 51137,1549999.877,1837565.678,68,9.053333333,25.82666667,11.49733333,2.199933333,0.057854525
115 | 54017,1301653.502,1910050.185,76,19.32,31.52666667,11.968,3.158266667,0.223853046
116 | 13201,1068274.984,962213.1331,78,21.99333333,26.76666667,12.52266667,0.661933333,0.010339054
117 | 34011,1768774.33,2011919.092,70,15.23333333,28.11333333,12.91466667,3.487733333,0.045609201
118 | 13135,1095759.988,1279796.436,53,9.273333333,19.22666667,13.40666667,3.243666667,0.104596959
119 | 42019,1337590.339,2102567.407,59,8.333333333,23.4,12.604,4.383266667,0.286625125
120 | 36091,1776076.011,2442465.765,63,6.593333333,21.13333333,8.527333333,2.015266667,0.004390669
121 | 37107,1646302.997,1513043.27,73,20.23333333,27.35333333,12.118,1.4862,0.012877913
122 | 13215,1037083.188,1109400.014,74,17.92,24.95333333,12.68866667,0.9664,0.036799276
123 | 42077,1699112.311,2138770.953,56,10.36666667,24.28666667,11.43933333,4.451666667,0.072346667
124 | 13089,1079948.559,1256389.654,52,15.00666667,18.88,13.422,4.4912,0.140996438
125 | 51183,1642946.115,1704445.424,84,19.40666667,27.91333333,11.49266667,1.908266667,0.058643347
126 | 51840,1517023.255,1936981.425,78,13.45333333,27.10666667,11.83933333,2.989666667,0.081773593
127 | 37093,1509167.122,1462165.108,82,17.87333333,27.16666667,12.834,1.599666667,0.017354063
128 | 42083,1429885.045,2220922.964,73,14.33333333,28.52666667,10.05,1.466866667,0.071557137
129 | 45061,1434148.183,1352336.958,81,26.19333333,29.28666667,12.572,1.3402,0.038363087
130 | 45051,1552375.47,1345424.088,70,15.16,29.98666667,10.71666667,1.072,0.027848615
131 | 37031,1742993.493,1485548.212,75,12.74666667,26.84,8.127333333,0.82,0.007016134
132 | 45005,1354654.169,1206555.333,72,35.26,27.74666667,11.34533333,0.9478,0.02473639
133 | 37039,1075668.387,1409410.991,69,17.08666667,27.97333333,11.29533333,0.9582,0.047794317
134 | 36027,1820406.179,2298754.25,60,8.1,21.64,9.426,2.402666667,0.01841173
135 | 13223,1019493.25,1265907.639,74,7.593333333,25.84,13.292,2.622066667,0.220265044
136 | 13117,1082914.224,1307948.074,52,5.6,18.55333333,13.46266667,1.844266667,0.090544611
137 | 42131,1643088.775,2230410.228,64,10.86,26.07333333,9.138666667,2.1882,0.055272458
138 | 54075,1378205.378,1816237.449,74,17.68,28.20666667,9.215333333,0.9346,0.044613198
139 | 13053,1047514.925,1092319.577,88,18.64,20.62666667,12.27533333,0.788733333,0.028402561
140 | 54019,1291852.933,1767655.85,95,21.61333333,31.48666667,9.795333333,1.633333333,0.070849713
141 | 54101,1339894.375,1828331.821,89,27.11333333,31.50666667,9.648,1.1206,0.062105957
142 | 37185,1580830.177,1632431.425,61,22.88,27.20666667,11.71466667,1.617666667,0.045477709
143 | 13191,1375560.829,1041347.409,69,19.58666667,27.81333333,10.686,0.7158,0.01113753
144 | 13311,1111168.399,1358919.711,61,13.81333333,24.90666667,11.89466667,1.216533333,0.036456967
145 | 51153,1585461.893,1896563.543,62,5.58,20.53333333,12.45733333,3.242533333,0.109487893
146 | 37085,1534801.605,1506770.297,76,16.22,27.13333333,13.02066667,1.676866667,0.018872703
147 | 45067,1516215.834,1357000.88,79,24.65333333,28.66,11.63733333,1.219466667,0.028252022
148 | 36123,1530166.579,2334378.468,64,14.02,24.79333333,9.015333333,1.724466667,0.039520381
149 | 13267,1305712.571,1092823.631,92,25.6,28.5,11.97,0.7536,0.010500177
150 | 13321,1145798.959,1015128.398,71,19.61333333,27.99333333,12.538,0.7988,0.010234082
151 | 45035,1440375.919,1230614.934,64,11.23333333,24.40666667,10.73666667,1.241466667,0.044484932
152 | 36069,1509820.646,2355302.693,64,8.846666667,21.54,9.15,1.925866667,0.038785772
153 | 13057,1050761.13,1306115.374,61,6.9,22.27333333,13.41933333,2.4658,0.152438517
154 | 13161,1255596.036,1058523.507,99,20.23333333,27.54,12.16466667,0.671133333,0.009300602
155 | 13069,1239676.066,1027479.846,74,22.30666667,27.84666667,12.26266667,0.6426,0.008462469
156 | 36073,1427249.188,2384624.13,69,12.96,28.18,10.236,3.3304,0.044723983
157 | 36101,1515917.072,2289805.219,73,14.04,25.55333333,8.947333333,1.434266667,0.040500091
158 | 34021,1780222.646,2119053.231,54,9.12,19.90666667,11.80266667,5.281866667,0.055527646
159 | 13255,1081737.822,1199158.137,78,18.62,28.1,12.452,1.939,0.149703155
160 | 54051,1294707.942,1976028.343,74,15.62,30.70666667,13.07266667,4.182133333,0.386921674
161 | 54029,1290225.113,2050386.631,77,12.44,30.7,12.98533333,4.887533333,0.334911688
162 | 42133,1621663.153,2043017.974,59,8.046666667,24.26666667,12.976,3.753866667,0.127608038
163 | 37115,1186006.548,1505916.697,64,16.96666667,27.21333333,10.14666667,1.2564,0.045084359
164 | 37133,1676336.57,1460888.579,80,15.22,26.54,9.95,0.993533333,0.013750033
165 | 54013,1274478.73,1857390.209,87,22.74,32.95333333,11.51933333,2.135666667,0.184781843
166 | 51125,1484760.238,1773042.453,70,11.76,24.68,10.714,1.6068,0.029505235
167 | 51680,1466332.778,1725627.038,73,18.90666667,25.04,11.02066667,1.818466667,0.035563315
168 | 13281,1108099.219,1389069.294,61,13.9,24.78666667,11.24733333,0.8272,0.03193818
169 | 13225,1133468.486,1127832.389,90,21.42,25.57333333,12.34,1.1298,0.062417203
170 | 51063,1373699.002,1656374.609,49,12.24,26.48,9.65,1.364,0.049349769
171 | 36029,1397300.328,2323592.893,67,13.28,24.30666667,10.39933333,2.8934,0.05711464
172 | 13151,1091082.441,1221936.754,68,7.786666667,22.52666667,12.828,2.972,0.155174237
173 | 51520,1222120.615,1597141.253,94,18.8,28.81333333,10.36266667,2.103133333,0.045761823
174 | 54065,1501884.099,1978349.929,80,12.14,29.8,11.454,3.229733333,0.09698803
175 | 51103,1693664.731,1807551.068,66,13.46,23.46,11.41333333,1.771333333,0.056949109
176 | 36121,1439265.332,2324433.93,60,10.36,27.28666667,9.487333333,1.900466667,0.048157697
177 | 42081,1561950.648,2193635.395,61,12.75333333,26.18666667,9.878666667,2.324466667,0.097097508
178 | 51017,1406203.336,1789944.572,69,9.013333333,27.05333333,9.51,1.360866667,0.031966773
179 | 51131,1752689.964,1775435.961,94,19.72666667,25.65333333,10.22733333,2.277266667,0.035814662
180 | 37057,1407800.143,1532761.676,73,13.04666667,28.97333333,12.45466667,2.689266667,0.076664917
181 | 37155,1528493.411,1422787.013,78,27.67333333,31.03333333,12.62733333,1.4948,0.01660844
182 | 42097,1602057.96,2145493.006,63,12.42,28.08666667,10.73533333,2.723666667,0.147714748
183 | 42121,1341070.721,2158752.11,72,14.47333333,29.29333333,11.614,3.263333333,0.145555821
184 | 51197,1311583.651,1644628.34,72,13.60666667,29.03333333,9.462666667,1.188666667,0.030809531
185 | 54045,1221701.597,1734554.266,120,22.09333333,33.81333333,10.21066667,2.0086,0.088614108
186 | 13213,1018911.294,1363530.392,97,15.28,31.04666667,12.724,1.828133333,0.102093803
187 | 37013,1709880.255,1554745.801,77,18.38666667,25.44666667,10.264,1.271133333,0.009340984
188 | 37113,1132960.345,1418734.132,58,14.99333333,26.43333333,10.33933333,0.743533333,0.027987447
189 | 36009,1412014.758,2267732.697,64,14.83333333,27.32,9.815333333,1.8274,0.062639113
190 | 36055,1471985.02,2381307.728,58,13.00666667,21.5,9.776,3.0206,0.040782406
191 | 34015,1757950.477,2048967.108,74,6.8,24.17333333,12.68266667,5.456533333,0.053715548
192 | 13037,1073561.977,1003649.305,81,30.94,26.66,12.17266667,0.751066667,0.011407108
193 | 54039,1248201.79,1795739.978,87,14.71333333,28.16,11.28733333,2.838533333,0.173502213
194 | 11001,1620145.774,1926595.621,62,17.8,22.08,13.512,5.445133333,0.149480163
195 | 45085,1427140.308,1323124.199,68,17.94,26.52,12.34666667,1.3676,0.047499713
196 | 51043,1533134.273,1933160.16,68,7.1,23.06666667,12.00733333,3.052133333,0.084447852
197 | 54021,1295165.507,1869902.771,71,23.96666667,29.19333333,11.466,2.0594,0.154926879
198 | 34033,1743771.149,2031005.452,73,9.773333333,25.06666667,13.34266667,4.574466667,0.057401127
199 | 13193,1116496.715,1102091.278,73,27.00666667,28.44666667,12.25333333,1.0106,0.036778614
200 | 13263,1066361.408,1134167.661,84,20.86,26.7,12.35666667,0.9238,0.062907262
201 | 13093,1142014.884,1082724.612,54,25.5,28.05333333,12.43866667,0.8602,0.022368158
202 | 51133,1693700.158,1825328.974,63,12.88,24.08666667,11.51333333,1.921,0.070302289
203 | 37105,1505940.861,1513658.565,73,14.4,26.29333333,12.83333333,1.705,0.02582737
204 | 45027,1446754.817,1298002.946,71,24.02,26.77333333,11.794,1.274266667,0.053875999
205 | 37127,1600385.578,1587248.999,70,15.25333333,25.6,12.644,1.635666667,0.024683401
206 | 51620,1676303.202,1683927.061,73,18.78,26.34666667,11.38333333,1.793933333,0.04932482
207 | 51049,1545565.804,1752895.876,67,15.27333333,27.97333333,11.28133333,1.7732,0.049144742
208 | 51037,1519860.206,1691095.398,77,16.97333333,27.1,11.13466667,1.7392,0.063611694
209 | 51115,1710730.906,1777230.966,60,8.986666667,24.13333333,11.85933333,1.879933333,0.054094095
210 | 51119,1686849.317,1794380.822,75,12.44,24.02,11.59866667,2.022266667,0.070397411
211 | 51015,1456078.917,1810843.496,63,8.226666667,25.95333333,10.19066667,1.487066667,0.0339999
212 | 54003,1523121.264,1971372.329,85,11.32,29.97333333,12.18866667,3.4102,0.090112598
213 | 37163,1586724.023,1473472.829,71,19.37333333,27.38,12.51066667,1.355133333,0.016449593
214 | 51117,1552612.154,1659453.181,74,16.14666667,28.27333333,11.50466667,1.677533333,0.071183425
215 | 37035,1321432.026,1503977.361,69,12.14,25.78,11.99,2.247333333,0.108931048
216 | 13049,1318756.006,952508.0788,83,23.02666667,30.75333333,10.73533333,0.753,0.023360332
217 | 37191,1611551.667,1520555.882,76,17.38666667,26.9,12.89266667,1.736933333,0.014442434
218 | 36023,1613685.107,2347274.963,65,14.45333333,24.92,8.912,1.686666667,0.02249919
219 | 37059,1376002.495,1542989.916,60,10.05333333,25.97333333,12.12933333,2.326533333,0.09012095
220 | 13291,1086530.75,1376942.086,62,14.79333333,25.21333333,11.34733333,0.900933333,0.043418764
221 | 36037,1435433.15,2357644.153,67,10.15333333,25.44,10.012,2.558533333,0.045821095
222 | 51159,1666241.838,1825977.972,68,17.26666667,28.79333333,11.484,2.175866667,0.08299332
223 | 13115,983511.4296,1300252.571,81,17.04,26.82666667,13.256,2.2278,0.189847276
224 | 45009,1378521.094,1235921.641,65,25.44,25.5,11.29066667,1.0986,0.034552977
225 | 42023,1467229.207,2185659.851,63,11.69333333,28.05333333,10.09733333,1.740466667,0.091801556
226 | 24009,1666038.768,1894678.827,68,5.46,22.88666667,12.58533333,2.8886,0.133364234
227 | 51127,1652922.898,1773161.116,80,5.626666667,24.48,11.34733333,2.578533333,0.083857622
228 | 13055,968986.5888,1322457.969,87,17.49333333,30.50666667,13.202,1.927666667,0.15694681
229 | 13241,1138683.737,1389143.35,68,14.74,24.92,11.10266667,0.7984,0.026398006
230 | 42117,1537131.813,2237598.109,60,13.80666667,25.39333333,9.308666667,1.612666667,0.05207866
231 | 45021,1295852.481,1430438.159,79,17.1,30.26666667,12.254,1.971,0.074208924
232 | 51149,1640404.662,1734227.045,67,8.906666667,24.88666667,11.55133333,2.593866667,0.074335136
233 | 45089,1492001.499,1300584.526,73,28.44666667,26.08666667,10.87133333,1.212133333,0.050556954
234 | 45003,1319815.773,1263983.766,68,15.08,25.48666667,11.71933333,1.271333333,0.026713313
235 | 13145,1031170.742,1134100.571,60,9.393333333,21.38,12.39666667,0.952266667,0.057190997
236 | 42049,1307535.891,2220156.836,69,14.02666667,27.22666667,11.41266667,2.5386,0.078515267
237 | 36067,1594290.542,2390104.004,66,12.77333333,23.46,9.342,2.217066667,0.019415654
238 | 37131,1642325.495,1646805.024,64,23.24666667,27.13333333,11.928,1.4642,0.0309088
239 | 51036,1650666.498,1755747.35,73,10.53333333,27.4,11.52333333,2.8706,0.080230738
240 | 51067,1414401.142,1670082.516,55,11.74666667,26.65333333,10.43333333,1.821,0.06122508
241 | 34019,1755884.159,2146097.44,46,3.413333333,16.98,10.99133333,4.996733333,0.063308726
242 | 13159,1135652.349,1212270.721,68,15.75333333,26.81333333,12.39266667,1.7292,0.167261261
243 | 37173,1121743.842,1455205.271,75,17.30666667,30.68,10.22066667,0.779933333,0.038159153
244 | 37187,1727246.941,1596012.316,69,22.26,27.06,9.89,1.518933333,0.012975895
245 | 13015,1017773.77,1301385.213,86,12.5,27.63333333,13.48733333,2.540066667,0.196600637
246 | 51071,1337018.762,1693779.243,83,11.37333333,27.88,9.333333333,1.865133333,0.03055773
247 | 51740,1723227.177,1712308.886,81,16.80666667,26.14666667,11.14866667,2.5748,0.051780903
248 | 24005,1639584.197,1994032.373,63,7.753333333,22.46,13.484,4.4376,0.135596137
249 | 24015,1694933.488,2018400.3,79,8.46,26.07333333,13.76,4.755933333,0.089461949
250 | 37079,1637921.407,1539543.085,68,20.49333333,26.16,12.72733333,1.618466667,0.013556089
251 | 24019,1713827.547,1896169.761,77,14.64,26.37333333,12.348,3.3664,0.071502067
252 | 37111,1247170.21,1494736.273,70,14.94,29.22666667,10.408,1.6632,0.06043457
253 | 13163,1256171.786,1199257.299,77,23.84,27.06,12.14933333,1.0764,0.040545905
254 | 13289,1169114.232,1143614.233,73,19.92,28.86,12.18733333,1.0758,0.070710838
255 | 51107,1563959.503,1936625.552,50,3.26,14.90666667,12.896,3.701533333,0.102731287
256 | 37139,1742060.758,1653103.173,74,18.08666667,26.09333333,10.12533333,1.4822,0.024836003
257 | 37005,1314686.836,1596855.37,63,17.64,28.08666667,9.848666667,1.2924,0.044066831
258 | 54009,1294492.84,2022929.839,76,12.44666667,30.58666667,13.258,4.1472,0.391901706
259 | 37067,1397694.609,1569407.773,67,14.14666667,25.45333333,12.11466667,2.743333333,0.090139443
260 | 45063,1346738.572,1308601.146,66,11.12666667,24.78666667,12.044,1.4006,0.032624567
261 | 10001,1737530.538,1972072.594,74,11.93333333,26.07333333,13.42266667,3.421533333,0.06929497
262 | 24027,1619605.459,1966057.211,42,4.446666667,13.88,13.384,4.7924,0.140965119
263 | 24029,1694052.145,1981950.218,65,11.78,22.96,13.53266667,4.581533333,0.102078797
264 | 24031,1599218.37,1948856.135,44,5.82,13.51333333,13.38066667,4.832066667,0.130191437
265 | 37091,1679783.717,1647579.887,70,23.35333333,26.52,11.362,1.4432,0.030431981
266 | 36003,1464342.964,2278548.815,67,16.50666667,24.69333333,9.233333333,1.309066667,0.044734191
267 | 37061,1626897.935,1474774.311,65,20.34,27.73333333,11.69,1.2014,0.016240834
268 | 13121,1057680.928,1255772.741,58,16.11333333,18.88666667,13.46466667,3.747733333,0.166282138
269 | 54001,1363554.584,1904822.789,72,20.64,29.07333333,10.59466667,2.035666667,0.16523128
270 | 51111,1557688.929,1690682.635,68,19.68666667,29.47333333,11.22666667,1.625133333,0.057253665
271 | 51169,1181659.323,1602179.406,84,16.82,29.69333333,10.52666667,2.512133333,0.059581884
272 | 24037,1668389.554,1867466.358,70,7.686666667,22.06666667,12.39,2.421866667,0.101043297
273 | 45077,1199434.591,1398170.842,64,14.14,24.58666667,11.932,1.527933333,0.027727574
274 | 45087,1301978.779,1390787.374,79,16.78,30.08666667,12.28133333,1.3432,0.041707734
275 | 45083,1264596.645,1412493.179,73,14.05333333,26.68666667,12.328,1.836066667,0.051226487
276 | 45013,1422969.875,1149284.841,50,11.61333333,21.56666667,10.86,0.9116,0.015457588
277 | 45053,1394211.904,1150416.926,60,23.18666667,27.96,10.896,0.962333333,0.016498578
278 | 37051,1545014.124,1472239.09,76,16.4,25.54,13.02733333,1.6628,0.016146991
279 | 10003,1719077.989,2024828.341,66,9.393333333,23.24,14.13333333,4.9514,0.068458668
280 | 13077,1036448.997,1204099.492,66,9.92,23.62666667,12.62466667,1.9452,0.160924834
281 | 13233,989224.3021,1271527.442,100,17.14666667,29.36,12.866,1.945133333,0.211033087
282 | 24003,1652614.186,1944776.741,68,5.713333333,21.74,13.526,4.5748,0.141986298
283 | 37087,1166148.013,1468980.246,61,14.2,28.46666667,9.826666667,1.024666667,0.036620953
284 | 54087,1257108.312,1839934.89,82,21.40666667,33.21333333,11.66733333,2.575266667,0.21716079
285 | 37197,1361162.478,1566766.534,69,12.86666667,29.06,11.572,2.0944,0.083312892
286 | 10005,1762733.34,1928788.66,69,11.62666667,27.42,12.29266667,3.601333333,0.057635935
287 | 37089,1214308.512,1450981.49,56,12.14666667,24.26666667,10.62333333,1.5424,0.045088568
288 | 13157,1134916.515,1304291.6,83,13.32,26.22,13.05333333,1.7394,0.057007757
289 | 37161,1263152.818,1465512.044,69,16.99333333,28.60666667,11.21066667,1.900533333,0.075897961
290 | 51141,1385083.588,1629473.103,65,15.12666667,29.87333333,10.398,1.783533333,0.070270986
291 | 42067,1552118.755,2098795.276,51,9.493333333,26.25333333,11.24066667,3.056866667,0.106863053
292 | 42071,1658497.002,2064716.084,52,8.76,23.22666667,12.81133333,4.389,0.108056667
293 | 45019,1486274.334,1210584.557,62,15.84666667,23.36666667,9.590666667,1.022133333,0.027919816
294 | 37193,1316547.473,1564807.212,68,16.15333333,28.67333333,10.69733333,1.5656,0.06409672
295 | 13141,1199327.046,1215604.883,72,28.94,27.47333333,12.25066667,1.3462,0.101570204
296 | 13011,1137907.672,1329501.639,60,14.30666667,27.9,12.83866667,1.4372,0.037617526
297 | 37177,1759744.963,1602276.313,70,26.16,29.27333333,8.868,1.401666667,0.011368369
298 | 42085,1302019.114,2140915.863,62,13,27.64,12.19066667,3.375133333,0.148678507
299 | 42087,1532755.461,2104065.458,63,13.76,28.11333333,11.04066667,2.893466667,0.10175133
300 | 42027,1509494.377,2134790.064,49,14.48666667,18.35333333,10.54133333,2.778333333,0.115801529
301 | 42029,1701412.133,2065940.824,54,5.72,18.22666667,13.038,5.0668,0.075305733
302 | 45033,1508707.785,1391005.584,94,25.06666667,30.48,12.386,1.3276,0.019207342
303 | 42031,1372665.672,2140579.475,58,14.4,25.1,11.74466667,3.210266667,0.211933187
304 | 45039,1351833.839,1365215.886,76,19.30666667,27.66,12.394,1.2808,0.03338413
305 | 13013,1123623.574,1286900.033,82,11.34,27.31333333,13.074,2.222866667,0.079718502
306 | 51147,1534615.893,1718046.143,70,20.8,25.88,11.206,1.9544,0.047593932
307 | 42093,1602149.321,2165666.682,54,9.88,27.02666667,10.764,3.075933333,0.140087713
308 | 42095,1718996.522,2159377.207,58,8.12,24.1,11.07466667,4.218066667,0.066797708
309 | 37175,1187967.651,1432054.99,50,13.48,24.61333333,10.37066667,1.1282,0.031857878
310 | 51077,1303279.472,1613812.512,74,15.95333333,29.81333333,9.492,1.106466667,0.035797871
311 | 24033,1636048.467,1921250.675,44,8.4,19.88666667,12.94933333,5.0034,0.153599393
312 | 34007,1770779.48,2061835.737,67,11.20666667,24.21333333,12.352,6.069333333,0.051329366
313 | 37199,1220636.087,1515441.143,59,17.25333333,27.62,9.689333333,1.147133333,0.045491152
314 | 13245,1282819.849,1237590.181,83,21.76,26.13333333,12.23933333,1.5142,0.02978399
315 | 13265,1205964.13,1249909.645,68,26.33333333,29.1,12.34733333,1.231133333,0.072741572
316 | 13279,1279101,1097466.725,96,24.2,26.5,12.12866667,0.727733333,0.011724619
317 | 36013,1356925.681,2255464.747,60,16.2,26.81333333,10.80266667,2.7216,0.07880145
318 | 37171,1354669.092,1594547.909,77,15.75333333,29.58,10.79533333,1.801666667,0.068460218
319 | 42043,1606103.319,2096485.218,61,10.52,23.40666667,11.74733333,3.305466667,0.133408198
320 | 51011,1499577.028,1728467.534,65,12.92666667,27.3,10.992,1.869533333,0.038460886
321 | 37009,1282966.394,1585422.594,60,16.16666667,27.18,9.619333333,1.268333333,0.038737395
322 | 51033,1611502.058,1824508.555,80,10.58,26.24,11.56733333,2.698,0.099649543
323 | 36047,1833913.467,2172024.905,42,22.83333333,21.98,12.294,9.603133333,0.038231252
324 | 51173,1272812.077,1630310.37,88,15.79333333,29.8,9.416,1.043533333,0.032801523
325 | 45059,1270662.378,1362776.656,75,17.32666667,28.74,12.48733333,1.290666667,0.025618834
326 | 51101,1640681.282,1793703.591,77,7.086666667,25.39333333,11.57666667,2.6488,0.091064661
327 | 45075,1397734.609,1264367.994,63,22.82666667,25.24,11.50133333,1.3372,0.045034246
328 | 51059,1599741.123,1914646.415,41,5.206666667,15.58,13.124,4.197266667,0.132658801
329 | 37023,1276514.134,1506834.895,71,15.09333333,29.36666667,10.76,1.639733333,0.076762768
330 | 51177,1581708.461,1836635.824,69,6.286666667,22.98666667,11.66866667,2.361,0.084119819
331 | 24041,1700070.455,1927775.068,52,8.58,21.46,13.02,3.826066667,0.091707227
332 | 42057,1506294.912,2020785.076,61,10.59333333,26.62666667,11.404,3.006933333,0.107416161
333 | 37159,1382960.571,1511317.321,72,14.22666667,27.73333333,12.72866667,2.4164,0.085937188
334 | 37117,1680413.528,1588520.683,79,20.81333333,25.64666667,11.318,1.585666667,0.014973045
335 | 13293,1085492.191,1156919.191,73,18.34,29.43333333,12.21066667,1.129666667,0.094517932
336 | 13313,998958.9498,1363198.725,79,14.55333333,27.76,13.13866667,2.328133333,0.111123155
337 | 51179,1593460.956,1865910.319,68,4.833333333,20.14,12.20866667,2.85,0.112266356
338 | 37027,1287060,1531518.525,80,14.56666667,29.48,10.84733333,1.598333333,0.069409705
339 | 42119,1570613.582,2151895.609,43,11.78,24.94666667,10.69266667,2.865866667,0.139606984
340 | 36083,1814336.67,2406093.9,74,10.66,23.66,8.708666667,2.2682,0.005980841
341 | 37045,1296774.667,1462918.922,72,17,27.89333333,12.01733333,2.031333333,0.098139093
342 | 24045,1750040.409,1892975.815,81,13.31333333,24.7,11.58533333,3.467466667,0.056606171
343 | 37119,1362697.348,1463374.166,56,12.08,19.47333333,13.03333333,3.1472,0.086533074
344 | 13297,1124773.134,1263220.213,68,11.93333333,26.28666667,12.866,2.137866667,0.108604907
345 | 42123,1372531.962,2211179.994,62,11.65333333,27.86,10.67733333,2.4872,0.087274143
346 | 34039,1803087.26,2167525.559,50,9.12,20.5,12.02733333,9.225,0.049706539
347 | 13307,1073563.458,1061689.581,66,19.30666667,26.8,12.05666667,0.7182,0.017903297
348 | 13309,1242618.537,1091702.72,67,31.72666667,28.05333333,12.11666667,0.746933333,0.013975894
349 | 42003,1340289.628,2052713.595,67,11.39333333,25.3,13.14933333,5.3952,0.405722665
350 | 42129,1385974.15,2042662.979,60,9.586666667,25.29333333,12.426,3.954466667,0.390517557
351 | 13085,1075715.83,1331660.446,74,10.51333333,25.73333333,12.78533333,1.252933333,0.081379445
352 | 13063,1071151.413,1229322.893,69,16.13333333,24.11333333,13.236,3.692933333,0.1628903
353 | 37141,1638010.356,1430021.552,69,15.14,27.39333333,10.21266667,0.896866667,0.020219009
354 | 37129,1646097.352,1398296.877,66,14.06666667,24.50666667,9.52,0.9528,0.0197437
355 | 24039,1744615.768,1862073.288,87,21.86,27.29333333,11.44866667,2.336866667,0.04860733
356 | 13167,1239454.726,1157124.13,68,26.93333333,28.34,12.08333333,0.851466667,0.036318168
357 | 36117,1524638.913,2392871.668,64,10.37333333,25.34666667,9.677333333,2.417866667,0.032075248
358 | 34005,1793128.135,2075366.679,60,5.54,21.64,11.64,4.755933333,0.045815574
359 | 37053,1763878.691,1670135.135,85,10.44666667,26.93333333,9.660666667,1.611266667,0.022068544
360 | 45091,1336110.314,1428342.555,71,11.68,25.60666667,12.576,1.9606,0.070648188
361 | 45065,1252619.994,1294113.473,57,18.9,26.20666667,12.47066667,1.146533333,0.02755893
362 | 45081,1303698.653,1313763.029,57,16.90666667,26.86666667,12.21933333,1.081466667,0.023179284
363 | 37123,1443588.974,1486516.397,63,18.66666667,27.22,12.53733333,1.7368,0.035944271
364 | 36039,1776833.617,2347254.627,75,13.5,26.41333333,8.798,2.173866667,0.011101784
365 | 45073,1170904.671,1378993.2,61,13.52,25.98,12.01666667,1.133533333,0.024534528
366 | 34041,1741874.528,2176288.673,61,6.246666667,22.55333333,10.152,4.036666667,0.060948017
367 | 42005,1376413.78,2098078.46,61,12.14666667,27.58666667,12.25,4.1228,0.345467193
368 | 42035,1517690.196,2172273.438,69,13.89333333,26.25333333,10.08266667,2.2874,0.092003654
369 | 13095,1111782.166,1008830.392,76,25.54,24.98,12.48666667,0.8886,0.01055159
370 | 36115,1804774.034,2472917.923,71,11.82,26.26666667,8.425333333,1.563866667,0.003323526
371 | 13269,1094442.998,1121507.842,72,24.24666667,28.86,12.34,0.960533333,0.052555854
372 | 13067,1045720.264,1271398.865,56,9.793333333,18.76666667,13.56533333,3.912933333,0.192186332
373 | 54037,1540247.38,1956911.497,79,9.606666667,25.58666667,12.47266667,3.5824,0.090802669
374 | 51035,1344969.201,1628917.501,66,15.07333333,29.30666667,9.627333333,1.3884,0.047610201
375 | 13251,1335318.349,1176789.814,81,21.18666667,26.57333333,11.52666667,0.740666667,0.019671816
376 | 36033,1703903.787,2595627.355,70,16.21333333,27.16666667,7.441333333,0.791,0.001725461
377 | 36051,1474589.851,2334175.807,67,11.57333333,23.93333333,9.173333333,1.775866667,0.041916752
378 | 54099,1174285.609,1763155.466,87,18.98666667,31.59333333,10.934,2.315333333,0.156098409
379 | 51027,1222296.597,1670564.647,77,22.90666667,31.51333333,9.558,1.386933333,0.046099452
380 | 45031,1457757.858,1375458.332,80,20.59333333,27.84,12.722,1.509866667,0.028191081
381 | 45079,1378047.721,1327144.98,65,14.46666667,21.80666667,12.47133333,1.593133333,0.040118978
382 | 51061,1556868.479,1895059.369,64,5.873333333,21.62666667,11.91466667,2.565466667,0.088261504
383 | 54057,1447341.374,1951614.004,73,14.74,27.72,10.71733333,2.6718,0.144306704
384 | 54059,1206178.921,1720396.176,117,24.92,33.39333333,10.17466667,2.023866667,0.074718423
385 | 54109,1258731,1714945.02,96,22.81333333,33.22,9.568666667,1.483133333,0.047587327
386 | 24047,1778263.316,1880783.857,68,10.22,23.66666667,11.25266667,2.643733333,0.039584709
387 | 45069,1477747.456,1409462.874,111,25.05333333,31.22,12.71,1.441866667,0.019776288
388 | 51075,1569406.987,1781445.46,60,7.006666667,22.2,11.478,2.084866667,0.064823311
389 | 51135,1569996.505,1715378.669,79,18.91333333,30.18666667,11.32,1.9602,0.058551801
390 | 54031,1462694.697,1908032.105,59,13.50666667,29.97333333,10.206,1.935066667,0.090330269
391 | 37143,1730519.803,1640428.514,61,17.11333333,26.19333333,10.14466667,1.243466667,0.02090575
392 | 45045,1231184.545,1403328.199,63,12.87333333,23.2,12.33666667,1.697266667,0.035648661
393 | 13087,1086388.993,932261.3744,87,24.13333333,26.85333333,11.91733333,0.6314,0.009050791
394 | 13205,1118028.341,975040.5711,79,25.48,28.46,12.46266667,0.745933333,0.008809128
395 | 13235,1171003.423,1094923.931,66,19.76,28.52666667,12.42933333,0.813333333,0.025316279
396 | 54027,1476981.123,1945927.229,77,15.80666667,30.44666667,10.56866667,2.5572,0.103569945
397 | 54055,1300125.813,1698264.065,82,20.34666667,30.86,9.208666667,1.259333333,0.032377277
398 | 54063,1345865.977,1723197.083,72,15.92666667,28.76666667,9.285333333,1.546933333,0.031847434
399 | 54067,1311268.885,1800627.417,82,19.15333333,29.52,9.838,1.2936,0.074269695
400 | 54079,1212726.84,1809882.281,73,9.846666667,25.23333333,11.83666667,3.134333333,0.271460418
401 | 51193,1656055.089,1843192.977,81,14.44,27.42,11.804,2.3918,0.113077794
402 | 51005,1388452.804,1756142.049,76,11.42666667,26.75333333,9.616,1.575733333,0.030462425
403 | 34013,1804735.869,2182643.518,55,14.73333333,21.47333333,11.86933333,10.5624,0.046986706
404 | 13033,1294398.008,1205638.256,81,25.21333333,27.76666667,11.84,1.0434,0.026772439
405 | 51001,1763272.516,1826266.62,89,17.81333333,26.00666667,10.694,2.126,0.033603465
406 | 13031,1329138.939,1135899.882,67,23.32666667,22.45333333,11.72133333,0.881133333,0.014463077
407 | 13165,1302225.403,1176479.14,80,26.43333333,27.55333333,11.828,0.768333333,0.021431232
408 | 37101,1576397.465,1531400.34,75,13.52666667,25.89333333,13.13266667,2.077133333,0.018123846
409 | 51105,1135752.782,1594803.167,98,23.91333333,30,10.47066667,1.666066667,0.063224455
410 | 36103,1916558.965,2219152.491,60,6.493333333,21.82666667,10.46666667,5.666066667,0.023758802
411 | 51019,1439176.423,1711113.406,61,7.98,25.4,10.64733333,1.9292,0.040129713
412 | 51047,1549846.101,1864900.94,69,9.38,24.64666667,11.658,2.259266667,0.068956104
413 | 13103,1366794.276,1138324.102,81,10.22,25.72,11.286,0.987066667,0.014068462
414 | 54083,1381105.214,1867124.342,64,18.00666667,28.47333333,9.473333333,1.143666667,0.08417722
415 | 51085,1604974.703,1792754.047,69,4.873333333,21.92,11.78133333,2.726933333,0.084289716
416 | 51069,1508841.421,1939047.688,72,7.12,25.38,11.472,2.8908,0.083405897
417 | 51195,1175972.341,1630930.847,102,20.78,30.29333333,9.794666667,2.1212,0.055951526
418 | 24017,1631028.737,1883315.978,70,6.526666667,21.62,12.286,3.227333333,0.126469912
419 | 36031,1757119.24,2553752.167,63,12.54666667,22.38,7.366,1.0304,0.001890423
420 | 51145,1573084.275,1762556.11,68,5.98,24.41333333,11.46666667,2.052466667,0.069999768
421 | 51760,1611344.221,1767745.306,84,21.63333333,24.86,12.282,3.314333333,0.086532211
422 | 36099,1549405.809,2355026.711,63,12.32,26.28666667,9.496666667,1.980933333,0.034136129
423 | 13045,1004456.194,1226100.569,77,15.37333333,26.64666667,12.57666667,1.8114,0.186850758
424 | 51073,1694092.154,1771191.17,84,9.066666667,25.02666667,11.782,1.877066667,0.063584232
425 | 13059,1155637.238,1286419.257,61,26.07333333,19.08666667,12.91666667,1.7166,0.069270951
426 | 51083,1500908.066,1659731.3,71,17.28,27.89333333,11.26266667,1.963,0.105735036
427 | 13061,1039034.904,1010413.014,66,30.28666667,26.92,11.95466667,0.6786,0.012870123
428 | 42125,1323351.94,2018108.583,70,10.11333333,26.47333333,13.104,4.348266667,0.420846544
429 | 45055,1400663.565,1366623.446,79,13.8,27.02666667,12.73333333,1.365133333,0.036224219
430 | 34009,1800201.815,1993222.583,71,9.826666667,25.06,12.22666667,2.242533333,0.039603257
431 | 54081,1281918.487,1736878.053,79,17.97333333,30.80666667,9.546666667,1.510933333,0.053551753
432 | 13073,1262487.034,1255436.349,63,7.14,20.92666667,12.438,1.550733333,0.033392771
433 | 13091,1200452.766,1092122.291,81,21.46,27.88666667,12.30533333,0.786866667,0.020215714
434 | 54053,1198413.316,1837359.461,93,18.16,31.96666667,12.19533333,3.346466667,0.328547367
435 | 37075,1093298.55,1436078.013,67,18.91333333,29.88,10.6,0.8758,0.044359369
436 | 42051,1378511.3,1996883.489,71,17.74,28.8,12.122,3.289533333,0.363875318
437 | 51041,1605034.702,1749188.255,64,6.053333333,21.03333333,11.846,2.791733333,0.08104605
438 | 51139,1502186.462,1871116.694,66,12.76666667,29.88,10.93133333,1.3806,0.052210197
439 | 51161,1393012.024,1697984.566,54,6.186666667,21.98,10.02266667,2.14,0.035611585
440 | 13099,1049875.392,977774.6751,76,27.16,26.52,12.31,0.650466667,0.011437491
441 | 51155,1340728.098,1665907.142,74,13.79333333,29.42,9.498666667,1.443733333,0.03471435
442 | 54073,1261607.333,1915191.247,81,13.41333333,31.09333333,12.59733333,3.822,0.309699136
443 | 51171,1489940.523,1896026.698,68,9.493333333,26.19333333,10.92333333,1.608,0.067379625
444 | 37167,1413285.618,1479056.188,70,13.29333333,26.78,12.71933333,2.1168,0.049407271
445 | 51185,1265973.084,1661117.007,85,16.86,29.04,9.216666667,1.150133333,0.033583226
446 | 13187,1089047.823,1347799.341,72,14.44666667,25.31333333,12.28666667,1.1558,0.054792182
447 | 13147,1186214.801,1335649.717,64,17.22666667,26.82666667,12.77933333,1.248866667,0.027080393
448 | 54107,1234436.881,1892845.955,77,15.5,30.1,12.86466667,4.1584,0.334966519
449 | 54041,1323938.678,1882650.824,87,18.65333333,30.56666667,11.062,2.1718,0.135115377
450 | 51175,1660955.711,1685061.662,73,14.96666667,26.34666667,11.58933333,1.6544,0.048127204
451 | 13027,1181516.343,940200.0498,76,23.58666667,26.68,11.74533333,0.6898,0.007454007
452 | 13127,1367612.98,1010378.056,75,16.02,24.20666667,10.648,0.675533333,0.016681435
453 | 13153,1150012.808,1117688.327,70,12.15333333,24.45333333,12.38533333,0.939,0.047824388
454 | 13181,1241432.538,1280516.511,76,18.13333333,27.74666667,12.44933333,1.265266667,0.034317774
455 | 54005,1237814.756,1758586.578,117,19.40666667,33.33333333,10.39666667,2.077866667,0.114510997
456 | 54035,1227296.869,1849010.747,79,15.84666667,28.86666667,12.198,3.2112,0.307647429
457 | 24023,1417357.194,1959178.546,49,13.27333333,22.78666667,10.73266667,2.2226,0.208542187
458 | 13207,1118726.876,1176047.921,62,12.60666667,25.11333333,12.358,1.692866667,0.136457909
459 | 13219,1151027.256,1272737.377,50,7.32,17.78666667,12.76333333,1.8114,0.088119263
460 | 13221,1182710.86,1282146.612,68,14.14666667,26.14666667,12.57066667,1.296066667,0.06162576
461 | 13319,1190786.057,1161735.547,71,18.15333333,26.35333333,12.34666667,1.0366,0.076514197
462 | 54023,1432264.469,1913616.709,51,15.18666667,28.70666667,10.108,1.8462,0.127781155
463 | 54093,1400892.182,1909096.143,64,16.08,27.87333333,9.789333333,1.548333333,0.151042948
464 | 36059,1859870.436,2189947.365,47,5.726666667,18.82,11.05133333,13.0592,0.034188713
465 | 36075,1588690.728,2436988.083,79,14.70666667,28.18,9.054,2.1862,0.014027634
466 | 13259,1046643.494,1062076.282,71,28.12,27.22,12.11466667,0.7876,0.01841575
467 | 37073,1702334.911,1662006.436,71,15.38666667,25.74,10.83866667,1.512466667,0.035770505
468 | 51199,1696239.433,1752034.183,68,4.8,18.16,12.08266667,2.2666,0.064840601
469 | 36079,1828676.407,2261806.233,61,5,19.14,9.526,3.034666667,0.027107157
470 | 51820,1477903.628,1803796.554,63,13.74666667,25.95333333,10.62266667,1.556733333,0.03125429
471 | 36063,1387460.155,2371359.136,72,12.09333333,27.27333333,10.92933333,3.77,0.051687158
472 | 13007,1093091.067,983304.1776,63,24.60666667,26.10666667,12.48733333,0.764533333,0.009830176
473 | 24021,1575637.407,1982508.773,55,5.346666667,19.72666667,13.25266667,3.657733333,0.108542513
474 | 36089,1647135.996,2571722.277,77,16.67333333,25.84,7.974666667,0.986066667,0.00309673
475 | 13271,1225504.006,1068331.624,73,28.98666667,28.64666667,12.23133333,0.654,0.011615129
476 | 13229,1302309.007,1014963.543,92,18.44666667,28.42,11.52666667,0.6454,0.010584248
477 | 45041,1486637.477,1345492.052,77,18.07333333,26.13333333,11.896,1.3086,0.035607105
478 | 37125,1481683.846,1490671.681,61,12.48,23.59333333,12.69466667,1.642733333,0.025624934
479 | 42065,1408466.268,2139659.345,58,13.13333333,27.06666667,11.41466667,2.764733333,0.210871695
480 | 34027,1778629.855,2185127.613,49,4.006666667,16.96,10.40866667,6.314666667,0.052155983
481 | 37019,1617731.315,1374464.226,68,14.13333333,29.45333333,9.852666667,0.791066667,0.019846569
482 | 37083,1623563.726,1624783.652,71,24.41333333,27.96,12.17866667,1.655333333,0.027546562
483 | 13075,1191026.254,976480.7218,81,21.28,28.88,12.46533333,0.680466667,0.007717036
484 | 45029,1420276.636,1202878.044,81,21.50666667,27.52,10.67533333,1.0124,0.032097859
485 | 45043,1531529.213,1286308.015,64,17.56,26.1,9.911333333,1.066133333,0.04238067
486 | 37033,1473661.73,1612333.892,73,16.92,29.27333333,11.45733333,2.2072,0.114894069
487 | 37049,1697494.859,1510019.832,72,14.31333333,24.64,9.945333333,1.350933333,0.009910888
488 | 13211,1149571.168,1245032.103,60,13.48666667,23.52666667,12.62866667,1.632733333,0.128666985
489 | 42101,1751441.689,2080937.922,80,22.82,29.42,13.31,6.893,0.058032971
490 | 36085,1817579.991,2161994.379,58,10.34666667,23.70666667,11.70666667,9.3702,0.032904442
491 | 34003,1814401.264,2204752.455,48,5.886666667,18.34,11.37533333,9.018333333,0.04112128
492 | 36011,1567737.518,2374439.526,61,12.3,27.77333333,9.520666667,2.130266667,0.027034536
493 | 37145,1505394.763,1617645.776,74,13.60666667,27.37333333,11.62533333,2.132466667,0.104696358
494 | 51007,1572483.12,1737932.54,82,10.14666667,28.56666667,11.296,1.994733333,0.064431327
495 | 51660,1472787.865,1845021.532,51,24.03333333,20.66666667,11.05466667,1.5598,0.04264868
496 | 54097,1348482.639,1875559.148,74,19.86,28.52666667,10.222,1.7296,0.106872143
497 | 36025,1711335.093,2323192.08,57,14.04666667,25.22,8.61,1.116,0.015069066
498 | 34017,1819905.966,2180131.941,53,15.05333333,22.00666667,12.13533333,10.94493333,0.044451956
499 | 13305,1326934.884,1040349.836,83,20.16,27.84666667,11.46266667,0.695666667,0.010446505
500 | 34029,1824854.704,2083480.234,65,8.42,24.91333333,11.718,3.2172,0.037615359
501 | 51181,1670078.514,1731435.217,66,11.72,25.02,11.464,2.4372,0.068553031
502 | 51191,1237850.245,1611510.929,72,12.75333333,26.56,9.882,1.6892,0.041376635
503 | 37043,1104919.49,1404573.347,57,15.26,26.17333333,10.86866667,0.783466667,0.033007594
504 | 36077,1695430.803,2369555.448,59,13.91333333,23.34666667,8.384666667,1.2666,0.010474389
505 | 42089,1709056.974,2192003.055,69,9.773333333,24.96666667,9.652,2.6352,0.060264661
506 | 36041,1712275.97,2490987.62,73,10.17333333,22.35333333,7.322666667,0.886666667,0.003557744
507 | 37147,1662198.209,1556593.377,63,20.43333333,23.14,12.17,1.583533333,0.013127509
508 | 13019,1208146.786,992422.7947,88,19.59333333,29.06666667,12.24733333,0.6612,0.007774275
509 | 36071,1784395.906,2248556.365,64,10.91333333,23.10666667,9.442,2.7786,0.031406659
510 | 36001,1780443.169,2385249.289,64,11.42666667,21.46,8.835333333,2.475333333,0.007572167
511 | 13303,1222799.465,1184983.217,73,22.82,27.57333333,12.23533333,1.018133333,0.065402509
512 | 51113,1523847.656,1851669.482,57,10.34666667,24.45333333,11.29333333,1.636866667,0.047605457
513 | 51187,1519532.581,1907327.622,80,9.506666667,26.86666667,11.37266667,1.966666667,0.07172545
514 | 37065,1635788.182,1587878.711,72,22.82666667,28.27333333,12.63733333,1.6888,0.018609596
515 | 13051,1392412.033,1101415.414,65,17.26,24.16666667,11.10933333,1.0518,0.009944617
516 | 51053,1607599.901,1715108.814,78,10.75333333,28.22,11.47733333,2.143133333,0.064553301
517 | 13043,1298399.5,1132052.351,77,23.82,27.38,12.06666667,0.7762,0.015079075
518 | 51009,1466190.928,1748958.493,74,11.94666667,27.19333333,10.70066667,1.6734,0.029867792
519 | 51770,1402346.68,1700671.94,82,17.51333333,28.96,10.36,2.145066667,0.035279193
520 | 37179,1394272.731,1439225.335,60,9.286666667,22.08666667,12.874,2.0826,0.04897361
521 | 37189,1269211.479,1560293.27,47,18.84666667,20.79333333,9.731333333,1.244933333,0.042434137
522 | 51157,1528372.041,1883439.688,56,8.286666667,21.96,11.38333333,1.718133333,0.063077793
523 | 54085,1273217.53,1895127.718,80,17.58,31.44,12.086,3.053266667,0.24868401
524 | 51013,1613596.309,1922262.73,43,6.766666667,13.98,13.568,5.005933333,0.144993707
525 | 13285,1016197.213,1165749.052,74,17.27333333,26.14,12.26266667,1.258666667,0.097151262
526 | 13039,1360362.387,975480.8504,72,11.96,25.05333333,10.468,0.752133333,0.027253299
527 | 36019,1748150.048,2623675.29,69,13.74666667,23.7,7.742,1.1768,0.001208892
528 | 45011,1342753.031,1236098.031,73,21.40666667,27.20666667,11.50266667,1.095666667,0.027567092
529 | 45057,1384086.392,1403329.781,68,15.94666667,29.13333333,12.678,1.591466667,0.041551825
530 | 37011,1251796.433,1540204.588,66,17.58,26.47333333,9.703333333,1.1974,0.041331988
531 | 24001,1463675.83,1978141.473,64,15.16,26.38,11.03733333,2.706333333,0.140489244
532 | 37165,1490869.031,1438927.834,79,23.49333333,28.08666667,12.85466667,1.426133333,0.01773952
533 | 42073,1301449.84,2105533.834,62,13.28,27.28666667,12.96466667,4.101933333,0.218065512
534 | 42039,1307257.316,2185193.456,66,14.26,27.56666667,11.51466667,2.923066667,0.097478561
535 | 42015,1596515.267,2251514.685,65,12.86,26.88,9.106666667,1.471466667,0.053094296
536 | 13035,1111388.857,1205843.94,91,14.74,29.11333333,12.38133333,2.014466667,0.160238001
537 | 13001,1288990.75,1057012.927,77,19.44,27.43333333,12.026,0.7092,0.008886769
538 | 36107,1604638.584,2296816.053,61,9.806666667,23.31333333,9.08,1.734533333,0.034471747
539 | 37121,1231540.194,1530025.882,72,15.86,27.71333333,9.962,1.087666667,0.042747608
540 | 45001,1233904.427,1327933.576,70,16.4,26.92666667,12.66666667,1.252533333,0.02427076
541 | 54095,1283161.73,1929358.59,86,16.5,30.61333333,12.554,3.479333333,0.298923313
542 | 51800,1701943.473,1690601.23,74,11.63333333,23.46666667,10.886,1.933466667,0.049455466
543 | 45023,1343252.819,1397436.613,81,18.55333333,30.46666667,12.39533333,1.4288,0.04506228
544 | 13317,1214993.687,1275513.359,76,19.96666667,28.10666667,12.48133333,1.184533333,0.048553454
545 | 13155,1198918.04,1027877.281,77,21.77333333,29.3,12.55666667,0.678866667,0.009601194
546 | 36045,1590775.698,2508226.523,72,14.85333333,26,8.586,1.803866667,0.007207704
547 | 34001,1803710.599,2031834.035,69,10.92666667,25.98666667,11.76333333,3.114133333,0.040341525
548 | 42017,1745633.551,2117385.439,60,5.286666667,21.84,11.75133333,5.077333333,0.062835561
549 | 37037,1494044.085,1537413.373,51,10.59333333,21.08666667,12.54733333,2.254,0.040195966
550 | 37097,1349138.009,1524687.672,65,10.98666667,25.21333333,12.08733333,2.315333333,0.097859031
551 | 42061,1506866.668,2076919.913,61,12.58666667,26.34,10.99733333,2.758066667,0.114331818
552 | 37017,1577416.075,1428761.544,70,21.81333333,27.52,11.878,1.133066667,0.019236577
553 | 13025,1326661.912,1000420.533,99,18.22666667,30.06,11.04466667,0.677133333,0.015539183
554 | 13315,1178880.849,1066819.615,74,27.22666667,28.12,12.58733333,0.729133333,0.015056365
555 | 51700,1702968.965,1737731.396,68,13.80666667,24.94666667,12.11533333,2.6156,0.050765587
556 | 51091,1415121.011,1826079.888,56,12.74666667,25.20666667,9.149333333,1.113466667,0.041548653
557 | 51810,1752695.578,1705589.886,63,7.513333333,22.45333333,10.156,2.119733333,0.03788188
558 | 51165,1471173.589,1853131.421,51,8.92,23.8,10.72066667,1.485933333,0.049190178
559 | 13183,1339648.617,1064862.147,84,20.88666667,27.95333333,11.35,0.814733333,0.009522017
560 | 13231,1074360.616,1179283.593,85,11.23333333,25.39333333,12.31266667,1.468933333,0.127035045
561 | 13113,1060350.192,1213666.982,48,5.18,18.26,12.96466667,2.4054,0.163848439
562 | 13005,1276775.865,1033243.564,97,20.66,29.77333333,11.92533333,0.6524,0.008562619
563 | 37183,1545650.8,1556677.834,53,9.106666667,17.72666667,12.79,2.105933333,0.031941021
564 | 45071,1310463.886,1346837.147,69,16.20666667,27.22,12.446,1.220533333,0.024817823
565 | 13071,1158669.438,976083.7313,80,22.59333333,27.98,12.49333333,0.695,0.008105214
566 | 13199,1047465.29,1170294.889,66,19.15333333,28.18666667,12.24066667,1.247066667,0.109732141
567 | 24025,1663852.67,2010730.911,64,6.066666667,21.71333333,13.44133333,4.541066667,0.119126544
568 | 51051,1197155.374,1651004.36,91,20.36,31.32666667,9.614,1.8648,0.05338612
569 | 34035,1780207.58,2151254.925,47,4.273333333,15.79333333,11.512,6.028333333,0.056940368
570 | 36113,1765770.902,2492083.498,63,10.66,24.27333333,7.889333333,1.2494,0.002861461
571 | 42107,1645709.504,2137970.773,67,11.43333333,28.67333333,10.458,3.339333333,0.106499101
572 | 42041,1571533.317,2060710.058,52,6.646666667,20.99333333,12.386,3.9012,0.107308048
573 | 54071,1427489.357,1864438.437,46,13.79333333,27.12666667,9.411333333,1.290066667,0.068158849
574 | 51021,1303204.702,1667921.155,71,12.98666667,28.03333333,9.201333333,1.154,0.028963157
575 | 24043,1538083.918,1990247.173,66,10.17333333,25.11333333,12.51466667,3.615933333,0.096786915
576 | 13079,1116766.6,1142102.125,75,16.65333333,27.51333333,12.22533333,1.2422,0.082118091
577 | 54025,1347254.648,1767353.557,80,17.39333333,28.57333333,9.438,1.296133333,0.040138342
578 | 42045,1731682.361,2066067.558,67,9.146666667,24.60666667,13.42733333,6.200533333,0.061369367
579 | 13273,1087754.095,1033206.976,85,27.55333333,27.72,12.16266667,0.795333333,0.01352705
580 | 42127,1697727.853,2257139.2,64,11.91333333,26.90666667,8.940666667,1.246066667,0.031406852
581 | 51031,1478293.438,1705612.137,68,11.79333333,26.74,10.88533333,1.837933333,0.051762803
582 | 13129,1011067.176,1330535.353,90,14.01333333,28.56666667,13.30933333,2.062133333,0.157855786
583 | 51121,1367109.715,1682899.993,55,17.4,19.98,9.554,1.964066667,0.036665624
584 | 51029,1519909.161,1754957.03,69,18.9,28.81333333,11.11733333,1.6644,0.037051356
585 | 37055,1798253.644,1604192.673,67,9.18,25.10666667,8.216666667,1.1462,0.008347594
586 | 13137,1130864.957,1359803.544,60,13.65333333,25.58666667,12.17,1.1376,0.030868917
587 | 54043,1204390.574,1770898.492,108,23.90666667,33.2,10.938,2.4316,0.171496282
588 | 42033,1454142.899,2133464.921,61,13.96666667,27.39333333,10.77533333,2.168066667,0.175473521
589 | 36105,1739211.263,2274248.013,71,15.28,25.80666667,8.680666667,1.484133333,0.023338405
590 | 42111,1429094.609,2011809.21,49,12.92666667,27.22,11.344,2.639533333,0.257661742
591 | 51023,1409669.854,1733520.052,53,6.28,23.20666667,10.06533333,1.777333333,0.028794215
592 | 51109,1559975.42,1808855.188,76,10.14,26.84,11.40266667,2.0466,0.057913201
593 | 42055,1538800.032,2027269.531,55,8.533333333,24.38666667,12.33933333,3.279733333,0.095794648
594 | 13253,1058244.949,935725.1685,81,23.44666667,27.06,12.06266667,0.6452,0.010910305
595 | 37095,1763204.221,1568816.331,69,22.05333333,28.37333333,8.826666667,1.182133333,0.007774045
596 | 42103,1727250.264,2227452.264,58,8.246666667,25.24666667,8.897333333,1.825666667,0.041719105
597 | 13105,1201057.911,1311274.263,74,18.64666667,28.79333333,12.79066667,1.111533333,0.033088462
598 | 45007,1213040.652,1358418.683,70,13.92,26.8,12.72666667,1.501466667,0.024158228
599 | 36061,1827575.68,2186232.241,43,17.91333333,18.74,12.272,10.6904,0.042456716
600 | 36095,1743539.37,2375273.679,67,12.16666667,25.10666667,8.224666667,1.495333333,0.008576691
601 | 13301,1226878.056,1235056.745,84,23.9,28.78,12.29,1.332466667,0.062790517
602 | 13261,1106655.278,1065071.709,70,25.01333333,25.10666667,12.22466667,0.893733333,0.018143691
603 | 51045,1376536.195,1719306.873,63,10.04666667,25.97333333,9.652,1.887,0.030335087
604 | 13107,1274318.468,1149501.496,84,25.76,27.90666667,11.99266667,0.749666667,0.021429513
605 | 54015,1284743.833,1815780.539,103,24.78,32.10666667,10.682,2.301733333,0.122965278
606 | 42059,1331673.938,1981403.637,74,16.40666667,27.34666667,12.764,4.152733333,0.394057632
607 | 36081,1841804.183,2182258.761,39,14.54,20,12.01066667,14.68953333,0.037377968
608 | 36065,1648770.996,2428756.763,63,14.28,26.18666667,8.445333333,1.834,0.008933147
609 | 51079,1510291.406,1835864.464,61,8.126666667,24.67333333,11.17866667,1.533066667,0.038746902
610 | 54011,1185759.528,1795938.607,83,18.78666667,28.82666667,11.58866667,2.789466667,0.23816696
611 | 54061,1350699.787,1959197.776,61,17.48,22.56,12.19933333,3.297,0.326752058
612 | 51510,1616258.352,1915795.151,48,8.166666667,16.33333333,13.338,4.856866667,0.147191167
613 | 42063,1410676.886,2085864.825,53,15.92666667,24.21333333,11.798,3.693533333,0.339120644
614 | 24013,1604981.209,1998805.915,61,4.866666667,21.18666667,13.15266667,3.2682,0.127370906
615 | 54103,1301405.283,1948184.975,83,17.38666667,30.04666667,12.572,3.599866667,0.328060327
616 | 13299,1287458.284,978514.5473,78,21.82,28.40666667,11.30066667,0.639466667,0.01326026
617 | 54091,1356170.287,1926659.922,78,18.14,28.22666667,11.38066667,2.714933333,0.229157977
618 | 37103,1676368.487,1494220.466,70,17.85333333,27.88,10.46933333,1.345333333,0.011981824
619 | 45049,1378190.662,1186307.961,74,22.88,26.22666667,11.01733333,0.8428,0.023250973
620 | 51650,1717605.117,1735088.898,73,13.46,23.96,11.932,2.729333333,0.056447223
621 | 51095,1675785.756,1757564.59,48,6.693333333,18.48,11.70733333,2.332533333,0.074459277
622 | 42069,1678063.195,2228579.147,64,12.03333333,26.96,9.420666667,1.9836,0.04698714
623 | 42013,1475160.903,2078209.205,65,13.15333333,26.76666667,11.01933333,2.9708,0.174829692
624 | 13257,1153423.385,1354138.051,83,17.77333333,27.32666667,12.522,1.286733333,0.027933233
625 | 37047,1576210.284,1388925.415,80,22.56,29.26666667,11.02666667,0.963466667,0.020479309
626 | 37071,1330908.701,1463722.596,82,14.53333333,28.59333333,12.63066667,2.612666667,0.104389145
627 | 45025,1433973.062,1406304.158,81,20.36666667,29.28666667,12.806,1.384066667,0.027277384
628 | 36111,1775867.93,2302403.31,61,11.6,23.75333333,9.09,2.032466667,0.017977323
629 | 37021,1205489.367,1480787.544,62,14.03333333,25.36,10.22,1.417466667,0.046396954
630 | 37015,1686744.852,1615496.926,70,23.18,26.56,11.17933333,1.533866667,0.019561572
631 | 34031,1794089.083,2208556.382,52,13.49333333,21.47333333,10.50133333,5.992066667,0.042913013
632 | 45017,1395339.658,1290613.603,53,16.70666667,24.62666667,11.85933333,1.471933333,0.047343003
633 | 36087,1813564.819,2226668.669,44,10.77333333,18.28,10.42866667,5.0918,0.036711558
634 | 51057,1647038.652,1822048.171,74,13,25.64666667,11.38933333,2.438,0.103948363
635 | 37135,1498854.676,1579049.044,58,13.19333333,17.04666667,12.08933333,1.981266667,0.072613316
636 | 34037,1759786.533,2212712.261,63,4.686666667,21.71333333,9.342666667,3.157,0.045925872
637 | 13123,1046569.812,1355924.329,63,15.96666667,28.58,12.278,1.247066667,0.092880033
638 | 13295,969740.2733,1351864.506,95,15.14666667,29.5,12.974,2.117466667,0.120574462
639 | 45047,1265105.223,1324705.806,65,16.17333333,26.52666667,12.34666667,1.125133333,0.022022008
640 | 51097,1656798.071,1798464.495,76,12.48666667,27.42666667,11.33266667,2.389266667,0.090404004
641 | 42099,1566640.722,2086593.725,70,8.56,25.40666667,11.64533333,3.324266667,0.113054116
642 | 13047,982225.8689,1372225.108,73,11.64,27.54666667,13.1,2.697666667,0.097585145
643 | 37099,1156077.938,1437274.96,58,17.27333333,25.83333333,10.082,0.866,0.028709487
644 | 13209,1259366.381,1100453.583,87,22.35333333,24.56666667,12.062,0.6928,0.013618591
645 | 13237,1164434.823,1216682.797,67,15.64,24.84,12.45666667,1.533333333,0.148870224
646 | 51163,1436054.549,1767464.309,61,10.61333333,24.94666667,9.969333333,1.712733333,0.028223625
647 | 37137,1729079.643,1519154.323,64,16.42666667,26.17333333,8.904666667,1.135466667,0.007762193
648 | 13197,1071814.999,1095905.649,64,22.07333333,29.11333333,12.17733333,0.763733333,0.031503506
649 | 34025,1820299.025,2125684.596,61,6.18,20.81333333,11.73533333,5.0214,0.044226238
650 | 13217,1117382.249,1236773.733,75,12.46666667,25.47333333,12.752,2.468133333,0.144379531
651 | 13249,1092478.705,1088370.428,77,17.74666667,27.5,12.15666667,0.828933333,0.027058017
652 | 51143,1459738.423,1658545.822,72,12.98,28.16,11.10733333,1.933733333,0.102060102
653 | 51167,1222483.585,1632905.456,83,18.16666667,29.48666667,9.679333333,1.765666667,0.04744314
654 | 13287,1164606.542,1036137.608,75,26.06666667,28.28666667,12.63,0.7356,0.011126866
655 | 54105,1249192.815,1873699.746,91,18.85333333,30.52666667,12.25533333,2.759333333,0.27650887
656 | 51025,1594645.856,1677031.064,74,20.12,28.42666667,11.48533333,1.5654,0.049614259
657 | 51093,1690124.337,1710628.896,70,9.013333333,22.83333333,11.3,2.228533333,0.058830963
658 | 24035,1699831.3,1961649.538,67,6.633333333,21.7,13.27066667,4.305866667,0.096557021
659 | 13125,1235765.269,1216094.351,70,16.13333333,28.77333333,12.21333333,1.259333333,0.059530334
660 | 42091,1727248.17,2098844.026,53,5.426666667,20.43333333,12.528,5.060933333,0.063949792
661 | 37025,1385195.632,1483013.976,66,10.22666667,24.38,12.91733333,2.677466667,0.078309834
662 | 42011,1676333.766,2111342.483,57,10.77333333,24.4,11.80733333,4.2978,0.086217084
663 | 45015,1479695.44,1250590.851,73,12.89333333,26.22666667,10.42333333,1.198666667,0.049416007
664 | 54049,1336346.909,1943122.147,80,16.3,26.5,12.28733333,3.2686,0.288285121
665 | 51087,1617200.64,1769762.462,64,8.18,20.61333333,12.01866667,3.250133333,0.08572237
666 | 36119,1835028.388,2232826.671,46,8.546666667,18.12666667,10.35333333,5.469333333,0.032412807
667 | 13185,1211163.789,943251.3662,79,20.2,24.98,11.708,0.710666667,0.007496417
668 | 


--------------------------------------------------------------------------------
/data/meuse.csv:
--------------------------------------------------------------------------------
  1 | x,y,cadmium,copper,lead,zinc,elev,dist,om,ffreq,soil,lime,dist_to_meuse
  2 | 181072,333611,11.7,85,299,1022,7.909,0.00135803,13.6,1,1,1,50
  3 | 181025,333558,8.6,81,277,1141,6.983,0.0122243,14.0,1,1,1,30
  4 | 181165,333537,6.5,68,199,640,7.8,0.103029,13.0,1,1,1,150
  5 | 181298,333484,2.6,81,116,257,7.655,0.190094,8.0,1,2,0,270
  6 | 181307,333330,2.8,48,117,269,7.48,0.27709,8.7,1,2,0,380
  7 | 181390,333260,3.0,61,137,281,7.791,0.364067,7.8,1,2,0,470
  8 | 181165,333370,3.2,31,132,346,8.217,0.190094,9.2,1,2,0,240
  9 | 181027,333363,2.8,29,150,406,8.49,0.0921516,9.5,1,1,0,120
 10 | 181060,333231,2.4,37,133,347,8.668,0.184614,10.6,1,1,0,240
 11 | 181232,333168,1.6,24,80,183,9.049,0.309702,6.3,1,2,0,420
 12 | 181191,333115,1.4,25,86,189,9.015,0.315116,6.4,1,2,0,400
 13 | 181032,333031,1.8,25,97,251,9.073,0.228123,9.0,1,1,0,300
 14 | 180874,333339,11.2,93,285,1096,7.32,0.0,15.4,1,1,1,20
 15 | 180969,333252,2.5,31,183,504,8.815,0.113932,8.4,1,1,0,130
 16 | 181011,333161,2.0,27,130,326,8.937,0.168336,9.1,1,1,0,220
 17 | 180830,333246,9.5,86,240,1032,7.702,0.0,16.2,1,1,1,10
 18 | 180763,333104,7.0,74,133,606,7.16,0.0122243,16.0,1,1,1,10
 19 | 180694,332972,7.1,69,148,711,7.1,0.0122243,16.0,1,1,1,10
 20 | 180625,332847,8.7,69,207,735,7.02,0.0,13.7,1,1,1,10
 21 | 180555,332707,12.9,95,284,1052,6.86,0.0,14.8,1,1,1,10
 22 | 180642,332708,5.5,53,194,673,8.908,0.0703468,10.2,1,1,1,80
 23 | 180704,332717,2.8,35,123,402,8.99,0.0975136,7.2,1,1,1,140
 24 | 180704,332664,2.9,35,110,343,8.83,0.113932,7.2,1,1,1,160
 25 | 181153,332925,1.7,24,85,218,9.02,0.342321,7.0,1,2,0,440
 26 | 181147,332823,1.4,26,75,200,8.976,0.385804,6.9,1,2,0,490
 27 | 181167,332778,1.5,22,76,194,8.973,0.429289,6.3,1,2,0,530
 28 | 181008,332777,1.3,27,73,207,8.507,0.315116,5.6,1,2,0,400
 29 | 180973,332687,1.3,24,67,180,8.743,0.320574,4.4,1,2,0,400
 30 | 180916,332753,1.8,22,87,240,8.973,0.249863,5.3,1,2,0,330
 31 | 181352,332946,1.5,21,65,180,9.043,0.489064,4.8,1,2,0,630
 32 | 181133,332570,1.3,29,78,208,8.688,0.472778,2.6,1,2,0,570
 33 | 180878,332489,1.3,21,64,198,8.727,0.287957,1.0,1,2,0,390
 34 | 180829,332450,2.1,27,77,250,8.328,0.271622,2.4,1,2,0,360
 35 | 180954,332399,1.2,26,80,192,7.971,0.385807,1.9,1,2,0,500
 36 | 180956,332318,1.6,27,82,213,7.809,0.418417,3.1,1,2,0,550
 37 | 180710,332330,3.0,32,97,321,6.986,0.244474,1.6,1,2,0,340
 38 | 180632,332445,5.8,50,166,569,7.756,0.135709,3.5,1,2,0,210
 39 | 180530,332538,7.9,67,217,833,7.784,0.0484965,8.1,1,1,1,60
 40 | 180478,332578,8.1,77,219,906,7.0,0.0,7.9,1,1,1,10
 41 | 180383,332476,14.1,108,405,1454,6.92,0.00135803,9.5,1,1,1,20
 42 | 180494,332330,2.4,32,102,298,7.516,0.135709,1.4,1,2,0,170
 43 | 180410,332031,1.3,21,62,258,9.28,0.320572,2.0,1,2,0,360
 44 | 180355,332299,4.2,51,281,746,7.94,0.081222,5.1,1,2,0,100
 45 | 180292,332157,4.3,50,294,746,6.36,0.190086,5.3,1,2,0,200
 46 | 180283,332014,3.1,38,211,464,7.78,0.287941,4.5,1,2,0,320
 47 | 180282,331861,1.7,26,135,365,8.18,0.423826,4.9,1,2,0,480
 48 | 180270,331707,1.7,24,112,282,9.42,0.554289,4.5,1,2,0,660
 49 | 180199,331591,2.1,32,162,375,8.867,0.603225,5.5,1,2,0,690
 50 | 180135,331552,1.7,24,94,222,8.292,0.614071,3.4,1,2,0,710
 51 | 180237,332351,8.2,47,191,812,8.06,0.00135803,11.1,1,1,1,10
 52 | 180103,332297,17.0,128,405,1548,7.98,0.0,12.3,1,1,1,10
 53 | 179973,332255,12.0,117,654,1839,7.9,0.0054321,16.5,1,1,1,10
 54 | 179826,332217,9.4,104,482,1528,7.74,0.0054321,13.9,1,1,1,10
 55 | 179687,332161,8.2,76,276,933,7.552,0.0054321,8.1,1,1,1,20
 56 | 179792,332035,2.6,36,180,432,7.76,0.146578,3.1,1,1,0,200
 57 | 179902,332113,3.5,34,207,550,6.74,0.135684,5.8,1,1,0,140
 58 | 180100,332213,10.9,90,541,1571,6.68,0.0703333,10.2,1,1,1,70
 59 | 179604,332059,7.3,80,310,1190,7.4,0.0484831,12.0,1,1,1,20
 60 | 179526,331936,9.4,78,210,907,7.44,0.0054321,14.1,1,1,1,10
 61 | 179495,331770,8.3,77,158,761,7.36,0.0054321,14.5,1,1,1,10
 62 | 179489,331633,7.0,65,141,659,7.2,0.0316663,14.8,1,1,1,20
 63 | 179414,331494,6.8,66,144,643,7.22,0.0122243,13.3,1,1,1,10
 64 | 179334,331366,7.4,72,181,801,7.36,0.0122243,15.2,1,1,1,20
 65 | 179255,331264,6.6,75,173,784,5.18,0.0373395,11.4,1,1,1,20
 66 | 179470,331125,7.8,75,399,1060,5.8,0.211846,9.0,1,1,0,270
 67 | 179692,330933,0.7,22,45,119,7.64,0.451037,3.6,1,1,1,560
 68 | 179852,330801,3.4,55,325,778,6.32,0.575877,6.9,1,1,0,750
 69 | 179140,330955,3.9,47,268,703,5.76,0.0756869,7.0,1,1,1,80
 70 | 179128,330867,3.5,46,252,676,6.48,0.12481,6.2,1,1,1,130
 71 | 179065,330864,4.7,55,315,793,6.48,0.103024,6.5,1,1,0,110
 72 | 179007,330727,3.9,49,260,685,6.32,0.157469,5.7,1,1,0,200
 73 | 179110,330758,3.1,39,237,593,6.32,0.200976,7.0,1,1,1,260
 74 | 179032,330645,2.9,45,228,549,6.16,0.200976,7.3,1,1,0,270
 75 | 179095,330636,3.9,48,241,680,6.56,0.26622,8.2,1,1,0,320
 76 | 179058,330510,2.7,36,201,539,6.9,0.298835,4.3,1,1,0,360
 77 | 178810,330666,2.5,36,204,560,7.54,0.0812247,4.4,1,1,1,80
 78 | 178912,330779,5.6,68,429,1136,6.42,0.070355,8.2,1,1,1,100
 79 | 178981,330924,9.4,88,462,1383,6.28,0.0122243,8.5,1,1,1,70
 80 | 179076,331005,10.8,85,333,1161,6.34,0.0,9.6,1,1,1,20
 81 | 180151,330353,18.1,76,464,1672,7.307,0.0537723,17.0,1,1,1,50
 82 | 179211,331175,6.3,63,159,765,5.7,0.0593662,12.8,1,1,1,80
 83 | 181118,333214,2.1,32,116,279,7.72,0.211843,5.9,1,2,0,290
 84 | 179474,331304,1.8,25,81,241,7.932,0.12481,2.9,2,2,1,160
 85 | 179559,331423,2.2,27,131,317,7.82,0.12481,4.5,2,1,0,160
 86 | 179022,330873,2.8,36,216,545,8.575,0.0921516,10.7,2,1,0,140
 87 | 178953,330742,2.4,41,145,505,8.536,0.113941,9.4,2,1,0,150
 88 | 178875,330516,2.6,33,163,420,8.504,0.179216,9.0,2,1,0,220
 89 | 178803,330349,1.8,27,129,332,8.659,0.233596,7.0,2,1,0,280
 90 | 179029,330394,2.0,38,148,400,7.633,0.336861,6.5,2,1,1,450
 91 | 178605,330406,2.7,37,214,553,8.538,0.070355,9.4,2,1,1,70
 92 | 178701,330557,2.7,34,226,577,7.68,0.0593662,10.2,2,1,0,70
 93 | 179547,330245,0.9,19,54,155,7.564,0.255341,6.4,2,1,0,340
 94 | 179301,330179,0.9,22,70,224,7.76,0.364067,7.6,2,1,0,470
 95 | 179405,330567,0.4,26,73,180,7.653,0.429295,7.0,2,1,0,630
 96 | 179462,330766,0.8,25,87,226,7.951,0.380328,5.6,2,1,0,460
 97 | 179293,330797,0.4,22,76,186,8.176,0.249874,6.5,2,1,0,320
 98 | 179180,330710,0.4,24,81,198,8.468,0.266212,6.6,2,1,0,320
 99 | 179206,330398,0.4,18,68,187,8.41,0.451037,5.9,2,1,0,540
100 | 179618,330458,0.8,23,66,199,7.61,0.30971,6.5,2,1,0,420
101 | 179782,330540,0.4,22,49,157,7.792,0.293359,6.4,2,1,0,380
102 | 179980,330773,0.4,23,63,203,8.76,0.532351,7.2,2,2,0,500
103 | 180067,331185,0.4,23,48,143,9.879,0.619513,6.6,2,3,0,760
104 | 180162,331387,0.2,23,51,136,9.097,0.684725,4.3,2,2,0,750
105 | 180451,331473,0.2,18,50,117,9.095,0.809742,5.3,2,3,0,1000
106 | 180328,331158,0.4,20,39,113,9.717,0.880389,4.1,2,3,0,860
107 | 180276,330963,0.2,22,48,130,9.924,0.749591,6.1,2,3,0,680
108 | 180114,330803,0.2,27,64,192,9.404,0.575752,7.5,2,3,0,500
109 | 179881,330912,0.4,25,84,240,10.52,0.581484,8.8,2,3,0,650
110 | 179774,330921,0.2,30,67,221,8.84,0.49452,5.7,2,3,0,630
111 | 179657,331150,0.2,23,49,140,8.472,0.32058,6.1,2,3,0,410
112 | 179731,331245,0.2,24,48,128,9.634,0.336851,7.1,2,3,0,390
113 | 179717,331441,0.2,21,56,166,9.206,0.249852,4.1,2,2,0,310
114 | 179446,331422,0.2,24,65,191,8.47,0.0756869,6.0,2,1,0,70
115 | 179524,331565,0.2,21,84,232,8.463,0.0756869,6.6,2,1,0,70
116 | 179644,331730,0.2,23,75,203,9.691,0.162853,6.8,2,1,1,150
117 | 180321,330366,3.7,53,250,722,8.704,0.0974916,9.1,2,2,0,80
118 | 180162,331837,0.2,33,81,210,9.42,0.440142,5.9,2,2,0,450
119 | 180029,331720,0.2,22,72,198,9.573,0.4619,4.9,2,2,0,530
120 | 179797,331919,0.2,23,86,139,9.555,0.222701,7.1,2,1,0,240
121 | 179642,331955,0.2,25,94,253,8.779,0.103024,8.1,2,1,1,70
122 | 179849,332142,1.2,30,244,703,8.54,0.0921353,8.3,2,1,0,70
123 | 180265,332297,2.4,47,297,832,8.809,0.0484884,10.0,2,1,0,60
124 | 180107,332101,0.2,31,96,262,9.523,0.168331,5.9,2,1,0,190
125 | 180462,331947,0.2,20,56,142,9.811,0.38581,5.0,2,2,0,450
126 | 180478,331822,0.2,16,49,119,9.604,0.489064,4.5,2,2,0,550
127 | 180347,331700,0.2,17,50,152,9.732,0.57602,5.4,2,2,0,650
128 | 180862,333116,0.4,26,148,415,9.518,0.0812194,2.3,2,1,0,100
129 | 180700,332882,1.6,34,162,474,9.72,0.0373369,7.5,2,1,0,170
130 | 180201,331160,0.8,18,37,126,9.036,0.771698,4.6,2,3,1,860
131 | 180173,331923,1.2,23,80,210,9.528,0.336829,5.8,2,2,0,410
132 | 180923,332874,0.2,20,80,220,9.155,0.228123,4.4,3,1,0,290
133 | 180467,331694,0.2,14,49,133,10.08,0.597761,4.4,3,2,0,680
134 | 179917,331325,0.8,46,42,141,9.97,0.44558,4.5,3,2,0,540
135 | 179822,331242,1.0,29,48,158,10.136,0.396675,5.2,3,2,0,480
136 | 179991,331069,0.8,19,41,129,10.32,0.581478,4.6,3,3,0,720
137 | 179120,330578,1.2,31,73,206,9.041,0.287966,6.9,3,1,0,380
138 | 179034,330561,2.0,27,146,451,7.86,0.233596,7.0,3,1,0,310
139 | 179085,330433,1.5,29,95,296,8.741,0.364067,5.4,3,1,0,430
140 | 179236,330046,1.1,22,72,189,7.822,0.331454,6.2,3,1,0,370
141 | 179456,330072,0.8,20,51,154,7.78,0.211846,5.0,3,1,0,290
142 | 179550,329940,0.8,20,54,169,8.121,0.103029,5.1,3,1,0,150
143 | 179445,329807,2.1,29,136,403,8.231,0.070355,8.1,3,1,0,70
144 | 179337,329870,2.5,38,170,471,8.351,0.146576,8.0,3,1,0,220
145 | 179245,329714,3.8,39,179,612,7.3,0.0537723,8.8,3,1,0,80
146 | 179024,329733,3.2,35,200,601,7.536,0.119286,9.3,3,1,0,120
147 | 178786,329822,3.1,42,258,783,7.706,0.0921435,8.4,3,1,0,120
148 | 179135,329890,1.5,24,93,258,8.07,0.249863,7.7,3,1,0,260
149 | 179030,330082,1.2,20,68,214,8.226,0.37494,5.7,3,1,0,440
150 | 179184,330182,0.8,20,49,166,8.128,0.423837,4.7,3,1,0,540
151 | 179085,330292,3.1,39,173,496,8.577,0.423837,9.1,3,1,0,520
152 | 178875,330311,2.1,31,119,342,8.429,0.27709,6.5,3,1,0,350
153 | 179466,330381,0.8,21,51,162,9.406,0.358606,5.7,3,1,0,460
154 | 180627,330190,2.7,27,124,375,8.261,0.0122243,5.5,3,3,0,40
155 | 


--------------------------------------------------------------------------------
/figure.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": null,
   6 |    "id": "119ab91e",
   7 |    "metadata": {},
   8 |    "outputs": [],
   9 |    "source": [
  10 |     "# Standard and GIS Modules\n",
  11 |     "import os\n",
  12 |     "import numpy as np\n",
  13 |     "import matplotlib.pyplot as plt\n",
  14 |     "import pandas as pd\n",
  15 |     "import time\n",
  16 |     "import scipy\n",
  17 |     "\n",
  18 |     "import warnings\n",
  19 |     "import seaborn as sns\n",
  20 |     "warnings.filterwarnings(\"ignore\")\n",
  21 |     "plt.rcParams.update({\"font.size\":20})"
  22 |    ]
  23 |   },
  24 |   {
  25 |    "cell_type": "markdown",
  26 |    "id": "3712c602",
  27 |    "metadata": {},
  28 |    "source": [
  29 |     "### Real data"
  30 |    ]
  31 |   },
  32 |   {
  33 |    "cell_type": "markdown",
  34 |    "id": "5b5f28a9",
  35 |    "metadata": {},
  36 |    "source": [
  37 |     "##### Compute LOSH"
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "code",
  42 |    "execution_count": null,
  43 |    "id": "6eba7823",
  44 |    "metadata": {
  45 |     "scrolled": true
  46 |    },
  47 |    "outputs": [],
  48 |    "source": [
  49 |     "from esda.losh import LOSH\n",
  50 |     "import libpysal\n",
  51 |     "# ls = LOSH(connectivity=w, inference=\"chi-square\").fit(boston_ds['NOX'])\n",
  52 |     "\n",
  53 |     "dataset_target = {\n",
  54 |     "    \"plants\": \"richness_species_vascular\",\n",
  55 |     "    \"meuse\": \"zinc\",\n",
  56 |     "    \"atlantic\": \"Rate\",\n",
  57 |     "    \"deforestation\": \"deforestation_quantile\",\n",
  58 |     "    \"california_housing\": \"median_house_value\",\n",
  59 |     "}\n",
  60 |     "\n",
  61 |     "dataset_losh = {}\n",
  62 |     "for d in os.listdir(\"data\"):\n",
  63 |     "#     if \"meuse\" in d: #  or \"deforestation\" in d:\n",
  64 |     "#         continue\n",
  65 |     "#     print(\"------------- \", d)\n",
  66 |     "    f = pd.read_csv(os.path.join(\"data\", d))\n",
  67 |     "    coords = f[[\"x\", \"y\"]]\n",
  68 |     "    \n",
  69 |     "    # with KNN:\n",
  70 |     "    w = libpysal.weights.KNN(coords, k=20)\n",
  71 |     "    \n",
  72 |     "    # extract target var\n",
  73 |     "    target_var = dataset_target[d.split(\".\")[0]]\n",
  74 |     "    \n",
  75 |     "    ls = LOSH(connectivity=w, inference=\"chi-square\").fit(f[target_var].values)\n",
  76 |     "    \n",
  77 |     "    dataset_losh[(d.split(\".\")[0]).replace(\"_\", \" \")] = np.mean(ls.Hi)\n",
  78 |     "#     f.drop([\"x\", \"y\", ]], axis=1, inplace=True)\n",
  79 |     "#     w_cutoff = (np.max(coords, axis=0) - np.min(coords, axis=0)).sum() / 10\n",
  80 |     "#     print(w_cutoff)\n",
  81 |     "#     w = get_weights_as_array(np.array(coords), w_cutoff)\n",
  82 |     "dataset_losh"
  83 |    ]
  84 |   },
  85 |   {
  86 |    "cell_type": "code",
  87 |    "execution_count": null,
  88 |    "id": "116756e9",
  89 |    "metadata": {},
  90 |    "outputs": [],
  91 |    "source": [
  92 |     "# 100\n",
  93 |     "{'plants': 0.9909525898042943,\n",
  94 |     " 'california_housing': 0.9199100144573773,\n",
  95 |     " 'deforestation': 0.9968442529301278,\n",
  96 |     " 'atlantic': 0.9620820900732183}\n",
  97 |     "# 10\n",
  98 |     "{'plants': 1.0165645863370958,\n",
  99 |     " 'california_housing': 0.8750507024465561,\n",
 100 |     " 'deforestation': 0.9969507834341067,\n",
 101 |     " 'atlantic': 1.0031870804023555}\n",
 102 |     "# 20\n",
 103 |     "{'plants': 1.0609760745783343,\n",
 104 |     " 'california_housing': 0.8773202806724744,\n",
 105 |     " 'deforestation': 0.9978335532280674,\n",
 106 |     " 'atlantic': 1.004762023375391}"
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "markdown",
 111 |    "id": "5109aacb",
 112 |    "metadata": {},
 113 |    "source": [
 114 |     "##### Compute sample size"
 115 |    ]
 116 |   },
 117 |   {
 118 |    "cell_type": "code",
 119 |    "execution_count": null,
 120 |    "id": "c75396c3",
 121 |    "metadata": {
 122 |     "scrolled": true
 123 |    },
 124 |    "outputs": [],
 125 |    "source": [
 126 |     "sample_dict = {}\n",
 127 |     "feature_dict = {}\n",
 128 |     "for out_file in os.listdir(\"data\"):\n",
 129 |     "    if \"folds\" in out_file or \"synthetic\" in out_file or out_file[0] == \".\" or \"deprecated\" in out_file:\n",
 130 |     "        continue\n",
 131 |     "    dataset_name = out_file[:-4].replace(\"_\", \" \")\n",
 132 |     "    data = pd.read_csv(os.path.join(\"data\", out_file))\n",
 133 |     "    sample_dict[dataset_name] = len(data)\n",
 134 |     "    feature_dict[dataset_name] = data.shape[1] - 2\n",
 135 |     "sample_dict"
 136 |    ]
 137 |   },
 138 |   {
 139 |    "cell_type": "markdown",
 140 |    "id": "e7de9f24",
 141 |    "metadata": {},
 142 |    "source": [
  143 |     "#### Model recommendation"
 144 |    ]
 145 |   },
 146 |   {
 147 |    "cell_type": "code",
 148 |    "execution_count": null,
 149 |    "id": "e38ab3ec",
 150 |    "metadata": {},
 151 |    "outputs": [],
 152 |    "source": [
 153 |     "recommendation_dict = {\"california housing\": \"RF\", \"meuse\": \"GWR\", \"plants\": \"GWR / Kriging\", \"atlantic\": \"RF\", \"deforestation\": \"RF\"}"
 154 |    ]
 155 |   },
 156 |   {
 157 |    "cell_type": "markdown",
 158 |    "id": "980e47e2",
 159 |    "metadata": {},
 160 |    "source": [
 161 |     "#### Read results"
 162 |    ]
 163 |   },
 164 |   {
 165 |    "cell_type": "code",
 166 |    "execution_count": null,
 167 |    "id": "54c21e36",
 168 |    "metadata": {},
 169 |    "outputs": [],
 170 |    "source": [
 171 |     "res_path = \"outputs/real_feb_23\""
 172 |    ]
 173 |   },
 174 |   {
 175 |    "cell_type": "code",
 176 |    "execution_count": null,
 177 |    "id": "a1de3c1d",
 178 |    "metadata": {},
 179 |    "outputs": [],
 180 |    "source": [
 181 |     "all_res = []\n",
 182 |     "for out_file in os.listdir(res_path):\n",
 183 |     "    if \"folds\" in out_file or \"synthetic\" in out_file or out_file[0] == \".\" or \"deprecated\" in out_file:\n",
 184 |     "        continue\n",
 185 |     "    dataset_name = \" \".join(out_file.split(\"_\")[1:])[:-4]\n",
 186 |     "    res = pd.read_csv(os.path.join(res_path, out_file))\n",
 187 |     "    res[\"Dataset\"] = dataset_name\n",
 188 |     "    raw_data = pd.read_csv(os.path.join(\"data\", out_file[8:]))\n",
 189 |     "#     res[\"Samples\"] = len(raw_data)\n",
 190 |     "#     res[\"LOSH\"] = round(dataset_losh[out_file[8:-4]], 2)\n",
 191 |     "#     res.sort_values(\"Method\", ascending=False, inplace=True)\n",
 192 |     "    all_res.append(res)\n",
 193 |     "all_res = pd.concat(all_res)"
 194 |    ]
 195 |   },
 196 |   {
 197 |    "cell_type": "code",
 198 |    "execution_count": null,
 199 |    "id": "fed1e3b4",
 200 |    "metadata": {},
 201 |    "outputs": [],
 202 |    "source": [
 203 |     "all_res.loc[all_res[\"Method\"] == \"linear regression\", \"Method\"] = \"OLS\""
 204 |    ]
 205 |   },
 206 |   {
 207 |    "cell_type": "code",
 208 |    "execution_count": null,
 209 |    "id": "1a1f56cb",
 210 |    "metadata": {},
 211 |    "outputs": [],
 212 |    "source": [
 213 |     "# remove SAR\n",
 214 |     "all_res = all_res[all_res[\"Method\"] != \"SAR\"]"
 215 |    ]
 216 |   },
 217 |   {
 218 |    "cell_type": "code",
 219 |    "execution_count": null,
 220 |    "id": "fc2625c7",
 221 |    "metadata": {},
 222 |    "outputs": [],
 223 |    "source": [
 224 |     "# check if all the metrics align in terms of ranking\n",
 225 |     "all(all_res.sort_values([\"Dataset\", \"R-Squared\"]).reset_index() == all_res.sort_values([\"Dataset\", \"MAE\"]).reset_index())"
 226 |    ]
 227 |   },
 228 |   {
 229 |    "cell_type": "code",
 230 |    "execution_count": null,
 231 |    "id": "ff6f1f14",
 232 |    "metadata": {},
 233 |    "outputs": [],
 234 |    "source": [
 235 |     "pivoted = all_res.pivot(index='Dataset', columns='Method', values='RMSE')\n",
 236 |     "pivoted.loc[\"california housing\"] = pivoted.loc[\"california housing\"].round()\n",
 237 |     "# sorted_columns\n",
 238 |     "pivoted[\"LOSH\"] = pd.Series(dataset_losh)\n",
 239 |     "pivoted[\"Samples\"] = pd.Series(sample_dict)\n",
 240 |     "pivoted[\"k\"] = pd.Series(feature_dict)\n",
 241 |     "pivoted[\"Recommended model\"] = pd.Series(recommendation_dict)\n",
 242 |     "col_order = ['Dataset', \"Samples\", \"k\", \"LOSH\", \"Recommended model\", 'OLS', 'SLX', 'GWR', 'RF', 'RF (coordinates)', 'spatial RF', 'Kriging']\n",
 243 |     "\n",
 244 |     "final = pivoted.reset_index().reset_index(drop=True)[col_order] #.drop_index(\"Method\", axis=1)"
 245 |    ]
 246 |   },
 247 |   {
 248 |    "cell_type": "code",
 249 |    "execution_count": null,
 250 |    "id": "b1e9c0c9",
 251 |    "metadata": {},
 252 |    "outputs": [],
 253 |    "source": [
 254 |     "final"
 255 |    ]
 256 |   },
 257 |   {
 258 |    "cell_type": "code",
 259 |    "execution_count": null,
 260 |    "id": "1db0cddf",
 261 |    "metadata": {},
 262 |    "outputs": [],
 263 |    "source": [
 264 |     "print(final.to_latex(index=False, float_format=\"%.2f\"))"
 265 |    ]
 266 |   },
 267 |   {
 268 |    "cell_type": "markdown",
 269 |    "id": "75e44ce3",
 270 |    "metadata": {},
 271 |    "source": [
 272 |     "#### Make table"
 273 |    ]
 274 |   },
 275 |   {
 276 |    "cell_type": "code",
 277 |    "execution_count": null,
 278 |    "id": "587f6a77",
 279 |    "metadata": {},
 280 |    "outputs": [],
 281 |    "source": [
 282 |     "print(all_res.groupby([\"Dataset\", \"Samples\", \"LOSH\", \"Method\"]).mean().round(5).to_latex(float_format=\"%.2f\"))"
 283 |    ]
 284 |   },
 285 |   {
 286 |    "cell_type": "markdown",
 287 |    "id": "ae6a910f",
 288 |    "metadata": {},
 289 |    "source": [
 290 |     "#### Make plot"
 291 |    ]
 292 |   },
 293 |   {
 294 |    "cell_type": "code",
 295 |    "execution_count": null,
 296 |    "id": "a936cde1",
 297 |    "metadata": {},
 298 |    "outputs": [],
 299 |    "source": [
 300 |     "# how big is each real dataset:\n",
 301 |     "data_path = \"data/\"\n",
 302 |     "data_len = {}\n",
 303 |     "for dataset in os.listdir(data_path):\n",
 304 |     "    test = pd.read_csv(data_path+dataset)\n",
 305 |     "#     print(dataset, len(test))\n",
 306 |     "    dataset_name = \" \".join(dataset[:-4].split(\"_\"))\n",
 307 |     "    data_len[dataset_name] = f\"{dataset_name}\\n({len(test)})\"\n",
 308 |     "all_res[\"Dataset\"] = all_res[\"Dataset\"].map(data_len)"
 309 |    ]
 310 |   },
 311 |   {
 312 |    "cell_type": "code",
 313 |    "execution_count": null,
 314 |    "id": "c02ffaf7",
 315 |    "metadata": {},
 316 |    "outputs": [],
 317 |    "source": [
 318 |     "plt.rcParams.update({\"font.size\":20})\n",
 319 |     "plt.figure(figsize=(12,6))\n",
 320 |     "sns.barplot(data=all_res.reset_index(), x=\"Dataset\", y=\"R-Squared\", hue=\"Method\")\n",
 321 |     "# plt.xlabel(\"Number of samples\")\n",
 322 |     "plt.legend(ncol=4, fontsize=15.5)\n",
 323 |     "plt.ylim(0., 1.35)\n",
 324 |     "plt.tight_layout()\n",
 325 |     "plt.xlabel(\"Dataset\", weight=\"bold\")\n",
 326 |     "plt.ylabel(\"R-Squared score\", weight=\"bold\")\n",
 327 |     "plt.savefig(\"outputs/real_dataset_barplot.pdf\")\n",
 328 |     "plt.show()"
 329 |    ]
 330 |   },
 331 |   {
 332 |    "cell_type": "code",
 333 |    "execution_count": null,
 334 |    "id": "d312e4c2",
 335 |    "metadata": {},
 336 |    "outputs": [],
 337 |    "source": [
 338 |     "results.columns"
 339 |    ]
 340 |   },
 341 |   {
 342 |    "cell_type": "markdown",
 343 |    "id": "907e595b",
 344 |    "metadata": {},
 345 |    "source": [
 346 |     "## Explanatory plots"
 347 |    ]
 348 |   },
 349 |   {
 350 |    "cell_type": "code",
 351 |    "execution_count": null,
 352 |    "id": "9f20540b",
 353 |    "metadata": {},
 354 |    "outputs": [],
 355 |    "source": [
 356 |     "weights = np.array([-0.95, 0.38, 0.66, -0.43, 0.22])\n",
 357 |     "\n",
 358 |     "nr_data = 10000\n",
 359 |     "nr_feats = 5\n",
 360 |     "feat_cols = [\"feat_\" + str(i) for i in range(nr_feats)]\n",
 361 |     "\n",
 362 |     "coords = np.array([[i, j] for i in range(int(np.sqrt(nr_data))) for j in range(int(np.sqrt(nr_data)))])\n",
 363 |     "coords = coords / np.max(coords) * 2 - 1\n",
 364 |     "\n",
 365 |     "spatial_variation = np.zeros((nr_data, nr_feats))\n",
 366 |     "for i in range(nr_feats):\n",
 367 |     "    spatial_variation[:, i] = 0.5 * (\n",
 368 |     "        np.sin(coords[:, 0] * np.pi * 2 + i)\n",
 369 |     "        + np.cos(coords[:, 1] * np.pi * 2 + i)\n",
 370 |     "    )"
 371 |    ]
 372 |   },
 373 |   {
 374 |    "cell_type": "code",
 375 |    "execution_count": null,
 376 |    "id": "8d003bf7",
 377 |    "metadata": {},
 378 |    "outputs": [],
 379 |    "source": [
 380 |     "cmap = plt.cm.Spectral\n",
 381 |     "fig = plt.figure(figsize=(20,4)) # TODO\n",
 382 |     "\n",
 383 |     "########## Number 1\n",
 384 |     "for i in range(5):\n",
 385 |     "    ax = fig.add_subplot(1, 5, i+1)\n",
 386 |     "#     print(weights[i] + spatial_variation[:, i])\n",
 387 |     "    cols = [cmap((val+1) / 2) for val in (weights[i] + 0.5 * spatial_variation[:, i])]\n",
 388 |     "    im = ax.scatter(coords[:, 0], coords[:, 1], c=cols)\n",
 389 |     "    ax.set_xlabel(rf\"Coefficient $\\beta_{i+1}$\")\n",
 390 |     "    ax.set_xticks([])\n",
 391 |     "    ax.set_yticks([])\n",
 392 |     "    ax.spines['bottom'].set_color('white')\n",
 393 |     "    ax.spines['top'].set_color('white') \n",
 394 |     "    ax.spines['right'].set_color('white')\n",
 395 |     "    ax.spines['left'].set_color('white')\n",
 396 |     "\n",
 397 |     "import matplotlib as mpl\n",
 398 |     "cmap = mpl.cm.viridis\n",
 399 |     "bounds = [-1, 2, 5, 7, 12, 15]\n",
 400 |     "cmap = mpl.cm.Spectral\n",
 401 |     "norm = mpl.colors.Normalize(vmin=-1, vmax=1)\n",
 402 |     "\n",
 403 |     "fig.subplots_adjust(right=0.88)\n",
 404 |     "cbar_ax = fig.add_axes([0.9, 0.15, 0.02, 0.7])\n",
 405 |     "cbar = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),\n",
 406 |     "             cax=cbar_ax)\n",
 407 |     "\n",
 408 |     "plt.savefig(\"outputs/coefficient_figure_5.png\")\n",
 409 |     "plt.show()\n",
 410 |     "\n",
 411 |     "\n",
 412 |     "# ########## Number 2\n",
 413 |     "# for i in range(5):\n",
 414 |     "#     ax = fig.add_subplot(2, 5, i+1 + 5)\n",
 415 |     "# #     print(weights[i] + spatial_variation[:, i])\n",
 416 |     "#     cols = [cmap((val+1) / 2) for val in (weights[i] + 0.4 * spatial_variation[:, i])]\n",
 417 |     "#     im = ax.scatter(coords[:, 0], coords[:, 1], c=cols)\n",
 418 |     "#     ax.set_xlabel(\"Coefficient \"+str(i+1))\n",
 419 |     "#     ax.set_xticks([])\n",
 420 |     "#     ax.set_yticks([])\n",
 421 |     "#     ax.spines['bottom'].set_color('white')\n",
 422 |     "#     ax.spines['top'].set_color('white') \n",
 423 |     "#     ax.spines['right'].set_color('white')\n",
 424 |     "#     ax.spines['left'].set_color('white')\n",
 425 |     "\n",
 426 |     "# import matplotlib as mpl\n",
 427 |     "# cmap = mpl.cm.viridis\n",
 428 |     "# bounds = [-1, 2, 5, 7, 12, 15]\n",
 429 |     "# cmap = mpl.cm.Spectral\n",
 430 |     "# norm = mpl.colors.Normalize(vmin=-1, vmax=1)\n",
 431 |     "\n",
 432 |     "# fig.subplots_adjust(right=0.825)\n",
 433 |     "# cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])\n",
 434 |     "\n",
 435 |     "# cbar = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),\n",
 436 |     "#              cax=cbar_ax)\n",
 437 |     "             \n",
 438 |     "# plt.tight_layout(w_pad=5, h_pad = 5)\n"
 439 |    ]
 440 |   },
 441 |   {
 442 |    "cell_type": "markdown",
 443 |    "id": "5bffe3f3",
 444 |    "metadata": {},
 445 |    "source": [
 446 |     "### Noise figure\n",
 447 |     "\n",
 448 |     "To generate the noise plots, run the above code with each of the three noise types, and execute the plotting cell below for each type."
 449 |    ]
 450 |   },
 451 |   {
 452 |    "cell_type": "code",
 453 |    "execution_count": null,
 454 |    "id": "7b4531e5",
 455 |    "metadata": {
 456 |     "scrolled": true
 457 |    },
 458 |    "outputs": [],
 459 |    "source": [
 460 |     "# CODE FOR ALL THREE NOISE TYPES\n",
 461 |     "noise_level = 0.5\n",
 462 |     "\n",
 463 |     "nr_data = 10000\n",
 464 |     "nr_feats = 5\n",
 465 |     "feat_cols = [\"feat_\" + str(i) for i in range(nr_feats)]\n",
 466 |     "\n",
 467 |     "coords = np.array([[i, j] for i in range(int(np.sqrt(nr_data))) for j in range(int(np.sqrt(nr_data)))])\n",
 468 |     "coords = coords / np.max(coords) * 2 - 1\n",
 469 |     "synthetic_data = pd.DataFrame(coords, columns=[\"x_coord\", \"y_coord\"])\n",
 470 |     "\n",
 471 |     "spatial_variation = np.zeros((nr_data, nr_feats))\n",
 472 |     "for i in range(nr_feats):\n",
 473 |     "    spatial_variation[:, i] = 0.5 * (\n",
 474 |     "        np.sin(coords[:, 0] * np.pi * 2 + i)\n",
 475 |     "        + np.cos(coords[:, 1] * np.pi * 2 + i)\n",
 476 |     "    )\n",
 477 |     "\n",
 478 |     "    \n",
 479 |     "for noise_type in [\"constant\", \"heterogeneous - same\", \"heterogeneous - different\"]:\n",
 480 |     "\n",
 481 |     "    if noise_type == \"constant\":\n",
 482 |     "        noise = np.random.normal(0, noise_level, nr_data)\n",
 483 |     "    elif noise_type == \"heterogeneous - different\":\n",
 484 |     "        spatial_variation_different = noise_level * (\n",
 485 |     "            0.5\n",
 486 |     "            * (\n",
 487 |     "                synthetic_data[\"x_coord\"].values\n",
 488 |     "                + synthetic_data[\"y_coord\"].values\n",
 489 |     "            )\n",
 490 |     "            + 1\n",
 491 |     "        )\n",
 492 |     "        noise = np.random.normal(\n",
 493 |     "            0,\n",
 494 |     "            spatial_variation_different,\n",
 495 |     "            len(spatial_variation_different),\n",
 496 |     "        )\n",
 497 |     "    elif noise_type == \"heterogeneous - same\":\n",
 498 |     "        # e.g. for a high noise level (0.5): the spatial variation is built from\n",
 499 |     "        # sin and cos, so it lies between -1 and 1; adding 1 shifts it to [0, 2],\n",
 500 |     "        # so on average we multiply by 1, while the noise scale (std) varies\n",
 501 |     "        # between 0.5 * 0 and 0.5 * 2\n",
 502 |     "        spatially_dependent_noise = noise_level * (\n",
 503 |     "            spatial_variation[:, 0] + 1  # without locality level!\n",
 504 |     "        )\n",
 505 |     "        noise = np.random.normal(\n",
 506 |     "            0, spatially_dependent_noise, nr_data\n",
 507 |     "        )\n",
 508 |     "    else:\n",
 509 |     "        raise RuntimeError(\"Noise must be one of above\")\n",
 510 |     "\n",
 511 |     "    plt.figure(figsize=(6,4))\n",
 512 |     "    plt.scatter(coords[:, 0], coords[:, 1], c=noise, vmin=-3, vmax=3)\n",
 513 |     "    plt.colorbar()\n",
 514 |     "    # plt.title(f\"Distribution $\\epsilon$ ({noise_type})\", fontsize=15)\n",
 515 |     "    plt.axis(\"off\")\n",
 516 |     "    plt.tight_layout()\n",
 517 |     "    plt.savefig(f\"outputs/noise_{noise_type}.png\")\n",
 518 |     "    plt.show()"
 519 |    ]
 520 |   },
 521 |   {
 522 |    "cell_type": "markdown",
 523 |    "id": "bbd0f97b",
 524 |    "metadata": {},
 525 |    "source": [
 526 |     "# Synthetic experiment - load results"
 527 |    ]
 528 |   },
 529 |   {
 530 |    "cell_type": "code",
 531 |    "execution_count": null,
 532 |    "id": "f6db9914",
 533 |    "metadata": {},
 534 |    "outputs": [],
 535 |    "source": [
 536 |     "def add_discrete_noise(results):\n",
 537 |     "    results[\"noise_discrete\"] = pd.NA\n",
 538 |     "    results[\"locality_discrete\"] = pd.NA\n",
 539 |     "    results.loc[results[\"noise\"] < 0.3,  \"noise_discrete\"] = \"low\"\n",
 540 |     "    results.loc[results[\"noise\"] >= 0.3,  \"noise_discrete\"] = \"high\"\n",
 541 |     "    results.loc[results[\"locality\"] < 0.3,  \"locality_discrete\"] = \"low\"\n",
 542 |     "    results.loc[results[\"locality\"] >= 0.3,  \"locality_discrete\"] = \"high\"\n",
 543 |     "    return results"
 544 |    ]
 545 |   },
 546 |   {
 547 |    "cell_type": "code",
 548 |    "execution_count": null,
 549 |    "id": "68ae2751",
 550 |    "metadata": {},
 551 |    "outputs": [],
 552 |    "source": [
 553 |     "path = \"outputs/syn_feb_23\""
 554 |    ]
 555 |   },
 556 |   {
 557 |    "cell_type": "code",
 558 |    "execution_count": null,
 559 |    "id": "d905af50",
 560 |    "metadata": {},
 561 |    "outputs": [],
 562 |    "source": [
 563 |     "# # merge the three files\n",
 564 |     "# results = []\n",
 565 |     "# for noise in [\"uniformly_distributed\", \"heterogeneous_-_same\", \"heterogeneous_-_different\"]:\n",
 566 |     "#     results.append(pd.read_csv(os.path.join(path, \"synthetic_data_results_\"+ noise+\".csv\")))\n",
 567 |     "# results = pd.concat(results)\n",
 568 |     "# results.loc[results[\"model\"] == \"linear regression\", \"model\"] = \"OLS\"\n",
 569 |     "# results = add_discrete_noise(results)"
 570 |    ]
 571 |   },
 572 |   {
 573 |    "cell_type": "code",
 574 |    "execution_count": null,
 575 |    "id": "e87187e0",
 576 |    "metadata": {},
 577 |    "outputs": [],
 578 |    "source": [
 579 |     "noise_level_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]\n",
 580 |     "locality_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]"
 581 |    ]
 582 |   },
 583 |   {
 584 |    "cell_type": "code",
 585 |    "execution_count": null,
 586 |    "id": "5d6923fb",
 587 |    "metadata": {},
 588 |    "outputs": [],
 589 |    "source": [
 590 |     "use_function = \"non-linear 2\""
 591 |    ]
 592 |   },
 593 |   {
 594 |    "cell_type": "code",
 595 |    "execution_count": null,
 596 |    "id": "79662156",
 597 |    "metadata": {},
 598 |    "outputs": [],
 599 |    "source": [
 600 |     "# take files from the testing-directory and merge them with the linear results\n",
 601 |     "non_linear_dir = \"new_non_linear\"\n",
 602 |     "test_dir = \"multiple_nonlinear_tests\"\n",
 603 |     "\n",
 604 |     "# merge the three noise-type files (the train file is only re-saved, not merged)\n",
 605 |     "results = []\n",
 606 |     "for noise in [\"uniformly_distributed\", \"heterogeneous_-_same\", \"heterogeneous_-_different\", \"train\"]:\n",
 607 |     "    results2 = pd.read_csv(os.path.join(path, non_linear_dir, test_dir, \"synthetic_data_results_\"+ noise+\".csv\"))\n",
 608 |     "    results1 = pd.read_csv(os.path.join(path, \"synthetic_data_results_\"+ noise+\".csv\"))\n",
 609 |     "    # only use non-linear results from results2 and linear ones from results1\n",
 610 |     "    results2 = results2[results2[\"data mode\"] == use_function]\n",
 611 |     "    results1 = results1[results1[\"data mode\"] == \"linear\"]\n",
 612 |     "    # concat\n",
 613 |     "    results_one = pd.concat((results1, results2))\n",
 614 |     "    # post-processing\n",
 615 |     "    results_one.loc[results_one[\"model\"] == \"linear regression\", \"model\"] = \"OLS\"\n",
 616 |     "    results_one.loc[results_one[\"data mode\"] == use_function, \"data mode\"] = \"non-linear\"\n",
 617 |     "    results_one.to_csv(os.path.join(path, non_linear_dir, \"synthetic_data_results_\"+ noise+\".csv\"))\n",
 618 |     "    if noise != \"train\":\n",
 619 |     "        results.append(results_one)\n",
 620 |     "\n",
 621 |     "results = pd.concat(results)\n",
 622 |     "# general preprocessing steps\n",
 623 |     "results = add_discrete_noise(results)\n",
 624 |     "print(len(results)//2)"
 625 |    ]
 626 |   },
 627 |   {
 628 |    "cell_type": "markdown",
 629 |    "id": "f24e27eb",
 630 |    "metadata": {},
 631 |    "source": [
 632 |     "### Main plot"
 633 |    ]
 634 |   },
 635 |   {
 636 |    "cell_type": "code",
 637 |    "execution_count": null,
 638 |    "id": "1ea6447f",
 639 |    "metadata": {},
 640 |    "outputs": [],
 641 |    "source": [
 642 |     "include_models = ['OLS', 'SLX', 'GWR', 'RF', 'RF (coordinates)', 'spatial RF', 'Kriging']\n"
 643 |    ]
 644 |   },
 645 |   {
 646 |    "cell_type": "code",
 647 |    "execution_count": null,
 648 |    "id": "05a3385e",
 649 |    "metadata": {},
 650 |    "outputs": [],
 651 |    "source": [
 652 |     "def main_plot(results, nr_data=500, noise_type=\"uniformly distributed\", save_path=\"outputs/main_plot.pdf\", score_col=\"RMSE\"):\n",
 653 |     "    include_models = [m for m in include_models if m in results[\"model\"].unique()]\n",
 654 |     "    include_function = [\"linear\", \"non-linear\"]\n",
 655 |     "    # [model for model in results[\"model\"].unique() if \"geo\" not in model]\n",
 656 |     "    nr_models = len(include_models)\n",
 657 |     "    fig = plt.figure(figsize=(15, 4))\n",
 658 |     "    #fig = plt.figure(figsize=(16, 6.5))\n",
 659 |     "    for mode_ind, mode in enumerate(include_function):\n",
 660 |     "    #     print(\"----------------\")\n",
 661 |     "        for model_ind, model in enumerate(include_models):\n",
 662 |     "    #         print(mode, \"data, --> model:\", model)\n",
 663 |     "            results_filter = results[\n",
 664 |     "                (results[\"data mode\"] == mode) & \n",
 665 |     "                (results[\"model\"] == model) & \n",
 666 |     "                (results[\"nr_data\"] == nr_data) & \n",
 667 |     "                (results[\"noise_type\"] == noise_type)\n",
 668 |     "            ]\n",
 669 |     "            results_filter.set_index([\"noise\", \"locality\"], inplace=True)\n",
 670 |     "            visualize_scores = np.zeros((len(noise_level_range), len(locality_range)))\n",
 671 |     "            for i, noise in enumerate(noise_level_range):\n",
 672 |     "                for j, locality in enumerate(locality_range):\n",
 673 |     "                    score = results_filter.loc[noise, locality][score_col].mean()\n",
 674 |     "                    visualize_scores[i, j] = score\n",
 675 |     "\n",
 676 |     "            ax1 = fig.add_subplot(len(include_function), nr_models+1, ((nr_models+1) * mode_ind) + model_ind+1)\n",
 677 |     "            imshow_plot = ax1.imshow(visualize_scores, vmin=0, vmax=0.9)\n",
 678 |     "    #         plt.axis(\"off\")\n",
 679 |     "#             if model_ind==0:\n",
 680 |     "#                 ax1.set_ylabel(\"$\\longleftarrow$ Increasing \\n noise\", fontsize=15)\n",
 681 |     "#             ax1.yaxis.set_label_position(\"right\")\n",
 682 |     "#             ax1.yaxis.tick_right()\n",
 683 |     "            plt.xticks([])\n",
 684 |     "            plt.yticks([])\n",
 685 |     "#             ax1.set_xlabel(\"$\\longrightarrow$ decreasing \\n stationarity\", fontsize=10)\n",
 686 |     "            if model_ind == 0:\n",
 687 |     "    #             ax2 = ax1.twinx()\n",
 688 |     "    #             ax2.set_ylabel(mode)\n",
 689 |     "    #             ax2.yaxis.set_label_position(\"right\")\n",
 690 |     "#                 pad = 2\n",
 691 |     "                mode_new = \"non-linear\\n(simple)  \" if mode == \"non-linear (simple)\" else mode\n",
 692 |     "                ax1.annotate(mode_new, xy=(0, 0.5), xytext=(-50, 0), # ax1.yaxis.labelpad - pad\n",
 693 |     "                    xycoords=ax1.yaxis.label, textcoords='offset points',\n",
 694 |     "                    size=18, ha='right', va='center', rotation=90, weight=\"bold\")\n",
 695 |     "            if mode_ind == 0:\n",
 696 |     "                ax1.set_title(model, weight=\"bold\", fontsize=15)\n",
 697 |     "    \n",
 698 |     "    fig.text(0.5, 0.0, \"$\\longrightarrow$ decreasing stationarity\", ha='center')\n",
 699 |     "#     fig.text(0.5, 0.36, \"$\\longrightarrow$ decreasing stationarity\", ha='center')\n",
 700 |     "#     fig.text(0.5, 0.7, \"$\\longrightarrow$ decreasing stationarity\", ha='center')\n",
 701 |     "    \n",
 702 |     "    fig.text(0.06, 0.45, \"$\\longleftarrow$ Increasing noise\", va='center', rotation='vertical')\n",
 703 |     "    # make colorbar\n",
 704 |     "    # fig.subplots_adjust(right=0.95)\n",
 705 |     "    cbar_ax = fig.add_axes([0.88, 0.05, 0.02, 0.9])\n",
 706 |     "    fig.colorbar(imshow_plot, cax=cbar_ax, label=score_col)\n",
 707 |     "    plt.tight_layout()\n",
 708 |     "    if save_path is not None:\n",
 709 |     "        plt.savefig(save_path)\n",
 710 |     "    plt.show()"
 711 |    ]
 712 |   },
 713 |   {
 714 |    "cell_type": "code",
 715 |    "execution_count": null,
 716 |    "id": "f45f6842",
 717 |    "metadata": {},
 718 |    "outputs": [],
 719 |    "source": [
 720 |     "main_plot(results, nr_data=1000, save_path =\"outputs/main_plot.pdf\")"
 721 |    ]
 722 |   },
 723 |   {
 724 |    "cell_type": "markdown",
 725 |    "id": "39700ebb",
 726 |    "metadata": {},
 727 |    "source": [
 728 |     "### Barplot - low noise, high non-stationarity (>= 0.3), over samples\n"
 729 |    ]
 730 |   },
 731 |   {
 732 |    "cell_type": "code",
 733 |    "execution_count": null,
 734 |    "id": "5e76ad28",
 735 |    "metadata": {},
 736 |    "outputs": [],
 737 |    "source": [
 738 |     "score_col = \"RMSE\""
 739 |    ]
 740 |   },
 741 |   {
 742 |    "cell_type": "code",
 743 |    "execution_count": null,
 744 |    "id": "3ecbf08c",
 745 |    "metadata": {},
 746 |    "outputs": [],
 747 |    "source": [
 748 |     "# exclude SAR (the stricter local-model filter below is commented out)\n",
 749 |     "subset = results[\n",
 750 |     "    (results[\"model\"] != \"SAR\")\n",
 751 |     "     # .isin([\"GWR\", \"RF\", \"RF (coordinates)\", \"spatial RF\", \"Kriging\"])) # \"spatial RF\",\n",
 752 |     "]"
 753 |    ]
 754 |   },
 755 |   {
 756 |    "cell_type": "code",
 757 |    "execution_count": null,
 758 |    "id": "ac28b3eb",
 759 |    "metadata": {},
 760 |    "outputs": [],
 761 |    "source": [
 762 |     "plt.rcParams.update({\"font.size\":22})"
 763 |    ]
 764 |   },
 765 |   {
 766 |    "cell_type": "code",
 767 |    "execution_count": null,
 768 |    "id": "441d1439",
 769 |    "metadata": {
 770 |     "scrolled": true
 771 |    },
 772 |    "outputs": [],
 773 |    "source": [
 774 |     "# subset.groupby([\"nr_data\", \"data mode\", \"model\", \"noise_discrete\", \"locality_discrete\"]).agg({\"R2 score\": \"mean\"})\n",
 775 |     "plt.figure(figsize=(18,6))\n",
 776 |     "counter = 1\n",
 777 |     "modes = [\"linear\", \"non-linear\"]\n",
 778 |     "for mode, save_name in zip(modes, [\"linear\", \"non_linear\"]):\n",
 779 |     "    plt.subplot(1, len(modes), counter)\n",
 780 |     "    counter += 1\n",
 781 |     "    subset_2 = subset[\n",
 782 |     "        (subset[\"data mode\"] == mode) &\n",
 783 |     "        (subset[\"noise_discrete\"] == \"low\") & \n",
 784 |     "        (subset[\"locality_discrete\"] == \"high\") & \n",
 785 |     "#         (subset[\"noise\"] == 0.1) & (subset[\"locality\"] == 0.4)&\n",
 786 |     "        (subset[\"noise_type\"] == \"uniformly distributed\")\n",
 787 |     "    ]\n",
 788 |     "    subset_2 = subset_2.groupby([\"nr_data\", \"model\"]).agg({score_col: \"mean\"})\n",
 789 |     "\n",
 790 |     "    ax = sns.barplot(data=subset_2.reset_index().set_index(\"model\").loc[include_models].reset_index(), x=\"nr_data\", y=score_col, hue=\"model\")\n",
 791 |     "    plt.ylim(0, 0.7)\n",
 792 |     "    plt.xlabel(\"Number of samples\")\n",
 793 |     "#     if mode == \"non-linear (simple)\":\n",
 794 |     "#         plt.legend(title=\"Model\", loc=\"lower right\", framealpha=1, ncol=2)\n",
 795 |     "#     else:\n",
 796 |     "    plt.legend([],[], frameon=False)\n",
 797 |     "    plt.title(mode+\" DGP\")\n",
 798 |     "    \n",
 799 |     "handles, labels = ax.get_legend_handles_labels()\n",
 800 |     "plt.tight_layout()\n",
 801 |     "plt.figlegend(handles, labels, loc = 'upper center', ncol=7, labelspacing=0., bbox_to_anchor=(0.5,1.09))\n",
 802 |     "plt.savefig(f\"outputs/barplot_main.pdf\", bbox_inches=\"tight\")\n",
 803 |     "plt.show()\n",
 804 |     "    "
 805 |    ]
 806 |   },
 807 |   {
 808 |    "cell_type": "markdown",
 809 |    "id": "f89fd4ac",
 810 |    "metadata": {},
 811 |    "source": [
 812 |     "### Noise type analysis"
 813 |    ]
 814 |   },
 815 |   {
 816 |    "cell_type": "code",
 817 |    "execution_count": null,
 818 |    "id": "28fd4877",
 819 |    "metadata": {},
 820 |    "outputs": [],
 821 |    "source": [
 822 |     "fontsize=18\n",
 823 |     "plt.rcParams.update({\"font.size\":fontsize})"
 824 |    ]
 825 |   },
 826 |   {
 827 |    "cell_type": "code",
 828 |    "execution_count": null,
 829 |    "id": "467e6faa",
 830 |    "metadata": {},
 831 |    "outputs": [],
 832 |    "source": [
 833 |     "subset = results[\n",
 834 |     "    results[\"model\"].isin([\"OLS\", \"GWR\", \"RF (coordinates)\", \"Kriging\"]) # \"spatial RF\",\n",
 835 |     "]\n",
 836 |     "subset[\"noise_type\"] = subset[\"noise_type\"].map({\n",
 837 |     "    'uniformly distributed':'uniformly distributed noise', 'heterogeneous - same': 'heterogeneous (trigonometric)',\n",
 838 |     "       'heterogeneous - different': \"heterogeneous (linear)\" \n",
 839 |     "})"
 840 |    ]
 841 |   },
 842 |   {
 843 |    "cell_type": "code",
 844 |    "execution_count": null,
 845 |    "id": "1d974285",
 846 |    "metadata": {},
 847 |    "outputs": [],
 848 |    "source": [
 849 |     "# fig = plt.figure(figsize=(12, 9))\n",
 850 |     "fig = plt.figure(figsize=(13, 4))\n",
 851 |     "counter = 1\n",
 852 |     "for i, mode in enumerate([\"linear\", \"non-linear\"]): #  \"non-linear (simple)\" # linear\", \n",
 853 |     "    for j, model in enumerate([\"GWR\", \"Kriging\"]):\n",
 854 |     "        subset2 = subset[\n",
 855 |     "                (subset[\"model\"] == model) &\n",
 856 |     "                (subset[\"data mode\"] == mode) &\n",
 857 |     "        #         (subset[\"noise_discrete\"] == \"low\") & \n",
 858 |     "                (subset[\"locality_discrete\"] == \"high\") & \n",
 859 |     "        #         (subset[\"noise\"] == 0.3) & \n",
 860 |     "#                 (subset[\"locality\"] == 0.4) & \n",
 861 |     "        #         (subset[\"noise_type\"] == \"constant\") &\n",
 862 |     "                (subset[\"nr_data\"] == 500)\n",
 863 |     "        ]\n",
 864 |     "#         subset2[\"noise_type\"] = subset2[\"noise_type\"] + \" noise\"\n",
 865 |     "        ax = fig.add_subplot(1, 4, counter)\n",
 866 |     "        sns.lineplot(ax=ax, data=subset2.reset_index(), x =\"noise\", y=\"RMSE\", hue=\"noise_type\")\n",
 867 |     "#         if counter == 1:\n",
 868 |     "#             plt.legend(title=\"Noise (spatial distribution)\")# , loc=(1, 1))\n",
 869 |     "#         else:\n",
 870 |     "        plt.legend([], [], frameon=False)\n",
 871 |     "        plt.xlabel(r\"Noise level $\\sigma$\")\n",
 872 |     "    \n",
 873 |     "        plt.title(f\"{model}\\n{mode} DGP\", fontsize=fontsize)\n",
 874 |     "        if counter > 1:\n",
 875 |     "            plt.ylabel(\"\")\n",
 876 |     "            plt.yticks([], [])\n",
 877 |     "#         if j == 0:\n",
 878 |     "#             ax.annotate(mode, xy=(0, 0.5), xytext=(-20, 0), # ax1.yaxis.labelpad - pad\n",
 879 |     "#                     xycoords=ax.yaxis.label, textcoords='offset points',\n",
 880 |     "#                     size='large', ha='right', va='center', rotation=90, weight=\"bold\")\n",
 881 |     "#         ax.set_xlabel(\"Noise level ($\\sigma$)\")\n",
 882 |     "        ax.set_ylim(0, 0.8)\n",
 883 |     "#         if counter in [2, 4]:\n",
 884 |     "#             plt.ylabel(\"\")\n",
 885 |     "#         if counter in [1, 2]:\n",
 886 |     "#             plt.title(model, weight=\"bold\", fontsize=16)\n",
 887 |     "        counter += 1\n",
 888 |     "\n",
 889 |     "handles, labels = ax.get_legend_handles_labels()\n",
 890 |     "plt.tight_layout()\n",
 891 |     "plt.figlegend(handles, labels, loc = 'upper center', ncol=3, labelspacing=0., bbox_to_anchor=(0.5,1.09))\n",
 892 |     "plt.savefig(\"outputs/noise_analysis.pdf\", bbox_inches=\"tight\")\n",
 893 |     "plt.show()"
 894 |    ]
 895 |   },
 896 |   {
 897 |    "cell_type": "markdown",
 898 |    "id": "5e970481",
 899 |    "metadata": {},
 900 |    "source": [
 901 |     "### Runtime"
 902 |    ]
 903 |   },
 904 |   {
 905 |    "cell_type": "code",
 906 |    "execution_count": null,
 907 |    "id": "acd774aa",
 908 |    "metadata": {},
 909 |    "outputs": [],
 910 |    "source": [
 911 |     "time_plot_data = results.groupby([\"model\", \"nr_data\"])[\"time\"].mean()\n",
 912 |     "time_plot_data = time_plot_data.loc[['OLS', 'SLX', 'GWR', 'RF', 'RF (coordinates)', 'spatial RF', 'Kriging']]"
 913 |    ]
 914 |   },
 915 |   {
 916 |    "cell_type": "code",
 917 |    "execution_count": null,
 918 |    "id": "bd508470",
 919 |    "metadata": {},
 920 |    "outputs": [],
 921 |    "source": [
 922 |     "plt.figure(figsize=(10,4))\n",
 923 |     "sns.barplot(data=time_plot_data.reset_index(), x=\"nr_data\", y=\"time\", hue=\"model\")\n",
 924 |     "plt.yscale(\"log\")\n",
 925 |     "plt.ylabel(\"Runtime (seconds)\")\n",
 926 |     "plt.xlabel(\"Number of samples\")\n",
 927 |     "plt.legend(ncol=3, loc=\"upper left\")\n",
 928 |     "plt.ylim(0, 800)\n",
 929 |     "plt.tight_layout()\n",
 930 |     "plt.savefig(\"outputs/runtime_plot.pdf\")\n",
 931 |     "plt.show()"
 932 |    ]
 933 |   },
 934 |   {
 935 |    "cell_type": "markdown",
 936 |    "id": "de3f6fc3",
 937 |    "metadata": {},
 938 |    "source": [
 939 |     "### Train vs test score"
 940 |    ]
 941 |   },
 942 |   {
 943 |    "cell_type": "code",
 944 |    "execution_count": null,
 945 |    "id": "831a9c34",
 946 |    "metadata": {},
 947 |    "outputs": [],
 948 |    "source": [
 949 |     "train_results = pd.read_csv(os.path.join(path, 'new_non_linear', \"synthetic_data_results_train.csv\"))\n",
 950 |     "train_results.loc[train_results[\"model\"] == \"linear regression\", \"model\"] = \"OLS\"\n",
 951 |     "test_results = results[results[\"noise_type\"] == \"uniformly distributed\"]# .drop(\"Unnamed: 0\", axis=1)\n",
 952 |     "train_results[\"evaluation\"] = \"training error\"\n",
 953 |     "test_results[\"evaluation\"] = \"testing error\"\n",
 954 |     "print(len(train_results), len(test_results))\n",
 955 |     "traintest = pd.concat((train_results, test_results))\n",
 956 |     "# traintest[\"R2 score\"] = traintest[\"R2 score\"].clip(-5, 1)"
 957 |    ]
 958 |   },
 959 |   {
 960 |    "cell_type": "code",
 961 |    "execution_count": null,
 962 |    "id": "05d12297",
 963 |    "metadata": {},
 964 |    "outputs": [],
 965 |    "source": [
 966 |     "fig = plt.figure(figsize=(15, 4)) # 10\n",
 967 |     "subset = traintest[\n",
 968 |     "    (traintest[\"model\"].isin([\"SLX\", \"GWR\"])) # & # \"spatial RF\",\n",
 969 |     "    & (traintest[\"nr_data\"] == 1000)\n",
 970 |     "].sort_values(\"model\", ascending=False)\n",
 971 |     "counter = 1\n",
 972 |     "modes_considered = [\"linear\"] # [\"linear\", \"non-linear\"]\n",
 973 |     "signal_to_noise_modes = [\n",
 974 |     "        \"Weak non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\",\n",
 975 |     "        \"Strong non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\",\n",
 976 |     "        \"Weak non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\",\n",
 977 |     "        \"Strong non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\",\n",
 978 |     "]\n",
 979 |     "for i, mode in enumerate(modes_considered): #  \"non-linear (simple)\" # linear\", \n",
 980 |     "    for j, greatersmaller in enumerate(signal_to_noise_modes):\n",
 981 |     "        #\"Low $\\lambda$ and low $\\sigma$\",\n",
 982 |     "         #                               \"High $\\lambda$ and low $\\sigma$\", \n",
 983 |     "          #                              \"Low $\\lambda$ and high $\\sigma$\"]):\n",
 984 |     "        if greatersmaller == \"Weak non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\":\n",
 985 |     "            subset2 = subset[\n",
 986 |     "                (subset[\"noise\"] < 0.4) & \n",
 987 |     "                (subset[\"locality\"] < 0.4)\n",
 988 |     "            ]\n",
 989 |     "        elif greatersmaller == \"Strong non-stationarity ($\\lambda$) \\n and weak noise ($\\sigma$)\":\n",
 990 |     "            subset2 = subset[\n",
 991 |     "                (subset[\"noise\"] < 0.4) & \n",
 992 |     "                (subset[\"locality\"] >= 0.4)\n",
 993 |     "            ]\n",
 994 |     "        elif greatersmaller == \"Strong non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\":\n",
 995 |     "            subset2 = subset[\n",
 996 |     "                (subset[\"noise\"] >= 0.4) & \n",
 997 |     "                (subset[\"locality\"] >= 0.4)\n",
 998 |     "            ]\n",
 999 |     "        elif greatersmaller == \"Weak non-stationarity ($\\lambda$) \\n and strong noise ($\\sigma$)\":\n",
1000 |     "            subset2 = subset[\n",
1001 |     "                (subset[\"noise\"] >= 0.4) & \n",
1002 |     "                (subset[\"locality\"] < 0.4)\n",
1003 |     "            ]\n",
1004 |     "        subset2 = subset2[subset2[\"data mode\"] == mode]\n",
1005 |     "        \n",
1006 |     "        if i==0 and j==1: \n",
1007 |     "            print(subset2.groupby([\"model\", \"evaluation\"]).agg({\"RMSE\": \"mean\"}))\n",
1008 |     "        ax = fig.add_subplot(len(modes_considered), len(signal_to_noise_modes), counter)\n",
1009 |     "        sns.barplot(ax=ax, data=subset2, x=\"model\", y=\"RMSE\", hue=\"evaluation\")\n",
1010 |     "#         if counter == 2:\n",
1011 |     "#             plt.legend(title=\"Evaluation data\", ncol=1, framealpha=1, loc=\"lower center\")\n",
1012 |     "#         else:\n",
1013 |     "        ymax = 0.59\n",
1014 |     "        plt.legend([], [], frameon=False)\n",
1015 |     "        if j == 0:\n",
1016 |     "            ax.annotate(mode, xy=(0, 0.5), xytext=(-20, 0), # ax1.yaxis.labelpad - pad\n",
1017 |     "                    xycoords=ax.yaxis.label, textcoords='offset points',\n",
1018 |     "                    size=19, ha='right', va='center', rotation=90, weight=\"bold\")\n",
1019 |     "        if i == len(modes_considered)-2:\n",
1020 |     "            plt.xticks([], [])\n",
1021 |     "        if j > 0:\n",
1022 |     "            plt.yticks([], [])\n",
1023 |     "        else:\n",
1024 |     "            plt.yticks(np.arange(0, ymax, 0.1), np.around(np.arange(0, ymax, 0.1), 1))\n",
1025 |     "#         ax.set_xlabel(\"Noise level ($\\sigma$)\")\n",
1026 |     "        ax.set_ylim(0, ymax)\n",
1027 |     "        ax.set_xlabel(\"\")\n",
1028 |     "        if counter in [2, 3, 4]:\n",
1029 |     "            plt.ylabel(\"\")\n",
1030 |     "        if counter <= len(signal_to_noise_modes):\n",
1031 |     "            plt.title(greatersmaller, weight=\"bold\", fontsize=15)\n",
1032 |     "        counter += 1\n",
1033 |     "plt.tight_layout()\n",
1034 |     "\n",
1035 |     "handles, labels = ax.get_legend_handles_labels()\n",
1036 |     "plt.tight_layout()\n",
1037 |     "plt.figlegend(handles, labels, loc = 'upper center', ncol=2, labelspacing=0., bbox_to_anchor=(0.5,1.1))\n",
1038 |     "plt.savefig(\"outputs/train_analysis.pdf\", bbox_inches=\"tight\")\n",
1039 |     "plt.show()"
1040 |    ]
1041 |   },
1042 |   {
1043 |    "cell_type": "code",
1044 |    "execution_count": null,
1045 |    "id": "592a99ca",
1046 |    "metadata": {},
1047 |    "outputs": [],
1048 |    "source": [
1049 |     " 0.198287 / 0.153864, 0.22/0.189239, 0.195645 / 0.149066"
1050 |    ]
1051 |   },
1052 |   {
1053 |    "cell_type": "markdown",
1054 |    "id": "a027c8f8",
1055 |    "metadata": {},
1056 |    "source": [
1057 |     "### Geo rf vs spatial rf (with old results)"
1058 |    ]
1059 |   },
1060 |   {
1061 |    "cell_type": "code",
1062 |    "execution_count": null,
1063 |    "id": "3bfd2ea4",
1064 |    "metadata": {},
1065 |    "outputs": [],
1066 |    "source": [
1067 |     "res_old = pd.read_csv(\"outputs/synthetic_results_nov_22/synthetic_results.csv\")\n",
1068 |     "res_old = res_old[res_old[\"model\"].isin(['geographical RF', 'spatial RF']) & (res_old[\"nr_data\"] < 1000)]"
1069 |    ]
1070 |   },
1071 |   {
1072 |    "cell_type": "code",
1073 |    "execution_count": null,
1074 |    "id": "cb1177f1",
1075 |    "metadata": {},
1076 |    "outputs": [],
1077 |    "source": [
1078 |     "spatial_rf_better = res_old.set_index(\"model\")\n",
1079 |     "print(\"Spatial RF has better R2 score in percent cases:\",\n",
1080 |     "    sum(spatial_rf_better.loc[\"spatial RF\"][\"R2 score\"].values >= spatial_rf_better.loc[\"geographical RF\"][\"R2 score\"].values) / (len(spatial_rf_better) / 2)\n",
1081 |     "     )"
1082 |    ]
1083 |   },
1084 |   {
1085 |    "cell_type": "code",
1086 |    "execution_count": null,
1087 |    "id": "8120f832",
1088 |    "metadata": {},
1089 |    "outputs": [],
1090 |    "source": [
1091 |     "# # First try: model on x axis and function mode on y axis\n",
1092 |     "# plt.figure(figsize=(10, 4))\n",
1093 |     "# res_old.loc[res_old[\"R2 score\"] < 0, \"R2 score\"] = 0\n",
1094 |     "# sns.barplot(data=res_old, x=\"model\", y=\"R2 score\", hue=\"data mode\")\n",
1095 |     "# plt.legend(ncol=3, fontsize=17, loc=\"upper center\")\n",
1096 |     "# plt.ylim(0, 1)\n",
1097 |     "\n",
1098 |     "plt.rcParams.update({\"font.size\":18})\n",
1099 |     "plt.figure(figsize=(7, 4.4))\n",
1100 |     "res_old.loc[res_old[\"R2 score\"] < 0, \"R2 score\"] = 0\n",
1101 |     "sns.barplot(data=res_old, x=\"data mode\", y=\"R2 score\", hue=\"model\")\n",
1102 |     "plt.legend(ncol=2, fontsize=18, loc=\"upper center\") # , title=\"Model\")\n",
1103 |     "plt.xlabel(\"Function mode\")\n",
1104 |     "plt.ylim(0, 1)\n",
1105 |     "plt.tight_layout()\n",
1106 |     "plt.savefig(\"outputs/geo_vs_spatial_rf.pdf\")\n",
1107 |     "plt.show()"
1108 |    ]
1109 |   },
1110 |   {
1111 |    "cell_type": "markdown",
1112 |    "id": "8259dff5",
1113 |    "metadata": {},
1114 |    "source": [
1115 |     "## GWR coefficient analysis"
1116 |    ]
1117 |   },
1118 |   {
1119 |    "cell_type": "code",
1120 |    "execution_count": null,
1121 |    "id": "248253fc",
1122 |    "metadata": {},
1123 |    "outputs": [],
1124 |    "source": [
1125 |     "import os\n",
1126 |     "import numpy as np\n",
1127 |     "import matplotlib.pyplot as plt\n",
1128 |     "import pandas as pd\n",
1129 |     "import time\n",
1130 |     "import scipy\n",
1131 |     "import warnings\n",
1132 |     "\n",
1133 |     "from sklearn.metrics import r2_score\n",
1134 |     "from mgwr.gwr import GWR\n",
1135 |     "from mgwr.sel_bw import Sel_BW\n",
1136 |     "\n",
1137 |     "np.random.seed(42)\n",
1138 |     "\n",
1139 |     "\n",
1140 |     "\n",
1141 |     "def non_linear_function_simple(feat_arr, weights):\n",
1142 |     "    if len(weights.shape) == 1:\n",
1143 |     "        weights = np.expand_dims(weights, 0)\n",
1144 |     "    function_zoo = [\n",
1145 |     "        np.sin,\n",
1146 |     "        np.exp,\n",
1147 |     "        lambda x: x ** 2,\n",
1148 |     "        lambda x: x,\n",
1149 |     "        np.cos,\n",
1150 |     "        lambda x: np.log(x ** 2),\n",
1151 |     "    ]\n",
1152 |     "    feature_transformed = np.zeros(feat_arr.shape)\n",
1153 |     "    for i in range(feat_arr.shape[1]):\n",
1154 |     "        feature_transformed[:, i] = (\n",
1155 |     "            function_zoo[i](feat_arr[:, i]) * weights[:, i]\n",
1156 |     "        )\n",
1157 |     "    return np.sum(feature_transformed, axis=1)\n",
1158 |     "\n",
1159 |     "nr_feats = 5\n",
1160 |     "max_depth = 30\n",
1161 |     "noise_type = \"constant\"\n",
1162 |     "\n",
1163 |     "locality = 0.3\n",
1164 |     "\n",
1165 |     "weights = np.array([-0.95, 0.38, 0.66, -0.43, 0.22])\n",
1166 |     "\n",
1167 |     "nr_data = 1000\n",
1168 |     "\n",
1169 |     "# MAKE MAIN DATA\n",
1170 |     "train_cutoff = int(nr_data * 0.9)\n",
1171 |     "feat_cols = [\"feat_\" + str(i) for i in range(nr_feats)]\n",
1172 |     "synthetic_data = pd.DataFrame(\n",
1173 |     "    np.random.rand(nr_data, 2 + nr_feats) * 2 - 1,\n",
1174 |     "    columns=[\"x_coord\", \"y_coord\"] + feat_cols,\n",
1175 |     ")\n",
1176 |     "\n",
1177 |     "# simulate spatial variation of features (varying per weight)\n",
1178 |     "spatial_variation = np.zeros((nr_data, nr_feats))\n",
1179 |     "for i in range(nr_feats):\n",
1180 |     "    spatial_variation[:, i] = 0.5 * (\n",
1181 |     "        np.sin(synthetic_data[\"x_coord\"].values * np.pi * 2 + i)\n",
1182 |     "        + np.cos(synthetic_data[\"y_coord\"].values * np.pi * 2 + i)\n",
1183 |     "    )\n",
1184 |     "                \n",
1185 |     "spatially_dependent_weights = weights + locality * spatial_variation\n",
1186 |     "\n",
1187 |     "synthetic_data[\"label\"] = non_linear_function_simple(\n",
1188 |     "                        synthetic_data[feat_cols].values,\n",
1189 |     "                        spatially_dependent_weights,\n",
1190 |     "                    )\n",
1191 |     "\n",
1192 |     "param_arr = [spatially_dependent_weights[:train_cutoff, 0]]\n",
1193 |     "for noise_level in [0, 0.3, 0.5]:\n",
1194 |     "    noise = np.random.normal(0, noise_level, nr_data)\n",
1195 |     "    synthetic_data[\"label\"] = synthetic_data[\"label\"] + noise\n",
1196 |     "\n",
1197 |     "    train_data, test_data = (\n",
1198 |     "                        synthetic_data[:train_cutoff],\n",
1199 |     "                        synthetic_data[train_cutoff:],\n",
1200 |     "                    )\n",
1201 |     "\n",
1202 |     "    train_coords = np.array(train_data[[\"x_coord\", \"y_coord\"]])\n",
1203 |     "    train_y = np.expand_dims(train_data[\"label\"].values, 1)\n",
1204 |     "    train_x = np.array(train_data[feat_cols])\n",
1205 |     "    # bandwidth selection\n",
1206 |     "    # import pickle\n",
1207 |     "    gwr_selector = Sel_BW(\n",
1208 |     "        train_coords, train_y, train_x, fixed=True, kernel=\"exponential\"\n",
1209 |     "    )\n",
1210 |     "    gwr_bw = gwr_selector.search(criterion=\"AICc\")\n",
1211 |     "    # create and train model\n",
1212 |     "    gwr_model = GWR(\n",
1213 |     "        train_coords,\n",
1214 |     "        train_y,\n",
1215 |     "        train_x,\n",
1216 |     "        gwr_bw,\n",
1217 |     "        kernel=\"exponential\",\n",
1218 |     "        fixed=True,\n",
1219 |     "    )\n",
1220 |     "    gwr_results = gwr_model.fit()\n",
1221 |     "\n",
1222 |     "    test_coords = np.array(test_data[[\"x_coord\", \"y_coord\"]])\n",
1223 |     "    test_x = np.array(test_data[feat_cols])\n",
1224 |     "    # predict\n",
1225 |     "    test_pred = gwr_model.predict(\n",
1226 |     "        test_coords, test_x, gwr_results.scale, gwr_results.resid_response\n",
1227 |     "    ).predictions\n",
1228 |     "\n",
1229 |     "\n",
1230 |     "    score = r2_score(test_pred, test_data[\"label\"])\n",
1231 |     "    print(score)\n",
1232 |     "    param_arr.append(gwr_results.params[:, 1])"
1233 |    ]
1234 |   },
1235 |   {
1236 |    "cell_type": "code",
1237 |    "execution_count": null,
1238 |    "id": "583a60ec",
1239 |    "metadata": {},
1240 |    "outputs": [],
1241 |    "source": [
1242 |     "plt.figure(figsize=(16,4))\n",
1243 |     "names = [r\"Real $\\beta_1$\", r\"GWR $\\beta_1$ ($\\sigma=0$)\", r\"GWR $\\beta_1$ ($\\sigma=0.3$)\",r\"GWR $\\beta_1$ ($\\sigma=0.5$)\"]\n",
1244 |     "for i in range(4):\n",
1245 |     "    plt.subplot(1,4,i+1)\n",
1246 |     "    plt.scatter(train_data[\"x_coord\"], train_data[\"y_coord\"], c=param_arr[i])\n",
1247 |     "    plt.title(names[i], fontsize=18)\n",
1248 |     "    plt.axis(\"off\")\n",
1249 |     "plt.tight_layout()\n",
1250 |     "plt.savefig(\"outputs/gwr_beta_comparison.png\")\n",
1251 |     "plt.show()"
1252 |    ]
1253 |   },
1254 |   {
1255 |    "cell_type": "code",
1256 |    "execution_count": null,
1257 |    "id": "45fb53a2",
1258 |    "metadata": {},
1259 |    "outputs": [],
1260 |    "source": []
1261 |   }
1262 |  ],
1263 |  "metadata": {
1264 |   "kernelspec": {
1265 |    "display_name": "Python 3 (ipykernel)",
1266 |    "language": "python",
1267 |    "name": "python3"
1268 |   },
1269 |   "language_info": {
1270 |    "codemirror_mode": {
1271 |     "name": "ipython",
1272 |     "version": 3
1273 |    },
1274 |    "file_extension": ".py",
1275 |    "mimetype": "text/x-python",
1276 |    "name": "python",
1277 |    "nbconvert_exporter": "python",
1278 |    "pygments_lexer": "ipython3",
1279 |    "version": "3.9.5"
1280 |   }
1281 |  },
1282 |  "nbformat": 4,
1283 |  "nbformat_minor": 5
1284 | }
1285 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | matplotlib
4 | pandas
5 | scikit-learn


--------------------------------------------------------------------------------
/scripts/benchmarks.py:
--------------------------------------------------------------------------------
  1 | # Standard and GIS Modules
  2 | import os
  3 | import sys
  4 | import numpy as np
  5 | import pandas as pd
  6 | import time
  7 | 
  8 | # ignore linalg warnings from MGWR package
  9 | import warnings
 10 | 
 11 | warnings.filterwarnings("ignore")
 12 | 
 13 | # gwr:
 14 | from mgwr.gwr import GWR
 15 | from mgwr.sel_bw import Sel_BW
 16 | 
 17 | from sklearn.ensemble import RandomForestRegressor
 18 | from sklearn.linear_model import LinearRegression
 19 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
 20 | from sprf.spatial_random_forest import SpatialRandomForest
 21 | from sprf.geographical_random_forest import GeographicalRandomForest
 22 | 
 23 | from models import *
 24 | 
 25 | 
 26 | def get_folds(nr_samples, nr_folds=10):
 27 |     fold_inds = np.random.permutation(nr_samples)
 28 |     num_per_fold = nr_samples // nr_folds
 29 |     train_inds, test_inds = [], []
 30 |     for i in range(nr_folds):
 31 |         # print("start, end", i*num_per_fold)
 32 |         if i < nr_folds - 1:
 33 |             test_inds_fold = np.arange(
 34 |                 i * num_per_fold, (i + 1) * num_per_fold, 1
 35 |             )
 36 |         else:
 37 |             test_inds_fold = np.arange(i * num_per_fold, nr_samples)
 38 |         test_inds.append(fold_inds[test_inds_fold])
 39 |         train_inds.append(np.delete(fold_inds, test_inds_fold))
 40 |     return train_inds, test_inds
 41 | 
 42 | 
 43 | def prepare_data(data, target, lon="x", lat="y"):
 44 |     """Assumes that all other columns are used as covariates"""
 45 |     # covariates = [col for col in data.columns if col not in [lon, lat, target]]
 46 |     # return data[covariates], data[target], data[[lon, lat]]
 47 |     return data.rename(
 48 |         columns={target: "label", lon: "x_coord", lat: "y_coord"}
 49 |     )
 50 | 
 51 | 
 52 | def add_metrics(test_pred, test_y, res_dict_init, method, runtime):
 53 |     res_dict = res_dict_init.copy()
 54 |     res_dict["Method"] = method
 55 |     res_dict["RMSE"] = mean_squared_error(test_pred, test_y, squared=False)
 56 |     res_dict["MAE"] = mean_absolute_error(test_pred, test_y)
 57 |     res_dict["R-Squared"] = r2_score(test_y, test_pred)
 58 |     res_dict["Runtime"] = runtime
 59 |     return res_dict
 60 | 
 61 | 
 62 | def cross_validation(data):
 63 |     nr_folds = 5
 64 |     train_inds, test_inds = get_folds(len(data), nr_folds=nr_folds)
 65 |     res_df = []
 66 | 
 67 |     # dataset specific information
 68 |     target = dataset_target[DATASET]
 69 |     x_coord_name = dataset_x.get(DATASET, "x")
 70 |     y_coord_name = dataset_y.get(DATASET, "y")
 71 | 
 72 |     # model params --> TODO: grid search
 73 |     max_depth = 10
 74 |     spatial_neighbors = len(data) // 5  # one fifth of the dataset
 75 |     print("Number of neighbors considered for spatial RF:", spatial_neighbors)
 76 | 
 77 |     data_renamed = prepare_data(data.copy(), target, x_coord_name, y_coord_name)
 78 | 
 79 |     for fold in range(nr_folds):
 80 |         res_dict_init = {"fold": fold, "max_depth": max_depth}
 81 |         train_data = data_renamed.iloc[train_inds[fold]]
 82 |         test_data = data_renamed.iloc[test_inds[fold]]
 83 |         feat_cols = [
 84 |             col
 85 |             for col in train_data.columns
 86 |             if "coord" not in col and col != "label"
 87 |         ]
 88 |         # print(
 89 |         #     train_x.shape, train_y.shape, train_coords.shape, test_x.shape,
 90 |         #     test_y.shape, test_coords.shape
 91 |         # )
 92 |         for model_function, name in zip(model_function_names, model_names):
 93 |             tic = time.time()
 94 |             test_pred = model_function(
 95 |                 train_data.copy(), test_data.copy(), feat_cols=feat_cols,
 96 |             )
 97 |             runtime = time.time() - tic
 98 |             res_df.append(
 99 |                 add_metrics(
100 |                     test_pred, test_data["label"], res_dict_init, name, runtime,
101 |                 )
102 |             )
103 |             print(name, res_df[-1]["R-Squared"])
104 | 
105 |     # Finalize results
106 |     res_df = pd.DataFrame(res_df)
107 |     return res_df
108 | 
109 | 
# All result tables are written into this folder.
os.makedirs("outputs", exist_ok=True)

# Name of the target column for each dataset in data/<name>.csv
dataset_target = {
    "plants": "richness_species_vascular",
    "meuse": "zinc",
    "atlantic": "Rate",
    "deforestation": "deforestation_quantile",
    "california_housing": "median_house_value",
}

# Model functions and their display names; both lists must stay aligned.
model_function_names = [
    linear_regression,
    rf_coordinates,
    rf_global,
    rf_spatial,
    my_gwr,
    kriging,
    sarm,
    slx,
    # rf_geographical,
]
model_names = [
    "linear regression",
    "RF (coordinates)",
    "RF",
    "spatial RF",
    "GWR",
    "Kriging",
    "SAR",
    "SLX",
    # "geographical RF",
]

datasets = [
    "meuse",
    "plants",
    "atlantic",
    "deforestation",
    "california_housing",
]

np.random.seed(42)

# Optional per-dataset coordinate column names; both are empty, so the
# defaults "x" and "y" are used for every dataset.
dataset_x = {}  # per default: x
dataset_y = {}  # per default: y

for DATASET in datasets:
    print("\nDATASET", DATASET, "\n")

    data_path = os.path.join("data", DATASET + ".csv")
    data = pd.read_csv(data_path)
    print("Number of samples", len(data))

    # per-fold results
    results = cross_validation(data)
    folds_csv = os.path.join("outputs", f"results_{DATASET}_folds.csv")
    results.to_csv(folds_csv, index=False)

    # average over the folds, best (lowest RMSE) method first
    method_means = results.groupby(["Method"]).mean()
    results_grouped = method_means.drop(
        ["fold", "max_depth"], axis=1
    ).sort_values("RMSE")
    results_grouped.to_csv(os.path.join("outputs", f"results_{DATASET}.csv"))

    print(results_grouped)
    print("--------------")
178 | 


--------------------------------------------------------------------------------
/scripts/models.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from sklearn.ensemble import RandomForestRegressor
  4 | from sklearn.linear_model import LinearRegression
  5 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
  6 | from sprf.spatial_random_forest import SpatialRandomForest
  7 | from sprf.geographical_random_forest import GeographicalRandomForest
  8 | from scipy.spatial import distance_matrix
  9 | from mgwr.gwr import GWR
 10 | from mgwr.sel_bw import Sel_BW
 11 | from pykrige.rk import RegressionKriging
 12 | import spreg
 13 | import libpysal
 14 | 
 15 | 
 16 | def linear_regression(train_data, test_data, feat_cols=[], **kwargs):
 17 |     regr = LinearRegression()
 18 |     regr.fit(train_data[feat_cols], train_data["label"])
 19 |     test_pred = regr.predict(test_data[feat_cols])
 20 |     return test_pred
 21 | 
 22 | 
 23 | def rf_coordinates(
 24 |     train_data, test_data, feat_cols=[], max_depth=30, n_estim=100, **kwargs
 25 | ):
 26 |     regr = RandomForestRegressor(n_estimators=n_estim, max_depth=max_depth)
 27 |     regr.fit(
 28 |         train_data[["x_coord", "y_coord"] + feat_cols], train_data["label"]
 29 |     )
 30 |     test_pred = regr.predict(test_data[["x_coord", "y_coord"] + feat_cols])
 31 |     return test_pred
 32 | 
 33 | 
 34 | def rf_global(
 35 |     train_data, test_data, feat_cols=[], max_depth=30, n_estim=100, **kwargs
 36 | ):
 37 |     regr = RandomForestRegressor(n_estimators=n_estim, max_depth=max_depth)
 38 |     regr.fit(train_data[feat_cols], train_data["label"])
 39 |     test_pred = regr.predict(test_data[feat_cols])
 40 |     return test_pred
 41 | 
 42 | 
 43 | def rf_spatial(
 44 |     train_data, test_data, feat_cols=[], max_depth=30, nr_data=500, **kwargs
 45 | ):
 46 |     n_estim = 100 if nr_data > 200 else 50
 47 |     regr = SpatialRandomForest(
 48 |         n_estimators=n_estim, neighbors=500, max_depth=max_depth
 49 |     )
 50 |     regr.tune_neighbors(
 51 |         train_data[feat_cols],
 52 |         train_data["label"],
 53 |         train_data[["x_coord", "y_coord"]],
 54 |     )
 55 |     print("spatial rf tuned:", regr.neighbors)
 56 |     regr.fit(
 57 |         train_data[feat_cols],
 58 |         train_data["label"],
 59 |         train_data[["x_coord", "y_coord"]],
 60 |     )
 61 |     test_pred = regr.predict(
 62 |         test_data[feat_cols], test_data[["x_coord", "y_coord"]]
 63 |     )
 64 |     return test_pred
 65 | 
 66 | 
 67 | def rf_geographical(
 68 |     train_data, test_data, feat_cols=[], max_depth=30, **kwargs
 69 | ):
 70 |     n_estim = 20  # lower number of estimators to reduce runtime
 71 |     regr = GeographicalRandomForest(
 72 |         n_estimators=n_estim, neighbors=500, max_depth=max_depth
 73 |     )
 74 |     regr.tune_neighbors(
 75 |         train_data[feat_cols],
 76 |         train_data["label"],
 77 |         train_data[["x_coord", "y_coord"]],
 78 |     )
 79 |     print("geo rf tuned:", regr.neighbors)
 80 |     regr.fit(
 81 |         train_data[feat_cols],
 82 |         train_data["label"],
 83 |         train_data[["x_coord", "y_coord"]],
 84 |     )
 85 |     test_pred = regr.predict(
 86 |         test_data[feat_cols], test_data[["x_coord", "y_coord"]]
 87 |     )
 88 |     return test_pred
 89 | 
 90 | 
 91 | def kriging(
 92 |     train_data, test_data, feat_cols=[], max_depth=30, n_estim=100, **kwargs
 93 | ):
 94 |     krig = RegressionKriging(
 95 |         RandomForestRegressor(n_estimators=n_estim, max_depth=max_depth),
 96 |         verbose=False,
 97 |     )
 98 |     krig.fit(
 99 |         train_data[feat_cols].values,
100 |         train_data[["x_coord", "y_coord"]].values,
101 |         train_data["label"].values,
102 |     )
103 |     test_pred = krig.predict(
104 |         test_data[feat_cols].values,
105 |         test_data[["x_coord", "y_coord"]].values.astype(float),
106 |     )
107 |     return test_pred
108 | 
109 | 
def get_weights_as_array(points, max_points):
    """Row-normalised inverse-distance weights of the nearest neighbours.

    Args:
        points: 2-D array with one row of coordinates per point.
        max_points: number of largest weights (i.e. nearest neighbours) to
            keep per row. Values larger than the number of points are
            clamped, so that all available neighbours are kept.

    Returns:
        Square array ``w`` where ``w[i, j]`` is the weight of point ``j`` for
        point ``i``. The diagonal is zero, as are entries of points with
        identical coordinates. Every row with at least one neighbour sums to
        one; rows without any neighbour stay all-zero.
    """
    dist_matrix = distance_matrix(points, points)
    # the zero distances (diagonal, duplicated points) yield inf, which is
    # reset to 0 right below, so the division warning is only noise
    with np.errstate(divide="ignore"):
        my_w = 1 / dist_matrix
    my_w[my_w == np.inf] = 0

    # threshold per row: the max_points-th largest weight
    nr_kept = min(max_points, my_w.shape[1])
    sorted_vals_points = np.sort(my_w, axis=1)[:, -nr_kept]
    my_w[my_w < np.expand_dims(sorted_vals_points, 1)] = 0

    # normalise each row, leaving rows without neighbours at zero (not NaN)
    row_sums = np.sum(my_w, axis=1, keepdims=True)
    return np.divide(
        my_w, row_sums, out=np.zeros_like(my_w), where=row_sums > 0
    )
118 | 
119 | 
def morans_i(y, w):
    """Compute Moran's I of the values ``y`` under the spatial weights ``w``.

    Args:
        y: 1-D array-like with one value per location.
        w: square weight matrix; ``w[i, j]`` is the weight between location
            ``i`` and location ``j``. It must have ``len(y)`` rows/columns.

    Returns:
        ``(n / sum(w)) * (z' w z) / (z' z)`` with ``z = y - mean(y)`` and
        ``n = len(y)``.
    """
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    normed_y = y - np.mean(y)
    # vectorised form of the double sum over w[i, j] * z[i] * z[j]
    numerator = normed_y @ w @ normed_y
    denominator = normed_y @ normed_y
    return (len(y) / np.sum(w)) * (numerator / denominator)
129 | 
130 | 
def slx(train_data, test_data, w_cutoff=20, feat_cols=[], **kwargs):
    """Spatial lag of X model: linear regression on X and its spatial lag.

    The weight matrix is built from the coordinates of the training and test
    samples together; the lagged features are ``w @ X``. The regression is
    fitted on the training rows only and the predictions for the test rows
    are returned.
    """
    nr_train = len(train_data)
    stacked = pd.concat((train_data, test_data))
    weights = get_weights_as_array(
        stacked[["x_coord", "y_coord"]].values, w_cutoff
    )
    features = stacked[feat_cols].values
    # original features followed by their spatially lagged counterparts
    design = np.hstack((features, np.matmul(weights, features)))

    model = LinearRegression()
    # the first nr_train rows belong to the training data
    model.fit(design[:nr_train], train_data["label"].values)
    return model.predict(design[nr_train:])
145 | 
146 | 
def sarm(train_data, test_data, feat_cols=[], **kwargs):
    """Spatial autoregressive (lag) model fitted with ``spreg.GM_Lag``.

    The distance threshold of the weights is the median distance between
    consecutive rows of ``train_data``. The test prediction is
    ``(I - rho * W)^-1 (X beta + intercept)``, where ``W`` holds the inverse
    distances between test points that are closer than the threshold.

    If any step fails, the error is printed and the mean of the training
    labels is returned for every test sample (best-effort fallback, so that
    a benchmark run is not aborted by a single model).

    Returns:
        Array with one prediction per row of ``test_data``.
    """
    X = train_data[feat_cols].values
    Y = train_data["label"].values
    try:
        train_coords = train_data[["x_coord", "y_coord"]]
        # squared coordinate difference between each row and the previous one
        dist_with_next = (train_coords - train_coords.shift(1)) ** 2
        thresh = np.sqrt(
            dist_with_next["x_coord"] + dist_with_next["y_coord"]
        ).median()
        w = libpysal.weights.DistanceBand(
            train_coords.values.astype(float),
            threshold=thresh,
            binary=False,
        )
        model = spreg.GM_Lag(Y, X, w=w)
        # betas: intercept first, spatial lag coefficient last
        intercept = model.betas[0]
        coeff = model.betas[1:-1]
        roh = model.betas[-1]
        # basic is just X\beta
        test_pred_basic = (
            np.matmul(test_data[feat_cols].values, coeff) + intercept
        )
        # complex is with the second part: inverse-distance weights between
        # the test points, cut off at the distance threshold
        test_coords = test_data[["x_coord", "y_coord"]].values
        dist_matrix = distance_matrix(test_coords, test_coords)
        # zero distances give inf, which is reset to 0 right below
        with np.errstate(divide="ignore"):
            W = 1 / dist_matrix
        W[W == np.inf] = 0
        W[W < 1 / thresh] = 0
        test_pred = np.matmul(
            np.linalg.inv(np.identity(len(W)) - roh * W), test_pred_basic
        )
    except Exception as err:
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working
        print("ERROR in SAR:", repr(err))
        test_pred = np.zeros(len(test_data)) + np.mean(Y)
    return test_pred
190 | 
191 | 
def my_gwr(train_data, test_data, feat_cols=[], **kwargs):
    """Geographically weighted regression with AICc bandwidth selection.

    A fixed exponential kernel is used for both the bandwidth search
    (``Sel_BW``) and the model (``GWR``).

    If any step fails, the error is printed and zeros are returned for every
    test sample (best-effort fallback, so that a benchmark run is not
    aborted by a single model).

    Returns:
        The ``predictions`` of the fitted model for ``test_data``, or an
        array of zeros with one entry per test sample on failure.
    """
    try:
        train_coords = np.array(train_data[["x_coord", "y_coord"]])
        # the labels are passed as a column vector
        train_y = np.expand_dims(train_data["label"].values, 1)
        train_x = np.array(train_data[feat_cols])
        # bandwidth selection
        gwr_selector = Sel_BW(
            train_coords, train_y, train_x, fixed=True, kernel="exponential"
        )
        gwr_bw = gwr_selector.search(criterion="AICc")
        # create and train model
        gwr_model = GWR(
            train_coords,
            train_y,
            train_x,
            gwr_bw,
            kernel="exponential",
            fixed=True,
        )
        gwr_results = gwr_model.fit()

        test_coords = np.array(test_data[["x_coord", "y_coord"]])
        test_x = np.array(test_data[feat_cols])
        # predict
        test_pred = gwr_model.predict(
            test_coords, test_x, gwr_results.scale, gwr_results.resid_response
        ).predictions
        return test_pred
    except Exception as err:
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working
        print("GWR not possible:", repr(err))
        return np.zeros(len(test_data))
223 | 


--------------------------------------------------------------------------------
/scripts/plotting.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | import pandas as pd
  4 | import seaborn as sns
  5 | import os
  6 | 
  7 | 
def plot_morans_i():
    """Scatter-plot five synthetic features, each titled with its Moran's I.

    Columns 0 and 1 of the generated array are used as plot coordinates and
    columns 2-6 as the values that are colour-coded and scored.

    NOTE(review): ``create_data``, ``get_weights_as_array`` and ``morans_i``
    are not imported in this module (its imports are matplotlib, numpy,
    pandas, seaborn and os), so calling this function as-is raises a
    ``NameError``.
    NOTE(review): ``get_weights_as_array`` in ``models.py`` uses its second
    argument as a (negative) column index, which does not work with the
    float ``0.2`` passed here; presumably this call was written for a
    distance-threshold variant of that helper. TODO confirm.
    """
    # fixed seed so that the same synthetic data is drawn on every call
    np.random.seed(42)
    test = create_data(500)
    # weights are computed from the first two columns
    w = get_weights_as_array(test[:, :2], 0.2)
    for t in range(5):
        # column t + 2 is the t-th column after the two coordinate columns
        morans = morans_i(test[:, t + 2], w)
        plt.scatter(test[:, 0], test[:, 1], c=test[:, t + 2])
        plt.colorbar()
        plt.title(f"Morans I: {morans}")
        plt.axis("off")
        plt.show()
 19 | 
 20 | 
 21 | def main_plot(
 22 |     results,
 23 |     nr_data=500,
 24 |     noise_type="uniformly distributed",
 25 |     save_path="outputs/main_plot.pdf",
 26 |     score_col="RMSE",
 27 | ):
 28 |     noise_level_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
 29 |     locality_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
 30 |     include_models = [
 31 |         "OLS",
 32 |         "SLX",
 33 |         "GWR",
 34 |         "RF",
 35 |         "RF (coordinates)",
 36 |         "spatial RF",
 37 |         "Kriging",
 38 |     ]
 39 |     include_models = [
 40 |         m for m in include_models if m in results["model"].unique()
 41 |     ]
 42 |     # [model for model in results["model"].unique() if "geo" not in model]
 43 |     nr_models = len(include_models)
 44 |     fig = plt.figure(figsize=(16, 6.5))
 45 |     for mode_ind, mode in enumerate(
 46 |         ["linear", "non-linear (simple)", "non-linear"]
 47 |     ):
 48 |         #     print("----------------")
 49 |         for model_ind, model in enumerate(include_models):
 50 |             #         print(mode, "data, --> model:", model)
 51 |             results_filter = results[
 52 |                 (results["data mode"] == mode)
 53 |                 & (results["model"] == model)
 54 |                 & (results["nr_data"] == nr_data)
 55 |                 & (results["noise_type"] == noise_type)
 56 |             ]
 57 |             results_filter.set_index(["noise", "locality"], inplace=True)
 58 |             visualize_scores = np.zeros(
 59 |                 (len(noise_level_range), len(locality_range))
 60 |             )
 61 |             for i, noise in enumerate(noise_level_range):
 62 |                 for j, locality in enumerate(locality_range):
 63 |                     score = results_filter.loc[noise, locality][
 64 |                         score_col
 65 |                     ].mean()
 66 |                     visualize_scores[i, j] = score
 67 | 
 68 |             ax1 = fig.add_subplot(
 69 |                 3, nr_models + 1, ((nr_models + 1) * mode_ind) + model_ind + 1
 70 |             )
 71 |             imshow_plot = ax1.imshow(visualize_scores, vmin=0, vmax=0.6)
 72 |             #         plt.axis("off")
 73 |             #             if model_ind==0:
 74 |             #                 ax1.set_ylabel("$\longleftarrow$ Increasing \n noise", fontsize=15)
 75 |             #             ax1.yaxis.set_label_position("right")
 76 |             #             ax1.yaxis.tick_right()
 77 |             plt.xticks([])
 78 |             plt.yticks([])
 79 |             #             ax1.set_xlabel("$\longrightarrow$ decreasing \n stationarity", fontsize=10)
 80 |             if model_ind == 0:
 81 |                 #             ax2 = ax1.twinx()
 82 |                 #             ax2.set_ylabel(mode)
 83 |                 #             ax2.yaxis.set_label_position("right")
 84 |                 #                 pad = 2
 85 |                 mode_new = (
 86 |                     "non-linear\n(simple)  "
 87 |                     if mode == "non-linear (simple)"
 88 |                     else mode
 89 |                 )
 90 |                 ax1.annotate(
 91 |                     mode_new,
 92 |                     xy=(0, 0.5),
 93 |                     xytext=(-50, 0),  # ax1.yaxis.labelpad - pad
 94 |                     xycoords=ax1.yaxis.label,
 95 |                     textcoords="offset points",
 96 |                     size=18,
 97 |                     ha="right",
 98 |                     va="center",
 99 |                     rotation=90,
100 |                     weight="bold",
101 |                 )
102 |             if mode_ind == 0:
103 |                 ax1.set_title(model, weight="bold", fontsize=15)
104 | 
105 |     fig.text(0.5, 0.0, "$\longrightarrow$ decreasing stationarity", ha="center")
106 |     #     fig.text(0.5, 0.36, "$\longrightarrow$ decreasing stationarity", ha='center')
107 |     #     fig.text(0.5, 0.7, "$\longrightarrow$ decreasing stationarity", ha='center')
108 | 
109 |     fig.text(
110 |         0.07,
111 |         0.5,
112 |         "$\longleftarrow$ Increasing noise",
113 |         va="center",
114 |         rotation="vertical",
115 |     )
116 |     # make colorbar
117 |     # fig.subplots_adjust(right=0.95)
118 |     cbar_ax = fig.add_axes([0.88, 0.05, 0.02, 0.9])
119 |     fig.colorbar(imshow_plot, cax=cbar_ax, label=score_col)
120 |     plt.tight_layout()
121 |     if save_path is not None:
122 |         plt.savefig(save_path)
123 |     plt.show()
124 | 
125 | 
def barplot_synthetic(results, score_col="RMSE"):
    """Bar plots of the mean score per sample size, one panel per data mode.

    Only the models GWR, RF, spatial RF and Kriging are shown, restricted to
    low noise, high locality and uniformly distributed noise. The figure is
    saved to ``outputs/barplot_main.pdf`` with one shared legend on top.
    """
    # only look at local models
    local_models = ["GWR", "RF", "spatial RF", "Kriging"]
    subset = results[results["model"].isin(local_models)]

    plt.figure(figsize=(18, 6))
    modes = ["linear", "non-linear (simple)", "non-linear"]
    for panel, mode in enumerate(modes, start=1):
        plt.subplot(1, len(modes), panel)
        keep = (
            (subset["data mode"] == mode)
            & (subset["noise_discrete"] == "low")
            & (subset["locality_discrete"] == "high")
            & (subset["noise_type"] == "uniformly distributed")
        )
        mean_scores = (
            subset[keep]
            .groupby(["nr_data", "model"])
            .agg({score_col: "mean"})
            .reset_index()
        )

        ax = sns.barplot(
            data=mean_scores, x="nr_data", y=score_col, hue="model"
        )
        plt.xlabel("Number of samples")
        # hide the per-panel legend, a shared one is added below
        plt.legend([], [], frameon=False)
        plt.title(mode + " DGP", fontsize=18)

    # the legend entries are taken from the last panel
    handles, labels = ax.get_legend_handles_labels()
    plt.tight_layout()
    plt.figlegend(
        handles,
        labels,
        loc="upper center",
        ncol=5,
        labelspacing=0.0,
        bbox_to_anchor=(0.5, 1.09),
    )
    plt.savefig("outputs/barplot_main.pdf", bbox_inches="tight")
    plt.show()
175 | 
176 | 
def noise_analysis(results):
    """Line plots of RMSE against the noise level, split by noise type.

    A 2x2 grid is drawn: one row per data mode ("linear",
    "non-linear (simple)") and one column per model ("GWR", "Kriging"). Only
    results with high locality and 500 samples are used. The figure is saved
    to ``outputs/noise_analysis.pdf`` with one shared legend on top.

    Args:
        results: DataFrame with the columns "model", "data mode",
            "locality_discrete", "nr_data", "noise_type", "noise" and "RMSE".
    """
    fig = plt.figure(figsize=(12, 9))
    subset = results[
        results["model"].isin(["OLS", "GWR", "RF (coordinates)", "Kriging"])
    ]
    counter = 1
    for mode in ["linear", "non-linear (simple)"]:
        for j, model in enumerate(["GWR", "Kriging"]):
            # copy the filtered rows: assigning a column to a slice of
            # `subset` would trigger pandas' SettingWithCopyWarning
            subset2 = subset[
                (subset["model"] == model)
                & (subset["data mode"] == mode)
                & (subset["locality_discrete"] == "high")
                & (subset["nr_data"] == 500)
            ].copy()
            # the noise type doubles as the legend label
            subset2["noise_type"] = subset2["noise_type"] + " noise"
            ax = fig.add_subplot(2, 2, counter)
            sns.lineplot(
                ax=ax,
                data=subset2.reset_index(),
                x="noise",
                y="RMSE",
                hue="noise_type",
            )
            # hide the per-panel legend, a shared one is added below
            plt.legend([], [], frameon=False)
            if j == 0:
                # label the row with the data mode, left of the first column
                ax.annotate(
                    mode,
                    xy=(0, 0.5),
                    xytext=(-20, 0),
                    xycoords=ax.yaxis.label,
                    textcoords="offset points",
                    size="large",
                    ha="right",
                    va="center",
                    rotation=90,
                    weight="bold",
                )
            # raw string: "\s" is not a valid escape sequence
            ax.set_xlabel(r"Noise level ($\sigma$)")
            ax.set_ylim(0, 0.7)
            if counter in [2, 4]:
                # right-hand column: no y label
                plt.ylabel("")
            if counter in [1, 2]:
                # top row: model name as title
                plt.title(model, weight="bold", fontsize=16)
            counter += 1

    # the legend entries are taken from the last panel
    handles, labels = ax.get_legend_handles_labels()
    plt.tight_layout()
    plt.figlegend(
        handles,
        labels,
        loc="upper center",
        ncol=3,
        labelspacing=0.0,
        bbox_to_anchor=(0.5, 1.07),
    )
    plt.savefig("outputs/noise_analysis.pdf", bbox_inches="tight")
    plt.show()
247 | 


--------------------------------------------------------------------------------
/scripts/synthetic_tests.py:
--------------------------------------------------------------------------------
  1 | # Standard and GIS Modules
  2 | import os
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | import pandas as pd
  6 | import time
  7 | import scipy
  8 | import warnings
  9 | 
 10 | warnings.filterwarnings("ignore")
 11 | 
 12 | from sklearn.metrics import r2_score, mean_squared_error
 13 | from models import *
 14 | 
 15 | 
 16 | def create_data(nr_data, nr_feats=5, rho=0.6, weight_matrix_cutoff=20):
 17 | 
 18 |     x_coords = np.random.rand(nr_data, 2) * 2 - 1
 19 | 
 20 |     all_feats = np.zeros((nr_data, nr_feats))
 21 | 
 22 |     for feat in range(nr_feats):
 23 |         att = np.random.uniform(-1, 1, nr_data)
 24 | 
 25 |         w = get_weights_as_array(x_coords, weight_matrix_cutoff)
 26 |         # compute I - rho*W
 27 |         m = np.identity(len(x_coords)) - rho * w
 28 |         # invert and multiply with x_j
 29 |         att_hat = np.matmul(np.linalg.inv(m), att)
 30 |         # scale to -1 to 1
 31 |         att_hat = (att_hat - np.min(att_hat)) / (
 32 |             np.max(att_hat) - np.min(att_hat)
 33 |         ) * 2 - 1
 34 |         all_feats[:, feat] = att_hat
 35 |     return np.hstack([x_coords, all_feats])
 36 | 
 37 | 
 38 | def non_linear(feat_arr, weights):
 39 |     feature_transformed = np.zeros(feat_arr.shape)
 40 |     a, b, c, d, e = (
 41 |         feat_arr[:, 0],
 42 |         feat_arr[:, 1],
 43 |         feat_arr[:, 2],
 44 |         feat_arr[:, 3],
 45 |         feat_arr[:, 4],
 46 |     )
 47 |     # first term: a**2 * b
 48 |     feature_transformed[:, 0] = a ** 2 * np.sin(b) * weights[:, 0]
 49 |     feature_transformed[:, 1] = np.sin(b) * d * weights[:, 1]
 50 |     feature_transformed[:, 2] = e * np.log(c ** 2) * weights[:, 2]
 51 |     feature_transformed[:, 3] = d ** 2 * np.cos(b) * weights[:, 3]
 52 |     feature_transformed[:, 4] = e * a ** 2 * d * weights[:, 4]
 53 | 
 54 |     return np.sum(feature_transformed, axis=1)
 55 | 
 56 | 
 57 | # parameters and models to include
 58 | np.random.seed(42)
 59 | noise_level_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
 60 | locality_range = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
 61 | model_function_names = [
 62 |     linear_regression,
 63 |     rf_coordinates,
 64 |     rf_global,
 65 |     rf_spatial,
 66 |     my_gwr,
 67 |     kriging,
 68 |     sarm,
 69 |     slx
 70 |     # rf_geographical,
 71 | ]
 72 | model_names = [
 73 |     "linear regression",
 74 |     "RF (coordinates)",
 75 |     "RF",
 76 |     "spatial RF",
 77 |     "GWR",
 78 |     "Kriging",
 79 |     "SAR",
 80 |     "SLX"
 81 |     # "geographical RF",
 82 | ]
 83 | 
 84 | # MAIN PARAMETERS
 85 | nr_feats = 5
 86 | max_depth = 30
 87 | n_estim = 150
 88 | w_cutoff = 20
 89 | rho = 0.75
 90 | noise_type = "uniformly distributed"
 91 | # 'heterogeneous - same', 'heterogeneous - different'
 92 | 
 93 | # save results
 94 | results_list = []
 95 | 
 96 | weights = np.array([-0.95, 0.38, 0.66, -0.43, 0.22])
 97 | 
 98 | for nr_data in [100, 500, 1000, 5000]:
 99 |     print("\n ======== DATA SAMPLES", nr_data)
100 | 
101 |     # MAKE MAIN DATA
102 |     train_cutoff = int(nr_data * 0.9)
103 |     feat_cols = ["feat_" + str(i) for i in range(nr_feats)]
104 |     # V1: X random uniform
105 |     # synthetic_data_array = np.random.rand(nr_data, 2 + nr_feats) * 2 - 1
106 |     # V2: with spatial lag
107 |     synthetic_data_array = create_data(
108 |         nr_data, nr_feats=nr_feats, rho=rho, weight_matrix_cutoff=w_cutoff
109 |     )
110 |     print(synthetic_data_array.shape)
111 | 
112 |     synthetic_data = pd.DataFrame(
113 |         synthetic_data_array, columns=["x_coord", "y_coord"] + feat_cols,
114 |     )
115 |     print(synthetic_data.head(5))
116 |     # Double check Moran's I
117 |     w = get_weights_as_array(synthetic_data_array[:, :2], w_cutoff)
118 |     for t in range(5):
119 |         print(
120 |             "Moran's I of coefficient",
121 |             t,
122 |             morans_i(synthetic_data_array[:, t + 2], w),
123 |         )
124 | 
125 |     # simulate spatial variation of features (varying per weight)
126 |     spatial_variation = np.zeros((nr_data, nr_feats))
127 |     for i in range(nr_feats):
128 |         spatial_variation[:, i] = 0.5 * (
129 |             np.sin(synthetic_data["x_coord"].values * np.pi * 2 + i)
130 |             + np.cos(synthetic_data["y_coord"].values * np.pi * 2 + i)
131 |         )
132 | 
133 |     for noise_level in noise_level_range:
134 |         for locality in locality_range:
135 |             # spatially dependent but linear
136 |             spatially_dependent_weights = weights + locality * spatial_variation
137 | 
138 |             for mode in ["linear", "non-linear"]:
139 |                 print("--------", noise_level, locality, mode)
140 |                 # apply linear or non_linear function
141 |                 if mode == "linear":
142 |                     synthetic_data["label"] = np.sum(
143 |                         spatially_dependent_weights
144 |                         * synthetic_data[feat_cols].values,
145 |                         axis=1,
146 |                     )
147 |                 else:
148 |                     synthetic_data["label"] = non_linear(
149 |                         synthetic_data[feat_cols].values,
150 |                         spatially_dependent_weights,
151 |                     )
152 | 
153 |                 if noise_type == "uniformly distributed":
154 |                     noise = np.random.normal(0, noise_level, nr_data)
155 |                 elif noise_type == "heterogeneous - different":
156 |                     spatial_variation_different = noise_level * (
157 |                         0.5
158 |                         * (
159 |                             synthetic_data["x_coord"].values
160 |                             + synthetic_data["y_coord"].values
161 |                         )
162 |                         + 1
163 |                     )
164 |                     noise = np.random.normal(
165 |                         0,
166 |                         spatial_variation_different,
167 |                         len(spatial_variation_different),
168 |                     )
169 |                 elif noise_type == "heterogeneous - same":
170 |                     # e.g. high noise level (0.5), spatial variation is from
171 |                     # sin and cos so it's between -1 and 1, so we make + 1
172 |                     # so on average we multiply by 1, but varying variance
173 |                     # between 0.5 * 0 and 0.5 * 2
174 |                     spatially_dependent_noise = noise_level * (
175 |                         spatial_variation[:, 0] + 1  # without locality level!
176 |                     )
177 |                     noise = np.random.normal(
178 |                         0, spatially_dependent_noise, nr_data
179 |                     )
180 |                 else:
181 |                     raise RuntimeError("Noise must be one of above")
182 | 
183 |                 synthetic_data["label"] = synthetic_data["label"] + noise
184 | 
185 |                 train_data, test_data = (
186 |                     synthetic_data[:train_cutoff],
187 |                     synthetic_data[train_cutoff:],
188 |                 )
189 | 
190 |                 for model_function, name in zip(
191 |                     model_function_names, model_names
192 |                 ):
193 |                     tic = time.time()
194 |                     test_pred = model_function(
195 |                         train_data.copy(),
196 |                         test_data.copy(),
197 |                         # train_data.copy(), # for overfitting test
198 |                         feat_cols=feat_cols,
199 |                         max_depth=max_depth,
200 |                         nr_data=nr_data,
201 |                         n_estim=n_estim,
202 |                         w_cutoff=w_cutoff,
203 |                     )
204 |                     # compute metrics
205 |                     score = r2_score(test_data["label"], test_pred)
206 |                     rmse = mean_squared_error(
207 |                         test_data["label"], test_pred, squared=False
208 |                     )
209 |                     # train_data["label"]) # for overfitting test
210 |                     time_diff = time.time() - tic
211 |                     # add to results
212 |                     results_list.append(
213 |                         {
214 |                             "nr_data": nr_data,
215 |                             "noise": noise_level,
216 |                             "locality": locality,
217 |                             "data mode": mode,
218 |                             "model": name,
219 |                             "time": time_diff,
220 |                             "R2 score": score,
221 |                             "RMSE": rmse,
222 |                         }
223 |                     )
224 |                     print(name, round(rmse, 3))
225 | 
226 |         results = pd.DataFrame(results_list)
227 |         results["noise_type"] = noise_type
228 |         noise_name = "_".join(noise_type.split(" "))
229 |         results.to_csv(f"synthetic_data_results_{noise_name}.csv", index=False)
230 |         print("Saved intermediate results")
231 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """Package installer."""
 2 | import os
 3 | from setuptools import setup
 4 | from setuptools import find_packages
 5 | 
 6 | LONG_DESCRIPTION = ""
 7 | if os.path.exists("README.md"):
 8 |     with open("README.md") as fp:
 9 |         LONG_DESCRIPTION = fp.read()
10 | 
11 | scripts = []
12 | 
13 | setup(
14 |     name="sprf",
15 |     version="0.0.1",
16 |     description="Spatial Random Forests",
17 |     long_description=LONG_DESCRIPTION,
18 |     long_description_content_type="text/markdown",
19 |     author="MIE Lab",
20 |     author_email=("nwiedemann@ethz.ch"),
21 |     license="GPLv3",
22 |     url="https://github.com/mie-lab/spatial_rf_python",
23 |     install_requires=["numpy", "scipy", "pandas", "scikit-learn"],
24 |     classifiers=[
25 |         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
26 |         "Intended Audience :: Science/Research",
27 |         "Programming Language :: Python :: 3.6",
28 |         "Topic :: Software Development :: Libraries :: Python Modules",
29 |     ],
30 |     packages=find_packages("."),
31 |     python_requires=">=3.8",
32 | )
33 | 


--------------------------------------------------------------------------------
/sprf/__init__.py:
--------------------------------------------------------------------------------
1 | from sprf.spatial_random_forest import SpatialRandomForest
2 | from sprf.geographical_random_forest import GeographicalRandomForest
3 | 


--------------------------------------------------------------------------------
/sprf/geographical_random_forest.py:
--------------------------------------------------------------------------------
  1 | from multiprocessing.sharedctypes import Value
  2 | import warnings
  3 | import numpy as np
  4 | 
  5 | from sklearn.ensemble import RandomForestRegressor
  6 | from sprf.tuning import tune_neighbors
  7 | 
  8 | class GeographicalRandomForest:
  9 |     """
 10 |     Geographical Random Forest according to Georganos et al.
 11 |     
 12 |     Parameters
 13 |     ----------
 14 |     sample_by : str, optional {neighbors, distance}
 15 |         Sampling strategy. The spatial random forest consists of trees that are
 16 |         fitted on a spatial subset of samples. These spatial subsets can either
 17 |         be sampled by defining a distance-radius, or by specifying a fixed
 18 |         number of spatial neighbors. By default "neighbors", see notes below.
 19 |     neighbors : int, optional
 20 |         Number of neighbors to use for spatial fit, by default 500 samples.
 21 |         Only relevant if sample_by=neighbors.
 22 |     max_distance : int, optional
 23 |         Maximum distance of samples to belong to the same decision tree. Only
 24 |         relevant if sample_by=distance. By default 150000m
 25 |     """
 26 | 
 27 |     def __init__(
 28 |         self,
 29 |         sample_by: str = "neighbors",
 30 |         neighbors: int = 500,
 31 |         max_distance: float = 150000,
 32 |         **random_forest_arguments
 33 |     ):
 34 |         self.sample_by = sample_by
 35 |         if sample_by == "distance" and max_distance == 150000:
 36 |             warnings.warn(
 37 |                 "It seems that you have selected the 'distance'-sampling mode,\
 38 |                      but the parameter max_distance is still the default. Make\
 39 |                      sure to adapt the max_distance parameter to your dataset."
 40 |             )
 41 |         self.max_distance = max_distance
 42 |         self.neighbors = neighbors
 43 |         self.random_forest_arguments = random_forest_arguments
 44 | 
 45 |     def fit(self, x_train, y_train, coords_train):
 46 |         # convert to arrays
 47 |         x_train, y_train, coords_train = (
 48 |             np.array(x_train),
 49 |             np.array(y_train),
 50 |             np.array(coords_train),
 51 |         )
 52 |         assert (
 53 |             len(coords_train.shape) == 2 and coords_train.shape[1] == 2
 54 |         ), "coords test must have len 2 in dimension 1"
 55 | 
 56 |         # init RFs
 57 |         self.random_forests = [
 58 |             RandomForestRegressor(**self.random_forest_arguments)
 59 |             for _ in range(len(x_train))
 60 |         ]
 61 | 
 62 |         # make distance matrix n x n
 63 |         dist = np.zeros((len(coords_train), len(coords_train)))
 64 |         for i, coord1 in enumerate(coords_train):
 65 |             for j, coord2 in enumerate(coords_train[i:]):
 66 |                 dist[i, j + i] = np.linalg.norm(coord1 - coord2)
 67 |         # mirror distance matrix
 68 |         dist = dist + dist.T
 69 | 
 70 |         # save the train coordinates because they are needed for prediction
 71 |         self.rf_coords_train = coords_train
 72 | 
 73 |         # fit one random forest per sample
 74 |         for core_ind in range(len(x_train)):
 75 |             dist_to_others = dist[core_ind]
 76 |             if self.sample_by == "distance":
 77 |                 samples_to_fit = np.where(dist_to_others < self.max_distance
 78 |                                           )[0]
 79 |             elif self.sample_by == "neighbors":
 80 |                 sorted_inds = np.argsort(dist_to_others)
 81 |                 samples_to_fit = sorted_inds[:self.neighbors]
 82 |             else:
 83 |                 raise NotImplementedError(
 84 |                     "sample mode must be one of 'neighbors', 'distance'!"
 85 |                 )
 86 |             x_train_subset = x_train[samples_to_fit]
 87 |             y_train_subset = y_train[samples_to_fit]
 88 |             self.random_forests[core_ind].fit(x_train_subset, y_train_subset)
 89 | 
 90 | 
 91 |     def tune_neighbors(self, *args, **kwargs):
 92 |         self.neighbors = tune_neighbors(self, *args, **kwargs)
 93 | 
 94 |     def predict(self, x_test, coords_test):
 95 |         x_test = np.array(x_test)
 96 |         coords_test = np.array(coords_test)
 97 | 
 98 |         # predict with a the closest random forest for each sample
 99 |         predictions = []
100 |         for i in range(len(x_test)):
101 |             dist_to_train_points = np.linalg.norm(
102 |                 self.rf_coords_train - coords_test[i], axis=1
103 |             )
104 |             closest_rf = np.argmin(dist_to_train_points)
105 |             y_pred = self.random_forests[closest_rf].predict(
106 |                 x_test[i].reshape(1, -1)
107 |             )
108 |             predictions.append(y_pred)
109 |         return np.array(predictions)
110 | 


--------------------------------------------------------------------------------
/sprf/spatial_random_forest.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | import numpy as np
  3 | from sklearn.cluster import KMeans
  4 | from sklearn.tree import DecisionTreeRegressor
  5 | from sklearn.metrics import r2_score
  6 | from sprf.tuning import tune_neighbors
  7 | 
  8 | class SpatialRandomForest:
  9 |     """
 10 |     Spatial Random Forest implementation, following the sklearn style
 11 | 
 12 |     Parameters
 13 |     ----------
 14 |     n_estimators : int, optional
 15 |         Number of base estimators (decision trees), by default 20
 16 |     sample_mode : str, optional {cluster, random}
 17 |         Trees are rooted either in the centers of clusters of the dataset, or in
 18 |         random locations, by default "cluster"
 19 |     sample_by : str, optional {neighbors, distance}
 20 |         Sampling strategy. The spatial random forest consists of trees that are
 21 |         fitted on a spatial subset of samples. These spatial subsets can either
 22 |         be sampled by defining a distance-radius, or by specifying a fixed
 23 |         number of spatial neighbors. By default "neighbors", see notes below.
 24 |     neighbors : int, optional
 25 |         Number of neighbors to use for spatial fit, by default 500 samples.
 26 |         Only relevant if sample_by=neighbors.
 27 |     max_distance : int, optional
 28 |         Maximum distance of samples to belong to the same decision tree. Only
 29 |         relevant if sample_by=distance. By default 150000m
 30 |     min_points_distance : int, optional
 31 |         Minimum points for fitting a decision tree, i.e. if the distance is
 32 |         set too low, decision trees would be fit on an insufficient number of
 33 |         points. Only relevant if sample_by=distance, by default 100
 34 |     **kwargs: dict
 35 |         Any arguments that are passed to the sklearn DecisionTreeRegressor
 36 | 
 37 |     Notes
 38 |     ----------
 39 |     - Only regression is implemented so far.
 40 |     - In contrast to other spatial RF papers, we do not build on tree per
 41 |      sample, but rather a fixed set of <n_estimators> trees on spatial subsets
 42 |      of the data. 
 43 |     - The spatial subsets are either coosen by a fixed number of neighbors or
 44 |      by a radius (spatial distance)
 45 |     - Projected coordinates are assumed!
 46 | 
 47 |     Example
 48 |     ----------
 49 |     sp = SpatialRandomForest(max_depth=20, neighbors=50)
 50 |     sp.fit(train_x, train_y, train_coords)
 51 |     pred_y = sp.predict(test_x, test_coords)
 52 |     """
 53 | 
 54 |     def __init__(
 55 |         self,
 56 |         n_estimators: int = 100,
 57 |         sample_mode: str = "cluster",
 58 |         sample_by: str = "neighbors",
 59 |         neighbors: int = 500,
 60 |         max_distance: float = 150000,
 61 |         min_points_distance: int = 100,
 62 |         **decision_tree_args,
 63 |     ):
 64 |         self.estimators = [
 65 |             DecisionTreeRegressor(**decision_tree_args)
 66 |             for _ in range(n_estimators)
 67 |         ]
 68 |         if sample_by == "distance" and max_distance == 150000:
 69 |             warnings.warn(
 70 |                 "It seems that you have selected the 'distance'-sampling mode,\
 71 |                      but the parameter max_distance is still the default. Make\
 72 |                      sure to adapt the max_distance parameter to your dataset."
 73 |             )
 74 |         self.n_estimators = n_estimators
 75 |         self.sample_mode = sample_mode
 76 |         self.sample_by = sample_by
 77 |         # only relevant if sample_by == distance
 78 |         self.max_distance = max_distance
 79 |         self.min_points_distance = min_points_distance
 80 |         # only relevant if sample_by == "neighbors"
 81 |         self.neighbors = neighbors
 82 |         # init core points
 83 |         self.estimator_core_points = []
 84 | 
 85 |     def _sample_core_points(self, coords):
 86 |         """
 87 |         Sample indices of points that form the centers of each spatial tree.
 88 |         coords: 2D Array of shape (N, 2) where N is the number of samples
 89 |         Returns: 2D Array of shape (N, 2) which is a subset / another set of
 90 |             coordinates
 91 |         """
 92 |         if self.sample_mode == "cluster":
 93 |             # cluster coordinates with kmeans use centers as core points
 94 |             kmeans = KMeans(self.n_estimators)
 95 |             kmeans.fit(coords)
 96 |             core_points = kmeans.cluster_centers_
 97 |         # TODO: elif sample_mode == "grid":
 98 |         elif self.sample_mode == "random":
 99 |             # select random coordinates from the train data as core points
100 |             core_points = coords[np.random.permutation(len(coords)
101 |                                                        )[:self.n_estimators]]
102 |         else:
103 |             raise NotImplementedError(
104 |                 "sample mode must be one of cluster, random"
105 |             )
106 |         return core_points
107 | 
108 |     def _sample_point_clouds(self, coords):
109 |         """
110 |         Assign samples to their spatial decision tree.
111 |         coords: 2D Array of shape (N, 2) where N is the number of samples
112 |         Returns: List of lists with indices of samples belonging to each tree
113 |         """
114 |         point_clouds = []
115 |         for core_point in self.estimator_core_points:
116 |             # Compute distance of the core point to all coordinates
117 |             dist_to_others = np.sqrt(np.sum((coords - core_point)**2, axis=1))
118 |             if self.sample_by == "neighbors":
119 |                 # add fixed number of closest samples
120 |                 point_clouds.append(
121 |                     np.argsort(dist_to_others)[:self.neighbors]
122 |                 )
123 |             elif self.sample_by == "distance":
124 |                 # filter by distance
125 |                 point_with_lower_dist = np.where(
126 |                     dist_to_others < self.max_distance
127 |                 )[0]
128 |                 # only use point clouds that are large enough! --> cannot fit a
129 |                 # decision tree on 5 points
130 |                 if len(point_with_lower_dist) > self.min_points_distance:
131 |                     point_clouds.append(point_with_lower_dist)
132 |             else:
133 |                 raise NotImplementedError(
134 |                     "sample mode must be one of 'neighbors', 'distance'!"
135 |                 )
136 |         return point_clouds
137 | 
138 |     def fit(self, x_train, y_train, coords_train):
139 |         """
140 |         Fit spatial random forest to a dataset.
141 | 
142 |         Parameters
143 |         ----------
144 |         x_train : {array-like, sparse matrix} of shape (n_samples, n_features)
145 |             The training input samples. Internally, its dtype will be converted
146 |             to ``dtype=np.float32``.
147 |         y_train : array-like of shape (n_samples,) or (n_samples, n_outputs)
148 |             The target values (real numbers in regression).
149 |         coords_train: array-like of shape (n_samples, 2) with spatial
150 |             coordinates of each sample. Geographic coordinates are assumed to be
151 |             projected!
152 |         """
153 |         # convert to arrays
154 |         x_train, y_train, coords_train = (
155 |             np.array(x_train),
156 |             np.array(y_train),
157 |             np.array(coords_train),
158 |         )
159 |         assert (
160 |             len(coords_train.shape) == 2 and coords_train.shape[1] == 2
161 |         ), "coords test must have len 2 in dimension 1"
162 | 
163 |         # sample core points
164 |         self.estimator_core_points = self._sample_core_points(coords_train)
165 |         # assign samples to their core points
166 |         # (one sample can be in several point clouds!)
167 |         point_clouds = self._sample_point_clouds(coords_train)
168 |         if len(point_clouds) < self.n_estimators:
169 |             warnings.warn(
170 |                 f"Some point clouds had less than {self.min_points_distance}\
171 |                      points and are therefore ignored.\
172 |                      Consider increasing the parameter 'max_distance' to\
173 |            include more points (recommended), or decrease 'min_points_distance'"
174 |             )
175 |             # correct number of estimators
176 |             self.n_estimators = len(point_clouds)
177 |             self.estimators = self.estimators[:self.n_estimators]
178 |         # correct core points: Use center of gravity of each point clouds
179 |         self.estimator_core_points = np.array(
180 |             [
181 |                 np.mean(coords_train[cloud_inds], axis=0)
182 |                 for cloud_inds in point_clouds
183 |             ]
184 |         )
185 |         # fit each point cloud to an estimator
186 |         for i, sample_inds in enumerate(point_clouds):
187 |             x_train_subset = x_train[sample_inds]
188 |             y_train_subset = y_train[sample_inds]
189 |             self.estimators[i].fit(x_train_subset, y_train_subset)
190 |     
191 |     def tune_neighbors(self, *args, **kwargs):
192 |         self.neighbors = tune_neighbors(self, *args, **kwargs)
193 | 
194 |     def predict(self, x_test, coords_test=None, weighted=True, closest=False):
195 |         """
196 |         Predict class for X.
197 |         The predicted class of an input sample is a vote by the trees in
198 |         the forest, weighted by their probability estimates. That is,
199 |         the predicted class is the one with highest mean probability
200 |         estimate across the trees.
201 | 
202 |         Parameters
203 |         ----------
204 |         x_test : array-like of shape (n_samples, n_features)
205 |             The input samples. Internally, its dtype will be converted to
206 |             ``dtype=np.float32``. 
207 |         coords_test: array-like of shape (n_samples, 2), optional
208 |             Coordinates are only required if weighted=True, i.e. if the tree-
209 |             wise outputs should be weighted and combined by their distance
210 |         weighted: bool, optional
211 |             Whether the tree-wise predictions should be aggregated based on
212 |             their spatial distance to the test sample (similar to inverse
213 |             distance weighting).
214 | 
215 |         Returns
216 |         -------
217 |         y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
218 |             The predicted values.
219 |         """
220 |         # convert to arrays
221 |         x_test = np.array(x_test)
222 |         if coords_test is not None:
223 |             coords_test = np.array(coords_test)
224 |         assert (coords_test is not None) or weighted == False, (
225 |             "If weighted=True, then coords_test is required."
226 |         )
227 |         # predict output with each base estimator
228 |         y_pred = np.zeros((len(x_test), self.n_estimators))
229 |         for i, estimator in enumerate(self.estimators):
230 |             y_pred[:, i] = estimator.predict(x_test)
231 |         # If no spatial weighting: Simply return average of estimators
232 |         if not weighted and not closest:
233 |             return np.mean(y_pred, axis=1)
234 |         # if weighted: check that test coords are alright
235 |         coords_test = np.array(coords_test)
236 |         assert (
237 |             len(coords_test.shape) == 2 and coords_test.shape[1] == 2
238 |         ), "coords test must have len 2 in dimension 1"
239 |         # compute distance of test samples to all core points
240 |         dist_to_core_points = np.array(
241 |             [
242 |                 np.sqrt(np.sum((coords_test - core_point)**2, axis=1))
243 |                 for core_point in self.estimator_core_points
244 |             ]
245 |         ).swapaxes(1, 0)
246 |         if closest:
247 |             use_tree = np.argmin(dist_to_core_points, axis=1)
248 |             return y_pred[np.arange(len(y_pred)), use_tree]
249 | 
250 |         # turn into probabilies
251 |         if np.any(dist_to_core_points == 0):
252 |             # special if test sample is exactly equal to one of the core points
253 |             weights = np.array(
254 |                 [0 if dist != 0 else 1 for dist in dist_to_core_points]
255 |             )
256 |         else:
257 |             # normal situation: weight dependent on spatial distance
258 |             weights = 1 / dist_to_core_points
259 |             weights = weights / np.expand_dims(np.sum(weights, axis=1), 1)
260 | 
261 |         # prediction is weighted sum
262 |         y_pred = np.sum(y_pred * weights, axis=1)
263 |         return y_pred
264 | 
265 |     def _sample_by_distance_old(
266 |         coords_train, nr_clouds=20, radius=150000, min_points=400
267 |     ):
268 |         """Deprecated"""
269 |         # make distance matrix n x n
270 |         dist = np.zeros((len(coords_train), len(coords_train)))
271 |         for i, coord1 in enumerate(coords_train):
272 |             for j, coord2 in enumerate(coords_train[i:]):
273 |                 dist[i, j + i] = np.linalg.norm(coord1 - coord2)
274 |         # mirror distance matrix
275 |         dist = dist + dist.T
276 |         # make point clouds
277 |         point_clouds = []
278 |         for core_ind in np.random.permutation(len(dist)):
279 |             dist_to_others = dist[core_ind]
280 |             inds = np.where(dist_to_others < radius)[0]
281 |             if len(inds) > min_points:
282 |                 point_clouds.append(inds)
283 |             #             print("Cloud for core ind", core_ind, "has members", len(inds))
284 |             if len(point_clouds) > nr_clouds:
285 |                 break
286 |         return point_clouds
287 | 


--------------------------------------------------------------------------------
/sprf/tuning.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.metrics import r2_score
 3 | 
 4 | 
 5 | def tune_neighbors(
 6 |     model, x_train, y_train, coords_train, nr_check=10, eval_criterium=r2_score,
 7 | ):
 8 |     x_train, y_train, coords_train = (
 9 |         np.array(x_train),
10 |         np.array(y_train),
11 |         np.array(coords_train),
12 |     )
13 |     max_neighbors = len(x_train)
14 |     # split in train and val
15 |     cutoff = int(len(x_train) * 0.9)
16 |     rand_inds = np.random.permutation(max_neighbors)
17 |     train_i, val_i = rand_inds[:cutoff], rand_inds[cutoff:]
18 |     x_val = x_train[val_i]
19 |     x_train = x_train[train_i]
20 |     y_val = y_train[val_i]
21 |     y_train = y_train[train_i]
22 |     coords_val = coords_train[val_i]
23 |     coords_train = coords_train[train_i]
24 | 
25 |     steps_to_check = np.linspace(0, max_neighbors, nr_check + 2).astype(int)
26 |     best_neighbors = model.neighbors
27 |     best_performance = -np.inf
28 |     for neighbors in steps_to_check[1:-1]:
29 |         model.neighbors = neighbors
30 |         model.fit(x_train, y_train, coords_train)
31 |         y_pred = model.predict(x_val, coords_val)
32 |         performance = eval_criterium(y_pred, y_val)
33 |         if performance > best_performance:
34 |             best_neighbors = neighbors
35 |             best_performance = performance
36 | 
37 |     # print("Found best bandwidth (neighbors) at ", best_neighbors)
38 |     model.neighbors = best_neighbors
39 |     return best_neighbors
40 | 


--------------------------------------------------------------------------------
/sprf_demo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "0d30a6cf",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# Standard and GIS Modules\n",
 11 |     "import os\n",
 12 |     "import numpy as np\n",
 13 |     "import pandas as pd\n",
 14 |     "import time\n",
 15 |     "import scipy\n",
 16 |     "from sklearn.ensemble import RandomForestRegressor\n",
 17 |     "from sklearn.metrics import mean_squared_error\n",
 18 |     "\n",
 19 |     "# import sprf package\n",
 20 |     "from sprf.spatial_random_forest import SpatialRandomForest\n",
 21 |     "from sprf.geographical_random_forest import GeographicalRandomForest\n",
 22 |     "\n",
 23 |     "# constants:\n",
 24 |     "dataset_target = {\n",
 25 |     "    \"plants\": \"richness_species_vascular\",\n",
 26 |     "    \"meuse\": \"zinc\",\n",
 27 |     "    \"atlantic\": \"Rate\",\n",
 28 |     "    \"deforestation\": \"deforestation_quantile\",\n",
 29 |     "    \"california_housing\": \"median_house_value\",\n",
 30 |     "}"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 2,
 36 |    "id": "b7d00340",
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "name": "stdout",
 41 |      "output_type": "stream",
 42 |      "text": [
 43 |       "Set dataset here: Must be one of ['plants', 'meuse', 'atlantic', 'deforestation', 'california_housing']\n"
 44 |      ]
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "print(f\"Set dataset here: Must be one of {list(dataset_target.keys())}\")\n",
 49 |     "dataset = \"plants\""
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "id": "dcb25046",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "### Load data"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 3,
 63 |    "id": "28692dce",
 64 |    "metadata": {},
 65 |    "outputs": [
 66 |     {
 67 |      "name": "stdout",
 68 |      "output_type": "stream",
 69 |      "text": [
 70 |       "samples:  227\n"
 71 |      ]
 72 |     }
 73 |    ],
 74 |    "source": [
 75 |     "data = pd.read_csv(os.path.join(\"data\", dataset+\".csv\"))\n",
 76 |     "print(\"samples: \", len(data))\n",
 77 |     "target = dataset_target[dataset]"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "id": "73366dc4",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "### Split into train and test"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 4,
 91 |    "id": "29cd834b",
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "def prepare_data(data, target, lon=\"x\", lat=\"y\"):\n",
 96 |     "    \"\"\"Assumes that all other columns are used as covariates\"\"\"\n",
 97 |     "    covariates = [col for col in data.columns if col not in [lon, lat, target]]\n",
 98 |     "    return data[covariates], data[target], data[[lon, lat]]"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 5,
104 |    "id": "9102cda2",
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "inds = np.random.permutation(len(data))\n",
109 |     "split = int(0.9* len(data))\n",
110 |     "train_data = data.iloc[inds[:split]]\n",
111 |     "test_data = data.iloc[inds[split:]]\n",
112 |     "\n",
113 |     "# split into x, y and coordinates\n",
114 |     "train_x, train_y, train_coords = prepare_data(\n",
115 |     "    train_data, target\n",
116 |     ")\n",
117 |     "test_x, test_y, test_coords = prepare_data(\n",
118 |     "    test_data, target\n",
119 |     ")"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "id": "9f6f9ac8",
125 |    "metadata": {},
126 |    "source": [
127 |     "### Train and test basic random forest"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 6,
133 |    "id": "821a8ecf",
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "n_estimators = 100 # can take quite long for Geographical RF\n",
138 |     "max_depth = 10\n",
139 |     "spatial_neighbors = len(data) // 5"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 7,
145 |    "id": "9b29767c",
146 |    "metadata": {},
147 |    "outputs": [
148 |     {
149 |      "name": "stdout",
150 |      "output_type": "stream",
151 |      "text": [
152 |       "Error of basic Random Forest:  1233.113304527673\n"
153 |      ]
154 |     }
155 |    ],
156 |    "source": [
157 |     "rf = RandomForestRegressor(max_depth=max_depth)\n",
158 |     "rf.fit(train_x, train_y)\n",
159 |     "test_pred = rf.predict(test_x)\n",
160 |     "\n",
161 |     "rmse_rf = mean_squared_error(test_pred, test_y, squared=False)\n",
162 |     "print(\"Error of basic Random Forest: \", rmse_rf)"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "markdown",
167 |    "id": "3b8a6329",
168 |    "metadata": {},
169 |    "source": [
170 |     "### Train and test spatial random forest"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 8,
176 |    "id": "55dc25b8",
177 |    "metadata": {},
178 |    "outputs": [
179 |     {
180 |      "name": "stdout",
181 |      "output_type": "stream",
182 |      "text": [
183 |       "Error of spatial Random Forest:  1494.09398242311\n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "sp = SpatialRandomForest(\n",
189 |     "    max_depth=max_depth, neighbors=spatial_neighbors\n",
190 |     ")\n",
191 |     "sp.fit(train_x, train_y, train_coords)\n",
192 |     "test_pred = sp.predict(test_x, test_coords)\n",
193 |     "\n",
194 |     "rmse_spatial_rf = mean_squared_error(test_pred, test_y, squared=False)\n",
195 |     "print(\"Error of spatial Random Forest: \", rmse_spatial_rf)"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "id": "560c3a66",
201 |    "metadata": {},
202 |    "source": [
203 |     "### Train and test geographical random forest"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": 9,
209 |    "id": "8bcf6b0e",
210 |    "metadata": {},
211 |    "outputs": [
212 |     {
213 |      "name": "stdout",
214 |      "output_type": "stream",
215 |      "text": [
216 |       "Error of geographical Random Forest:  1326.3920442520464\n"
217 |      ]
218 |     }
219 |    ],
220 |    "source": [
221 |     "geo_rf = GeographicalRandomForest(\n",
222 |     "    n_estimators=n_estimators, neighbors=spatial_neighbors, max_depth=max_depth\n",
223 |     ")\n",
224 |     "geo_rf.fit(train_x, train_y, train_coords)\n",
225 |     "test_pred = geo_rf.predict(test_x, test_coords)\n",
226 |     "\n",
227 |     "rmse_geo_rf = mean_squared_error(test_pred, test_y, squared=False)\n",
228 |     "print(\"Error of geographical Random Forest: \", rmse_geo_rf)"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "id": "0f5a1644",
234 |    "metadata": {},
235 |    "source": [
236 |     "### Tune number of neighbors"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 10,
242 |    "id": "05ce2fd3",
243 |    "metadata": {},
244 |    "outputs": [
245 |     {
246 |      "name": "stdout",
247 |      "output_type": "stream",
248 |      "text": [
249 |       "spatial rf tuned: 74\n",
250 |       "Error of tuned spatial Random Forest:  1452.5026591622952\n"
251 |      ]
252 |     }
253 |    ],
254 |    "source": [
255 |     "regr = SpatialRandomForest(\n",
256 |     "        n_estimators=n_estimators, neighbors=500, max_depth=max_depth\n",
257 |     "    )\n",
258 |     "regr.tune_neighbors(train_x, train_y, train_coords)\n",
259 |     "print(\"spatial rf tuned:\", regr.neighbors)\n",
260 |     "regr.fit(train_x, train_y, train_coords)\n",
261 |     "test_pred = regr.predict(test_x, test_coords)\n",
262 |     "rmse_spatial_rf_tuned = mean_squared_error(test_pred, test_y, squared=False)\n",
263 |     "print(\"Error of tuned spatial Random Forest: \", rmse_spatial_rf_tuned)"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": null,
269 |    "id": "9f78a087",
270 |    "metadata": {},
271 |    "outputs": [],
272 |    "source": []
273 |   }
274 |  ],
275 |  "metadata": {
276 |   "kernelspec": {
277 |    "display_name": "Python 3 (ipykernel)",
278 |    "language": "python",
279 |    "name": "python3"
280 |   },
281 |   "language_info": {
282 |    "codemirror_mode": {
283 |     "name": "ipython",
284 |     "version": 3
285 |    },
286 |    "file_extension": ".py",
287 |    "mimetype": "text/x-python",
288 |    "name": "python",
289 |    "nbconvert_exporter": "python",
290 |    "pygments_lexer": "ipython3",
291 |    "version": "3.9.5"
292 |   }
293 |  },
294 |  "nbformat": 4,
295 |  "nbformat_minor": 5
296 | }
297 | 


--------------------------------------------------------------------------------
/tests/test_sprf.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | import pandas as pd
 4 | from sprf import SpatialRandomForest
 5 | 
 6 | 
 7 | class TestSpatialRandomForest:
 8 |     """Test spatial random forest class"""
 9 | 
10 |     x_train = np.random.rand(500, 10)
11 |     y_train = np.random.rand(500)
12 |     coords_train = np.random.rand(500, 2)
13 |     x_test = np.random.rand(50, 10)
14 |     y_test = np.random.rand(50)
15 |     coords_test = np.random.rand(50, 2)
16 | 
17 |     def test_init_warning(self):
18 |         with pytest.warns(UserWarning):
19 |             sp = SpatialRandomForest(sample_by="distance")
20 | 
21 |     def test_fit(self):
22 |         sp = SpatialRandomForest()
23 |         sp.fit(self.x_train, self.y_train, self.coords_train)
24 |         assert sp.n_estimators == 20
25 | 
26 |     def test_fit_equal(self):
27 |         np.random.seed(42)
28 |         sp1 = SpatialRandomForest()
29 |         x_df = pd.DataFrame(self.x_train)
30 |         sp1.fit(x_df, self.y_train, self.coords_train)
31 |         y_pred_1 = sp1.predict(self.x_test, self.coords_test)
32 | 
33 |         np.random.seed(42)
34 |         sp2 = SpatialRandomForest()
35 |         sp2.fit(self.x_train, self.y_train, self.coords_train)
36 |         y_pred_2 = sp2.predict(self.x_test, self.coords_test)
37 |         assert np.all(y_pred_1 == y_pred_2)
38 | 
39 |     def test_predict_error(self):
40 |         sp = SpatialRandomForest()
41 |         sp.fit(self.x_train, self.y_train, self.coords_train)
42 |         with pytest.raises(AssertionError):
43 |             sp.predict(self.x_test)
44 | 


--------------------------------------------------------------------------------