├── .gitignore ├── LICENSE ├── README.md ├── SupplementaryMaterial.pdf ├── code_method ├── environment.yaml ├── morphometric_assessment.ipynb └── validation.ipynb ├── code_production ├── Amsterdam │ ├── 200224_extract Ams from 3d bag.ipynb │ ├── 200224_extract_network.ipynb │ ├── 200224_preprocess.ipynb │ ├── 200224_simplify_network.ipynb │ ├── 200224_tessellation.ipynb │ ├── 200225_fix_tessellation.ipynb │ ├── 200227_Queen_corners.ipynb │ ├── 200305 height processing.ipynb │ ├── 200305_Blocks.ipynb │ ├── 200305_fix2_tessellation.ipynb │ ├── 200305_measure_part 1.ipynb │ ├── 200305_network_minor_preprocessing.ipynb │ ├── 200306_false_nodes.ipynb │ ├── 200306_measure part 2.ipynb │ ├── 200307_Calculate_contextualised.ipynb │ ├── 200307_Complete_BIC_analysis.ipynb │ ├── 200307_Normalize.ipynb │ ├── 200307_Simpson diversity.ipynb │ ├── 200307_merge data.ipynb │ ├── 200309_Amsterdam_level0_XXcomponents.ipynb │ ├── 200310_Hierarchical clustering.ipynb │ ├── 200310_additionalBIC.ipynb │ ├── 200315_Plot clusters.ipynb │ ├── 200318_Plot_hierarchical_clusters.ipynb │ ├── 200504_plot_bic.ipynb │ ├── 200511_get_origin_bag.ipynb │ ├── 200511_get_origin_wfs.ipynb │ ├── 200511_validation_origin.ipynb │ ├── clustering.ipynb │ ├── fix broken chars.ipynb │ └── recalculate_contextual.ipynb ├── Prague │ ├── 01tes.py │ ├── 02queen.py │ ├── 03_fix_streets.py │ ├── 03blocks.py │ ├── 04measure1.py │ ├── 04measure2.py │ ├── 04measure3.py │ ├── 04measure4.py │ ├── 05contextual.ipynb │ ├── 06Simpson_diversity.ipynb │ ├── 07BIC_detail10-15.ipynb │ ├── 07BIC_detail15-25.ipynb │ ├── 07BIC_sample+detail.ipynb │ ├── 08clustering.ipynb │ ├── 09Extract validation data.ipynb │ ├── 10Historical origin and taxa.ipynb │ ├── 11Land use and taxa.ipynb │ └── 12Qualitative_typology.ipynb ├── plot.ipynb └── taxonomy.ipynb ├── files └── sample.gpkg └── leaflet_maps.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 
| __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .DS_Store 131 | files/geometry.gpkg 132 | files/*.parquet 133 | files/*.pq 134 | files/*.gal 135 | files/*.csv 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. 
The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. 
More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. 
Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. 
Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. 
Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 
211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 
263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. 
Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. 
Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Methodological Foundation of a Numerical Taxonomy of Urban Form 2 | Code repository for the **Methodological Foundation of a Numerical Taxonomy of Urban 3 | Form** paper. 4 | 5 | > Fleischmann M, Feliciotti A, Romice O and Porta S (2021) Methodological Foundation of 6 | > a Numerical Taxonomy of Urban Form. 
Environment and Planning B: Urban Analytics and 7 | > City Science, doi: [10.1177/23998083211059835](https://doi.org/10.1177/23998083211059835) 8 | 9 | Martin Fleischmann1, 2, Alessandra Feliciotti2, Ombretta 10 | Romice2, Sergio Porta2 11 | 12 | 1 Department of Geography and Planning, University of Liverpool 13 | 14 | 2 Urban Design Studies Unit, Department of Architecture, University of Strathclyde 15 | 16 | Contact: martin@martinfleischmann.net 17 | 18 | Date: 28/10/2021 19 | 20 | [![maps](leaflet_maps.png)](https://martinfleis.github.io/numerical-taxonomy-maps/) 21 | 22 | The online interactive maps of the final classification are available at [https://martinfleis.github.io/numerical-taxonomy-maps/](https://martinfleis.github.io/numerical-taxonomy-maps/). 23 | 24 | ## Code 25 | 26 | The code is split into two folders - `code_method` containing cleaned reproducible 27 | Python code for everyone willing to use the method, and `code_production` containing an 28 | archive of the used (and somewhat messy) code. 29 | 30 | ### The method 31 | The folder `code_method` contains generalised code for the method that should be 32 | reproducible on custom data. The main notebook `morphometric_assessment.ipynb` has 33 | been updated to work with the recent releases of software. You can create the 34 | reproducible environment to run it using `conda` or `mamba` and the `environment.yaml` 35 | file in the `code_method` folder. 36 | 37 | ``` 38 | conda env create -f environment.yaml 39 | ``` 40 | 41 | You can also create a new environment `taxonomy` manually: 42 | 43 | ``` 44 | conda create -n taxonomy 45 | conda activate taxonomy 46 | conda config --env --add channels conda-forge 47 | conda config --env --set channel_priority strict 48 | conda install momepy mapclassify seaborn 49 | ``` 50 | 51 | Alternatively, you can use the Docker container `darribas/gds_py:7.0`.
52 | 53 | ### The code 54 | The folder `code_production` is an archive of the actual production code used to 55 | generate the analysis presented in the paper. However, it is recommended to use the code 56 | in the `code_method` folder if you want to reproduce the work. The code in the folder is 57 | stored for archival purposes and different parts may depend on different versions of 58 | dependencies. 59 | 60 | ## Data 61 | 62 | Non-proprietary data are archived on figshare as 63 | [10.6084/m9.figshare.16897102](https://doi.org/10.6084/m9.figshare.16897102). The 64 | archive contains input geometry, generated geometry, all measured morphometric 65 | characters and final classification labels for Prague and Amsterdam. It does not 66 | contain validation data, which are available upon request (due to the licensing). 67 | 68 | The online interactive maps of the final classification are available at [https://martinfleis.github.io/numerical-taxonomy-maps/](https://martinfleis.github.io/numerical-taxonomy-maps/). 69 | 70 | ## Preprint 71 | 72 | Preprint of the final manuscript is available from [arXiv](https://arxiv.org/abs/2104.14956). 73 | 74 | ## Abstract 75 | 76 | Cities are complex products of human culture, characterised by a startling diversity of 77 | visible traits. Their form is constantly evolving, reflecting changing human needs and 78 | local contingencies, manifested in space by many urban patterns. Urban Morphology laid 79 | the foundation for understanding many such patterns, largely relying on qualitative 80 | research methods to extract distinct spatial identities of urban areas. However, the 81 | manual, labour-intensive and subjective nature of such approaches represents an 82 | impediment to the development of a scalable, replicable and data-driven urban form 83 | characterisation. Recently, advances in Geographic Data Science and the availability of 84 | digital mapping products, open the opportunity to overcome such limitations.
And yet, 85 | our current capacity to systematically capture the heterogeneity of spatial patterns 86 | remains limited in terms of spatial parameters included in the analysis and hardly 87 | scalable due to the highly labour-intensive nature of the task. In this paper, we 88 | present a method for numerical taxonomy of urban form derived from biological 89 | systematics, which allows the rigorous detection and classification of urban types. 90 | Initially, we produce a rich numerical characterisation of urban space from minimal data 91 | input, minimizing limitations due to inconsistent data quality and availability. These 92 | are street network, building footprint, and morphological tessellation, a spatial unit 93 | derivative of Voronoi tessellation, obtained from building footprints. Hence, we derive 94 | homogeneous urban tissue types and, by determining overall morphological similarity 95 | between them, generate a hierarchical classification of urban form. After framing and 96 | presenting the method, we test it on two cities - Prague and Amsterdam - and discuss 97 | potential applications and further developments. The proposed classification method 98 | represents a step towards the development of an extensive, scalable numerical taxonomy 99 | of urban form and opens the way to more rigorous comparative morphological studies and 100 | explorations into the relationship between urban space and phenomena as diverse as 101 | environmental performance, health and place attractiveness. 
102 | -------------------------------------------------------------------------------- /SupplementaryMaterial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinfleis/numerical-taxonomy-paper/518497e2cad8f6379f02fbe2624144a6f42aa1e3/SupplementaryMaterial.pdf -------------------------------------------------------------------------------- /code_method/environment.yaml: -------------------------------------------------------------------------------- 1 | name: taxonomy 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - momepy==0.5.1 6 | - geopandas==0.10.2 7 | - pygeos==0.10.2 8 | - mapclassify==2.4.3 9 | - seaborn==0.11.2 10 | - libpysal==4.5.1 11 | - scikit-learn==1.0.1 12 | - scipy==1.7.1 13 | -------------------------------------------------------------------------------- /code_method/validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Methodological Foundation of a Numerical Taxonomy of Urban Form\n", 8 | "\n", 9 | "## Reproducible Python code for validation\n", 10 | "\n", 11 | "Code used to perform validation using additional data sources.\n", 12 | "\n", 13 | "Validation files are expected to be polygon geometries with an attribute column representing target variable.\n", 14 | "\n", 15 | "The reproducible computational environment can be created using Docker container `darribas/gds_py:5.0`.\n", 16 | "\n", 17 | "The same code has been used to analyse all cases." 
18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd\n", 27 | "import geopandas as gpd\n", 28 | "import scipy.stats as ss\n", 29 | "import numpy as np" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "We load all data and perform a spatial join based on building centroids." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "clusters = pd.read_csv('files/200218_clusters_complete_n20.csv', index_col=0) # cluster labels" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "validation = gpd.read_file(\"validation_file_path\") # validation data" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "buildings = gpd.read_file('files/geometry.gpkg', layer='buildings') # building geometry" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "buildings['cent'] = buildings.centroid\n", 73 | "buildings = buildings.set_geometry('cent')" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "buildings = buildings.to_crs(validation.crs)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "joined = gpd.sjoin(buildings, validation, how='left')" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "joined = joined.merge(clusters, how='left', on='uID')" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | 
"execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "joined = joined.set_geometry('geometry')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "The resulting DataFrame contains attribute columns with the cluster labels and with the target variable. Now we can measure Cramér's V and the chi-squared statistic." 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "def cramers_v(x, y):\n", 126 | " confusion_matrix = pd.crosstab(x,y)\n", 127 | " chi2 = ss.chi2_contingency(confusion_matrix)[0]\n", 128 | " n = confusion_matrix.sum().sum()\n", 129 | " phi2 = chi2/n\n", 130 | " r,k = confusion_matrix.shape\n", 131 | " phi2corr = max(0, phi2-((k-1)*(r-1))/(n-1))\n", 132 | " rcorr = r-((r-1)**2)/(n-1)\n", 133 | " kcorr = k-((k-1)**2)/(n-1)\n", 134 | " return np.sqrt(phi2corr/min((kcorr-1),(rcorr-1)))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "cramers_v(joined.cluster, joined[\"validation_data\"])" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "confusion_matrix = pd.crosstab(joined.cluster, joined[\"validation_data\"])\n", 153 | "chi, p, dof, exp = ss.chi2_contingency(confusion_matrix)" 154 | ] 155 | } 156 | ], 157 | "metadata": { 158 | "kernelspec": { 159 | "display_name": "Python 3", 160 | "language": "python", 161 | "name": "python3" 162 | }, 163 | "language_info": { 164 | "codemirror_mode": { 165 | "name": "ipython", 166 | "version": 3 167 | }, 168 | "file_extension": ".py", 169 | "mimetype": "text/x-python", 170 | "name": "python", 171 | "nbconvert_exporter": "python", 172 | "pygments_lexer": "ipython3", 173 | "version": "3.7.8" 174 | } 175 | }, 176 | "nbformat": 4, 177 | 
"nbformat_minor": 4 178 | } -------------------------------------------------------------------------------- /code_production/Amsterdam/200224_extract Ams from 3d bag.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import fiona" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "output_type": "execute_result", 20 | "data": { 21 | "text/plain": "'0.7.0+52.gc086056'" 22 | }, 23 | "metadata": {}, 24 | "execution_count": 2 25 | } 26 | ], 27 | "source": [ 28 | "gpd.__version__" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "path = '/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/raw/bag3d_2020-01-25.gpkg'" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "output_type": "execute_result", 47 | "data": { 48 | "text/plain": "['pand3d']" 49 | }, 50 | "metadata": {}, 51 | "execution_count": 4 52 | } 53 | ], 54 | "source": [ 55 | "fiona.listlayers(path)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "mask = gpd.read_file('/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/data.gpkg', layer='boundary')" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 22, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "" 76 | ] 77 | }, 78 | "execution_count": 22, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | }, 82 | { 83 | "data": { 84 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPEAAAD4CAYAAAA9xADBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO2deZxU5ZX3vweaZhWahkYbuhGQLYos2izGxAUiYjRiZjSSOJGoo5FkHMX40WScV8dl8rpF1JjgOBoFl6D06KsTIwQVjQsg3bLIKs1mN2tDLzQ0vZ/3j/uUXRQNXU1X1a3qOt/Ppz5969znOXXqwq+e5T73OaKqGIaRuLTzOwDDMFqHidgwEhwTsWEkOCZiw0hwTMSGkeCk+B1ALOndu7cOGDDA7zAM44TIz8/fp6oZofakEvGAAQPIy8vzOwzDOCFEZHtTdutOG0aCYyI2jATHRGwYCY6J2DASHBOxYSQ4JmLDSHBMxIaR4JiIDSPBSarFHolGVW095YdrKauspayyxjs+XEt5Za07rqHMHQ/s3ZWrx2ZzRt8efodtxBgTsY+sLCzjL6t2HlOc1XUNYfv6eNM+5i7ZzsisHlw9NpvLR/XlpE4dohi9ES+YiH3kYFUdz32yNaI+VxeVs7qonAf/sp5LR2YybWw2Z5/aExGJ6OcY8YOJ2EfOzIpe1/dwbT25+UXk5hcxuE83po3N5odj+tGrW8eofabhDzax5SM9OndgUO+uUf+cgr0HefCd9Uz4v+/zy1e+4ONNxTQ02N5qbQVriX1mVHYaW/Ydisln1dYr73y5i3e+3EW/tM5cPTabq3KyyOzROSafb0QHa4l9ZlQUu9THY0fZYR5f9BXnPvQB173wOQvX7qa2PvyJNCN+sJbYZ0Zmp/n6+Q0KizcWs3hjMb27deTKs7O4emw2A2PQzTcig4nYZ8oqa/wO4Rv2HazmmY8288xHmxk/MJ1p47K5ZEQmnTq09zs04ziYiH1EVXl80Vd+h9Eky7aWsGxrCfe+tZYfjunH1WP7c3rf7n6HZTSBidhHFq7dw5odB/wO47gcqKpjzpLtzAlaSPIPY7LonGqtc7xgIvaJ+gbl8UUb/Q6jRQQWkgjCT8b39zscw2Gz0z7xl9U7+WrPQb/DOCHeW7/H7xCMIEzEPlBX38AT723yO4wT5pOCfVTW1PkdhuEwEfvAGyt2sDVGCzyiQU1dAx9v2ud3GIbDRBxjauoaeDKBW+EA762zLnW8YCKOMa/lFbKj7LDfYbSaDzbspd7WX8cFJuIYUlVbz9MfJH4rDLD/UA0rC0v9DsPARBxTXl66nT0Hqv0OI2IsWrfX7xAMwhSxiKSJSK6IbBCR9SJyjoiMFpGlIrJSRPJEZJwrmyoiL4jIlyKySkQuCPJztrMXiMhT4p5UF5GOIvKasy8TkQFBdaaLyCb3mh5kH+jKbnJ1UyN0TaLCoeo6Zn+42e8wIordaooPwm2JnwQWqOpwYBSwHngEuE9VRwP3uPcANwKo6pnARcDvRCTwObOBm4Ah7jXF2W8ASlV1MDALeBhARNKBe4HxwDjgXhHp6eo8DMxS1SFAqfMRt/z586/Zfyh+1klHgoK9BxN6lr2t0KyIRaQ7cB7wPICq1qhqGaBAYDFtD2CnOz4deN+V3QuUATkikgl0V9UlqqrAXOAKV2cqMMcd5wKTXCt9MbBIVUtUtRRYBExx5ya6sri6AV9xyUmd2ubiuPetNfadcFriQUAx8IKIrBCR50SkK3Ab8KiIFAKPAb9x5VcBU0UkRUQGAmcD2UA/oCjIb5Gz4f4WAqhqHVAO9Aq2h9TpBZS5sqG+jkBEbnLd/bzi4uIwvm50OG/oUWll2wSL7FaT74Qj4hTgLGC2qo4BDgG/BmYAM1U1G5iJa6mBP+GJKg94AvgMqAOa2qktcI/iWOdaaj/aqPqsquaoak5Ghn9CyuzRmSF9uvn2+dEib3sppW1smJBohCPiIqB
IVZe597l4op4OvOFs8/HGrKhqnarOVNXRqjoVSAM2OT9ZQX6zaOyCF+G11ohICl73vCTYHlJnH5Dmyob6ilvaYmtc36B8+JXNUvtJsyJW1d1AoYgMc6ZJwDo80ZzvbBPxhIqIdHHdbUTkIqBOVdep6i6gQkQmuDHttcBbrv7beD8KAFcCH7hx80Jgsoj0dBNak4GF7txiVxZXN+ArbmmLIgZ4z241+Uq4sy23AK+42zhbgOvwRPOkaw2r8GadAfoAC0WkAdgB/DTIzwzgRaAz8K57gdcVf0lECvBa4GkAqloiIg8Ay125+1W1xB3fBcwTkQeBFTR25+OW8QPT6ZjSrkWbwicCH31VTHVdPR1T7BljPxCvUUsOcnJyNC8vz9cYfvr8sjb58MBlIzO5f+oI0rvG9e36hEZE8lU1J9RuK7ZiTN82uj3sX1bvYvKsj1iwZpffoSQdJuIYotq2J4H2Hazh5pe/4F9e/YL9B9vO8tJ4x0QcQ9bsONCm1k4fC69V/jvvrLZWORaYiGPIoiRa3bT/UA2/fPULZrycT3FF2//h8hMTcQxJxiWK767ZzeRZH/HWyh0k0yRqLDERx4idZYdZuzO+t6eNFqWVtdw6byU/fymfvRVVfofT5jARx4hkbIVD+du6PVz0+N95c0WRtcoRxEQcIxatb7uz0i2h/HAtM19bxY1z89hzwFrlSGAijgEVVbUs2dz2Fni0hvfW7+Wixz8iN99a5dZiIo4BH2/aR229/UcN5UBVHXfMX8X1Ly5nV3nibx7oFybiGGDbux6fxRuLmfz433l9eWHzhY2jMBFHmbr6Bj7YaOPh5qioruPO/1nNpwU27GgpJuIok7+9lLLKWr/DSBgeWbDBxsgtxEQcZWxHyJaxqqichWt3+x1GQmEijiKqantQnQCPLtxIXX3beuY6mpiIo8jm4kNs21/pdxgJx+biQ7zxxQ6/w0gYTMRRxLrSJ86s976iqrbe7zASAhNxFLFbSyfOrvIqXl663e8wEgITcZTYf7CaL762hGOt4Q+LC6iospn95jARR4nFG4uxzJ+to7Sylv/+eKvfYcQ9JuIoYV3pyPDcx1vYZ1v9HBcTcRQoOVTTpvfSiiWVNfU8/UGB32HENSbiKPDip1upqrX7nJHilWXbKSyxW3XHwkQcYSqqannxs21+h9GmqK1Xnnhvk99hxC0m4gjz6rKvOVBV13xBo0W8saKIjbsr/A4jLglLxCKSJiK5IrJBRNaLyDkiMlpElorISpc6dJwr20FE5ojIl67sb4L8nO3sBSLylMvJhIh0FJHXnH2ZiAwIqjNdRDa51/Qg+0BXdpOr63vqgaraeptNjRKq8NjfNvodRlwSbkv8JLBAVYcDo4D1wCPAfao6GrjHvQe4Cuioqmfi5Sb+eZAoZ+PlbBriXlOc/QagVFUHA7OAhwFEJB24FxiPl3XxXpdYDVdmlqoOAUqdD1+Zn19kM6lRZNG6PeRvt3vvoTQrYhHpDpyHS1imqjWqWoaXD7i7K9aDxtSiCnR1idY6AzXAARHJBLqr6hKX1XAucIWrMxWY445zgUmulb4YWKSqJapaCiwCprhzE11ZXN2AL1+oq2/gvz7a7GcISYE9qng04bTEg4Bi4AURWSEiz7nUpbcBj4pIIfAYEOg25+IlIt8FfA085jIZ9sPLNxygyNlwfwvBy28MlAO9gu0hdXoBZa5sqK8jEJGbXHc/r7i4OIyve2L87+qdFJXaFjPRZtnWEv7eBhPStYZwRJyCl1R8tqqOwRPor/HSlM5U1WxgJo2pRccB9UBfYCDwKxEZBEgTvgM/qcc611L70UbVZ1U1R1VzMjKilx9YFXp36xg1/0YjjyzYQIMth/uGcERcBBSp6jL3PhdP1NOBN5xtPp54AX6CN36uVdW9wKdAjvOTFeQ3i8YueBGQDeC64T3w8hR/Yw+psw9Ic2VDffnCP5yVxSd3XcgDV4wgq2fbzHwYL6zdeYB3vrQ8TwGaFbG
q7gYKRWSYM00C1uGJ5nxnmwgEbuR9DUwUj67ABGCDqu4CKkRkghvTXouXqBzgbbwfBYArgQ/cuHkhMFlEeroJrcnAQndusSuLqxvw5RudOrTnpxNO5cM7LuCJq0cz9ORufofUZvnd3zZSaxsHAGEmGReR0cBzQCqwBbgOOANv1joFqAJ+oar5ItINeAE4Ha/b+4KqPur85AAv4k14vQvcoqoqIp2Al4AxeC3wNFXd4upcD/ybC+U/VfUFZx8EzAPSgRXAP6nqcaeGY51kvKFBeX/DXv71zys4bM/GRpz//OEIrhl/qt9hxIxjJRkPS8RthViLGLxHEs9+8L2YfmaycHL3jnx4x4V0Tm3vdygx4VgithVbUWbplhK/Q2iz7DlQzZwl2/wOw3dMxFFm6Zb9fofQpvnj4gLKk3xLYBNxlFliIo4qB6rq+K+/J/ciGxNxFCmuqKZg70G/w2jz/OnTrexN4gyLJuIoYl3p2FBV28Dvk3jjABNxFLGudOz48+dfs33/Ib/D8AUTcRSxljh21DUojy/6yu8wfMFEHCX2HKhiS3Fytgx+8dbKnazdWe53GDHHRBwlrBX2hwf+si7p8jiZiKOEidgflm4p4aF3N/gdRkwxEUeJJZtNxH7x3Cdbyc0var5gG8FEHAV2lR+2bIg+829vfJk0aXRMxFHAutL+U1PfwM9fymdXedvfbcVEHAWWbraHHuKB4opqbpqb3+ZTpJqIo4At8ogfvtxRzp25q9v05nom4gizo+wwX1vKkbji7VU7md2GdyI1EUeYpTYrHZc8unBjm81UaSKOMNaVjk9U4dZ5K/hqT9tLBWMijjA2Mx2/HKqp58a5eZRV1vgdSkQxEUeQwpJK20A+ztm+v5JfvvpFm1qaaSKOINaVTgw+LdjPg++s9zuMiGEijiDWlU4cXvxsG/M+/9rvMCKCiThCqKrNTCcY/+etNSzflvgLc0zEEaKw5DA7y5N3n6dEpLZeufmlfHaUJfY8hok4QizZYpn6EpH9h2q4cU4elTV1zReOU8ISsYikiUiuiGwQkfUico6IjBaRpSKy0qUOHefKXuNsgVeDSwODiJwtIl+KSIGIPOVyMiEiHUXkNWdfFpSUHBGZLiKb3Gt6kH2gK7vJ1U2N5IVpKfboYeKybtcB7pi/KmGXZobbEj+Jl+lwODAKWA88AtynqqOBe9x7VPUVVR3t7D8FtqnqSudnNnATMMS9pjj7DUCpqg4GZgEPA4hIOnAvMB4v6+K9LrEarswsVR0ClDofvqCqlukhwfnrl7sTdsfMZkUsIt2B83D5h1W1RlXL8PIBd3fFetB0atEfA392fjKB7qq6xGU1nAtc4cpNBea441xgkmulLwYWqWqJqpYCi4Ap7txEVxZXN+Ar5mzbX8nuJN73uK3w+KKvWLBmt99htJhwWuJBQDHwgoisEJHnXMrS24BHRaQQeAz4TRN1r8aJGOiHl284QJGzBc4VAqhqHVAO9Aq2h9TpBZS5sqG+jkBEbnLd/bzi4uIwvm7LsVtLbYfbX1/Jht0H/A6jRYQj4hS8pOKzVXUMcAj4NTADmKmq2cBMXEsdQETGA5WquiZgasK3NnOupfajjarPqmqOquZkZGQ0VaTVjOjbg24dU5ovaMQ9lTX1/POcPEoOJc7SzHBEXAQUqeoy9z4XT9TTgTecbT7emDWYaTS2wgE/WUHvs2jsghcB2QAikoLXPS8JtofU2QekubKhvmLOmVk9eG56Dh1TbLK/LVBUepgZL+cnTBLzZv/XqepuoFBEhjnTJGAdnmjOd7aJwKZAHRFpB1yFlwQ84GcXUCEiE9yY9lrgLXf6bbwfBYArgQ/cuHkhMFlEeroJrcnAQndusSuLqxvw5QsTBvVi9j+dRUq7pjoJRqKxbGsJ9/3vWr/DCItwm45bgFdEZDUwGvgtcCPwOxFZ5d7fFFT+PLzWe0uInxnAc0ABsBl419mfB3qJSAFwO153HVUtAR4AlrvX/c4GcBd
wu6vTi5DuvB9MHH4yv/vRKMR03CZ4eenXvLR0u99hNIsk6r2xEyEnJ0fz8vKi/jmvLNvO3W+uab6gEfektBNeumE855zWy+9QEJF8Vc0JtdsgLgpcM/5U7poy3O8wjAhQ16D84pV8CuN4yyUTcZSYccFpzLjgNL/DMCJAaWUtN87N41B1fC7NNBFHkTsvHsY14/v7HYYRATbsruD211fS0BB/w08TcRQRER6YOoKpo/v6HYoRARau3cMT729qvmCMMRFHmXbthMeuGsWk4X38DsWIAE+9v4l3Vu/yO4wjMBHHgA7t2/GHa85i/MB0v0MxIsCv5q9kzY74yYNsIo4RnTq057npOYzM6uF3KEYrqapt4Ka5eRRXVPsdCmAijiknderAi9eNY0ifbn6HYrSSneVVzHg5n5o6/5dmmohjTHrXVF66YTxZPTv7HYrRSvK2l3LPW2t830zAROwDp/ToxH9fe9TCGyMBmbe8kLlL/F2aaSL2ieo46IYZkeH+v6zj0wL/9lgzEftEwd6DfodgRIj6BmXGy/k8vGADq4vKYt69tifZfcJE3LY4UFXH7A83M/vDzfRL68yUEadwyYhTOKt/T9pF+fFUE7FPmIjbLjvKDvP8J1t5/pOt9DmpIxef4Ql63MB0UtpHvvNrIvaJzcUm4mRgb0U1Ly3dzktLt5PeNZWLvnUyU848hXNP601qhHaCMRH7QHVdPdv3H/I7DCPGlByq4bW8Ql7LK+SkTil871snM2XEKZw/NINOHdqfsF8TsQ9s21dJHD4MY8SQiqo63lyxgzdX7KBLantuOm8Qt31v6An5stlpH7DxsBFMZU09e1uxhNNE7AMmYiOU7J5dTriuidgHCmxSywghO/3El+GaiH3AWmIjFGuJE4j6BrXbS8ZRZKebiBOGotLKuHh8zYgfuqa2p2eXDidc30QcY6wrbYSSnd4FaUXGARNxjDERG6FktWI8DGGKWETSRCRXRDaIyHoROUdERovIUhFZ6VKHjgsqP1JElojIWhH5UkQ6OfvZ7n2BiDzlcjIhIh1F5DVnXyYiA4J8TReRTe41Pcg+0JXd5OqmtupKxAgTsRFKa2amIfyW+ElggaoOB0YB64FHgPtUdTRwj3sfyGr4MnCzqp4BXADUOj+z8XI2DXGvKc5+A1CqqoOBWcDDzlc6cC8wHi/r4r0usRquzCxVHQKUOh9xj91eMkLp34pJLQhDxCLSHS9B2vMAqlqjqmV4+YC7u2I9aEwtOhlYraqrXPn9qlovIplAd1Vd4rIazgWucHWmAnPccS4wybXSFwOLVLVEVUuBRcAUd26iK4urG/AVt6iqtcTGUbTm9hKEt3Z6EFAMvCAio4B84FbgNmChiDyG92PwbVd+KKAishDIAOap6iNAP7x8wwGKnA33txBAVetEpBwv0+E39pA6vYAyVa1rwlfcUlxRTUVVfKYCMfyjNbeXILzudApeUvHZqjoGOISXenQGMFNVs4GZNKYWTQG+A1zj/v5QRCYBTU2/BR4DONa5ltqPQkRucmP2vOLi4qaKxIwt++zJJeNoWrtpYjgiLsLLNbzMvc/FE/V04A1nm483Zg2U/0hV96lqJfBXV74IyAqOncYueBGQDd+MqXsAJcH2kDr7gDRXNtTXEajqs6qao6o5GRkZYXzd6HHgcG3zhYykolfXVLp2bN3DhM2KWFV3A4UiMsyZJgHr8ERzvrNNBAJJahYCI0WkixPZ+cA6Vd0FVIjIBDemvRZ4y9V5G+9HAeBK4AM3bl4ITBaRnm5CazKw0J1b7Mri6gZ8xS2Ha+v9DsGIM7Ja2ZWG8J8nvgV4xd3G2QJchyeaJ51Qq/BmnVHVUhF5HFiO18X9q6q+4/zMAF4EOgPvuhd4XfGXRKQArwWe5nyViMgDzhfA/apa4o7vAuaJyIPAChq783FLlYnYCCE7AvuPhyViVV0JhG6U/Alw9jHKv4x3mynUngeMaMJeBVx1DF9/Av7UhH0LjV34hOB
wjYnYOJLWTmqBrdiKKYdrbc20cSStvb0EJuKYYmNiI5TWrtYCE3FMsTGxEYq1xAmGjYmNYESgb5q1xAlFpYnYCCKze6eI7D1tIo4h1p02gonEPWIwEccUm9gygonEeBhMxDHFxsRGMBcMi8wyYBNxDLGW2Ahwxei+/GBU34j4MhHHEBsTG+DdG37giqMWLp4wJuIYYi2x0b6d8OS0MZzU6cR3twzFRBxDbExszPzeEM7q37P5gi3ARBxDrCVObsYPTGfGBYMj7tdEHENsTJy89OjcgVlXj6Z9uxPfX/pYmIhjRG19A7X1lpQ4WXnoH86MyBLLpjARxwjrSicvPx6XzSVnZkbNv4k4RlTZpFZSMiijK//nstOj+hkm4hix1Xa6TDpS27fjqWlj6JLauo3wmsNEHANUlYcWbPA7DCPG3DllGCP69Yj655iIY8BbK3ey4usyv8MwYsh5QzO4/tyBMfksE3GUKT9cy0PvWiucTPTqmspjV42kXRRuJzWFiTiKfFlUzg9+/wm7D1T5HYoRQx67ahR9TuoUs8+L7og7SVFV5ny2jd/+dQM19bbDpR906tCOc0/rzYXD+5DeNZW9B6rYU1HN3gPV7K2o+uZvaWVks3Jcd+4ALhzeJ6I+m8NEHGHKD9dyV+5qFqzd7XcoSUdWz85MHN6HC4f34ZxBvejUoX2zdarr6imuqGZvRTV7D1S5v9Xsccd7DlRRXFHN/kM1zfoafspJ3DVleCS+SoswEUeQlYVl/MurX1BUetjvUJKC9u2EnFN7MnF4HyYO78PgPt1weevDpmNKe7J6diGrmV02auoa2HewUex7KqopDhL6voM1PP6jUWH9cEQaE3EEUFWe/2QrDy/YYEsro0yvrqmcPyyDicP78N0hGfToHLlH+o5Hako7+qZ1jtrSydYQlohFJA14Di8FiwLXA4eBZ4BOQB3wC1X9XEQGAOuBja76UlW92fk5m8ZcTH8FblVVFZGOeEnHzwb2A1er6jZXZzrw787Xg6o6x9kHAvOAdOAL4Keq2nyfJ8KUVdZwx/zVvLd+T6w/Omk4PbM7k77ltbYjs9Ki8hBBIhNuS/wksEBVr3RJ1boArwP3qeq7IvJ94BHgAld+s6qObsLPbLzEa0vxRDwFL6naDUCpqg4WkWnAw8DVIpIO3IuXB0qBfBF5W1VLXZlZqjpPRJ5xPma38Pu3ip1lh7nqmSXsKLPuc6T5VmZ3LhuZyaVnZjKgd1e/w4lrmhWxiHQHzgN+BuBauxoRUaC7K9aDY+QHDvKTCXRX1SXu/VzgCjwRTwX+wxXNBZ526U8vBhYFMiGKyCJgiojMw0un+hNXZ46rHzMRV9bUcePcPBNwBBnSpxuXjezLpSMzGdynm9/hJAzhtMSDgGLgBREZBeQDtwK3AQtF5DG8+83fDqozUERWAAeAf1fVj4F+eEnDAxQ5G+5vIYCq1olIOdAr2B5SpxdQpqp1Tfg6AhG5CZd2tX///mF83eZpaFB+9foq1u48EBF/ycyg3l29FndkX4adcpLf4SQk4Yg4BTgLuEVVl4nIk8Cv8Vrfmar6PyLyI7z8wN8DdgH9VXW/GwP/PxE5A2hqIBOYBTrWuZbajzaqPgs8C5CTkxORWacn3t/Eu2vsFtKJkp3emctG9uWykZmcntm9xTPKxpGEI+IioEhVl7n3uXgi/g5eiwwwH2/iC1WtBqrdcb6IbAaGOj9ZQX6zaOyCFwHZQJFLWt4DL9l4EY3j7ECdD4F9QJqIpLjWONhXVPnfVTt56v1NsfioNkXfHp24dGQml43sy8isHibcCNKsiFV1t4gUisgwVd0ITALW4XWzz8cT1URgE4CIZAAlqlovIoOAIcAWVS0RkQoRmQAsA64Ffu8+5m1gOrAEuBL4wM1aLwR+KyKBncUmA79x5xa7svNc3bdaezGaY3VRGXfMXxXtj2kznNy9I98/0xPumOy0mK0lTjbCnZ2+BXjFzUxvAa7DE82TruWswo078Sb
B7heROqAeuDkwMQXMoPEW07vuBV5X/CURKcBrgacBOOE/ACx35e4P8nUXME9EHgRWOB9RY8+BKm6cm0d1nS2jPB69u6VyyYhMLhuZydgB6SbcGCCqybM4IScnR/Py8lpcr6q2nqv/awmrisqjEFXi07NLB6aMyOQHIzMZNzCdlPb2XE00EJF8Vc0JtduKrWZQVe7MXW0CDqFzh/ZcNjKTy0b15dun9aKDCdc3TMTN8McPN/P2qpjMmSUUV4/N5j8uP8PvMAzseeLjsmDNbh5duLH5gknIuYN7+x2C4TARH4N1Ow9w++sr/Q4jLmnfTpgwKN3vMAyHibgJiiuq+ec5y6m0bWabZHR2WkQTghmtw0QcQnVdPTe/nM/OcttS51hYVzq+MBEHoarc/eYa8reX+h1KXPMdE3FcYSIOYsPuCnLzi5ovmMR0TW3PmP5pfodhBGEiDmKnPVbYLOMH2T3heMP+NYKIxTj4khGn8J8/HJGwrZmNh+MPW+wRxO7y6LXEp/bqwn2Xn8EFw7ztTK8Zfyobd1cwb/nXvLliB2UR3jo1Wth4OP4wEQexqyzyLXFqSjtmnH8aMy447aidEIedchL3/uAM7poynIVrd/Pa8kI+27w/4jFEioyTOjL0ZNtxI94wEQexK8Ld6fOGZnDf5WcwsJk9ojp1aM/U0f2YOrof2/Yd4vW8QubnF1FcUR3ReFrLdwb3tueA4xAbEwcRqXQrp3TvxB+vOYs5141tVsChDOjdlTunDOezX0/k0igmpj4RbDwcn1hL7FDVVs9Ot28nXH/uAG793lC6dWzdpe3Qvh0P/eOZrNt1IG5yG9t4OD6xlthRVlnbqgf+c07tyTv/+h3uvvT0Vgs4wEmdOvDHa86iY4r//0yD+3TjlB6xSxJmhI///zvihAZVvn1arxbXS++ayqNXjuT1n5/D8FO6N1+hhXwrszsPXDEi4n5birXC8Yt1px29unXk1RsnUFhSyfz8InLzCo9731gEpo3tz11ThpHWJTWqsf0oJ5vlW0uY7+NqMhsPxy8m4hCy07tw+0VDuXXSED4t2MfreYX8be2eI1KUntHXax3P6t/zOJ4iy/1TR/DljnI27K6I2WcGsEcP47BIj7wAAAcFSURBVBsT8TFo3044b2gG5w3NoPRQDW+t3MHbq3Zy+ai+/NOEU2O+j1Tn1Pb88ZqzuPzpTzlYXdd8hQhijx7GNybiMOjZNZWfnTuQn5070Nc4BmV04+F/HMkvX/0ipp9rXen4xia2EoxLR2bys28PiOln2qRWfGMiTkD+7fvfYlR29B+gSOvSgfunnsHZp8Zu7G+0HBNxApKa0o4//GRM1BJsp7QTrj93IB/dcSHXnjPA8gHHOSbiBCWrZxdmXT0q4n4nDu/Dwpnncc8PTqdHF5vMSgTCErGIpIlIrohsEJH1InKOiIwWkaUislJE8kRkXEid/iJyUETuCLKdLSJfikiBiDzlchAjIh1F5DVnXyYiA4LqTBeRTe41Pcg+0JXd5OpG92ZtHDJx+Mn84oLTIuJrSJ9uzLl+HH/62VhOy7AnlRKJcFviJ4EFqjocGAWsBx4B7lPV0cA97n0ws2jMtRRgNl7OpiHuNcXZbwBKVXWwq/cwgIikA/cC44FxwL1BydUeBmap6hCg1PlIOm6/aCjjB574PdzAuPfdW7/L+UMzIhiZESuaFbGIdMdLkvY8gKrWqGoZXj7gwDrDHgSlFhWRK/ASr60NsmUC3VV1iXoJoOYCV7jTU4E57jgXmORa6YuBRapaoqqlwCJgijs30ZXF1Q34SipS2rfj9z8eQ+9uHVtWL2Tca/mTEpdw/uUGAcXACyKyQkSeE5GuwG3AoyJSCDwG/AbAnbsLuC/ETz+8fMMBipwtcK4QwOUbLgd6BdtD6vQCylzZUF9HICI3ue5+XnFxcRhfN/Ho070Tv//xGMKdf7Jxb9siHBGnAGcBs1V1DHAIL8n4DGCmqmYDM2lMLXofXjf3YIifpv6
LaTPnWmo/2qj6rKrmqGpORkbb7S6ec1ovfjV52HHL2Li3bRLOiq0ioEhVl7n3uXgi/g5wq7PNB55zx+OBK0XkESANaBCRKuB/gKwgv1k0dsGLgGygyOU77oGXp7gIuCCkzofAPiBNRFJcaxzsK2mZcf5p5G0rYfHGI3scaV06cPtFQ/nJuP7WbW6DNPsvqqq7gUIRCfzMTwLW4YnmfGebCGxy5b+rqgNUdQDwBPBbVX1aVXcBFSIywY1pr8VLVA7wNhCYeb4S+MCNmxcCk0Wkp5vQmgwsdOcWu7K4ugFfSUu7dsLjPxpNv7TOgDfuve7cATbubeOEu3b6FuAVdxtnC3AdnmiedC1nFd6sc3PMAF4EOuPNXAdmr58HXhKRArwWeBqAqpaIyAPAclfuflUtccd3AfNE5EFgBY3d+aSmZ9dUnv7JGP6weDO/vmQ4g/tYt7mtI16jlhzk5ORoXl6e32EYxgkhIvmqmhNqt/6VYSQ4JmLDSHBMxIaR4JiIDSPBMREbRoJjIjaMBMdEbBgJjonYMBIcE7FhJDhJtWJLRIqB7c0U6433gIXhYdfjaPy6Jqeq6lGP4iWViMNBRPKaWtqWrNj1OJp4uybWnTaMBMdEbBgJjon4aJ71O4A4w67H0cTVNbExsWEkONYSG0aCYyI2jASnzYhYRLa57BIrRSTP2R51WStWi8ibIpIWVP43LuPERhG5OMgesSwVftPUNQk6d4eIqIj0DrIl7TURkVvc917rNnkM2OP/mqhqm3gB24DeIbbJQIo7fhh42B2fDqwCOgIDgc1Ae3fuc+AcvG1x3wUucfZfAM+442nAa+44HW/fsXSgpzvu6ff1ONY1cfZsvE0ItwfOJ/M1AS4E3gM6uvd9EumatJmWuClU9W/auMH8Uhq3zJ0KzFPValXdChQA4yKZpSLqX651zALu5Mi9upP5mswAHlLVagBV3evsCXFN2pKIFfibiOSLSFM7b15P4+6ax8osEcksFfHAUddERC4HdqjqqpCySXtNgKHAd1339yMRGevsCXFNwt2yNhE4V1V3ikgfYJGIbFDVvwOIyN1AHfCKK3simSVanY3CB466JsDdeMOMUJL5mqTgdXEnAGOB10VkEAlyTdpMS6yqO93fvcCbeFkUcRMIlwHXuK4PNGacCBDIIFFE81kqaCJLRVO+fKeJa3I+3thulYhsw4v1CxE5heS9JuPw4n1DPT4HGvAeckiMa+L3REOEJiu6AicFHX+GN96YgpetIiOk/BkcOWGxhcYJi+V4v8iBCYvvO/svOXLC4vWgCYuteL/kPd1xerxek5Ay22ic2EraawLcjJeYALyudaH7rglxTXwXYIT+cQa5i70KL53q3c5e4P5BVrrXM0F17sabbdyIm1l09hxgjTv3NI2r2jrh5ZwqwJuZHBRU53pnLwCu8/t6HO+ahJT5RsTJfE2AVOBl9x2/ACYm0jWxZZeGkeC0mTGxYSQrJmLDSHBMxIaR4JiIDSPBMREbRoJjIjaMBMdEbBgJzv8Hq3w1VXpLb1cAAAAASUVORK5CYII=\n", 85 | "text/plain": [ 86 | "
" 87 | ] 88 | }, 89 | "metadata": { 90 | "needs_background": "light" 91 | }, 92 | "output_type": "display_data" 93 | } 94 | ], 95 | "source": [ 96 | "mask.plot()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "amsterdam = gpd.read_file(path, mask=mask)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 7, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "output_type": "execute_result", 115 | "data": { 116 | "text/plain": "(415178, 37)" 117 | }, 118 | "metadata": {}, 119 | "execution_count": 7 120 | } 121 | ], 122 | "source": [ 123 | "amsterdam.shape" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 12, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/html": [ 134 | "
\n", 135 | "\n", 148 | "\n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | "
identificatieaanduidingrecordinactiefaanduidingrecordcorrectieofficieelinonderzoekdocumentnummerdocumentdatumbouwjaarbegindatumtijdvakgeldigheideinddatumtijdvakgeldigheid...roof-0.99rmse-0.99roof_flatnr_ground_ptsnr_roof_ptsahn_file_dateahn_versionheight_validtile_idgeometry
00384100000003464False0FalseFalseGD10.004242010-11-021980-01-01NoneNone...4.590.34True501218None3True25hn1POLYGON Z ((130331.253 483337.251 0.000, 13033...
10384100000003889False0FalseFalseGD10.004242010-11-021980-01-01NoneNone...3.430.46True82869None3True25hn1POLYGON Z ((130374.022 483428.300 0.000, 13038...
20305100000000149False0FalseFalse10/formalisering25/02010-05-251980-01-01NoneNone...3.800.87False892542None3True25gz1POLYGON Z ((124352.680 476826.680 0.000, 12432...
30305100000000097False0FalseFalse2011BAG00082011-03-281993-01-01NoneNone...5.671.23False12213611None3True25gz1POLYGON Z ((123803.120 476872.458 0.000, 12379...
40305100000000150False0FalseFalse10/formalisering25/02010-05-251882-01-01NoneNone...7.191.19False404255None3True25gz1POLYGON Z ((124354.600 476868.600 0.000, 12434...
\n", 298 | "

5 rows × 37 columns

\n", 299 | "
" 300 | ], 301 | "text/plain": [ 302 | " identificatie aanduidingrecordinactief aanduidingrecordcorrectie \\\n", 303 | "0 0384100000003464 False 0 \n", 304 | "1 0384100000003889 False 0 \n", 305 | "2 0305100000000149 False 0 \n", 306 | "3 0305100000000097 False 0 \n", 307 | "4 0305100000000150 False 0 \n", 308 | "\n", 309 | " officieel inonderzoek documentnummer documentdatum bouwjaar \\\n", 310 | "0 False False GD10.00424 2010-11-02 1980-01-01 \n", 311 | "1 False False GD10.00424 2010-11-02 1980-01-01 \n", 312 | "2 False False 10/formalisering25/0 2010-05-25 1980-01-01 \n", 313 | "3 False False 2011BAG0008 2011-03-28 1993-01-01 \n", 314 | "4 False False 10/formalisering25/0 2010-05-25 1882-01-01 \n", 315 | "\n", 316 | " begindatumtijdvakgeldigheid einddatumtijdvakgeldigheid ... roof-0.99 \\\n", 317 | "0 None None ... 4.59 \n", 318 | "1 None None ... 3.43 \n", 319 | "2 None None ... 3.80 \n", 320 | "3 None None ... 5.67 \n", 321 | "4 None None ... 7.19 \n", 322 | "\n", 323 | " rmse-0.99 roof_flat nr_ground_pts nr_roof_pts ahn_file_date \\\n", 324 | "0 0.34 True 50 1218 None \n", 325 | "1 0.46 True 82 869 None \n", 326 | "2 0.87 False 89 2542 None \n", 327 | "3 1.23 False 122 13611 None \n", 328 | "4 1.19 False 40 4255 None \n", 329 | "\n", 330 | " ahn_version height_valid tile_id \\\n", 331 | "0 3 True 25hn1 \n", 332 | "1 3 True 25hn1 \n", 333 | "2 3 True 25gz1 \n", 334 | "3 3 True 25gz1 \n", 335 | "4 3 True 25gz1 \n", 336 | "\n", 337 | " geometry \n", 338 | "0 POLYGON Z ((130331.253 483337.251 0.000, 13033... \n", 339 | "1 POLYGON Z ((130374.022 483428.300 0.000, 13038... \n", 340 | "2 POLYGON Z ((124352.680 476826.680 0.000, 12432... \n", 341 | "3 POLYGON Z ((123803.120 476872.458 0.000, 12379... \n", 342 | "4 POLYGON Z ((124354.600 476868.600 0.000, 12434... 
\n", 343 | "\n", 344 | "[5 rows x 37 columns]" 345 | ] 346 | }, 347 | "execution_count": 12, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "amsterdam.head()" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 13, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "filt = amsterdam.geometry.area > 30" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 14, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "275132" 374 | ] 375 | }, 376 | "execution_count": 14, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "filt.sum()" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 8, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "amsterdam.to_file('/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/data.gpkg', layer='3dbag', driver='GPKG')" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "geo_dev", 405 | "language": "python", 406 | "name": "geo_dev" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 3 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython3", 418 | "version": "3.8.2-final" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 4 423 | } -------------------------------------------------------------------------------- /code_production/Amsterdam/200224_extract_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | 
"outputs": [], 8 | "source": [ 9 | "import geopandas as gpd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'0.7.0+2.g0a1799d'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "gpd.__version__" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "boundary = gpd.read_file('/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/data.gpkg', layer='boundary')" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "network = gpd.read_file('/Users/martin/Dropbox/Academia/Data/Geo/Netherlands/nwbwegen/geogegevens/shapefile/nederland_totaal/wegvakken/wegvakken.shp', mask=boundary)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 5, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "(46756, 51)" 59 | ] 60 | }, 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "network.shape" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "\n", 79 | "Name: Amersfoort / RD New\n", 80 | "Axis Info [cartesian]:\n", 81 | "- X[east]: Easting (metre)\n", 82 | "- Y[north]: Northing (metre)\n", 83 | "Area of Use:\n", 84 | "- name: Netherlands - onshore\n", 85 | "- bounds: (3.2, 50.75, 7.22, 53.7)\n", 86 | "Coordinate Operation:\n", 87 | "- name: RD New\n", 88 | "- method: Oblique Stereographic\n", 89 | "Datum: Amersfoort\n", 90 | "- Ellipsoid: Bessel 1841\n", 91 | "- Prime Meridian: Greenwich" 92 | ] 93 | }, 94 | "execution_count": 6, 95 | "metadata": {}, 96 | "output_type": "execute_result" 
97 | } 98 | ], 99 | "source": [ 100 | "network.crs" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "network.to_file('/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/tempnet.gpkg', driver='GPKG')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "geo_dev", 123 | "language": "python", 124 | "name": "geo_dev" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.8.0" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 4 141 | } 142 | -------------------------------------------------------------------------------- /code_production/Amsterdam/200224_preprocess.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import momepy as mm\n", 11 | "import libpysal" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "buildings = gpd.read_file('files/AMS/data.gpkg', layer='3dbag')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 8, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "buildings = buildings.loc[buildings.geometry.area > 30].copy()\n", 30 | "buildings['uID'] = mm.unique_id(buildings)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 10, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 
40 | "text/plain": [ 41 | "Index(['identificatie', 'aanduidingrecordinactief',\n", 42 | " 'aanduidingrecordcorrectie', 'officieel', 'inonderzoek',\n", 43 | " 'documentnummer', 'documentdatum', 'bouwjaar',\n", 44 | " 'begindatumtijdvakgeldigheid', 'einddatumtijdvakgeldigheid',\n", 45 | " 'gemeentecode', 'ground-0.00', 'ground-0.10', 'ground-0.20',\n", 46 | " 'ground-0.30', 'ground-0.40', 'ground-0.50', 'roof-0.25', 'rmse-0.25',\n", 47 | " 'roof-0.50', 'rmse-0.50', 'roof-0.75', 'rmse-0.75', 'roof-0.90',\n", 48 | " 'rmse-0.90', 'roof-0.95', 'rmse-0.95', 'roof-0.99', 'rmse-0.99',\n", 49 | " 'roof_flat', 'nr_ground_pts', 'nr_roof_pts', 'ahn_file_date',\n", 50 | " 'ahn_version', 'height_valid', 'tile_id', 'geometry', 'uID'],\n", 51 | " dtype='object')" 52 | ] 53 | }, 54 | "execution_count": 10, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "buildings.columns" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 18, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "buildings = buildings.loc[buildings.height_valid].copy()\n", 70 | "buildings['uID'] = mm.unique_id(buildings)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 19, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "buildings.to_file('files/AMS/elements.gpkg', layer='buildings', driver='GPKG')" 80 | ] 81 | } 82 | ], 83 | "metadata": { 84 | "kernelspec": { 85 | "display_name": "geo_dev", 86 | "language": "python", 87 | "name": "geo_dev" 88 | }, 89 | "language_info": { 90 | "codemirror_mode": { 91 | "name": "ipython", 92 | "version": 3 93 | }, 94 | "file_extension": ".py", 95 | "mimetype": "text/x-python", 96 | "name": "python", 97 | "nbconvert_exporter": "python", 98 | "pygments_lexer": "ipython3", 99 | "version": "3.8.0" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 4 104 | } 105 | -------------------------------------------------------------------------------- 
/code_production/Amsterdam/200224_simplify_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import momepy as mm\n", 11 | "from tqdm import tqdm\n", 12 | "from shapely.wkt import loads\n", 13 | "import numpy as np\n", 14 | "from scipy.spatial import Voronoi\n", 15 | "import pandas as pd\n", 16 | "from shapely.geometry import Point, Polygon, MultiPoint\n", 17 | "from osgeo import ogr\n", 18 | "import shapely\n", 19 | "\n", 20 | "import time\n", 21 | "\n", 22 | "# input\n", 23 | "network = gpd.read_file(\n", 24 | " 'files/AMS/tempnet.gpkg', layer='tempnet'\n", 25 | ")\n", 26 | "# network = network.loc[network.type.isin(['LineString', 'MultiLineString'])]\n", 27 | "# network_cleaned = mm.network_false_nodes(network_cl)\n", 28 | "# network_cleaned['uID'] = range(len(network_cleaned))\n", 29 | "# network = network_cleaned\n", 30 | "# network = network.loc[~network.NFC.isin(['Interstate', 'Other Freeway'])]\n", 31 | "buildings = gpd.read_file('files/AMS/elements.gpkg', layer='buildings')\n", 32 | "\n", 33 | "network.crs == buildings.crs\n", 34 | "\n", 35 | "start = time.time()\n", 36 | "# parameters\n", 37 | "max_size = 100000\n", 38 | "size = 5000\n", 39 | "circom_min = 0.2\n", 40 | "circom_max = 0.75\n", 41 | "selfsnap_tolerance = 5\n", 42 | "bowtie_tolerance = 0.4\n", 43 | "\n", 44 | "\n", 45 | "#########\n", 46 | "def _remove_leading_out(input, context, tolerance=0.25):\n", 47 | " \"\"\"\n", 48 | " context defines if it leads somewhere\n", 49 | "\n", 50 | " TODO: use rtree to speed up intersections\n", 51 | " \"\"\"\n", 52 | " to_remove = []\n", 53 | " for i, ls in input.iteritems():\n", 54 | " first = Point(ls.coords[0]).buffer(tolerance)\n", 55 | " last = Point(ls.coords[-1]).buffer(tolerance)\n", 56 | " remove = False\n", 57 | " for p in [first, 
last]:\n", 58 | " if not input.drop(i).intersects(p).any() and not context.intersects(p).any():\n", 59 | " remove = True\n", 60 | " if remove:\n", 61 | " to_remove.append(i)\n", 62 | " return input.drop(to_remove)\n", 63 | "\n", 64 | "\n", 65 | "def _snap(input, target, tolerance, min=True):\n", 66 | " \"\"\"\n", 67 | " min True snaps to closest within tolerance, False to all within tolerance\n", 68 | "\n", 69 | " TODO: use rtree to get distances only to relevant geoms\n", 70 | " \"\"\"\n", 71 | " input = input.copy()\n", 72 | " for i, geom in input.iteritems():\n", 73 | " distances = target.distance(geom)\n", 74 | " if min:\n", 75 | " close_geom = target.loc[distances.idxmin()]\n", 76 | " geom = shapely.ops.snap(geom, close_geom, tolerance)\n", 77 | " else:\n", 78 | " close_geom = target.loc[distances < tolerance]\n", 79 | " for p in close_geom:\n", 80 | " geom = shapely.ops.snap(geom, p, tolerance)\n", 81 | "\n", 82 | " input.loc[i] = geom\n", 83 | "\n", 84 | " return input\n", 85 | "\n", 86 | "\n", 87 | "def _split(input, target, tolerance):\n", 88 | " input = input.copy()\n", 89 | " buf = target.buffer(tolerance)\n", 90 | " for i, geom in input.iteritems():\n", 91 | " intersects = target.loc[buf.intersects(geom)]\n", 92 | " if not intersects.empty:\n", 93 | " geom = shapely.ops.split(geom, intersects.unary_union)\n", 94 | "\n", 95 | " input.loc[i] = geom\n", 96 | "\n", 97 | " return input\n", 98 | "\n", 99 | "\n", 100 | "def _drop_duplicates(geoms):\n", 101 | " mp = {}\n", 102 | " for ix, geom in geoms.iteritems():\n", 103 | " inters = geoms.intersection(geom).type == 'MultiPoint'\n", 104 | "\n", 105 | " if inters.any():\n", 106 | " mp[ix] = inters.loc[inters].index.to_list()[0]\n", 107 | "\n", 108 | " drop = []\n", 109 | " keep = []\n", 110 | " for k in mp.keys():\n", 111 | " if k not in keep:\n", 112 | " drop.append(k)\n", 113 | " keep.append(mp[k])\n", 114 | "\n", 115 | " geoms = geoms.drop(drop)\n", 116 | " return geoms\n", 117 | "\n", 118 | "\n", 119 | 
"def _get_snapping_centroids(input, tolerance):\n", 120 | " \"\"\"\n", 121 | " generate centroids from close-by points\n", 122 | " \"\"\"\n", 123 | " points = []\n", 124 | " for i, ls in input.iteritems():\n", 125 | " first = Point(ls.coords[0])\n", 126 | " last = Point(ls.coords[-1])\n", 127 | " for p in [first, last]:\n", 128 | " if input.drop(i).intersects(p.buffer(tolerance)).any():\n", 129 | " points.append(p)\n", 130 | " points = gpd.GeoSeries(points)\n", 131 | " points = points.buffer(tolerance / 2)\n", 132 | " union = points.unary_union\n", 133 | " exploded = gpd.GeoSeries([union]).explode()\n", 134 | " return exploded.centroid\n", 135 | "\n", 136 | "\n", 137 | "# polygonize network\n", 138 | "polygonized = shapely.ops.polygonize(network.geometry)\n", 139 | "geoms = [g for g in polygonized]\n", 140 | "gdf = gpd.GeoDataFrame(geometry=geoms, crs=network.crs)\n", 141 | "\n", 142 | "\n", 143 | "# calculate parameters\n", 144 | "gdf[\"area\"] = gdf.geometry.area\n", 145 | "gdf[\"circom\"] = mm.CircularCompactness(gdf, \"area\").series\n", 146 | "\n", 147 | "\n", 148 | "# select valid and invalid network-net_blocks\n", 149 | "possible = gdf.loc[gdf[\"area\"] < max_size]\n", 150 | "possible = possible.loc[\n", 151 | " (possible[\"circom\"] > circom_max)\n", 152 | " | (possible[\"circom\"] < circom_min)\n", 153 | " | (possible[\"area\"] < size)\n", 154 | "]\n", 155 | "\n", 156 | "\n", 157 | "# check for buildings\n", 158 | "buildings[\"geometry\"] = buildings.geometry.representative_point()\n", 159 | "sindex = buildings.sindex\n", 160 | "\n", 161 | "drop = []\n", 162 | "for index, row in tqdm(possible.iterrows()):\n", 163 | " possible_matches_index = list(sindex.intersection(row.geometry.bounds))\n", 164 | " possible_matches = buildings.iloc[possible_matches_index]\n", 165 | " if possible_matches.intersects(row.geometry).any():\n", 166 | " drop.append(index)\n", 167 | "\n", 168 | "invalid = possible.drop(drop)\n", 169 | "\n", 170 | "\n", 171 | "# INSERTED - TO BE 
REMOVED\n", 172 | "invalid_manual = gpd.read_file('files/AMS/tempnet.gpkg', layer='invalid', )\n", 173 | "pts = invalid_manual.geometry.representative_point()\n", 174 | "trueinv = []\n", 175 | "for i, r in invalid.iterrows():\n", 176 | " if pts.intersects(r.geometry).any():\n", 177 | " trueinv.append(i)\n", 178 | "invalid = gdf[gdf.index.isin(trueinv)].copy()\n", 179 | "# UNTIL HERE\n", 180 | "\n", 181 | "valid = gdf[~gdf.index.isin(invalid.index)].copy()\n", 182 | "\n", 183 | "\n", 184 | "# select edges of valid as an input for tessellation\n", 185 | "# valid / network\n", 186 | "input = []\n", 187 | "sidx = network.sindex\n", 188 | "union = invalid.geometry.unary_union\n", 189 | "unioned = gpd.GeoSeries(union).explode().reset_index(drop=True)\n", 190 | "for i, r in tqdm(unioned.iteritems()):\n", 191 | " possible_matches_index = list(sidx.intersection(r.bounds))\n", 192 | " possible_matches = network.iloc[possible_matches_index]\n", 193 | " real = network.intersection(r.exterior)\n", 194 | " for ix, geom in real.geometry.iteritems():\n", 195 | " input.append(geom)\n", 196 | "inter = gpd.GeoSeries(input)\n", 197 | "inters = inter[~(inter.is_empty | inter.isna())]\n", 198 | "geom_types = inters.type\n", 199 | "line_idx = np.asarray(\n", 200 | " (geom_types == \"LineString\")\n", 201 | " | (geom_types == \"MultiLineString\")\n", 202 | " | (geom_types == \"LinearRing\")\n", 203 | ")\n", 204 | "intersections = inters[line_idx]\n", 205 | "\n", 206 | "\n", 207 | "# densify intersections ahead of tessellation\n", 208 | "def _densify(geom, segment):\n", 209 | " \"\"\"\n", 210 | " Returns densified geometry with segments no longer than `segment`.\n", 211 | " \"\"\"\n", 212 | " poly = geom\n", 213 | " wkt = geom.wkt # shapely Polygon to wkt\n", 214 | " geom = ogr.CreateGeometryFromWkt(wkt) # create ogr geometry\n", 215 | " geom.Segmentize(segment) # densify geometry by 2 metres\n", 216 | " geom.CloseRings() # fix for GDAL 2.4.1 bug\n", 217 | " wkt2 = geom.ExportToWkt() # 
ogr geometry to wkt\n", 218 | " try:\n", 219 | " new = loads(wkt2) # wkt to shapely Polygon\n", 220 | " return new\n", 221 | " except Exception:\n", 222 | " return poly\n", 223 | "\n", 224 | "\n", 225 | "dense = intersections.geometry.apply(_densify, segment=2)\n", 226 | "\n", 227 | "# generate point array for tessellation\n", 228 | "points = []\n", 229 | "ids = []\n", 230 | "for ix, r in dense.items():\n", 231 | " if r.type == \"MultiLineString\":\n", 232 | " for line in r:\n", 233 | " point_coords = line.coords\n", 234 | " row_array = np.array(point_coords[1:-1] if len(point_coords) > 2 else point_coords).tolist()\n", 235 | " for i, a in enumerate(row_array):\n", 236 | " points.append(row_array[i])\n", 237 | " ids.append(ix)\n", 238 | " elif r.type == \"LineString\":\n", 239 | " point_coords = r.coords\n", 240 | " row_array = np.array(point_coords[1:-1] if len(point_coords) > 2 else point_coords).tolist()\n", 241 | " for i, a in enumerate(row_array):\n", 242 | " points.append(row_array[i])\n", 243 | " ids.append(ix)\n", 244 | "\n", 245 | "\n", 246 | "# generate tessellation\n", 247 | "voronoi_diagram = Voronoi(np.array(points))\n", 248 | "\n", 249 | "\n", 250 | "# generate regions\n", 251 | "def _regions(voronoi_diagram, unique_id, ids, crs):\n", 252 | " \"\"\"\n", 253 | " Generate GeoDataFrame of Voronoi regions from scipy.spatial.Voronoi.\n", 254 | " \"\"\"\n", 255 | " # generate DataFrame of results\n", 256 | " regions = pd.DataFrame()\n", 257 | " regions[unique_id] = ids # add unique id\n", 258 | " regions[\"region\"] = voronoi_diagram.point_region # add region id for each point\n", 259 | "\n", 260 | " # add vertices of each polygon\n", 261 | " vertices = []\n", 262 | " for region in regions.region:\n", 263 | " vertices.append(voronoi_diagram.regions[region])\n", 264 | " regions[\"vertices\"] = vertices\n", 265 | "\n", 266 | " # convert vertices to Polygons\n", 267 | " polygons = []\n", 268 | " for region in tqdm(regions.vertices, desc=\"Vertices to 
Polygons\"):\n", 269 | " if -1 not in region:\n", 270 | " polygons.append(Polygon(voronoi_diagram.vertices[region]))\n", 271 | " else:\n", 272 | " polygons.append(None)\n", 273 | " # save polygons as geometry column\n", 274 | " regions[\"geometry\"] = polygons\n", 275 | "\n", 276 | " # generate GeoDataFrame\n", 277 | " regions_gdf = gpd.GeoDataFrame(regions.dropna(), geometry=\"geometry\")\n", 278 | " regions_gdf = regions_gdf.loc[\n", 279 | " regions_gdf[\"geometry\"].length < 1000000\n", 280 | " ] # delete errors\n", 281 | " regions_gdf = regions_gdf.loc[\n", 282 | " regions_gdf[unique_id] != -1\n", 283 | " ] # delete hull-based cells\n", 284 | " regions_gdf.crs = crs\n", 285 | " return regions_gdf\n", 286 | "\n", 287 | "\n", 288 | "regions_gdf = _regions(voronoi_diagram, \"uID\", ids, crs=network.crs)\n", 289 | "tessellation = regions_gdf[[\"uID\", \"geometry\"]].dissolve(by=\"uID\", as_index=False)\n", 290 | "\n", 291 | "\n", 292 | "# make linestrings\n", 293 | "linestrings = tessellation.geometry.exterior\n", 294 | "\n", 295 | "# clip linestrings\n", 296 | "# use geopandas.clip once released\n", 297 | "clipped = linestrings.intersection(invalid.unary_union)\n", 298 | "clipped = clipped[~clipped.is_empty & clipped.notnull()]\n", 299 | "\n", 300 | "clipped = clipped.reset_index(drop=True).explode().reset_index(drop=True)\n", 301 | "\n", 302 | "# split at corners\n", 303 | "sindex = clipped.sindex\n", 304 | "for ix, geom in tqdm(clipped.iteritems(), total=clipped.shape[0]):\n", 305 | " corners = []\n", 306 | " coords = geom.coords\n", 307 | " for i in coords:\n", 308 | " point = Point(i)\n", 309 | " possible_matches_index = list(sindex.intersection(point.bounds))\n", 310 | " possible_matches = clipped.iloc[possible_matches_index]\n", 311 | " precise_matches = sum(possible_matches.intersects(point))\n", 312 | " if precise_matches > 2:\n", 313 | " corners.append(point)\n", 314 | " if len(corners) > 1:\n", 315 | " corners = MultiPoint(corners)\n", 316 | " 
clipped.loc[ix] = shapely.ops.split(geom, corners)\n", 317 | " elif len(corners) == 1:\n", 318 | " clipped.loc[ix] = shapely.ops.split(geom, corners[0])\n", 319 | "clipped = clipped.explode().reset_index(drop=True)\n", 320 | "\n", 321 | "\n", 322 | "# check duplicates shapely.ops.shared_paths\n", 323 | "unique = []\n", 324 | "for ix, line in tqdm(clipped.iteritems(), total=clipped.shape[0]):\n", 325 | " if not any(l.equals(line) for l in unique):\n", 326 | " unique.append(line)\n", 327 | "\n", 328 | "unique = gpd.GeoSeries(unique, crs=network.crs)\n", 329 | "\n", 330 | "\n", 331 | "# simplify\n", 332 | "unique = unique.simplify(0.5, preserve_topology=False) # OK\n", 333 | "\n", 334 | "# fix bowties - snap\n", 335 | "# get snapping centroids\n", 336 | "centroids = _get_snapping_centroids(unique, bowtie_tolerance)\n", 337 | "\n", 338 | "# snap to centroids\n", 339 | "# no_false = _snap(no_false, centroids, bowtie_tolerance, min=True)\n", 340 | "no_false = _snap(unique, centroids, bowtie_tolerance, min=True) # OK\n", 341 | "\n", 342 | "\n", 343 | "# remove unwanted lines from network. 
GeoPandas PR is used\n", 344 | "overlay = gpd.overlay(network, invalid, keep_geom_type=False, how=\"difference\") # OK\n", 345 | "\n", 346 | "\n", 347 | "# remove those leading nowhere\n", 348 | "buffered = overlay.geometry.buffer(4)\n", 349 | "\n", 350 | "cleaned = _remove_leading_out(no_false, buffered) # OK 1\n", 351 | "cleaned = mm.network_false_nodes(cleaned, precision=3) # OK 2\n", 352 | "cleaned = cleaned.simplify(0.5, preserve_topology=False) # 25\n", 353 | "\n", 354 | "# fix imprecise intersections (collapse) - snap\n", 355 | "# get snapping centroids\n", 356 | "centroids = _get_snapping_centroids(cleaned, selfsnap_tolerance) # c1\n", 357 | "\n", 358 | "# snap to centroids\n", 359 | "cleaned = _snap(cleaned, centroids, selfsnap_tolerance, min=False) # 3\n", 360 | "cleaned = _split(cleaned, centroids, selfsnap_tolerance).explode() # 4 OK\n", 361 | "\n", 362 | "\n", 363 | "# remove those leading nowhere again = result of snap\n", 364 | "cleaned = _remove_leading_out(cleaned, buffered, tolerance=0.01) # 5 OK\n", 365 | "cleaned.reset_index(drop=True, inplace=True)\n", 366 | "\n", 367 | "cleaned = mm.network_false_nodes(cleaned, precision=3) # 6 OK\n", 368 | "\n", 369 | "cleaned = _drop_duplicates(cleaned) # 7 OK\n", 370 | "\n", 371 | "\n", 372 | "# snap to network\n", 373 | "cleaned = _snap(cleaned, overlay.geometry, selfsnap_tolerance, min=True) # 8 OK\n", 374 | "cleaned = _remove_leading_out(cleaned, overlay, tolerance=0.000001) # 9 OK\n", 375 | "\n", 376 | "# combine together\n", 377 | "combined = cleaned.geometry.append(overlay.geometry).reset_index(drop=True)\n", 378 | "\n", 379 | "# merge together\n", 380 | "final = mm.network_false_nodes(combined, precision=3)\n", 381 | "print(time.time() - start, 'seconds')\n", 382 | "# DONE\n", 383 | "\n", 384 | "final.to_file('files/AMS/tempnet.gpkg', driver='GPKG', layer='network_simplified')" 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "geo_dev", 391 | "language": 
"python", 392 | "name": "geo_dev" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.8.0" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 4 409 | } 410 | -------------------------------------------------------------------------------- /code_production/Amsterdam/200224_tessellation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import momepy as mm\n", 11 | "import libpysal" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "buildings = gpd.read_file('files/AMS/elements.gpkg', layer='buildings')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "limit = mm.buffered_limit(buildings, 100)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "Inward offset...\n", 42 | "Discretization...\n" 43 | ] 44 | }, 45 | { 46 | "name": "stderr", 47 | "output_type": "stream", 48 | "text": [ 49 | " 0%| | 0/253388 [00:00 2:\n", 42 | " corners.append(point)\n", 43 | "\n", 44 | " if len(corners) > 2:\n", 45 | " for c, it in enumerate(corners):\n", 46 | " next_c = c + 1\n", 47 | " if c == (len(corners) - 1):\n", 48 | " next_c = 0\n", 49 | " if corners[c].distance(corners[next_c]) < sensitivity:\n", 50 | " change.append([corners[c], corners[next_c]])\n", 51 | " elif len(corners) == 2:\n", 52 | " if 
corners[0].distance(corners[1]) > 0:\n", 53 | " if corners[0].distance(corners[1]) < sensitivity:\n", 54 | " change.append([corners[0], corners[1]])\n", 55 | "\n", 56 | " if change:\n", 57 | " for points in change:\n", 58 | " x_new = np.mean([points[0].x, points[1].x])\n", 59 | " y_new = np.mean([points[0].y, points[1].y])\n", 60 | " new = [(x_new, y_new), id]\n", 61 | " changes[(points[0].x, points[0].y)] = new\n", 62 | " changes[(points[1].x, points[1].y)] = new\n", 63 | " qid = qid + 1\n", 64 | "\n", 65 | " for ix, row in tqdm(tessellation.iterrows(), total=tessellation.shape[0]):\n", 66 | " cell = row.geometry\n", 67 | " coords = list(cell.exterior.coords)\n", 68 | "\n", 69 | " moves = {}\n", 70 | " for x in coords:\n", 71 | " if x in changes.keys():\n", 72 | " moves[coords.index(x)] = changes[x]\n", 73 | " keys = list(moves.keys())\n", 74 | " delete_points = []\n", 75 | " for move, k in enumerate(keys):\n", 76 | " if move < len(keys) - 1:\n", 77 | " if (\n", 78 | " moves[keys[move]][1] == moves[keys[move + 1]][1]\n", 79 | " and keys[move + 1] - keys[move] < 5\n", 80 | " ):\n", 81 | " delete_points = delete_points + (\n", 82 | " coords[keys[move] : keys[move + 1]]\n", 83 | " )\n", 84 | " # change the code above to have if based on distance not number\n", 85 | "\n", 86 | " newcoords = [changes[x][0] if x in changes.keys() else x for x in coords]\n", 87 | " for coord in newcoords:\n", 88 | " if coord in delete_points:\n", 89 | " newcoords.remove(coord)\n", 90 | " if coords != newcoords:\n", 91 | " if not cell.interiors:\n", 92 | " # newgeom = Polygon(newcoords).buffer(0)\n", 93 | " be = Polygon(newcoords).exterior\n", 94 | " mls = be.intersection(be)\n", 95 | " if len(list(shapely.ops.polygonize(mls))) > 1:\n", 96 | " newgeom = MultiPolygon(shapely.ops.polygonize(mls))\n", 97 | " geoms = []\n", 98 | " for g, n in enumerate(newgeom):\n", 99 | " geoms.append(newgeom[g].area)\n", 100 | " newgeom = newgeom[geoms.index(max(geoms))]\n", 101 | " else:\n", 102 | " 
newgeom = list(shapely.ops.polygonize(mls))[0]\n", 103 | " else:\n", 104 | " newgeom = Polygon(newcoords, holes=cell.interiors)\n", 105 | " tessellation.loc[ix, \"geometry\"] = newgeom\n", 106 | " return tessellation" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "path = 'folder/AMS'\n", 116 | "tess = gpd.read_file(path + '/elements.gpkg', layer='tessellation')" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "queen = queen_corners(tess)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "queen.to_file(path + 'queen.gpkg', layer='tessellation', driver='GPKG')" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "geo_dev", 141 | "language": "python", 142 | "name": "geo_dev" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.8.0" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 4 159 | } 160 | -------------------------------------------------------------------------------- /code_production/Amsterdam/200305 height processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "path = 
'/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/elements.gpkg'\n", 19 | "buildings = gpd.read_file(path, layer='buildings')" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "Index(['identificatie', 'aanduidingrecordinactief',\n", 31 | " 'aanduidingrecordcorrectie', 'officieel', 'inonderzoek',\n", 32 | " 'documentnummer', 'documentdatum', 'bouwjaar',\n", 33 | " 'begindatumtijdvakgeldigheid', 'einddatumtijdvakgeldigheid',\n", 34 | " 'gemeentecode', 'ground-0.00', 'ground-0.10', 'ground-0.20',\n", 35 | " 'ground-0.30', 'ground-0.40', 'ground-0.50', 'roof-0.25', 'rmse-0.25',\n", 36 | " 'roof-0.50', 'rmse-0.50', 'roof-0.75', 'rmse-0.75', 'roof-0.90',\n", 37 | " 'rmse-0.90', 'roof-0.95', 'rmse-0.95', 'roof-0.99', 'rmse-0.99',\n", 38 | " 'roof_flat', 'nr_ground_pts', 'nr_roof_pts', 'ahn_file_date',\n", 39 | " 'ahn_version', 'height_valid', 'tile_id', 'uID', 'issues', 'geometry'],\n", 40 | " dtype='object')" 41 | ] 42 | }, 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "buildings.columns" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "buildings['sdbHei'] = buildings['roof-0.75'].apply(lambda x: x if x > 3 else 3)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 6, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "3.0" 70 | ] 71 | }, 72 | "execution_count": 6, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "buildings['sdbHei']" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 7, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "buildings[['uID', 'sdbHei', 'geometry']].to_file(path, layer='buildings', driver='GPKG')" 88 | ] 89 | }, 90 | { 91 | 
"cell_type": "code", 92 | "execution_count": 8, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "import fiona" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 9, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "['tessellation', 'buildings']" 108 | ] 109 | }, 110 | "execution_count": 9, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "fiona.listlayers(path)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 16, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "['tempnet',\n", 128 | " 'polygonized',\n", 129 | " 'possible',\n", 130 | " 'invalid',\n", 131 | " 'network_simplified',\n", 132 | " 'manual_fix']" 133 | ] 134 | }, 135 | "execution_count": 16, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "fiona.listlayers(n_path)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [] 150 | } 151 | ], 152 | "metadata": { 153 | "kernelspec": { 154 | "display_name": "geo_dev", 155 | "language": "python", 156 | "name": "geo_dev" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.8.0" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 4 173 | } 174 | -------------------------------------------------------------------------------- /code_production/Amsterdam/200305_Blocks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | 
"source": [ 9 | "import geopandas as gpd\n", 10 | "import momepy as mm" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "'0+unknown'" 22 | ] 23 | }, 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": [ 30 | "mm.__version__" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "file = 'files/AMS/elements.gpkg'\n", 40 | "tess = gpd.read_file(file, layer='tessellation')\n", 41 | "blg = gpd.read_file(file, layer='buildings')\n", 42 | "streets = gpd.read_file(file, layer='network_dense')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "limit = mm.buffered_limit(blg)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "Building R-tree for network...\n", 64 | "Building R-tree for buildings...\n" 65 | ] 66 | }, 67 | { 68 | "name": "stderr", 69 | "output_type": "stream", 70 | "text": [ 71 | " 0%| | 16/44039 [00:00<05:22, 136.69it/s]" 72 | ] 73 | }, 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "Snapping...\n" 79 | ] 80 | }, 81 | { 82 | "name": "stderr", 83 | "output_type": "stream", 84 | "text": [ 85 | "100%|██████████| 44039/44039 [06:30<00:00, 112.89it/s]\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "snapped = mm.snap_street_network_edge(streets, blg, 20, tess, 120, limit)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 8, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "#snapped.to_file('temp_snapped')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 10, 105 | "metadata": {}, 106 
| "outputs": [], 107 | "source": [ 108 | "#gpd.GeoSeries(limit).to_file('temp_limit')" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "Buffering streets...\n", 121 | "Generating spatial index...\n" 122 | ] 123 | }, 124 | { 125 | "name": "stderr", 126 | "output_type": "stream", 127 | "text": [ 128 | " 0%| | 50/252385 [00:00<08:28, 496.66it/s]" 129 | ] 130 | }, 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "Difference...\n" 136 | ] 137 | }, 138 | { 139 | "name": "stderr", 140 | "output_type": "stream", 141 | "text": [ 142 | "100%|██████████| 252385/252385 [08:45<00:00, 480.09it/s]\n" 143 | ] 144 | }, 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Defining adjacency...\n" 150 | ] 151 | }, 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "100%|██████████| 482590/482590 [02:35<00:00, 3095.06it/s]\n" 157 | ] 158 | }, 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "Defining street-based blocks...\n", 164 | "Defining block ID...\n", 165 | "Generating centroids...\n", 166 | "Spatial join...\n", 167 | "Attribute join (tesselation)...\n", 168 | "Generating blocks...\n", 169 | "Multipart to singlepart...\n" 170 | ] 171 | }, 172 | { 173 | "name": "stderr", 174 | "output_type": "stream", 175 | "text": [ 176 | "100%|██████████| 8903/8903 [00:25<00:00, 342.96it/s]\n" 177 | ] 178 | }, 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "Attribute join (buildings)...\n", 184 | "Attribute join (tesselation)...\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "blocks = mm.Blocks(tess, snapped, blg, 'bID', 'uID')\n", 190 | "blocks_df = blocks.blocks\n", 191 | "blg['bID'] = blocks.buildings_id\n", 192 | "tess['bID'] = blocks.tessellation_id" 193 
| ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 7, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "tess.to_file(file, layer='tessellation', driver='GPKG')\n", 202 | "blg.to_file(file, layer='buildings', driver='GPKG')\n", 203 | "blocks_df.to_file(file, layer='blocks', driver='GPKG')\n", 204 | "#streets.to_file(file, layer='network', driver='GPKG')" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "momepy_env", 218 | "language": "python", 219 | "name": "momepy_env" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.7.6" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 4 236 | } 237 | -------------------------------------------------------------------------------- /code_production/Amsterdam/200305_network_minor_preprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import momepy as mm" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "path = 'files/AMS/tempnet.gpkg'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "net = gpd.read_file(path, layer='tempnet')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 
| "cleaned = mm.network_false_nodes(net)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "cleaned['nID'] = range(len(cleaned))" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "cleaned.to_file('files/AMS/elements.gpkg', layer='network', driver='GPKG')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "tess_orig = gpd.read_file('files/AMS/elements.gpkg', layer='tessellation')" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "tess_orig.to_file('files/AMS/tessellation_no_queen.gpkg', layer='tessellation', driver='GPKG')" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "queen = gpd.read_file('files/AMS/queen.gpkg')" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "queen.to_file('files/AMS/elements.gpkg', layer='tessellation', driver='GPKG')" 92 | ] 93 | } 94 | ], 95 | "metadata": { 96 | "kernelspec": { 97 | "display_name": "geo_dev", 98 | "language": "python", 99 | "name": "geo_dev" 100 | }, 101 | "language_info": { 102 | "codemirror_mode": { 103 | "name": "ipython", 104 | "version": 3 105 | }, 106 | "file_extension": ".py", 107 | "mimetype": "text/x-python", 108 | "name": "python", 109 | "nbconvert_exporter": "python", 110 | "pygments_lexer": "ipython3", 111 | "version": "3.8.0" 112 | } 113 | }, 114 | "nbformat": 4, 115 | "nbformat_minor": 4 116 | } 117 | -------------------------------------------------------------------------------- /code_production/Amsterdam/200306_false_nodes.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import geopandas as gpd\n", 10 | "import momepy as mm" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 8, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "zipfile = 'zip://files/AMS/ams.zip!ams'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 9, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "network = gpd.read_file(zipfile)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 10, 34 | "metadata": { 35 | "collapsed": true, 36 | "jupyter": { 37 | "outputs_hidden": true 38 | } 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "\n", 46 | "RangeIndex: 40121 entries, 0 to 40120\n", 47 | "Data columns (total 63 columns):\n", 48 | "Join_Count 39937 non-null float64\n", 49 | "TARGET_FID 39937 non-null float64\n", 50 | "Id 39937 non-null float64\n", 51 | "fid_1 39937 non-null float64\n", 52 | "wvk_id 40120 non-null float64\n", 53 | "wvk_begdat 40120 non-null object\n", 54 | "jte_id_beg 40120 non-null float64\n", 55 | "jte_id_end 40120 non-null float64\n", 56 | "wegbehsrt 40120 non-null object\n", 57 | "wegnummer 1396 non-null object\n", 58 | "wegdeelltr 40120 non-null object\n", 59 | "hecto_lttr 40120 non-null object\n", 60 | "bst_code 5223 non-null object\n", 61 | "rpe_code 40120 non-null object\n", 62 | "admrichtng 1413 non-null object\n", 63 | "rijrichtng 2459 non-null object\n", 64 | "stt_naam 40120 non-null object\n", 65 | "stt_bron 40120 non-null object\n", 66 | "wpsnaamnen 40120 non-null object\n", 67 | "gme_id 40120 non-null float64\n", 68 | "gme_naam 40120 non-null object\n", 69 | "hnrstrlnks 22102 non-null object\n", 70 | "hnrstrrhts 24240 non-null object\n", 71 | "e_hnr_lnks 40120 non-null 
float64\n", 72 | "e_hnr_rhts 40120 non-null float64\n", 73 | "l_hnr_lnks 40120 non-null float64\n", 74 | "l_hnr_rhts 40120 non-null float64\n", 75 | "begafstand 40120 non-null float64\n", 76 | "endafstand 40120 non-null float64\n", 77 | "beginkm 40120 non-null float64\n", 78 | "eindkm 40120 non-null float64\n", 79 | "pos_tv_wol 1329 non-null object\n", 80 | "wegbehcode 40120 non-null object\n", 81 | "wegbehnaam 40120 non-null object\n", 82 | "distrcode 40120 non-null float64\n", 83 | "distrnaam 1081 non-null object\n", 84 | "dienstcode 1081 non-null object\n", 85 | "dienstnaam 1081 non-null object\n", 86 | "wegtype 308 non-null object\n", 87 | "wgtype_oms 308 non-null object\n", 88 | "routeltr 779 non-null object\n", 89 | "routenr 40120 non-null float64\n", 90 | "routeltr2 218 non-null object\n", 91 | "routenr2 40120 non-null float64\n", 92 | "routeltr3 0 non-null object\n", 93 | "routenr3 40120 non-null float64\n", 94 | "routeltr4 0 non-null object\n", 95 | "routenr4 40120 non-null float64\n", 96 | "wegnr_aw 1081 non-null object\n", 97 | "wegnr_hmp 1396 non-null object\n", 98 | "geobron_id 40120 non-null float64\n", 99 | "geobron_nm 40120 non-null object\n", 100 | "bronjaar 40120 non-null float64\n", 101 | "openlr 40120 non-null object\n", 102 | "FREQUENCY 39937 non-null float64\n", 103 | "S_ID 39937 non-null float64\n", 104 | "ORIG_FID 39937 non-null float64\n", 105 | "Shape_Leng 39937 non-null float64\n", 106 | "InLine_FID 39937 non-null float64\n", 107 | "SimLnFlag 39937 non-null float64\n", 108 | "MaxSimpTol 39937 non-null float64\n", 109 | "MinSimpTol 39937 non-null float64\n", 110 | "geometry 40121 non-null geometry\n", 111 | "dtypes: float64(31), geometry(1), object(31)\n", 112 | "memory usage: 19.3+ MB\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "network.info()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 11, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stderr", 127 | "output_type": "stream", 128 
| "text": [ 129 | " 0%| | 32/40121 [00:00<02:06, 317.56it/s]" 130 | ] 131 | }, 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "Identifying false points...\n" 137 | ] 138 | }, 139 | { 140 | "name": "stderr", 141 | "output_type": "stream", 142 | "text": [ 143 | "100%|██████████| 40121/40121 [02:06<00:00, 315.94it/s]\n", 144 | " 0%| | 0/1167 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
uIDbIDnIDnodeIDgeometryindex_rightidentificatieaanduidingrecordinactiefaanduidingrecordcorrectieofficieel...rmse-0.95roof-0.99rmse-0.99roof_flatnr_ground_ptsnr_roof_ptsahn_file_dateahn_versionheight_validtile_id
000.035435.013440.0POLYGON Z ((130331.253 483337.251 0.000, 13033...0.00384100000003464False0.0False...0.344.590.34True50.01218.0None3.0True25hn1
110.035435.013440.0POLYGON Z ((130374.022 483428.300 0.000, 13038...1.00384100000003889False0.0False...0.463.430.46True82.0869.0None3.0True25hn1
\n

2 rows × 42 columns

\n" 50 | }, 51 | "metadata": {}, 52 | "execution_count": 5 53 | } 54 | ], 55 | "source": [ 56 | "join.head(2)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 6, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "join.drop(columns=['nID', 'nodeID', 'geometry']).to_parquet('/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/raw/bag_data.pq')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [] 74 | } 75 | ], 76 | "metadata": { 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 3 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython3", 87 | "version": "3.8.2-final" 88 | }, 89 | "orig_nbformat": 2, 90 | "kernelspec": { 91 | "name": "geo_dev", 92 | "display_name": "geo_dev" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } -------------------------------------------------------------------------------- /code_production/Amsterdam/clustering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "liked-retailer", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from download import download" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "brilliant-healthcare", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Successfully downloaded file to ams_patterned_norm.pq\n" 24 | ] 25 | }, 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "'ams_patterned_norm.pq'" 30 | ] 31 | }, 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | 
"download(\"https://www.dropbox.com/s/trkbcixqut2jmtk/context_data_norm.parquet?dl=1\", \"ams_patterned_norm.pq\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "id": "technical-publisher", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from sklearn.mixture import GaussianMixture\n", 49 | "import pandas" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 6, 55 | "id": "antique-square", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "data = pandas.read_parquet('ams_patterned_norm.pq')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 9, 65 | "id": "special-offset", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "data = data.drop(columns=\"cluster\")" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 10, 75 | "id": "annual-prairie", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 
248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " 
\n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | "
stcOri_meanIQ3stcOri_rangeIQ3stcOri_theilID3sdcLAL_meanIQ3sdcLAL_rangeIQ3sdcLAL_theilID3sdcAre_meanIQ3sdcAre_rangeIQ3sdcAre_theilID3sscCCo_meanIQ3...ldsCDL_simpsonxcnSCl_simpsonmtdMDi_simpsonlddNDe_simpsonlinWID_simpsonlddRea_simpsonlddARe_simpsonsddAre_simpsonmidRea_simpsonmidAre_simpson
uID
00.585140-0.896398-0.4894757.133090-1.091901-1.05791616.668079-0.238053-1.1522596.014979...1.2648231.4356122.4836931.9475021.8677602.2189711.5418491.4039832.8103041.613008
10.585140-0.896398-0.4894757.133090-1.091901-1.05791616.668079-0.238053-1.1522596.014979...1.2648231.4356122.4836931.9475021.8677602.2189711.5418491.4039832.8103041.613008
20.5712351.201597-0.1151024.9947481.363017-0.7568826.0825462.874121-0.8675412.367613...-0.4984781.4356122.4836931.9475021.8677600.237811-0.275548-0.4573940.608711-0.260924
30.9434331.084128-0.1986432.9197432.6120261.3640342.6408113.5065980.9114052.538624...1.2648231.4356122.4836931.9475021.8677602.2189711.5418491.4039832.8103041.613008
40.5712351.201597-0.1151024.9947481.363017-0.7568826.0825462.874121-0.8675412.367613...-0.4984781.4356122.4836931.9475021.8677600.237811-0.275548-0.4573940.608711-0.260924
..................................................................
194521.4318310.258860-0.4534721.4223292.1607671.044623-0.034803-0.196324-0.227110-0.932315...1.264823-0.9652720.6052871.9475021.8677600.203535-0.645944-0.4895982.2130551.104647
194531.3907880.275483-0.4528041.4575782.2381531.1386010.0647910.2198000.579435-0.182882...1.264823-1.1171810.8932851.9475021.867760-0.056267-0.858392-0.7336921.6303881.074985
194541.737126-0.577250-0.4718510.6429032.3469581.932596-0.0558940.1665601.457609-0.210482...-0.691058-1.038957-0.8859901.7610871.506825-1.042921-0.932765-0.871893-0.345487-0.591430
194551.737126-0.577250-0.4718510.6429032.3469581.932596-0.0558940.1665601.457609-0.210482...-0.691058-1.038957-0.8859901.7610871.506825-1.042921-0.932765-0.871893-0.345487-0.591430
194571.719177-0.394227-0.4700460.2547972.3989312.441633-0.094297-0.1260531.2345380.094674...-0.416872-1.014127-0.4693781.6972711.620008-0.913352-0.545799-0.868314-0.474565-0.420435
\n", 413 | "

252385 rows × 296 columns

\n", 414 | "
" 415 | ], 416 | "text/plain": [ 417 | " stcOri_meanIQ3 stcOri_rangeIQ3 stcOri_theilID3 sdcLAL_meanIQ3 \\\n", 418 | "uID \n", 419 | "0 0.585140 -0.896398 -0.489475 7.133090 \n", 420 | "1 0.585140 -0.896398 -0.489475 7.133090 \n", 421 | "2 0.571235 1.201597 -0.115102 4.994748 \n", 422 | "3 0.943433 1.084128 -0.198643 2.919743 \n", 423 | "4 0.571235 1.201597 -0.115102 4.994748 \n", 424 | "... ... ... ... ... \n", 425 | "19452 1.431831 0.258860 -0.453472 1.422329 \n", 426 | "19453 1.390788 0.275483 -0.452804 1.457578 \n", 427 | "19454 1.737126 -0.577250 -0.471851 0.642903 \n", 428 | "19455 1.737126 -0.577250 -0.471851 0.642903 \n", 429 | "19457 1.719177 -0.394227 -0.470046 0.254797 \n", 430 | "\n", 431 | " sdcLAL_rangeIQ3 sdcLAL_theilID3 sdcAre_meanIQ3 sdcAre_rangeIQ3 \\\n", 432 | "uID \n", 433 | "0 -1.091901 -1.057916 16.668079 -0.238053 \n", 434 | "1 -1.091901 -1.057916 16.668079 -0.238053 \n", 435 | "2 1.363017 -0.756882 6.082546 2.874121 \n", 436 | "3 2.612026 1.364034 2.640811 3.506598 \n", 437 | "4 1.363017 -0.756882 6.082546 2.874121 \n", 438 | "... ... ... ... ... \n", 439 | "19452 2.160767 1.044623 -0.034803 -0.196324 \n", 440 | "19453 2.238153 1.138601 0.064791 0.219800 \n", 441 | "19454 2.346958 1.932596 -0.055894 0.166560 \n", 442 | "19455 2.346958 1.932596 -0.055894 0.166560 \n", 443 | "19457 2.398931 2.441633 -0.094297 -0.126053 \n", 444 | "\n", 445 | " sdcAre_theilID3 sscCCo_meanIQ3 ... ldsCDL_simpson xcnSCl_simpson \\\n", 446 | "uID ... \n", 447 | "0 -1.152259 6.014979 ... 1.264823 1.435612 \n", 448 | "1 -1.152259 6.014979 ... 1.264823 1.435612 \n", 449 | "2 -0.867541 2.367613 ... -0.498478 1.435612 \n", 450 | "3 0.911405 2.538624 ... 1.264823 1.435612 \n", 451 | "4 -0.867541 2.367613 ... -0.498478 1.435612 \n", 452 | "... ... ... ... ... ... \n", 453 | "19452 -0.227110 -0.932315 ... 1.264823 -0.965272 \n", 454 | "19453 0.579435 -0.182882 ... 1.264823 -1.117181 \n", 455 | "19454 1.457609 -0.210482 ... 
-0.691058 -1.038957 \n", 456 | "19455 1.457609 -0.210482 ... -0.691058 -1.038957 \n", 457 | "19457 1.234538 0.094674 ... -0.416872 -1.014127 \n", 458 | "\n", 459 | " mtdMDi_simpson lddNDe_simpson linWID_simpson lddRea_simpson \\\n", 460 | "uID \n", 461 | "0 2.483693 1.947502 1.867760 2.218971 \n", 462 | "1 2.483693 1.947502 1.867760 2.218971 \n", 463 | "2 2.483693 1.947502 1.867760 0.237811 \n", 464 | "3 2.483693 1.947502 1.867760 2.218971 \n", 465 | "4 2.483693 1.947502 1.867760 0.237811 \n", 466 | "... ... ... ... ... \n", 467 | "19452 0.605287 1.947502 1.867760 0.203535 \n", 468 | "19453 0.893285 1.947502 1.867760 -0.056267 \n", 469 | "19454 -0.885990 1.761087 1.506825 -1.042921 \n", 470 | "19455 -0.885990 1.761087 1.506825 -1.042921 \n", 471 | "19457 -0.469378 1.697271 1.620008 -0.913352 \n", 472 | "\n", 473 | " lddARe_simpson sddAre_simpson midRea_simpson midAre_simpson \n", 474 | "uID \n", 475 | "0 1.541849 1.403983 2.810304 1.613008 \n", 476 | "1 1.541849 1.403983 2.810304 1.613008 \n", 477 | "2 -0.275548 -0.457394 0.608711 -0.260924 \n", 478 | "3 1.541849 1.403983 2.810304 1.613008 \n", 479 | "4 -0.275548 -0.457394 0.608711 -0.260924 \n", 480 | "... ... ... ... ... 
\n", 481 | "19452 -0.645944 -0.489598 2.213055 1.104647 \n", 482 | "19453 -0.858392 -0.733692 1.630388 1.074985 \n", 483 | "19454 -0.932765 -0.871893 -0.345487 -0.591430 \n", 484 | "19455 -0.932765 -0.871893 -0.345487 -0.591430 \n", 485 | "19457 -0.545799 -0.868314 -0.474565 -0.420435 \n", 486 | "\n", 487 | "[252385 rows x 296 columns]" 488 | ] 489 | }, 490 | "execution_count": 10, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [ 496 | "data" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "id": "selected-paragraph", 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "%%time\n", 507 | "gmm = GaussianMixture(n_components=10, covariance_type=\"full\", max_iter=300, n_init=100, random_state=42)\n", 508 | "fitted = gmm.fit(data)" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 12, 514 | "id": "oriented-baker", 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "data['cluster'] = gmm.predict(data)" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 13, 524 | "id": "mighty-services", 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "data.reset_index()[['cluster', 'uID']].to_csv('ams_cluster_labels_10.csv')" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 15, 534 | "id": "streaming-defense", 535 | "metadata": {}, 536 | "outputs": [ 537 | { 538 | "data": { 539 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABZUAAAJCCAYAAAC4USmzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAeqklEQVR4nO3df6zdd33f8de7djZ+ZBnJ4sReAnU3pR2sToNmsW6IFjVlCfXWZCNooZRaFlUqGKhM7bZsarWVMpVJU7uppd2sgXFH1zb1WJ2RalUUxjaqjnIpkLs0QFhpQ6gvNowSQiEd9LM/zonkZDZ++/pef7/Xfjyko3PPuff6vv6wfK+f93O/t8YYAQAAAACAjq+begAAAAAAAFuHqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABA2/bz+cGuvPLKsXv37vP5IQEAAAAAOEsf+MAHPjPG2HGq153XqLx79+6srKyczw8JAAAAAMBZqqrfP93rXP4CAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgLbtUw+42P2H9z2cox/61NQzAAAAztotN1yT7/mrz5l6BgBwnjmpPLGjH/pUfufYo1PPAAAAOCu/c+xRB2QA4CLlpPIMPG/XZfnlH/hrU88AAABo+7v/9jenngAATMRJZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2tpRuaq2VdUHq+pdy8dXVNW9VfXQ8v7yzZsJAAAAAMAcnM1J5R9M8uBJj+9Mct8Y47ok9y0fAwAAAABwAWtF5aq6Nsm+JP/upKdvSXJ4+fLhJLdu6DIAAAAAAGane1L5XyX5h0n+5KTnrh5jHEuS5f1VGzsNAAAAAIC5OWNUrqq/meT4GOMD6/kAVXVHVa1U1cqJEyfW80cAAAAAADATnZPKL0zy3VX1e0l+Kcl3VNU7kny6qnYlyfL++KneeYxxcIyxd4yxd8eOHRs0GwAAAACAKZwxKo8x/vEY49oxxu4ktyd59xjje5PcnWT/8s32Jzm6aSsBAAAAAJiF7jWVT+XNSV5SVQ8lecnyMQAAAAAAF7DtZ/PGY4z3JHnP8uXPJrlx4ycBAAAAADBX53JSGQAAAACAi4yoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAEC
bqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAECbqAwAAAAAQJuoDAAAAABAm6gMAAAAAEDbGaNyVT2tqn6rqj5cVQ9U1Y8tn7+iqu6tqoeW95dv/lwAAAAAAKbUOan8eJLvGGN8S5IbktxcVd+a5M4k940xrkty3/IxAAAAAAAXsDNG5bHw2PLhJcvbSHJLksPL5w8nuXUzBgIAAAAAMB+taypX1baq+lCS40nuHWO8L8nVY4xjSbK8v2rTVgIAAAAAMAutqDzG+OoY44Yk1yZ5QVV9c/cDVNUdVbVSVSsnTpxY50wAAAAAAOagFZWfMMb4wyTvSXJzkk9X1a4kWd4fP837HBxj7B1j7N2xY8e5rQUAAAAAYFJnjMpVtaOqnrV8+elJvjPJR5LcnWT/8s32Jzm6SRsBAAAAAJiJ7Y232ZXkcFVtyyJC3zXGeFdV/WaSu6rq1UkeTvLyTdwJAAAAAMAMnDEqjzHuT/L8Uzz/2SQ3bsYoAAAAAADm6ayuqQwAAAAAwMVNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBt+9QDAADYIlYOJatHpl4BzMXaLYv7Q2+adgcwL3tuS/YemHoFsMlEZQAAelaPJGuryc49Uy8BZuCXn3N06gnA3KytLu5FZbjgicoAAPTt3JMcuGfqFQDAHB3aN/UC4DxxTWUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANp
EZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADaRGUAAAAAANpEZQAAAAAA2kRlAAAAAADazhiVq+rZVfVfq+rBqnqgqn5w+fwVVXVvVT20vL988+cCAAAAADClzknlryT5oTHGc5N8a5K/V1XPS3JnkvvGGNcluW/5GAAAAACAC9gZo/IY49gY47eXL38hyYNJrklyS5LDyzc7nOTWTdoIAAAAAMBMnNU1latqd5LnJ3lfkqvHGMeSRXhOctWGrwMAAAAAYFbaUbmqLk3yH5O8YYzx6Fm83x1VtVJVKydOnFjPRgAAAAAAZqIVlavqkiyC8i+MMd65fPrTVbVr+fpdSY6f6n3HGAfHGHvHGHt37NixEZsBAAAAAJjIGaNyVVWStyZ5cIzxkye96u4k+5cv709ydOPnAQAAAAAwJ9sbb/PCJK9KslpVH1o+90+SvDnJXVX16iQPJ3n5piwEAAAAAGA2zhiVxxjvTVKnefWNGzsHAAAAAIA5a/+iPgAAAAAAEJUBAAAAAGgTlQEAAAAAaBOVAQAAAABoE5UBAAAAAGgTlQEAAAAAaBOVAQAAAABoE5UBAAAAAGgTlQEAAAAAaBOVAQAAAABoE5UBAAAAAGgTlQEAAAAAaBOVAQAAAABoE5UBAAAAAGgTlQEAAAAAaNs+9QAAAACAM1o5lKwemXoFX8va/Yv7Q/um3cHp7bkt2Xtg6hVcAJxUBgAAAOZv9Uiytjr1Cr6WndcvbszT2qpvzLBhnFQGAAAAtoade5ID90y9ArYmJ8jZQE4qAwAAAADQJioDAAAAANAmKgMAAAAA0CYqAwAAAADQJioDAAAAANAmKgMAAAAA0CYqAwAAAADQJioDAAAAANAmKgMAAAAA0CYqAwAAAADQtn3qAQBwXqwcSlaPTL0Ctra1+xf3h/ZNuwO2sj23JXsPTL0CAOCcOKkMwMVh9Uiytjr1Ctjadl6/uAHrs7bqG5wAwAXBSWUALh479yQH7pl6BQAXK6f8AYALhJPKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0ico
AAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALSJygAAAAAAtInKAAAAAAC0icoAAAAAALRtn3oAAAAAAGyolUPJ6pGpV8zL2v2L+0P7pt0xR3tuS/YemHrFluKkMgAAAAAXltUjydrq1CvmZef1ixtPtrbqGxDr4KQyAAAAABeenXuSA/dMvYK5c3J7XZxUBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoE1UBgAAAACgTVQGAAAAAKBNVAYAAAAAoO2MUbmq3lZVx6vqf5303BVVdW9VPbS8v3xzZwIAAAAAMAedk8pvT3LzU567M8l9Y4zrkty3fAwAAAAAwAXujFF5jPHfk/yfpzx9S5LDy5cPJ7l1Y2cBAAAAADBH29f5flePMY4lyRjjWFVdtYGbAACArWzlULJ6ZOoV87N2/+L+0L5pd8zRntuSvQemXgEANG36L+qrqjuqaqWqVk6cOLHZHw4AAJja6pFkbXXqFfOz8/rFjSdbW/VNCADYYtZ7UvnTVbVreUp5V5Ljp3vDMcbBJAeTZO/evWOdHw8AANhKdu5JDtwz9Qq2Aie3AWDLWe9J5buT7F++vD/J0Y2ZAwAAAADAnJ3xpHJV/WKSFye5sqoeSfJPk7w5yV1V9eokDyd5+WaOBDgt12yky3UsOVuu7wkAAHBKZ4zKY4xXnOZVN27wFoCz98Q1G3fumXoJc+calpyNJ64FKyoDAAD8f9Z7TWWA+XDNRmCjOdEOAABwWuu9pjIAAAAAABchURkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgLbtUw8AACa0cihZPTL1ivlZu39xf2jftDvmaM9tyd4DU68AAAAm5KQyAFzMVo8ka6tTr5ifndcvbjzZ2qpvQgAAAE4qA8BFb+ee5MA9U69gK3ByGwAAiJPKAAAAAACcBVEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACAtu1TDwCaVg4lq0emXjE/a/cv7g/tm3bHHO25Ldl7YOoVAAAAwAXGSWXYKlaPJGurU6+Yn53XL2482dqqb0IAAAAAm8JJZdhKdu5JDtwz9Qq2Aie3AQAAgE3ipDIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaI
yAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbaIyAAAAAABtojIAAAAAAG2iMgAAAAAAbecUlavq5qr6aFV9vKru3KhRAAAAAADM07qjclVtS/KWJC9N8rwkr6iq523UMAAAAAAA5udcTiq/IMnHxxi/O8b44yS/lOSWjZkFAAAAAMAcnUtUvibJJ096/MjyOQAAAAAALlA1xljfO1a9PMlNY4zvXz5+VZIXjDFe/5S3uyPJHcuH35Tko+ufCwAAAADAefD1Y4wdp3rF9nP4Qx9J8uyTHl+b5A+e+kZjjINJDp7DxwEAAAAAYCbO5fIX709yXVV9Q1X9qSS3J7l7Y2YBAAAAADBH6z6pPMb4SlW9LsmvJ9mW5G1jjAc2bBkAAAAAALOz7msqAwAAAABw8TmXy18AAAAAAHCREZUBAAAAAGgTlQEAAAAAaBOVJ1ZV11XVl6vqHVNvYb6q6oqq+k9V9cWq+v2q+p6pNzFfVbW7qn6tqj5XVWtV9TNVte5fzMqFq6res/wc9Njy9tGpNzFvVXV7VT24/Hz0v6vqRVNvYn6q6nVVtVJVj1fV26few3yd9PnnidtXq+qnp97F/FTVn66qty7/L/SFqvpgVb106l3MV1W9o6qOVdWjVfWxqvr+qTcxT1X13Kp6d1V9vqo+XlV/e+pNW4WoPL23JHn/1COYvbck+eMkVyd5ZZKfq6q/PO0kZuxnkxxPsivJDUm+PclrpxzErL1ujHHp8vZNU49hvqrqJUn+RZIDSf5Mkm9L8ruTjmKu/iDJm5K8beohzNtJn38uzeLr3C8l+ZWJZzFP25N8Mouva/9skh9NcldV7Z5yFLP2E0l2jzEuS/LdSd5UVX9l4k3MzPLw1dEk70pyRZI7kryjqr5x0mFbhKg8oaq6PckfJrlv4inMWFU9M8nLkvzoGOOxMcZ7k9yd5FXTLmPGviHJXWOML48x1pL8lyS+CQGcqx9L8sYxxv8cY/zJGONTY4xPTT2K+RljvHOM8atJPjv1FraU27L4pvj/mHoI8zPG+OIY45+NMX5v+TnoXUk+kUQk5JTGGA+MMR5/4uHy9hcnnMQ8/aUkfz7JT40xvjrGeHeS34je0iIqT6SqLkvyxiQ/NPUWZu8bk3x1jPGxk577cERCTu9fJ7m9qp5RVdckeWkWYRlO5Seq6jNV9RtV9eKpxzBPVbUtyd4kO5Y/FvjI8tI6T596G3DB2J/k58cYY+ohzF9VXZ3F/5MemHoL81VVP1tVf5TkI0mOJfm1iScxP3Wa5775fA/ZikTl6fx4kreOMT459RBm79Ikn3/Kc5/P4keP4VT+WxbfdHg0ySNJVpL86pSDmK1/lOQvJLkmycEk/7mqnODgVK5OckkWJwlflMWldZ6f5Ecm3ARcIKrqOVlc1uDw1FuYv6q6JMkvJDk8xvjI1HuYrzHGa7P4f/OLkrwzyeNf+z24CH0ki5+S+QdVdUlV/Y0sPh89Y9pZW4OoPIGquiHJdyb5qYmnsDU8luSypzx3WZIvTLCFmauqr0vy61l80fTMJFcmuTyL66DCk4wx3jfG+MIY4/ExxuEsftTru6bexSx9aXn/02OMY2OMzyT5yfj7AmyM70vy3jHGJ6Yewrwtv9b991n8vpnXTTyHLWB5SYP3Jrk2yWum3sO8jDH+b5Jbk+xLspbF1QTuyuJwFmcgKk/jxUl2J3m4qtaS/HCSl1XVb085itn6WJLtVXXdSc99S/yoF6d2RZJnJ/mZZSj8bJJDEX7oGTn1j4BxkRtjfC6LL679WDqwGb4vTilzBlVVSd6axU/PvGwZg6Bre1x
TmVMYY9w/xvj2McafG2PclMVPcv7W1Lu2AlF5Ggez+MfshuXt3yS5J8lN001irsYYX8zi1Okbq+qZVfXCJLdk8R16eJLl6cFPJHlNVW2vqmdlcY3CD086jNmpqmdV1U1V9bTl35VXJvm2LE66w6kcSvL6qrqqqi5P8oYsflM2PMny35SnJdmWZNsT/85MvYt5qqq/nsVlmH5l6i3M3s8leW6SvzXG+NKZ3piL1/Jrldur6tKq2lZVNyV5RZJ3T72N+amq65dfqzyjqn44ya4kb5941pYgKk9gjPFHY4y1J25ZXN7gy2OME1NvY7Zem+TpWVzr5xeTvGaM4aQyp/N3ktyc5ESSjyf5SpK/P+ki5uiSJG/K4u/JZ5K8PsmtY4yPTrqKOfvxJO/P4idoHkzywST/fNJFzNWPZHHJlDuTfO/yZdff5nT2J3nnGMOl3Titqvr6JD+QxaGstap6bHl75bTLmKmRxaUuHknyuST/MskbxhhHJ13FXL0qi1/keDzJjUleMsZw/e2G8st1AQAAAADoclIZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACANlEZAAAAAIA2URkAAAAAgDZRGQAAAACAtv8HG6CrHJXUuqsAAAAASUVORK5CYII=\n", 540 | "text/plain": [ 541 | "
" 542 | ] 543 | }, 544 | "metadata": { 545 | "needs_background": "light" 546 | }, 547 | "output_type": "display_data" 548 | } 549 | ], 550 | "source": [ 551 | "from scipy.cluster import hierarchy\n", 552 | "import matplotlib.pyplot as plt\n", 553 | "\n", 554 | "\n", 555 | "\n", 556 | "group = data.groupby('cluster').mean()\n", 557 | "Z = hierarchy.linkage(group, 'ward')\n", 558 | "plt.figure(figsize=(25, 10))\n", 559 | "dn = hierarchy.dendrogram(Z, labels=group.index)\n", 560 | "plt.savefig('ams_hierarchy.pdf')" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "id": "intermediate-child", 567 | "metadata": {}, 568 | "outputs": [], 569 | "source": [] 570 | } 571 | ], 572 | "metadata": { 573 | "kernelspec": { 574 | "display_name": "Python 3", 575 | "language": "python", 576 | "name": "python3" 577 | }, 578 | "language_info": { 579 | "codemirror_mode": { 580 | "name": "ipython", 581 | "version": 3 582 | }, 583 | "file_extension": ".py", 584 | "mimetype": "text/x-python", 585 | "name": "python", 586 | "nbconvert_exporter": "python", 587 | "pygments_lexer": "ipython3", 588 | "version": "3.8.8" 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 5 593 | } 594 | -------------------------------------------------------------------------------- /code_production/Amsterdam/recalculate_contextual.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "I have lost the parquet file with the contextual data and only have the standardized version. This notebook regenerates the original one."
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import geopandas as gpd\n", 17 | "import momepy as mm\n", 18 | "import libpysal\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "import seaborn as sns\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "import scipy as sp\n", 24 | "import mapclassify\n", 25 | "\n", 26 | "from inequality.theil import Theil\n", 27 | "from tqdm import tqdm\n", 28 | "from momepy import limit_range\n", 29 | "from sklearn import preprocessing" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "primary = pd.read_csv(\"/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/clustering/primary.csv\", index_col=0)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stderr", 48 | "output_type": "stream", 49 | "text": [ 50 | "/opt/miniconda3/envs/stable/lib/python3.8/site-packages/geopandas/geodataframe.py:422: RuntimeWarning: Sequential read of iterator was interrupted. Resetting iterator. 
def theil(y):
    """Return the Theil inequality index of ``y``.

    Parameters
    ----------
    y : array-like
        Non-negative values. A 1-D input yields a scalar; for a 2-D input
        the index is computed per column (reduction along axis 0), exactly
        as the previous builtin-``sum`` implementation did.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``sum(s_i * log(n * s_i))`` where ``s_i`` is the share of
        observation ``i`` in the total and ``n`` is the number of
        observations. 0 means perfectly even values.
    """
    values = np.asarray(y)  # no copy when the caller already passes an array
    n = len(values)
    # log(0) is undefined, so exact zeros are nudged to the smallest positive
    # float; the effect on the result is far below floating-point noise.
    positive = values + np.finfo(float).tiny * (values == 0)
    shares = positive / positive.sum(axis=0)
    # ndarray.sum runs in C and uses pairwise summation; the builtin ``sum``
    # iterated element by element in Python, which is slow when this function
    # is called once per cell and per character.
    return (shares * np.log(n * shares)).sum(axis=0)


def _simpson_di(data):
    """Return Simpson's diversity index for a mapping of class counts.

    Parameters
    ----------
    data : dict
        Maps a class label (e.g. a bin number) to the number of observations
        falling into that class.

    Returns
    -------
    float or int
        Sum of squared class proportions: 1.0 when every observation is in a
        single class, approaching 0 as observations spread over many classes.
        Returns 0 when ``data`` is empty or all counts are zero.
    """
    total = sum(data.values())
    # Zero counts are skipped, so an all-zero mapping never divides by zero.
    return sum((count / total) ** 2 for count in data.values() if count != 0)
| "\n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 
429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " 
\n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | "
stcOri_meanIQ3stcOri_rangeIQ3stcOri_theilID3sdcLAL_meanIQ3sdcLAL_rangeIQ3sdcLAL_theilID3sdcAre_meanIQ3sdcAre_rangeIQ3sdcAre_theilID3sscCCo_meanIQ3...sddAre_theilID3sddAre_simpsonmidRea_meanIQ3midRea_rangeIQ3midRea_theilID3midRea_simpsonmidAre_meanIQ3midAre_rangeIQ3midAre_theilID3midAre_simpson
uID
027.5399860.0299265.903783e-07215.7282861.0645980.00001228314.656812446.1379780.0001240.774589...0.0000001.0000004.0000000.000.0000001.0116032.0914870.0000000.000000e+001.000000
127.5399860.0299265.903783e-07215.7282861.0645980.00001228314.656812446.1379780.0001240.774589...0.0000001.0000004.0000000.000.0000001.0116032.0914870.0000000.000000e+001.000000
227.37652415.2874306.786512e-02165.11989560.3491870.01395010780.0864067414.4035640.0543180.481731...0.0365440.59183744.0000006.500.0073131.0458284.27549210239.3902772.104411e-041.000000
331.75199914.4331475.272121e-02116.01051690.5119170.1121485078.9702898830.5425880.3929290.495462...0.0000001.00000044.0000000.000.0000001.0458284.2754920.000000-1.110223e-161.000000
427.37652415.2874306.786512e-02165.11989560.3491870.01395010780.0864067414.4035640.0543180.481731...0.0365440.59183744.0000006.500.0073131.0458284.27549210239.3902772.104411e-041.000000
..................................................................
1945237.4934948.4314456.527028e-0380.57104279.6143080.097360646.907689539.5722270.1762200.216770...0.0806391.00000021.33333310.000.0284571.048777.8947913620.8267885.944435e-031.000000
1945337.0110028.5523356.648164e-0381.40530181.4831230.101711811.8820931471.2865500.3297400.276944...0.0452131.00000021.63636410.000.0457181.048375.5807044774.8074684.905983e-031.000000
1945441.0824842.3509023.195354e-0362.12429084.1107040.138473611.9708261352.0822030.4968950.274728...0.1727820.95746726.96000026.250.1106671.052006.58625919655.1909031.327803e-020.916824
1945541.0824842.3509023.195354e-0362.12429084.1107040.138473611.9708261352.0822030.4968950.274728...0.1727820.95746726.96000026.250.1106671.052006.58625919655.1909031.327803e-020.916824
1945740.8714713.6819253.522701e-0352.93893985.3658060.162041548.358138696.9102870.4544350.299230...0.1610381.00000026.59090918.000.0883781.048144.37768712156.7191561.049451e-021.000000
\n", 593 | "

252385 rows × 296 columns

\n", 594 | "" 595 | ], 596 | "text/plain": [ 597 | " stcOri_meanIQ3 stcOri_rangeIQ3 stcOri_theilID3 sdcLAL_meanIQ3 \\\n", 598 | "uID \n", 599 | "0 27.539986 0.029926 5.903783e-07 215.728286 \n", 600 | "1 27.539986 0.029926 5.903783e-07 215.728286 \n", 601 | "2 27.376524 15.287430 6.786512e-02 165.119895 \n", 602 | "3 31.751999 14.433147 5.272121e-02 116.010516 \n", 603 | "4 27.376524 15.287430 6.786512e-02 165.119895 \n", 604 | "... ... ... ... ... \n", 605 | "19452 37.493494 8.431445 6.527028e-03 80.571042 \n", 606 | "19453 37.011002 8.552335 6.648164e-03 81.405301 \n", 607 | "19454 41.082484 2.350902 3.195354e-03 62.124290 \n", 608 | "19455 41.082484 2.350902 3.195354e-03 62.124290 \n", 609 | "19457 40.871471 3.681925 3.522701e-03 52.938939 \n", 610 | "\n", 611 | " sdcLAL_rangeIQ3 sdcLAL_theilID3 sdcAre_meanIQ3 sdcAre_rangeIQ3 \\\n", 612 | "uID \n", 613 | "0 1.064598 0.000012 28314.656812 446.137978 \n", 614 | "1 1.064598 0.000012 28314.656812 446.137978 \n", 615 | "2 60.349187 0.013950 10780.086406 7414.403564 \n", 616 | "3 90.511917 0.112148 5078.970289 8830.542588 \n", 617 | "4 60.349187 0.013950 10780.086406 7414.403564 \n", 618 | "... ... ... ... ... \n", 619 | "19452 79.614308 0.097360 646.907689 539.572227 \n", 620 | "19453 81.483123 0.101711 811.882093 1471.286550 \n", 621 | "19454 84.110704 0.138473 611.970826 1352.082203 \n", 622 | "19455 84.110704 0.138473 611.970826 1352.082203 \n", 623 | "19457 85.365806 0.162041 548.358138 696.910287 \n", 624 | "\n", 625 | " sdcAre_theilID3 sscCCo_meanIQ3 ... sddAre_theilID3 sddAre_simpson \\\n", 626 | "uID ... \n", 627 | "0 0.000124 0.774589 ... 0.000000 1.000000 \n", 628 | "1 0.000124 0.774589 ... 0.000000 1.000000 \n", 629 | "2 0.054318 0.481731 ... 0.036544 0.591837 \n", 630 | "3 0.392929 0.495462 ... 0.000000 1.000000 \n", 631 | "4 0.054318 0.481731 ... 0.036544 0.591837 \n", 632 | "... ... ... ... ... ... \n", 633 | "19452 0.176220 0.216770 ... 0.080639 1.000000 \n", 634 | "19453 0.329740 0.276944 ... 
0.045213 1.000000 \n", 635 | "19454 0.496895 0.274728 ... 0.172782 0.957467 \n", 636 | "19455 0.496895 0.274728 ... 0.172782 0.957467 \n", 637 | "19457 0.454435 0.299230 ... 0.161038 1.000000 \n", 638 | "\n", 639 | " midRea_meanIQ3 midRea_rangeIQ3 midRea_theilID3 midRea_simpson \\\n", 640 | "uID \n", 641 | "0 4.000000 0.00 0.000000 1.0 \n", 642 | "1 4.000000 0.00 0.000000 1.0 \n", 643 | "2 44.000000 6.50 0.007313 1.0 \n", 644 | "3 44.000000 0.00 0.000000 1.0 \n", 645 | "4 44.000000 6.50 0.007313 1.0 \n", 646 | "... ... ... ... ... \n", 647 | "19452 21.333333 10.00 0.028457 1.0 \n", 648 | "19453 21.636364 10.00 0.045718 1.0 \n", 649 | "19454 26.960000 26.25 0.110667 1.0 \n", 650 | "19455 26.960000 26.25 0.110667 1.0 \n", 651 | "19457 26.590909 18.00 0.088378 1.0 \n", 652 | "\n", 653 | " midAre_meanIQ3 midAre_rangeIQ3 midAre_theilID3 midAre_simpson \n", 654 | "uID \n", 655 | "0 116032.091487 0.000000 0.000000e+00 1.000000 \n", 656 | "1 116032.091487 0.000000 0.000000e+00 1.000000 \n", 657 | "2 458284.275492 10239.390277 2.104411e-04 1.000000 \n", 658 | "3 458284.275492 0.000000 -1.110223e-16 1.000000 \n", 659 | "4 458284.275492 10239.390277 2.104411e-04 1.000000 \n", 660 | "... ... ... ... ... 
\n", 661 | "19452 48777.894791 3620.826788 5.944435e-03 1.000000 \n", 662 | "19453 48375.580704 4774.807468 4.905983e-03 1.000000 \n", 663 | "19454 52006.586259 19655.190903 1.327803e-02 0.916824 \n", 664 | "19455 52006.586259 19655.190903 1.327803e-02 0.916824 \n", 665 | "19457 48144.377687 12156.719156 1.049451e-02 1.000000 \n", 666 | "\n", 667 | "[252385 rows x 296 columns]" 668 | ] 669 | }, 670 | "execution_count": 27, 671 | "metadata": {}, 672 | "output_type": "execute_result" 673 | } 674 | ], 675 | "source": [ 676 | "contextual" 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": 28, 682 | "metadata": {}, 683 | "outputs": [], 684 | "source": [ 685 | "contextual.to_parquet('/Users/martin/Dropbox/Academia/Data/Geo/Amsterdam/clustering/contextual.parquet')" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 22, 691 | "metadata": {}, 692 | "outputs": [ 693 | { 694 | "data": { 695 | "text/plain": [ 696 | "(74,)" 697 | ] 698 | }, 699 | "execution_count": 22, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "chars.shape" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 29, 711 | "metadata": {}, 712 | "outputs": [ 713 | { 714 | "data": { 715 | "text/plain": [ 716 | "" 717 | ] 718 | }, 719 | "execution_count": 29, 720 | "metadata": {}, 721 | "output_type": "execute_result" 722 | } 723 | ], 724 | "source": [ 725 | "mm" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [] 734 | } 735 | ], 736 | "metadata": { 737 | "kernelspec": { 738 | "display_name": "stable", 739 | "language": "python", 740 | "name": "stable" 741 | }, 742 | "language_info": { 743 | "codemirror_mode": { 744 | "name": "ipython", 745 | "version": 3 746 | }, 747 | "file_extension": ".py", 748 | "mimetype": "text/x-python", 749 | "name": "python", 750 | "nbconvert_exporter": "python", 
#!/usr/bin/env python
# coding: utf-8
"""Generate the morphological tessellation for Prague (01tes.py).

Reads the building footprints, builds a tessellation limited to a 100 m
buffer around them, removes tessellation cells that have no queen-contiguity
neighbour ("islands") together with their buildings, and writes both layers
back to ``files/prg.gpkg``.
"""

# In[ ]:


import geopandas as gpd
import momepy as mm
import libpysal


# In[ ]:


buildings = gpd.read_file('files/prg.gpkg', layer='buildings')


# In[ ]:


# buildings = mm.preprocess(buildings)
# buildings['uID'] = mm.unique_id(buildings)


# In[ ]:


buildings.to_file('files/prg.gpkg', layer='buildings', driver='GPKG')


# In[ ]:


limit = mm.buffered_limit(buildings, 100)


# In[ ]:


tessellation = mm.tessellation(buildings, 'uID', limit, queen_corners=False)
# NOTE: the layer name is misspelled ('tessallation'), but 02queen.py reads
# the layer under exactly this name, so it must stay as is.
tessellation.to_file('files/prg.gpkg', layer='tessallation', driver='GPKG')


# In[ ]:


queen = libpysal.weights.Queen.from_dataframe(tessellation)


# In[ ]:


islands = queen.islands
# The island ids refer to rows of `tessellation`. Buildings are matched
# through the shared 'uID' instead of reusing those row labels, because the
# two frames are not guaranteed to be row-aligned (tessellation can drop or
# split cells). When they are aligned the result is identical to the
# previous `buildings.drop(islands)`.
island_uids = tessellation.loc[islands, 'uID']
tessellation = tessellation.drop(islands)
buildings = buildings[~buildings['uID'].isin(island_uids)]


# In[ ]:


tessellation.to_file('files/prg.gpkg', layer='tessallation', driver='GPKG')
buildings.to_file('files/prg.gpkg', layer='buildings', driver='GPKG')


# In[ ]:
"""Recompute street-network characters for Prague (03_fix_streets.py).

One-off repair script: the steps that had already been run are kept as
commented-out code for the record; the live part recomputes edge- and
node-based characters from the stored ``nodes``/``edges`` layers and links
buildings and tessellation cells to network nodes.
"""
import geopandas as gpd
import momepy as mm
import libpysal

# Single definition of the data locations used throughout the script.
ELEMENTS = "/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/elements.gpkg"
NODES_GAL = "/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/nodes.gal"

# streets = gpd.read_file("/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/elements.gpkg", layer="streets")
tess = gpd.read_file(ELEMENTS, layer="tessellation")
blg = gpd.read_file(ELEMENTS, layer="buildings")

# streets = mm.network_false_nodes(streets)
# streets.reset_index(inplace=True, drop=True)
#
# streets["nID"] = mm.unique_id(streets)
# blg.drop(columns=["nID"], inplace=True)
# blg["nID"] = mm.get_network_id(blg, streets, "uID", "nID", 300)
# tess = tess.drop(columns=["nID"]).merge(blg[["uID", "nID"]], on="uID", how="left")

# blg["stbSAl"] = mm.street_alignment(blg, streets, "stbOri", "nID", "nID")

# tess["stcSAl"] = mm.street_alignment(tess, streets, "stcOri", "nID", "nID")

# streets["sdsLen"] = mm.perimeter(streets)

# profile = mm.street_profile(streets, blg, heights="sdbHei", distance=3)
# streets["sdsSPW"] = profile["widths"]
# streets["sdsSPH"] = profile["heights"]
# streets["sdsSPR"] = profile["profile"]
# streets["sdsSPO"] = profile["openness"]
# streets["sdsSWD"] = profile["width_deviations"]
# streets["sdsSHD"] = profile["heights_deviations"]
# streets.to_file("files/elements.gpkg", layer="streets", driver="GPKG")

# streets["sssLin"] = mm.linearity(streets)
# streets["sdsAre"] = mm.reached(streets, tess, "nID", "nID", mode="sum", values="sdcAre")
# streets["sisBpM"] = mm.elements_count(streets, blg, "nID", "nID", weighted=True)
#
# # tess.to_file("files/elements.gpkg", layer="tessellation", driver="GPKG")
# # blg.to_file("files/elements.gpkg", layer="buildings", driver="GPKG")
#
#
# str_q1 = libpysal.weights.contiguity.Queen.from_dataframe(streets)
#
# streets["misRea"] = mm.reached(
#     streets, tess, "nID", "nID", spatial_weights=str_q1, mode="count"
# )
# streets["mdsAre"] = mm.reached(
#     streets, tess, "nID", "nID", spatial_weights=str_q1, mode="sum"
# )
#
# streets.to_file("files/elements.gpkg", layer="streets", driver="GPKG")

# graph = mm.gdf_to_nx(streets)
#
# print("node degree")
# graph = mm.node_degree(graph)
#
# print("subgraph")
# graph = mm.subgraph(
#     graph,
#     radius=5,
#     meshedness=True,
#     cds_length=False,
#     mode="sum",
#     degree="degree",
#     length="mm_len",
#     mean_node_degree=False,
#     proportion={0: True, 3: True, 4: True},
#     cyclomatic=False,
#     edge_node_ratio=False,
#     gamma=False,
#     local_closeness=True,
#     closeness_distance="mm_len",
# )
# print("cds length")
# graph = mm.cds_length(graph, radius=3, name="ldsCDL")
#
# print("clustering")
# graph = mm.clustering(graph, name="xcnSCl")
#
# print("mean_node_dist")
# graph = mm.mean_node_dist(graph, name="mtdMDi")
#
# nodes, edges, sw = mm.nx_to_gdf(graph, spatial_weights=True)
#
# fo = libpysal.io.open("/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/nodes.gal", "w")
# fo.write(sw)
# fo.close()
#
# nodes.to_file("/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/elements.gpkg", layer="nodes", driver="GPKG")
# edges.to_file("/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/elements.gpkg", layer="edges", driver="GPKG")
nodes = gpd.read_file(ELEMENTS, layer="nodes")
edges = gpd.read_file(ELEMENTS, layer="edges")
edges_w3 = mm.sw_high(k=3, gdf=edges)
edges["ldsMSL"] = mm.segments_length(edges, spatial_weights=edges_w3, mean=True)

# Number of cells reached within 3 topological steps along the network.
edges["ldsRea"] = mm.reached(edges, tess, "nID", "nID", spatial_weights=edges_w3)
# Area of those cells. This used to be written to "ldsRea" as well, silently
# replacing the count computed on the previous line.
# NOTE(review): "ldsAre" follows the sdsAre / mdsAre naming of the other
# street-level area characters - confirm that downstream notebooks expect
# this name before re-running the pipeline.
edges["ldsAre"] = mm.reached(
    edges, tess, "nID", "nID", spatial_weights=edges_w3, mode="sum", values="sdcAre"
)
# error below
# Traceback (most recent call last):
#   File "03_fix_streets.py", line 99, in
#     nodes_w5 = mm.sw_high(k=5, weights=sw)
#   File "/home/ubuntu/momepy/utils.py", line 94, in sw_high
#     first_order, k=i, silence_warnings=silent
#   File "/home/ubuntu/miniconda3/envs/mmp/lib/python3.7/site-packages/libpysal/weights/util.py", line 427, in higher_order
#     return higher_order_sp(w, k, **kwargs)
#   File "/home/ubuntu/miniconda3/envs/mmp/lib/python3.7/site-packages/libpysal/weights/util.py", line 486, in higher_order_sp
#     if np.unique(np.hstack(list(w.weights.values()))) == np.array([1.0]):
# ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
gal = libpysal.io.open(NODES_GAL, "r")
try:
    sw = gal.read()
finally:
    gal.close()  # the handle was previously left open

nodes_w5 = mm.sw_high(k=5, weights=sw)
# fix for string indices (probably due to loading of GAL)
nodes_w5.neighbors = {
    int(k): [int(i) for i in v] for k, v in nodes_w5.neighbors.items()
}
nodes["lddNDe"] = mm.node_density(nodes, edges, nodes_w5)
nodes["linWID"] = mm.node_density(
    nodes, edges, nodes_w5, weighted=True, node_degree="degree"
)
nodes.to_file(ELEMENTS, layer="nodes", driver="GPKG")
edges.to_file(ELEMENTS, layer="edges", driver="GPKG")
# tess.to_file("/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/elements.gpkg", layer="tessellation", driver="GPKG")
# blg.to_file("/Users/martin/Dropbox/Academia/Data/Geo/Prague/Redo/elements.gpkg", layer="buildings", driver="GPKG")

# Remove a stale node link from a previous run, if there is one. An explicit
# column check replaces the former blanket `except Exception`, which would
# also have hidden unrelated failures.
if "nodeID" in blg.columns:
    blg.drop(columns=["nodeID"], inplace=True)
else:
    print("no nodeID")
if "nodeID" in tess.columns:
    tess.drop(columns=["nodeID"], inplace=True)
else:
    print("no nodeID")
blg["nodeID"] = mm.get_node_id(blg, nodes, edges, "nodeID", "nID")
tess = tess.merge(blg[["uID", "nodeID"]], on="uID", how="left")

nodes_w3 = mm.sw_high(k=3, weights=sw)
nodes_w3.neighbors = {
    int(k): [int(i) for i in v] for k, v in nodes_w3.neighbors.items()
}

nodes["lddRea"] = mm.reached(nodes, tess, "nodeID", "nodeID", nodes_w3)
nodes["lddARe"] = mm.reached(
    nodes, tess, "nodeID", "nodeID", nodes_w3, mode="sum", values="sdcAre"
)

nodes["sddAre"] = mm.reached(
    nodes, tess, "nodeID", "nodeID", mode="sum", values="sdcAre"
)
# Same string-to-int fix as above, applied to the first-order weights only
# after the higher-order weights have been derived from the loaded object.
sw.neighbors = {int(k): [int(i) for i in v] for k, v in sw.neighbors.items()}

nodes["midRea"] = mm.reached(nodes, tess, "nodeID", "nodeID", spatial_weights=sw)
nodes["midAre"] = mm.reached(
    nodes, tess, "nodeID", "nodeID", spatial_weights=sw, mode="sum", values="sdcAre"
)

nodes.rename(
    columns={
        "degree": "mtdDeg",
        "meshedness": "lcdMes",
        "local_closeness": "lcnClo",
        "proportion_3": "linP3W",
        "proportion_4": "linP4W",
        "proportion_0": "linPDE",
    },
    inplace=True,
)

print("saving")
nodes.to_file(ELEMENTS, layer="nodes", driver="GPKG")
edges.to_file(ELEMENTS, layer="edges", driver="GPKG")
tess.to_file(ELEMENTS, layer="tessellation", driver="GPKG")
blg.to_file(ELEMENTS, layer="buildings", driver="GPKG")
layer='tessellation', driver='GPKG') 19 | blg.to_file('files/elements.gpkg', layer='buildings', driver='GPKG') 20 | blocks.to_file('files/elements.gpkg', layer='blocks', driver='GPKG') 21 | streets.to_file('files/elements.gpkg', layer='streets', driver='GPKG') 22 | -------------------------------------------------------------------------------- /code_production/Prague/04measure1.py: -------------------------------------------------------------------------------- 1 | import momepy as mm 2 | import geopandas as gpd 3 | 4 | tess = gpd.read_file("files/elements.gpkg", layer="tessellation") 5 | blg = gpd.read_file("files/elements.gpkg", layer="buildings") 6 | # blocks = gpd.read_file('files/elements.gpkg', layer='blocks') 7 | streets = gpd.read_file("files/elements.gpkg", layer="streets") 8 | 9 | # blg['sdbAre'] = mm.area(blg) 10 | # blg['sdbVol'] = mm.volume(blg, 'sdbHei', 'sdbAre') 11 | # blg['sdbPer'] = mm.perimeter(blg) 12 | # blg['sdbCoA'] = mm.courtyard_area(blg, 'sdbAre') 13 | # 14 | # blg['ssbFoF'] = mm.form_factor(blg, 'sdbVol', 'sdbAre') 15 | # blg['ssbVFR'] = mm.volume_facade_ratio(blg, 'sdbHei', 'sdbVol', 'sdbPer') 16 | # blg['ssbCCo'] = mm.circular_compactness(blg, 'sdbAre') 17 | # blg['ssbCor'] = mm.corners(blg) 18 | # blg['ssbSqu'] = mm.squareness(blg) 19 | # blg['ssbERI'] = mm.equivalent_rectangular_index(blg, 'sdbAre', 'sdbPer') 20 | # blg['ssbElo'] = mm.elongation(blg) 21 | # blg['ssbCCM'], blg['ssbCCD'] = mm.centroid_corners(blg) 22 | # 23 | # blg['stbOri'] = mm.orientation(blg) 24 | blg["stbSAl"] = mm.street_alignment(blg, streets, "stbOri", "nID", "nID") 25 | 26 | # tess['stcOri'] = mm.orientation(tess) 27 | # blg['stbCeA'] = mm.cell_alignment(blg, tess, 'stbOri', 'stcOri', 'uID', 'uID') 28 | # 29 | # tess['sdcLAL'] = mm.longest_axis_length(tess) 30 | # tess['sdcAre'] = mm.area(tess) 31 | # tess['sscCCo'] = mm.circular_compactness(tess, 'sdcAre') 32 | # tess['sscERI'] = mm.equivalent_rectangular_index(tess, 'sdcAre') 33 | 34 | tess["stcSAl"] = 
"""Measure contiguity-based characters for Prague (04measure2.py).

Reads the tessellation, buildings, blocks and streets layers from
``files/elements.gpkg``, adds characters that depend on first-order queen
contiguity (between cells, streets, buildings and blocks) plus the basic
block dimensions and shapes, and writes all four layers back.
"""
import momepy as mm
import geopandas as gpd
import libpysal

tess = gpd.read_file("files/elements.gpkg", layer="tessellation")
blg = gpd.read_file("files/elements.gpkg", layer="buildings")
blocks = gpd.read_file("files/elements.gpkg", layer="blocks")
streets = gpd.read_file("files/elements.gpkg", layer="streets")

blg["mtbSWR"] = mm.SharedWallsRatio(blg, "uID", "sdbPer").series

# Queen contiguity of tessellation cells, keyed by uID so that it can be
# applied to buildings as well.
queen_1 = libpysal.weights.contiguity.Queen.from_dataframe(tess, ids="uID")

blg["mtbAli"] = mm.Alignment(blg, queen_1, "uID", "stbOri").series
# momepy's class-based API uses American spelling (NeighborDistance,
# Neighbors); the British forms were left over from the function-based API
# and are inconsistent with the `mm.Neighbors` call for blocks below.
blg["mtbNDi"] = mm.NeighborDistance(blg, queen_1, "uID").series

tess["mtcWNe"] = mm.Neighbors(tess, queen_1, "uID", weighted=True).series
tess["mdcAre"] = mm.CoveredArea(tess, queen_1, "uID").series

str_q1 = libpysal.weights.contiguity.Queen.from_dataframe(streets)

streets["misRea"] = mm.Reached(
    streets, tess, "nID", "nID", spatial_weights=str_q1, mode="count"
).series
streets["mdsAre"] = mm.Reached(
    streets, tess, "nID", "nID", spatial_weights=str_q1, mode="sum"
).series

blg_q1 = libpysal.weights.contiguity.Queen.from_dataframe(blg)

blg["libNCo"] = mm.Courtyards(blg, "bID", blg_q1).series
blg["ldbPWL"] = mm.PerimeterWall(blg, blg_q1).series

blocks["ldkAre"] = mm.Area(blocks).series
blocks["ldkPer"] = mm.Perimeter(blocks).series
blocks["lskCCo"] = mm.CircularCompactness(blocks, "ldkAre").series
blocks["lskERI"] = mm.EquivalentRectangularIndex(blocks, "ldkAre", "ldkPer").series
blocks["lskCWA"] = mm.CompactnessWeightedAxis(blocks, "ldkAre", "ldkPer").series
blocks["ltkOri"] = mm.Orientation(blocks).series

blo_q1 = libpysal.weights.contiguity.Queen.from_dataframe(blocks, ids="bID")

blocks["ltkWNB"] = mm.Neighbors(blocks, blo_q1, "bID", weighted=True).series
blocks["likWBB"] = mm.Count(blocks, blg, "bID", "bID", weighted=True).series

tess.to_file("files/elements.gpkg", layer="tessellation", driver="GPKG")
blg.to_file("files/elements.gpkg", layer="buildings", driver="GPKG")
blocks.to_file("files/elements.gpkg", layer="blocks", driver="GPKG")
streets.to_file("files/elements.gpkg", layer="streets", driver="GPKG")
"""Measure street-network characters for Prague (04measure4.py).

Converts the street layer to a graph, computes graph-based characters,
converts the graph back to node/edge layers with node contiguity weights,
derives the characters that depend on reached tessellation cells, links
buildings and cells to network nodes, and writes all layers back to
``files/elements.gpkg``.
"""
import momepy as mm
import geopandas as gpd
import libpysal

streets = gpd.read_file("files/elements.gpkg", layer="streets")

graph = mm.gdf_to_nx(streets)

print("node degree")
graph = mm.node_degree(graph)

print("subgraph")
graph = mm.subgraph(
    graph,
    radius=5,
    meshedness=True,
    cds_length=False,
    mode="sum",
    degree="degree",
    length="mm_len",
    mean_node_degree=False,
    proportion={0: True, 3: True, 4: True},
    cyclomatic=False,
    edge_node_ratio=False,
    gamma=False,
    local_closeness=True,
    closeness_distance="mm_len",
)
print("cds length")
graph = mm.cds_length(graph, radius=3, name="ldsCDL")

print("eigenvector")
try:
    graph = mm.eigenvector(graph, name="xcnEiC", max_iter=500)
except Exception:
    # Retry once with a larger iteration budget.
    # NOTE(review): presumably meant for a power-iteration convergence
    # failure; the handler stays broad because the specific exception type
    # is not importable from the modules this script already uses.
    graph = mm.eigenvector(graph, name="xcnEiC", max_iter=1000)

print("clustering")
graph = mm.clustering(graph, name="xcnSCl")

print("mean_node_dist")
graph = mm.mean_node_dist(graph, name="mtdMDi")


nodes, edges, sw = mm.nx_to_gdf(graph, spatial_weights=True)

print("saving")
nodes.to_file("files/elements.gpkg", layer="nodes", driver="GPKG")
edges.to_file("files/elements.gpkg", layer="edges", driver="GPKG")

fo = libpysal.io.open("files/GRnodes.gal", "w")
try:
    fo.write(sw)
finally:
    fo.close()  # close the handle even if writing fails

edges_w3 = mm.sw_high(k=3, gdf=edges)
edges["ldsMSL"] = mm.SegmentsLength(edges, spatial_weights=edges_w3, mean=True).series

tess = gpd.read_file("files/elements.gpkg", layer="tessellation")

# Number of cells reached within 3 topological steps along the network.
edges["ldsRea"] = mm.Reached(edges, tess, "nID", "nID", spatial_weights=edges_w3).series
# Area of those cells. This used to be written to "ldsRea" as well, silently
# replacing the count computed on the previous line.
# NOTE(review): "ldsAre" follows the sdsAre / mdsAre naming of the other
# street-level area characters - confirm that downstream notebooks expect
# this name before re-running the pipeline.
edges["ldsAre"] = mm.Reached(
    edges, tess, "nID", "nID", spatial_weights=edges_w3, mode="sum", values="sdcAre"
).series

nodes_w5 = mm.sw_high(k=5, weights=sw)
nodes["lddNDe"] = mm.NodeDensity(nodes, edges, nodes_w5).series
nodes["linWID"] = mm.NodeDensity(
    nodes, edges, nodes_w5, weighted=True, node_degree="degree"
).series

blg = gpd.read_file("files/elements.gpkg", layer="buildings")

blg["nodeID"] = mm.get_node_id(blg, nodes, edges, "nodeID", "nID")
tess = tess.merge(blg[["uID", "nodeID"]], on="uID", how="left")

nodes_w3 = mm.sw_high(k=3, weights=sw)

nodes["lddRea"] = mm.Reached(nodes, tess, "nodeID", "nodeID", nodes_w3).series
nodes["lddARe"] = mm.Reached(
    nodes, tess, "nodeID", "nodeID", nodes_w3, mode="sum", values="sdcAre"
).series

nodes["sddAre"] = mm.Reached(
    nodes, tess, "nodeID", "nodeID", mode="sum", values="sdcAre"
).series
nodes["midRea"] = mm.Reached(nodes, tess, "nodeID", "nodeID", spatial_weights=sw).series
nodes["midAre"] = mm.Reached(
    nodes, tess, "nodeID", "nodeID", spatial_weights=sw, mode="sum", values="sdcAre"
).series

# Give the graph-derived columns their character codes. DataFrame.rename
# returns a new frame unless inplace=True, so the result has to be assigned;
# previously it was discarded and the layer was saved with the raw names.
# The rename must stay after NodeDensity above, which reads "degree".
nodes = nodes.rename(
    columns={
        "degree": "mtdDeg",
        "meshedness": "lcdMes",
        "local_closeness": "lcnClo",
        "proportion_3": "linP3W",
        "proportion_4": "linP4W",
        "proportion_0": "linPDE",
    }
)

print("saving")
nodes.to_file("files/elements.gpkg", layer="nodes", driver="GPKG")
edges.to_file("files/elements.gpkg", layer="edges", driver="GPKG")
tess.to_file("files/elements.gpkg", layer="tessellation", driver="GPKG")
blg.to_file("files/elements.gpkg", layer="buildings", driver="GPKG")