├── .gitattributes
├── .github
│   ├── release.yml
│   └── workflows
│       ├── build_docs.yml
│       ├── mypy.yml
│       ├── release_to_pypi.yml
│       └── testing.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── ci
│   ├── py311_latest.yaml
│   ├── py311_oldest.yaml
│   ├── py312_latest.yaml
│   ├── py313_dev.yaml
│   └── py313_latest.yaml
├── codecov.yml
├── data
│   ├── README.md
│   ├── aleppo_1133
│   │   ├── original.parquet
│   │   └── simplified.parquet
│   ├── auckland_869
│   │   ├── original.parquet
│   │   └── simplified.parquet
│   ├── bucaramanga_4617
│   │   ├── original.parquet
│   │   └── simplified.parquet
│   ├── douala_809
│   │   ├── original.parquet
│   │   └── simplified.parquet
│   ├── generate_simplified.py
│   ├── liege_1656
│   │   ├── original.parquet
│   │   └── simplified.parquet
│   ├── slc_4881
│   │   ├── original.parquet
│   │   └── simplified.parquet
│   └── wuhan_8989
│       ├── original.parquet
│       └── simplified.parquet
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── _static
│       │   ├── custom.css
│       │   ├── icon.png
│       │   ├── logo.svg
│       │   └── references.bib
│       ├── api.rst
│       ├── conf.py
│       ├── index.md
│       ├── intro.ipynb
│       ├── references.rst
│       └── simple_preprocessing.ipynb
├── environment.yml
├── neatnet
│   ├── __init__.py
│   ├── artifacts.py
│   ├── continuity.py
│   ├── gaps.py
│   ├── geometry.py
│   ├── nodes.py
│   ├── simplify.py
│   └── tests
│       ├── conftest.py
│       ├── data
│       │   ├── apalachicola_original.parquet
│       │   ├── apalachicola_simplified_exclusion_mask.parquet
│       │   └── apalachicola_simplified_standard.parquet
│       ├── test_artifacts.py
│       ├── test_continuity.py
│       ├── test_gaps.py
│       ├── test_geometry.py
│       ├── test_nodes.py
│       └── test_simplify.py
├── pixi.lock
└── pyproject.toml

/.gitattributes: -------------------------------------------------------------------------------- 1 | # GitHub syntax highlighting 2 | pixi.lock linguist-language=YAML linguist-generated=true 3 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | --- 2 | changelog: 3 | exclude: 4 | labels: 5 | - ignore-for-release 6 | authors: 7 | - dependabot 8 | - pre-commit-ci 9 | categories: 10 | - title: API Changes 11 | labels: 12 | - api 13 | - title: Enhancements 14 | labels: 15 | - enhancement 16 | - title: Bug Fixes 17 | labels: 18 | - bug 19 | - title: Deprecations 20 | labels: 21 | - deprecation 22 | - title: Documentation 23 | labels: 24 | - documentation 25 | - title: Other Changes 26 | labels: 27 | - "*" 28 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Build Docs 3 | 4 | on: 5 | push: 6 | tags: 7 | - 'v*' 8 | workflow_dispatch: 9 | inputs: 10 | version: 11 | description: Manual Doc Build 12 | default: run-doc-build 13 | required: false 14 | jobs: 15 | docs: 16 | name: build & push docs 17 | runs-on: ${{ matrix.os }} 18 | timeout-minutes: 90 19 | strategy: 20 | matrix: 21 | os: ['ubuntu-latest'] 22 | environment-file: [ci/py313_latest.yaml] 23 | experimental: [false] 24 | defaults: 25 | run: 26 | shell: bash -l {0} 27 | 28 | steps: 29 | - name: checkout repo 30 | uses: actions/checkout@v4 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: setup micromamba 35 | uses: mamba-org/setup-micromamba@v1 36 | with: 37 | environment-file: ${{ matrix.environment-file }} 38 | micromamba-version: 'latest' 39 | 40 | - name: install package 41 | run: pip install .
42 | 43 | - name: make docs 44 | run: cd docs; make html 45 | 46 | - name: commit docs 47 | run: | 48 | git clone \ 49 | https://github.com/uscuni/neatnet.git \ 50 | --branch gh-pages \ 51 | --single-branch gh-pages 52 | cp -r docs/build/html/* gh-pages/ 53 | cd gh-pages 54 | git config --local user.email "action@github.com" 55 | git config --local user.name "GitHub Action" 56 | git add . 57 | git commit -m "Update documentation" -a || true 58 | # The above command will fail if no changes were present, 59 | # so we ignore the return code. 60 | 61 | - name: push to gh-pages 62 | uses: ad-m/github-push-action@master 63 | with: 64 | branch: gh-pages 65 | directory: gh-pages 66 | github_token: ${{ secrets.GITHUB_TOKEN }} 67 | force: true 68 | -------------------------------------------------------------------------------- /.github/workflows/mypy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: MyPy Type Checking 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: 9 | - "*" 10 | schedule: 11 | - cron: "59 23 * * *" 12 | workflow_dispatch: 13 | inputs: 14 | version: 15 | description: Manual Type Checking 16 | default: type_checking 17 | required: false 18 | 19 | jobs: 20 | mypy: 21 | runs-on: ubuntu-latest 22 | defaults: 23 | run: 24 | shell: bash -l {0} 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: setup micromamba 30 | uses: mamba-org/setup-micromamba@v2 31 | with: 32 | environment-file: ci/py313_latest.yaml 33 | create-args: >- 34 | mypy 35 | 36 | - name: Install package 37 | run: pip install . 38 | 39 | - name: Check package 40 | run: | 41 | mypy neatnet/ --ignore-missing-imports --install-types --non-interactive 42 | -------------------------------------------------------------------------------- /.github/workflows/release_to_pypi.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Publish neatnet to PyPI / GitHub 3 | 4 | on: 5 | push: 6 | tags: 7 | - "v*" 8 | 9 | jobs: 10 | build-n-publish: 11 | name: Build and publish neatnet to PyPI 12 | runs-on: ubuntu-latest 13 | permissions: 14 | id-token: write # MANDATORY for trusted publishing to PyPI 15 | contents: write # MANDATORY for the Github release action 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 # Fetch all history for all branches and tags.
21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: "3.x" 26 | 27 | - name: Build source and wheel distributions 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install --upgrade build twine 31 | python -m build 32 | twine check --strict dist/* 33 | 34 | - name: Create Release Notes 35 | uses: actions/github-script@v7 36 | with: 37 | github-token: ${{secrets.GITHUB_TOKEN}} 38 | script: | 39 | await github.request(`POST /repos/${{ github.repository }}/releases`, { 40 | tag_name: "${{ github.ref }}", 41 | generate_release_notes: true 42 | }); 43 | 44 | - name: Publish distribution to PyPI 45 | uses: pypa/gh-action-pypi-publish@release/v1 46 | -------------------------------------------------------------------------------- /.github/workflows/testing.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Continuous Integration 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: 9 | - "*" 10 | schedule: 11 | - cron: "59 23 * * *" 12 | workflow_dispatch: 13 | inputs: 14 | version: 15 | description: Manual CI Run 16 | default: test 17 | required: false 18 | 19 | jobs: 20 | tests: 21 | name: ${{ matrix.os }}, ${{ matrix.environment-file }} 22 | runs-on: ${{ matrix.os }} 23 | timeout-minutes: 60 24 | strategy: 25 | matrix: 26 | os: [ubuntu-latest] 27 | environment-file: [ 28 | py311_oldest, 29 | py311_latest, 30 | py312_latest, 31 | py313_latest, 32 | py313_dev, 33 | ] 34 | include: 35 | - environment-file: py313_latest 36 | os: macos-13 # Intel 37 | - environment-file: py313_latest 38 | os: macos-latest # Apple Silicon 39 | - environment-file: py313_latest 40 | os: windows-latest 41 | fail-fast: false 42 | 43 | defaults: 44 | run: 45 | shell: bash -l {0} 46 | 47 | steps: 48 | - name: checkout repo 49 | uses: actions/checkout@v4 50 | with: 51 | fetch-depth: 0 # Fetch all history for all branches and tags. 52 | 53 | - name: setup micromamba 54 | uses: mamba-org/setup-micromamba@v2 55 | with: 56 | environment-file: ci/${{ matrix.environment-file }}.yaml 57 | micromamba-version: "latest" 58 | 59 | - name: install package 60 | run: "pip install -e . --no-deps" 61 | 62 | - name: spatial versions 63 | run: 'python -c "import geopandas; geopandas.show_versions();"' 64 | 65 | - name: run tests 66 | run: | 67 | pytest \ 68 | neatnet/ \ 69 | --verbose \ 70 | -r a \ 71 | --numprocesses logical \ 72 | --color yes \ 73 | --cov neatnet \ 74 | --cov-append \ 75 | --cov-report term-missing \ 76 | --cov-report xml . \ 77 | --env_type ${{ matrix.environment-file }} \ 78 | -m "not wuhan" 79 | 80 | - name: run tests (Wuhan) 81 | run: | 82 | pytest \ 83 | neatnet/ \ 84 | --verbose \ 85 | -r a \ 86 | --numprocesses logical \ 87 | --color yes \ 88 | --cov neatnet \ 89 | --cov-append \ 90 | --cov-report term-missing \ 91 | --cov-report xml . 
\ 92 | --env_type ${{ matrix.environment-file }} \ 93 | -m wuhan 94 | if: ${{ github.event_name != 'pull_request' }} 95 | 96 | - name: zip artifacts - Ubuntu & macOS 97 | run: zip ci_artifacts.zip ci_artifacts -r 98 | if: matrix.os != 'windows-latest' && (success() || failure()) 99 | 100 | - name: zip artifacts - Windows 101 | shell: powershell 102 | run: Compress-Archive -Path ci_artifacts -Destination ci_artifacts.zip 103 | if: matrix.os == 'windows-latest' && (success() || failure()) 104 | 105 | - name: archive observed simplified networks 106 | uses: actions/upload-artifact@v4 107 | with: 108 | name: ci_artifacts-${{ matrix.os }}-${{ matrix.environment-file }} 109 | path: ci_artifacts.zip 110 | if: success() || failure() 111 | 112 | - name: codecov 113 | uses: codecov/codecov-action@v4 114 | with: 115 | token: ${{ secrets.CODECOV_TOKEN }} 116 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | cache/ 165 | 166 | # macOS stuff 167 | *.DS_Store 168 | # pixi environments 169 | .pixi 170 | *.egg-info 171 | 172 | ci_artifacts/ 173 | docs/source/generated -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | repos: 3 | - repo: https://github.com/astral-sh/ruff-pre-commit 4 | rev: "v0.11.8" 5 | hooks: 6 | - id: ruff 7 | files: "neatnet\/|docs\/source\/" 8 | - id: ruff-format 9 | files: "neatnet\/|docs\/source\/" 10 | - repo: https://github.com/adrienverge/yamllint 11 | rev: "v1.37.0" 12 | hooks: 13 | - id: yamllint 14 | files: "." 15 | args: [ 16 | -d, 17 | "{extends: default, ignore: .pixi/*, rules: {line-length: {max: 90}}}", 18 | ".", 19 | ] 20 | - repo: https://github.com/pre-commit/pre-commit-hooks 21 | rev: "v5.0.0" 22 | hooks: 23 | - id: check-toml 24 | files: "." 25 | 26 | ci: 27 | autofix_prs: false 28 | autoupdate_schedule: quarterly 29 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `neatnet` 2 | 3 | First off, thanks for taking the time to contribute! ❤️ 4 | 5 | All types of contributions are encouraged and valued. See this page for different ways to help and details about how this project handles them. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. 🎉 6 | 7 | > And if you like the project, but just don't have time to contribute, that's fine. 
There are other easy ways to support the project and show your appreciation, which we would also be very happy about: 8 | > - Star the project 9 | > - Tweet about it 10 | > - Refer this project in your project's `README` 11 | > - Mention the project at local meetups and tell your friends/colleagues 12 | 13 | ## I Have a Question 14 | 15 | > If you want to ask a question, we assume that you have read the available [Documentation](https://uscuni.org/neatnet/). 16 | 17 | Before you ask a question, it is best to search for existing [Issues](https://github.com/uscuni/neatnet/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first, especially [Stack Overflow](https://stackoverflow.com). 18 | 19 | If you then still feel the need to ask a question and need clarification, we recommend the following: 20 | 21 | - Open an [Issue](https://github.com/uscuni/neatnet/issues/new). 22 | - Provide as much context as you can about what you're running into. 23 | - Provide project and platform versions (`python`, `shapely`, `geopandas`, etc.), depending on what seems relevant. 24 | 25 | We will then take care of the issue as soon as possible. 26 | 27 | ## I Want To Contribute 28 | 29 | ### Reporting Bugs 30 | 31 | #### Before Submitting a Bug Report 32 | 33 | A good bug report shouldn't leave others needing to chase you up for more information. Therefore, we ask you to investigate carefully, collect information, and describe the issue in detail in your report. Please complete the following steps in advance to help us fix any potential bug as fast as possible. 34 | 35 | - Make sure that you are using the latest version. 36 | - Determine if your bug is really a bug and not an error on your side, e.g. using incompatible environment components/versions (make sure that you have read the [documentation](https://uscuni.org/neatnet/)). 37 | - Check the [bug tracker](https://github.com/uscuni/neatnet/issues) to see whether other users have experienced (and potentially already solved) the same issue you are having, and whether a report for your bug or error already exists. 38 | - Also make sure to search the internet (especially [Stack Overflow](https://stackoverflow.com)) to see if users outside of the GitHub community have discussed the issue. 39 | - Collect information about the bug: 40 | - Stack trace (Traceback) 41 | - OS, platform, and version (Windows, Linux, macOS, x86, ARM) 42 | - Versions of Python and relevant dependencies 43 | - Your input and the output, if possible 44 | - Can you reliably reproduce the issue? And can you also reproduce it with older versions? 45 | 46 | #### How Do I Submit a Good Bug Report? 47 | 48 | We use GitHub issues to track bugs and errors. If you run into an issue with the project: 49 | 50 | - Open an [Issue](https://github.com/uscuni/neatnet/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) 51 | - Explain the behavior you would expect and the actual behavior. 52 | - Please provide as much context as possible and describe the *reproduction steps* that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case. This is known as a [minimum reproducible example](https://en.wikipedia.org/wiki/Minimal_reproducible_example#:~:text=In%20computing%2C%20a%20minimal%20reproducible,to%20be%20demonstrated%20and%20reproduced.) – or MRE for short; a sketch of what one might look like follows this list. 53 | - Provide the information you collected in the previous section.
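For instance, a hypothetical MRE for a `neatnet` issue might look like the following (the file path and data here are placeholders for whatever small, shareable input triggers the problem on your end):

```py
import geopandas
import neatnet

# Placeholder: a small extract of the street network that triggers the issue.
streets = geopandas.read_file("streets_extract.gpkg")

# The call producing the unexpected behavior, ideally with default parameters.
simplified = neatnet.neatify(streets)
```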
54 | 55 | Once it's filed: 56 | 57 | - The project team will label the issue accordingly. 58 | - A team member will try to reproduce the issue with your provided steps. If there are no reproduction steps or no obvious way to reproduce the issue, the team will ask you for those steps. 59 | - If the team is able to reproduce the issue, it will be left to be implemented by someone. 60 | 61 | ### Suggesting Enhancements 62 | 63 | This section guides you through submitting an enhancement suggestion for `neatnet`, **including completely new features and minor improvements to existing functionality**. Following these guidelines will help maintainers and the community understand your suggestion and find related suggestions. 64 | 65 | #### Before Submitting an Enhancement 66 | 67 | - Make sure that you are using the latest version. 68 | - Read the [documentation](https://uscuni.org/neatnet/) carefully and find out if the functionality is already covered, perhaps by an individual configuration. 69 | - Perform a [search](https://github.com/uscuni/neatnet/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. 70 | - Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library. 71 | 72 | #### How Do I Submit a Good Enhancement Suggestion? 73 | 74 | Enhancement suggestions are tracked as [GitHub issues](https://github.com/uscuni/neatnet/issues). 75 | 76 | - Use a **clear and descriptive title** for the issue to identify the suggestion. 77 | - Provide a **step-by-step description of the suggested enhancement** in as much detail as possible. 78 | - **Describe the current behavior** and **explain which behavior you expected to see instead** and why. At this point you can also mention which alternatives do not work for you. 79 | - **Explain why this enhancement would be useful** to most of `neatnet`'s users. You may also want to point out other projects that have solved it better, which could serve as inspiration. 80 | 81 | ### Code Contribution 82 | 83 | You can create a development environment using [Pixi](https://pixi.sh): 84 | 85 | ```sh 86 | pixi install -e tests 87 | ``` 88 | 89 | Or with conda using the `environment.yml` file: 90 | 91 | ```sh 92 | conda env create -f environment.yml 93 | ``` 94 | 95 | To install `neatnet` to the environment in an editable form, clone the repository, navigate to the main directory, and install it with pip: 96 | 97 | ```sh 98 | pip install -e . 99 | ``` 100 | 101 | When submitting a pull request: 102 | 103 | - All existing tests should pass. Please make sure that the test suite passes, both locally and on GitHub Actions. Status on GHA will be visible on a pull request. GHA are automatically enabled on your own fork as well. To trigger a check, make a PR to your own fork. 104 | - Ensure that documentation has built correctly. It will be automatically built for each PR.
105 | - New functionality ***must*** include tests. Please write reasonable tests for your code and make sure that they pass on your pull request. 106 | - Classes, methods, functions, etc. should have docstrings. The first line of a docstring should be a standalone summary. Parameters and return values should be documented explicitly. 107 | - Follow PEP 8 when possible. We use ``Ruff`` for linting and formatting to ensure robustness & consistency in code throughout the project. It is included in the ``pre-commit`` hooks and will be checked on every PR. 108 | - `neatnet` supports Python 3.11+ only. When possible, do not introduce additional dependencies. If that is necessary, make sure they can be treated as optional. 109 | 110 | #### Procedure 111 | 112 | 1. *After* opening an issue and discussing with the development team, create a PR with the proposed changes. 113 | 2. If [testing fails](https://github.com/uscuni/neatnet/actions/runs/11368511561) due to an update in the code base, follow steps 3.–5. 114 | 3. Observed data is [saved as artifacts](https://github.com/uscuni/neatnet/actions/runs/11368511561#artifacts) from the workflow and can be downloaded locally. 115 | 4. We determine the `ci_artifacts-ubuntu-latest-py313_latest` data as the "truth." 116 | 5. After comparison of the current "known" data with the new data from (3.), if the new data is "truthier," update your PR with the new "known" data. 117 | 118 | #### Handling Edge Cases in Testing 119 | 120 | Edge cases that we can ignore (following a thorough investigation – e.g. [`neatnet#77`](https://github.com/uscuni/neatnet/issues/77)) will crop up in full-scale FUA testing. Once it is determined the geometry in question is not caused by a bug on our end, it can be added to the `KNOWN_BAD_GEOMS` collection in `tests/conftest.py`. This collection is a dictionary keyed by the `_CODE` of the city/FUA, where the values are lists of index locations of simplified edges that can be ignored if they fail equality testing; a sketch of its structure is shown below. As an example, see our initial "bad" geometries [here](https://github.com/uscuni/neatnet/blob/1be6b44b1a06d52453ecbaee205ae649101c4ea4/neatnet/tests/conftest.py#L25-L39), which were due to a variant number of coordinates in those resultant simplified edges created by [different versions of `shapely`](https://github.com/uscuni/neatnet/pull/67#issuecomment-2457333724).
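Roughly, the collection looks like this (the keys follow the city/FUA codes, but the index values here are made up for illustration – the real entries live in `neatnet/tests/conftest.py`):

```py
# Hypothetical entries -- index locations are illustrative only.
KNOWN_BAD_GEOMS = {
    "aleppo_1133": [4, 110],
    "douala_809": [52],
}
```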
121 | 122 | ##### Code Structure 123 | 124 | Code should be linted and formatted via `ruff`. With the [`.pre-commit` hooks](https://github.com/uscuni/neatnet/blob/main/.pre-commit-config.yaml) we have in place, code in commits will be formatted and linted automatically once [`pre-commit` is installed](https://pre-commit.com/#installation). 125 | 126 | ## Attribution 127 | 128 | This guide is based on the **contributing-gen**. [Make your own](https://github.com/bttger/contributing-gen)! 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Research Team on Urban Structure 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `neatnet`: Street Geometry Processing Toolkit 2 | 3 | [![Continuous Integration](https://github.com/uscuni/neatnet/actions/workflows/testing.yml/badge.svg)](https://github.com/uscuni/neatnet/actions/workflows/testing.yml) [![codecov](https://codecov.io/gh/uscuni/neatnet/graph/badge.svg?token=GFISMU0WPS)](https://codecov.io/gh/uscuni/neatnet) 4 | 5 | ## Introduction 6 | 7 | `neatnet` offers a set of tools for the pre-processing of street network geometry aimed at its simplification. This typically means removal of dual carriageways, roundabouts, and similar transportation-focused geometries and their replacement with a new geometry representing the street space via its centerline. The resulting geometry will be closer to a morphological representation of space than the original source, which is typically drawn with transportation in mind (e.g. OpenStreetMap). 8 | 9 | ## Examples 10 | 11 | ```py 12 | import neatnet 13 | 14 | simplified = neatnet.neatify(gdf) 15 | ``` 16 |
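Here `gdf` is assumed to be a `geopandas.GeoDataFrame` of street network LineStrings in a projected CRS. A slightly fuller sketch (the file paths are placeholders):

```py
import geopandas
import neatnet

# Placeholder paths: any LineString street network in a projected CRS.
streets = geopandas.read_file("streets.gpkg")
simplified = neatnet.neatify(streets)
simplified.to_file("streets_simplified.gpkg")
```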
17 | ## Contribution 18 | 19 | While we consider the API stable, the project is young and may still evolve quickly. All contributions are very welcome; see our guidelines in [`CONTRIBUTING.md`](https://github.com/uscuni/neatnet/blob/main/CONTRIBUTING.md). 20 | 21 | ## Recommended Citations 22 | 23 | The package is a result of a scientific collaboration between [The Research Team on Urban Structure](https://uscuni.org) of Charles University (USCUNI), the [NEtwoRks, Data, and Society](https://nerds.itu.dk) research group of IT University Copenhagen (NERDS), and [Oak Ridge National Laboratory](https://www.ornl.gov/gshsd). 24 | 25 | If you use `neatnet` for a research purpose, please consider citing the original paper introducing it. 26 | 27 | ### Canonical Citation (primary) 28 | 29 | *forthcoming* 30 | 31 | ### Repository Citation (secondary) 32 | 33 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14765801.svg)](https://doi.org/10.5281/zenodo.14765801) 34 | 35 | * **Fleischmann, M., Vybornova, A., & Gaboardi, J.D.** (2025). `uscuni/neatnet`. Zenodo. https://doi.org/10.5281/zenodo.14765801 36 | 37 | ## Funding 38 | 39 | The development has been supported by Charles University’s Primus program through the project "Influence of Socioeconomic and Cultural Factors on Urban Structure in Central Europe", project reference `PRIMUS/24/SCI/023`. 40 | 41 | --------------------------------------- 42 | 43 | This package is developed & maintained by: 44 | * [Martin Fleischmann](https://github.com/martinfleis) 45 | * [Anastassia Vybornova](https://github.com/anastassiavybornova) 46 | * [James D. Gaboardi](https://github.com/jGaboardi) 47 | 48 | 49 | Copyright (c) 2024-, neatnet Developers 50 | -------------------------------------------------------------------------------- /ci/py311_latest.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: py311_neatnet-latest 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.11 7 | - esda 8 | - geopandas 9 | - libpysal 10 | - momepy 11 | - networkx 12 | - numpy 13 | - osmnx 14 | - pandas 15 | - pyarrow 16 | - pyogrio 17 | - scipy 18 | - shapely 19 | # testing 20 | - matplotlib 21 | - pre-commit 22 | - pytest 23 | - pytest-cov 24 | - pytest-xdist 25 | -------------------------------------------------------------------------------- /ci/py311_oldest.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: py311_neatnet-oldest 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.11 7 | - esda=2.6.0 8 | - geopandas=1.0.1 9 | - libpysal=4.12.1 10 | - momepy=0.9.0 11 | - networkx=3.3 12 | - numpy=2.0.0 13 | - pandas=2.2.3 14 | - scipy=1.14.1 15 | - shapely=2.0.6 16 | # testing 17 | - matplotlib 18 | - pre-commit 19 | - pyarrow=17.0 20 | - pytest 21 | - pytest-cov 22 | - pytest-xdist 23 | -------------------------------------------------------------------------------- /ci/py312_latest.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: py312_neatnet-latest 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.12 7 | - esda 8 | - geopandas 9 | - libpysal 10 | - momepy 11 | - networkx 12 | - numpy 13 | - osmnx 14 | - pandas 15 | - pyarrow 16 | - pyogrio 17 | - scipy 18 | - shapely 19 | # testing 20 | - matplotlib 21 | - pre-commit 22 | - pytest 23 | - pytest-cov 24 | - pytest-xdist 25 | -------------------------------------------------------------------------------- /ci/py313_dev.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: py313_neatnet-dev 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.13 7 | - git 8 | - pip 9 | # testing 10 | - pre-commit 11 | - pytest 12 | - pytest-cov 13 | - pytest-xdist 14 | - pip: 15 | # dev versions of packages 16 | - --pre \ 17 | --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ 18 | --extra-index-url https://pypi.org/simple 19 | - matplotlib 20 | - networkx 21 | - numpy 22 | - pandas 23 | - pyarrow 24 | - pyogrio 25 | - scipy 26 | - shapely 27 | - git+https://github.com/pysal/esda.git 28 | - git+https://github.com/geopandas/geopandas.git 29 | - git+https://github.com/pysal/libpysal.git 30 | - git+https://github.com/pysal/momepy.git 31 | -------------------------------------------------------------------------------- /ci/py313_latest.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: py313_neatnet-latest 3 | channels: 4 | - conda-forge 5 |
dependencies: 6 | - python=3.13 7 | - esda 8 | - geopandas 9 | - libpysal 10 | - momepy 11 | - networkx 12 | - numpy 13 | - osmnx 14 | - pandas 15 | - pyarrow 16 | - pyogrio 17 | - scipy 18 | - shapely 19 | # testing 20 | - matplotlib 21 | - pre-commit 22 | - pytest 23 | - pytest-cov 24 | - pytest-xdist 25 | # docs 26 | - ipykernel 27 | - myst-nb 28 | - numpydoc 29 | - sphinx 30 | - sphinxcontrib-bibtex 31 | - sphinx-autosummary-accessors 32 | - sphinx-copybutton 33 | - sphinx-book-theme 34 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | --- 2 | codecov: 3 | notify: 4 | after_n_builds: 6 5 | coverage: 6 | range: 50..95 7 | round: nearest 8 | precision: 1 9 | status: 10 | project: 11 | default: 12 | threshold: 2% 13 | patch: 14 | default: 15 | threshold: 2% 16 | target: 80% 17 | ignore: 18 | - "tests/*" 19 | comment: 20 | layout: "reach, diff, files" 21 | behavior: once 22 | after_n_builds: 6 23 | require_changes: true 24 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # FUA testing data 2 | 3 | ## Contents 4 | 5 | * This `README.md` 6 | * `generate_simplified.py` – see [neatnet#7](https://github.com/uscuni/neatnet/issues/7) 7 | * Data 8 | * There is a directory for each FUA listed below that contains two files: 9 | * `original.parquet`: The original input street network derived from [OSM](https://www.openstreetmap.org/about) via [OSMNX](https://osmnx.readthedocs.io/en/stable/). 10 | * `simplified.parquet`: The simplified street network following our algorithm with *default parameters*. A minimal sketch of how each pair relates is shown just after this list. 11 |
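Loading and regenerating one pair looks roughly like this (a minimal sketch, assuming the working directory is `data/`; see `generate_simplified.py` further below for the full batch version):

```py
import geopandas
import neatnet

original = geopandas.read_parquet("aleppo_1133/original.parquet")
simplified = neatnet.neatify(original)  # default parameters
```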
12 | ## FUA Information 13 | 14 | | FUA | City | Shorthand | 15 | | --- | --- | --- | 16 | | 1133 | Aleppo, Syria, Middle East / Asia | `aleppo_1133` | 17 | | 869 | Auckland, New Zealand, Oceania / Asia | `auckland_869` | 18 | | 809 | Douala, Cameroon, Africa | `douala_809` | 19 | | 1656 | Liège, Belgium, Europe | `liege_1656` | 20 | | 4617 | Bucaramanga, Colombia, S. America | `bucaramanga_4617` | 21 | | 4881 | Salt Lake City, Utah, USA, N. America | `slc_4881` | | 8989 | Wuhan, China, Asia | `wuhan_8989` | 22 | 23 | --------------------------------------- 24 | 25 | Copyright (c) 2024-, neatnet Developers 26 | -------------------------------------------------------------------------------- /data/aleppo_1133/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/aleppo_1133/original.parquet -------------------------------------------------------------------------------- /data/aleppo_1133/simplified.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/aleppo_1133/simplified.parquet -------------------------------------------------------------------------------- /data/auckland_869/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/auckland_869/original.parquet -------------------------------------------------------------------------------- /data/auckland_869/simplified.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/auckland_869/simplified.parquet -------------------------------------------------------------------------------- /data/bucaramanga_4617/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/bucaramanga_4617/original.parquet -------------------------------------------------------------------------------- /data/bucaramanga_4617/simplified.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/bucaramanga_4617/simplified.parquet -------------------------------------------------------------------------------- /data/douala_809/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/douala_809/original.parquet -------------------------------------------------------------------------------- /data/douala_809/simplified.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/douala_809/simplified.parquet -------------------------------------------------------------------------------- /data/generate_simplified.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | import time 4 | 5 | import geopandas 6 | 7 | import neatnet 8 | 9 | start_time = time.time() 10 | 11 | logging.basicConfig( 12 | filename="simplified_generation.log", 13 | filemode="a", 14 | format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s", 15 | datefmt="%H:%M:%S", 16 | level=logging.NOTSET, 17 | ) 18 | logger = logging.getLogger(__name__) 19 | 20 | logging.info("") 21 | logging.info("") 22 | logging.info(" |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|") 23 | logging.info(" | Generating Simplified Street Networks |") 24 | logging.info(" 
|_______________________________________|") 25 | logging.info("") 26 | logging.info("") 27 | logging.info("") 28 | 29 | fua_city = { 30 | # 1133: "aleppo", 31 | # 869: "auckland", 32 | # 4617: "bucaramanga", 33 | # 809: "douala", 34 | # 1656: "liege", 35 | # 4881: "slc", 36 | 8989: "wuhan", 37 | } 38 | 39 | # dict of cityname: fua ID 40 | city_fua = {c: f for f, c in fua_city.items()} 41 | 42 | for city, fua in city_fua.items(): 43 | t1 = time.time() 44 | aoi = f"{city}_{fua}" 45 | 46 | logging.info("") 47 | logging.info("") 48 | logging.info("") 49 | logging.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >>>>") 50 | logging.info("") 51 | logging.info("") 52 | logging.info(f" ** {aoi} **") 53 | logging.info("") 54 | logging.info("") 55 | 56 | # input data 57 | original = geopandas.read_parquet(pathlib.Path(aoi, "original.parquet")) 58 | 59 | # output data 60 | simplified = neatnet.neatify(original) 61 | simplified.to_parquet(pathlib.Path(aoi, "simplified.parquet")) 62 | 63 | t2 = round((time.time() - t1) / 60.0, 2) 64 | 65 | logging.info("") 66 | logging.info("") 67 | logging.info(f"\t{aoi} runtime: {t2} minutes") 68 | logging.info("") 69 | logging.info("") 70 | logging.info("") 71 | logging.info("<<<< ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 72 | logging.info("") 73 | 74 | endtime_time = round((time.time() - start_time) / 60.0, 2) 75 | 76 | logging.info("") 77 | logging.info("") 78 | logging.info(f"Total runtime: {endtime_time} minutes") 79 | logging.info( 80 | "=========================================================================" 81 | ) 82 | logging.info("") 83 | logging.info("") 84 | -------------------------------------------------------------------------------- /data/liege_1656/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/liege_1656/original.parquet -------------------------------------------------------------------------------- /data/liege_1656/simplified.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/liege_1656/simplified.parquet -------------------------------------------------------------------------------- /data/slc_4881/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/slc_4881/original.parquet -------------------------------------------------------------------------------- /data/slc_4881/simplified.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/slc_4881/simplified.parquet -------------------------------------------------------------------------------- /data/wuhan_8989/original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/wuhan_8989/original.parquet -------------------------------------------------------------------------------- /data/wuhan_8989/simplified.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/wuhan_8989/simplified.parquet -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .logo-frontpage { 2 | margin-bottom: 30px; 3 | } -------------------------------------------------------------------------------- /docs/source/_static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/docs/source/_static/icon.png -------------------------------------------------------------------------------- /docs/source/_static/logo.svg: -------------------------------------------------------------------------------- (SVG markup not captured in this dump – 62 lines of vector graphics; see docs/source/_static/logo.svg in the repository) -------------------------------------------------------------------------------- /docs/source/_static/references.bib: -------------------------------------------------------------------------------- 1 | @article{fleischmann_shape-based_2024, 2 | title = {A Shape-Based Heuristic for the Detection of Urban Block Artifacts in Street Networks}, 3 | author = {Fleischmann, Martin and
Vybornova, Anastassia}, 4 | year = {2024}, 5 | month = jun, 6 | journal = {Journal of Spatial Information Science}, 7 | volume = {28}, 8 | pages = {75--102}, 9 | doi = {10.5311/JOSIS.2024.28.319}, 10 | urldate = {2024-06-27}, 11 | abstract = {Street networks are ubiquitous components of cities, guiding their development and enabling movement from place to place; street networks are also the critical components of many urban analytical methods. However, their graph representation is often designed primarily for transportation purposes. This representation is less suitable for other use cases where transportation networks need to be simplified as a mandatory pre-processing step, e.g., in the case of morphological analysis, visual navigation, or drone flight routing. While the urgent demand for automated pre-processing methods comes from various fields, it is still an unsolved challenge. In this article, we tackle this challenge by proposing a cheap computational heuristic for the identification of ``face artifacts'', i.e., geometries that are enclosed by transportation edges but do not represent urban blocks. The heuristic is based on combining the frequency distributions of shape compactness metrics and area measurements of street network face polygons. We test our method on 131 globally sampled large cities and show that it successfully identifies face artifacts in 89{\textbackslash}\% of analyzed cities. Our heuristic of detecting artifacts caused by data being collected for another purpose is the first step towards an automated street network simplification workflow. Moreover, the proposed face artifact index uncovers differences in structural rules guiding the development of cities in different world regions.}, 12 | copyright = {Creative Commons Attribution-NoDerivatives 4.0 International Licence (CC-BY-ND)}, 13 | langid = {english} 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | .. _reference: 2 | 3 | .. currentmodule:: neatnet 4 | 5 | API reference 6 | ============= 7 | 8 | The API reference provides an overview of all public functions in ``neatnet``. 9 | 10 | Network Simplification Routines 11 | ------------------------------- 12 | 13 | The top-level function that performs complete adaptive simplification of street networks 14 | is the primary API of ``neatnet``. 15 | 16 | .. autosummary:: 17 | :toctree: generated/ 18 | 19 | neatify 20 | 21 | The minimal topology fixing can be done using another routine: 22 | 23 | .. autosummary:: 24 | :toctree: generated/ 25 | 26 | fix_topology 27 | 28 | 29 | Node Simplification 30 | ------------------- 31 | 32 | Some of the individual components are also exposed as independent functions (note that 33 | most are consumed by :func:`neatify`). 34 | 35 | 36 | A subset of functions dealing with network nodes: 37 | 38 | .. autosummary:: 39 | :toctree: generated/ 40 | 41 | consolidate_nodes 42 | remove_interstitial_nodes 43 | induce_nodes 44 | 45 | Face artifact detection 46 | ----------------------- 47 | 48 | A subset dealing with face artifacts: 49 | 50 | .. autosummary:: 51 | :toctree: generated/ 52 | 53 | FaceArtifacts 54 | get_artifacts 55 | 56 | Gap filling 57 | ----------- 58 | 59 | Snapping and extending lines in case of imprecise topology: 60 | 61 | .. 
autosummary:: 62 | :toctree: generated/ 63 | 64 | close_gaps 65 | extend_lines 66 | 67 | Internal components 68 | ------------------- 69 | 70 | For debugging purposes, users may rely on some parts of the internal API consumed within :func:`neatify`. 71 | 72 | .. autosummary:: 73 | :toctree: generated/ 74 | 75 | get_artifacts 76 | neatify_loop 77 | neatify_singletons 78 | neatify_pairs 79 | neatify_clusters 80 | 81 | None of the other functions are intended for public use, and their APIs can change without warning. -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | import os 9 | import sys 10 | 11 | import sphinx_autosummary_accessors 12 | 13 | sys.path.insert(0, os.path.abspath("../neatnet/")) 14 | 15 | import neatnet # noqa 16 | 17 | project = "neatnet" 18 | copyright = "2024-, neatnet Developers" # noqa: A001 19 | author = "Martin Fleischmann, Anastassia Vybornova, James D. Gaboardi" 20 | 21 | version = neatnet.__version__ 22 | release = neatnet.__version__ 23 | 24 | language = "en" 25 | 26 | # -- General configuration --------------------------------------------------- 27 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 28 | 29 | extensions = [ 30 | "numpydoc", 31 | "myst_nb", 32 | "sphinx.ext.autodoc", 33 | "sphinx.ext.autosummary", 34 | "sphinx.ext.intersphinx", 35 | "sphinx.ext.viewcode", 36 | "sphinx.ext.mathjax", 37 | "sphinxcontrib.bibtex", 38 | "sphinx_autosummary_accessors", 39 | "sphinx_copybutton", 40 | ] 41 | 42 | bibtex_bibfiles = ["_static/references.bib"] 43 | 44 | master_doc = "index" 45 | 46 | templates_path = [ 47 | "_templates", 48 | sphinx_autosummary_accessors.templates_path, 49 | ] 50 | exclude_patterns = [] 51 | 52 | intersphinx_mapping = { 53 | "esda": ( 54 | "https://pysal.org/esda/", 55 | "https://pysal.org/esda//objects.inv", 56 | ), 57 | "geopandas": ("https://geopandas.org/en/latest", None), 58 | "libpysal": ( 59 | "https://pysal.org/libpysal/", 60 | "https://pysal.org/libpysal//objects.inv", 61 | ), 62 | "momepy": ("http://docs.momepy.org/en/stable/", None), 63 | "pandas": ("https://pandas.pydata.org/docs", None), 64 | "pyproj": ("https://pyproj4.github.io/pyproj/latest/", None), 65 | "python": ("https://docs.python.org/3", None), 66 | "shapely": ("https://shapely.readthedocs.io/en/latest/", None), 67 | } 68 | 69 | # -- Options for HTML output ------------------------------------------------- 70 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 71 | 72 | autosummary_generate = True 73 | numpydoc_show_class_members = False 74 | numpydoc_use_plots = True 75 | class_members_toctree = True 76 | numpydoc_show_inherited_class_members = True 77 | numpydoc_xref_param_type = True 78 | autodoc_default_options = {"members": True, "undoc-members": True} 79 | plot_include_source = True 80 | 81 | html_theme = "sphinx_book_theme" 82 | html_static_path = ["_static"] 83 | html_css_files = ["custom.css"] 84 | # html_logo = "_static/logo.svg" 85 | # html_favicon = "_static/icon.png" 86 |
html_theme_options = { 87 | "use_sidenotes": True, 88 | } 89 | nb_execution_mode = "off" 90 | autodoc_typehints = "none" 91 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../../README.md 2 | ``` 3 | 4 | ```{toctree} 5 | :hidden: 6 | :caption: User Guide 7 | intro 8 | simple_preprocessing 9 | ``` 10 | 11 | ```{toctree} 12 | :hidden: 13 | :caption: API 14 | api 15 | 16 | ``` 17 | 18 | ```{toctree} 19 | :hidden: 20 | :caption: References 21 | references 22 | ``` 23 | 24 | ```{toctree} 25 | :hidden: 26 | :caption: For contributors 27 | GitHub 28 | ``` 29 | -------------------------------------------------------------------------------- /docs/source/references.rst: -------------------------------------------------------------------------------- 1 | .. reference for the docs 2 | 3 | References 4 | ========== 5 | 6 | .. bibliography:: _static/references.bib 7 | :all: 8 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: neatnet 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.13 7 | - esda>=2.6.0 8 | - geopandas>=1.0.1 9 | - libpysal>=4.12.1 10 | - momepy>=0.9.0 11 | - networkx>=3.3 12 | - numpy>=2 13 | - pandas>=2.2.3 14 | - scipy>=1.14.1 15 | - shapely>=2.0.6 16 | - scikit-learn>=1.2.0 17 | # testing 18 | - codecov 19 | - coverage 20 | - matplotlib 21 | - mypy>=1.15.0,<2 22 | - pre-commit 23 | - pyarrow>=17.0 24 | - pytest 25 | - pytest-cov 26 | - pytest-xdist 27 | - ruff 28 | - yamllint 29 | # docs 30 | - ipykernel 31 | - ipywidgets 32 | - jupyterlab 33 | - myst-nb 34 | - numpydoc 35 | - sphinx 36 | - sphinxcontrib-bibtex 37 | - sphinx-autosummary-accessors 38 | - sphinx-book-theme 39 | - sphinx-copybutton 40 | -------------------------------------------------------------------------------- /neatnet/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from importlib.metadata import PackageNotFoundError, version 3 | 4 | from . import simplify 5 | from .artifacts import FaceArtifacts, get_artifacts 6 | from .gaps import close_gaps, extend_lines 7 | from .nodes import ( 8 | consolidate_nodes, 9 | fix_topology, 10 | induce_nodes, 11 | remove_interstitial_nodes, 12 | split, 13 | ) 14 | from .simplify import ( 15 | neatify, 16 | neatify_clusters, 17 | neatify_loop, 18 | neatify_pairs, 19 | neatify_singletons, 20 | ) 21 | 22 | with contextlib.suppress(PackageNotFoundError): 23 | __version__ = version("neatnet") 24 | -------------------------------------------------------------------------------- /neatnet/continuity.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | import momepy 3 | 4 | 5 | def continuity( 6 | streets: geopandas.GeoDataFrame, angle_threshold: float = 120 7 | ) -> tuple[geopandas.GeoDataFrame, momepy.COINS]: 8 | """Assign COINS-based information to streets. 9 | 10 | Parameters 11 | ---------- 12 | streets : geopandas.GeoDataFrame 13 | Street network. 14 | angle_threshold : float = 120 15 | See the ``angle_threshold`` keyword argument in ``momepy.COINS()``. 16 | 17 | Returns 18 | ------- 19 | streets : geopandas.GeoDataFrame 20 | The input ``streets`` with additional columns describing COINS information. 
21 | coins : momepy.COINS 22 | **This is not used in production.** 23 | 24 | Notes 25 | ----- 26 | The returned ``coins`` object is not used in production, but is 27 | very helpful in testing & debugging. See gh:neatnet#49. 28 | """ 29 | streets = streets.copy() 30 | 31 | # Measure continuity of street network 32 | coins = momepy.COINS(streets, angle_threshold=angle_threshold, flow_mode=True) 33 | 34 | # Assign continuity group 35 | group, end = coins.stroke_attribute(True) 36 | streets["coins_group"] = group 37 | streets["coins_end"] = end 38 | 39 | # Assign length of each continuity group and the number of segments within the group. 40 | coins_grouped = streets.length.groupby(streets.coins_group) 41 | streets["coins_len"] = coins_grouped.sum()[streets.coins_group].values 42 | streets["coins_count"] = coins_grouped.size()[streets.coins_group].values 43 | 44 | return streets, coins 45 | 46 | 47 | def get_stroke_info( 48 | artifacts: geopandas.GeoSeries | geopandas.GeoDataFrame, 49 | streets: geopandas.GeoSeries | geopandas.GeoDataFrame, 50 | ) -> tuple[list[int], list[int], list[int], list[int]]: 51 | """Generate information about strokes within ``artifacts``; the 52 | resulting lists can be assigned as columns to ``artifacts``. Classifies 53 | the strokes within the CES typology. 54 | 55 | * 'continuing' strokes - continues before and after artifact. 56 | * 'ending' strokes - continues only at one end. 57 | * 'single' strokes - does not continue. 58 | 59 | Parameters 60 | ---------- 61 | artifacts : geopandas.GeoSeries | geopandas.GeoDataFrame 62 | Polygons representing the artifacts. 63 | streets : geopandas.GeoSeries | geopandas.GeoDataFrame 64 | LineStrings representing the street network. 65 | 66 | Returns 67 | ------- 68 | strokes : list[int] 69 | Counts of all strokes. 70 | c_ : list[int] 71 | Counts for 'continuing' strokes - continues before and after artifact. 72 | e_ : list[int] 73 | Counts for 'ending' strokes - continues only at one end. 74 | s_ : list[int] 75 | Counts for 'single' strokes - does not continue.
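Notes
-----
A minimal usage sketch (assuming ``streets`` already carries the COINS
columns assigned by :func:`continuity`; the artifact column names used
here are illustrative only)::

    streets, _ = continuity(streets)
    strokes, c_, e_, s_ = get_stroke_info(artifacts, streets)
    artifacts["stroke_count"] = strokes
    artifacts["stroke_continuing"] = c_
    artifacts["stroke_ending"] = e_
    artifacts["stroke_single"] = s_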
76 |     """
77 |     strokes = []
78 |     c_ = []
79 |     e_ = []
80 |     s_ = []
81 |     for geom in artifacts.geometry:
82 |         singles = 0
83 |         ends = 0
84 |         edges = streets.iloc[streets.sindex.query(geom, predicate="covers")]
85 |         ecg = edges.coins_group
86 |         if ecg.nunique() == 1 and edges.shape[0] == edges.coins_count.iloc[0]:
87 |             # roundabout special case
88 |             singles = 1
89 |             mains = 0
90 |         else:
91 |             all_ends = edges[edges.coins_end]
92 |             ae_cg = all_ends.coins_group
93 |             mains = edges[~ecg.isin(ae_cg)].coins_group.nunique()
94 |             visited = []
95 |             for coins_count, group in zip(all_ends.coins_count, ae_cg, strict=True):
96 |                 if group not in visited:
97 |                     if coins_count == (ecg == group).sum():
98 |                         singles += 1
99 |                         visited.append(group)
100 |                     else:
101 |                         # do not add to visited -- may be disjoint within the artifact
102 |                         ends += 1
103 |         strokes.append(ecg.nunique())
104 |         c_.append(mains)
105 |         e_.append(ends)
106 |         s_.append(singles)
107 |     return strokes, c_, e_, s_
108 | 
--------------------------------------------------------------------------------
/neatnet/gaps.py:
--------------------------------------------------------------------------------
1 | import math
2 | import operator
3 | 
4 | import geopandas as gpd
5 | import numpy as np
6 | import shapely
7 | 
8 | __all__ = [
9 |     "close_gaps",
10 |     "extend_lines",
11 | ]
12 | 
13 | 
14 | def close_gaps(
15 |     gdf: gpd.GeoDataFrame | gpd.GeoSeries, tolerance: float
16 | ) -> gpd.GeoSeries:
17 |     """Close gaps in LineString geometry where it should be contiguous.
18 | 
19 |     Snaps both lines to the centroid of the gap in between.
20 | 
21 |     Parameters
22 |     ----------
23 |     gdf : GeoDataFrame | GeoSeries
24 |         GeoDataFrame or GeoSeries containing LineString representation of a network.
25 |     tolerance : float
26 |         Nodes within a tolerance will be snapped together.
27 | 
28 |     Returns
29 |     -------
30 |     GeoSeries
31 | 
32 |     See also
33 |     --------
34 |     neatnet.extend_lines
35 |     neatnet.remove_interstitial_nodes
36 |     """
37 |     geom = gdf.geometry.array
38 |     coords = shapely.get_coordinates(geom)
39 |     indices = shapely.get_num_coordinates(geom)
40 | 
41 |     # generate a list of start and end coordinates and create point geometries
42 |     edges = [0]
43 |     i = 0
44 |     for ind in indices:
45 |         ix = i + ind
46 |         edges.append(ix - 1)
47 |         edges.append(ix)
48 |         i = ix
49 |     edges = edges[:-1]
50 |     points = shapely.points(np.unique(coords[edges], axis=0))
51 | 
52 |     buffered = shapely.buffer(points, tolerance / 2)
53 | 
54 |     dissolved = shapely.union_all(buffered)
55 | 
56 |     exploded = [
57 |         shapely.get_geometry(dissolved, i)
58 |         for i in range(shapely.get_num_geometries(dissolved))
59 |     ]
60 | 
61 |     centroids = shapely.centroid(exploded)
62 | 
63 |     snapped = shapely.snap(geom, shapely.union_all(centroids), tolerance)
64 | 
65 |     return gpd.GeoSeries(snapped, crs=gdf.crs)
66 | 
67 | 
68 | def extend_lines(
69 |     gdf: gpd.GeoDataFrame,
70 |     tolerance: float,
71 |     *,
72 |     target: None | gpd.GeoDataFrame | gpd.GeoSeries = None,
73 |     barrier: None | gpd.GeoDataFrame | gpd.GeoSeries = None,
74 |     extension: int | float = 0,
75 | ) -> gpd.GeoDataFrame:
76 |     """Extends lines from ``gdf`` to itself or target within a set tolerance.
77 | 
78 |     Extends unjoined ends of LineString segments to join with other segments or target.
79 |     If ``target`` is passed, extend lines to target. Otherwise extend lines to itself.
80 | 
81 |     If ``barrier`` is passed, each extended line is checked for intersection with
82 |     ``barrier``. If they intersect, the extended line is not returned. This can be
83 |     useful if you don't want to extend street network segments through buildings.
84 | 
85 |     Parameters
86 |     ----------
87 |     gdf : GeoDataFrame
88 |         GeoDataFrame containing LineString geometry
89 |     tolerance : float
90 |         Tolerance for snapping (by how much each segment can be
91 |         extended).
92 |     target : None | GeoDataFrame | GeoSeries
93 |         Target geometry to which ``gdf`` gets extended. Has to be
94 |         (Multi)LineString geometry.
95 |     barrier : None | GeoDataFrame | GeoSeries = None
96 |         An extended line is not used if it intersects the barrier.
97 |     extension : int | float = 0
98 |         By how much to extend the line beyond the snapped geometry. Useful
99 |         when creating enclosures to avoid floating point imprecision.
100 | 
101 |     Returns
102 |     -------
103 |     GeoDataFrame
104 |         GeoDataFrame with extended geometry
105 | 
106 |     See also
107 |     --------
108 |     neatnet.close_gaps
109 |     neatnet.remove_interstitial_nodes
110 |     """
111 | 
112 |     # explode to avoid MultiLineStrings
113 |     # reset index due to the bug in GeoPandas explode
114 |     df = gdf.reset_index(drop=True).explode(ignore_index=True)
115 | 
116 |     if target is None:
117 |         target = df
118 |         itself = True
119 |     else:
120 |         itself = False
121 | 
122 |     # get underlying shapely geometry
123 |     geom = df.geometry.array
124 | 
125 |     # extract array of coordinates and number per geometry
126 |     coords = shapely.get_coordinates(geom)
127 |     indices = shapely.get_num_coordinates(geom)
128 | 
129 |     # generate a list of start and end coordinates and create point geometries
130 |     edges = [0]
131 |     i = 0
132 |     for ind in indices:
133 |         ix = i + ind
134 |         edges.append(ix - 1)
135 |         edges.append(ix)
136 |         i = ix
137 |     edges = edges[:-1]
138 |     points = shapely.points(np.unique(coords[edges], axis=0))
139 | 
140 |     # query LineString geometry to identify points intersecting 2 geometries
141 |     tree = shapely.STRtree(geom)
142 |     inp, res = tree.query(points, predicate="intersects")
143 |     unique, counts = np.unique(inp, return_counts=True)
144 |     ends = np.unique(res[np.isin(inp, unique[counts == 1])])
145 | 
146 |     new_geoms = []
147 |     # iterate over cul-de-sac-like segments and attempt to snap them to street network
148 |     for line in ends:
149 |         l_coords = shapely.get_coordinates(geom[line])
150 | 
151 |         start = shapely.points(l_coords[0])
152 |         end = shapely.points(l_coords[-1])
153 | 
154 |         first = list(tree.query(start, predicate="intersects"))
155 |         second = list(tree.query(end, predicate="intersects"))
156 |         first.remove(line)
157 |         second.remove(line)
158 | 
159 |         t = target if not itself else target.drop(line)
160 | 
161 |         if first and not second:
162 |             snapped = _extend_line(l_coords, t, tolerance)
163 |             if (
164 |                 barrier is not None
165 |                 and barrier.sindex.query(
166 |                     shapely.linestrings(snapped), predicate="intersects"
167 |                 ).size
168 |                 > 0
169 |             ):
170 |                 new_geoms.append(geom[line])
171 |             else:
172 |                 if extension == 0:
173 |                     new_geoms.append(shapely.linestrings(snapped))
174 |                 else:
175 |                     new_geoms.append(
176 |                         shapely.linestrings(
177 |                             _extend_line(snapped, t, extension, snap=False)
178 |                         )
179 |                     )
180 |         elif not first and second:
181 |             snapped = _extend_line(np.flip(l_coords, axis=0), t, tolerance)
182 |             if (
183 |                 barrier is not None
184 |                 and barrier.sindex.query(
185 |                     shapely.linestrings(snapped), predicate="intersects"
186 |                 ).size
187 |                 > 0
188 |             ):
189 |                 new_geoms.append(geom[line])
190 |             else:
191 |                 if extension == 0:
192 |                     new_geoms.append(shapely.linestrings(snapped))
193 |                 else:
194 |                     new_geoms.append(
195 |                         shapely.linestrings(
196 | 
_extend_line(snapped, t, extension, snap=False) 197 | ) 198 | ) 199 | elif not first and not second: 200 | one_side = _extend_line(l_coords, t, tolerance) 201 | one_side_e = _extend_line(one_side, t, extension, snap=False) 202 | snapped = _extend_line(np.flip(one_side_e, axis=0), t, tolerance) 203 | if ( 204 | barrier is not None 205 | and barrier.sindex.query( 206 | shapely.linestrings(snapped), predicate="intersects" 207 | ).size 208 | > 0 209 | ): 210 | new_geoms.append(geom[line]) 211 | else: 212 | if extension == 0: 213 | new_geoms.append(shapely.linestrings(snapped)) 214 | else: 215 | new_geoms.append( 216 | shapely.linestrings( 217 | _extend_line(snapped, t, extension, snap=False) 218 | ) 219 | ) 220 | 221 | df.iloc[ends, df.columns.get_loc(df.geometry.name)] = new_geoms 222 | return df 223 | 224 | 225 | def _extend_line( 226 | coords: np.ndarray, 227 | target: gpd.GeoDataFrame | gpd.GeoSeries, 228 | tolerance: float, 229 | snap: bool = True, 230 | ) -> np.ndarray: 231 | """Extends a line geometry to snap on the target within a tolerance.""" 232 | 233 | if snap: 234 | extrapolation = _get_extrapolated_line( 235 | coords[-4:] if len(coords.shape) == 1 else coords[-2:].flatten(), 236 | tolerance, 237 | ) 238 | int_idx = target.sindex.query(extrapolation, predicate="intersects") 239 | intersection = shapely.intersection( 240 | target.iloc[int_idx].geometry.array, extrapolation 241 | ) 242 | if intersection.size > 0: 243 | if len(intersection) > 1: 244 | distances = {} 245 | ix = 0 246 | for p in intersection: 247 | distance = shapely.distance(p, shapely.points(coords[-1])) 248 | distances[ix] = distance 249 | ix = ix + 1 250 | minimal = min(distances.items(), key=operator.itemgetter(1))[0] 251 | new_point_coords = shapely.get_coordinates(intersection[minimal]) 252 | 253 | else: 254 | new_point_coords = shapely.get_coordinates(intersection[0]) 255 | coo = np.append(coords, new_point_coords) 256 | new = np.reshape(coo, (len(coo) // 2, 2)) 257 | 258 | return new 259 | return coords 260 | 261 | extrapolation = _get_extrapolated_line( 262 | coords[-4:] if len(coords.shape) == 1 else coords[-2:].flatten(), 263 | tolerance, 264 | point=True, 265 | ) 266 | return np.vstack([coords, extrapolation]) 267 | 268 | 269 | def _get_extrapolated_line( 270 | coords: np.ndarray, tolerance: float, point: bool = False 271 | ) -> tuple[float, float] | shapely.LineString: 272 | """Creates a shapely line extrapolated in p1->p2 direction.""" 273 | 274 | p1 = coords[:2] 275 | p2 = coords[2:] 276 | a = p2 277 | 278 | # defining new point based on the vector between existing points 279 | if p1[0] >= p2[0] and p1[1] >= p2[1]: 280 | b = ( 281 | p2[0] 282 | - tolerance 283 | * math.cos( 284 | math.atan( 285 | math.fabs(p1[1] - p2[1] + 0.000001) 286 | / math.fabs(p1[0] - p2[0] + 0.000001) 287 | ) 288 | ), 289 | p2[1] 290 | - tolerance 291 | * math.sin( 292 | math.atan( 293 | math.fabs(p1[1] - p2[1] + 0.000001) 294 | / math.fabs(p1[0] - p2[0] + 0.000001) 295 | ) 296 | ), 297 | ) 298 | elif p1[0] <= p2[0] and p1[1] >= p2[1]: 299 | b = ( 300 | p2[0] 301 | + tolerance 302 | * math.cos( 303 | math.atan( 304 | math.fabs(p1[1] - p2[1] + 0.000001) 305 | / math.fabs(p1[0] - p2[0] + 0.000001) 306 | ) 307 | ), 308 | p2[1] 309 | - tolerance 310 | * math.sin( 311 | math.atan( 312 | math.fabs(p1[1] - p2[1] + 0.000001) 313 | / math.fabs(p1[0] - p2[0] + 0.000001) 314 | ) 315 | ), 316 | ) 317 | elif p1[0] <= p2[0] and p1[1] <= p2[1]: 318 | b = ( 319 | p2[0] 320 | + tolerance 321 | * math.cos( 322 | math.atan( 323 | math.fabs(p1[1] - 
p2[1] + 0.000001)
324 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
325 |                 )
326 |             ),
327 |             p2[1]
328 |             + tolerance
329 |             * math.sin(
330 |                 math.atan(
331 |                     math.fabs(p1[1] - p2[1] + 0.000001)
332 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
333 |                 )
334 |             ),
335 |         )
336 |     else:
337 |         b = (
338 |             p2[0]
339 |             - tolerance
340 |             * math.cos(
341 |                 math.atan(
342 |                     math.fabs(p1[1] - p2[1] + 0.000001)
343 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
344 |                 )
345 |             ),
346 |             p2[1]
347 |             + tolerance
348 |             * math.sin(
349 |                 math.atan(
350 |                     math.fabs(p1[1] - p2[1] + 0.000001)
351 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
352 |                 )
353 |             ),
354 |         )
355 |     if point:
356 |         return b
357 |     return shapely.linestrings([a, b])
358 | 
--------------------------------------------------------------------------------
/neatnet/geometry.py:
--------------------------------------------------------------------------------
1 | """Geometry-related functions"""
2 | 
3 | import collections
4 | import math
5 | import warnings
6 | 
7 | import geopandas as gpd
8 | import numpy as np
9 | import pandas as pd
10 | import shapely
11 | from libpysal import graph
12 | from scipy import spatial
13 | 
14 | from .nodes import consolidate_nodes
15 | 
16 | 
17 | def _is_within(
18 |     line: np.ndarray, poly: shapely.Polygon, rtol: float = 1e-4
19 | ) -> np.ndarray:
20 |     """Check if the line is within a polygon with a set relative tolerance.
21 | 
22 |     Parameters
23 |     ----------
24 |     line : np.ndarray[shapely.LineString]
25 |         Input line to check relationship.
26 |     poly : shapely.Polygon
27 |         Input polygon to check relationship.
28 |     rtol : float = 1e-4
29 |         The set relative tolerance.
30 | 
31 |     Returns
32 |     -------
33 |     np.ndarray
34 |         ``True`` if ``line`` is either entirely within ``poly`` or if
35 |         ``line`` is within ``poly`` based on a relaxed ``rtol`` relative tolerance.
36 |     """
37 | 
38 |     within = shapely.within(line, poly)
39 |     if within.all():
40 |         return within
41 | 
42 |     intersection = shapely.intersection(line, poly)
43 |     return np.abs(shapely.length(intersection) - shapely.length(line)) <= rtol
44 | 
45 | 
46 | def angle_between_two_lines(
47 |     line1: shapely.LineString, line2: shapely.LineString
48 | ) -> float:
49 |     """Return the angle between two lines (assuming they share a vertex).
50 |     Based on ``momepy.coins`` but adapted to shapely lines.
51 |     """
52 | 
53 |     return_bad = 0.0
54 | 
55 |     lines_distinct = line1 != line2
56 |     if not lines_distinct:
57 |         warnings.warn(
58 |             f"Input lines are identical - must be distinct. Returning {return_bad}.",
59 |             UserWarning,
60 |             stacklevel=2,
61 |         )
62 |         return return_bad
63 | 
64 |     # extract points
65 |     a, b, c, d = shapely.get_coordinates([line1, line2]).tolist()
66 |     a, b, c, d = tuple(a), tuple(b), tuple(c), tuple(d)
67 | 
68 |     # assertion: we expect exactly 2 of the 4 points to be identical
69 |     # (lines touch at this point)
70 |     points = collections.Counter([a, b, c, d])
71 | 
72 |     lines_share_vertex = max(points.values()) > 1
73 |     if not lines_share_vertex:
74 |         warnings.warn(
75 |             f"Input lines do not share a vertex. 
Returning {return_bad}.", 76 | UserWarning, 77 | stacklevel=2, 78 | ) 79 | return return_bad 80 | 81 | # points where line touch = "origin" (for vector-based angle calculation) 82 | origin = [k for k, v in points.items() if v == 2][0] 83 | # other 2 unique points (one on each line) 84 | point1, point2 = (k for k, v in points.items() if v == 1) 85 | 86 | # translate lines into vectors (numpy arrays) 87 | v1 = [point1[0] - origin[0], point1[1] - origin[1]] 88 | v2 = [point2[0] - origin[0], point2[1] - origin[1]] 89 | 90 | # compute angle between 2 vectors in degrees 91 | dot_product = v1[0] * v2[0] + v1[1] * v2[1] 92 | norm_v1 = math.sqrt(v1[0] ** 2 + v1[1] ** 2) 93 | norm_v2 = math.sqrt(v2[0] ** 2 + v2[1] ** 2) 94 | cos_theta = round(dot_product / (norm_v1 * norm_v2), 6) # precision issues fix 95 | angle = math.degrees(math.acos(cos_theta)) 96 | 97 | return angle 98 | 99 | 100 | def voronoi_skeleton( 101 | lines: list | np.ndarray | gpd.GeoSeries, 102 | poly: None | shapely.Polygon = None, 103 | snap_to: None | gpd.GeoSeries = None, 104 | max_segment_length: float | int = 1, 105 | buffer: None | float | int = None, 106 | secondary_snap_to: None | gpd.GeoSeries = None, 107 | clip_limit: None | float | int = 2, 108 | consolidation_tolerance: None | float | int = None, 109 | ) -> tuple[np.ndarray, np.ndarray]: 110 | """ 111 | Returns average geometry. 112 | 113 | Parameters 114 | ---------- 115 | lines : list | numpy.ndarray | geopandas.GeoSeries 116 | LineStrings connected at endpoints. If ``poly`` is passed in, ``lines`` 117 | must be a ``geopandas.GeoSeries``. 118 | poly : None | shapely.Polygon = None 119 | Polygon enclosed by ``lines``. 120 | snap_to : None | gpd.GeoSeries = None 121 | Series of geometries that shall be connected to the skeleton. 122 | max_segment_length: float | int = 1 123 | Additional vertices will be added so that all line segments 124 | are no longer than this value. Must be greater than 0. 125 | buffer : None | float | int = None 126 | Optional custom buffer distance for dealing with Voronoi infinity issues. 127 | secondary_snap_to : None | gpd.GeoSeries = None 128 | Fall-back series of geometries that shall be connected to the skeleton. 129 | clip_limit : None | float | int = 2 130 | Following generation of the Voronoi linework, we clip to fit inside the polygon. 131 | To ensure we get a space to make proper topological connections from the 132 | linework to the actual points on the edge of the polygon, we clip using a 133 | polygon with a negative buffer of ``clip_limit`` or the radius of 134 | maximum inscribed circle, whichever is smaller. 135 | consolidation_tolerance : None | float | int = None 136 | Tolerance passed to node consolidation within the resulting skeleton. 137 | If ``None``, no consolidation happens. 138 | 139 | Returns 140 | ------- 141 | edgelines : numpy.ndarray 142 | Array of averaged geometries. 143 | splitters : numpy.ndarray 144 | Split points. 
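
    Examples
    --------
    An illustrative sketch (invented geometries, not from the test suite): the
    skeleton of two roughly parallel segments approximates their centerline.

    >>> import shapely
    >>> from neatnet.geometry import voronoi_skeleton
    >>> lines = [
    ...     shapely.LineString([(0, 0), (100, 0)]),
    ...     shapely.LineString([(0, 10), (100, 10)]),
    ... ]
    >>> edgelines, splitters = voronoi_skeleton(lines, max_segment_length=1)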
145 |     """
146 |     if buffer is None:
147 |         buffer = max_segment_length * 20
148 |     if not poly:
149 |         if not isinstance(lines, gpd.GeoSeries):
150 |             lines = gpd.GeoSeries(lines)
151 |         poly = shapely.box(*lines.total_bounds)
152 |     # get an additional line around the lines to avoid infinity issues with Voronoi
153 |     extended_lines = list(lines) + [poly.buffer(buffer).boundary]
154 | 
155 |     # interpolate lines to represent them as points for Voronoi
156 |     shapely_lines = extended_lines
157 |     points, ids = shapely.get_coordinates(
158 |         shapely.segmentize(shapely_lines, max_segment_length), return_index=True
159 |     )
160 | 
161 |     # remove duplicated coordinates
162 |     unq, count = np.unique(points, axis=0, return_counts=True)
163 |     mask = np.isin(points, unq[count > 1]).all(axis=1)
164 |     points = points[~mask]
165 |     ids = ids[~mask]
166 | 
167 |     # generate Voronoi diagram
168 |     voronoi_diagram = spatial.Voronoi(points)
169 | 
170 |     # get all ridges and filter only those between the two lines
171 |     pts = voronoi_diagram.ridge_points
172 |     mapped = np.take(ids, pts)
173 |     ridge_vertices = np.array(voronoi_diagram.ridge_vertices)
174 | 
175 |     # iterate over segment-pairs and keep ridges between input geometries
176 |     _edgelines = []
177 |     to_add = []
178 |     splitters = []
179 | 
180 |     # determine the negative buffer distance to avoid overclipping of narrow polygons
181 |     # this can still result in some missing links, but only in rare cases
182 |     dist = min([clip_limit, shapely.ops.polylabel(poly).distance(poly.boundary) * 0.4])
183 |     limit = poly.buffer(-dist)
184 | 
185 |     # drop ridges that are between points coming from the same line
186 |     selfs = mapped[:, 0] == mapped[:, 1]
187 |     buff = (mapped == mapped.max()).any(axis=1)
188 |     mapped = mapped[~(selfs | buff)]
189 |     ridge_vertices = ridge_vertices[~(selfs | buff)]
190 |     unique = np.unique(np.sort(mapped, axis=1), axis=0)
191 | 
192 |     for a, b in unique:
193 |         mask = ((mapped[:, 0] == a) | (mapped[:, 0] == b)) & (
194 |             (mapped[:, 1] == a) | (mapped[:, 1] == b)
195 |         )
196 | 
197 |         verts = ridge_vertices[mask]
198 | 
199 |         # generate the line in between the lines
200 |         edgeline = shapely.line_merge(
201 |             shapely.multilinestrings(voronoi_diagram.vertices[verts])
202 |         )
203 | 
204 |         # check if the edgeline is within polygon
205 |         if not edgeline.within(limit):
206 |             if not isinstance(edgeline, shapely.MultiLineString):
207 |                 # if not, clip it by the polygon with a small negative buffer to keep
208 |                 # the gap between edgeline and poly boundary to avoid possible
209 |                 # overlapping lines
210 |                 edgeline = shapely.intersection(edgeline, limit)
211 | 
212 |                 # in edge cases, this can result in a MultiLineString with one sliver
213 |                 # part
214 |                 edgeline = _remove_sliver(edgeline)
215 |             # if the edgeline is a MultiLineString, treat each part independently
216 |             else:
217 |                 parts = []
218 |                 for part in edgeline.geoms:
219 |                     part = shapely.intersection(part, limit)
220 |                     part = _remove_sliver(part)
221 |                     if not part.is_empty:
222 |                         parts.append(part)
223 |                 edgeline = shapely.MultiLineString(parts)
224 | 
225 |         # check if a, b lines share a node
226 |         intersection = shapely_lines[b].intersection(shapely_lines[a])
227 |         # if they do, add shortest line from the edgeline to the shared node and
228 |         # combine it with the edgeline. 
Also, avoid an inner loop in more complex input 229 | # that would create connection across 230 | if not intersection.is_empty and not ( 231 | intersection.geom_type == "MultiPoint" 232 | and (len(intersection.geoms) == 2 and len(lines) != 2) 233 | ): 234 | # we need union of edgeline and shortest because snap is buggy in GEOS 235 | # and line_merge as well. This results in a MultiLineString but we can 236 | # deal with those later. For now, we just need this extended edgeline to 237 | # be a single geometry to ensure the component discovery below works as 238 | # intended 239 | # get_parts is needed as in case of voronoi based on two lines, these 240 | # intersect on both ends, hence both need to be extended 241 | edgeline = shapely.union( 242 | edgeline, 243 | shapely.union_all( 244 | shapely.shortest_line( 245 | shapely.get_parts(intersection), edgeline.boundary 246 | ) 247 | ), 248 | ) 249 | # add final edgeline to the list 250 | _edgelines.append(edgeline) 251 | 252 | edgelines = np.array(_edgelines)[~(shapely.is_empty(_edgelines))] 253 | 254 | if edgelines.shape[0] > 0: 255 | # if there is no explicit snapping target, snap to the boundary of the polygon 256 | # via the shortest line. That is by definition always within the polygon 257 | # (Martin thinks) (James concurs) 258 | if snap_to is not False: 259 | if snap_to is None: 260 | sl = shapely.shortest_line( 261 | shapely.union_all(edgelines).boundary, poly.boundary 262 | ) 263 | to_add.append(sl) 264 | splitters.append(shapely.get_point(sl, -1)) 265 | 266 | # if we have some snapping targets, we need to figure out 267 | # what shall be snapped to what 268 | else: 269 | additions, splits = snap_to_targets( 270 | edgelines, poly, snap_to, secondary_snap_to 271 | ) 272 | to_add.extend(additions) 273 | splitters.extend(splits) 274 | 275 | # concatenate edgelines and their additions snapping to edge 276 | edgelines = np.concatenate([edgelines, to_add]) 277 | # simplify to avoid unnecessary point density and some wobbliness 278 | edgelines = shapely.simplify(edgelines, max_segment_length) 279 | # drop empty 280 | edgelines = edgelines[edgelines != None] # noqa: E711 281 | 282 | edgelines = shapely.line_merge(edgelines[shapely.length(edgelines) > 0]) 283 | edgelines = _as_parts(edgelines) 284 | edgelines = _consolidate(edgelines, consolidation_tolerance) 285 | 286 | return edgelines, np.array(splitters) 287 | 288 | 289 | def _remove_sliver( 290 | edgeline: shapely.LineString | shapely.MultiLineString, 291 | ) -> shapely.LineString: 292 | """Remove sliver(s) if present.""" 293 | if edgeline.geom_type == "MultiLineString": 294 | parts = shapely.get_parts(edgeline) 295 | edgeline = parts[np.argmax(shapely.length(parts))] 296 | return edgeline 297 | 298 | 299 | def _as_parts(edgelines: np.ndarray) -> np.ndarray: 300 | """Return constituent LineStrings if MultiLineString present.""" 301 | geom_types = np.unique(shapely.get_type_id(edgelines)) 302 | if geom_types.shape[0] > 1 or (geom_types == 5).all(): 303 | edgelines = shapely.get_parts(edgelines) 304 | return edgelines 305 | 306 | 307 | def _consolidate( 308 | edgelines: np.ndarray, consolidation_tolerance: None | float | int 309 | ) -> np.ndarray: 310 | """Return ``edgelines`` from consolidated nodes, if criteria met.""" 311 | if consolidation_tolerance and edgelines.shape[0] > 0: 312 | edgelines = consolidate_nodes( 313 | edgelines, tolerance=consolidation_tolerance, preserve_ends=True 314 | ).geometry.to_numpy() 315 | return edgelines 316 | 317 | 318 | def snap_to_targets( 319 | 
edgelines: np.ndarray,
320 |     poly: shapely.Polygon,
321 |     snap_to: gpd.GeoSeries,
322 |     secondary_snap_to: None | gpd.GeoSeries = None,
323 | ) -> tuple[list[shapely.LineString], list[shapely.Point]]:
324 |     """Snap edgelines to vertices.
325 | 
326 |     Parameters
327 |     ----------
328 |     edgelines : numpy.ndarray
329 |         Voronoi skeleton edges.
330 |     poly : shapely.Polygon
331 |         Polygon enclosed by ``lines``.
332 |     snap_to : geopandas.GeoSeries
333 |         Series of geometries that shall be connected to the skeleton.
334 |     secondary_snap_to : None | gpd.GeoSeries = None
335 |         Fall-back series of geometries that shall be connected to the skeleton.
336 | 
337 |     Returns
338 |     -------
339 |     to_add, to_split : tuple[list[shapely.LineString], list[shapely.Point]]
340 |         Lines to add and points where to split.
341 |     """
342 | 
343 |     to_add: list = []
344 |     to_split: list = []
345 | 
346 |     # generate graph from lines
347 |     comp_labels, comp_counts, components = _prep_components(edgelines)
348 | 
349 |     primary_union = shapely.union_all(snap_to)
350 |     secondary_union = shapely.union_all(secondary_snap_to)
351 | 
352 |     # if there are multiple components, loop over all and treat each
353 |     if len(components) > 1:
354 |         for comp_label, comp in components.geometry.items():
355 |             cbound = comp.boundary
356 | 
357 |             # if a component does not intersect the boundary, it needs to be snapped
58 |             # if it does but has only one part, this part intersects only on one
359 |             # side (the node remaining from the removed edge) and needs to be
360 |             # snapped on the other side as well
361 |             if (
362 |                 (not comp.intersects(poly.boundary))
363 |                 or comp_counts[comp_label] == 1
364 |                 or (
365 |                     not comp.intersects(primary_union)
366 |                 )  # ! this fixes one thing but may break others
367 |             ):
368 |                 # add segment composed of the shortest line to the nearest snapping
369 |                 # target. We use boundary to snap to endpoints of edgelines only
370 |                 sl = shapely.shortest_line(cbound, primary_union)
371 |                 if _is_within(sl, poly):
372 |                     to_split, to_add = _split_add(sl, to_split, to_add)
373 |                 else:
374 |                     if secondary_snap_to is not None:
375 |                         sl = shapely.shortest_line(cbound, secondary_union)
376 |                         to_split, to_add = _split_add(sl, to_split, to_add)
377 |     else:
378 |         # if there is a single component, ensure it gets a shortest line to an
379 |         # endpoint from each snapping target
380 |         for target in snap_to:
381 |             sl = shapely.shortest_line(components.boundary.item(), target)
382 |             if _is_within(sl, poly):
383 |                 to_split, to_add = _split_add(sl, to_split, to_add)
384 |             else:
385 |                 warnings.warn(
386 |                     "Could not create a connection as it would lead outside "
387 |                     "of the artifact.",
388 |                     UserWarning,
389 |                     stacklevel=2,
390 |                 )
391 |     return to_add, to_split
392 | 
393 | 
394 | def _prep_components(
395 |     lines: np.ndarray | gpd.GeoSeries,
396 | ) -> tuple[pd.Series, pd.Series, gpd.GeoSeries]:
397 |     """Helper for preparing graph components & labels in PySAL."""
398 | 
399 |     # cast edgelines to gdf
400 |     lines = gpd.GeoDataFrame(geometry=lines)
401 | 
402 |     # build queen contiguity on edgelines and extract component labels
403 |     not_empty = ~lines.is_empty
404 |     not_nan = ~lines.geometry.isna()
405 |     lines = lines[not_empty & not_nan]  # keep only non-empty, non-missing geometries
406 |     comp_labels = graph.Graph.build_contiguity(lines, rook=False).component_labels
407 | 
408 |     # compute size of each component
409 |     comp_counts = comp_labels.value_counts()
410 | 
411 |     # get MultiLineString geometry per connected component
412 |     components = lines.dissolve(comp_labels)
413 | 
414 |     return comp_labels, comp_counts, components
415 | 
416 | 
417 | def _split_add(line: shapely.LineString, splits: list, adds: list) -> tuple[list, list]:
418 |     """Helper for preparing splitter points & added lines."""
419 |     splits.append(shapely.get_point(line, -1))
420 |     adds.append(line)
421 |     return splits, adds
422 | 
--------------------------------------------------------------------------------
/neatnet/nodes.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 | import typing
3 | 
4 | import geopandas as gpd
5 | import networkx as nx
6 | import numpy as np
7 | import pandas as pd
8 | import pyproj
9 | import shapely
10 | from scipy import sparse
11 | from sklearn.cluster import DBSCAN
12 | 
13 | 
14 | def _fill_attrs(gdf: gpd.GeoDataFrame, source_row: pd.Series) -> gpd.GeoDataFrame:
15 |     """Thoughtful attribute assignment to lines split into segments by new nodes –
16 |     taking list-like values into consideration. See gh#213. Regarding iterables,
17 |     currently only supports list values – others can be added based on input type
18 |     in the future on an ad hoc basis as problems arise. Called from within ``split()``.
19 | 
20 |     Parameters
21 |     ----------
22 |     gdf : geopandas.GeoDataFrame
23 |         The new frame of split linestrings.
24 |     source_row : pandas.Series
25 |         The original source row.
26 | 
27 |     Returns
28 |     -------
29 |     geopandas.GeoDataFrame
30 |         The input ``gdf`` with updated columns based on values in ``source_row``.
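
    Examples
    --------
    Illustrative only; the column names and values are invented. Scalar
    attributes are broadcast by assignment, while list values are repeated
    per row:

    >>> import geopandas, pandas, shapely
    >>> segments = geopandas.GeoDataFrame(
    ...     geometry=[
    ...         shapely.LineString([(0, 0), (1, 0)]),
    ...         shapely.LineString([(1, 0), (2, 0)]),
    ...     ]
    ... )
    >>> source = pandas.Series({"name": "Main St", "lanes": [1, 2]})
    >>> _fill_attrs(segments, source)["lanes"].tolist()
    [[1, 2], [1, 2]]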
31 |     """
32 | 
33 |     def _populate_column(attr):
34 |         """Return the attribute if scalar, create vector of input if not."""
35 |         if isinstance(attr, collections.abc.Sequence) and not isinstance(attr, str):
36 |             attr = [attr] * gdf.shape[0]
37 |         return attr
38 | 
39 |     for col in source_row.index.drop(["geometry", "_status"], errors="ignore"):
40 |         gdf[col] = _populate_column(source_row[col])
41 | 
42 |     return gdf
43 | 
44 | 
45 | def split(
46 |     split_points: list | np.ndarray | gpd.GeoSeries,
47 |     cleaned_streets: gpd.GeoDataFrame,
48 |     crs: str | pyproj.CRS,
49 |     *,
50 |     eps: float = 1e-4,
51 | ) -> gpd.GeoSeries | gpd.GeoDataFrame:
52 |     """Split lines on new nodes.
53 | 
54 |     Parameters
55 |     ----------
56 |     split_points : list | numpy.ndarray | geopandas.GeoSeries
57 |         Points to split the ``cleaned_streets``.
58 |     cleaned_streets : geopandas.GeoDataFrame
59 |         Line geometries to be split with ``split_points``.
60 |     crs : str | pyproj.CRS
61 |         Anything accepted by ``pyproj.CRS``.
62 |     eps : float = 1e-4
63 |         Tolerance epsilon for point snapping.
64 | 
65 |     Returns
66 |     -------
67 |     geopandas.GeoSeries | geopandas.GeoDataFrame
68 |         Resultant split line geometries.
69 |     """
70 |     split_points = gpd.GeoSeries(split_points, crs=crs)
71 |     for split in split_points.drop_duplicates():
72 |         _, ix = cleaned_streets.sindex.nearest(split, max_distance=eps)
73 |         row = cleaned_streets.iloc[ix]
74 |         edge = row.geometry
75 |         if edge.shape[0] == 1:
76 |             row = row.iloc[0]
77 |             lines_split = _snap_n_split(edge.item(), split, eps)
78 |             if lines_split.shape[0] > 1:
79 |                 gdf_split = gpd.GeoDataFrame(geometry=lines_split, crs=crs)
80 |                 gdf_split = _fill_attrs(gdf_split, row)
81 |                 gdf_split["_status"] = "changed"
82 |                 cleaned_streets = pd.concat(
83 |                     [cleaned_streets.drop(edge.index[0]), gdf_split],
84 |                     ignore_index=True,
85 |                 )
86 |         elif edge.shape[0] > 1:
87 |             to_be_dropped = []
88 |             to_be_added = []
89 |             for i, e in edge.items():
90 |                 lines_split = _snap_n_split(e, split, eps)
91 |                 if lines_split.shape[0] > 1:
92 |                     to_be_dropped.append(i)
93 |                     to_be_added.append(lines_split)
94 | 
95 |             if to_be_added:
96 |                 gdf_split = pd.DataFrame(
97 |                     {"geometry": to_be_added, "_orig": to_be_dropped}
98 |                 ).explode("geometry")
99 |                 gdf_split = pd.concat(
100 |                     [
101 |                         gdf_split.drop(columns="_orig").reset_index(drop=True),
102 |                         row.drop(columns="geometry")
103 |                         .loc[gdf_split["_orig"]]
104 |                         .reset_index(drop=True),
105 |                     ],
106 |                     axis=1,
107 |                 )
108 |                 gdf_split["_status"] = "changed"
109 |                 cleaned_streets = pd.concat(
110 |                     [cleaned_streets.drop(to_be_dropped), gdf_split],
111 |                     ignore_index=True,
112 |                 )
113 |         cleaned_streets = gpd.GeoDataFrame(
114 |             cleaned_streets, geometry="geometry", crs=crs
115 |         )
116 | 
117 |     return cleaned_streets.reset_index(drop=True)
118 | 
119 | 
120 | def _snap_n_split(e: shapely.LineString, s: shapely.Point, tol: float) -> np.ndarray:
121 |     """Snap point to edge and return lines to split."""
122 |     snapped = shapely.snap(e, s, tolerance=tol)
123 |     _lines_split = shapely.get_parts(shapely.ops.split(snapped, s))
124 |     return _lines_split[~shapely.is_empty(_lines_split)]
125 | 
126 | 
127 | def _status(x: pd.Series) -> str:
128 |     """Determine the status of edge line(s)."""
129 |     if len(x) == 1:
130 |         return x.iloc[0]
131 |     return "changed"
132 | 
133 | 
134 | def get_components(
135 |     edgelines: list | np.ndarray | gpd.GeoSeries,
136 |     *,
137 |     ignore: None | gpd.GeoSeries = None,
138 | ) -> np.ndarray:
139 |     """Associate edges with connected component labels and return.
140 | 
141 |     Parameters
142 |     ----------
143 |     edgelines : list | np.ndarray | gpd.GeoSeries
144 |         Collection of line objects.
145 |     ignore : None | gpd.GeoSeries = None
146 |         Nodes to ignore when labeling components.
147 | 
148 |     Returns
149 |     -------
150 |     np.ndarray
151 |         Edge connected component labels.
152 | 
153 |     Notes
154 |     -----
155 |     See [https://github.com/uscuni/neatnet/issues/56] for detailed explanation of
156 |     output.
157 |     """
158 |     edgelines = np.array(edgelines)
159 |     start_points = shapely.get_point(edgelines, 0)
160 |     end_points = shapely.get_point(edgelines, -1)
161 |     points = shapely.points(
162 |         np.unique(
163 |             shapely.get_coordinates(np.concatenate([start_points, end_points])), axis=0
164 |         )
165 |     )
166 |     if ignore is not None:
167 |         mask = np.isin(points, ignore)
168 |         points = points[~mask]
169 |     # query LineString geometry to identify points intersecting 2 geometries
170 |     inp, res = shapely.STRtree(shapely.boundary(edgelines)).query(
171 |         points, predicate="intersects"
172 |     )
173 |     unique, counts = np.unique(inp, return_counts=True)
174 |     mask = np.isin(inp, unique[counts == 2])
175 |     merge_res = res[mask]
176 |     merge_inp = inp[mask]
177 |     closed = np.arange(len(edgelines))[shapely.is_closed(edgelines)]
178 |     mask = np.isin(merge_res, closed) | np.isin(merge_inp, closed)
179 |     merge_res = merge_res[~mask]
180 |     merge_inp = merge_inp[~mask]
181 |     g = nx.Graph(list(zip((merge_inp * -1) - 1, merge_res, strict=True)))
182 |     components = {
183 |         i: {v for v in k if v > -1} for i, k in enumerate(nx.connected_components(g))
184 |     }
185 |     component_labels = {value: key for key in components for value in components[key]}
186 |     labels = pd.Series(component_labels, index=range(len(edgelines)))
187 | 
188 |     max_label = len(edgelines) - 1 if pd.isna(labels.max()) else int(labels.max())
189 |     filling = pd.Series(range(max_label + 1, max_label + len(edgelines) + 1))
190 |     labels = labels.fillna(filling)
191 | 
192 |     return labels.values
193 | 
194 | 
195 | def weld_edges(
196 |     edgelines: list | np.ndarray | gpd.GeoSeries,
197 |     *,
198 |     ignore: None | gpd.GeoSeries = None,
199 | ) -> list | np.ndarray | gpd.GeoSeries:
200 |     """Combine lines sharing an endpoint (if only 2 lines share that point).
201 |     Lightweight version of ``remove_interstitial_nodes()``.
202 | 
203 |     Parameters
204 |     ----------
205 |     edgelines : list | np.ndarray | gpd.GeoSeries
206 |         Collection of line objects.
207 |     ignore : None | gpd.GeoSeries = None
208 |         Nodes to ignore when welding components.
209 | 
210 |     Returns
211 |     -------
212 |     list | np.ndarray | gpd.GeoSeries
213 |         Resultant welded ``edgelines`` if more than 1 passed in, otherwise
214 |         the original ``edgelines`` object.
215 |     """
216 |     if len(edgelines) < 2:
217 |         return edgelines
218 |     labels = get_components(edgelines, ignore=ignore)
219 |     return (
220 |         gpd.GeoSeries(edgelines)
221 |         .groupby(labels)
222 |         .agg(lambda x: shapely.line_merge(shapely.GeometryCollection(x.values)))
223 |     ).tolist()
224 | 
225 | 
226 | def induce_nodes(streets: gpd.GeoDataFrame, *, eps: float = 1e-4) -> gpd.GeoDataFrame:
227 |     """Add potentially missing nodes at intersections of individual LineString
228 |     endpoints with the remaining network. The idea behind this is that if a line
229 |     ends on an intersection with another, there should be a node on both of them.
230 | 
231 |     Parameters
232 |     ----------
233 |     streets : geopandas.GeoDataFrame
234 |         Input LineString geometries.
235 | eps : float = 1e-4 236 | Tolerance epsilon for point snapping passed into ``nodes.split()``. 237 | 238 | Returns 239 | ------- 240 | geopandas.GeoDataFrame 241 | Updated ``streets`` with (potentially) added nodes. 242 | """ 243 | 244 | sindex_kws = {"predicate": "dwithin", "distance": 1e-4} 245 | 246 | # identify degree mismatch cases 247 | nodes_degree_mismatch = _identify_degree_mismatch(streets, sindex_kws) 248 | 249 | # ensure loop topology cases: 250 | # - loop nodes intersecting non-loops 251 | # - loop nodes intersecting other loops 252 | nodes_off_loops, nodes_on_loops = _makes_loop_contact(streets, sindex_kws) 253 | 254 | # all nodes to induce 255 | nodes_to_induce = pd.concat( 256 | [nodes_degree_mismatch, nodes_off_loops, nodes_on_loops] 257 | ) 258 | 259 | return split(nodes_to_induce.geometry, streets, streets.crs, eps=eps) 260 | 261 | 262 | def _identify_degree_mismatch( 263 | edges: gpd.GeoDataFrame, sindex_kws: dict 264 | ) -> gpd.GeoSeries: 265 | """Helper to identify difference of observed vs. expected node degree.""" 266 | nodes = _nodes_degrees_from_edges(edges.geometry) 267 | nodes = nodes.set_crs(edges.crs) 268 | nix, eix = edges.sindex.query(nodes.geometry, **sindex_kws) 269 | coo_vals = ([True] * len(nix), (nix, eix)) 270 | coo_shape = (len(nodes), len(edges)) 271 | intersects = sparse.coo_array(coo_vals, shape=coo_shape, dtype=np.bool_) 272 | nodes["expected_degree"] = intersects.sum(axis=1) 273 | return nodes[nodes["degree"] != nodes["expected_degree"]].geometry 274 | 275 | 276 | def _nodes_from_edges( 277 | edgelines: list | np.ndarray | gpd.GeoSeries, 278 | return_degrees=False, 279 | ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: 280 | """Helper to get network nodes from edges' geometries.""" 281 | edgelines = np.array(edgelines) 282 | start_points = shapely.get_point(edgelines, 0) 283 | end_points = shapely.get_point(edgelines, -1) 284 | node_coords = np.unique( 285 | shapely.get_coordinates(np.concatenate([start_points, end_points])), 286 | axis=0, 287 | return_counts=return_degrees, 288 | ) 289 | if return_degrees: 290 | node_coords, degrees = node_coords 291 | node_points = shapely.points(node_coords) 292 | if return_degrees: 293 | return node_points, degrees 294 | else: 295 | return node_points 296 | 297 | 298 | def _nodes_degrees_from_edges( 299 | edgelines: list | np.ndarray | gpd.GeoSeries, 300 | ) -> gpd.GeoDataFrame: 301 | """Helper to get network nodes and their degrees from edges' geometries.""" 302 | node_points, degrees = _nodes_from_edges(edgelines, return_degrees=True) 303 | nodes_gdf = gpd.GeoDataFrame({"degree": degrees, "geometry": node_points}) 304 | return nodes_gdf 305 | 306 | 307 | def _makes_loop_contact( 308 | edges: gpd.GeoDataFrame, sindex_kws: dict 309 | ) -> tuple[gpd.GeoSeries, gpd.GeoSeries]: 310 | """Helper to identify: 311 | 1. loop nodes intersecting non-loops 312 | 2. 
loop nodes intersecting other loops
313 |     """
314 | 
315 |     loops, not_loops = _loops_and_non_loops(edges)
316 |     loop_points = shapely.points(loops.get_coordinates().values)
317 |     loop_gdf = gpd.GeoDataFrame(geometry=loop_points, crs=edges.crs)
318 |     loop_point_geoms = loop_gdf.geometry
319 | 
320 |     # loop points intersecting non-loops
321 |     nodes_from_non_loops_ix, _ = not_loops.sindex.query(loop_point_geoms, **sindex_kws)
322 | 
323 |     # loop points intersecting other loops
324 |     nodes_from_loops_ix, _ = loops.sindex.query(loop_point_geoms, **sindex_kws)
325 |     loop_x_loop, n_loop_x_loop = np.unique(nodes_from_loops_ix, return_counts=True)
326 |     nodes_from_loops_ix = loop_x_loop[n_loop_x_loop > 1]
327 | 
328 |     # tease out both varieties
329 |     nodes_non_loops = loop_gdf.loc[nodes_from_non_loops_ix]
330 |     nodes_loops = loop_gdf.loc[nodes_from_loops_ix]
331 | 
332 |     return nodes_non_loops.geometry, nodes_loops.geometry
333 | 
334 | 
335 | def _loops_and_non_loops(
336 |     edges: gpd.GeoDataFrame,
337 | ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
338 |     """Bifurcate edge gdf into loops and non-loops."""
339 |     loop_mask = edges.is_ring
340 |     not_loops = edges[~loop_mask]
341 |     loops = edges[loop_mask]
342 |     return loops, not_loops
343 | 
344 | 
345 | def remove_interstitial_nodes(
346 |     gdf: gpd.GeoSeries | gpd.GeoDataFrame, *, aggfunc: str | dict = "first", **kwargs
347 | ) -> gpd.GeoSeries | gpd.GeoDataFrame:
348 |     """Clean topology of existing LineString geometry by removal of nodes of degree 2.
349 | 
350 |     Returns the original gdf if there's no node of degree 2.
351 | 
352 |     Parameters
353 |     ----------
354 |     gdf : geopandas.GeoSeries | geopandas.GeoDataFrame
355 |         Input edgelines to process. If any edges are ``MultiLineString`` they
356 |         will be exploded into constituent ``LineString`` components.
357 |     aggfunc : str | dict = 'first'
358 |         Aggregate function for processing non-spatial component.
359 |     **kwargs
360 |         Keyword arguments for ``aggfunc``.
361 | 
362 |     Returns
363 |     -------
364 |     geopandas.GeoSeries | geopandas.GeoDataFrame
365 |         The original input ``gdf`` if only 1 edgeline, otherwise the processed
366 |         edgeline without interstitial nodes.
367 | 
368 |     Notes
369 |     -----
370 |     Any 3D geometries are (potentially) downcast in loops.
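
    Examples
    --------
    A small sketch with invented geometries: two segments meeting at a
    degree-2 node are welded into a single LineString.

    >>> import geopandas, shapely
    >>> import neatnet
    >>> gdf = geopandas.GeoDataFrame(
    ...     geometry=[
    ...         shapely.LineString([(0, 0), (1, 0)]),
    ...         shapely.LineString([(1, 0), (2, 0)]),
    ...     ]
    ... )
    >>> neatnet.remove_interstitial_nodes(gdf).shape[0]
    1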
371 |     """
372 | 
373 |     def merge_geometries(block: gpd.GeoSeries) -> shapely.LineString:
374 |         """Helper in processing the spatial component."""
375 |         return shapely.line_merge(shapely.GeometryCollection(block.values))
376 | 
377 |     if len(gdf) < 2:
378 |         return gdf
379 | 
380 |     if isinstance(gdf, gpd.GeoSeries):
381 |         gdf = gdf.to_frame("geometry")
382 | 
383 |     gdf = gdf.explode(ignore_index=True)
384 | 
385 |     labels = get_components(gdf.geometry)
386 | 
387 |     # Process non-spatial component
388 |     data = gdf.drop(labels=gdf.geometry.name, axis=1)
389 |     aggregated_data = data.groupby(by=labels).agg(aggfunc, **kwargs)
390 |     aggregated_data.columns = aggregated_data.columns.to_flat_index()
391 | 
392 |     # Process spatial component
393 |     g = gdf.groupby(group_keys=False, by=labels)[gdf.geometry.name].agg(
394 |         merge_geometries
395 |     )
396 |     aggregated_geometry = gpd.GeoDataFrame(g, geometry=gdf.geometry.name, crs=gdf.crs)
397 | 
398 |     # Recombine
399 |     aggregated = aggregated_geometry.join(aggregated_data)
400 | 
401 |     # Derive nodes
402 |     nodes = _nodes_from_edges(aggregated.geometry)
403 |     # Bifurcate edges into loops and non-loops
404 |     loops, not_loops = _loops_and_non_loops(aggregated)
405 | 
406 |     # Ensure:
407 |     #   - all loops have exactly 1 endpoint; and
408 |     #   - that endpoint shares a node with an intersecting line
409 |     fixed_loops = []
410 |     fixed_index = []
411 |     node_ix, loop_ix = loops.sindex.query(nodes, predicate="intersects")
412 |     for ix in np.unique(loop_ix):
413 |         loop_geom = loops.geometry.iloc[ix]
414 |         target_nodes = nodes[node_ix[loop_ix == ix]]
415 |         if len(target_nodes) == 2:
416 |             new_sequence = _rotate_loop_coords(loop_geom, not_loops)
417 |             fixed_loops.append(shapely.LineString(new_sequence))
418 |             fixed_index.append(ix)
419 | 
420 |     aggregated.loc[loops.index[fixed_index], aggregated.geometry.name] = fixed_loops
421 |     return aggregated.reset_index(drop=True)
422 | 
423 | 
424 | def _rotate_loop_coords(
425 |     loop_geom: shapely.LineString, not_loops: gpd.GeoDataFrame
426 | ) -> np.ndarray:
427 |     """Rotate loop node coordinates if needed to ensure topology."""
428 | 
429 |     loop_coords = shapely.get_coordinates(loop_geom)
430 |     loop_points = gpd.GeoDataFrame(geometry=shapely.points(loop_coords))
431 |     loop_points_ix, _ = not_loops.sindex.query(
432 |         loop_points.geometry, predicate="dwithin", distance=1e-4
433 |     )
434 | 
435 |     mode = loop_points.loc[loop_points_ix].geometry.mode()
436 | 
437 |     # if there is a non-planar intersection, we may have multiple points. Check with
438 |     # entrypoints only in that case
439 |     if mode.shape[0] > 1:
440 |         loop_points_ix, _ = not_loops.sindex.query(
441 |             loop_points.geometry, predicate="dwithin", distance=1e-4
442 |         )
443 |         new_mode = loop_points.loc[loop_points_ix].geometry.mode()
444 |         # if that did not help, just pick one to avoid failure and hope for the best
445 |         if new_mode.empty or new_mode.shape[0] > 1:  # note: `or`, not bitwise `|`
446 |             mode = mode.iloc[[0]]
447 | 
448 |     new_start = mode.get_coordinates().values
449 |     _coords_match = (loop_coords == new_start).all(axis=1)
450 |     new_start_idx = np.where(_coords_match)[0].squeeze()
451 | 
452 |     rolled_coords = np.roll(loop_coords[:-1], -new_start_idx, axis=0)
453 |     new_sequence = np.append(rolled_coords, rolled_coords[[0]], axis=0)
454 |     return new_sequence
455 | 
456 | 
457 | def fix_topology(
458 |     streets: gpd.GeoDataFrame,
459 |     *,
460 |     eps: float = 1e-4,
461 |     **kwargs,
462 | ) -> gpd.GeoDataFrame:
463 |     """Fix street network topology. This ensures correct topology of the network by:
464 | 
465 |     1. Adding potentially missing nodes...
466 |         on intersections of individual LineString endpoints
467 |         with the remaining network. The idea behind this is that
468 |         if a line ends on an intersection with another, there
469 |         should be a node on both of them.
470 |     2. Removing nodes of degree 2...
471 |         that have no meaning in the network used within our framework.
472 |     3. Removing duplicated geometries (irrespective of orientation).
473 | 
474 |     Parameters
475 |     ----------
476 |     streets : geopandas.GeoDataFrame
477 |         Input LineString geometries.
478 |     eps : float = 1e-4
479 |         Tolerance epsilon for point snapping passed into ``nodes.split()``.
480 |     **kwargs : dict
481 |         Keyword arguments passed into ``remove_interstitial_nodes()``.
482 | 
483 |     Returns
484 |     -------
485 |     gpd.GeoDataFrame
486 |         The input streets that now have fixed topology and are ready
487 |         to proceed through the simplification algorithm.
488 |     """
489 |     streets = streets[~streets.geometry.normalize().duplicated()].copy()
490 |     streets_w_nodes = induce_nodes(streets, eps=eps)
491 |     return remove_interstitial_nodes(streets_w_nodes, **kwargs)
492 | 
493 | 
494 | def consolidate_nodes(
495 |     gdf: gpd.GeoDataFrame,
496 |     *,
497 |     tolerance: float = 2.0,
498 |     preserve_ends: bool = False,
499 | ) -> gpd.GeoSeries:
500 |     """Return geometry with consolidated nodes.
501 | 
502 |     Replace clusters of nodes with a single node (weighted centroid
503 |     of a cluster) and snap linestring geometry to it. A cluster is
504 |     defined using hierarchical clustering with average linkage
505 |     on coordinates cut at a cophenetic distance equal to ``tolerance``.
506 | 
507 |     The use of hierarchical clustering avoids the chaining effect of a sequence
508 |     of intersections within ``tolerance`` from each other that would happen with
509 |     DBSCAN and similar solutions.
510 | 
511 |     Parameters
512 |     ----------
513 |     gdf : geopandas.GeoDataFrame
514 |         GeoDataFrame with LineStrings (usually representing a street network).
515 |     tolerance : float = 2.0
516 |         The maximum distance between two nodes for one to be considered
517 |         as in the neighborhood of the other. Nodes within tolerance are
518 |         considered a part of a single cluster and will be consolidated.
519 |     preserve_ends : bool = False
520 |         If ``True``, nodes of degree 1 will be excluded from the consolidation.
521 | 
522 |     Returns
523 |     -------
524 |     geopandas.GeoSeries
525 |         Updated input ``gdf`` of LineStrings with consolidated nodes.
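
    Examples
    --------
    A hedged sketch: ``streets`` is assumed to be a projected GeoDataFrame of
    LineStrings in meters and the tolerance of 10 is illustrative. Clusters of
    nearby nodes are replaced by their weighted centroid and a ``_status``
    column records which geometries changed:

    >>> import neatnet
    >>> consolidated = neatnet.consolidate_nodes(streets, tolerance=10)
    >>> "_status" in consolidated.columns
    True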
526 |     """
527 |     from scipy.cluster import hierarchy
528 | 
529 |     if isinstance(gdf, gpd.GeoSeries):
530 |         gdf = gdf.to_frame("geometry")
531 |     elif isinstance(gdf, np.ndarray):
532 |         gdf = gpd.GeoDataFrame(geometry=gdf)
533 | 
534 |     nodes = _nodes_degrees_from_edges(gdf.geometry)
535 | 
536 |     if preserve_ends:
537 |         # keep at least one meter of original geometry around each end
538 |         ends = nodes[nodes["degree"] == 1].buffer(1)
539 |         nodes = nodes[nodes["degree"] > 1].copy()
540 | 
541 |     # if all we have are ends, return the original
542 |     # - this is generally when called from within ``geometry._consolidate()``
543 |     if nodes.shape[0] < 2:
544 |         gdf["_status"] = "original"
545 |         return gdf
546 | 
547 |     # get clusters of nodes which should be consolidated
548 |     # first get components of possible clusters and then do the linkage itself;
549 |     # otherwise it is dead slow and needs a ton of memory
550 |     db = DBSCAN(eps=tolerance, min_samples=2).fit(nodes.get_coordinates())
551 |     comp_labels = db.labels_
552 |     mask = comp_labels > -1
553 |     components = comp_labels[mask]
554 |     nodes_to_merge = nodes[mask]
555 | 
556 |     def get_labels(nodes):
557 |         linkage = hierarchy.linkage(shapely.get_coordinates(nodes), method="average")
558 |         labels = (
559 |             hierarchy.fcluster(linkage, tolerance, criterion="distance").astype(str)
560 |             + f"_{nodes.name}"
561 |         )
562 |         return labels
563 | 
564 |     grouped = (
565 |         pd.Series(nodes_to_merge.geometry).groupby(components).transform(get_labels)
566 |     )
567 |     nodes["lab"] = grouped
568 |     unique, counts = np.unique(nodes["lab"].dropna(), return_counts=True)
569 |     actual_clusters = unique[counts > 1]
570 |     change = nodes[nodes["lab"].isin(actual_clusters)]
571 | 
572 |     # no change needed, return the original
573 |     if change.empty:
574 |         gdf["_status"] = "original"
575 |         return gdf
576 | 
577 |     gdf = gdf.copy()
578 |     # get geometry
579 |     geom = gdf.geometry.copy()
580 |     status = pd.Series("original", index=geom.index)
581 | 
582 |     # loop over clusters, cut out geometry within tolerance / 2 and replace it
583 |     # with spider-like geometry to the weighted centroid of a cluster
584 |     spiders = []
585 |     midpoints = []
586 | 
587 |     clusters = change.dissolve(change["lab"])
588 | 
589 |     # TODO: not optimal but avoids some MultiLineStrings but not all
590 |     cookies = clusters.buffer(tolerance / 2).convex_hull
591 | 
592 |     if preserve_ends:
593 |         cookies = cookies.to_frame().overlay(ends.to_frame(), how="difference")
594 | 
595 |     for cluster, cookie in zip(clusters.geometry, cookies.geometry, strict=True):
596 |         inds = geom.sindex.query(cookie, predicate="intersects")
597 |         pts = shapely.get_coordinates(geom.iloc[inds].intersection(cookie.boundary))
598 |         if pts.shape[0] > 0:
599 |             # TODO: this may result in MultiLineString - we need to avoid that
600 |             # TODO: It is temporarily fixed by that explode in return
601 |             geom.iloc[inds] = geom.iloc[inds].difference(cookie)
602 | 
603 |             status.iloc[inds] = "changed"
604 |             midpoint = np.mean(shapely.get_coordinates(cluster), axis=0)
605 |             midpoints.append(midpoint)
606 |             mids = np.array([midpoint] * len(pts))
607 | 
608 |             spider = shapely.linestrings(
609 |                 np.array([pts[:, 0], mids[:, 0]]).T,
610 |                 y=np.array([pts[:, 1], mids[:, 1]]).T,
611 |             )
612 |             spiders.append(spider)
613 | 
614 |     gdf = gdf.set_geometry(geom)
615 |     gdf["_status"] = status
616 | 
617 |     if spiders:
618 |         # combine geometries
619 |         geoms = np.hstack(spiders)
620 |         gdf = pd.concat([gdf, gpd.GeoDataFrame(geometry=geoms, crs=geom.crs)])
621 | 
622 |     agg: dict[str, str | 
typing.Callable] = {"_status": _status} 623 | for c in gdf.columns.drop(gdf.active_geometry_name): 624 | if c != "_status": 625 | agg[c] = "first" 626 | return remove_interstitial_nodes( 627 | gdf[~gdf.geometry.is_empty].explode(), 628 | # NOTE: this aggfunc needs to be able to process all the columns 629 | aggfunc=agg, 630 | ) 631 | -------------------------------------------------------------------------------- /neatnet/simplify.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing 3 | import warnings 4 | 5 | import geopandas as gpd 6 | import numpy as np 7 | import pandas as pd 8 | import shapely 9 | from libpysal import graph 10 | from scipy import sparse 11 | 12 | from .artifacts import ( 13 | get_artifacts, 14 | n1_g1_identical, 15 | nx_gx, 16 | nx_gx_cluster, 17 | nx_gx_identical, 18 | ) 19 | from .continuity import continuity, get_stroke_info 20 | from .nodes import ( 21 | _nodes_degrees_from_edges, 22 | _nodes_from_edges, 23 | _status, 24 | consolidate_nodes, 25 | fix_topology, 26 | induce_nodes, 27 | remove_interstitial_nodes, 28 | split, 29 | ) 30 | 31 | DEBUGGING = False 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | def _check_input_crs(streets: gpd.GeoDataFrame, exclusion_mask: gpd.GeoSeries): 37 | """Ensure input data is in appropriate Coordinate reference systems.""" 38 | 39 | streets_crs = streets.crs 40 | streets_has_crs = streets_crs is not None 41 | 42 | if not streets_has_crs: 43 | warnings.warn( 44 | ( 45 | "The input `streets` data does not have an assigned " 46 | "coordinate reference system. Assuming a projected CRS in meters." 47 | ), 48 | category=UserWarning, 49 | stacklevel=2, 50 | ) 51 | 52 | else: 53 | if not streets_crs.is_projected: 54 | raise ValueError( 55 | "The input `streets` data are not in a projected " 56 | "coordinate reference system. Reproject and rerun." 57 | ) 58 | 59 | if streets_crs.axis_info[0].unit_name != "metre": 60 | warnings.warn( 61 | ( 62 | "The input `streets` data coordinate reference system is projected " 63 | "but not in meters. All `neatnet` defaults assume meters. " 64 | "Either reproject and rerun or proceed with caution." 65 | ), 66 | category=UserWarning, 67 | stacklevel=2, 68 | ) 69 | 70 | if exclusion_mask is not None and exclusion_mask.crs != streets_crs: 71 | raise ValueError( 72 | "The input `streets` and `exclusion_mask` data are in " 73 | "different coordinate reference systems. Reproject and rerun." 
74 | ) 75 | 76 | 77 | def _link_nodes_artifacts( 78 | step: str, 79 | streets: gpd.GeoDataFrame, 80 | artifacts: gpd.GeoDataFrame, 81 | eps: None | float, 82 | ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: 83 | """Helper to prep nodes & artifacts when simplifying singletons & pairs.""" 84 | 85 | # Get nodes from the network 86 | nodes = _nodes_degrees_from_edges(streets.geometry) 87 | 88 | if step == "singletons": 89 | node_geom = nodes.geometry 90 | sindex_kwargs = {"predicate": "dwithin", "distance": eps} 91 | else: 92 | node_geom = nodes.buffer(0.1) 93 | sindex_kwargs = {"predicate": "intersects"} 94 | 95 | # Link nodes to artifacts 96 | node_idx, artifact_idx = artifacts.sindex.query(node_geom, **sindex_kwargs) 97 | 98 | intersects = sparse.coo_array( 99 | ([True] * len(node_idx), (node_idx, artifact_idx)), 100 | shape=(len(nodes), len(artifacts)), 101 | dtype=np.bool_, 102 | ) 103 | 104 | # Compute number of nodes per artifact 105 | artifacts["node_count"] = intersects.sum(axis=0) 106 | 107 | return nodes, artifacts 108 | 109 | 110 | def _classify_strokes( 111 | artifacts: gpd.GeoDataFrame, streets: gpd.GeoDataFrame 112 | ) -> gpd.GeoDataFrame: 113 | """Classify artifacts with ``{C,E,S}`` typology.""" 114 | 115 | strokes, c_, e_, s_ = get_stroke_info(artifacts, streets) 116 | 117 | artifacts["stroke_count"] = strokes 118 | artifacts["C"] = c_ 119 | artifacts["E"] = e_ 120 | artifacts["S"] = s_ 121 | 122 | return artifacts 123 | 124 | 125 | def _identify_non_planar( 126 | artifacts: gpd.GeoDataFrame, streets: gpd.GeoDataFrame 127 | ) -> gpd.GeoDataFrame: 128 | """Filter artifacts caused by non-planar intersections.""" 129 | 130 | # Note from within `neatify_singletons()` 131 | # TODO: This is not perfect. 132 | # TODO: Some 3CC artifacts were non-planar but not captured here. 133 | 134 | artifacts["non_planar"] = artifacts["stroke_count"] > artifacts["node_count"] 135 | a_idx, r_idx = streets.sindex.query( 136 | artifacts.geometry.boundary, predicate="overlaps" 137 | ) 138 | artifacts.iloc[np.unique(a_idx), artifacts.columns.get_loc("non_planar")] = True 139 | 140 | return artifacts 141 | 142 | 143 | def neatify_singletons( 144 | artifacts: gpd.GeoDataFrame, 145 | streets: gpd.GeoDataFrame, 146 | *, 147 | max_segment_length: float | int = 1, 148 | compute_coins: bool = True, 149 | min_dangle_length: float | int = 10, 150 | eps: float = 1e-4, 151 | clip_limit: float | int = 2, 152 | simplification_factor: float | int = 2, 153 | consolidation_tolerance: float | int = 10, 154 | ) -> gpd.GeoDataFrame: 155 | """Simplification of singleton face artifacts – the first simplification step in 156 | the procedure detailed in ``simplify.neatify_loop()``. 157 | 158 | This process extracts nodes from network edges before computing and labeling 159 | face artifacts with a ``{C, E, S}`` typology through ``momepy.COINS`` via the 160 | constituent street geometries. 161 | 162 | Next, each artifact is iterated over and constituent line geometries are either 163 | dropped or added in the following order of typologies: 164 | 165 | 1. 1 node and 1 continuity group 166 | 2. more than 1 node and 1 or more identical continuity groups 167 | 3. 2 or more nodes and 2 or more continuity groups 168 | 169 | Non-planar geometries are ignored. 170 | 171 | Parameters 172 | ---------- 173 | artifacts : geopandas.GeoDataFrame 174 | Face artifact polygons. 175 | streets : geopandas.GeoDataFrame 176 | Preprocessed street network data. 
177 | max_segment_length : float | int = 1 178 | Additional nodes will be added so that all line segments 179 | are no longer than this value. Must be greater than 0. 180 | Used in multiple internal geometric operations. 181 | compute_coins : bool = True 182 | Flag for computing and labeling artifacts with a ``{C, E, S}`` typology through 183 | ``momepy.COINS`` via the constituent street geometries. 184 | min_dangle_length : float | int = 10 185 | The threshold for determining if linestrings are dangling slivers to be 186 | removed or not. 187 | eps : float = 1e-4 188 | Tolerance epsilon used in multiple internal geometric operations. 189 | clip_limit : float | int = 2 190 | Following generation of the Voronoi linework, we clip to fit inside the 191 | polygon. To ensure we get a space to make proper topological connections 192 | from the linework to the actual points on the edge of the polygon, we clip 193 | using a polygon with a negative buffer of ``clip_limit`` or the radius of 194 | maximum inscribed circle, whichever is smaller. 195 | simplification_factor : float | int = 2 196 | The factor by which singles, pairs, and clusters are simplified. The 197 | ``max_segment_length`` is multiplied by this factor to get the 198 | simplification epsilon. 199 | consolidation_tolerance : float | int = 10 200 | Tolerance passed to node consolidation when generating Voronoi skeletons. 201 | 202 | Returns 203 | ------- 204 | geopandas.GeoDataFrame 205 | The street network line data following the singleton procedure. 206 | """ 207 | 208 | # Extract network nodes and relate to artifacts 209 | nodes, artifacts = _link_nodes_artifacts("singletons", streets, artifacts, eps) 210 | 211 | # Compute number of stroke groups per artifact 212 | if compute_coins: 213 | streets, _ = continuity(streets) 214 | artifacts = _classify_strokes(artifacts, streets) 215 | 216 | # Filter artifacts caused by non-planar intersections 217 | artifacts = _identify_non_planar(artifacts, streets) 218 | 219 | # Count intersititial nodes (primes) 220 | _prime_count = artifacts["node_count"] - artifacts[["C", "E", "S"]].sum(axis=1) 221 | artifacts["interstitial_nodes"] = _prime_count 222 | 223 | # Define the type label 224 | ces_type = [] 225 | for x in artifacts[["node_count", "C", "E", "S"]].itertuples(): 226 | ces_type.append(f"{x.node_count}{'C' * x.C}{'E' * x.E}{'S' * x.S}") 227 | artifacts["ces_type"] = ces_type 228 | 229 | # Collect changes 230 | to_drop: list[int] = [] 231 | to_add: list[int] = [] 232 | split_points: list[shapely.Point] = [] 233 | 234 | # Isolate planar artifacts 235 | planar = artifacts[~artifacts["non_planar"]].copy() 236 | planar["buffered"] = planar.buffer(eps) 237 | if artifacts["non_planar"].any(): 238 | logger.debug(f"IGNORING {artifacts.non_planar.sum()} non planar artifacts") 239 | 240 | # Iterate over each singleton planar artifact and simplify based on typology 241 | for artifact in planar.itertuples(): 242 | n_nodes = artifact.node_count 243 | n_strokes = artifact.stroke_count 244 | cestype = artifact.ces_type 245 | 246 | # Get edges relevant for an artifact 247 | edges = streets.iloc[ 248 | streets.sindex.query(artifact.buffered, predicate="covers") 249 | ] 250 | 251 | # Dispatch by typology 252 | try: 253 | # 1 node and 1 continuity group 254 | if (n_nodes == 1) and (n_strokes == 1): 255 | logger.debug("FUNCTION n1_g1_identical") 256 | n1_g1_identical( 257 | edges, 258 | to_drop=to_drop, 259 | to_add=to_add, 260 | geom=artifact.geometry, 261 | max_segment_length=max_segment_length, 262 | 
clip_limit=clip_limit, 263 | ) 264 | # More than 1 node and 1 or more identical continuity groups 265 | elif (n_nodes > 1) and (len(set(cestype[1:])) == 1): 266 | logger.debug("FUNCTION nx_gx_identical") 267 | nx_gx_identical( 268 | edges, 269 | geom=artifact.geometry, 270 | to_add=to_add, 271 | to_drop=to_drop, 272 | nodes=nodes, 273 | angle=75, 274 | max_segment_length=max_segment_length, 275 | clip_limit=clip_limit, 276 | consolidation_tolerance=consolidation_tolerance, 277 | ) 278 | # 2 or more nodes and 2 or more continuity groups 279 | elif (n_nodes > 1) and (len(cestype) > 2): 280 | logger.debug("FUNCTION nx_gx") 281 | nx_gx( 282 | edges, 283 | artifact=artifact, 284 | to_drop=to_drop, 285 | to_add=to_add, 286 | split_points=split_points, 287 | nodes=nodes, 288 | max_segment_length=max_segment_length, 289 | clip_limit=clip_limit, 290 | min_dangle_length=min_dangle_length, 291 | consolidation_tolerance=consolidation_tolerance, 292 | ) 293 | else: 294 | logger.debug("NON PLANAR") 295 | except Exception as e: 296 | if DEBUGGING: 297 | raise e 298 | warnings.warn( 299 | f"An error occurred at location {artifact.geometry.centroid}. " 300 | f"The artifact has not been simplified. The original message:\n{e}", 301 | UserWarning, 302 | stacklevel=2, 303 | ) 304 | 305 | # Split lines on new nodes 306 | cleaned_streets = split(split_points, streets.drop(to_drop), streets.crs) 307 | 308 | 309 | if to_add: 310 | # Create new streets with fixed geometry. 311 | # Note: ``to_add`` and ``to_drop`` accumulate across all artifacts, so 312 | # this step runs only once, not per artifact. 313 | _add_merged = gpd.GeoSeries(to_add).line_merge() 314 | new = gpd.GeoDataFrame(geometry=_add_merged, crs=streets.crs).explode() 315 | new = new[~new.normalize().duplicated()].copy() 316 | new["_status"] = "new" 317 | new.geometry = new.simplify(max_segment_length * simplification_factor) 318 | new_streets = pd.concat([cleaned_streets, new], ignore_index=True) 319 | agg: dict[str, str | typing.Callable] = {"_status": _status} 320 | for c in cleaned_streets.columns.drop(cleaned_streets.active_geometry_name): 321 | if c != "_status": 322 | agg[c] = "first" 323 | non_empties = new_streets[~(new_streets.is_empty | new_streets.geometry.isna())] 324 | new_streets = remove_interstitial_nodes(non_empties, aggfunc=agg) 325 | 326 | final = new_streets 327 | else: 328 | final = cleaned_streets 329 | 330 | if "coins_group" in final.columns: 331 | final = final.drop( 332 | columns=[c for c in streets.columns if c.startswith("coins_")] 333 | ) 334 | return final 335 | 336 | 337 | def neatify_pairs( 338 | artifacts: gpd.GeoDataFrame, 339 | streets: gpd.GeoDataFrame, 340 | *, 341 | max_segment_length: float | int = 1, 342 | min_dangle_length: float | int = 20, 343 | clip_limit: float | int = 2, 344 | simplification_factor: float | int = 2, 345 | consolidation_tolerance: float | int = 10, 346 | ) -> gpd.GeoDataFrame: 347 | """Simplification of pairs of face artifacts – the second simplification step in 348 | the procedure detailed in ``simplify.neatify_loop()``. 349 | 350 | This process extracts nodes from network edges before identifying non-planarity 351 | and cluster information. 352 | 353 | If paired artifacts are present, they are further classified by first 354 | vs. last instance of the duplicated component label, and by whether 355 | they should be simplified with the clustered process. 
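Roughly, per ``get_solution()``: a pair whose members both "see" the shared
edge as continuing (``C``) is iterated over as two singletons; a pair seeing
it identically otherwise (``E``–``E`` or ``S``–``S``) has the shared edge
dropped and the dissolved pair re-processed as a single artifact; mixed
cases fall back to the skeleton (cluster) treatment.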
356 | 357 | Finally, simplification is performed based on the following order of typologies: 358 | 1. Singletons – merged pairs & first instance (w/o COINS) 359 | 2. Singletons – Second instance – w/ COINS 360 | 3. Clusters 361 | 362 | Parameters 363 | ---------- 364 | artifacts : geopandas.GeoDataFrame 365 | Face artifact polygons. 366 | streets : geopandas.GeoDataFrame 367 | Preprocessed street network data. 368 | max_segment_length : float | int = 1 369 | Additional vertices will be added so that all line segments 370 | are no longer than this value. Must be greater than 0. 371 | Used in multiple internal geometric operations. 372 | min_dangle_length : float | int = 20 373 | The threshold for determining if linestrings are dangling slivers to be 374 | removed or not. 375 | clip_limit : float | int = 2 376 | Following generation of the Voronoi linework, we clip to fit inside the 377 | polygon. To ensure we get a space to make proper topological connections 378 | from the linework to the actual points on the edge of the polygon, we clip 379 | using a polygon with a negative buffer of ``clip_limit`` or the radius of 380 | maximum inscribed circle, whichever is smaller. 381 | simplification_factor : float | int = 2 382 | The factor by which singles, pairs, and clusters are simplified. The 383 | ``max_segment_length`` is multiplied by this factor to get the 384 | simplification epsilon. 385 | consolidation_tolerance : float | int = 10 386 | Tolerance passed to node consolidation when generating Voronoi skeletons. 387 | 388 | Returns 389 | ------- 390 | geopandas.GeoDataFrame 391 | The street network line data following the pairs procedure. 392 | """ 393 | 394 | # Extract network nodes and relate to artifacts 395 | nodes, artifacts = _link_nodes_artifacts("pairs", streets, artifacts, None) 396 | 397 | # Compute number of stroke groups per artifact 398 | streets, _ = continuity(streets) 399 | artifacts = _classify_strokes(artifacts, streets) 400 | 401 | # Filter artifacts caused by non-planar intersections 402 | artifacts = _identify_non_planar(artifacts, streets) 403 | 404 | # Identify non-planar clusters 405 | _id_np = lambda x: sum(artifacts.loc[artifacts["comp"] == x.comp]["non_planar"]) # noqa: E731 406 | artifacts["non_planar_cluster"] = artifacts.apply(_id_np, axis=1) 407 | # Subset non-planar clusters and planar artifacts 408 | np_clusters = artifacts[artifacts.non_planar_cluster > 0] 409 | artifacts_planar = artifacts[artifacts.non_planar_cluster == 0] 410 | 411 | # Isolate planar artifacts 412 | _planar_grouped = artifacts_planar.groupby("comp")[artifacts_planar.columns] 413 | _solutions = _planar_grouped.apply(get_solution, streets=streets) 414 | artifacts_w_info = artifacts.merge(_solutions, left_on="comp", right_index=True) 415 | 416 | # Isolate non-planar clusters of value 2 – e.g., artifact under highway 417 | _np_clust_2 = np_clusters["non_planar_cluster"] == 2 418 | artifacts_under_np = np_clusters[_np_clust_2].dissolve("comp", as_index=False) 419 | 420 | # Determine typology dispatch if artifacts are present 421 | if not artifacts_w_info.empty: 422 | agg = { 423 | "coins_group": "first", 424 | "coins_end": lambda x: x.any(), 425 | "_status": _status, 426 | } 427 | for c in streets.columns.drop( 428 | [streets.active_geometry_name, "coins_count"], errors="ignore" 429 | ): 430 | if c not in agg: 431 | agg[c] = "first" 432 | 433 | sol_drop = "solution == 'drop_interline'" 434 | sol_iter = "solution == 'iterate'" 435 | 436 | # Determine artifacts and street edges to drop 437 | 
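# A hypothetical sketch of ``_solutions`` at this point -- ``get_solution()``
# returns one row per pair component (the values below are illustrative only):
#
#   comp  solution            drop_id
#   3     'drop_interline'    17
#   8     'iterate'           42
#   9     'skeleton'          51
#
# After the merge, each artifact row in ``artifacts_w_info`` carries its
# component's solution.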
_to_drop = artifacts_w_info.drop_duplicates("comp").query(sol_drop).drop_id 438 | _drop_streets = streets.drop(_to_drop.dropna().values) 439 | 440 | # Re-run node cleaning on subset of fresh street edges 441 | streets_cleaned = remove_interstitial_nodes( 442 | _drop_streets, 443 | aggfunc=agg, 444 | ) 445 | 446 | # Isolate drops to create merged pairs 447 | merged_pairs = artifacts_w_info.query(sol_drop).dissolve("comp", as_index=False) 448 | 449 | # Sort artifacts by their node count high-to-low 450 | sorted_node_count = artifacts_w_info.sort_values("node_count", ascending=False) 451 | 452 | # Isolate artifacts to process as singletons – first instance 453 | _1st = sorted_node_count.query(sol_iter).drop_duplicates("comp", keep="first") 454 | _planar_clusters = np_clusters[~np_clusters["non_planar"]] 455 | _1st = pd.concat([_1st, _planar_clusters], ignore_index=True) 456 | 457 | # Isolate artifacts to process as singletons – last instance 458 | _2nd = sorted_node_count.query(sol_iter).drop_duplicates("comp", keep="last") 459 | 460 | # Isolate artifacts to process as clusters 461 | for_skeleton = artifacts_w_info.query("solution == 'skeleton'") 462 | 463 | # Otherwise instantiate artifact containers as empty 464 | else: 465 | merged_pairs = pd.DataFrame() 466 | _1st = pd.DataFrame() 467 | _2nd = pd.DataFrame() 468 | for_skeleton = pd.DataFrame() 469 | streets_cleaned = streets 470 | 471 | # Generate counts of COINS groups for edges 472 | coins_count = ( 473 | streets_cleaned.groupby("coins_group", as_index=False) 474 | .geometry.count() 475 | .rename(columns={"geometry": "coins_count"}) 476 | ) 477 | streets_cleaned = streets_cleaned.merge(coins_count, on="coins_group", how="left") 478 | 479 | # Add artifacts under non-planar intersections to the cluster dispatcher 480 | if not artifacts_under_np.empty: 481 | for_skeleton = pd.concat([for_skeleton, artifacts_under_np]) 482 | 483 | # Dispatch singleton simplifier 484 | if not merged_pairs.empty or not _1st.empty: 485 | # Merged pairs & first instance – w/o COINS 486 | streets_cleaned = neatify_singletons( 487 | pd.concat([merged_pairs, _1st]), 488 | streets_cleaned, 489 | max_segment_length=max_segment_length, 490 | clip_limit=clip_limit, 491 | compute_coins=False, 492 | min_dangle_length=min_dangle_length, 493 | simplification_factor=simplification_factor, 494 | consolidation_tolerance=consolidation_tolerance, 495 | ) 496 | # Second instance – w/ COINS 497 | if not _2nd.empty: 498 | streets_cleaned = neatify_singletons( 499 | _2nd, 500 | streets_cleaned, 501 | max_segment_length=max_segment_length, 502 | clip_limit=clip_limit, 503 | compute_coins=True, 504 | min_dangle_length=min_dangle_length, 505 | simplification_factor=simplification_factor, 506 | consolidation_tolerance=consolidation_tolerance, 507 | ) 508 | 509 | # Dispatch cluster simplifier 510 | if not for_skeleton.empty: 511 | streets_cleaned = neatify_clusters( 512 | for_skeleton, 513 | streets_cleaned, 514 | max_segment_length=max_segment_length, 515 | simplification_factor=simplification_factor, 516 | min_dangle_length=min_dangle_length, 517 | consolidation_tolerance=consolidation_tolerance, 518 | ) 519 | 520 | return streets_cleaned 521 | 522 | 523 | def neatify_clusters( 524 | artifacts: gpd.GeoDataFrame, 525 | streets: gpd.GeoDataFrame, 526 | *, 527 | max_segment_length: float | int = 1, 528 | eps: float = 1e-4, 529 | simplification_factor: float | int = 2, 530 | min_dangle_length: float | int = 20, 531 | consolidation_tolerance: float | int = 10, 532 | ) -> gpd.GeoDataFrame: 533 | """Simplification of 
clusters of face artifacts – the third simplification step in 534 | the procedure detailed in ``simplify.neatify_loop()``. 535 | 536 | This process extracts nodes from network edges before iterating over each 537 | cluster artifact and performing simplification. 538 | 539 | Parameters 540 | ---------- 541 | artifacts : geopandas.GeoDataFrame 542 | Face artifact polygons. 543 | streets : geopandas.GeoDataFrame 544 | Preprocessed street network data. 545 | max_segment_length : float | int = 1 546 | Additional vertices will be added so that all line segments 547 | are no longer than this value. Must be greater than 0. 548 | Used in multiple internal geometric operations. 549 | eps : float = 1e-4 550 | Tolerance epsilon used in multiple internal geometric operations. 551 | simplification_factor : float | int = 2 552 | The factor by which singles, pairs, and clusters are simplified. The 553 | ``max_segment_length`` is multiplied by this factor to get the 554 | simplification epsilon. 555 | min_dangle_length : float | int = 20 556 | The threshold for determining if linestrings are dangling slivers to be 557 | removed or not. 558 | consolidation_tolerance : float | int = 10 559 | Tolerance passed to node consolidation when generating Voronoi skeletons. 560 | 561 | Returns 562 | ------- 563 | geopandas.GeoDataFrame 564 | The street network line data following the clusters procedure. 565 | """ 566 | 567 | # Get nodes from the network 568 | nodes = gpd.GeoSeries(_nodes_from_edges(streets.geometry)) 569 | 570 | # Collect changes 571 | to_drop: list[int] = [] 572 | to_add: list[shapely.LineString | shapely.MultiLineString] = [] 573 | 574 | for _, artifact in artifacts.groupby("comp"): 575 | # Get artifact cluster polygon 576 | cluster_geom = artifact.union_all() 577 | # Get edges relevant for an artifact 578 | edges = streets.iloc[ 579 | streets.sindex.query(cluster_geom, predicate="intersects") 580 | ].copy() 581 | 582 | # Clusters of 2 or more nodes and 2 or more continuity groups 583 | nx_gx_cluster( 584 | edges=edges, 585 | cluster_geom=cluster_geom, 586 | nodes=nodes, 587 | to_drop=to_drop, 588 | to_add=to_add, 589 | eps=eps, 590 | max_segment_length=max_segment_length, 591 | min_dangle_length=min_dangle_length, 592 | consolidation_tolerance=consolidation_tolerance, 593 | ) 594 | 595 | cleaned_streets = streets.drop(to_drop) 596 | 597 | # Create new streets with fixed geometry. 598 | # Note: ``to_add`` and ``to_drop`` accumulate across all artifacts, so 599 | # this step runs only once, not per artifact. 600 | new = gpd.GeoDataFrame(geometry=to_add, crs=streets.crs) 601 | new["_status"] = "new" 602 | new["geometry"] = new.line_merge().simplify( 603 | max_segment_length * simplification_factor 604 | ) 605 | new_streets = pd.concat([cleaned_streets, new], ignore_index=True).explode() 606 | agg: dict[str, str | typing.Callable] = {"_status": _status} 607 | for c in new_streets.columns.drop(new_streets.active_geometry_name): 608 | if c != "_status": 609 | agg[c] = "first" 610 | new_streets = remove_interstitial_nodes( 611 | new_streets[~new_streets.is_empty], aggfunc=agg 612 | ).drop_duplicates("geometry") 613 | 614 | return new_streets 615 | 616 | 617 | def get_type(edges: gpd.GeoDataFrame, shared_edge: int) -> str: 618 | """Classify artifact edges according to the ``{C, E, S}`` 619 | schema when considering solutions for pairs of artifacts. 620 | 621 | Parameters 622 | ---------- 623 | edges : geopandas.GeoDataFrame 624 | Artifact edges in consideration. 625 | shared_edge : int 626 | The index location of the shared edge of the pair. 
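    A contrived sketch (all values are hypothetical; only the COINS columns
    are inspected, so no geometry is required). Here the shared edge's stroke
    group terminates inside the artifact but extends beyond the edges in
    consideration, so it is classified ``'E'``::

        >>> import geopandas as gpd
        >>> edges = gpd.GeoDataFrame(
        ...     {
        ...         "coins_group": [0, 0, 1],
        ...         "coins_end": [False, False, True],
        ...         "coins_count": [2, 2, 3],
        ...     }
        ... )
        >>> get_type(edges, shared_edge=2)
        'E'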
627 | 628 | Returns 629 | ------- 630 | str 631 | Classification for an edge in ``{C, E, S}``. 632 | """ 633 | 634 | if ( # Roundabout special case 635 | edges["coins_group"].nunique() == 1 636 | and edges.shape[0] == edges["coins_count"].iloc[0] 637 | ): 638 | return "S" 639 | 640 | all_ends = edges[edges["coins_end"]] 641 | mains = edges[~edges["coins_group"].isin(all_ends["coins_group"])] 642 | shared = edges.loc[shared_edge] 643 | 644 | if shared_edge in mains.index: 645 | return "C" 646 | 647 | if shared["coins_count"] == (edges["coins_group"] == shared["coins_group"]).sum(): 648 | return "S" 649 | 650 | return "E" 651 | 652 | 653 | def get_solution(group: gpd.GeoDataFrame, streets: gpd.GeoDataFrame) -> pd.Series: 654 | """Determine the solution for paired planar artifacts. 655 | 656 | Parameters 657 | ---------- 658 | group : geopandas.GeoDataFrame 659 | Dissolved group of connected planar artifacts. 660 | streets : geopandas.GeoDataFrame 661 | Street network data. 662 | 663 | Returns 664 | ------- 665 | pandas.Series 666 | The determined solution and edge to drop. 667 | """ 668 | 669 | def _relate(loc: int) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: 670 | """Isolate intersecting & covering street geometries.""" 671 | _geom = group.geometry.iloc[loc] 672 | _streets = streets.iloc[streets.sindex.query(_geom, predicate="intersects")] 673 | _covers = _streets.iloc[_streets.sindex.query(_geom, predicate="covers")] 674 | return _streets, _covers 675 | 676 | cluster_geom = group.union_all() 677 | 678 | streets_a, covers_a = _relate(0) 679 | streets_b, covers_b = _relate(1) 680 | 681 | # Find the street segment that is contained within the cluster geometry 682 | shared = streets.index[streets.sindex.query(cluster_geom, predicate="contains")] 683 | 684 | if shared.empty or covers_a.empty or covers_b.empty: 685 | return pd.Series({"solution": "non_planar", "drop_id": None}) 686 | 687 | shared = shared.item() 688 | 689 | if (np.invert(streets_b.index.isin(covers_a.index)).sum() == 1) or ( 690 | np.invert(streets_a.index.isin(covers_b.index)).sum() == 1 691 | ): 692 | return pd.Series({"solution": "drop_interline", "drop_id": shared}) 693 | 694 | seen_by_a = get_type(covers_a, shared) 695 | seen_by_b = get_type(covers_b, shared) 696 | 697 | if seen_by_a == "C" and seen_by_b == "C": 698 | return pd.Series({"solution": "iterate", "drop_id": shared}) 699 | 700 | if seen_by_a == seen_by_b: 701 | return pd.Series({"solution": "drop_interline", "drop_id": shared}) 702 | 703 | return pd.Series({"solution": "skeleton", "drop_id": shared}) 704 | 705 | 706 | def neatify( 707 | streets: gpd.GeoDataFrame, 708 | *, 709 | exclusion_mask: None | gpd.GeoSeries = None, 710 | predicate: str = "intersects", 711 | max_segment_length: float | int = 1, 712 | min_dangle_length: float | int = 20, 713 | clip_limit: float | int = 2, 714 | simplification_factor: float | int = 2, 715 | consolidation_tolerance: float | int = 10, 716 | artifact_threshold: None | float | int = None, 717 | artifact_threshold_fallback: float | int = 7, 718 | area_threshold_blocks: float | int = 1e5, 719 | isoareal_threshold_blocks: float | int = 0.5, 720 | area_threshold_circles: float | int = 5e4, 721 | isoareal_threshold_circles_enclosed: float | int = 0.75, 722 | isoperimetric_threshold_circles_touching: float | int = 0.9, 723 | eps: float = 1e-4, 724 | n_loops: int = 2, 725 | ) -> gpd.GeoDataFrame: 726 | """Top-level workflow for simplifying street networks. 
The input raw street network 727 | data, which must be in a projected coordinate reference system and is expected to be 728 | in meters, is first preprocessed (topological corrections & node consolidation) 729 | before two iterations of artifact detection and simplification. 730 | 731 | Each iteration of the simplification procedure includes (1.) the removal 732 | of false nodes; (2.) face artifact classification; and (3.) the line-based 733 | simplification of face artifacts, in the order of single artifacts, pairs of 734 | artifacts, and clusters of artifacts. 735 | 736 | For further information on face artifact detection and extraction 737 | see :cite:`fleischmann_shape-based_2024`. 738 | 739 | This algorithm is designed for use only with "street" network geometries as input. 740 | While passing in other types of pathing (e.g., sidewalks, canals) will likely yield 741 | valid geometric results, that behavior is untested. 742 | 743 | Parameters 744 | ---------- 745 | streets : geopandas.GeoDataFrame 746 | Raw street network data. This input *must* be in a projected coordinate 747 | reference system and *should* be in meters. All default arguments assume 748 | meters. The internal algorithm is designed for use with street network 749 | geometries, not other types of pathing (e.g., sidewalks, canals), which 750 | should be filtered out. 751 | exclusion_mask : None | geopandas.GeoSeries = None 752 | Geometries used to determine face artifacts to exclude from returned output. 753 | predicate : str = 'intersects' 754 | The spatial predicate used to exclude face artifacts from returned output. 755 | max_segment_length : float | int = 1 756 | Additional vertices will be added so that all line segments 757 | are no longer than this value. Must be greater than 0. 758 | Used in multiple internal geometric operations. 759 | min_dangle_length : float | int = 20 760 | The threshold for determining if linestrings are dangling slivers to be 761 | removed or not. 762 | clip_limit : float | int = 2 763 | Following generation of the Voronoi linework, we clip to fit inside the 764 | polygon. To ensure we get a space to make proper topological connections 765 | from the linework to the actual points on the edge of the polygon, we clip 766 | using a polygon with a negative buffer of ``clip_limit`` or the radius of 767 | maximum inscribed circle, whichever is smaller. 768 | simplification_factor : float | int = 2 769 | The factor by which singles, pairs, and clusters are simplified. The 770 | ``max_segment_length`` is multiplied by this factor to get the 771 | simplification epsilon. 772 | consolidation_tolerance : float | int = 10 773 | Tolerance passed to node consolidation when generating Voronoi skeletons. 774 | artifact_threshold : None | float | int = None 775 | When ``artifact_threshold`` is passed, it is used in place of the value 776 | computed by ``momepy.FaceArtifacts.threshold``. This is useful for 777 | small networks where artifact 778 | detection may fail or become unreliable. 779 | artifact_threshold_fallback : float | int = 7 780 | If artifact threshold detection fails, this value is used as a fallback. 781 | area_threshold_blocks : float | int = 1e5 782 | This is the first threshold for detecting block-like artifacts whose 783 | Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above 784 | the value passed in ``artifact_threshold``. 
785 | If a polygon has an area below ``area_threshold_blocks``, *and* 786 | is of elongated shape (see also ``isoareal_threshold_blocks``), 787 | *and* touches at least one polygon that has already been classified as an artifact, 788 | then it will be classified as an artifact. 789 | isoareal_threshold_blocks : float | int = 0.5 790 | This is the second threshold for detecting block-like artifacts whose 791 | Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above the 792 | value passed in ``artifact_threshold``. If a polygon has an isoareal quotient 793 | below ``isoareal_threshold_blocks`` (see ``esda.shape.isoareal_quotient``), 794 | i.e., it has an elongated shape; *and* it has a sufficiently small area 795 | (see also ``area_threshold_blocks``), *and* it touches at least one 796 | polygon that has already been detected as an artifact, 797 | then it will be classified as an artifact. 798 | area_threshold_circles : float | int = 5e4 799 | This is the first threshold for detecting circle-like artifacts whose 800 | Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above the 801 | value passed in ``artifact_threshold``. If a polygon has an area below 802 | ``area_threshold_circles``, *and* one of the following two cases applies: 803 | (a) the polygon is touched, but not enclosed, by polygons already classified 804 | as artifacts, *and* has an isoperimetric quotient 805 | (see ``esda.shape.isoperimetric_quotient``) 806 | above ``isoperimetric_threshold_circles_touching``, i.e., its shape 807 | is close to circular; or (b) the polygon is fully enclosed by polygons 808 | already classified as artifacts, *and* has an isoareal quotient 809 | above ``isoareal_threshold_circles_enclosed``, 810 | i.e., its shape is 811 | close to circular; then it will be classified as an artifact. 812 | isoareal_threshold_circles_enclosed : float | int = 0.75 813 | This is the second threshold for detecting circle-like artifacts whose 814 | Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above the 815 | value passed in ``artifact_threshold``. If a polygon has a sufficiently small 816 | area (see also ``area_threshold_circles``), *and* the polygon is 817 | fully enclosed by polygons already classified as artifacts, 818 | *and* its isoareal quotient (see ``esda.shape.isoareal_quotient``) 819 | is above the value passed to ``isoareal_threshold_circles_enclosed``, 820 | i.e., its shape is close to circular; 821 | then it will be classified as an artifact. 822 | isoperimetric_threshold_circles_touching : float | int = 0.9 823 | This is the third threshold for detecting circle-like artifacts whose 824 | Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) 825 | is above the value passed in ``artifact_threshold``. 826 | If a polygon has a sufficiently small area 827 | (see also ``area_threshold_circles``), *and* the polygon is touched 828 | by at least one polygon already classified as an artifact, 829 | *and* its isoperimetric quotient (see ``esda.shape.isoperimetric_quotient``) 830 | is above the value passed to ``isoperimetric_threshold_circles_touching``, 831 | i.e., its shape is close to circular; 832 | then it will be classified as an artifact. 833 | eps : float = 1e-4 834 | Tolerance epsilon used in multiple internal geometric operations. 835 | n_loops : int = 2 836 | Number of loops through the simplification pipeline. It is recommended to stick 837 | to the default value and increase it only very conservatively. 
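    Examples
    --------
    A minimal usage sketch (the file path is hypothetical; any street
    network in a projected CRS, measured in meters, would do)::

        >>> import geopandas as gpd
        >>> import neatnet
        >>> streets = gpd.read_parquet("streets.parquet")  # hypothetical path
        >>> simplified = neatnet.neatify(streets)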
838 | 839 | Returns 840 | ------- 841 | geopandas.GeoDataFrame 842 | The final, simplified street network line data. 843 | 844 | Notes 845 | ----- 846 | As noted above, the input network data must be in a projected coordinate 847 | reference system and is expected to be in meters. However, it may be possible to 848 | work with network data projected in feet if all default arguments are adjusted. 849 | """ 850 | 851 | _check_input_crs(streets, exclusion_mask) 852 | 853 | streets = fix_topology(streets, eps=eps) 854 | 855 | # Merge nearby nodes (up to double the distance used in the skeleton). 856 | streets = consolidate_nodes(streets, tolerance=max_segment_length * 2.1) 857 | 858 | # Identify artifacts 859 | artifacts, threshold = get_artifacts( 860 | streets, 861 | exclusion_mask=exclusion_mask, 862 | predicate=predicate, 863 | threshold=artifact_threshold, 864 | threshold_fallback=artifact_threshold_fallback, 865 | area_threshold_blocks=area_threshold_blocks, 866 | isoareal_threshold_blocks=isoareal_threshold_blocks, 867 | area_threshold_circles=area_threshold_circles, 868 | isoareal_threshold_circles_enclosed=isoareal_threshold_circles_enclosed, 869 | isoperimetric_threshold_circles_touching=isoperimetric_threshold_circles_touching, 870 | ) 871 | 872 | # Loop 1 873 | new_streets = neatify_loop( 874 | streets, 875 | artifacts, 876 | max_segment_length=max_segment_length, 877 | min_dangle_length=min_dangle_length, 878 | clip_limit=clip_limit, 879 | simplification_factor=simplification_factor, 880 | consolidation_tolerance=consolidation_tolerance, 881 | eps=eps, 882 | ) 883 | 884 | # This potentially fixes minor erroneous edges coming from the Voronoi step 885 | new_streets = induce_nodes(new_streets, eps=eps) 886 | new_streets = new_streets[~new_streets.geometry.normalize().duplicated()].copy() 887 | 888 | for _ in range(2, n_loops + 1): 889 | # Identify artifacts based on the network from the previous loop 890 | artifacts, _ = get_artifacts( 891 | new_streets, 892 | threshold=threshold, 893 | threshold_fallback=artifact_threshold_fallback, 894 | area_threshold_blocks=area_threshold_blocks, 895 | isoareal_threshold_blocks=isoareal_threshold_blocks, 896 | area_threshold_circles=area_threshold_circles, 897 | isoareal_threshold_circles_enclosed=isoareal_threshold_circles_enclosed, 898 | isoperimetric_threshold_circles_touching=isoperimetric_threshold_circles_touching, 899 | exclusion_mask=exclusion_mask, 900 | predicate=predicate, 901 | ) 902 | 903 | new_streets = neatify_loop( 904 | new_streets, 905 | artifacts, 906 | max_segment_length=max_segment_length, 907 | min_dangle_length=min_dangle_length, 908 | clip_limit=clip_limit, 909 | simplification_factor=simplification_factor, 910 | consolidation_tolerance=consolidation_tolerance, 911 | eps=eps, 912 | ) 913 | 914 | # This potentially fixes minor erroneous edges coming from the Voronoi step 915 | new_streets = induce_nodes(new_streets, eps=eps) 916 | new_streets = new_streets[~new_streets.geometry.normalize().duplicated()].copy() 917 | 918 | return new_streets 919 | 920 | 921 | def neatify_loop( 922 | streets: gpd.GeoDataFrame, 923 | artifacts: gpd.GeoDataFrame, 924 | *, 925 | max_segment_length: float | int = 1, 926 | min_dangle_length: float | int = 20, 927 | clip_limit: float | int = 2, 928 | simplification_factor: float | int = 2, 929 | consolidation_tolerance: float | int = 10, 930 | eps: float = 1e-4, 931 | ) -> gpd.GeoDataFrame: 932 | """Perform an iteration of the simplification procedure, which includes: 933 | 1. Removal of false nodes 934 | 2. 
Artifact classification 935 | 3. Simplifying artifacts: 936 | - Single artifacts 937 | - Pairs of artifacts 938 | - Clusters of artifacts 939 | 940 | Parameters 941 | ---------- 942 | streets : geopandas.GeoDataFrame 943 | Raw street network data. 944 | artifacts : geopandas.GeoDataFrame 945 | Face artifact polygons. 946 | max_segment_length : float | int = 1 947 | Additional vertices will be added so that all line segments 948 | are no longer than this value. Must be greater than 0. 949 | Used in multiple internal geometric operations. 950 | min_dangle_length : float | int = 20 951 | The threshold for determining if linestrings are dangling slivers to be 952 | removed or not. 953 | clip_limit : float | int = 2 954 | Following generation of the Voronoi linework, we clip to fit inside the 955 | polygon. To ensure we get a space to make proper topological connections 956 | from the linework to the actual points on the edge of the polygon, we clip 957 | using a polygon with a negative buffer of ``clip_limit`` or the radius of 958 | maximum inscribed circle, whichever is smaller. 959 | simplification_factor : float | int = 2 960 | The factor by which singles, pairs, and clusters are simplified. The 961 | ``max_segment_length`` is multiplied by this factor to get the 962 | simplification epsilon. 963 | consolidation_tolerance : float | int = 10 964 | Tolerance passed to node consolidation when generating Voronoi skeletons. 965 | eps : float = 1e-4 966 | Tolerance epsilon used in multiple internal geometric operations. 967 | 968 | Returns 969 | ------- 970 | geopandas.GeoDataFrame 971 | The street network line data following 1 iteration of simplification. 972 | """ 973 | 974 | # Remove edges fully within the artifact (dangles). 975 | _, r_idx = streets.sindex.query(artifacts.geometry, predicate="contains") 976 | # Dropping may lead to new false nodes – drop those 977 | streets = remove_interstitial_nodes(streets.drop(streets.index[r_idx])) 978 | 979 | # Filter singleton artifacts 980 | rook = graph.Graph.build_contiguity(artifacts, rook=True) 981 | 982 | # Keep only those artifacts which occur as isolates, 983 | # e.g. 
artifacts that are not part of a larger intersection 984 | singles = artifacts.loc[artifacts.index.intersection(rook.isolates)].copy() 985 | 986 | # Filter doubles 987 | artifacts["comp"] = rook.component_labels 988 | counts = artifacts["comp"].value_counts() 989 | doubles = artifacts.loc[artifacts["comp"].isin(counts[counts == 2].index)].copy() 990 | 991 | # Filter clusters 992 | clusters = artifacts.loc[artifacts["comp"].isin(counts[counts > 2].index)].copy() 993 | 994 | if not singles.empty: 995 | # NOTE: this drops attributes 996 | streets = neatify_singletons( 997 | singles, 998 | streets, 999 | max_segment_length=max_segment_length, 1000 | simplification_factor=simplification_factor, 1001 | consolidation_tolerance=consolidation_tolerance, 1002 | ) 1003 | if not doubles.empty: 1004 | streets = neatify_pairs( 1005 | doubles, 1006 | streets, 1007 | max_segment_length=max_segment_length, 1008 | min_dangle_length=min_dangle_length, 1009 | clip_limit=clip_limit, 1010 | simplification_factor=simplification_factor, 1011 | consolidation_tolerance=consolidation_tolerance, 1012 | ) 1013 | if not clusters.empty: 1014 | streets = neatify_clusters( 1015 | clusters, 1016 | streets, 1017 | max_segment_length=max_segment_length, 1018 | simplification_factor=simplification_factor, 1019 | eps=eps, 1020 | min_dangle_length=min_dangle_length, 1021 | consolidation_tolerance=consolidation_tolerance, 1022 | ) 1023 | 1024 | if "coins_group" in streets.columns: 1025 | streets = streets.drop( 1026 | columns=[c for c in streets.columns if c.startswith("coins_")] 1027 | ) 1028 | return streets 1029 | -------------------------------------------------------------------------------- /neatnet/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import platform 3 | import warnings 4 | 5 | import geopandas.testing 6 | import matplotlib.pyplot 7 | import numpy 8 | import pandas 9 | import pytest 10 | import shapely 11 | 12 | import neatnet 13 | 14 | # set the global exception raiser for testing & debugging 15 | # See gh#121 16 | neatnet.simplify.DEBUGGING = False 17 | 18 | line_collection = ( # type: ignore[valid-type, misc] 19 | list[shapely.LineString] 20 | | tuple[shapely.LineString] 21 | | numpy.ndarray 22 | | pandas.Series 23 | | geopandas.GeoSeries 24 | ) 25 | 26 | geometry_collection = ( 27 | list[shapely.GeometryCollection] 28 | | tuple[shapely.GeometryCollection] 29 | | numpy.ndarray 30 | | pandas.Series 31 | | geopandas.GeoSeries 32 | ) 33 | 34 | 35 | #################################################### 36 | # see: 37 | # - gh#106 38 | # - gh#102 39 | # - gh#77 40 | # - gh#75 41 | # - gh#74 42 | KNOWN_BAD_GEOMS = { 43 | "aleppo_1133": [], 44 | "auckland_869": [1412], 45 | "bucaramanga_4617": [], 46 | "douala_809": [], 47 | "liege_1656": [921], 48 | "slc_4881": [1144, 1146], 49 | "wuhan_8989": [], 50 | "apalachicola_standard": [324], 51 | "apalachicola_exclusion_mask": [], 52 | } 53 | #################################################### 54 | 55 | 56 | def polygonize( 57 | collection: line_collection, # type: ignore[valid-type] 58 | as_geom: bool = True, # type: ignore[valid-type] 59 | ) -> shapely.Polygon | geopandas.GeoSeries: 60 | """Testing helper -- Create polygon from collection of lines.""" 61 | if isinstance(collection, pandas.Series | geopandas.GeoSeries): 62 | _poly = geopandas.GeoSeries(collection).polygonize() 63 | if as_geom: 64 | return _poly.squeeze() 65 | else: 66 | return _poly 67 | else: 68 | return 
shapely.polygonize(collection).buffer(0) 69 | 70 | 71 | def is_geopandas(collection: geometry_collection) -> bool:  # type: ignore[valid-type] 72 | return isinstance(collection, geopandas.GeoSeries | geopandas.GeoDataFrame) 73 | 74 | 75 | def geom_test( 76 | collection1: geometry_collection,  # type: ignore[valid-type] 77 | collection2: geometry_collection,  # type: ignore[valid-type] 78 | tolerance: float = 1e-1, 79 | aoi: None | str = None, 80 | ) -> bool: 81 | """Testing helper -- geometry verification.""" 82 | 83 | if not is_geopandas(collection1): 84 | collection1 = geopandas.GeoSeries(collection1) 85 | 86 | if not is_geopandas(collection2): 87 | collection2 = geopandas.GeoSeries(collection2) 88 | 89 | geoms1 = collection1.geometry.normalize()  # type: ignore[attr-defined] 90 | geoms2 = collection2.geometry.normalize()  # type: ignore[attr-defined] 91 | 92 | if aoi and aoi.startswith("apalachicola"): 93 | # Varied index order across OSs. 94 | # See [https://github.com/uscuni/neatnet/pull/104#issuecomment-2495572388] 95 | geoms1 = geoms1.sort_values().reset_index(drop=True) 96 | geoms2 = geoms2.sort_values().reset_index(drop=True) 97 | 98 | try: 99 | assert shapely.equals_exact(geoms1, geoms2, tolerance=tolerance).all() 100 | except AssertionError: 101 | unexpected_bad = {} 102 | for ix in geoms1.index: 103 | g1 = geoms1.loc[ix] 104 | g2 = geoms2.loc[ix] 105 | if ( 106 | not shapely.equals_exact(g1, g2, tolerance=tolerance) 107 | and ix not in KNOWN_BAD_GEOMS[aoi]  # type: ignore[index] 108 | ): 109 | unexpected_bad[ix] = { 110 | "n_coords": { 111 | "g1": shapely.get_coordinates(g1).shape[0], 112 | "g2": shapely.get_coordinates(g2).shape[0], 113 | }, 114 | "length": {"g1": g1.length, "g2": g2.length}, 115 | } 116 | if unexpected_bad: 117 | raise AssertionError( 118 | f"Problem in '{aoi}' – check locs: {unexpected_bad}" 119 | ) from None 120 | return True 121 | 122 | 123 | def difference_plot( 124 | aoi: str, 125 | writedir: pathlib.Path, 126 | known: geopandas.GeoDataFrame, 127 | observed: geopandas.GeoDataFrame, 128 | diff_buff: int = 50, 129 | ): 130 | """Plot locations where the observed simplified network differs from the known one.""" 131 | 132 | crs = known.crs 133 | 134 | # unioned multilinestring of each - known & observed 135 | known = geopandas.GeoDataFrame(geometry=[known.union_all()], crs=crs) 136 | observed = geopandas.GeoDataFrame(geometry=[observed.union_all()], crs=crs) 137 | 138 | # unioned difference of k-o + o-k 139 | known_observed_diff = known.difference(observed) 140 | observed_known_diff = observed.difference(known) 141 | differences = geopandas.GeoDataFrame( 142 | geometry=[ 143 | pandas.concat([known_observed_diff, observed_known_diff]) 144 | .explode() 145 | .union_all() 146 | ], 147 | crs=crs, 148 | ) 149 | 150 | # plot difference locations in relation to known 151 | base = known.plot(figsize=(15, 15), zorder=2, alpha=0.4, ec="k", lw=0.5) 152 | with warnings.catch_warnings(): 153 | # See GL#188 154 | warnings.filterwarnings( 155 | "ignore", 156 | message="The GeoSeries you are attempting to plot", 157 | category=UserWarning, 158 | ) 159 | differences.buffer(diff_buff).plot(ax=base, zorder=1, fc="r", alpha=0.6) 160 | base.set_title(f"known vs. 
observed differences - {aoi}") 161 | matplotlib.pyplot.savefig(writedir / f"{aoi}.png", dpi=300, bbox_inches="tight") 162 | 163 | 164 | def pytest_addoption(parser): 165 | """Add custom command line arguments.""" 166 | 167 | # flag for determining CI environment 168 | parser.addoption( 169 | "--env_type", 170 | action="store", 171 | default="latest", 172 | help="Testing environment type label", 173 | type=str, 174 | ) 175 | 176 | 177 | def pytest_configure(config): # noqa: ARG001 178 | """PyTest session attributes, methods, etc.""" 179 | 180 | valid_env_types = ["oldest", "latest", "dev"] 181 | pytest.env_type = config.getoption("env_type").split("_")[-1] 182 | assert pytest.env_type in valid_env_types 183 | 184 | pytest.ubuntu = "ubuntu" in platform.version().lower() 185 | 186 | pytest.polygonize = polygonize 187 | pytest.geom_test = geom_test 188 | pytest.difference_plot = difference_plot 189 | -------------------------------------------------------------------------------- /neatnet/tests/data/apalachicola_original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/neatnet/tests/data/apalachicola_original.parquet -------------------------------------------------------------------------------- /neatnet/tests/data/apalachicola_simplified_exclusion_mask.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/neatnet/tests/data/apalachicola_simplified_exclusion_mask.parquet -------------------------------------------------------------------------------- /neatnet/tests/data/apalachicola_simplified_standard.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/neatnet/tests/data/apalachicola_simplified_standard.parquet -------------------------------------------------------------------------------- /neatnet/tests/test_artifacts.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import geopandas 4 | import pytest 5 | import shapely 6 | 7 | import neatnet 8 | 9 | 10 | def test_get_artifacts_error(): 11 | path = pathlib.Path("neatnet", "tests", "data", "apalachicola_original.parquet") 12 | with pytest.raises( # noqa: SIM117 13 | ValueError, 14 | match=( 15 | "No threshold for artifact detection found. Pass explicit " 16 | "`threshold` or `threshold_fallback` to provide the value directly." 17 | ), 18 | ): 19 | with pytest.warns( 20 | UserWarning, 21 | match=( 22 | "Input streets could not not be polygonized. " 23 | "Identification of face artifacts not possible." 
24 | ), 25 | ): 26 | neatnet.artifacts.get_artifacts(geopandas.read_parquet(path).iloc[:3]) 27 | 28 | 29 | def test_FaceArtifacts(): # noqa: N802 30 | osmnx = pytest.importorskip("osmnx") 31 | type_filter = ( 32 | '["highway"~"living_street|motorway|motorway_link|pedestrian|primary' 33 | "|primary_link|residential|secondary|secondary_link|service|tertiary" 34 | '|tertiary_link|trunk|trunk_link|unclassified|service"]' 35 | ) 36 | streets_graph = osmnx.graph_from_point( 37 | (35.7798, -78.6421), 38 | dist=1000, 39 | network_type="all_private", 40 | custom_filter=type_filter, 41 | retain_all=True, 42 | simplify=False, 43 | ) 44 | streets_graph = osmnx.projection.project_graph(streets_graph) 45 | gdf = osmnx.graph_to_gdfs( 46 | osmnx.convert.to_undirected(streets_graph), 47 | nodes=False, 48 | edges=True, 49 | node_geometry=False, 50 | fill_edge_geometry=True, 51 | ) 52 | fa = neatnet.FaceArtifacts(gdf) 53 | assert 6 < fa.threshold < 9 54 | assert isinstance(fa.face_artifacts, geopandas.GeoDataFrame) 55 | assert fa.face_artifacts.shape[0] > 200 56 | assert fa.face_artifacts.shape[1] == 2 57 | 58 | with pytest.warns(UserWarning, match="No threshold found"): 59 | neatnet.FaceArtifacts(gdf.cx[712104:713000, 3961073:3961500]) 60 | 61 | fa_ipq = neatnet.FaceArtifacts(gdf, index="isoperimetric_quotient") 62 | assert 6 < fa_ipq.threshold < 9 63 | assert fa_ipq.threshold != fa.threshold 64 | 65 | fa_dia = neatnet.FaceArtifacts(gdf, index="diameter_ratio") 66 | assert 6 < fa_dia.threshold < 9 67 | assert fa_dia.threshold != fa.threshold 68 | 69 | fa = neatnet.FaceArtifacts(gdf, index="isoperimetric_quotient") 70 | assert 6 < fa.threshold < 9 71 | 72 | with pytest.raises(ValueError, match="'banana' is not supported"): 73 | neatnet.FaceArtifacts(gdf, index="banana") 74 | 75 | p1, p2, p3, p4 = ( 76 | shapely.Point(1, 0), 77 | shapely.Point(2, 0), 78 | shapely.Point(3, 0), 79 | shapely.Point(2, 1), 80 | ) 81 | inverted_t = [ 82 | shapely.LineString((p1, p2)), 83 | shapely.LineString((p2, p3)), 84 | shapely.LineString((p2, p4)), 85 | ] 86 | 87 | with pytest.warns( 88 | UserWarning, 89 | match=( 90 | "Input streets could not not be polygonized. " 91 | "Identification of face artifacts not possible." 
92 | ), 93 | ): 94 | neatnet.FaceArtifacts(geopandas.GeoDataFrame(geometry=inverted_t)) 95 | -------------------------------------------------------------------------------- /neatnet/tests/test_continuity.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | import geopandas.testing 4 | import momepy 5 | import pandas 6 | import pytest 7 | import shapely 8 | 9 | import neatnet 10 | 11 | 12 | @pytest.fixture 13 | def streets() -> geopandas.GeoDataFrame: 14 | """Toy set of 'streets' for testing only.""" 15 | inita = 2 16 | final = 8 17 | grid = list(range(inita, final)) 18 | vert_points = list(zip(grid[:-1], grid[1:], strict=True)) 19 | hori_points = [(j, i) for i, j in vert_points] 20 | vert_lines = [ 21 | shapely.LineString(i) 22 | for i in list(zip(hori_points[:-1], vert_points[1:], strict=True)) 23 | ] 24 | hori_lines = [ 25 | shapely.LineString(i) 26 | for i in list(zip(vert_points[:-1], hori_points[1:], strict=True)) 27 | ] 28 | return geopandas.GeoDataFrame( 29 | geometry=( 30 | vert_lines 31 | + hori_lines 32 | + [ 33 | shapely.LineString(((4, 5), (3, 6))), 34 | shapely.LineString(((3, 6), (4, 4))), 35 | shapely.LineString(((6, 3), (5, 4))), 36 | shapely.LineString(((3, 6), (3, 4))), 37 | shapely.LineString(((5, 5), (6, 6))), 38 | shapely.LineString(((6, 7), (7, 7))), 39 | shapely.LineString(((7, 6), (7, 7))), 40 | ] 41 | ) 42 | ) 43 | 44 | 45 | def test_continuity(streets): 46 | observed_continuity, observed_coins = neatnet.continuity.continuity(streets) 47 | 48 | assert isinstance(observed_continuity, geopandas.GeoDataFrame) 49 | known_continuity = ( 50 | geopandas.GeoDataFrame( 51 | pandas.read_csv( 52 | io.StringIO( 53 | "geometry coins_group coins_end coins_len coins_count\n" 54 | "LINESTRING (3 2, 3 4) 0 True 4.0 2\n" 55 | "LINESTRING (4 3, 4 5) 1 True 4.0 2\n" 56 | "LINESTRING (5 4, 5 6) 2 True 11.414213562373096 7\n" 57 | "LINESTRING (6 5, 6 7) 2 False 11.414213562373096 7\n" 58 | "LINESTRING (2 3, 4 3) 1 True 4.0 2\n" 59 | "LINESTRING (3 4, 5 4) 3 True 3.414213562373095 2\n" 60 | "LINESTRING (4 5, 6 5) 2 False 11.414213562373096 7\n" 61 | "LINESTRING (5 6, 7 6) 2 False 11.414213562373096 7\n" 62 | "LINESTRING (4 5, 3 6) 2 True 11.414213562373096 7\n" 63 | "LINESTRING (3 6, 4 4) 4 True 2.23606797749979 1\n" 64 | "LINESTRING (6 3, 5 4) 3 True 3.414213562373095 2\n" 65 | "LINESTRING (3 6, 3 4) 0 True 4.0 2\n" 66 | "LINESTRING (5 5, 6 6) 5 True 1.4142135623730951 1\n" 67 | "LINESTRING (6 7, 7 7) 2 False 11.414213562373096 7\n" 68 | "LINESTRING (7 6, 7 7) 2 False 11.414213562373096 7\n" 69 | ), 70 | sep="\t", 71 | ) 72 | ) 73 | .pipe(lambda df: df.assign(**{"geometry": shapely.from_wkt(df["geometry"])})) 74 | .set_geometry("geometry") 75 | ) 76 | geopandas.testing.assert_geodataframe_equal(observed_continuity, known_continuity) 77 | 78 | assert isinstance(observed_coins, momepy.COINS) 79 | assert observed_coins.already_merged 80 | assert observed_coins.merging_list == [ 81 | [0, 11], 82 | [1, 4], 83 | [2, 3, 6, 7, 8, 13, 14], 84 | [5, 10], 85 | [9], 86 | [12], 87 | ] 88 | assert len(observed_coins.angle_pairs) == 36 89 | 90 | 91 | def test_get_stroke_info(streets): 92 | known_strokes = [0, 0, 2, 1, 1, 1, 2] 93 | known_c_ = [0, 0, 0, 0, 0, 1, 0] 94 | known_e_ = [0, 0, 1, 0, 0, 0, 1] 95 | known_s_ = [0, 0, 1, 1, 1, 0, 1] 96 | 97 | observed = neatnet.continuity.get_stroke_info( 98 | neatnet.artifacts.get_artifacts(streets, threshold=1)[0], 99 | neatnet.continuity.continuity(streets.copy())[0], 100 | ) 101 | 102 | observed_strokes 
= observed[0] 103 | observed_c_ = observed[1] 104 | observed_e_ = observed[2] 105 | observed_s_ = observed[3] 106 | 107 | assert observed_strokes == known_strokes 108 | assert observed_c_ == known_c_ 109 | assert observed_e_ == known_e_ 110 | assert observed_s_ == known_s_ 111 | -------------------------------------------------------------------------------- /neatnet/tests/test_gaps.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import pytest 3 | from shapely.geometry import LineString 4 | 5 | import neatnet 6 | 7 | 8 | def test_close_gaps(): 9 | l1 = LineString([(1, 0), (2, 1)]) 10 | l2 = LineString([(2.1, 1), (3, 2)]) 11 | l3 = LineString([(3.1, 2), (4, 0)]) 12 | l4 = LineString([(4.1, 0), (5, 0)]) 13 | l5 = LineString([(5.1, 0), (6, 0)]) 14 | df = gpd.GeoDataFrame(geometry=[l1, l2, l3, l4, l5]) 15 | 16 | closed = neatnet.close_gaps(df, 0.25) 17 | assert len(closed) == len(df) 18 | 19 | merged = neatnet.remove_interstitial_nodes(closed) 20 | assert len(merged) == 1 21 | assert merged.length[0] == pytest.approx(7.0502, rel=1e-3) 22 | 23 | 24 | def test_extend_lines(): 25 | l1 = LineString([(1, 0), (1.9, 0)]) 26 | l2 = LineString([(2.1, -1), (2.1, 1)]) 27 | l3 = LineString([(2, 1.1), (3, 1.1)]) 28 | gdf = gpd.GeoDataFrame([1, 2, 3], geometry=[l1, l2, l3]) 29 | 30 | ext1 = neatnet.extend_lines(gdf, 2) 31 | assert ext1.length.sum() > gdf.length.sum() 32 | assert ext1.length.sum() == pytest.approx(4.2, rel=1e-3) 33 | 34 | target = gpd.GeoSeries([l2.centroid.buffer(3)]) 35 | ext2 = neatnet.extend_lines(gdf, 3, target=target) 36 | 37 | assert ext2.length.sum() > gdf.length.sum() 38 | assert ext2.length.sum() == pytest.approx(17.3776, rel=1e-3) 39 | 40 | barrier = LineString([(2, -1), (2, 1)]) 41 | ext3 = neatnet.extend_lines(gdf, 2, barrier=gpd.GeoSeries([barrier])) 42 | 43 | assert ext3.length.sum() > gdf.length.sum() 44 | assert ext3.length.sum() == pytest.approx(4, rel=1e-3) 45 | 46 | ext4 = neatnet.extend_lines(gdf, 2, extension=1) 47 | assert ext4.length.sum() > gdf.length.sum() 48 | assert ext4.length.sum() == pytest.approx(10.2, rel=1e-3) 49 | 50 | gdf = gpd.GeoDataFrame([1, 2, 3, 4], geometry=[l1, l2, l3, barrier]) 51 | ext5 = neatnet.extend_lines(gdf, 2) 52 | assert ext5.length.sum() > gdf.length.sum() 53 | assert ext5.length.sum() == pytest.approx(6.2, rel=1e-3) 54 | -------------------------------------------------------------------------------- /neatnet/tests/test_geometry.py: -------------------------------------------------------------------------------- 1 | import geopandas.testing 2 | import numpy 3 | import pandas 4 | import pytest 5 | import shapely 6 | 7 | import neatnet 8 | 9 | 10 | class TestIsWithin: 11 | def setup_method(self): 12 | self.polygon = shapely.Polygon(((0, 0), (10, 0), (10, 10), (0, 10), (0, 0))) 13 | 14 | def test_within_fully(self): 15 | line = shapely.LineString(((2, 2), (8, 8))) 16 | 17 | known = True 18 | observed = neatnet.geometry._is_within(line, self.polygon) 19 | 20 | assert known == observed 21 | 22 | def test_within_tol(self): 23 | line = shapely.LineString(((2, 2), (2, 10.0001))) 24 | 25 | known = True 26 | observed = neatnet.geometry._is_within(line, self.polygon) 27 | 28 | assert known == observed 29 | 30 | def test_not_within_tol(self): 31 | line = shapely.LineString(((2, 2), (2, 10.001))) 32 | 33 | known = False 34 | observed = neatnet.geometry._is_within(line, self.polygon) 35 | 36 | assert known == observed 37 | 38 | def test_within_tol_strict(self): 39 | line = 
shapely.LineString(((2, 2), (2, 10.0000001))) 40 | 41 | known = True 42 | observed = neatnet.geometry._is_within(line, self.polygon, rtol=1e-7) 43 | 44 | assert known == observed 45 | 46 | def test_not_within_tol_strict(self): 47 | line = shapely.LineString(((2, 2), (2, 10.000001))) 48 | 49 | known = False 50 | observed = neatnet.geometry._is_within(line, self.polygon, rtol=1e-7) 51 | 52 | assert known == observed 53 | 54 | def test_within_tol_relaxed(self): 55 | line = shapely.LineString(((2, 2), (2, 11))) 56 | 57 | known = True 58 | observed = neatnet.geometry._is_within(line, self.polygon, rtol=1) 59 | 60 | assert known == observed 61 | 62 | def test_not_within_tol_relaxed(self): 63 | line = shapely.LineString(((2, 2), (2, 12))) 64 | 65 | known = False 66 | observed = neatnet.geometry._is_within(line, self.polygon, rtol=1) 67 | 68 | assert known == observed 69 | 70 | def test_not_within(self): 71 | line = shapely.LineString(((11, 11), (12, 12))) 72 | 73 | known = False 74 | observed = neatnet.geometry._is_within(line, self.polygon) 75 | 76 | assert known == observed 77 | 78 | 79 | class TestAngleBetween2Lines: 80 | def setup_method(self): 81 | self.line1 = shapely.LineString(((0, 0), (1, 0))) 82 | self.line2 = shapely.LineString(((1, 0), (1, 1))) 83 | self.line3 = shapely.LineString(((0, 0), (0, 1))) 84 | self.line4 = shapely.LineString(((0, 1), (1, 1))) 85 | 86 | def test_q1(self): 87 | known = 90.0 88 | observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line3) 89 | assert observed == known 90 | 91 | def test_q2(self): 92 | known = 90.0 93 | observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line2) 94 | assert observed == known 95 | 96 | def test_q3(self): 97 | known = 90.0 98 | observed = neatnet.geometry.angle_between_two_lines(self.line2, self.line4) 99 | assert observed == known 100 | 101 | def test_q4(self): 102 | known = 90.0 103 | observed = neatnet.geometry.angle_between_two_lines(self.line3, self.line4) 104 | assert observed == known 105 | 106 | def test_indistinct(self): 107 | known = 0.0 108 | with pytest.warns( 109 | UserWarning, 110 | match="Input lines are identical - must be distinct. Returning 0.0.", 111 | ): 112 | observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line1) 113 | assert observed == known 114 | 115 | def test_not_adjacent(self): 116 | known = 0.0 117 | with pytest.warns( 118 | UserWarning, match="Input lines do not share a vertex. Returning 0.0." 
119 | ): 120 | observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line4) 121 | assert observed == known 122 | 123 | 124 | voronoi_skeleton_params = pytest.mark.parametrize( 125 | "lines_type,as_poly,buffer", 126 | [ 127 | (list, False, None), 128 | (list, True, 0.001), 129 | (numpy.array, False, 0.01), 130 | (numpy.array, True, 0.1), 131 | (pandas.Series, False, 1), 132 | (pandas.Series, True, 2.0), 133 | (geopandas.GeoSeries, False, 5), 134 | (geopandas.GeoSeries, True, 10.314), 135 | ], 136 | ) 137 | 138 | 139 | class TestVoronoiSkeleton: 140 | def setup_method(self): 141 | self.square = [ 142 | shapely.LineString(((0, 0), (1000, 0))), 143 | shapely.LineString(((1000, 0), (1000, 1000))), 144 | shapely.LineString(((0, 0), (0, 1000))), 145 | shapely.LineString(((0, 1000), (1000, 1000))), 146 | ] 147 | self.known_square_skeleton_edges = numpy.array( 148 | [ 149 | shapely.LineString(((1000, 0), (998, 2), (500, 500))), 150 | shapely.LineString(((0, 0), (2, 2), (500, 500))), 151 | shapely.LineString(((1000, 1000), (998, 998), (500, 500))), 152 | shapely.LineString(((0, 1000), (2, 998), (500, 500))), 153 | ] 154 | ) 155 | self.known_square_skeleton_splits = [shapely.Point(0, 0)] 156 | self.known_square_skeleton_splits_snap_to = [ 157 | shapely.Point(1000, 0), 158 | shapely.Point(0, 0), 159 | shapely.Point(0, 1000), 160 | shapely.Point(1000, 1000), 161 | ] 162 | 163 | @voronoi_skeleton_params 164 | def test_square(self, lines_type, as_poly, buffer): 165 | known_edges = self.known_square_skeleton_edges 166 | known_splits = self.known_square_skeleton_splits 167 | 168 | lines = lines_type(self.square) 169 | poly = pytest.polygonize(lines) if as_poly else None 170 | observed_edges, observed_splits = neatnet.geometry.voronoi_skeleton( 171 | lines, 172 | poly=poly, 173 | buffer=buffer, 174 | ) 175 | 176 | pytest.geom_test(observed_edges, known_edges) 177 | pytest.geom_test(observed_splits, known_splits) 178 | 179 | @voronoi_skeleton_params 180 | def test_square_snap_to(self, lines_type, as_poly, buffer): 181 | known_edges = self.known_square_skeleton_edges 182 | known_splits = self.known_square_skeleton_splits_snap_to 183 | 184 | lines = lines_type(self.square) 185 | poly = pytest.polygonize(lines) if as_poly else None 186 | observed_edges, observed_splits = neatnet.geometry.voronoi_skeleton( 187 | lines, 188 | poly=poly, 189 | buffer=buffer, 190 | snap_to=( 191 | pytest.polygonize(geopandas.GeoSeries(lines), as_geom=False) 192 | .extract_unique_points() 193 | .explode() 194 | ), 195 | ) 196 | 197 | pytest.geom_test(observed_edges, known_edges) 198 | pytest.geom_test(observed_splits, known_splits) 199 | 200 | 201 | line_100_900 = shapely.LineString(((1000, 1000), (1000, 9000))) 202 | line_100_120 = shapely.LineString(((1000, 1020), (1020, 1020))) 203 | lines_100_900_100_120 = shapely.MultiLineString((line_100_900, line_100_120)) 204 | line_110_900 = shapely.LineString(((1000, 9000), (1100, 9000))) 205 | 206 | 207 | def test_remove_sliver(): 208 | known = line_100_900 209 | observed = neatnet.geometry._remove_sliver(lines_100_900_100_120) 210 | assert observed == known 211 | 212 | 213 | def test_as_parts(): 214 | known = numpy.array([line_100_900, line_100_120, line_110_900]) 215 | observed = neatnet.geometry._as_parts( 216 | numpy.array([lines_100_900_100_120, line_110_900]) 217 | ) 218 | numpy.testing.assert_array_equal(observed, known) 219 | 220 | 221 | @pytest.mark.parametrize("tolerance", [0.1, 1, 10, 100, 1_000, 10_000, 100_000]) 222 | def test_consolidate(tolerance): 223 | 
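    # Across every parametrized tolerance, ``_consolidate`` is expected to
    # leave these three disjoint lines unchanged -- ``known`` simply mirrors
    # the input.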
known = numpy.array([line_100_900, line_100_120, line_110_900]) 224 | observed = neatnet.geometry._consolidate( 225 | numpy.array([line_100_900, line_100_120, line_110_900]), tolerance 226 | ) 227 | numpy.testing.assert_array_equal(observed, known) 228 | 229 | 230 | def test_prep_components(): 231 | line1 = shapely.LineString(((1, 1), (1, 2))) 232 | line2 = shapely.LineString(((1, 2), (2, 2))) 233 | line3 = shapely.LineString(((3, 0), (3, 3))) 234 | 235 | known_labels = pandas.Series( 236 | [0, 0, 1], 237 | index=pandas.Index([0, 1, 2], name="focal"), 238 | name="component labels", 239 | dtype=numpy.int32, 240 | ) 241 | known_counts = pandas.Series( 242 | [2, 1], 243 | index=pandas.Index([0, 1], name="component labels", dtype=numpy.int32), 244 | name="count", 245 | dtype=numpy.int64, 246 | ) 247 | known_comps = geopandas.GeoDataFrame( 248 | geometry=[ 249 | shapely.MultiLineString( 250 | ( 251 | shapely.LineString(((1, 1), (1, 2))), 252 | shapely.LineString(((1, 2), (2, 2))), 253 | ) 254 | ), 255 | shapely.LineString(((3, 0), (3, 3))), 256 | ], 257 | index=pandas.Index([0, 1], name="component labels", dtype=numpy.int32), 258 | ) 259 | 260 | observed_labels, observed_counts, observed_comps = ( 261 | neatnet.geometry._prep_components([line1, line2, line3]) 262 | ) 263 | 264 | pandas.testing.assert_series_equal(observed_labels, known_labels) 265 | pandas.testing.assert_series_equal(observed_counts, known_counts) 266 | geopandas.testing.assert_geodataframe_equal(observed_comps, known_comps) 267 | 268 | 269 | def test_split_add(): 270 | _x = 1100 271 | x1, y1 = _x, 0 272 | x2, y2 = _x, 1000 273 | sl = shapely.LineString(((x1, y1), (x2, y2))) 274 | known_splits = [shapely.Point((x2, y2))] 275 | known_adds = [sl] 276 | observed_splits, observed_adds = neatnet.geometry._split_add(sl, [], []) 277 | assert observed_splits == known_splits 278 | assert observed_adds == known_adds 279 | 280 | 281 | class TestSnapToTargets: 282 | def setup_method(self): 283 | # edgelines 284 | line1 = shapely.LineString(((100, 100), (1000, 100))) 285 | line2 = shapely.LineString(((1000, 100), (1000, 1000))) 286 | line3 = shapely.LineString(((100, 100), (100, 1000))) 287 | line4 = shapely.LineString(((100, 1000), (1000, 1000))) 288 | self.lines = [line1, line2, line3, line4] 289 | 290 | # poly 291 | self.poly = shapely.polygonize(self.lines).buffer(0) 292 | 293 | # snap_to 294 | self.snap_to_1 = ( 295 | geopandas.GeoSeries(self.lines) 296 | .polygonize() 297 | .extract_unique_points() 298 | .explode() 299 | ) 300 | 301 | def test_warn(self): 302 | with pytest.warns( 303 | UserWarning, 304 | match=( 305 | "Could not create a connection as it would " 306 | "lead outside of the artifact." 
307 | ), 308 | ): 309 | neatnet.geometry.snap_to_targets( 310 | self.lines, 311 | self.poly, 312 | snap_to=self.snap_to_1, 313 | ) 314 | 315 | def test_secondary(self): 316 | known = ([None], [None]) 317 | 318 | line1_b = shapely.LineString(((500, 500), (1500, 500))) 319 | line2_b = shapely.LineString(((1500, 500), (1500, 1500))) 320 | line3_b = shapely.LineString(((500, 500), (500, 1500))) 321 | line4_b = shapely.LineString(((500, 1500), (1500, 1500))) 322 | lines_b = [line1_b, line2_b, line3_b, line4_b] 323 | snap_to_2 = ( 324 | geopandas.GeoSeries(lines_b).polygonize().extract_unique_points().explode() 325 | ) 326 | 327 | observed = neatnet.geometry.snap_to_targets( 328 | self.lines + lines_b, 329 | self.poly, 330 | snap_to=self.snap_to_1, 331 | secondary_snap_to=snap_to_2, 332 | ) 333 | 334 | assert observed == known 335 | -------------------------------------------------------------------------------- /neatnet/tests/test_simplify.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import geopandas 4 | import momepy 5 | import numpy 6 | import pytest 7 | import shapely 8 | from pandas.testing import assert_frame_equal, assert_series_equal 9 | 10 | import neatnet 11 | 12 | test_data = pathlib.Path("neatnet", "tests", "data") 13 | full_fua_data = pathlib.Path("data") 14 | 15 | ci_artifacts = pathlib.Path("ci_artifacts") 16 | 17 | 18 | AC = "apalachicola" 19 | AC_STREETS = geopandas.read_parquet(test_data / f"{AC}_original.parquet") 20 | AC_EXCLUSION_MASK = geopandas.GeoSeries( 21 | [ 22 | shapely.Polygon( 23 | ( 24 | (-9461361.807208396, 3469029.2708674935), 25 | (-9461009.046874022, 3469029.2708674935), 26 | (-9461009.046874022, 3469240.1785251377), 27 | (-9461361.807208396, 3469240.1785251377), 28 | (-9461361.807208396, 3469029.2708674935), 29 | ) 30 | ), 31 | shapely.Polygon( 32 | ( 33 | (-9461429.266819818, 3469157.7482423405), 34 | (-9461361.807208396, 3469157.7482423405), 35 | (-9461361.807208396, 3469240.1785251377), 36 | (-9461429.266819818, 3469240.1785251377), 37 | (-9461429.266819818, 3469157.7482423405), 38 | ) 39 | ), 40 | ], 41 | crs=AC_STREETS.crs, 42 | ) 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "scenario,tol,known_length", 47 | [ 48 | ("standard", 1.5, 64566.0), 49 | ("exclusion_mask", 1.05, 65765.0), 50 | ], 51 | ) 52 | def test_neatify_small(scenario, tol, known_length): 53 | original = AC_STREETS.copy() 54 | 55 | known = geopandas.read_parquet(test_data / f"{AC}_simplified_{scenario}.parquet") 56 | exclusion_mask = AC_EXCLUSION_MASK.copy() if scenario == "exclusion_mask" else None 57 | 58 | observed = neatnet.neatify(original, exclusion_mask=exclusion_mask) 59 | observed_length = observed.geometry.length.sum() 60 | 61 | # storing GH artifacts 62 | artifact_dir = ci_artifacts / AC 63 | artifact_dir.mkdir(parents=True, exist_ok=True) 64 | observed.to_parquet(artifact_dir / f"simplified_{scenario}.parquet") 65 | 66 | assert pytest.approx(observed_length, rel=0.0001) == known_length 67 | assert observed.index.dtype == numpy.dtype("int64") 68 | 69 | assert observed.shape == known.shape 70 | assert_series_equal(known["_status"], observed["_status"]) 71 | assert_frame_equal( 72 | known.drop(columns=["_status", "geometry"]), 73 | observed.drop(columns=["_status", "geometry"]), 74 | ) 75 | 76 | pytest.geom_test(known, observed, tolerance=tol, aoi=f"{AC}_{scenario}") 77 | 78 | 79 | @pytest.mark.parametrize( 80 | "aoi,tol,known_length", 81 | [ 82 | ("aleppo_1133", 0.2, 4_361_625), 83 | ("auckland_869", 0.3, 
1_268_048), 84 | ("bucaramanga_4617", 0.2, 1_681_011), 85 | ("douala_809", 0.1, 2_961_364), 86 | ("liege_1656", 0.3, 2_350_782), 87 | ("slc_4881", 0.3, 1_762_456), 88 | ], 89 | ) 90 | def test_neatify_full_fua(aoi, tol, known_length): 91 | known = geopandas.read_parquet(full_fua_data / aoi / "simplified.parquet") 92 | observed = neatnet.neatify( 93 | geopandas.read_parquet(full_fua_data / aoi / "original.parquet") 94 | ) 95 | observed_length = observed.geometry.length.sum() 96 | assert "highway" in observed.columns 97 | 98 | # storing GH artifacts 99 | artifact_dir = ci_artifacts / aoi 100 | artifact_dir.mkdir(parents=True, exist_ok=True) 101 | observed.to_parquet(artifact_dir / "simplified.parquet") 102 | pytest.difference_plot(aoi, artifact_dir, known, observed) 103 | 104 | assert pytest.approx(observed_length, rel=0.0001) == known_length 105 | assert observed.index.dtype == numpy.dtype("int64") 106 | 107 | if pytest.ubuntu and pytest.env_type != "oldest": 108 | assert_series_equal(known["_status"], observed["_status"]) 109 | assert_frame_equal( 110 | known.drop(columns=["_status", "geometry"]), 111 | observed.drop(columns=["_status", "geometry"]), 112 | ) 113 | pytest.geom_test(known, observed, tolerance=tol, aoi=aoi) 114 | 115 | 116 | @pytest.mark.wuhan 117 | def test_neatify_wuhan(aoi="wuhan_8989", tol=0.3, known_length=4_702_861): 118 | known = geopandas.read_parquet(full_fua_data / aoi / "simplified.parquet") 119 | observed = neatnet.neatify( 120 | geopandas.read_parquet(full_fua_data / aoi / "original.parquet") 121 | ) 122 | observed_length = observed.geometry.length.sum() 123 | assert "highway" in observed.columns 124 | 125 | # storing GH artifacts 126 | artifact_dir = ci_artifacts / aoi 127 | artifact_dir.mkdir(parents=True, exist_ok=True) 128 | observed.to_parquet(artifact_dir / "simplified.parquet") 129 | pytest.difference_plot(aoi, artifact_dir, known, observed) 130 | 131 | assert pytest.approx(observed_length, rel=0.0001) == known_length 132 | assert observed.index.dtype == numpy.dtype("int64") 133 | 134 | if pytest.ubuntu and pytest.env_type != "oldest": 135 | assert_series_equal(known["_status"], observed["_status"]) 136 | assert_frame_equal( 137 | known.drop(columns=["_status", "geometry"]), 138 | observed.drop(columns=["_status", "geometry"]), 139 | ) 140 | pytest.geom_test(known, observed, tolerance=tol, aoi=aoi) 141 | 142 | 143 | def test_neatify_fallback(): 144 | streets = geopandas.read_file(momepy.datasets.get_path("bubenec"), layer="streets") 145 | with pytest.warns(UserWarning, match="No threshold for artifact"): 146 | simple = neatnet.neatify(streets) 147 | # only topology is fixed 148 | assert simple.shape == (31, 2) 149 | 150 | 151 | class TestCheckCRS: 152 | def test_projected_street_matching_mask(self): 153 | assert neatnet.simplify._check_input_crs(AC_STREETS, AC_EXCLUSION_MASK) is None 154 | 155 | def test_projected_street_no_mask(self): 156 | assert neatnet.simplify._check_input_crs(AC_STREETS, None) is None 157 | 158 | def test_projected_street_mismatch_mask(self): 159 | with pytest.raises( 160 | ValueError, 161 | match=( 162 | "The input `streets` and `exclusion_mask` data are in " 163 | "different coordinate reference systems. Reproject and rerun." 
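# regex searched within the raised ``ValueError`` text; mirrors the
# message emitted by ``_check_input_crs`` on a streets/mask CRS mismatch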
164 | ), 165 | ): 166 | neatnet.simplify._check_input_crs( 167 | AC_STREETS, AC_EXCLUSION_MASK.to_crs(4326) 168 | ) 169 | 170 | def test_no_crs_street_no_mask(self): 171 | with pytest.warns( 172 | UserWarning, 173 | match=( 174 | "The input `streets` data does not have an assigned " 175 | "coordinate reference system. Assuming a projected CRS in meters." 176 | ), 177 | ): 178 | neatnet.simplify._check_input_crs( 179 | AC_STREETS.set_crs(None, allow_override=True), None 180 | ) 181 | 182 | def test_projected_street_feet(self): 183 | with pytest.warns( 184 | UserWarning, 185 | match=( 186 | "The input `streets` data coordinate reference system is projected " 187 | "but not in meters. All `neatnet` defaults assume meters. " 188 | "Either reproject and rerun or proceed with caution." 189 | ), 190 | ): 191 | neatnet.simplify._check_input_crs(AC_STREETS.to_crs(6441), None) 192 | 193 | def test_geographic_street(self): 194 | with pytest.raises( 195 | ValueError, 196 | match=( 197 | "The input `streets` data are not in a projected " 198 | "coordinate reference system. Reproject and rerun." 199 | ), 200 | ): 201 | neatnet.simplify._check_input_crs(AC_STREETS.to_crs(4326), None) 202 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [project] 8 | name = "neatnet" 9 | dynamic = ["version"] 10 | authors = [ 11 | { name = "Martin Fleischmann", email = "martin@martinfleischmann.net" }, 12 | { name = "Anastassia Vybornova", email = "anvy@itu.dk" }, 13 | { name = "James D. Gaboardi", email = "jgaboardi@gmail.com" }, 14 | ] 15 | 16 | license = { text = "BSD 3-Clause" } 17 | description = "Street geometry processing toolkit" 18 | keywords = [""] 19 | readme = "README.md" 20 | classifiers = [ 21 | "Programming Language :: Python :: 3", 22 | "License :: OSI Approved :: BSD License", 23 | "Operating System :: OS Independent", 24 | "Intended Audience :: Science/Research", 25 | "Topic :: Scientific/Engineering :: GIS", 26 | ] 27 | requires-python = ">=3.11" 28 | dependencies = [ 29 | "esda>=2.6.0", 30 | "geopandas>=1.0.1", 31 | "libpysal>=4.12.1", 32 | "momepy>=0.9.0", 33 | "networkx>=3.3", 34 | "numpy>=2", 35 | "pandas>=2.2.3", 36 | "scipy>=1.14.1", 37 | "shapely>=2.0.6", 38 | "scikit-learn>=1.2.0", 39 | ] 40 | 41 | [project.urls] 42 | Home = "https://github.com/uscuni/" 43 | Repository = "https://github.com/uscuni/neatnet" 44 | 45 | [project.optional-dependencies] 46 | tests = [ 47 | "codecov", 48 | "coverage", 49 | "matplotlib", 50 | "mypy>=1.15.0,<2", 51 | "pre-commit", 52 | "pyarrow>=17.0", 53 | "pytest", 54 | "pytest-cov", 55 | "pytest-xdist", 56 | "ruff", 57 | "yamllint", 58 | ] 59 | 60 | docs = [ 61 | "ipykernel", 62 | "ipywidgets", 63 | "jupyterlab", 64 | "myst_nb", 65 | "numpydoc", 66 | "sphinx", 67 | "sphinxcontrib-bibtex", 68 | "sphinx_autosummary_accessors", 69 | "sphinx_book_theme", 70 | "sphinx_copybutton", 71 | ] 72 | all = ["neatnet[tests,docs]"] 73 | 74 | 75 | [tool.setuptools.packages.find] 76 | include = ["neatnet", "neatnet.*"] 77 | 78 | [tool.ruff] 79 | line-length = 88 80 | extend-include = ["*.ipynb"] 81 | 82 | [tool.ruff.lint] 83 | select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"] 84 | 85 | [tool.ruff.lint.per-file-ignores] 86 | "*__init__.py" = [ 87 | "F401", # imported but 
unused 88 | "F403", # star import; unable to detect undefined names 89 | ] 90 | 91 | [tool.coverage.run] 92 | source = ["./neatnet"] 93 | 94 | [tool.coverage.report] 95 | exclude_lines = [ 96 | "if self.debug:", 97 | "pragma: no cover", 98 | "raise NotImplementedError", 99 | "except ModuleNotFoundError:", 100 | "except ImportError", 101 | ] 102 | ignore_errors = true 103 | omit = ["neatnet/tests/*"] 104 | 105 | [tool.pytest.ini_options] 106 | filterwarnings = [ 107 | # this is an internal warning thrown within ``neatnet.geometry.snap_to_targets()`` 108 | 'ignore:Could not create a connection.*:UserWarning', 109 | ] 110 | markers = [ 111 | 'wuhan: Wuhan takes ages to run' 112 | ] 113 | [tool.pixi.project] 114 | channels = ["conda-forge"] 115 | platforms = ["linux-64", "osx-arm64", "osx-64", "win-64"] 116 | 117 | [tool.pixi.dependencies] 118 | python = "3.13.*" 119 | esda = "*" 120 | geopandas = "*" 121 | libpysal = "*" 122 | momepy = "*" 123 | networkx = "*" 124 | numpy = "*" 125 | osmnx = "*" 126 | pandas = "*" 127 | pyarrow = "*" 128 | pyogrio = "*" 129 | scipy = "*" 130 | shapely = "*" 131 | mypy = ">=1.15.0,<2" 132 | pre-commit = "*" 133 | pytest = "*" 134 | pytest-cov = "*" 135 | pytest-xdist = "*" 136 | ruff = "*" 137 | yamllint = "*" 138 | 139 | [tool.pixi.pypi-dependencies] 140 | neatnet = { path = ".", editable = true } 141 | 142 | [tool.pixi.environments] 143 | default = { solve-group = "default" } 144 | tests = { features = ["tests", "docs"], solve-group = "default" } 145 | 146 | [tool.pixi.tasks] 147 | --------------------------------------------------------------------------------
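For orientation, a minimal sketch of the workflow the tests above exercise, using the bundled Apalachicola fixture. ``neatnet.neatify``, its ``exclusion_mask`` keyword, the ``_status`` column, and the fixture path are all taken from the test suite; the printed summary itself is illustrative only.

import geopandas

import neatnet

# Street centerlines in a projected CRS in meters; ``_check_input_crs``
# (tested above) warns or raises for anything else.
streets = geopandas.read_parquet(
    "neatnet/tests/data/apalachicola_original.parquet"
)

# Simplify the network. Optionally pass ``exclusion_mask`` (a GeoSeries of
# polygons in the same CRS) to keep those areas out of the simplification,
# as in the ``exclusion_mask`` scenario of ``test_neatify_small``.
simplified = neatnet.neatify(streets)

# The tests compare total edge length against a known value and inspect the
# per-edge ``_status`` bookkeeping column.
print(simplified.geometry.length.sum(), simplified["_status"].unique())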