├── .gitattributes
├── .github
    ├── release.yml
    └── workflows
    │   ├── build_docs.yml
    │   ├── mypy.yml
    │   ├── release_to_pypi.yml
    │   └── testing.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── ci
    ├── py311_latest.yaml
    ├── py311_oldest.yaml
    ├── py312_latest.yaml
    ├── py313_dev.yaml
    └── py313_latest.yaml
├── codecov.yml
├── data
    ├── README.md
    ├── aleppo_1133
    │   ├── original.parquet
    │   └── simplified.parquet
    ├── auckland_869
    │   ├── original.parquet
    │   └── simplified.parquet
    ├── bucaramanga_4617
    │   ├── original.parquet
    │   └── simplified.parquet
    ├── douala_809
    │   ├── original.parquet
    │   └── simplified.parquet
    ├── generate_simplified.py
    ├── liege_1656
    │   ├── original.parquet
    │   └── simplified.parquet
    ├── slc_4881
    │   ├── original.parquet
    │   └── simplified.parquet
    └── wuhan_8989
    │   ├── original.parquet
    │   └── simplified.parquet
├── docs
    ├── Makefile
    ├── make.bat
    └── source
    │   ├── _static
    │       ├── custom.css
    │       ├── icon.png
    │       ├── logo.svg
    │       └── references.bib
    │   ├── api.rst
    │   ├── conf.py
    │   ├── index.md
    │   ├── intro.ipynb
    │   ├── references.rst
    │   └── simple_preprocessing.ipynb
├── environment.yml
├── neatnet
    ├── __init__.py
    ├── artifacts.py
    ├── continuity.py
    ├── gaps.py
    ├── geometry.py
    ├── nodes.py
    ├── simplify.py
    └── tests
    │   ├── conftest.py
    │   ├── data
    │       ├── apalachicola_original.parquet
    │       ├── apalachicola_simplified_exclusion_mask.parquet
    │       └── apalachicola_simplified_standard.parquet
    │   ├── test_artifacts.py
    │   ├── test_continuity.py
    │   ├── test_gaps.py
    │   ├── test_geometry.py
    │   ├── test_nodes.py
    │   └── test_simplify.py
├── pixi.lock
└── pyproject.toml


/.gitattributes:
--------------------------------------------------------------------------------
1 | # GitHub syntax highlighting
2 | pixi.lock linguist-language=YAML linguist-generated=true
3 | 


--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | changelog:
 3 |   exclude:
 4 |     labels:
 5 |       - ignore-for-release
 6 |     authors:
 7 |       - dependabot
 8 |       - pre-commit-ci
 9 |   categories:
10 |     - title: API Changes
11 |       labels:
12 |         - api
13 |     - title: Enhancements
14 |       labels:
15 |         - enhancement
16 |     - title: Bug Fixes
17 |       labels:
18 |         - bug
19 |     - title: Deprecations
20 |       labels:
21 |         - deprecation
22 |     - title: Documentation
23 |       labels:
24 |         - documentation
25 |     - title: Other Changes
26 |       labels:
27 |         - "*"
28 | 


--------------------------------------------------------------------------------
/.github/workflows/build_docs.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Build Docs
 3 | 
 4 | on:
 5 |   push:
 6 |     tags:
 7 |       - 'v*'
 8 |   workflow_dispatch:
 9 |     inputs:
10 |       version:
11 |         description: Manual Doc Build
12 |         default: run-doc-build
13 |         required: false
14 | jobs:
15 |   docs:
16 |     name: build & push docs
17 |     runs-on: ${{ matrix.os }}
18 |     timeout-minutes: 90
19 |     strategy:
20 |       matrix:
21 |         os: ['ubuntu-latest']
22 |         environment-file: [ci/py313_latest.yaml]
23 |         experimental: [false]
24 |     defaults:
25 |       run:
26 |         shell: bash -l {0}
27 | 
28 |     steps:
29 |       - name: checkout repo
30 |         uses: actions/checkout@v4
31 |         with:
32 |           fetch-depth: 0
33 | 
34 |       - name: setup micromamba
35 |         uses: mamba-org/setup-micromamba@v1
36 |         with:
37 |           environment-file: ${{ matrix.environment-file }}
38 |           micromamba-version: 'latest'
39 | 
40 |       - name: install package
41 |         run: pip install .
42 | 
43 |       - name: make docs
44 |         run: cd docs; make html
45 | 
46 |       - name: commit docs
47 |         run: |
48 |           git clone \
49 |             https://github.com/ammaraskar/sphinx-action-test.git \
50 |             --branch gh-pages \
51 |             --single-branch gh-pages
52 |           cp -r docs/build/html/* gh-pages/
53 |           cd gh-pages
54 |           git config --local user.email "action@github.com"
55 |           git config --local user.name "GitHub Action"
56 |           git add .
57 |           git commit -m "Update documentation" -a || true
58 |           # The above command will fail if no changes were present,
59 |           # so we ignore the return code.
60 | 
61 |       - name: push to gh-pages
62 |         uses: ad-m/github-push-action@master
63 |         with:
64 |           branch: gh-pages
65 |           directory: gh-pages
66 |           github_token: ${{ secrets.GITHUB_TOKEN }}
67 |           force: true
68 | 


--------------------------------------------------------------------------------
/.github/workflows/mypy.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: MyPy Type Checking
 3 | 
 4 | on:
 5 |   push:
 6 |     branches: [main]
 7 |   pull_request:
 8 |     branches:
 9 |       - "*"
10 |   schedule:
11 |     - cron: "59 23 * * *"
12 |   workflow_dispatch:
13 |     inputs:
14 |       version:
15 |         description: Manual Type Checking
16 |         default: type_checking
17 |         required: false
18 | 
19 | jobs:
20 |   mypy:
21 |     runs-on: ubuntu-latest
22 |     defaults:
23 |       run:
24 |         shell: bash -l {0}
25 | 
26 |     steps:
27 |       - uses: actions/checkout@v4
28 | 
29 |       - name: setup micromamba
30 |         uses: mamba-org/setup-micromamba@v2
31 |         with:
32 |           environment-file: ci/py313_latest.yaml
33 |           create-args: >-
34 |             mypy
35 | 
36 |       - name: Install package
37 |         run: pip install .
38 | 
39 |       - name: Check package
40 |         run: |
41 |             mypy neatnet/ --ignore-missing-imports --install-types --non-interactive
42 | 


--------------------------------------------------------------------------------
/.github/workflows/release_to_pypi.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Publish neatnet to PyPI / GitHub
 3 | 
 4 | on:
 5 |   push:
 6 |     tags:
 7 |       - "v*"
 8 | 
 9 | jobs:
10 |   build-n-publish:
11 |     name: Build and publish neatnet to PyPI
12 |     runs-on: ubuntu-latest
13 |     permissions:
14 |       id-token: write  # MANDATORY for trusted publishing to PyPI
15 |       contents: write  # MANDATORY for the Github release action
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |         with:
20 |           fetch-depth: 0  # Fetch all history for all branches and tags.
21 | 
22 |       - name: Set up Python
23 |         uses: actions/setup-python@v5
24 |         with:
25 |           python-version: "3.x"
26 | 
27 |       - name: Build source and wheel distributions
28 |         run: |
29 |           python -m pip install --upgrade pip
30 |           python -m pip install --upgrade build twine
31 |           python -m build
32 |           twine check --strict dist/*
33 | 
34 |       - name: Create Release Notes
35 |         uses: actions/github-script@v7
36 |         with:
37 |           github-token: ${{secrets.GITHUB_TOKEN}}
38 |           script: |
39 |             await github.request(`POST /repos/${{ github.repository }}/releases`, {
40 |                 tag_name: "${{ github.ref }}",
41 |                 generate_release_notes: true
42 |             });
43 | 
44 |       - name: Publish distribution to PyPI
45 |         uses: pypa/gh-action-pypi-publish@release/v1
46 | 


--------------------------------------------------------------------------------
/.github/workflows/testing.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | name: Continuous Integration
  3 | 
  4 | on:
  5 |   push:
  6 |     branches: [main]
  7 |   pull_request:
  8 |     branches:
  9 |       - "*"
 10 |   schedule:
 11 |     - cron: "59 23 * * *"
 12 |   workflow_dispatch:
 13 |     inputs:
 14 |       version:
 15 |         description: Manual CI Run
 16 |         default: test
 17 |         required: false
 18 | 
 19 | jobs:
 20 |   tests:
 21 |     name: ${{ matrix.os }}, ${{ matrix.environment-file }}
 22 |     runs-on: ${{ matrix.os }}
 23 |     timeout-minutes: 60
 24 |     strategy:
 25 |       matrix:
 26 |         os: [ubuntu-latest]
 27 |         environment-file: [
 28 |           py311_oldest,
 29 |           py311_latest,
 30 |           py312_latest,
 31 |           py313_latest,
 32 |           py313_dev,
 33 |         ]
 34 |         include:
 35 |           - environment-file: py313_latest
 36 |             os: macos-13  # Intel
 37 |           - environment-file: py313_latest
 38 |             os: macos-latest  # Apple Silicon
 39 |           - environment-file: py313_latest
 40 |             os: windows-latest
 41 |       fail-fast: false
 42 | 
 43 |     defaults:
 44 |       run:
 45 |         shell: bash -l {0}
 46 | 
 47 |     steps:
 48 |       - name: checkout repo
 49 |         uses: actions/checkout@v4
 50 |         with:
 51 |           fetch-depth: 0  # Fetch all history for all branches and tags.
 52 | 
 53 |       - name: setup micromamba
 54 |         uses: mamba-org/setup-micromamba@v2
 55 |         with:
 56 |           environment-file: ci/${{ matrix.environment-file }}.yaml
 57 |           micromamba-version: "latest"
 58 | 
 59 |       - name: install package
 60 |         run: "pip install -e . --no-deps"
 61 | 
 62 |       - name: spatial versions
 63 |         run: 'python -c "import geopandas; geopandas.show_versions();"'
 64 | 
 65 |       - name: run tests
 66 |         run: |
 67 |           pytest \
 68 |           neatnet/ \
 69 |           --verbose \
 70 |           -r a \
 71 |           --numprocesses logical \
 72 |           --color yes \
 73 |           --cov neatnet \
 74 |           --cov-append \
 75 |           --cov-report term-missing \
 76 |           --cov-report xml . \
 77 |           --env_type ${{ matrix.environment-file }} \
 78 |           -m "not wuhan"
 79 | 
 80 |       - name: run tests (Wuhan)
 81 |         run: |
 82 |           pytest \
 83 |           neatnet/ \
 84 |           --verbose \
 85 |           -r a \
 86 |           --numprocesses logical \
 87 |           --color yes \
 88 |           --cov neatnet \
 89 |           --cov-append \
 90 |           --cov-report term-missing \
 91 |           --cov-report xml . \
 92 |           --env_type ${{ matrix.environment-file }} \
 93 |           -m wuhan
 94 |         if: ${{ github.event_name != 'pull_request' }}
 95 | 
 96 |       - name: zip artifacts - Ubuntu & macOS
 97 |         run: zip ci_artifacts.zip ci_artifacts -r
 98 |         if: matrix.os != 'windows-latest' && (success() || failure())
 99 | 
100 |       - name: zip artifacts - Windows
101 |         shell: powershell
102 |         run: Compress-Archive -Path ci_artifacts -Destination ci_artifacts.zip
103 |         if: matrix.os == 'windows-latest' && (success() || failure())
104 | 
105 |       - name: archive observed simplified networks
106 |         uses: actions/upload-artifact@v4
107 |         with:
108 |           name: ci_artifacts-${{ matrix.os }}-${{ matrix.environment-file }}
109 |           path: ci_artifacts.zip
110 |         if: success() || failure()
111 | 
112 |       - name: codecov
113 |         uses: codecov/codecov-action@v4
114 |         with:
115 |           token: ${{ secrets.CODECOV_TOKEN }}
116 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 
164 | cache/
165 | 
166 | # macOS stuff
167 | *.DS_Store
168 | # pixi environments
169 | .pixi
170 | *.egg-info
171 | 
172 | ci_artifacts/
173 | docs/source/generated


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | repos:
 3 |   - repo: https://github.com/astral-sh/ruff-pre-commit
 4 |     rev: "v0.11.8"
 5 |     hooks:
 6 |       - id: ruff
 7 |         files: "neatnet\/|docs\/source\/"
 8 |       - id: ruff-format
 9 |         files: "neatnet\/|docs\/source\/"
10 |   - repo: https://github.com/adrienverge/yamllint
11 |     rev: "v1.37.0"
12 |     hooks:
13 |       - id: yamllint
14 |         files: "."
15 |         args: [
16 |           -d,
17 |           "{extends: default, ignore: .pixi/*, rules: {line-length: {max: 90}}}",
18 |           ".",
19 |         ]
20 |   - repo: https://github.com/pre-commit/pre-commit-hooks
21 |     rev: "v5.0.0"
22 |     hooks:
23 |       - id: check-toml
24 |         files: "."
25 | 
26 | ci:
27 |   autofix_prs: false
28 |   autoupdate_schedule: quarterly
29 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing to `neatnet`
  2 | 
  3 | First off, thanks for taking the time to contribute! ❤️
  4 | 
  5 | All types of contributions are encouraged and valued. See this page for different ways to help and details about how this project handles them. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. 🎉
  6 | 
  7 | > And if you like the project, but just don't have time to contribute, that's fine. There are other easy ways to support the project and show your appreciation, which we would also be very happy about:
  8 | > - Star the project
  9 | > - Tweet about it
 10 | > - Refer this project in your project's `README`
 11 | > - Mention the project at local meetups and tell your friends/colleagues
 12 | 
 13 | ## I Have a Question
 14 | 
 15 | > If you want to ask a question, we assume that you have read the available [Documentation](https://uscuni.org/neatnet/).
 16 | 
 17 | Before you ask a question, it is best to search for existing [Issues](https://github.com/uscuni/neatnet/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first, especially [Stack Overflow](https://stackoverflow.com).
 18 | 
 19 | If you then still feel the need to ask a question and need clarification, we recommend the following:
 20 | 
 21 | - Open an [Issue](https://github.com/uscuni/neatnet/issues/new).
 22 | - Provide as much context as you can about what you're running into.
 23 | - Provide project and platform versions (`python`, `shapely`, `geopandas`, etc.), depending on what seems relevant.
 24 | 
 25 | We will then take care of the issue as soon as possible.
 26 | 
 27 | ## I Want To Contribute
 28 | 
 29 | ### Reporting Bugs
 30 | 
 31 | #### Before Submitting a Bug Report
 32 | 
 33 | A good bug report shouldn't leave others needing to chase you up for more information. Therefore, we ask you to investigate carefully, collect information and describe the issue in detail in your report. Please complete the following steps in advance to help us fix any potential bug as fast as possible.
 34 | 
 35 | - Make sure that you are using the latest version.
 36 | - Determine if your bug is really a bug and not an error on your side, e.g. using incompatible environment components/versions (Make sure that you have read the [documentation](https://uscuni.org/neatnet/).).
 37 | - To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/uscuni/neatnet/issues).
 38 | - Also make sure to search the internet (especially [Stack Overflow](https://stackoverflow.com)) to see if users outside of the GitHub community have discussed the issue.
 39 | - Collect information about the bug:
 40 |   - Stack trace (Traceback)
 41 |   - OS, Platform and Version (Windows, Linux, macOS, x86, ARM)
 42 |   - Version of Python and relevant dependencies.
 43 |   - Possibly your input and the output
 44 |   - Can you reliably reproduce the issue? And can you also reproduce it with older versions?
 45 | 
 46 | #### How Do I Submit a Good Bug Report?
 47 | 
 48 | We use GitHub issues to track bugs and errors. If you run into an issue with the project:
 49 | 
 50 | - Open an [Issue](https://github.com/uscuni/neatnet/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.)
 51 | - Explain the behavior you would expect and the actual behavior.
 52 | - Please provide as much context as possible and describe the *reproduction steps* that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case. This is known as a [mininum reproducible example](https://en.wikipedia.org/wiki/Minimal_reproducible_example#:~:text=In%20computing%2C%20a%20minimal%20reproducible,to%20be%20demonstrated%20and%20reproduced.) – or MRE for short.
 53 | - Provide the information you collected in the previous section.
 54 | 
 55 | Once it's filed:
 56 | 
 57 | - The project team will label the issue accordingly.
 58 | - A team member will try to reproduce the issue with your provided steps. If there are no reproduction steps or no obvious way to reproduce the issue, the team will ask you for those steps.
 59 | - If the team is able to reproduce the issue, it will be left to be implemented by someone.
 60 | 
 61 | ### Suggesting Enhancements
 62 | 
 63 | This section guides you through submitting an enhancement suggestion for `neatnet`, **including completely new features and minor improvements to existing functionality**. Following these guidelines will help maintainers and the community to understand your suggestion and find related suggestions.
 64 | 
 65 | #### Before Submitting an Enhancement
 66 | 
 67 | - Make sure that you are using the latest version.
 68 | - Read the [documentation](https://uscuni.org/neatnet/) carefully and find out if the functionality is already covered, maybe by an individual configuration.
 69 | - Perform a [search](https://github.com/uscuni/neatnet/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one.
 70 | - Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library.
 71 | 
 72 | #### How Do I Submit a Good Enhancement Suggestion?
 73 | 
 74 | Enhancement suggestions are tracked as [GitHub issues](https://github.com/uscuni/neatnet/issues).
 75 | 
 76 | - Use a **clear and descriptive title** for the issue to identify the suggestion.
 77 | - Provide a **step-by-step description of the suggested enhancement** in as many details as possible.
 78 | - **Describe the current behavior** and **explain which behavior you expected to see instead** and why. At this point you can also tell which alternatives do not work for you.
 79 | - **Explain why this enhancement would be useful** to most neatnet's users. You may also want to point out the other projects that solved it better and which could serve as inspiration.
 80 | 
 81 | ### Code Contribution
 82 | 
 83 | You can create a development environment using [Pixi](https://pixi.sh):
 84 | 
 85 | ```sh
 86 | pixi install -e tests
 87 | ```
 88 | 
 89 | Or with conda using the `environment.yml` file:
 90 | 
 91 | ```sh
 92 | conda env create -f environment.yml
 93 | ```
 94 | 
 95 | To install `neatnet` to the environment in an editable form, clone the repository, navigate to the main directory and install it with pip:
 96 | 
 97 | ```sh
 98 | pip install -e .
 99 | ```
100 | 
101 | When submitting a pull request:
102 | 
103 | - All existing tests should pass. Please make sure that the test suite passes, both locally and on GitHub Actions. Status on GHA will be visible on a pull request. GHA are automatically enabled on your own fork as well. To trigger a check, make a PR to your own fork.
104 | - Ensure that documentation has built correctly. It will be automatically built for each PR.
105 | - New functionality ***must*** include tests. Please write reasonable tests for your code and make sure that they pass on your pull request.
106 | - Classes, methods, functions, etc. should have docstrings. The first line of a docstring should be a standalone summary. Parameters and return values should be documented explicitly.
107 | - Follow PEP 8 when possible. We use ``Ruff`` for linting and formatting to ensure robustness & consistency in code throughout the project. It included in the ``pre-commit`` hook and will be checked on every PR.
108 | - `neatnet` supports Python 3.11+ only. When possible, do not introduce additional dependencies. If that is necessary, make sure they can be treated as optional.
109 | 
110 | #### Procedure
111 | 
112 | 1. *After* opening an issue and discussing with the development team, create a PR with the proposed changes.
113 | 2. If [testing fails](https://github.com/uscuni/neatnet/actions/runs/11368511561) due to an update in the code base:
114 | 3. Observed data is [saved as artifacts](https://github.com/uscuni/neatnet/actions/runs/11368511561#artifacts) from the workflow and can be download locally.
115 | 4. We determine the `ci_artifacts-ubuntu-latest-py313_latest` data as the "truth."
116 | 5. After comparison of the current "known" data with new data from (3.), if new data is "truthier," update your PR with the new "known" data.
117 | 
118 | #### Handling Edge Cases in Testing
119 | 
120 | Edge cases will crop up in full-scale FUA testing that we can ignore (following a thorough investigation – e.g. [`neatnet#77`](https://github.com/uscuni/neatnet/issues/77)) during testing. Once it is determined the geometry in question is not caused by a bug on our end, it can be added to the `KNOWN_BAD_GEOMS` collection in `tests/conftest.py`. This collection is a dictionary keyed by `<NAME>_CODE` of the city/FUA where the values are lists of index locations of simplified edges that can be ignored if they fail equality testing. As an example, see our initial "bad" geometries [here](https://github.com/uscuni/neatnet/blob/1be6b44b1a06d52453ecbaee205ae649101c4ea4/neatnet/tests/conftest.py#L25-L39), which were due to a variant number of coordinates in those resultant simplified edges created by [different versions of `shapely`](https://github.com/uscuni/neatnet/pull/67#issuecomment-2457333724).
121 | 
122 | ##### Code Structure
123 | 
124 | Code should be linted and formatted via `ruff`. With the [`.pre-commit` hooks](https://github.com/uscuni/neatnet/blob/main/.pre-commit-config.yaml) we have code in commits will be formatted and linted automatically once [`pre-commit` is installed](https://pre-commit.com/#installation).
125 | 
126 | ## Attribution
127 | 
128 | This guide is based on the **contributing-gen**. [Make your own](https://github.com/bttger/contributing-gen)!
129 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2024, Research Team on Urban Structure
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # `neatnet`: Street Geometry Processing Toolkit
 2 | 
 3 | [![Continuous Integration](https://github.com/uscuni/neatnet/actions/workflows/testing.yml/badge.svg)](https://github.com/uscuni/neatnet/actions/workflows/testing.yml) [![codecov](https://codecov.io/gh/uscuni/neatnet/graph/badge.svg?token=GFISMU0WPS)](https://codecov.io/gh/uscuni/neatnet)
 4 | 
 5 | ## Introduction
 6 | 
 7 | `neatnet` offers a set of tools pre-processing of street network geometry aimed at its simplification. This typically means removal of dual carrieageways, roundabouts and similar transportation-focused geometries and their replacement with a new geometry representing the street space via its centerline. The resulting geometry shall be closer to a morphological representation of space than the original source, that is typically drawn with transportation in mind (e.g. OpenStreetMap).
 8 | 
 9 | ## Examples
10 | 
11 | ```py
12 | import neatnet
13 | 
14 | simplified = neatnet.neatify(gdf)
15 | ```
16 | 
17 | ## Contribution
18 | 
19 | While we consider the API stable, the project is young and may be evolving fast. All contributions are very welcome, see our guidelines in [`CONTRIBUTING.md`](https://github.com/uscuni/neatnet/blob/main/CONTRIBUTING.md).
20 | 
21 | ## Recommended Citations
22 | 
23 | The package is a result of a scientific collaboration between [The Research Team on Urban Structure](https://uscuni.org) of Charles University (USCUNI), [NEtwoRks, Data, and Society](https://nerds.itu.dk) research group of IT University Copenhagen (NERDS) and [Oak Ridge National Laboratory](https://www.ornl.gov/gshsd).
24 | 
25 | If you use `neatnet` for a research purpose, please consider citing the original paper introducing it.
26 | 
27 | ### Canonical Citation (primary)
28 | 
29 | *forthcoming*
30 | 
31 | ### Repository Citation (secondary)
32 | 
33 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14765801.svg)](https://doi.org/10.5281/zenodo.14765801)
34 | 
35 | * **Fleischmann, M., Vybornova, A., & Gaboardi, J.D.** (2025). `uscuni/neatnet`. Zenodo. https://doi.org/10.5281/zenodo.14765801
36 | 
37 | ## Funding
38 | 
39 | The development has been supported by the Charles University’s Primus program through the project "Influence of Socioeconomic and Cultural Factors on Urban Structure in Central Europe", project reference `PRIMUS/24/SCI/023`.
40 | 
41 | ---------------------------------------
42 | 
43 | This package developed & and maintained by:
44 | * [Martin Fleischmann](https://github.com/martinfleis)
45 | * [Anastassia Vybornova](https://github.com/anastassiavybornova)
46 | * [James D. Gaboardi](https://github.com/jGaboardi)
47 | 
48 | 
49 | Copyright (c) 2024-, neatnet Developers
50 | 


--------------------------------------------------------------------------------
/ci/py311_latest.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: py311_neatnet-latest
 3 | channels:
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.11
 7 |   - esda
 8 |   - geopandas
 9 |   - libpysal
10 |   - momepy
11 |   - networkx
12 |   - numpy
13 |   - osmnx
14 |   - pandas
15 |   - pyarrow
16 |   - pyogrio
17 |   - scipy
18 |   - shapely
19 |   # testing
20 |   - matplotlib
21 |   - pre-commit
22 |   - pytest
23 |   - pytest-cov
24 |   - pytest-xdist
25 | 


--------------------------------------------------------------------------------
/ci/py311_oldest.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: py311_neatnet-oldest
 3 | channels:
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.11
 7 |   - esda=2.6.0
 8 |   - geopandas=1.0.1
 9 |   - libpysal=4.12.1
10 |   - momepy=0.9.0
11 |   - networkx=3.3
12 |   - numpy=2.0.0
13 |   - pandas=2.2.3
14 |   - scipy=1.14.1
15 |   - shapely=2.0.6
16 |   # testing
17 |   - matplotlib
18 |   - pre-commit
19 |   - pyarrow=17.0
20 |   - pytest
21 |   - pytest-cov
22 |   - pytest-xdist
23 | 


--------------------------------------------------------------------------------
/ci/py312_latest.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: py312_neatnet-latest
 3 | channels:
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.12
 7 |   - esda
 8 |   - geopandas
 9 |   - libpysal
10 |   - momepy
11 |   - networkx
12 |   - numpy
13 |   - osmnx
14 |   - pandas
15 |   - pyarrow
16 |   - pyogrio
17 |   - scipy
18 |   - shapely
19 |   # testing
20 |   - matplotlib
21 |   - pre-commit
22 |   - pytest
23 |   - pytest-cov
24 |   - pytest-xdist
25 | 


--------------------------------------------------------------------------------
/ci/py313_dev.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: py313_neatnet-dev
 3 | channels:
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.13
 7 |   - git
 8 |   - pip
 9 |   # testing
10 |   - pre-commit
11 |   - pytest
12 |   - pytest-cov
13 |   - pytest-xdist
14 |   - pip:
15 |       # dev versions of packages
16 |       - --pre \
17 |         --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple  \
18 |         --extra-index-url https://pypi.org/simple
19 |       - matplotlib
20 |       - networkx
21 |       - numpy
22 |       - pandas
23 |       - pyarrow
24 |       - pyogrio
25 |       - scipy
26 |       - shapely
27 |       - git+https://github.com/pysal/esda.git
28 |       - git+https://github.com/geopandas/geopandas.git
29 |       - git+https://github.com/pysal/libpysal.git
30 |       - git+https://github.com/pysal/momepy.git
31 | 


--------------------------------------------------------------------------------
/ci/py313_latest.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: py313_neatnet-latest
 3 | channels:
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.13
 7 |   - esda
 8 |   - geopandas
 9 |   - libpysal
10 |   - momepy
11 |   - networkx
12 |   - numpy
13 |   - osmnx
14 |   - pandas
15 |   - pyarrow
16 |   - pyogrio
17 |   - scipy
18 |   - shapely
19 |   # testing
20 |   - matplotlib
21 |   - pre-commit
22 |   - pytest
23 |   - pytest-cov
24 |   - pytest-xdist
25 |   # docs
26 |   - ipykernel
27 |   - myst-nb
28 |   - numpydoc
29 |   - sphinx
30 |   - sphinxcontrib-bibtex
31 |   - sphinx-autosummary-accessors
32 |   - sphinx-copybutton
33 |   - sphinx-book-theme
34 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | codecov:
 3 |   notify:
 4 |     after_n_builds: 6
 5 | coverage:
 6 |   range: 50..95
 7 |   round: nearest
 8 |   precision: 1
 9 |   status:
10 |     project:
11 |       default:
12 |         threshold: 2%
13 |     patch:
14 |       default:
15 |         threshold: 2%
16 |         target: 80%
17 |   ignore:
18 |     - "tests/*"
19 | comment:
20 |   layout: "reach, diff, files"
21 |   behavior: once
22 |   after_n_builds: 6
23 |   require_changes: true
24 | 


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
 1 | # FUA testing data
 2 | 
 3 | ## Contents
 4 | 
 5 | * This `README.md`
 6 | * `generate_simplified.py` – see [neatnet#7](https://github.com/uscuni/neatnet/issues/7)
 7 | * Data
 8 |    * There is a directory for each FUA listed below that contains 2 files:
 9 |       * `original.parquet`: The original input street network derived from [OSM](https://www.openstreetmap.org/about) via [OSMNX](https://osmnx.readthedocs.io/en/stable/).
10 |       * `simplified.parquet`: The simplified street network following our algorithm with *default parameters*.
11 | 
12 | ## FUA Information
13 | 
14 | | FUA  | City                                   | Shortand              |
15 | | ---  | ---                                    | ---                   |
16 | | 1133 | Aleppo, Syria, Middle East / Asia      | `aleppo_1133`         |
17 | | 869  | Auckland, New Zealand, Oceania / Asia  | `auckland_869`        |
18 | | 809  | Douala, Cameroon, Africa               | `douala_809`          |
19 | | 1656 | Liège, Belgium, Europe                 | `liege_1656`          |
20 | | 4617 | Bucaramanga, Colombia, S. America      | `bucaramanga_4617`    |
21 | | 4881 | Salt Lake City, Utah, USA, N. America  | `slc_4881`            |
22 | 
23 | ---------------------------------------
24 | 
25 | Copyright (c) 2024-, neatnet Developers
26 | 


--------------------------------------------------------------------------------
/data/aleppo_1133/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/aleppo_1133/original.parquet


--------------------------------------------------------------------------------
/data/aleppo_1133/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/aleppo_1133/simplified.parquet


--------------------------------------------------------------------------------
/data/auckland_869/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/auckland_869/original.parquet


--------------------------------------------------------------------------------
/data/auckland_869/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/auckland_869/simplified.parquet


--------------------------------------------------------------------------------
/data/bucaramanga_4617/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/bucaramanga_4617/original.parquet


--------------------------------------------------------------------------------
/data/bucaramanga_4617/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/bucaramanga_4617/simplified.parquet


--------------------------------------------------------------------------------
/data/douala_809/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/douala_809/original.parquet


--------------------------------------------------------------------------------
/data/douala_809/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/douala_809/simplified.parquet


--------------------------------------------------------------------------------
/data/generate_simplified.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import pathlib
 3 | import time
 4 | 
 5 | import geopandas
 6 | 
 7 | import neatnet
 8 | 
 9 | start_time = time.time()
10 | 
11 | logging.basicConfig(
12 |     filename="simplified_generation.log",
13 |     filemode="a",
14 |     format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
15 |     datefmt="%H:%M:%S",
16 |     level=logging.NOTSET,
17 | )
18 | logger = logging.getLogger(__name__)
19 | 
20 | logging.info("")
21 | logging.info("")
22 | logging.info(" |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|")
23 | logging.info(" | Generating Simplified Street Networks |")
24 | logging.info(" |_______________________________________|")
25 | logging.info("")
26 | logging.info("")
27 | logging.info("")
28 | 
29 | fua_city = {
30 |     # 1133: "aleppo",
31 |     # 869: "auckland",
32 |     # 4617: "bucaramanga",
33 |     # 809: "douala",
34 |     # 1656: "liege",
35 |     # 4881: "slc",
36 |     8989: "wuhan",
37 | }
38 | 
39 | # dict of cityname: fua ID
40 | city_fua = {c: f for f, c in fua_city.items()}
41 | 
42 | for city, fua in city_fua.items():
43 |     t1 = time.time()
44 |     aoi = f"{city}_{fua}"
45 | 
46 |     logging.info("")
47 |     logging.info("")
48 |     logging.info("")
49 |     logging.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >>>>")
50 |     logging.info("")
51 |     logging.info("")
52 |     logging.info(f"  ** {aoi} **")
53 |     logging.info("")
54 |     logging.info("")
55 | 
56 |     # input data
57 |     original = geopandas.read_parquet(pathlib.Path(aoi, "original.parquet"))
58 | 
59 |     # output data
60 |     simplified = neatnet.neatify(original)
61 |     simplified.to_parquet(pathlib.Path(aoi, "simplified.parquet"))
62 | 
63 |     t2 = round((time.time() - t1) / 60.0, 2)
64 | 
65 |     logging.info("")
66 |     logging.info("")
67 |     logging.info(f"\t{aoi} runtime: {t2} minutes")
68 |     logging.info("")
69 |     logging.info("")
70 |     logging.info("")
71 |     logging.info("<<<< ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
72 |     logging.info("")
73 | 
74 | endtime_time = round((time.time() - start_time) / 60.0, 2)
75 | 
76 | logging.info("")
77 | logging.info("")
78 | logging.info(f"Total runtime: {endtime_time} minutes")
79 | logging.info(
80 |     "========================================================================="
81 | )
82 | logging.info("")
83 | logging.info("")
84 | 


--------------------------------------------------------------------------------
/data/liege_1656/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/liege_1656/original.parquet


--------------------------------------------------------------------------------
/data/liege_1656/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/liege_1656/simplified.parquet


--------------------------------------------------------------------------------
/data/slc_4881/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/slc_4881/original.parquet


--------------------------------------------------------------------------------
/data/slc_4881/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/slc_4881/simplified.parquet


--------------------------------------------------------------------------------
/data/wuhan_8989/original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/wuhan_8989/original.parquet


--------------------------------------------------------------------------------
/data/wuhan_8989/simplified.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/data/wuhan_8989/simplified.parquet


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | .logo-frontpage {
2 |     margin-bottom: 30px;
3 | }


--------------------------------------------------------------------------------
/docs/source/_static/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/docs/source/_static/icon.png


--------------------------------------------------------------------------------
/docs/source/_static/logo.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="325" height="58" viewBox="0 0 325 58">
 3 | <defs>
 4 | <g>
 5 | <g id="glyph-0-0">
 6 | <path d="M 2.875 7.78125 C 2.359375 9.953125 2.234375 10.390625 -0.578125 10.390625 C -1.34375 10.390625 -2.046875 10.390625 -2.046875 11.609375 C -2.046875 12.109375 -1.71875 12.375 -1.21875 12.375 C 0.515625 12.375 2.359375 12.171875 4.140625 12.171875 C 6.25 12.171875 8.421875 12.375 10.453125 12.375 C 10.78125 12.375 11.609375 12.375 11.609375 11.09375 C 11.609375 10.390625 10.96875 10.390625 10.078125 10.390625 C 6.890625 10.390625 6.890625 9.953125 6.890625 9.375 C 6.890625 8.609375 9.5625 -1.78125 10.015625 -3.375 C 10.84375 -1.53125 12.625 0.703125 15.875 0.703125 C 23.28125 0.703125 31.25 -8.609375 31.25 -17.984375 C 31.25 -23.96875 27.609375 -28.1875 22.765625 -28.1875 C 19.578125 -28.1875 16.515625 -25.890625 14.40625 -23.40625 C 13.765625 -26.84375 11.03125 -28.1875 8.671875 -28.1875 C 5.734375 -28.1875 4.53125 -25.703125 3.953125 -24.546875 C 2.8125 -22.375 1.984375 -18.5625 1.984375 -18.359375 C 1.984375 -17.734375 2.609375 -17.734375 2.734375 -17.734375 C 3.375 -17.734375 3.4375 -17.796875 3.828125 -19.1875 C 4.90625 -23.71875 6.1875 -26.78125 8.484375 -26.78125 C 9.5625 -26.78125 10.453125 -26.265625 10.453125 -23.84375 C 10.453125 -22.375 10.265625 -21.6875 10.015625 -20.59375 Z M 14.09375 -19.890625 C 14.53125 -21.609375 16.265625 -23.40625 17.40625 -24.359375 C 19.640625 -26.328125 21.484375 -26.78125 22.578125 -26.78125 C 25.125 -26.78125 26.65625 -24.546875 26.65625 -20.78125 C 26.65625 -17.03125 24.546875 -9.6875 23.40625 -7.265625 C 21.234375 -2.8125 18.171875 -0.703125 15.8125 -0.703125 C 11.609375 -0.703125 10.78125 -6 10.78125 -6.375 C 10.78125 -6.5 10.78125 -6.625 10.96875 -7.390625 Z M 14.09375 -19.890625 "/>
 7 | </g>
 8 | <g id="glyph-0-1">
 9 | <path d="M 16.453125 -43.546875 C 16.453125 -43.609375 16.453125 -44.25 15.625 -44.25 C 14.15625 -44.25 9.5 -43.75 7.84375 -43.609375 C 7.328125 -43.546875 6.625 -43.484375 6.625 -42.28125 C 6.625 -41.578125 7.265625 -41.578125 8.21875 -41.578125 C 11.28125 -41.578125 11.34375 -41 11.34375 -40.484375 L 11.15625 -39.21875 L 3.125 -7.328125 C 2.9375 -6.625 2.8125 -6.1875 2.8125 -5.171875 C 2.8125 -1.53125 5.609375 0.703125 8.609375 0.703125 C 10.71875 0.703125 12.3125 -0.578125 13.390625 -2.875 C 14.53125 -5.296875 15.296875 -8.984375 15.296875 -9.125 C 15.296875 -9.75 14.734375 -9.75 14.53125 -9.75 C 13.90625 -9.75 13.84375 -9.5 13.640625 -8.609375 C 12.5625 -4.46875 11.34375 -0.703125 8.796875 -0.703125 C 6.890625 -0.703125 6.890625 -2.734375 6.890625 -3.640625 C 6.890625 -5.171875 6.953125 -5.484375 7.265625 -6.703125 Z M 16.453125 -43.546875 "/>
10 | </g>
11 | <g id="glyph-0-2">
12 | <path d="M 23.78125 -24.109375 C 22.640625 -26.46875 20.78125 -28.1875 17.921875 -28.1875 C 10.453125 -28.1875 2.546875 -18.8125 2.546875 -9.5 C 2.546875 -3.5 6.0625 0.703125 11.03125 0.703125 C 12.3125 0.703125 15.5 0.453125 19.328125 -4.078125 C 19.828125 -1.40625 22.0625 0.703125 25.125 0.703125 C 27.359375 0.703125 28.828125 -0.765625 29.84375 -2.8125 C 30.921875 -5.09375 31.75 -8.984375 31.75 -9.125 C 31.75 -9.75 31.1875 -9.75 30.984375 -9.75 C 30.359375 -9.75 30.28125 -9.5 30.09375 -8.609375 C 29.015625 -4.46875 27.859375 -0.703125 25.25 -0.703125 C 23.53125 -0.703125 23.34375 -2.359375 23.34375 -3.640625 C 23.34375 -5.03125 23.46875 -5.546875 24.171875 -8.359375 C 24.875 -11.03125 25 -11.671875 25.5625 -14.09375 L 27.859375 -23.015625 C 28.3125 -24.8125 28.3125 -24.9375 28.3125 -25.1875 C 28.3125 -26.265625 27.546875 -26.90625 26.46875 -26.90625 C 24.9375 -26.90625 23.96875 -25.5 23.78125 -24.109375 Z M 19.640625 -7.59375 C 19.328125 -6.4375 19.328125 -6.3125 18.359375 -5.234375 C 15.5625 -1.71875 12.9375 -0.703125 11.15625 -0.703125 C 7.96875 -0.703125 7.078125 -4.203125 7.078125 -6.703125 C 7.078125 -9.890625 9.125 -17.734375 10.578125 -20.65625 C 12.5625 -24.421875 15.4375 -26.78125 17.984375 -26.78125 C 22.125 -26.78125 23.015625 -21.546875 23.015625 -21.171875 C 23.015625 -20.78125 22.890625 -20.40625 22.828125 -20.09375 Z M 19.640625 -7.59375 "/>
13 | </g>
14 | <g id="glyph-0-3">
15 | <path d="M 25.25 -24.234375 C 24.234375 -24.234375 23.34375 -24.234375 22.453125 -23.34375 C 21.421875 -22.375 21.296875 -21.296875 21.296875 -20.84375 C 21.296875 -19.328125 22.453125 -18.625 23.65625 -18.625 C 25.5 -18.625 27.234375 -20.15625 27.234375 -22.703125 C 27.234375 -25.828125 24.234375 -28.1875 19.703125 -28.1875 C 11.09375 -28.1875 2.609375 -19.0625 2.609375 -10.078125 C 2.609375 -4.34375 6.3125 0.703125 12.9375 0.703125 C 22.0625 0.703125 27.421875 -6.0625 27.421875 -6.828125 C 27.421875 -7.203125 27.03125 -7.65625 26.65625 -7.65625 C 26.328125 -7.65625 26.203125 -7.53125 25.828125 -7.015625 C 20.78125 -0.703125 13.84375 -0.703125 13.078125 -0.703125 C 9.0625 -0.703125 7.328125 -3.828125 7.328125 -7.65625 C 7.328125 -10.265625 8.609375 -16.453125 10.78125 -20.40625 C 12.75 -24.046875 16.265625 -26.78125 19.765625 -26.78125 C 21.9375 -26.78125 24.359375 -25.953125 25.25 -24.234375 Z M 25.25 -24.234375 "/>
16 | </g>
17 | <g id="glyph-0-4">
18 | <path d="M 11.921875 -14.734375 C 13.765625 -14.734375 18.484375 -14.859375 21.6875 -16.203125 C 26.140625 -18.109375 26.46875 -21.875 26.46875 -22.765625 C 26.46875 -25.5625 24.046875 -28.1875 19.640625 -28.1875 C 12.5625 -28.1875 2.9375 -22 2.9375 -10.84375 C 2.9375 -4.34375 6.703125 0.703125 12.9375 0.703125 C 22.0625 0.703125 27.421875 -6.0625 27.421875 -6.828125 C 27.421875 -7.203125 27.03125 -7.65625 26.65625 -7.65625 C 26.328125 -7.65625 26.203125 -7.53125 25.828125 -7.015625 C 20.78125 -0.703125 13.84375 -0.703125 13.078125 -0.703125 C 8.09375 -0.703125 7.53125 -6.0625 7.53125 -8.09375 C 7.53125 -8.859375 7.59375 -10.84375 8.546875 -14.734375 Z M 8.921875 -16.125 C 11.40625 -25.828125 17.984375 -26.78125 19.640625 -26.78125 C 22.640625 -26.78125 24.359375 -24.9375 24.359375 -22.765625 C 24.359375 -16.125 14.15625 -16.125 11.546875 -16.125 Z M 8.921875 -16.125 "/>
19 | </g>
20 | <g id="glyph-0-5">
21 | <path d="M 18.296875 -43.546875 C 18.296875 -43.609375 18.296875 -44.25 17.46875 -44.25 C 16 -44.25 11.34375 -43.75 9.6875 -43.609375 C 9.1875 -43.546875 8.484375 -43.484375 8.484375 -42.34375 C 8.484375 -41.578125 9.0625 -41.578125 10.015625 -41.578125 C 13.078125 -41.578125 13.203125 -41.125 13.203125 -40.484375 L 13.015625 -39.21875 L 3.765625 -2.484375 C 3.5 -1.59375 3.5 -1.46875 3.5 -1.078125 C 3.5 0.375 4.78125 0.703125 5.359375 0.703125 C 6.375 0.703125 7.390625 -0.0625 7.71875 -0.953125 L 8.921875 -5.796875 L 10.328125 -11.546875 C 10.71875 -12.9375 11.09375 -14.34375 11.40625 -15.8125 C 11.546875 -16.203125 12.046875 -18.296875 12.109375 -18.6875 C 12.3125 -19.25 14.28125 -22.828125 16.453125 -24.546875 C 17.859375 -25.5625 19.828125 -26.78125 22.578125 -26.78125 C 25.3125 -26.78125 26.015625 -24.609375 26.015625 -22.3125 C 26.015625 -18.875 23.59375 -11.921875 22.0625 -8.03125 C 21.546875 -6.5625 21.234375 -5.796875 21.234375 -4.53125 C 21.234375 -1.53125 23.46875 0.703125 26.46875 0.703125 C 32.453125 0.703125 34.8125 -8.609375 34.8125 -9.125 C 34.8125 -9.75 34.234375 -9.75 34.046875 -9.75 C 33.40625 -9.75 33.40625 -9.5625 33.09375 -8.609375 C 32.140625 -5.234375 30.09375 -0.703125 26.59375 -0.703125 C 25.5 -0.703125 25.0625 -1.34375 25.0625 -2.8125 C 25.0625 -4.40625 25.640625 -5.9375 26.203125 -7.328125 C 27.234375 -10.078125 30.09375 -17.65625 30.09375 -21.359375 C 30.09375 -25.5 27.546875 -28.1875 22.765625 -28.1875 C 18.75 -28.1875 15.6875 -26.203125 13.328125 -23.28125 Z M 18.296875 -43.546875 "/>
22 | </g>
23 | <g id="glyph-0-6">
24 | <path d="M 29.90625 -17.40625 C 29.90625 -24.046875 25.4375 -28.1875 19.703125 -28.1875 C 11.15625 -28.1875 2.609375 -19.125 2.609375 -10.078125 C 2.609375 -3.765625 6.890625 0.703125 12.8125 0.703125 C 21.296875 0.703125 29.90625 -8.09375 29.90625 -17.40625 Z M 12.875 -0.703125 C 10.140625 -0.703125 7.328125 -2.671875 7.328125 -7.65625 C 7.328125 -10.78125 8.984375 -17.65625 11.03125 -20.921875 C 14.21875 -25.828125 17.859375 -26.78125 19.640625 -26.78125 C 23.34375 -26.78125 25.25 -23.71875 25.25 -19.890625 C 25.25 -17.40625 23.96875 -10.71875 21.546875 -6.5625 C 19.328125 -2.875 15.8125 -0.703125 12.875 -0.703125 Z M 12.875 -0.703125 "/>
25 | </g>
26 | <g id="glyph-0-7">
27 | <path d="M 32.90625 -43.546875 C 32.90625 -43.609375 32.90625 -44.25 32.078125 -44.25 C 31.125 -44.25 25.0625 -43.671875 23.96875 -43.546875 C 23.46875 -43.484375 23.078125 -43.171875 23.078125 -42.34375 C 23.078125 -41.578125 23.65625 -41.578125 24.609375 -41.578125 C 27.671875 -41.578125 27.796875 -41.125 27.796875 -40.484375 L 27.609375 -39.21875 L 23.78125 -24.109375 C 22.640625 -26.46875 20.78125 -28.1875 17.921875 -28.1875 C 10.453125 -28.1875 2.546875 -18.8125 2.546875 -9.5 C 2.546875 -3.5 6.0625 0.703125 11.03125 0.703125 C 12.3125 0.703125 15.5 0.453125 19.328125 -4.078125 C 19.828125 -1.40625 22.0625 0.703125 25.125 0.703125 C 27.359375 0.703125 28.828125 -0.765625 29.84375 -2.8125 C 30.921875 -5.09375 31.75 -8.984375 31.75 -9.125 C 31.75 -9.75 31.1875 -9.75 30.984375 -9.75 C 30.359375 -9.75 30.28125 -9.5 30.09375 -8.609375 C 29.015625 -4.46875 27.859375 -0.703125 25.25 -0.703125 C 23.53125 -0.703125 23.34375 -2.359375 23.34375 -3.640625 C 23.34375 -5.171875 23.46875 -5.609375 23.71875 -6.703125 Z M 19.640625 -7.59375 C 19.328125 -6.4375 19.328125 -6.3125 18.359375 -5.234375 C 15.5625 -1.71875 12.9375 -0.703125 11.15625 -0.703125 C 7.96875 -0.703125 7.078125 -4.203125 7.078125 -6.703125 C 7.078125 -9.890625 9.125 -17.734375 10.578125 -20.65625 C 12.5625 -24.421875 15.4375 -26.78125 17.984375 -26.78125 C 22.125 -26.78125 23.015625 -21.546875 23.015625 -21.171875 C 23.015625 -20.78125 22.890625 -20.40625 22.828125 -20.09375 Z M 19.640625 -7.59375 "/>
28 | </g>
29 | <g id="glyph-0-8">
30 | <path d="M 5.609375 -3.765625 C 5.421875 -2.8125 5.03125 -1.34375 5.03125 -1.015625 C 5.03125 0.125 5.9375 0.703125 6.890625 0.703125 C 7.65625 0.703125 8.796875 0.1875 9.25 -1.078125 C 9.375 -1.34375 11.546875 -10.015625 11.796875 -11.15625 C 12.3125 -13.265625 13.453125 -17.734375 13.84375 -19.453125 C 14.09375 -20.28125 15.875 -23.28125 17.40625 -24.671875 C 17.921875 -25.125 19.765625 -26.78125 22.515625 -26.78125 C 24.171875 -26.78125 25.125 -26.015625 25.1875 -26.015625 C 23.28125 -25.703125 21.875 -24.171875 21.875 -22.515625 C 21.875 -21.484375 22.578125 -20.28125 24.296875 -20.28125 C 26.015625 -20.28125 27.796875 -21.75 27.796875 -24.046875 C 27.796875 -26.265625 25.765625 -28.1875 22.515625 -28.1875 C 18.359375 -28.1875 15.5625 -25.0625 14.34375 -23.28125 C 13.84375 -26.140625 11.546875 -28.1875 8.546875 -28.1875 C 5.609375 -28.1875 4.40625 -25.703125 3.828125 -24.546875 C 2.671875 -22.375 1.84375 -18.5625 1.84375 -18.359375 C 1.84375 -17.734375 2.484375 -17.734375 2.609375 -17.734375 C 3.25 -17.734375 3.3125 -17.796875 3.703125 -19.1875 C 4.78125 -23.71875 6.0625 -26.78125 8.359375 -26.78125 C 9.4375 -26.78125 10.328125 -26.265625 10.328125 -23.84375 C 10.328125 -22.515625 10.140625 -21.8125 9.3125 -18.484375 Z M 5.609375 -3.765625 "/>
31 | </g>
32 | </g>
33 | </defs>
34 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
35 | <use xlink:href="#glyph-0-0" x="3.0012" y="45.1518"/>
36 | <use xlink:href="#glyph-0-1" x="35.079178" y="45.1518"/>
37 | </g>
38 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
39 | <use xlink:href="#glyph-0-2" x="55.361438" y="45.1518"/>
40 | </g>
41 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
42 | <use xlink:href="#glyph-0-3" x="89.097192" y="45.1518"/>
43 | </g>
44 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
45 | <use xlink:href="#glyph-0-4" x="116.711925" y="45.1518"/>
46 | </g>
47 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
48 | <use xlink:href="#glyph-0-5" x="146.43076" y="45.1518"/>
49 | <use xlink:href="#glyph-0-6" x="183.163264" y="45.1518"/>
50 | </g>
51 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
52 | <use xlink:href="#glyph-0-1" x="214.093551" y="45.1518"/>
53 | </g>
54 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
55 | <use xlink:href="#glyph-0-7" x="234.31205" y="45.1518"/>
56 | <use xlink:href="#glyph-0-4" x="267.473959" y="45.1518"/>
57 | </g>
58 | <g fill="rgb(0%, 0%, 0%)" fill-opacity="1">
59 | <use xlink:href="#glyph-0-8" x="297.192793" y="45.1518"/>
60 | </g>
61 | </svg>
62 | 


--------------------------------------------------------------------------------
/docs/source/_static/references.bib:
--------------------------------------------------------------------------------
 1 | @article{fleischmann_shape-based_2024,
 2 |   title = {A Shape-Based Heuristic for the Detection of Urban Block Artifacts in Street Networks},
 3 |   author = {Fleischmann, Martin and Vybornova, Anastassia},
 4 |   year = {2024},
 5 |   month = jun,
 6 |   journal = {Journal of Spatial Information Science},
 7 |   volume = {28},
 8 |   pages = {75--102},
 9 |   doi = {10.5311/JOSIS.2024.28.319},
10 |   urldate = {2024-06-27},
11 |   abstract = {Street networks are ubiquitous components of cities, guiding their development and enabling movement from place to place; street networks are also the critical components of many urban analytical methods. However, their graph representation is often designed primarily for transportation purposes. This representation is less suitable for other use cases where transportation networks need to be simplified as a mandatory pre-processing step, e.g., in the case of morphological analysis, visual navigation, or drone flight routing. While the urgent demand for automated pre-processing methods comes from various fields, it is still an unsolved challenge. In this article, we tackle this challenge by proposing a cheap computational heuristic for the identification of ``face artifacts'', i.e., geometries that are enclosed by transportation edges but do not represent urban blocks. The heuristic is based on combining the frequency distributions of shape compactness metrics and area measurements of street network face polygons. We test our method on 131 globally sampled large cities and show that it successfully identifies face artifacts in 89{\textbackslash}\% of analyzed cities. Our heuristic of detecting artifacts caused by data being collected for another purpose is the first step towards an automated street network simplification workflow. Moreover, the proposed face artifact index uncovers differences in structural rules guiding the development of cities in different world regions.},
12 |   copyright = {Creative Commons Attribution-NoDerivatives 4.0 International Licence (CC-BY-ND)},
13 |   langid = {english}
14 | }
15 | 


--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
 1 | .. _reference:
 2 | 
 3 | .. currentmodule:: neatnet
 4 | 
 5 | API reference
 6 | =============
 7 | 
 8 | The API reference provides an overview of all public functions in ``neatnet``.
 9 | 
10 | Network Simplification Routines
11 | -------------------------------
12 | 
13 | The top-level function that performs complete adaptive simplification of street networks
14 | is the primary API of ``neatnet``.
15 | 
16 | .. autosummary::
17 |    :toctree: generated/
18 | 
19 |    neatify
20 | 
21 | The minimal topology fixing can be done using another routine:
22 | 
23 | .. autosummary::
24 |    :toctree: generated/
25 | 
26 |    fix_topology
27 | 
28 | 
29 | Node Simplification
30 | -------------------
31 | 
32 | Some of the individual components are also exposed as independent functions (note that
33 | most are consumed by :func:`neatify`).
34 | 
35 | 
36 | A subset of functions dealing with network nodes:
37 | 
38 | .. autosummary::
39 |    :toctree: generated/
40 | 
41 |    consolidate_nodes
42 |    remove_interstitial_nodes
43 |    induce_nodes
44 | 
45 | Face artifact detection
46 | -----------------------
47 | 
48 | A subset dealing with face artifacts:
49 | 
50 | .. autosummary::
51 |    :toctree: generated/
52 | 
53 |    FaceArtifacts
54 |    get_artifacts
55 | 
56 | Gap filling
57 | -----------
58 | 
59 | Snapping and extending lines in case of imprecise topology:
60 | 
61 | .. autosummary::
62 |    :toctree: generated/
63 | 
64 |    close_gaps
65 |    extend_lines
66 | 
67 | Internal components
68 | -------------------
69 | 
70 | For debugging purposes, users may use some parts of the internal API used within :func:`neatify`.
71 | 
72 | .. autosummary::
73 |    :toctree: generated/
74 | 
75 |    get_artifacts
76 |    neatify_loop
77 |    neatify_singletons
78 |    neatify_pairs
79 |    neatify_clusters
80 | 
81 | None of the other functions is intended for public use and their API can change without a warning.


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | # -- Project information -----------------------------------------------------
 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 8 | import os
 9 | import sys
10 | 
11 | import sphinx_autosummary_accessors
12 | 
13 | sys.path.insert(0, os.path.abspath("../neatnet/"))
14 | 
15 | import neatnet  # noqa
16 | 
17 | project = "neatnet"
18 | copyright = "2024-, neatnet Developers"  # noqa: A001
19 | author = "Martin Fleischmann, Anastassia Vybornova, James D. Gaboardi"
20 | 
21 | version = neatnet.__version__
22 | release = neatnet.__version__
23 | 
24 | language = "en"
25 | 
26 | # -- General configuration ---------------------------------------------------
27 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
28 | 
29 | extensions = [
30 |     "numpydoc",
31 |     "myst_nb",
32 |     "sphinx.ext.autodoc",
33 |     "sphinx.ext.autosummary",
34 |     "sphinx.ext.intersphinx",
35 |     "sphinx.ext.viewcode",
36 |     "sphinx.ext.mathjax",
37 |     "sphinxcontrib.bibtex",
38 |     "sphinx_autosummary_accessors",
39 |     "sphinx_copybutton",
40 | ]
41 | 
42 | bibtex_bibfiles = ["_static/references.bib"]
43 | 
44 | master_doc = "index"
45 | 
46 | templates_path = [
47 |     "_templates",
48 |     sphinx_autosummary_accessors.templates_path,
49 | ]
50 | exclude_patterns = []
51 | 
52 | intersphinx_mapping = {
53 |     "esda": (
54 |         "https://pysal.org/esda/",
55 |         "https://pysal.org/esda//objects.inv",
56 |     ),
57 |     "geopandas": ("https://geopandas.org/en/latest", None),
58 |     "libpysal": (
59 |         "https://pysal.org/libpysal/",
60 |         "https://pysal.org/libpysal//objects.inv",
61 |     ),
62 |     "momepy": ("http://docs.momepy.org/en/stable/", None),
63 |     "pandas": ("https://pandas.pydata.org/docs", None),
64 |     "pyproj": ("https://pyproj4.github.io/pyproj/latest/", None),
65 |     "python": ("https://docs.python.org/3", None),
66 |     "shapely": ("https://shapely.readthedocs.io/en/latest/", None),
67 | }
68 | 
69 | # -- Options for HTML output -------------------------------------------------
70 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
71 | 
72 | autosummary_generate = True
73 | numpydoc_show_class_members = False
74 | numpydoc_use_plots = True
75 | class_members_toctree = True
76 | numpydoc_show_inherited_class_members = True
77 | numpydoc_xref_param_type = True
78 | autodoc_default_options = {"members": True, "undoc-members": True}
79 | plot_include_source = True
80 | 
81 | html_theme = "sphinx_book_theme"
82 | html_static_path = ["_static"]
83 | html_css_files = ["custom.css"]
84 | # html_logo = "_static/logo.svg"
85 | # html_favicon = "_static/icon.png"
86 | html_theme_options = {
87 |     "use_sidenotes": True,
88 | }
89 | nb_execution_mode = "off"
90 | autodoc_typehints = "none"
91 | 


--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
 1 | ```{include} ../../README.md
 2 | ```
 3 | 
 4 | ```{toctree}
 5 | :hidden:
 6 | :caption: User Guide
 7 | intro
 8 | simple_preprocessing
 9 | ```
10 | 
11 | ```{toctree}
12 | :hidden:
13 | :caption: API
14 | api
15 | 
16 | ```
17 | 
18 | ```{toctree}
19 | :hidden:
20 | :caption: References
21 | references
22 | ```
23 | 
24 | ```{toctree}
25 | :hidden:
26 | :caption: For contributors
27 | GitHub <https://github.com/uscuni/neatnet>
28 | ```
29 | 


--------------------------------------------------------------------------------
/docs/source/references.rst:
--------------------------------------------------------------------------------
1 | .. reference for the docs
2 | 
3 | References
4 | ==========
5 | 
6 | .. bibliography:: _static/references.bib
7 |    :all:
8 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: neatnet
 3 | channels:
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.13
 7 |   - esda>=2.6.0
 8 |   - geopandas>=1.0.1
 9 |   - libpysal>=4.12.1
10 |   - momepy>=0.9.0
11 |   - networkx>=3.3
12 |   - numpy>=2
13 |   - pandas>=2.2.3
14 |   - scipy>=1.14.1
15 |   - shapely>=2.0.6
16 |   - scikit-learn>=1.2.0
17 |   # testing
18 |   - codecov
19 |   - coverage
20 |   - matplotlib
21 |   - mypy>=1.15.0,<2
22 |   - pre-commit
23 |   - pyarrow>=17.0
24 |   - pytest
25 |   - pytest-cov
26 |   - pytest-xdist
27 |   - ruff
28 |   - yamllint
29 |   # docs
30 |   - ipykernel
31 |   - ipywidgets
32 |   - jupyterlab
33 |   - myst-nb
34 |   - numpydoc
35 |   - sphinx
36 |   - sphinxcontrib-bibtex
37 |   - sphinx-autosummary-accessors
38 |   - sphinx-book-theme
39 |   - sphinx-copybutton
40 | 


--------------------------------------------------------------------------------
/neatnet/__init__.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | from importlib.metadata import PackageNotFoundError, version
 3 | 
 4 | from . import simplify
 5 | from .artifacts import FaceArtifacts, get_artifacts
 6 | from .gaps import close_gaps, extend_lines
 7 | from .nodes import (
 8 |     consolidate_nodes,
 9 |     fix_topology,
10 |     induce_nodes,
11 |     remove_interstitial_nodes,
12 |     split,
13 | )
14 | from .simplify import (
15 |     neatify,
16 |     neatify_clusters,
17 |     neatify_loop,
18 |     neatify_pairs,
19 |     neatify_singletons,
20 | )
21 | 
22 | with contextlib.suppress(PackageNotFoundError):
23 |     __version__ = version("neatnet")
24 | 


--------------------------------------------------------------------------------
/neatnet/continuity.py:
--------------------------------------------------------------------------------
  1 | import geopandas
  2 | import momepy
  3 | 
  4 | 
  5 | def continuity(
  6 |     streets: geopandas.GeoDataFrame, angle_threshold: float = 120
  7 | ) -> tuple[geopandas.GeoDataFrame, momepy.COINS]:
  8 |     """Assign COINS-based information to streets.
  9 | 
 10 |     Parameters
 11 |     ----------
 12 |     streets : geopandas.GeoDataFrame
 13 |         Street network.
 14 |     angle_threshold : float = 120
 15 |         See the ``angle_threshold`` keyword argument in ``momepy.COINS()``.
 16 | 
 17 |     Returns
 18 |     -------
 19 |     streets : geopandas.GeoDataFrame
 20 |         The input ``streets`` with additional columns describing COINS information.
 21 |     coins : momepy.COINS
 22 |         **This is not used in production.**
 23 | 
 24 |     Notes
 25 |     -----
 26 |     The returned ``coins`` object is not used in production, but is
 27 |     very helpful in testing & debugging. See gh:neatnet#49.
 28 |     """
 29 |     streets = streets.copy()
 30 | 
 31 |     # Measure continuity of street network
 32 |     coins = momepy.COINS(streets, angle_threshold=angle_threshold, flow_mode=True)
 33 | 
 34 |     # Assing continuity group
 35 |     group, end = coins.stroke_attribute(True)
 36 |     streets["coins_group"] = group
 37 |     streets["coins_end"] = end
 38 | 
 39 |     # Assign length of each continuity group and a number of segments within the group.
 40 |     coins_grouped = streets.length.groupby(streets.coins_group)
 41 |     streets["coins_len"] = coins_grouped.sum()[streets.coins_group].values
 42 |     streets["coins_count"] = coins_grouped.size()[streets.coins_group].values
 43 | 
 44 |     return streets, coins
 45 | 
 46 | 
 47 | def get_stroke_info(
 48 |     artifacts: geopandas.GeoSeries | geopandas.GeoDataFrame,
 49 |     streets: geopandas.GeoSeries | geopandas.GeoDataFrame,
 50 | ) -> tuple[list[int], list[int], list[int], list[int]]:
 51 |     """Generate information about strokes within ``artifacts`` and the
 52 |     resulting lists can be assigned as columns to ``artifacts``. Classifies
 53 |     the strokes within the CES typology.
 54 | 
 55 |         * 'continuing' strokes - continues before and after artifact.
 56 |         * 'ending' strokes - continues only at one end.
 57 |         * 'single' strokes - does not continue.
 58 | 
 59 |     Parameters
 60 |     ----------
 61 |     artifacts : geopandas.GeoSeries | geopandas.GeoDataFrame
 62 |         Polygons representing the artifacts.
 63 |     streets : geopandas.GeoSeries | geopandas.GeoDataFrame
 64 |         LineStrings representing the street network.
 65 | 
 66 |     Returns
 67 |     -------
 68 |     strokes : list[int]
 69 |         All strokes counts.
 70 |     c_ : list[int]
 71 |         Counts for 'continuing' strokes - continues before and after artifact.
 72 |     e_ : list[int]
 73 |         Counts for 'ending' strokes - continues only at one end.
 74 |     s_ : list[int]
 75 |         Counts for 'single' strokes - does not continue.
 76 |     """
 77 |     strokes = []
 78 |     c_ = []
 79 |     e_ = []
 80 |     s_ = []
 81 |     for geom in artifacts.geometry:
 82 |         singles = 0
 83 |         ends = 0
 84 |         edges = streets.iloc[streets.sindex.query(geom, predicate="covers")]
 85 |         ecg = edges.coins_group
 86 |         if ecg.nunique() == 1 and edges.shape[0] == edges.coins_count.iloc[0]:
 87 |             # roundabout special case
 88 |             singles = 1
 89 |             mains = 0
 90 |         else:
 91 |             all_ends = edges[edges.coins_end]
 92 |             ae_cg = all_ends.coins_group
 93 |             mains = edges[~ecg.isin(ae_cg)].coins_group.nunique()
 94 |             visited = []
 95 |             for coins_count, group in zip(all_ends.coins_count, ae_cg, strict=True):
 96 |                 if group not in visited:
 97 |                     if coins_count == (ecg == group).sum():
 98 |                         singles += 1
 99 |                         visited.append(group)
100 |                     else:
101 |                         # do not add to visited -- may be disjoint within the artifact
102 |                         ends += 1
103 |         strokes.append(ecg.nunique())
104 |         c_.append(mains)
105 |         e_.append(ends)
106 |         s_.append(singles)
107 |     return strokes, c_, e_, s_
108 | 


--------------------------------------------------------------------------------
/neatnet/gaps.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import operator
  3 | 
  4 | import geopandas as gpd
  5 | import numpy as np
  6 | import shapely
  7 | 
  8 | __all__ = [
  9 |     "close_gaps",
 10 |     "extend_lines",
 11 | ]
 12 | 
 13 | 
 14 | def close_gaps(
 15 |     gdf: gpd.GeoDataFrame | gpd.GeoSeries, tolerance: float
 16 | ) -> gpd.GeoSeries:
 17 |     """Close gaps in LineString geometry where it should be contiguous.
 18 | 
 19 |     Snaps both lines to a centroid of a gap in between.
 20 | 
 21 |     Parameters
 22 |     ----------
 23 |     gdf : GeoDataFrame | GeoSeries
 24 |         GeoDataFrame  or GeoSeries containing LineString representation of a network.
 25 |     tolerance : float
 26 |         nodes within a tolerance will be snapped together
 27 | 
 28 |     Returns
 29 |     -------
 30 |     GeoSeries
 31 | 
 32 |     See also
 33 |     --------
 34 |     neatnet.extend_lines
 35 |     neatnet.remove_interstitial_nodes
 36 |     """
 37 |     geom = gdf.geometry.array
 38 |     coords = shapely.get_coordinates(geom)
 39 |     indices = shapely.get_num_coordinates(geom)
 40 | 
 41 |     # generate a list of start and end coordinates and create point geometries
 42 |     edges = [0]
 43 |     i = 0
 44 |     for ind in indices:
 45 |         ix = i + ind
 46 |         edges.append(ix - 1)
 47 |         edges.append(ix)
 48 |         i = ix
 49 |     edges = edges[:-1]
 50 |     points = shapely.points(np.unique(coords[edges], axis=0))
 51 | 
 52 |     buffered = shapely.buffer(points, tolerance / 2)
 53 | 
 54 |     dissolved = shapely.union_all(buffered)
 55 | 
 56 |     exploded = [
 57 |         shapely.get_geometry(dissolved, i)
 58 |         for i in range(shapely.get_num_geometries(dissolved))
 59 |     ]
 60 | 
 61 |     centroids = shapely.centroid(exploded)
 62 | 
 63 |     snapped = shapely.snap(geom, shapely.union_all(centroids), tolerance)
 64 | 
 65 |     return gpd.GeoSeries(snapped, crs=gdf.crs)
 66 | 
 67 | 
 68 | def extend_lines(
 69 |     gdf: gpd.GeoDataFrame,
 70 |     tolerance: float,
 71 |     *,
 72 |     target: None | gpd.GeoDataFrame | gpd.GeoSeries = None,
 73 |     barrier: None | gpd.GeoDataFrame | gpd.GeoSeries = None,
 74 |     extension: int | float = 0,
 75 | ) -> gpd.GeoDataFrame:
 76 |     """Extends lines from ``gdf`` to itself or target within a set tolerance.
 77 | 
 78 |     Extends unjoined ends of LineString segments to join with other segments or target.
 79 |     If ``target`` is passed, extend lines to target. Otherwise extend lines to itself.
 80 | 
 81 |     If ``barrier`` is passed, each extended line is checked for intersection with
 82 |     ``barrier``. If they intersect, extended line is not returned. This can be
 83 |     useful if you don't want to extend street network segments through buildings.
 84 | 
 85 |     Parameters
 86 |     ----------
 87 |     gdf : GeoDataFrame
 88 |         GeoDataFrame containing LineString geometry
 89 |     tolerance : float
 90 |         tolerance in snapping (by how much could be each segment
 91 |         extended).
 92 |     target : None | GeoDataFrame | GeoSeries
 93 |         target geometry to which ``gdf`` gets extended. Has to be
 94 |         (Multi)LineString geometry.
 95 |     barrier : None | GeoDataFrame | GeoSeries = None
 96 |         extended line is not used if it intersects barrier
 97 |     extension : int | float = 0
 98 |         by how much to extend line beyond the snapped geometry. Useful
 99 |         when creating enclosures to avoid floating point imprecision.
100 | 
101 |     Returns
102 |     -------
103 |     GeoDataFrame
104 |         GeoDataFrame of with extended geometry
105 | 
106 |     See also
107 |     --------
108 |     neatnet.close_gaps
109 |     neatnet.remove_interstitial_nodes
110 |     """
111 | 
112 |     # explode to avoid MultiLineStrings
113 |     # reset index due to the bug in GeoPandas explode
114 |     df = gdf.reset_index(drop=True).explode(ignore_index=True)
115 | 
116 |     if target is None:
117 |         target = df
118 |         itself = True
119 |     else:
120 |         itself = False
121 | 
122 |     # get underlying shapely geometry
123 |     geom = df.geometry.array
124 | 
125 |     # extract array of coordinates and number per geometry
126 |     coords = shapely.get_coordinates(geom)
127 |     indices = shapely.get_num_coordinates(geom)
128 | 
129 |     # generate a list of start and end coordinates and create point geometries
130 |     edges = [0]
131 |     i = 0
132 |     for ind in indices:
133 |         ix = i + ind
134 |         edges.append(ix - 1)
135 |         edges.append(ix)
136 |         i = ix
137 |     edges = edges[:-1]
138 |     points = shapely.points(np.unique(coords[edges], axis=0))
139 | 
140 |     # query LineString geometry to identify points intersecting 2 geometries
141 |     tree = shapely.STRtree(geom)
142 |     inp, res = tree.query(points, predicate="intersects")
143 |     unique, counts = np.unique(inp, return_counts=True)
144 |     ends = np.unique(res[np.isin(inp, unique[counts == 1])])
145 | 
146 |     new_geoms = []
147 |     # iterate over cul-de-sac-like segments and attempt to snap them to street network
148 |     for line in ends:
149 |         l_coords = shapely.get_coordinates(geom[line])
150 | 
151 |         start = shapely.points(l_coords[0])
152 |         end = shapely.points(l_coords[-1])
153 | 
154 |         first = list(tree.query(start, predicate="intersects"))
155 |         second = list(tree.query(end, predicate="intersects"))
156 |         first.remove(line)
157 |         second.remove(line)
158 | 
159 |         t = target if not itself else target.drop(line)
160 | 
161 |         if first and not second:
162 |             snapped = _extend_line(l_coords, t, tolerance)
163 |             if (
164 |                 barrier is not None
165 |                 and barrier.sindex.query(
166 |                     shapely.linestrings(snapped), predicate="intersects"
167 |                 ).size
168 |                 > 0
169 |             ):
170 |                 new_geoms.append(geom[line])
171 |             else:
172 |                 if extension == 0:
173 |                     new_geoms.append(shapely.linestrings(snapped))
174 |                 else:
175 |                     new_geoms.append(
176 |                         shapely.linestrings(
177 |                             _extend_line(snapped, t, extension, snap=False)
178 |                         )
179 |                     )
180 |         elif not first and second:
181 |             snapped = _extend_line(np.flip(l_coords, axis=0), t, tolerance)
182 |             if (
183 |                 barrier is not None
184 |                 and barrier.sindex.query(
185 |                     shapely.linestrings(snapped), predicate="intersects"
186 |                 ).size
187 |                 > 0
188 |             ):
189 |                 new_geoms.append(geom[line])
190 |             else:
191 |                 if extension == 0:
192 |                     new_geoms.append(shapely.linestrings(snapped))
193 |                 else:
194 |                     new_geoms.append(
195 |                         shapely.linestrings(
196 |                             _extend_line(snapped, t, extension, snap=False)
197 |                         )
198 |                     )
199 |         elif not first and not second:
200 |             one_side = _extend_line(l_coords, t, tolerance)
201 |             one_side_e = _extend_line(one_side, t, extension, snap=False)
202 |             snapped = _extend_line(np.flip(one_side_e, axis=0), t, tolerance)
203 |             if (
204 |                 barrier is not None
205 |                 and barrier.sindex.query(
206 |                     shapely.linestrings(snapped), predicate="intersects"
207 |                 ).size
208 |                 > 0
209 |             ):
210 |                 new_geoms.append(geom[line])
211 |             else:
212 |                 if extension == 0:
213 |                     new_geoms.append(shapely.linestrings(snapped))
214 |                 else:
215 |                     new_geoms.append(
216 |                         shapely.linestrings(
217 |                             _extend_line(snapped, t, extension, snap=False)
218 |                         )
219 |                     )
220 | 
221 |     df.iloc[ends, df.columns.get_loc(df.geometry.name)] = new_geoms
222 |     return df
223 | 
224 | 
225 | def _extend_line(
226 |     coords: np.ndarray,
227 |     target: gpd.GeoDataFrame | gpd.GeoSeries,
228 |     tolerance: float,
229 |     snap: bool = True,
230 | ) -> np.ndarray:
231 |     """Extends a line geometry to snap on the target within a tolerance."""
232 | 
233 |     if snap:
234 |         extrapolation = _get_extrapolated_line(
235 |             coords[-4:] if len(coords.shape) == 1 else coords[-2:].flatten(),
236 |             tolerance,
237 |         )
238 |         int_idx = target.sindex.query(extrapolation, predicate="intersects")
239 |         intersection = shapely.intersection(
240 |             target.iloc[int_idx].geometry.array, extrapolation
241 |         )
242 |         if intersection.size > 0:
243 |             if len(intersection) > 1:
244 |                 distances = {}
245 |                 ix = 0
246 |                 for p in intersection:
247 |                     distance = shapely.distance(p, shapely.points(coords[-1]))
248 |                     distances[ix] = distance
249 |                     ix = ix + 1
250 |                 minimal = min(distances.items(), key=operator.itemgetter(1))[0]
251 |                 new_point_coords = shapely.get_coordinates(intersection[minimal])
252 | 
253 |             else:
254 |                 new_point_coords = shapely.get_coordinates(intersection[0])
255 |             coo = np.append(coords, new_point_coords)
256 |             new = np.reshape(coo, (len(coo) // 2, 2))
257 | 
258 |             return new
259 |         return coords
260 | 
261 |     extrapolation = _get_extrapolated_line(
262 |         coords[-4:] if len(coords.shape) == 1 else coords[-2:].flatten(),
263 |         tolerance,
264 |         point=True,
265 |     )
266 |     return np.vstack([coords, extrapolation])
267 | 
268 | 
269 | def _get_extrapolated_line(
270 |     coords: np.ndarray, tolerance: float, point: bool = False
271 | ) -> tuple[float, float] | shapely.LineString:
272 |     """Creates a shapely line extrapolated in p1->p2 direction."""
273 | 
274 |     p1 = coords[:2]
275 |     p2 = coords[2:]
276 |     a = p2
277 | 
278 |     # defining new point based on the vector between existing points
279 |     if p1[0] >= p2[0] and p1[1] >= p2[1]:
280 |         b = (
281 |             p2[0]
282 |             - tolerance
283 |             * math.cos(
284 |                 math.atan(
285 |                     math.fabs(p1[1] - p2[1] + 0.000001)
286 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
287 |                 )
288 |             ),
289 |             p2[1]
290 |             - tolerance
291 |             * math.sin(
292 |                 math.atan(
293 |                     math.fabs(p1[1] - p2[1] + 0.000001)
294 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
295 |                 )
296 |             ),
297 |         )
298 |     elif p1[0] <= p2[0] and p1[1] >= p2[1]:
299 |         b = (
300 |             p2[0]
301 |             + tolerance
302 |             * math.cos(
303 |                 math.atan(
304 |                     math.fabs(p1[1] - p2[1] + 0.000001)
305 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
306 |                 )
307 |             ),
308 |             p2[1]
309 |             - tolerance
310 |             * math.sin(
311 |                 math.atan(
312 |                     math.fabs(p1[1] - p2[1] + 0.000001)
313 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
314 |                 )
315 |             ),
316 |         )
317 |     elif p1[0] <= p2[0] and p1[1] <= p2[1]:
318 |         b = (
319 |             p2[0]
320 |             + tolerance
321 |             * math.cos(
322 |                 math.atan(
323 |                     math.fabs(p1[1] - p2[1] + 0.000001)
324 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
325 |                 )
326 |             ),
327 |             p2[1]
328 |             + tolerance
329 |             * math.sin(
330 |                 math.atan(
331 |                     math.fabs(p1[1] - p2[1] + 0.000001)
332 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
333 |                 )
334 |             ),
335 |         )
336 |     else:
337 |         b = (
338 |             p2[0]
339 |             - tolerance
340 |             * math.cos(
341 |                 math.atan(
342 |                     math.fabs(p1[1] - p2[1] + 0.000001)
343 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
344 |                 )
345 |             ),
346 |             p2[1]
347 |             + tolerance
348 |             * math.sin(
349 |                 math.atan(
350 |                     math.fabs(p1[1] - p2[1] + 0.000001)
351 |                     / math.fabs(p1[0] - p2[0] + 0.000001)
352 |                 )
353 |             ),
354 |         )
355 |     if point:
356 |         return b
357 |     return shapely.linestrings([a, b])
358 | 


--------------------------------------------------------------------------------
/neatnet/geometry.py:
--------------------------------------------------------------------------------
  1 | """Geometry-related functions"""
  2 | 
  3 | import collections
  4 | import math
  5 | import warnings
  6 | 
  7 | import geopandas as gpd
  8 | import numpy as np
  9 | import pandas as pd
 10 | import shapely
 11 | from libpysal import graph
 12 | from scipy import spatial
 13 | 
 14 | from .nodes import consolidate_nodes
 15 | 
 16 | 
 17 | def _is_within(
 18 |     line: np.ndarray, poly: shapely.Polygon, rtol: float = 1e-4
 19 | ) -> np.ndarray:
 20 |     """Check if the line is within a polygon with a set relative tolerance.
 21 | 
 22 |     Parameters
 23 |     ----------
 24 |     line : np.ndarray[shapely.LineString]
 25 |         Input line to check relationship.
 26 |     poly : shapely.Polygon
 27 |         Input polygon to check relationship.
 28 |     rtol : float = -1e4
 29 |         The set relative tolerance.
 30 | 
 31 |     Returns
 32 |     -------
 33 |     np.ndarray
 34 |         ``True`` if ``line`` is either entirely within ``poly`` or if
 35 |         ``line`` is within `poly`` based on a relaxed ``rtol`` relative tolerance.
 36 |     """
 37 | 
 38 |     within = shapely.within(line, poly)
 39 |     if within.all():
 40 |         return within
 41 | 
 42 |     intersection = shapely.intersection(line, poly)
 43 |     return np.abs(shapely.length(intersection) - shapely.length(line)) <= rtol
 44 | 
 45 | 
 46 | def angle_between_two_lines(
 47 |     line1: shapely.LineString, line2: shapely.LineString
 48 | ) -> float:
 49 |     """Return the angle between 2 two lines (assuming they share a vertex).
 50 |     Based on ``momepy.coins`` but adapted to shapely lines.
 51 |     """
 52 | 
 53 |     return_bad = 0.0
 54 | 
 55 |     lines_distinct = line1 != line2
 56 |     if not lines_distinct:
 57 |         warnings.warn(
 58 |             f"Input lines are identical - must be distinct. Returning {return_bad}.",
 59 |             UserWarning,
 60 |             stacklevel=2,
 61 |         )
 62 |         return return_bad
 63 | 
 64 |     # extract points
 65 |     a, b, c, d = shapely.get_coordinates([line1, line2]).tolist()
 66 |     a, b, c, d = tuple(a), tuple(b), tuple(c), tuple(d)
 67 | 
 68 |     # assertion: we expect exactly 2 of the 4 points to be identical
 69 |     # (lines touch in this point)
 70 |     points = collections.Counter([a, b, c, d])
 71 | 
 72 |     lines_share_vertex = max(points.values()) > 1
 73 |     if not lines_share_vertex:
 74 |         warnings.warn(
 75 |             f"Input lines do not share a vertex. Returning {return_bad}.",
 76 |             UserWarning,
 77 |             stacklevel=2,
 78 |         )
 79 |         return return_bad
 80 | 
 81 |     # points where line touch = "origin" (for vector-based angle calculation)
 82 |     origin = [k for k, v in points.items() if v == 2][0]
 83 |     # other 2 unique points (one on each line)
 84 |     point1, point2 = (k for k, v in points.items() if v == 1)
 85 | 
 86 |     # translate lines into vectors (numpy arrays)
 87 |     v1 = [point1[0] - origin[0], point1[1] - origin[1]]
 88 |     v2 = [point2[0] - origin[0], point2[1] - origin[1]]
 89 | 
 90 |     # compute angle between 2 vectors in degrees
 91 |     dot_product = v1[0] * v2[0] + v1[1] * v2[1]
 92 |     norm_v1 = math.sqrt(v1[0] ** 2 + v1[1] ** 2)
 93 |     norm_v2 = math.sqrt(v2[0] ** 2 + v2[1] ** 2)
 94 |     cos_theta = round(dot_product / (norm_v1 * norm_v2), 6)  # precision issues fix
 95 |     angle = math.degrees(math.acos(cos_theta))
 96 | 
 97 |     return angle
 98 | 
 99 | 
100 | def voronoi_skeleton(
101 |     lines: list | np.ndarray | gpd.GeoSeries,
102 |     poly: None | shapely.Polygon = None,
103 |     snap_to: None | gpd.GeoSeries = None,
104 |     max_segment_length: float | int = 1,
105 |     buffer: None | float | int = None,
106 |     secondary_snap_to: None | gpd.GeoSeries = None,
107 |     clip_limit: None | float | int = 2,
108 |     consolidation_tolerance: None | float | int = None,
109 | ) -> tuple[np.ndarray, np.ndarray]:
110 |     """
111 |     Returns average geometry.
112 | 
113 |     Parameters
114 |     ----------
115 |     lines : list | numpy.ndarray | geopandas.GeoSeries
116 |         LineStrings connected at endpoints. If ``poly`` is passed in, ``lines``
117 |         must be a ``geopandas.GeoSeries``.
118 |     poly : None | shapely.Polygon = None
119 |         Polygon enclosed by ``lines``.
120 |     snap_to : None | gpd.GeoSeries = None
121 |         Series of geometries that shall be connected to the skeleton.
122 |     max_segment_length: float | int = 1
123 |         Additional vertices will be added so that all line segments
124 |         are no longer than this value. Must be greater than 0.
125 |     buffer : None | float | int = None
126 |         Optional custom buffer distance for dealing with Voronoi infinity issues.
127 |     secondary_snap_to : None | gpd.GeoSeries = None
128 |         Fall-back series of geometries that shall be connected to the skeleton.
129 |     clip_limit : None | float | int = 2
130 |         Following generation of the Voronoi linework, we clip to fit inside the polygon.
131 |         To ensure we get a space to make proper topological connections from the
132 |         linework to the actual points on the edge of the polygon, we clip using a
133 |         polygon with a negative buffer of ``clip_limit`` or the radius of
134 |         maximum inscribed circle, whichever is smaller.
135 |     consolidation_tolerance : None | float | int = None
136 |         Tolerance passed to node consolidation within the resulting skeleton.
137 |         If ``None``, no consolidation happens.
138 | 
139 |     Returns
140 |     -------
141 |     edgelines : numpy.ndarray
142 |         Array of averaged geometries.
143 |     splitters : numpy.ndarray
144 |         Split points.
145 |     """
146 |     if buffer is None:
147 |         buffer = max_segment_length * 20
148 |     if not poly:
149 |         if not isinstance(lines, gpd.GeoSeries):
150 |             lines = gpd.GeoSeries(lines)
151 |         poly = shapely.box(*lines.total_bounds)
152 |     # get an additional line around the lines to avoid infinity issues with Voronoi
153 |     extended_lines = list(lines) + [poly.buffer(buffer).boundary]
154 | 
155 |     # interpolate lines to represent them as points for Voronoi
156 |     shapely_lines = extended_lines
157 |     points, ids = shapely.get_coordinates(
158 |         shapely.segmentize(shapely_lines, max_segment_length), return_index=True
159 |     )
160 | 
161 |     # remove duplicated coordinates
162 |     unq, count = np.unique(points, axis=0, return_counts=True)
163 |     mask = np.isin(points, unq[count > 1]).all(axis=1)
164 |     points = points[~mask]
165 |     ids = ids[~mask]
166 | 
167 |     # generate Voronoi diagram
168 |     voronoi_diagram = spatial.Voronoi(points)
169 | 
170 |     # get all rigdes and filter only those between the two lines
171 |     pts = voronoi_diagram.ridge_points
172 |     mapped = np.take(ids, pts)
173 |     rigde_vertices = np.array(voronoi_diagram.ridge_vertices)
174 | 
175 |     # iterate over segment-pairs and keep rigdes between input geometries
176 |     _edgelines = []
177 |     to_add = []
178 |     splitters = []
179 | 
180 |     # determine the negative buffer distance to avoid overclipping of narrow polygons
181 |     # this can still result in some missing links, but only in rare cases
182 |     dist = min([clip_limit, shapely.ops.polylabel(poly).distance(poly.boundary) * 0.4])
183 |     limit = poly.buffer(-dist)
184 | 
185 |     # drop ridges that are between points coming from the same line
186 |     selfs = mapped[:, 0] == mapped[:, 1]
187 |     buff = (mapped == mapped.max()).any(axis=1)
188 |     mapped = mapped[~(selfs | buff)]
189 |     rigde_vertices = rigde_vertices[~(selfs | buff)]
190 |     unique = np.unique(np.sort(mapped, axis=1), axis=0)
191 | 
192 |     for a, b in unique:
193 |         mask = ((mapped[:, 0] == a) | (mapped[:, 0] == b)) & (
194 |             (mapped[:, 1] == a) | (mapped[:, 1] == b)
195 |         )
196 | 
197 |         verts = rigde_vertices[mask]
198 | 
199 |         # generate the line in between the lines
200 |         edgeline = shapely.line_merge(
201 |             shapely.multilinestrings(voronoi_diagram.vertices[verts])
202 |         )
203 | 
204 |         # check if the edgeline is within polygon
205 |         if not edgeline.within(limit):
206 |             if not isinstance(edgeline, shapely.MultiLineString):
207 |                 # if not, clip it by the polygon with a small negative buffer to keep
208 |                 # the gap between edgeline and poly boundary to avoid possible
209 |                 # overlapping lines
210 |                 edgeline = shapely.intersection(edgeline, limit)
211 | 
212 |                 # in edge cases, this can result in a MultiLineString with one sliver
213 |                 # part
214 |                 edgeline = _remove_sliver(edgeline)
215 |             # if the edgeline is a MultiLineString, treat each part independently
216 |             else:
217 |                 parts = []
218 |                 for part in edgeline.geoms:
219 |                     part = shapely.intersection(part, limit)
220 |                     part = _remove_sliver(part)
221 |                     if not part.is_empty:
222 |                         parts.append(part)
223 |                 edgeline = shapely.MultiLineString(parts)
224 | 
225 |         # check if a, b lines share a node
226 |         intersection = shapely_lines[b].intersection(shapely_lines[a])
227 |         # if they do, add shortest line from the edgeline to the shared node and
228 |         # combine it with the edgeline. Also, avoid an inner loop in more complex input
229 |         # that would create connection across
230 |         if not intersection.is_empty and not (
231 |             intersection.geom_type == "MultiPoint"
232 |             and (len(intersection.geoms) == 2 and len(lines) != 2)
233 |         ):
234 |             # we need union of edgeline and shortest because snap is buggy in GEOS
235 |             # and line_merge as well. This results in a MultiLineString but we can
236 |             # deal with those later. For now, we just need this extended edgeline to
237 |             # be a single geometry to ensure the component discovery below works as
238 |             # intended
239 |             # get_parts is needed as in case of voronoi based on two lines, these
240 |             # intersect on both ends, hence both need to be extended
241 |             edgeline = shapely.union(
242 |                 edgeline,
243 |                 shapely.union_all(
244 |                     shapely.shortest_line(
245 |                         shapely.get_parts(intersection), edgeline.boundary
246 |                     )
247 |                 ),
248 |             )
249 |         # add final edgeline to the list
250 |         _edgelines.append(edgeline)
251 | 
252 |     edgelines = np.array(_edgelines)[~(shapely.is_empty(_edgelines))]
253 | 
254 |     if edgelines.shape[0] > 0:
255 |         # if there is no explicit snapping target, snap to the boundary of the polygon
256 |         # via the shortest line. That is by definition always within the polygon
257 |         # (Martin thinks) (James concurs)
258 |         if snap_to is not False:
259 |             if snap_to is None:
260 |                 sl = shapely.shortest_line(
261 |                     shapely.union_all(edgelines).boundary, poly.boundary
262 |                 )
263 |                 to_add.append(sl)
264 |                 splitters.append(shapely.get_point(sl, -1))
265 | 
266 |             # if we have some snapping targets, we need to figure out
267 |             # what shall be snapped to what
268 |             else:
269 |                 additions, splits = snap_to_targets(
270 |                     edgelines, poly, snap_to, secondary_snap_to
271 |                 )
272 |                 to_add.extend(additions)
273 |                 splitters.extend(splits)
274 | 
275 |             # concatenate edgelines and their additions snapping to edge
276 |             edgelines = np.concatenate([edgelines, to_add])
277 |         # simplify to avoid unnecessary point density and some wobbliness
278 |         edgelines = shapely.simplify(edgelines, max_segment_length)
279 |     # drop empty
280 |     edgelines = edgelines[edgelines != None]  # noqa: E711
281 | 
282 |     edgelines = shapely.line_merge(edgelines[shapely.length(edgelines) > 0])
283 |     edgelines = _as_parts(edgelines)
284 |     edgelines = _consolidate(edgelines, consolidation_tolerance)
285 | 
286 |     return edgelines, np.array(splitters)
287 | 
288 | 
289 | def _remove_sliver(
290 |     edgeline: shapely.LineString | shapely.MultiLineString,
291 | ) -> shapely.LineString:
292 |     """Remove sliver(s) if present."""
293 |     if edgeline.geom_type == "MultiLineString":
294 |         parts = shapely.get_parts(edgeline)
295 |         edgeline = parts[np.argmax(shapely.length(parts))]
296 |     return edgeline
297 | 
298 | 
299 | def _as_parts(edgelines: np.ndarray) -> np.ndarray:
300 |     """Return constituent LineStrings if MultiLineString present."""
301 |     geom_types = np.unique(shapely.get_type_id(edgelines))
302 |     if geom_types.shape[0] > 1 or (geom_types == 5).all():
303 |         edgelines = shapely.get_parts(edgelines)
304 |     return edgelines
305 | 
306 | 
307 | def _consolidate(
308 |     edgelines: np.ndarray, consolidation_tolerance: None | float | int
309 | ) -> np.ndarray:
310 |     """Return ``edgelines`` from consolidated nodes, if criteria met."""
311 |     if consolidation_tolerance and edgelines.shape[0] > 0:
312 |         edgelines = consolidate_nodes(
313 |             edgelines, tolerance=consolidation_tolerance, preserve_ends=True
314 |         ).geometry.to_numpy()
315 |     return edgelines
316 | 
317 | 
318 | def snap_to_targets(
319 |     edgelines: np.ndarray,
320 |     poly: shapely.Polygon,
321 |     snap_to: gpd.GeoSeries,
322 |     secondary_snap_to: None | gpd.GeoSeries = None,
323 | ) -> tuple[list[shapely.LineString], list[shapely.Point]]:
324 |     """Snap edgelines to vertices.
325 | 
326 |     Parameters
327 |     ----------
328 |     edgelines : numpy.ndarray
329 |         Voronoi skeleton edges.
330 |     poly : None | shapely.Polygon = None
331 |         Polygon enclosed by ``lines``.
332 |     snap_to : None | gpd.GeoSeries = None
333 |         Series of geometries that shall be connected to the skeleton.
334 |     secondary_snap_to : None | gpd.GeoSeries = None
335 |         Fall-back series of geometries that shall be connected to the skeleton.
336 | 
337 |     Returns
338 |     -------
339 |     to_add, to_split : tuple[list[shapely.LineString], list[shapely.Point]]
340 |         Lines to add and points where to split.
341 |     """
342 | 
343 |     to_add: list = []
344 |     to_split: list = []
345 | 
346 |     # generate graph from lines
347 |     comp_labels, comp_counts, components = _prep_components(edgelines)
348 | 
349 |     primary_union = shapely.union_all(snap_to)
350 |     secondary_union = shapely.union_all(secondary_snap_to)
351 | 
352 |     # if there are muliple components, loop over all and treat each
353 |     if len(components) > 1:
354 |         for comp_label, comp in components.geometry.items():
355 |             cbound = comp.boundary
356 | 
357 |             # if component does not interest the boundary, it needs to be snapped
358 |             # if it does but has only one part, this part interesect only on one
359 |             # side (the node remaining from the removed edge) and needs to be
360 |             # snapped on the other side as well
361 |             if (
362 |                 (not comp.intersects(poly.boundary))
363 |                 or comp_counts[comp_label] == 1
364 |                 or (
365 |                     not comp.intersects(primary_union)
366 |                 )  # ! this fixes one thing but may break others
367 |             ):
368 |                 # add segment composed of the shortest line to the nearest snapping
369 |                 # target. We use boundary to snap to endpoints of edgelines only
370 |                 sl = shapely.shortest_line(cbound, primary_union)
371 |                 if _is_within(sl, poly):
372 |                     to_split, to_add = _split_add(sl, to_split, to_add)
373 |                 else:
374 |                     if secondary_snap_to is not None:
375 |                         sl = shapely.shortest_line(cbound, secondary_union)
376 |                         to_split, to_add = _split_add(sl, to_split, to_add)
377 |     else:
378 |         # if there is a single component, ensure it gets a shortest line to an
379 |         # endpoint from each snapping target
380 |         for target in snap_to:
381 |             sl = shapely.shortest_line(components.boundary.item(), target)
382 |             if _is_within(sl, poly):
383 |                 to_split, to_add = _split_add(sl, to_split, to_add)
384 |             else:
385 |                 warnings.warn(
386 |                     "Could not create a connection as it would lead outside "
387 |                     "of the artifact.",
388 |                     UserWarning,
389 |                     stacklevel=2,
390 |                 )
391 |     return to_add, to_split
392 | 
393 | 
394 | def _prep_components(
395 |     lines: np.ndarray | gpd.GeoSeries,
396 | ) -> tuple[pd.Series, pd.Series, gpd.GeoSeries]:
397 |     """Helper for preparing graph components & labels in PySAL."""
398 | 
399 |     # cast edgelines to gdf
400 |     lines = gpd.GeoDataFrame(geometry=lines)
401 | 
402 |     # build queen contiguity on edgelines and extract component labels
403 |     not_empty = ~lines.is_empty
404 |     not_nan = ~lines.geometry.isna()
405 |     lines = lines[not_empty | not_nan]
406 |     comp_labels = graph.Graph.build_contiguity(lines, rook=False).component_labels
407 | 
408 |     # compute size of each component
409 |     comp_counts = comp_labels.value_counts()
410 | 
411 |     # get MultiLineString geometry per connected component
412 |     components = lines.dissolve(comp_labels)
413 | 
414 |     return comp_labels, comp_counts, components
415 | 
416 | 
417 | def _split_add(line: shapely.LineString, splits: list, adds: list) -> tuple[list, list]:
418 |     """Helper for preparing splitter points & added lines."""
419 |     splits.append(shapely.get_point(line, -1))
420 |     adds.append(line)
421 |     return splits, adds
422 | 


--------------------------------------------------------------------------------
/neatnet/nodes.py:
--------------------------------------------------------------------------------
  1 | import collections.abc
  2 | import typing
  3 | 
  4 | import geopandas as gpd
  5 | import networkx as nx
  6 | import numpy as np
  7 | import pandas as pd
  8 | import pyproj
  9 | import shapely
 10 | from scipy import sparse
 11 | from sklearn.cluster import DBSCAN
 12 | 
 13 | 
 14 | def _fill_attrs(gdf: gpd.GeoDataFrame, source_row: pd.Series) -> gpd.GeoDataFrame:
 15 |     """Thoughtful attribute assignment to lines split into segments by new nodes –
 16 |     taking list-like values into consideration. See gh#213. Regarding iterables,
 17 |     currently only supports list values – others can be added based on input type
 18 |     in the future on an ad hoc basis as problems arise. Called from within ``split()``.
 19 | 
 20 |     Parameters
 21 |     ----------
 22 |     gdf : geopandas.GeoDataFrame
 23 |         The new frame of split linestrings.
 24 |     source_row: pandas.Series
 25 |         The original source row.
 26 | 
 27 |     Returns
 28 |     -------
 29 |     geopandas.GeoDataFrame
 30 |         The input ``gdf`` with updated columns based on values in ``source_row``.
 31 |     """
 32 | 
 33 |     def _populate_column(attr):
 34 |         """Return the attribute if scalar, create vector of input if not."""
 35 |         if isinstance(attr, collections.abc.Sequence) and not isinstance(attr, str):
 36 |             attr = [attr] * gdf.shape[0]
 37 |         return attr
 38 | 
 39 |     for col in source_row.index.drop(["geometry", "_status"], errors="ignore"):
 40 |         gdf[col] = _populate_column(source_row[col])
 41 | 
 42 |     return gdf
 43 | 
 44 | 
 45 | def split(
 46 |     split_points: list | np.ndarray | gpd.GeoSeries,
 47 |     cleaned_streets: gpd.GeoDataFrame,
 48 |     crs: str | pyproj.CRS,
 49 |     *,
 50 |     eps: float = 1e-4,
 51 | ) -> gpd.GeoSeries | gpd.GeoDataFrame:
 52 |     """Split lines on new nodes.
 53 | 
 54 |     Parameters
 55 |     ----------
 56 |     split_points : list | numpy.ndarray
 57 |         Points to split the ``cleaned_roads``.
 58 |     cleaned_streets : geopandas.GeoDataFrame
 59 |         Line geometries to be split with ``split_points``.
 60 |     crs : str | pyproj.CRS
 61 |         Anything accepted by ``pyproj.CRS``.
 62 |     eps : float = 1e-4
 63 |         Tolerance epsilon for point snapping.
 64 | 
 65 |     Returns
 66 |     -------
 67 |     geopandas.GeoSeries | geopandas.GeoDataFrame
 68 |         Resultant split line geometries.
 69 |     """
 70 |     split_points = gpd.GeoSeries(split_points, crs=crs)
 71 |     for split in split_points.drop_duplicates():
 72 |         _, ix = cleaned_streets.sindex.nearest(split, max_distance=eps)
 73 |         row = cleaned_streets.iloc[ix]
 74 |         edge = row.geometry
 75 |         if edge.shape[0] == 1:
 76 |             row = row.iloc[0]
 77 |             lines_split = _snap_n_split(edge.item(), split, eps)
 78 |             if lines_split.shape[0] > 1:
 79 |                 gdf_split = gpd.GeoDataFrame(geometry=lines_split, crs=crs)
 80 |                 gdf_split = _fill_attrs(gdf_split, row)
 81 |                 gdf_split["_status"] = "changed"
 82 |                 cleaned_streets = pd.concat(
 83 |                     [cleaned_streets.drop(edge.index[0]), gdf_split],
 84 |                     ignore_index=True,
 85 |                 )
 86 |         elif edge.shape[0] > 1:
 87 |             to_be_dropped = []
 88 |             to_be_added = []
 89 |             for i, e in edge.items():
 90 |                 lines_split = _snap_n_split(e, split, eps)
 91 |                 if lines_split.shape[0] > 1:
 92 |                     to_be_dropped.append(i)
 93 |                     to_be_added.append(lines_split)
 94 | 
 95 |             if to_be_added:
 96 |                 gdf_split = pd.DataFrame(
 97 |                     {"geometry": to_be_added, "_orig": to_be_dropped}
 98 |                 ).explode("geometry")
 99 |                 gdf_split = pd.concat(
100 |                     [
101 |                         gdf_split.drop(columns="_orig").reset_index(drop=True),
102 |                         row.drop(columns="geometry")
103 |                         .loc[gdf_split["_orig"]]
104 |                         .reset_index(drop=True),
105 |                     ],
106 |                     axis=1,
107 |                 )
108 |                 gdf_split["_status"] = "changed"
109 |                 cleaned_streets = pd.concat(
110 |                     [cleaned_streets.drop(to_be_dropped), gdf_split],
111 |                     ignore_index=True,
112 |                 )
113 |                 cleaned_streets = gpd.GeoDataFrame(
114 |                     cleaned_streets, geometry="geometry", crs=crs
115 |                 )
116 | 
117 |     return cleaned_streets.reset_index(drop=True)
118 | 
119 | 
120 | def _snap_n_split(e: shapely.LineString, s: shapely.Point, tol: float) -> np.ndarray:
121 |     """Snap point to edge and return lines to split."""
122 |     snapped = shapely.snap(e, s, tolerance=tol)
123 |     _lines_split = shapely.get_parts(shapely.ops.split(snapped, s))
124 |     return _lines_split[~shapely.is_empty(_lines_split)]
125 | 
126 | 
127 | def _status(x: pd.Series) -> str:
128 |     """Determine the status of edge line(s)."""
129 |     if len(x) == 1:
130 |         return x.iloc[0]
131 |     return "changed"
132 | 
133 | 
134 | def get_components(
135 |     edgelines: list | np.ndarray | gpd.GeoSeries,
136 |     *,
137 |     ignore: None | gpd.GeoSeries = None,
138 | ) -> np.ndarray:
139 |     """Associate edges with connected component labels and return.
140 | 
141 |     Parameters
142 |     ----------
143 |     edgelines : list | np.ndarray | gpd.GeoSeries
144 |         Collection of line objects.
145 |     ignore : None | gpd.GeoSeries = None
146 |         Nodes to ignore when labeling components.
147 | 
148 |     Returns
149 |     -------
150 |     np.ndarray
151 |         Edge connected component labels.
152 | 
153 |     Notes
154 |     -----
155 |     See [https://github.com/uscuni/neatnet/issues/56] for detailed explanation of
156 |     output.
157 |     """
158 |     edgelines = np.array(edgelines)
159 |     start_points = shapely.get_point(edgelines, 0)
160 |     end_points = shapely.get_point(edgelines, -1)
161 |     points = shapely.points(
162 |         np.unique(
163 |             shapely.get_coordinates(np.concatenate([start_points, end_points])), axis=0
164 |         )
165 |     )
166 |     if ignore is not None:
167 |         mask = np.isin(points, ignore)
168 |         points = points[~mask]
169 |     # query LineString geometry to identify points intersecting 2 geometries
170 |     inp, res = shapely.STRtree(shapely.boundary(edgelines)).query(
171 |         points, predicate="intersects"
172 |     )
173 |     unique, counts = np.unique(inp, return_counts=True)
174 |     mask = np.isin(inp, unique[counts == 2])
175 |     merge_res = res[mask]
176 |     merge_inp = inp[mask]
177 |     closed = np.arange(len(edgelines))[shapely.is_closed(edgelines)]
178 |     mask = np.isin(merge_res, closed) | np.isin(merge_inp, closed)
179 |     merge_res = merge_res[~mask]
180 |     merge_inp = merge_inp[~mask]
181 |     g = nx.Graph(list(zip((merge_inp * -1) - 1, merge_res, strict=True)))
182 |     components = {
183 |         i: {v for v in k if v > -1} for i, k in enumerate(nx.connected_components(g))
184 |     }
185 |     component_labels = {value: key for key in components for value in components[key]}
186 |     labels = pd.Series(component_labels, index=range(len(edgelines)))
187 | 
188 |     max_label = len(edgelines) - 1 if pd.isna(labels.max()) else int(labels.max())
189 |     filling = pd.Series(range(max_label + 1, max_label + len(edgelines) + 1))
190 |     labels = labels.fillna(filling)
191 | 
192 |     return labels.values
193 | 
194 | 
195 | def weld_edges(
196 |     edgelines: list | np.ndarray | gpd.GeoSeries,
197 |     *,
198 |     ignore: None | gpd.GeoSeries = None,
199 | ) -> list | np.ndarray | gpd.GeoSeries:
200 |     """Combine lines sharing an endpoint (if only 2 lines share that point).
201 |     Lightweight version of ``remove_interstitial_nodes()``.
202 | 
203 |     Parameters
204 |     ----------
205 |     edgelines : list | np.ndarray | gpd.GeoSeries
206 |         Collection of line objects.
207 |     ignore : None | gpd.GeoSeries = None
208 |         Nodes to ignore when welding components.
209 | 
210 |     Returns
211 |     -------
212 |     list | np.ndarray | gpd.GeoSeries
213 |         Resultant welded ``edgelines`` if more than 1 passed in, otherwise
214 |         the original ``edgelines`` object.
215 |     """
216 |     if len(edgelines) < 2:
217 |         return edgelines
218 |     labels = get_components(edgelines, ignore=ignore)
219 |     return (
220 |         gpd.GeoSeries(edgelines)
221 |         .groupby(labels)
222 |         .agg(lambda x: shapely.line_merge(shapely.GeometryCollection(x.values)))
223 |     ).tolist()
224 | 
225 | 
226 | def induce_nodes(streets: gpd.GeoDataFrame, *, eps: float = 1e-4) -> gpd.GeoDataFrame:
227 |     """Adding potentially missing nodes on intersections of individual LineString
228 |     endpoints with the remaining network. The idea behind is that if a line ends
229 |     on an intersection with another, there should be a node on both of them.
230 | 
231 |     Parameters
232 |     ----------
233 |     streets : geopandas.GeoDataFrame
234 |         Input LineString geometries.
235 |     eps : float = 1e-4
236 |         Tolerance epsilon for point snapping passed into ``nodes.split()``.
237 | 
238 |     Returns
239 |     -------
240 |     geopandas.GeoDataFrame
241 |         Updated ``streets`` with (potentially) added nodes.
242 |     """
243 | 
244 |     sindex_kws = {"predicate": "dwithin", "distance": 1e-4}
245 | 
246 |     # identify degree mismatch cases
247 |     nodes_degree_mismatch = _identify_degree_mismatch(streets, sindex_kws)
248 | 
249 |     # ensure loop topology cases:
250 |     #   - loop nodes intersecting non-loops
251 |     #   - loop nodes intersecting other loops
252 |     nodes_off_loops, nodes_on_loops = _makes_loop_contact(streets, sindex_kws)
253 | 
254 |     # all nodes to induce
255 |     nodes_to_induce = pd.concat(
256 |         [nodes_degree_mismatch, nodes_off_loops, nodes_on_loops]
257 |     )
258 | 
259 |     return split(nodes_to_induce.geometry, streets, streets.crs, eps=eps)
260 | 
261 | 
262 | def _identify_degree_mismatch(
263 |     edges: gpd.GeoDataFrame, sindex_kws: dict
264 | ) -> gpd.GeoSeries:
265 |     """Helper to identify difference of observed vs. expected node degree."""
266 |     nodes = _nodes_degrees_from_edges(edges.geometry)
267 |     nodes = nodes.set_crs(edges.crs)
268 |     nix, eix = edges.sindex.query(nodes.geometry, **sindex_kws)
269 |     coo_vals = ([True] * len(nix), (nix, eix))
270 |     coo_shape = (len(nodes), len(edges))
271 |     intersects = sparse.coo_array(coo_vals, shape=coo_shape, dtype=np.bool_)
272 |     nodes["expected_degree"] = intersects.sum(axis=1)
273 |     return nodes[nodes["degree"] != nodes["expected_degree"]].geometry
274 | 
275 | 
276 | def _nodes_from_edges(
277 |     edgelines: list | np.ndarray | gpd.GeoSeries,
278 |     return_degrees=False,
279 | ) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
280 |     """Helper to get network nodes from edges' geometries."""
281 |     edgelines = np.array(edgelines)
282 |     start_points = shapely.get_point(edgelines, 0)
283 |     end_points = shapely.get_point(edgelines, -1)
284 |     node_coords = np.unique(
285 |         shapely.get_coordinates(np.concatenate([start_points, end_points])),
286 |         axis=0,
287 |         return_counts=return_degrees,
288 |     )
289 |     if return_degrees:
290 |         node_coords, degrees = node_coords
291 |     node_points = shapely.points(node_coords)
292 |     if return_degrees:
293 |         return node_points, degrees
294 |     else:
295 |         return node_points
296 | 
297 | 
298 | def _nodes_degrees_from_edges(
299 |     edgelines: list | np.ndarray | gpd.GeoSeries,
300 | ) -> gpd.GeoDataFrame:
301 |     """Helper to get network nodes and their degrees from edges' geometries."""
302 |     node_points, degrees = _nodes_from_edges(edgelines, return_degrees=True)
303 |     nodes_gdf = gpd.GeoDataFrame({"degree": degrees, "geometry": node_points})
304 |     return nodes_gdf
305 | 
306 | 
307 | def _makes_loop_contact(
308 |     edges: gpd.GeoDataFrame, sindex_kws: dict
309 | ) -> tuple[gpd.GeoSeries, gpd.GeoSeries]:
310 |     """Helper to identify:
311 |     1. loop nodes intersecting non-loops
312 |     2. loop nodes intersecting other loops
313 |     """
314 | 
315 |     loops, not_loops = _loops_and_non_loops(edges)
316 |     loop_points = shapely.points(loops.get_coordinates().values)
317 |     loop_gdf = gpd.GeoDataFrame(geometry=loop_points, crs=edges.crs)
318 |     loop_point_geoms = loop_gdf.geometry
319 | 
320 |     # loop points intersecting non-loops
321 |     nodes_from_non_loops_ix, _ = not_loops.sindex.query(loop_point_geoms, **sindex_kws)
322 | 
323 |     # loop points intersecting other loops
324 |     nodes_from_loops_ix, _ = loops.sindex.query(loop_point_geoms, **sindex_kws)
325 |     loop_x_loop, n_loop_x_loop = np.unique(nodes_from_loops_ix, return_counts=True)
326 |     nodes_from_loops_ix = loop_x_loop[n_loop_x_loop > 1]
327 | 
328 |     # tease out both varieties
329 |     nodes_non_loops = loop_gdf.loc[nodes_from_non_loops_ix]
330 |     nodes_loops = loop_gdf.loc[nodes_from_loops_ix]
331 | 
332 |     return nodes_non_loops.geometry, nodes_loops.geometry
333 | 
334 | 
335 | def _loops_and_non_loops(
336 |     edges: gpd.GeoDataFrame,
337 | ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
338 |     """Bifurcate edge gdf into loops and non-loops."""
339 |     loop_mask = edges.is_ring
340 |     not_loops = edges[~loop_mask]
341 |     loops = edges[loop_mask]
342 |     return loops, not_loops
343 | 
344 | 
345 | def remove_interstitial_nodes(
346 |     gdf: gpd.GeoSeries | gpd.GeoDataFrame, *, aggfunc: str | dict = "first", **kwargs
347 | ) -> gpd.GeoSeries | gpd.GeoDataFrame:
348 |     """Clean topology of existing LineString geometry by removal of nodes of degree 2.
349 | 
350 |     Returns the original gdf if there’s no node of degree 2.
351 | 
352 |     Parameters
353 |     ----------
354 |     gdf : geopandas.GeoSeries | geopandas.GeoDataFrame
355 |         Input edgelines process. If any edges are ``MultiLineString`` they
356 |         will be exploded into constituent ``LineString`` components.
357 |     aggfunc : str | dict = 'first'
358 |         Aggregate function for processing non-spatial component.
359 |     **kwargs
360 |         Keyword arguments for ``aggfunc``.
361 | 
362 |     Returns
363 |     -------
364 |     geopandas.GeoSeries | geopandas.GeoDataFrame
365 |        The original input ``gdf`` if only 1 edgeline, otherwise the processed
366 |        edgeline without interstitial nodes.
367 | 
368 |     Notes
369 |     -----
370 |     Any 3D geometries are (potentially) downcast in loops.
371 |     """
372 | 
373 |     def merge_geometries(block: gpd.GeoSeries) -> shapely.LineString:
374 |         """Helper in processing the spatial component."""
375 |         return shapely.line_merge(shapely.GeometryCollection(block.values))
376 | 
377 |     if len(gdf) < 2:
378 |         return gdf
379 | 
380 |     if isinstance(gdf, gpd.GeoSeries):
381 |         gdf = gdf.to_frame("geometry")
382 | 
383 |     gdf = gdf.explode(ignore_index=True)
384 | 
385 |     labels = get_components(gdf.geometry)
386 | 
387 |     # Process non-spatial component
388 |     data = gdf.drop(labels=gdf.geometry.name, axis=1)
389 |     aggregated_data = data.groupby(by=labels).agg(aggfunc, **kwargs)
390 |     aggregated_data.columns = aggregated_data.columns.to_flat_index()
391 | 
392 |     # Process spatial component
393 |     g = gdf.groupby(group_keys=False, by=labels)[gdf.geometry.name].agg(
394 |         merge_geometries
395 |     )
396 |     aggregated_geometry = gpd.GeoDataFrame(g, geometry=gdf.geometry.name, crs=gdf.crs)
397 | 
398 |     # Recombine
399 |     aggregated = aggregated_geometry.join(aggregated_data)
400 | 
401 |     # Derive nodes
402 |     nodes = _nodes_from_edges(aggregated.geometry)
403 |     # Bifurcate edges into loops and non-loops
404 |     loops, not_loops = _loops_and_non_loops(aggregated)
405 | 
406 |     # Ensure:
407 |     #   - all loops have exactly 1 endpoint; and
408 |     #   - that endpoint shares a node with an intersecting line
409 |     fixed_loops = []
410 |     fixed_index = []
411 |     node_ix, loop_ix = loops.sindex.query(nodes, predicate="intersects")
412 |     for ix in np.unique(loop_ix):
413 |         loop_geom = loops.geometry.iloc[ix]
414 |         target_nodes = nodes[node_ix[loop_ix == ix]]
415 |         if len(target_nodes) == 2:
416 |             new_sequence = _rotate_loop_coords(loop_geom, not_loops)
417 |             fixed_loops.append(shapely.LineString(new_sequence))
418 |             fixed_index.append(ix)
419 | 
420 |     aggregated.loc[loops.index[fixed_index], aggregated.geometry.name] = fixed_loops
421 |     return aggregated.reset_index(drop=True)
422 | 
423 | 
424 | def _rotate_loop_coords(
425 |     loop_geom: shapely.LineString, not_loops: gpd.GeoDataFrame
426 | ) -> np.ndarray:
427 |     """Rotate loop node coordinates if needed to ensure topology."""
428 | 
429 |     loop_coords = shapely.get_coordinates(loop_geom)
430 |     loop_points = gpd.GeoDataFrame(geometry=shapely.points(loop_coords))
431 |     loop_points_ix, _ = not_loops.sindex.query(
432 |         loop_points.geometry, predicate="dwithin", distance=1e-4
433 |     )
434 | 
435 |     mode = loop_points.loc[loop_points_ix].geometry.mode()
436 | 
437 |     # if there is a non-planar intersection, we may have multiple points. Check with
438 |     # entrypoints only in that case
439 |     if mode.shape[0] > 1:
440 |         loop_points_ix, _ = not_loops.sindex.query(
441 |             loop_points.geometry, predicate="dwithin", distance=1e-4
442 |         )
443 |         new_mode = loop_points.loc[loop_points_ix].geometry.mode()
444 |         # if that did not help, just pick one to avoid failure and hope for the best
445 |         if new_mode.empty | new_mode.shape[0] > 1:
446 |             mode = mode.iloc[[0]]
447 | 
448 |     new_start = mode.get_coordinates().values
449 |     _coords_match = (loop_coords == new_start).all(axis=1)
450 |     new_start_idx = np.where(_coords_match)[0].squeeze()
451 | 
452 |     rolled_coords = np.roll(loop_coords[:-1], -new_start_idx, axis=0)
453 |     new_sequence = np.append(rolled_coords, rolled_coords[[0]], axis=0)
454 |     return new_sequence
455 | 
456 | 
457 | def fix_topology(
458 |     streets: gpd.GeoDataFrame,
459 |     *,
460 |     eps: float = 1e-4,
461 |     **kwargs,
462 | ) -> gpd.GeoDataFrame:
463 |     """Fix street network topology. This ensures correct topology of the network by:
464 | 
465 |         1.  Adding potentially missing nodes...
466 |                 on intersections of individual LineString endpoints
467 |                 with the remaining network. The idea behind is that
468 |                 if a line ends on an intersection with another, there
469 |                 should be a node on both of them.
470 |         2. Removing nodes of degree 2...
471 |                 that have no meaning in the network used within our framework.
472 |         3. Removing duplicated geometries (irrespective of orientation).
473 | 
474 |     Parameters
475 |     ----------
476 |     streets : geopandas.GeoDataFrame
477 |         Input LineString geometries.
478 |     eps : float = 1e-4
479 |         Tolerance epsilon for point snapping passed into ``nodes.split()``.
480 |     **kwargs : dict
481 |         Key word arguments passed into ``remove_interstitial_nodes()``.
482 | 
483 |     Returns
484 |     -------
485 |     gpd.GeoDataFrame
486 |         The input streets that now have fixed topology and are ready
487 |         to proceed through the simplification algorithm.
488 |     """
489 |     streets = streets[~streets.geometry.normalize().duplicated()].copy()
490 |     streets_w_nodes = induce_nodes(streets, eps=eps)
491 |     return remove_interstitial_nodes(streets_w_nodes, **kwargs)
492 | 
493 | 
494 | def consolidate_nodes(
495 |     gdf: gpd.GeoDataFrame,
496 |     *,
497 |     tolerance: float = 2.0,
498 |     preserve_ends: bool = False,
499 | ) -> gpd.GeoSeries:
500 |     """Return geometry with consolidated nodes.
501 | 
502 |     Replace clusters of nodes with a single node (weighted centroid
503 |     of a cluster) and snap linestring geometry to it. Cluster is
504 |     defined using hierarchical clustering with average linkage
505 |     on coordinates cut at a cophenetic distance equal to ``tolerance``.
506 | 
507 |     The use of hierachical clustering avoids the chaining effect of a sequence
508 |     of intersections within ``tolerance`` from each other that would happen with
509 |     DBSCAN and similar solutions.
510 | 
511 |     Parameters
512 |     ----------
513 |     gdf : geopandas.GeoDataFrame
514 |         GeoDataFrame with LineStrings (usually representing street network).
515 |     tolerance : float = 2.0
516 |         The maximum distance between two nodes for one to be considered
517 |         as in the neighborhood of the other. Nodes within tolerance are
518 |         considered a part of a single cluster and will be consolidated.
519 |     preserve_ends : bool = False
520 |         If ``True``, nodes of a degree 1 will be excluded from the consolidation.
521 | 
522 |     Returns
523 |     -------
524 |     geopandas.GeoSeries
525 |         Updated input ``gdf`` of LineStrings with consolidated nodes.
526 |     """
527 |     from scipy.cluster import hierarchy
528 | 
529 |     if isinstance(gdf, gpd.GeoSeries):
530 |         gdf = gdf.to_frame("geometry")
531 |     elif isinstance(gdf, np.ndarray):
532 |         gdf = gpd.GeoDataFrame(geometry=gdf)
533 | 
534 |     nodes = _nodes_degrees_from_edges(gdf.geometry)
535 | 
536 |     if preserve_ends:
537 |         # keep at least one meter of original geometry around each end
538 |         ends = nodes[nodes["degree"] == 1].buffer(1)
539 |         nodes = nodes[nodes["degree"] > 1].copy()
540 | 
541 |         # if all we have are ends, return the original
542 |         # - this is generally when called from within ``geometry._consolidate()``
543 |         if nodes.shape[0] < 2:
544 |             gdf["_status"] = "original"
545 |             return gdf
546 | 
547 |     # get clusters of nodes which should be consolidated
548 |     # first get components of possible clusters to and then do the linkage itself
549 |     # otherwise is dead slow and needs a ton of memory
550 |     db = DBSCAN(eps=tolerance, min_samples=2).fit(nodes.get_coordinates())
551 |     comp_labels = db.labels_
552 |     mask = comp_labels > -1
553 |     components = comp_labels[mask]
554 |     nodes_to_merge = nodes[mask]
555 | 
556 |     def get_labels(nodes):
557 |         linkage = hierarchy.linkage(shapely.get_coordinates(nodes), method="average")
558 |         labels = (
559 |             hierarchy.fcluster(linkage, tolerance, criterion="distance").astype(str)
560 |             + f"_{nodes.name}"
561 |         )
562 |         return labels
563 | 
564 |     grouped = (
565 |         pd.Series(nodes_to_merge.geometry).groupby(components).transform(get_labels)
566 |     )
567 |     nodes["lab"] = grouped
568 |     unique, counts = np.unique(nodes["lab"].dropna(), return_counts=True)
569 |     actual_clusters = unique[counts > 1]
570 |     change = nodes[nodes["lab"].isin(actual_clusters)]
571 | 
572 |     # no change needed, return the original
573 |     if change.empty:
574 |         gdf["_status"] = "original"
575 |         return gdf
576 | 
577 |     gdf = gdf.copy()
578 |     # get geometry
579 |     geom = gdf.geometry.copy()
580 |     status = pd.Series("original", index=geom.index)
581 | 
582 |     # loop over clusters, cut out geometry within tolerance / 2 and replace it
583 |     # with spider-like geometry to the weighted centroid of a cluster
584 |     spiders = []
585 |     midpoints = []
586 | 
587 |     clusters = change.dissolve(change["lab"])
588 | 
589 |     # TODO: not optimal but avoids some MultiLineStrings but not all
590 |     cookies = clusters.buffer(tolerance / 2).convex_hull
591 | 
592 |     if preserve_ends:
593 |         cookies = cookies.to_frame().overlay(ends.to_frame(), how="difference")
594 | 
595 |     for cluster, cookie in zip(clusters.geometry, cookies.geometry, strict=True):
596 |         inds = geom.sindex.query(cookie, predicate="intersects")
597 |         pts = shapely.get_coordinates(geom.iloc[inds].intersection(cookie.boundary))
598 |         if pts.shape[0] > 0:
599 |             # TODO: this may result in MultiLineString - we need to avoid that
600 |             # TODO: It is temporarily fixed by that explode in return
601 |             geom.iloc[inds] = geom.iloc[inds].difference(cookie)
602 | 
603 |             status.iloc[inds] = "changed"
604 |             midpoint = np.mean(shapely.get_coordinates(cluster), axis=0)
605 |             midpoints.append(midpoint)
606 |             mids = np.array([midpoint] * len(pts))
607 | 
608 |             spider = shapely.linestrings(
609 |                 np.array([pts[:, 0], mids[:, 0]]).T,
610 |                 y=np.array([pts[:, 1], mids[:, 1]]).T,
611 |             )
612 |             spiders.append(spider)
613 | 
614 |     gdf = gdf.set_geometry(geom)
615 |     gdf["_status"] = status
616 | 
617 |     if spiders:
618 |         # combine geometries
619 |         geoms = np.hstack(spiders)
620 |         gdf = pd.concat([gdf, gpd.GeoDataFrame(geometry=geoms, crs=geom.crs)])
621 | 
622 |     agg: dict[str, str | typing.Callable] = {"_status": _status}
623 |     for c in gdf.columns.drop(gdf.active_geometry_name):
624 |         if c != "_status":
625 |             agg[c] = "first"
626 |     return remove_interstitial_nodes(
627 |         gdf[~gdf.geometry.is_empty].explode(),
628 |         # NOTE: this aggfunc needs to be able to process all the columns
629 |         aggfunc=agg,
630 |     )
631 | 


--------------------------------------------------------------------------------
/neatnet/simplify.py:
--------------------------------------------------------------------------------
   1 | import logging
   2 | import typing
   3 | import warnings
   4 | 
   5 | import geopandas as gpd
   6 | import numpy as np
   7 | import pandas as pd
   8 | import shapely
   9 | from libpysal import graph
  10 | from scipy import sparse
  11 | 
  12 | from .artifacts import (
  13 |     get_artifacts,
  14 |     n1_g1_identical,
  15 |     nx_gx,
  16 |     nx_gx_cluster,
  17 |     nx_gx_identical,
  18 | )
  19 | from .continuity import continuity, get_stroke_info
  20 | from .nodes import (
  21 |     _nodes_degrees_from_edges,
  22 |     _nodes_from_edges,
  23 |     _status,
  24 |     consolidate_nodes,
  25 |     fix_topology,
  26 |     induce_nodes,
  27 |     remove_interstitial_nodes,
  28 |     split,
  29 | )
  30 | 
  31 | DEBUGGING = False
  32 | 
  33 | logger = logging.getLogger(__name__)
  34 | 
  35 | 
  36 | def _check_input_crs(streets: gpd.GeoDataFrame, exclusion_mask: gpd.GeoSeries):
  37 |     """Ensure input data is in appropriate Coordinate reference systems."""
  38 | 
  39 |     streets_crs = streets.crs
  40 |     streets_has_crs = streets_crs is not None
  41 | 
  42 |     if not streets_has_crs:
  43 |         warnings.warn(
  44 |             (
  45 |                 "The input `streets` data does not have an assigned "
  46 |                 "coordinate reference system. Assuming a projected CRS in meters."
  47 |             ),
  48 |             category=UserWarning,
  49 |             stacklevel=2,
  50 |         )
  51 | 
  52 |     else:
  53 |         if not streets_crs.is_projected:
  54 |             raise ValueError(
  55 |                 "The input `streets` data are not in a projected "
  56 |                 "coordinate reference system. Reproject and rerun."
  57 |             )
  58 | 
  59 |         if streets_crs.axis_info[0].unit_name != "metre":
  60 |             warnings.warn(
  61 |                 (
  62 |                     "The input `streets` data coordinate reference system is projected "
  63 |                     "but not in meters. All `neatnet` defaults assume meters. "
  64 |                     "Either reproject and rerun or proceed with caution."
  65 |                 ),
  66 |                 category=UserWarning,
  67 |                 stacklevel=2,
  68 |             )
  69 | 
  70 |     if exclusion_mask is not None and exclusion_mask.crs != streets_crs:
  71 |         raise ValueError(
  72 |             "The input `streets` and `exclusion_mask` data are in "
  73 |             "different coordinate reference systems. Reproject and rerun."
  74 |         )
  75 | 
  76 | 
  77 | def _link_nodes_artifacts(
  78 |     step: str,
  79 |     streets: gpd.GeoDataFrame,
  80 |     artifacts: gpd.GeoDataFrame,
  81 |     eps: None | float,
  82 | ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
  83 |     """Helper to prep nodes & artifacts when simplifying singletons & pairs."""
  84 | 
  85 |     # Get nodes from the network
  86 |     nodes = _nodes_degrees_from_edges(streets.geometry)
  87 | 
  88 |     if step == "singletons":
  89 |         node_geom = nodes.geometry
  90 |         sindex_kwargs = {"predicate": "dwithin", "distance": eps}
  91 |     else:
  92 |         node_geom = nodes.buffer(0.1)
  93 |         sindex_kwargs = {"predicate": "intersects"}
  94 | 
  95 |     # Link nodes to artifacts
  96 |     node_idx, artifact_idx = artifacts.sindex.query(node_geom, **sindex_kwargs)
  97 | 
  98 |     intersects = sparse.coo_array(
  99 |         ([True] * len(node_idx), (node_idx, artifact_idx)),
 100 |         shape=(len(nodes), len(artifacts)),
 101 |         dtype=np.bool_,
 102 |     )
 103 | 
 104 |     # Compute number of nodes per artifact
 105 |     artifacts["node_count"] = intersects.sum(axis=0)
 106 | 
 107 |     return nodes, artifacts
 108 | 
 109 | 
 110 | def _classify_strokes(
 111 |     artifacts: gpd.GeoDataFrame, streets: gpd.GeoDataFrame
 112 | ) -> gpd.GeoDataFrame:
 113 |     """Classify artifacts with ``{C,E,S}`` typology."""
 114 | 
 115 |     strokes, c_, e_, s_ = get_stroke_info(artifacts, streets)
 116 | 
 117 |     artifacts["stroke_count"] = strokes
 118 |     artifacts["C"] = c_
 119 |     artifacts["E"] = e_
 120 |     artifacts["S"] = s_
 121 | 
 122 |     return artifacts
 123 | 
 124 | 
 125 | def _identify_non_planar(
 126 |     artifacts: gpd.GeoDataFrame, streets: gpd.GeoDataFrame
 127 | ) -> gpd.GeoDataFrame:
 128 |     """Filter artifacts caused by non-planar intersections."""
 129 | 
 130 |     # Note from within `neatify_singletons()`
 131 |     # TODO: This is not perfect.
 132 |     # TODO: Some 3CC artifacts were non-planar but not captured here.
 133 | 
 134 |     artifacts["non_planar"] = artifacts["stroke_count"] > artifacts["node_count"]
 135 |     a_idx, r_idx = streets.sindex.query(
 136 |         artifacts.geometry.boundary, predicate="overlaps"
 137 |     )
 138 |     artifacts.iloc[np.unique(a_idx), artifacts.columns.get_loc("non_planar")] = True
 139 | 
 140 |     return artifacts
 141 | 
 142 | 
 143 | def neatify_singletons(
 144 |     artifacts: gpd.GeoDataFrame,
 145 |     streets: gpd.GeoDataFrame,
 146 |     *,
 147 |     max_segment_length: float | int = 1,
 148 |     compute_coins: bool = True,
 149 |     min_dangle_length: float | int = 10,
 150 |     eps: float = 1e-4,
 151 |     clip_limit: float | int = 2,
 152 |     simplification_factor: float | int = 2,
 153 |     consolidation_tolerance: float | int = 10,
 154 | ) -> gpd.GeoDataFrame:
 155 |     """Simplification of singleton face artifacts – the first simplification step in
 156 |     the procedure detailed in ``simplify.neatify_loop()``.
 157 | 
 158 |     This process extracts nodes from network edges before computing and labeling
 159 |     face artifacts with a ``{C, E, S}`` typology through ``momepy.COINS`` via the
 160 |     constituent street geometries.
 161 | 
 162 |     Next, each artifact is iterated over and constituent line geometries are either
 163 |     dropped or added in the following order of typologies:
 164 | 
 165 |         1. 1 node and 1 continuity group
 166 |         2. more than 1 node and 1 or more identical continuity groups
 167 |         3. 2 or more nodes and 2 or more continuity groups
 168 | 
 169 |     Non-planar geometries are ignored.
 170 | 
 171 |     Parameters
 172 |     ----------
 173 |     artifacts : geopandas.GeoDataFrame
 174 |         Face artifact polygons.
 175 |     streets : geopandas.GeoDataFrame
 176 |         Preprocessed street network data.
 177 |     max_segment_length : float | int = 1
 178 |         Additional nodes will be added so that all line segments
 179 |         are no longer than this value. Must be greater than 0.
 180 |         Used in multiple internal geometric operations.
 181 |     compute_coins : bool = True
 182 |         Flag for computing and labeling artifacts with a ``{C, E, S}`` typology through
 183 |         ``momepy.COINS`` via the constituent street geometries.
 184 |     min_dangle_length : float | int = 10
 185 |         The threshold for determining if linestrings are dangling slivers to be
 186 |         removed or not.
 187 |     eps : float = 1e-4
 188 |         Tolerance epsilon used in multiple internal geometric operations.
 189 |     clip_limit : float | int = 2
 190 |         Following generation of the Voronoi linework, we clip to fit inside the
 191 |         polygon. To ensure we get a space to make proper topological connections
 192 |         from the linework to the actual points on the edge of the polygon, we clip
 193 |         using a polygon with a negative buffer of ``clip_limit`` or the radius of
 194 |         maximum inscribed circle, whichever is smaller.
 195 |     simplification_factor : float | int = 2
 196 |         The factor by which singles, pairs, and clusters are simplified. The
 197 |         ``max_segment_length`` is multiplied by this factor to get the
 198 |         simplification epsilon.
 199 |     consolidation_tolerance : float | int = 10
 200 |         Tolerance passed to node consolidation when generating Voronoi skeletons.
 201 | 
 202 |     Returns
 203 |     -------
 204 |     geopandas.GeoDataFrame
 205 |         The street network line data following the singleton procedure.
 206 |     """
 207 | 
 208 |     # Extract network nodes and relate to artifacts
 209 |     nodes, artifacts = _link_nodes_artifacts("singletons", streets, artifacts, eps)
 210 | 
 211 |     # Compute number of stroke groups per artifact
 212 |     if compute_coins:
 213 |         streets, _ = continuity(streets)
 214 |     artifacts = _classify_strokes(artifacts, streets)
 215 | 
 216 |     # Filter artifacts caused by non-planar intersections
 217 |     artifacts = _identify_non_planar(artifacts, streets)
 218 | 
 219 |     # Count intersititial nodes (primes)
 220 |     _prime_count = artifacts["node_count"] - artifacts[["C", "E", "S"]].sum(axis=1)
 221 |     artifacts["interstitial_nodes"] = _prime_count
 222 | 
 223 |     # Define the type label
 224 |     ces_type = []
 225 |     for x in artifacts[["node_count", "C", "E", "S"]].itertuples():
 226 |         ces_type.append(f"{x.node_count}{'C' * x.C}{'E' * x.E}{'S' * x.S}")
 227 |     artifacts["ces_type"] = ces_type
 228 | 
 229 |     # Collect changes
 230 |     to_drop: list[int] = []
 231 |     to_add: list[int] = []
 232 |     split_points: list[shapely.Point] = []
 233 | 
 234 |     # Isolate planar artifacts
 235 |     planar = artifacts[~artifacts["non_planar"]].copy()
 236 |     planar["buffered"] = planar.buffer(eps)
 237 |     if artifacts["non_planar"].any():
 238 |         logger.debug(f"IGNORING {artifacts.non_planar.sum()} non planar artifacts")
 239 | 
 240 |     # Iterate over each singleton planar artifact and simplify based on typology
 241 |     for artifact in planar.itertuples():
 242 |         n_nodes = artifact.node_count
 243 |         n_strokes = artifact.stroke_count
 244 |         cestype = artifact.ces_type
 245 | 
 246 |         # Get edges relevant for an artifact
 247 |         edges = streets.iloc[
 248 |             streets.sindex.query(artifact.buffered, predicate="covers")
 249 |         ]
 250 | 
 251 |         # Dispatch by typology
 252 |         try:
 253 |             # 1 node and 1 continuity group
 254 |             if (n_nodes == 1) and (n_strokes == 1):
 255 |                 logger.debug("FUNCTION n1_g1_identical")
 256 |                 n1_g1_identical(
 257 |                     edges,
 258 |                     to_drop=to_drop,
 259 |                     to_add=to_add,
 260 |                     geom=artifact.geometry,
 261 |                     max_segment_length=max_segment_length,
 262 |                     clip_limit=clip_limit,
 263 |                 )
 264 |             # More than 1 node and 1 or more identical continuity groups
 265 |             elif (n_nodes > 1) and (len(set(cestype[1:])) == 1):
 266 |                 logger.debug("FUNCTION nx_gx_identical")
 267 |                 nx_gx_identical(
 268 |                     edges,
 269 |                     geom=artifact.geometry,
 270 |                     to_add=to_add,
 271 |                     to_drop=to_drop,
 272 |                     nodes=nodes,
 273 |                     angle=75,
 274 |                     max_segment_length=max_segment_length,
 275 |                     clip_limit=clip_limit,
 276 |                     consolidation_tolerance=consolidation_tolerance,
 277 |                 )
 278 |             # 2 or more nodes and 2 or more continuity groups
 279 |             elif (n_nodes > 1) and (len(cestype) > 2):
 280 |                 logger.debug("FUNCTION nx_gx")
 281 |                 nx_gx(
 282 |                     edges,
 283 |                     artifact=artifact,
 284 |                     to_drop=to_drop,
 285 |                     to_add=to_add,
 286 |                     split_points=split_points,
 287 |                     nodes=nodes,
 288 |                     max_segment_length=max_segment_length,
 289 |                     clip_limit=clip_limit,
 290 |                     min_dangle_length=min_dangle_length,
 291 |                     consolidation_tolerance=consolidation_tolerance,
 292 |                 )
 293 |             else:
 294 |                 logger.debug("NON PLANAR")
 295 |         except Exception as e:
 296 |             if DEBUGGING:
 297 |                 raise e
 298 |             warnings.warn(
 299 |                 f"An error occured at location {artifact.geometry.centroid}. "
 300 |                 f"The artifact has not been simplified. The original message:\n{e}",
 301 |                 UserWarning,
 302 |                 stacklevel=2,
 303 |             )
 304 | 
 305 |     cleaned_streets = streets.drop(to_drop)
 306 |     # split lines on new nodes
 307 |     cleaned_streets = split(split_points, streets.drop(to_drop), streets.crs)
 308 | 
 309 |     if to_add:
 310 |         # Create new streets with fixed geometry.
 311 |         # Note: ``to_add`` and ``to_drop`` lists shall be global and
 312 |         # this step should happen only once, not for every artifact
 313 |         _add_merged = gpd.GeoSeries(to_add).line_merge()
 314 |         new = gpd.GeoDataFrame(geometry=_add_merged, crs=streets.crs).explode()
 315 |         new = new[~new.normalize().duplicated()].copy()
 316 |         new["_status"] = "new"
 317 |         new.geometry = new.simplify(max_segment_length * simplification_factor)
 318 |         new_streets = pd.concat([cleaned_streets, new], ignore_index=True)
 319 |         agg: dict[str, str | typing.Callable] = {"_status": _status}
 320 |         for c in cleaned_streets.columns.drop(cleaned_streets.active_geometry_name):
 321 |             if c != "_status":
 322 |                 agg[c] = "first"
 323 |         non_empties = new_streets[~(new_streets.is_empty | new_streets.geometry.isna())]
 324 |         new_streets = remove_interstitial_nodes(non_empties, aggfunc=agg)
 325 | 
 326 |         final = new_streets
 327 |     else:
 328 |         final = cleaned_streets
 329 | 
 330 |     if "coins_group" in final.columns:
 331 |         final = final.drop(
 332 |             columns=[c for c in streets.columns if c.startswith("coins_")]
 333 |         )
 334 |     return final
 335 | 
 336 | 
 337 | def neatify_pairs(
 338 |     artifacts: gpd.GeoDataFrame,
 339 |     streets: gpd.GeoDataFrame,
 340 |     *,
 341 |     max_segment_length: float | int = 1,
 342 |     min_dangle_length: float | int = 20,
 343 |     clip_limit: float | int = 2,
 344 |     simplification_factor: float | int = 2,
 345 |     consolidation_tolerance: float | int = 10,
 346 | ) -> gpd.GeoDataFrame:
 347 |     """Simplification of pairs of face artifacts – the second simplification step in
 348 |     the procedure detailed in ``simplify.neatify_loop()``.
 349 | 
 350 |     This process extracts nodes from network edges before identifying non-planarity
 351 |     and cluster information.
 352 | 
 353 |     If paired artifacts are present we further classify them as grouped by
 354 |     first vs. last instance of duplicated component label, and whether
 355 |     or not to be simplified with the clustered process.
 356 | 
 357 |     Finally, simplification is performed based on the following order of typologies:
 358 |         1. Singletons – merged pairs & first instance (w/o COINS)
 359 |         2. Singletons – Second instance – w/ COINS
 360 |         3. Clusters
 361 | 
 362 |     Parameters
 363 |     ----------
 364 |     artifacts : geopandas.GeoDataFrame
 365 |         Face artifact polygons.
 366 |     streets : geopandas.GeoDataFrame
 367 |         Preprocessed street network data.
 368 |     max_segment_length : float | int = 1
 369 |         Additional vertices will be added so that all line segments
 370 |         are no longer than this value. Must be greater than 0.
 371 |         Used in multiple internal geometric operations.
 372 |     min_dangle_length : float | int = 20
 373 |         The threshold for determining if linestrings are dangling slivers to be
 374 |         removed or not.
 375 |     clip_limit : float | int = 2
 376 |         Following generation of the Voronoi linework, we clip to fit inside the
 377 |         polygon. To ensure we get a space to make proper topological connections
 378 |         from the linework to the actual points on the edge of the polygon, we clip
 379 |         using a polygon with a negative buffer of ``clip_limit`` or the radius of
 380 |         maximum inscribed circle, whichever is smaller.
 381 |     simplification_factor : float | int = 2
 382 |         The factor by which singles, pairs, and clusters are simplified. The
 383 |         ``max_segment_length`` is multiplied by this factor to get the
 384 |         simplification epsilon.
 385 |     consolidation_tolerance : float | int = 10
 386 |         Tolerance passed to node consolidation when generating Voronoi skeletons.
 387 | 
 388 |     Returns
 389 |     -------
 390 |     geopandas.GeoDataFrame
 391 |         The street network line data following the pairs procedure.
 392 |     """
 393 | 
 394 |     # Extract network nodes and relate to artifacts
 395 |     nodes, artifacts = _link_nodes_artifacts("pairs", streets, artifacts, None)
 396 | 
 397 |     # Compute number of stroke groups per artifact
 398 |     streets, _ = continuity(streets)
 399 |     artifacts = _classify_strokes(artifacts, streets)
 400 | 
 401 |     # Filter artifacts caused by non-planar intersections
 402 |     artifacts = _identify_non_planar(artifacts, streets)
 403 | 
 404 |     # Identify non-planar clusters
 405 |     _id_np = lambda x: sum(artifacts.loc[artifacts["comp"] == x.comp]["non_planar"])  # noqa: E731
 406 |     artifacts["non_planar_cluster"] = artifacts.apply(_id_np, axis=1)
 407 |     # Subset non-planar clusters and planar artifacts
 408 |     np_clusters = artifacts[artifacts.non_planar_cluster > 0]
 409 |     artifacts_planar = artifacts[artifacts.non_planar_cluster == 0]
 410 | 
 411 |     # Isolate planar artifacts
 412 |     _planar_grouped = artifacts_planar.groupby("comp")[artifacts_planar.columns]
 413 |     _solutions = _planar_grouped.apply(get_solution, streets=streets)
 414 |     artifacts_w_info = artifacts.merge(_solutions, left_on="comp", right_index=True)
 415 | 
 416 |     # Isolate non-planar clusters of value 2 – e.g., artifact under highway
 417 |     _np_clust_2 = np_clusters["non_planar_cluster"] == 2
 418 |     artifacts_under_np = np_clusters[_np_clust_2].dissolve("comp", as_index=False)
 419 | 
 420 |     # Determine typology dispatch if artifacts are present
 421 |     if not artifacts_w_info.empty:
 422 |         agg = {
 423 |             "coins_group": "first",
 424 |             "coins_end": lambda x: x.any(),
 425 |             "_status": _status,
 426 |         }
 427 |         for c in streets.columns.drop(
 428 |             [streets.active_geometry_name, "coins_count"], errors="ignore"
 429 |         ):
 430 |             if c not in agg:
 431 |                 agg[c] = "first"
 432 | 
 433 |         sol_drop = "solution == 'drop_interline'"
 434 |         sol_iter = "solution == 'iterate'"
 435 | 
 436 |         # Determine artifacts and street edges to drop
 437 |         _to_drop = artifacts_w_info.drop_duplicates("comp").query(sol_drop).drop_id
 438 |         _drop_streets = streets.drop(_to_drop.dropna().values)
 439 | 
 440 |         # Re-run node cleaning on subset of fresh street edges
 441 |         streets_cleaned = remove_interstitial_nodes(
 442 |             _drop_streets,
 443 |             aggfunc=agg,
 444 |         )
 445 | 
 446 |         # Isolate drops to create merged pairs
 447 |         merged_pairs = artifacts_w_info.query(sol_drop).dissolve("comp", as_index=False)
 448 | 
 449 |         # Sort artifacts by their node count low-to-high
 450 |         sorted_node_count = artifacts_w_info.sort_values("node_count", ascending=False)
 451 | 
 452 |         # Isolate artifacts to process as singletons – first instance
 453 |         _1st = sorted_node_count.query(sol_iter).drop_duplicates("comp", keep="first")
 454 |         _planar_clusters = np_clusters[~np_clusters["non_planar"]]
 455 |         _1st = pd.concat([_1st, _planar_clusters], ignore_index=True)
 456 | 
 457 |         # Isolate artifacts to process as singletons – last instance
 458 |         _2nd = sorted_node_count.query(sol_iter).drop_duplicates("comp", keep="last")
 459 | 
 460 |         # Isolate artifacts to process as clusters
 461 |         for_skeleton = artifacts_w_info.query("solution == 'skeleton'")
 462 | 
 463 |     # Otherwise instantiate artifact containers as empty
 464 |     else:
 465 |         merged_pairs = pd.DataFrame()
 466 |         _1st = pd.DataFrame()
 467 |         _2nd = pd.DataFrame()
 468 |         for_skeleton = pd.DataFrame()
 469 |         streets_cleaned = streets
 470 | 
 471 |     # Generate counts of COINs groups for edges
 472 |     coins_count = (
 473 |         streets_cleaned.groupby("coins_group", as_index=False)
 474 |         .geometry.count()
 475 |         .rename(columns={"geometry": "coins_count"})
 476 |     )
 477 |     streets_cleaned = streets_cleaned.merge(coins_count, on="coins_group", how="left")
 478 | 
 479 |     # Add under non-planars to cluster dispatcher
 480 |     if not artifacts_under_np.empty:
 481 |         for_skeleton = pd.concat([for_skeleton, artifacts_under_np])
 482 | 
 483 |     # Dispatch singleton simplifier
 484 |     if not merged_pairs.empty or not _1st.empty:
 485 |         # Merged pairs & first instance – w/o COINS
 486 |         streets_cleaned = neatify_singletons(
 487 |             pd.concat([merged_pairs, _1st]),
 488 |             streets_cleaned,
 489 |             max_segment_length=max_segment_length,
 490 |             clip_limit=clip_limit,
 491 |             compute_coins=False,
 492 |             min_dangle_length=min_dangle_length,
 493 |             simplification_factor=simplification_factor,
 494 |             consolidation_tolerance=consolidation_tolerance,
 495 |         )
 496 |         # Second instance – w/ COINS
 497 |         if not _2nd.empty:
 498 |             streets_cleaned = neatify_singletons(
 499 |                 _2nd,
 500 |                 streets_cleaned,
 501 |                 max_segment_length=max_segment_length,
 502 |                 clip_limit=clip_limit,
 503 |                 compute_coins=True,
 504 |                 min_dangle_length=min_dangle_length,
 505 |                 simplification_factor=simplification_factor,
 506 |                 consolidation_tolerance=consolidation_tolerance,
 507 |             )
 508 | 
 509 |     # Dispatch cluster simplifier
 510 |     if not for_skeleton.empty:
 511 |         streets_cleaned = neatify_clusters(
 512 |             for_skeleton,
 513 |             streets_cleaned,
 514 |             max_segment_length=max_segment_length,
 515 |             simplification_factor=simplification_factor,
 516 |             min_dangle_length=min_dangle_length,
 517 |             consolidation_tolerance=consolidation_tolerance,
 518 |         )
 519 | 
 520 |     return streets_cleaned
 521 | 
 522 | 
 523 | def neatify_clusters(
 524 |     artifacts: gpd.GeoDataFrame,
 525 |     streets: gpd.GeoDataFrame,
 526 |     *,
 527 |     max_segment_length: float | int = 1,
 528 |     eps: float = 1e-4,
 529 |     simplification_factor: float | int = 2,
 530 |     min_dangle_length: float | int = 20,
 531 |     consolidation_tolerance: float | int = 10,
 532 | ) -> gpd.GeoDataFrame:
 533 |     """Simplification of clusters of face artifacts – the third simplification step in
 534 |     the procedure detailed in ``simplify.neatify_loop()``.
 535 | 
 536 |     This process extracts nodes from network edges before iterating over each
 537 |     cluster artifact and performing simplification.
 538 | 
 539 |     Parameters
 540 |     ----------
 541 |     artifacts : geopandas.GeoDataFrame
 542 |         Face artifact polygons.
 543 |     streets : geopandas.GeoDataFrame
 544 |         Preprocessed street network data.
 545 |     max_segment_length : float | int = 1
 546 |         Additional vertices will be added so that all line segments
 547 |         are no longer than this value. Must be greater than 0.
 548 |         Used in multiple internal geometric operations.
 549 |     eps : float = 1e-4
 550 |         Tolerance epsilon used in multiple internal geometric operations.
 551 |     simplification_factor : float | int = 2
 552 |         The factor by which singles, pairs, and clusters are simplified. The
 553 |         ``max_segment_length`` is multiplied by this factor to get the
 554 |         simplification epsilon.
 555 |     min_dangle_length : float | int = 20
 556 |         The threshold for determining if linestrings are dangling slivers to be
 557 |         removed or not.
 558 |     consolidation_tolerance : float | int = 10
 559 |         Tolerance passed to node consolidation when generating Voronoi skeletons.
 560 | 
 561 |     Returns
 562 |     -------
 563 |     geopandas.GeoDataFrame
 564 |         The street network line data following the clusters procedure.
 565 |     """
 566 | 
 567 |     # Get nodes from the network
 568 |     nodes = gpd.GeoSeries(_nodes_from_edges(streets.geometry))
 569 | 
 570 |     # Collect changes
 571 |     to_drop: list[int] = []
 572 |     to_add: list[int] = []
 573 | 
 574 |     for _, artifact in artifacts.groupby("comp"):
 575 |         # Get artifact cluster polygon
 576 |         cluster_geom = artifact.union_all()
 577 |         # Get edges relevant for an artifact
 578 |         edges = streets.iloc[
 579 |             streets.sindex.query(cluster_geom, predicate="intersects")
 580 |         ].copy()
 581 | 
 582 |         # Clusters of 2 or more nodes and 2 or more continuity groups
 583 |         nx_gx_cluster(
 584 |             edges=edges,
 585 |             cluster_geom=cluster_geom,
 586 |             nodes=nodes,
 587 |             to_drop=to_drop,
 588 |             to_add=to_add,
 589 |             eps=eps,
 590 |             max_segment_length=max_segment_length,
 591 |             min_dangle_length=min_dangle_length,
 592 |             consolidation_tolerance=consolidation_tolerance,
 593 |         )
 594 | 
 595 |     cleaned_streets = streets.drop(to_drop)
 596 | 
 597 |     # Create new street with fixed geometry.
 598 |     # Note: ``to_add`` and ``to_drop`` lists shall be global and
 599 |     # this step should happen only once, not for every artifact
 600 |     new = gpd.GeoDataFrame(geometry=to_add, crs=streets.crs)
 601 |     new["_status"] = "new"
 602 |     new["geometry"] = new.line_merge().simplify(
 603 |         max_segment_length * simplification_factor
 604 |     )
 605 |     new_streets = pd.concat([cleaned_streets, new], ignore_index=True).explode()
 606 |     agg: dict[str, str | typing.Callable] = {"_status": _status}
 607 |     for c in new_streets.columns.drop(new_streets.active_geometry_name):
 608 |         if c != "_status":
 609 |             agg[c] = "first"
 610 |     new_streets = remove_interstitial_nodes(
 611 |         new_streets[~new_streets.is_empty], aggfunc=agg
 612 |     ).drop_duplicates("geometry")
 613 | 
 614 |     return new_streets
 615 | 
 616 | 
 617 | def get_type(edges: gpd.GeoDataFrame, shared_edge: int) -> str:
 618 |     """Classify artifact edges according to the ``{C, E, S}``
 619 |     schema when considering solutions for pairs of artifacts.
 620 | 
 621 |     Parameters
 622 |     ----------
 623 |     edges : geopandas.GeoDataFrame
 624 |         Artifact edges in consideration.
 625 |     shared_edge : int
 626 |         The index location of the shared edge of the pair.
 627 | 
 628 |     Returns
 629 |     -------
 630 |     str
 631 |         Classification for an edge in ``{C, E, S}``.
 632 |     """
 633 | 
 634 |     if (  # Roundabout special case
 635 |         edges["coins_group"].nunique() == 1
 636 |         and edges.shape[0] == edges["coins_count"].iloc[0]
 637 |     ):
 638 |         return "S"
 639 | 
 640 |     all_ends = edges[edges["coins_end"]]
 641 |     mains = edges[~edges["coins_group"].isin(all_ends["coins_group"])]
 642 |     shared = edges.loc[shared_edge]
 643 | 
 644 |     if shared_edge in mains.index:
 645 |         return "C"
 646 | 
 647 |     if shared["coins_count"] == (edges["coins_group"] == shared["coins_group"]).sum():
 648 |         return "S"
 649 | 
 650 |     return "E"
 651 | 
 652 | 
 653 | def get_solution(group: gpd.GeoDataFrame, streets: gpd.GeoDataFrame) -> pd.Series:
 654 |     """Determine the solution for paired planar artifacts.
 655 | 
 656 |     Parameters
 657 |     ----------
 658 |     group : geopandas.GeoDataFrame
 659 |         Dissolved group of connected planar artifacts.
 660 |     streets : geopandas.GeoDataFrame
 661 |         Street network data.
 662 | 
 663 |     Returns
 664 |     -------
 665 |     pandas.Series
 666 |         The determined solution and edge to drop.
 667 |     """
 668 | 
 669 |     def _relate(loc: int) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
 670 |         """Isolate intersecting & covering street geometries."""
 671 |         _geom = group.geometry.iloc[loc]
 672 |         _streets = streets.iloc[streets.sindex.query(_geom, predicate="intersects")]
 673 |         _covers = _streets.iloc[_streets.sindex.query(_geom, predicate="covers")]
 674 |         return _streets, _covers
 675 | 
 676 |     cluster_geom = group.union_all()
 677 | 
 678 |     streets_a, covers_a = _relate(0)
 679 |     streets_b, covers_b = _relate(1)
 680 | 
 681 |     # Find the street segment that is contained within the cluster geometry
 682 |     shared = streets.index[streets.sindex.query(cluster_geom, predicate="contains")]
 683 | 
 684 |     if shared.empty or covers_a.empty or covers_b.empty:
 685 |         return pd.Series({"solution": "non_planar", "drop_id": None})
 686 | 
 687 |     shared = shared.item()
 688 | 
 689 |     if (np.invert(streets_b.index.isin(covers_a.index)).sum() == 1) or (
 690 |         np.invert(streets_a.index.isin(covers_b.index)).sum() == 1
 691 |     ):
 692 |         return pd.Series({"solution": "drop_interline", "drop_id": shared})
 693 | 
 694 |     seen_by_a = get_type(covers_a, shared)
 695 |     seen_by_b = get_type(covers_b, shared)
 696 | 
 697 |     if seen_by_a == "C" and seen_by_b == "C":
 698 |         return pd.Series({"solution": "iterate", "drop_id": shared})
 699 | 
 700 |     if seen_by_a == seen_by_b:
 701 |         return pd.Series({"solution": "drop_interline", "drop_id": shared})
 702 | 
 703 |     return pd.Series({"solution": "skeleton", "drop_id": shared})
 704 | 
 705 | 
 706 | def neatify(
 707 |     streets: gpd.GeoDataFrame,
 708 |     *,
 709 |     exclusion_mask: None | gpd.GeoSeries = None,
 710 |     predicate: str = "intersects",
 711 |     max_segment_length: float | int = 1,
 712 |     min_dangle_length: float | int = 20,
 713 |     clip_limit: float | int = 2,
 714 |     simplification_factor: float | int = 2,
 715 |     consolidation_tolerance: float | int = 10,
 716 |     artifact_threshold: None | float | int = None,
 717 |     artifact_threshold_fallback: float | int = 7,
 718 |     area_threshold_blocks: float | int = 1e5,
 719 |     isoareal_threshold_blocks: float | int = 0.5,
 720 |     area_threshold_circles: float | int = 5e4,
 721 |     isoareal_threshold_circles_enclosed: float | int = 0.75,
 722 |     isoperimetric_threshold_circles_touching: float | int = 0.9,
 723 |     eps: float = 1e-4,
 724 |     n_loops: int = 2,
 725 | ) -> gpd.GeoDataFrame:
 726 |     """Top-level workflow for simplifying street networks. The input raw street network
 727 |     data, which must be in a projected coordinate reference system and is expected to be
 728 |     in meters, is first preprocessed (topological corrections & node consolidation)
 729 |     before two iterations of artifact detection and simplification.
 730 | 
 731 |     Each iteration of the simplification procedure which includes (1.) the removal
 732 |     of false nodes; (2.) face artifact classification; and (3.) the line-based
 733 |     simplification of face artifacts in the order of single artifacts, pairs of
 734 |     artifacts, clusters of artifacts.
 735 | 
 736 |     For further information on face artifact detection and extraction
 737 |     see :cite:`fleischmann_shape-based_2024`.
 738 | 
 739 |     This algorithm is designed for use with only "street" network geometries as input.
 740 |     While passing in other types of pathing (e.g., sidewalks, canals) will likely yield
 741 |     valid geometric results, that behavior is untested.
 742 | 
 743 |     Parameters
 744 |     ----------
 745 |     streets : geopandas.GeoDataFrame
 746 |         Raw street network data. This input *must* be in a projected coordinate
 747 |         reference system and *should* be in meters. All defaults arguments assume
 748 |         meters. The internal algorithm is designed for use with street network
 749 |         geometries, not  other types of pathing (e.g., sidewalks, canals), which
 750 |         should be filtered out.
 751 |     exclusion_mask : None | geopandas.GeoSeries = None
 752 |         Geometries used to determine face artifacts to exclude from returned output.
 753 |     predicate : str = 'intersects'
 754 |         The spatial predicate used to exclude face artifacts from returned output.
 755 |     max_segment_length : float | int = 1
 756 |         Additional vertices will be added so that all line segments
 757 |         are no longer than this value. Must be greater than 0.
 758 |         Used in multiple internal geometric operations.
 759 |     min_dangle_length : float | int
 760 |         The threshold for determining if linestrings are dangling slivers to be
 761 |         removed or not.
 762 |     clip_limit : float | int = 2
 763 |         Following generation of the Voronoi linework, we clip to fit inside the
 764 |         polygon. To ensure we get a space to make proper topological connections
 765 |         from the linework to the actual points on the edge of the polygon, we clip
 766 |         using a polygon with a negative buffer of ``clip_limit`` or the radius of
 767 |         maximum inscribed circle, whichever is smaller.
 768 |     simplification_factor : float | int = 2
 769 |         The factor by which singles, pairs, and clusters are simplified. The
 770 |         ``max_segment_length`` is multiplied by this factor to get the
 771 |         simplification epsilon.
 772 |     consolidation_tolerance : float | int = 10
 773 |         Tolerance passed to node consolidation when generating Voronoi skeletons.
 774 |     artifact_threshold : None | float | int = None
 775 |         When ``artifact_threshold`` is passed, the computed value from
 776 |         ``momepy.FaceArtifacts.threshold`` is not used in favor of the
 777 |         given value. This is useful for small networks where artifact
 778 |         detection may fail or become unreliable.
 779 |     artifact_threshold_fallback : float | int = 7
 780 |         If artifact threshold detection fails, this value is used as a fallback.
 781 |     area_threshold_blocks : float | int = 1e5
 782 |         This is the first threshold for detecting block-like artifacts whose
 783 |         Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above
 784 |         the value passed in ``artifact_threshold``.
 785 |         If a polygon has an area below ``area_threshold_blocks``, *and*
 786 |         is of elongated shape (see also ``isoareal_threshold_blocks``),
 787 |         *and* touches at least one polygon that has already been classified as artifact,
 788 |         then it will be classified as an artifact.
 789 |     isoareal_threshold_blocks : float | int = 0.5
 790 |         This is the second threshold for detecting block-like artifacts whose
 791 |         Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above the
 792 |         value passed in ``artifact_threshold``. If a polygon has an isoareal quotient
 793 |         below ``isoareal_threshold_blocks`` (see ``esda.shape.isoareal_quotient``),
 794 |         i.e., if it has an elongated shape; *and* it has a sufficiently small area
 795 |         (see also ``area_threshold_blocks``), *and* if it touches at least one
 796 |         polygon that has already been detected as an artifact,
 797 |         then it will be classified as an artifact.
 798 |     area_threshold_circles : float | int = 5e4
 799 |         This is the first threshold for detecting circle-like artifacts whose
 800 |         Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above the
 801 |         value passed in ``artifact_threshold``. If a polygon has an area below
 802 |         ``area_threshold_circles``, *and* one of the following 2 cases is given:
 803 |         (a) the polygon is touched, but not enclosed by polygons already classified
 804 |         as artifacts, *and* with an isoperimetric quotient
 805 |         (see ``esda.shape.isoperimetric_quotient``)
 806 |         above ``isoperimetric_threshold_circles_touching``, i.e., if its shape
 807 |         is close to circular; or (b) the polygon is fully enclosed by polygons
 808 |         already classified as artifacts, *and* with an isoareal quotient
 809 |         above
 810 |         ``isoareal_threshold_circles_enclosed``, i.e., if its shape is
 811 |         close to circular; then it will be classified as an artifact.
 812 |     isoareal_threshold_circles_enclosed : float | int = 0.75
 813 |         This is the second threshold for detecting circle-like artifacts whose
 814 |         Face Artifact Index (see :cite:`fleischmann_shape-based_2024`) is above the
 815 |         value  passed in ``artifact_threshold``. If a polygon has a sufficiently small
 816 |         area (see also ``area_threshold_circles``), *and* the polygon is
 817 |         fully enclosed by polygons already classified as artifacts,
 818 |         *and* its isoareal quotient (see ``esda.shape.isoareal_quotient``)
 819 |         is above the value passed to ``isoareal_threshold_circles_enclosed``,
 820 |         i.e., if its shape is close to circular;
 821 |         then it will be classified as an artifact.
 822 |     isoperimetric_threshold_circles_touching : float | int = 0.9
 823 |         This is the third threshold for detecting circle-like artifacts whose
 824 |         Face Artifact Index (see :cite:`fleischmann_shape-based_2024`)
 825 |         is above the value passed in ``artifact_threshold``.
 826 |         If a polygon has a sufficiently small area
 827 |         (see also ``area_threshold_circles``), *and* the polygon is touched
 828 |         by at least one polygon already classified as artifact,
 829 |         *and* its isoperimetric quotient (see ``esda.shape.isoperimetric_quotient``)
 830 |         is above the value passed to ``isoperimetric_threshold_circles_touching``,
 831 |         i.e., if its shape is close to circular;
 832 |         then it will be classified as an artifact.
 833 |     eps : float = 1e-4
 834 |         Tolerance epsilon used in multiple internal geometric operations.
 835 |     n_loops : int = 2
 836 |         Number of loops through the simplification pipeline. It is recommended to stick
 837 |         to the default value and increase it only very conservatively.
 838 | 
 839 |     Returns
 840 |     -------
 841 |     geopandas.GeoDataFrame
 842 |         The final, simplified street network line data.
 843 | 
 844 |     Notes
 845 |     -----
 846 |     As is noted above, the input network data must be in a projected coordinate
 847 |     reference system and is expected to be in meters. However, it may be possible to
 848 |     work with network data projected in feet if all default arguments are adjusted.
 849 |     """
 850 | 
 851 |     _check_input_crs(streets, exclusion_mask)
 852 | 
 853 |     streets = fix_topology(streets, eps=eps)
 854 | 
 855 |     # Merge nearby nodes (up to double of distance used in skeleton).
 856 |     streets = consolidate_nodes(streets, tolerance=max_segment_length * 2.1)
 857 | 
 858 |     # Identify artifacts
 859 |     artifacts, threshold = get_artifacts(
 860 |         streets,
 861 |         exclusion_mask=exclusion_mask,
 862 |         predicate=predicate,
 863 |         threshold=artifact_threshold,
 864 |         threshold_fallback=artifact_threshold_fallback,
 865 |         area_threshold_blocks=area_threshold_blocks,
 866 |         isoareal_threshold_blocks=isoareal_threshold_blocks,
 867 |         area_threshold_circles=area_threshold_circles,
 868 |         isoareal_threshold_circles_enclosed=isoareal_threshold_circles_enclosed,
 869 |         isoperimetric_threshold_circles_touching=isoperimetric_threshold_circles_touching,
 870 |     )
 871 | 
 872 |     # Loop 1
 873 |     new_streets = neatify_loop(
 874 |         streets,
 875 |         artifacts,
 876 |         max_segment_length=max_segment_length,
 877 |         min_dangle_length=min_dangle_length,
 878 |         clip_limit=clip_limit,
 879 |         simplification_factor=simplification_factor,
 880 |         consolidation_tolerance=consolidation_tolerance,
 881 |         eps=eps,
 882 |     )
 883 | 
 884 |     # This is potentially fixing some minor erroneous edges coming from Voronoi
 885 |     new_streets = induce_nodes(new_streets, eps=eps)
 886 |     new_streets = new_streets[~new_streets.geometry.normalize().duplicated()].copy()
 887 | 
 888 |     for _ in range(2, n_loops + 1):
 889 |         # Identify artifacts based on the first loop network
 890 |         artifacts, _ = get_artifacts(
 891 |             new_streets,
 892 |             threshold=threshold,
 893 |             threshold_fallback=artifact_threshold_fallback,
 894 |             area_threshold_blocks=area_threshold_blocks,
 895 |             isoareal_threshold_blocks=isoareal_threshold_blocks,
 896 |             area_threshold_circles=area_threshold_circles,
 897 |             isoareal_threshold_circles_enclosed=isoareal_threshold_circles_enclosed,
 898 |             isoperimetric_threshold_circles_touching=isoperimetric_threshold_circles_touching,
 899 |             exclusion_mask=exclusion_mask,
 900 |             predicate=predicate,
 901 |         )
 902 | 
 903 |         new_streets = neatify_loop(
 904 |             new_streets,
 905 |             artifacts,
 906 |             max_segment_length=max_segment_length,
 907 |             min_dangle_length=min_dangle_length,
 908 |             clip_limit=clip_limit,
 909 |             simplification_factor=simplification_factor,
 910 |             consolidation_tolerance=consolidation_tolerance,
 911 |             eps=eps,
 912 |         )
 913 | 
 914 |         # This is potentially fixing some minor erroneous edges coming from Voronoi
 915 |         new_streets = induce_nodes(new_streets, eps=eps)
 916 |         new_streets = new_streets[~new_streets.geometry.normalize().duplicated()].copy()
 917 | 
 918 |     return new_streets
 919 | 
 920 | 
 921 | def neatify_loop(
 922 |     streets: gpd.GeoDataFrame,
 923 |     artifacts: gpd.GeoDataFrame,
 924 |     *,
 925 |     max_segment_length: float | int = 1,
 926 |     min_dangle_length: float | int = 20,
 927 |     clip_limit: float | int = 2,
 928 |     simplification_factor: float | int = 2,
 929 |     consolidation_tolerance: float | int = 10,
 930 |     eps: float = 1e-4,
 931 | ) -> gpd.GeoDataFrame:
 932 |     """Perform an iteration of the simplification procedure which includes:
 933 |         1. Removal of false nodes
 934 |         2. Artifact classification
 935 |         3. Simplifying artifacts:
 936 |             - Single artifacts
 937 |             - Pairs of artifacts
 938 |             - Clusters of artifacts
 939 | 
 940 |     Parameters
 941 |     ----------
 942 |     streets : geopandas.GeoDataFrame
 943 |         Raw street network data.
 944 |     artifacts : geopandas.GeoDataFrame
 945 |         Face artifact polygons.
 946 |     max_segment_length : float | int = 1
 947 |         Additional vertices will be added so that all line segments
 948 |         are no longer than this value. Must be greater than 0.
 949 |         Used in multiple internal geometric operations.
 950 |     min_dangle_length : float | int = 20
 951 |         The threshold for determining if linestrings are dangling slivers to be
 952 |         removed or not.
 953 |     clip_limit : float | int = 2
 954 |         Following generation of the Voronoi linework, we clip to fit inside the
 955 |         polygon. To ensure we get a space to make proper topological connections
 956 |         from the linework to the actual points on the edge of the polygon, we clip
 957 |         using a polygon with a negative buffer of ``clip_limit`` or the radius of
 958 |         maximum inscribed circle, whichever is smaller.
 959 |     simplification_factor : float | int = 2
 960 |         The factor by which singles, pairs, and clusters are simplified. The
 961 |         ``max_segment_length`` is multiplied by this factor to get the
 962 |         simplification epsilon.
 963 |     consolidation_tolerance : float | int = 10
 964 |         Tolerance passed to node consolidation when generating Voronoi skeletons.
 965 |     eps : float = 1e-4
 966 |         Tolerance epsilon used in multiple internal geometric operations.
 967 | 
 968 |     Returns
 969 |     -------
 970 |     geopandas.GeoDataFrame
 971 |         The street network line data following 1 iteration of simplification.
 972 |     """
 973 | 
 974 |     # Remove edges fully within the artifact (dangles).
 975 |     _, r_idx = streets.sindex.query(artifacts.geometry, predicate="contains")
 976 |     # Dropping may lead to new false nodes – drop those
 977 |     streets = remove_interstitial_nodes(streets.drop(streets.index[r_idx]))
 978 | 
 979 |     # Filter singleton artifacts
 980 |     rook = graph.Graph.build_contiguity(artifacts, rook=True)
 981 | 
 982 |     # Keep only those artifacts which occur as isolates,
 983 |     # e.g. artifacts that are not part of a larger intersection
 984 |     singles = artifacts.loc[artifacts.index.intersection(rook.isolates)].copy()
 985 | 
 986 |     # Filter doubles
 987 |     artifacts["comp"] = rook.component_labels
 988 |     counts = artifacts["comp"].value_counts()
 989 |     doubles = artifacts.loc[artifacts["comp"].isin(counts[counts == 2].index)].copy()
 990 | 
 991 |     # Filter clusters
 992 |     clusters = artifacts.loc[artifacts["comp"].isin(counts[counts > 2].index)].copy()
 993 | 
 994 |     if not singles.empty:
 995 |         # NOTE: this drops attributes
 996 |         streets = neatify_singletons(
 997 |             singles,
 998 |             streets,
 999 |             max_segment_length=max_segment_length,
1000 |             simplification_factor=simplification_factor,
1001 |             consolidation_tolerance=consolidation_tolerance,
1002 |         )
1003 |     if not doubles.empty:
1004 |         streets = neatify_pairs(
1005 |             doubles,
1006 |             streets,
1007 |             max_segment_length=max_segment_length,
1008 |             min_dangle_length=min_dangle_length,
1009 |             clip_limit=clip_limit,
1010 |             simplification_factor=simplification_factor,
1011 |             consolidation_tolerance=consolidation_tolerance,
1012 |         )
1013 |     if not clusters.empty:
1014 |         streets = neatify_clusters(
1015 |             clusters,
1016 |             streets,
1017 |             max_segment_length=max_segment_length,
1018 |             simplification_factor=simplification_factor,
1019 |             eps=eps,
1020 |             min_dangle_length=min_dangle_length,
1021 |             consolidation_tolerance=consolidation_tolerance,
1022 |         )
1023 | 
1024 |     if "coins_group" in streets.columns:
1025 |         streets = streets.drop(
1026 |             columns=[c for c in streets.columns if c.startswith("coins_")]
1027 |         )
1028 |     return streets
1029 | 


--------------------------------------------------------------------------------
/neatnet/tests/conftest.py:
--------------------------------------------------------------------------------
  1 | import pathlib
  2 | import platform
  3 | import warnings
  4 | 
  5 | import geopandas.testing
  6 | import matplotlib.pyplot
  7 | import numpy
  8 | import pandas
  9 | import pytest
 10 | import shapely
 11 | 
 12 | import neatnet
 13 | 
 14 | # set the global exception raiser for testing & debugging
 15 | # See gh#121
 16 | neatnet.simplify.DEBUGGING = False
 17 | 
 18 | line_collection = (  # type: ignore[valid-type, misc]
 19 |     list[shapely.LineString]
 20 |     | tuple[shapely.LineString]
 21 |     | numpy.ndarray
 22 |     | pandas.Series
 23 |     | geopandas.GeoSeries
 24 | )
 25 | 
 26 | geometry_collection = (
 27 |     list[shapely.GeometryCollection]
 28 |     | tuple[shapely.GeometryCollection]
 29 |     | numpy.ndarray
 30 |     | pandas.Series
 31 |     | geopandas.GeoSeries
 32 | )
 33 | 
 34 | 
 35 | ####################################################
 36 | # see:
 37 | #   - gh#106
 38 | #   - gh#102
 39 | #   - gh#77
 40 | #   - gh#75
 41 | #   - gh#74
 42 | KNOWN_BAD_GEOMS = {
 43 |     "aleppo_1133": [],
 44 |     "auckland_869": [1412],
 45 |     "bucaramanga_4617": [],
 46 |     "douala_809": [],
 47 |     "liege_1656": [921],
 48 |     "slc_4881": [1144, 1146],
 49 |     "wuhan_8989": [],
 50 |     "apalachicola_standard": [324],
 51 |     "apalachicola_exclusion_mask": [],
 52 | }
 53 | ####################################################
 54 | 
 55 | 
 56 | def polygonize(
 57 |     collection: line_collection,  # type: ignore[valid-type]
 58 |     as_geom: bool = True,  # type: ignore[valid-type]
 59 | ) -> shapely.Polygon | geopandas.GeoSeries:
 60 |     """Testing helper -- Create polygon from collection of lines."""
 61 |     if isinstance(collection, pandas.Series | geopandas.GeoSeries):
 62 |         _poly = geopandas.GeoSeries(collection).polygonize()
 63 |         if as_geom:
 64 |             return _poly.squeeze()
 65 |         else:
 66 |             return _poly
 67 |     else:
 68 |         return shapely.polygonize(collection).buffer(0)
 69 | 
 70 | 
 71 | def is_geopandas(collection: geometry_collection) -> bool:  # type: ignore[valid-type]
 72 |     return isinstance(collection, geopandas.GeoSeries | geopandas.GeoDataFrame)
 73 | 
 74 | 
 75 | def geom_test(
 76 |     collection1: geometry_collection,  # type: ignore[valid-type]
 77 |     collection2: geometry_collection,  # type: ignore[valid-type]
 78 |     tolerance: float = 1e-1,
 79 |     aoi: None | str = None,
 80 | ) -> bool:
 81 |     """Testing helper -- geometry verification."""
 82 | 
 83 |     if not is_geopandas(collection1):
 84 |         collection1 = geopandas.GeoSeries(collection1)
 85 | 
 86 |     if not is_geopandas(collection2):
 87 |         collection2 = geopandas.GeoSeries(collection2)
 88 | 
 89 |     geoms1 = collection1.geometry.normalize()  # type: ignore[attr-defined]
 90 |     geoms2 = collection2.geometry.normalize()  # type: ignore[attr-defined]
 91 | 
 92 |     if aoi and aoi.startswith("apalachicola"):
 93 |         # Varied index order across OSs.
 94 |         # See [https://github.com/uscuni/neatnet/pull/104#issuecomment-2495572388]
 95 |         geoms1 = geoms1.sort_values().reset_index(drop=True)
 96 |         geoms2 = geoms2.sort_values().reset_index(drop=True)
 97 | 
 98 |     try:
 99 |         assert shapely.equals_exact(geoms1, geoms2, tolerance=tolerance).all()
100 |     except AssertionError:
101 |         unexpected_bad = {}
102 |         for ix in geoms1.index:
103 |             g1 = geoms1.loc[ix]
104 |             g2 = geoms2.loc[ix]
105 |             if (
106 |                 not shapely.equals_exact(g1, g2, tolerance=tolerance)
107 |                 and ix not in KNOWN_BAD_GEOMS[aoi]  # type: ignore[index]
108 |             ):
109 |                 unexpected_bad[ix] = {
110 |                     "n_coords": {
111 |                         "g1": shapely.get_coordinates(g1).shape[0],
112 |                         "g2": shapely.get_coordinates(g2).shape[0],
113 |                     },
114 |                     "length": {"g1": g1.length, "g2": g2.length},
115 |                 }
116 |         if unexpected_bad:
117 |             raise AssertionError(
118 |                 f"Problem in '{aoi}' – check locs: {unexpected_bad}"
119 |             ) from None
120 |     return True
121 | 
122 | 
123 | def difference_plot(
124 |     aoi: str,
125 |     writedir: pathlib.Path,
126 |     known: geopandas.GeoDataFrame,
127 |     observed: geopandas.GeoDataFrame,
128 |     diff_buff: int = 50,
129 | ):
130 |     """Plot difference locations observed simplified in relation to known simplified."""
131 | 
132 |     crs = known.crs
133 | 
134 |     # unioned multilinestring of each - known & observed
135 |     known = geopandas.GeoDataFrame(geometry=[known.union_all()], crs=crs)
136 |     observed = geopandas.GeoDataFrame(geometry=[observed.union_all()], crs=crs)
137 | 
138 |     # unioned difference of k-o + o-k
139 |     known_observed_diff = known.difference(observed)
140 |     observed_known_diff = observed.difference(known)
141 |     differences = geopandas.GeoDataFrame(
142 |         geometry=[
143 |             pandas.concat([known_observed_diff, observed_known_diff])
144 |             .explode()
145 |             .union_all()
146 |         ],
147 |         crs=crs,
148 |     )
149 | 
150 |     # plot difference locations in relation to known
151 |     base = known.plot(figsize=(15, 15), zorder=2, alpha=0.4, ec="k", lw=0.5)
152 |     with warnings.catch_warnings():
153 |         # See GL#188
154 |         warnings.filterwarnings(
155 |             "ignore",
156 |             message="The GeoSeries you are attempting to plot",
157 |             category=UserWarning,
158 |         )
159 |         differences.buffer(diff_buff).plot(ax=base, zorder=1, fc="r", alpha=0.6)
160 |     base.set_title(f"known vs. observed differences - {aoi}")
161 |     matplotlib.pyplot.savefig(writedir / f"{aoi}.png", dpi=300, bbox_inches="tight")
162 | 
163 | 
164 | def pytest_addoption(parser):
165 |     """Add custom command line arguments."""
166 | 
167 |     # flag for determining CI environment
168 |     parser.addoption(
169 |         "--env_type",
170 |         action="store",
171 |         default="latest",
172 |         help="Testing environment type label",
173 |         type=str,
174 |     )
175 | 
176 | 
177 | def pytest_configure(config):  # noqa: ARG001
178 |     """PyTest session attributes, methods, etc."""
179 | 
180 |     valid_env_types = ["oldest", "latest", "dev"]
181 |     pytest.env_type = config.getoption("env_type").split("_")[-1]
182 |     assert pytest.env_type in valid_env_types
183 | 
184 |     pytest.ubuntu = "ubuntu" in platform.version().lower()
185 | 
186 |     pytest.polygonize = polygonize
187 |     pytest.geom_test = geom_test
188 |     pytest.difference_plot = difference_plot
189 | 


--------------------------------------------------------------------------------
/neatnet/tests/data/apalachicola_original.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/neatnet/tests/data/apalachicola_original.parquet


--------------------------------------------------------------------------------
/neatnet/tests/data/apalachicola_simplified_exclusion_mask.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/neatnet/tests/data/apalachicola_simplified_exclusion_mask.parquet


--------------------------------------------------------------------------------
/neatnet/tests/data/apalachicola_simplified_standard.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uscuni/neatnet/3d9118d5b3df77a6fce24c18e425b2e49fefa4e4/neatnet/tests/data/apalachicola_simplified_standard.parquet


--------------------------------------------------------------------------------
/neatnet/tests/test_artifacts.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | 
 3 | import geopandas
 4 | import pytest
 5 | import shapely
 6 | 
 7 | import neatnet
 8 | 
 9 | 
10 | def test_get_artifacts_error():
11 |     path = pathlib.Path("neatnet", "tests", "data", "apalachicola_original.parquet")
12 |     with pytest.raises(  # noqa: SIM117
13 |         ValueError,
14 |         match=(
15 |             "No threshold for artifact detection found. Pass explicit "
16 |             "`threshold` or `threshold_fallback` to provide the value directly."
17 |         ),
18 |     ):
19 |         with pytest.warns(
20 |             UserWarning,
21 |             match=(
22 |                 "Input streets could not not be polygonized. "
23 |                 "Identification of face artifacts not possible."
24 |             ),
25 |         ):
26 |             neatnet.artifacts.get_artifacts(geopandas.read_parquet(path).iloc[:3])
27 | 
28 | 
29 | def test_FaceArtifacts():  # noqa: N802
30 |     osmnx = pytest.importorskip("osmnx")
31 |     type_filter = (
32 |         '["highway"~"living_street|motorway|motorway_link|pedestrian|primary'
33 |         "|primary_link|residential|secondary|secondary_link|service|tertiary"
34 |         '|tertiary_link|trunk|trunk_link|unclassified|service"]'
35 |     )
36 |     streets_graph = osmnx.graph_from_point(
37 |         (35.7798, -78.6421),
38 |         dist=1000,
39 |         network_type="all_private",
40 |         custom_filter=type_filter,
41 |         retain_all=True,
42 |         simplify=False,
43 |     )
44 |     streets_graph = osmnx.projection.project_graph(streets_graph)
45 |     gdf = osmnx.graph_to_gdfs(
46 |         osmnx.convert.to_undirected(streets_graph),
47 |         nodes=False,
48 |         edges=True,
49 |         node_geometry=False,
50 |         fill_edge_geometry=True,
51 |     )
52 |     fa = neatnet.FaceArtifacts(gdf)
53 |     assert 6 < fa.threshold < 9
54 |     assert isinstance(fa.face_artifacts, geopandas.GeoDataFrame)
55 |     assert fa.face_artifacts.shape[0] > 200
56 |     assert fa.face_artifacts.shape[1] == 2
57 | 
58 |     with pytest.warns(UserWarning, match="No threshold found"):
59 |         neatnet.FaceArtifacts(gdf.cx[712104:713000, 3961073:3961500])
60 | 
61 |     fa_ipq = neatnet.FaceArtifacts(gdf, index="isoperimetric_quotient")
62 |     assert 6 < fa_ipq.threshold < 9
63 |     assert fa_ipq.threshold != fa.threshold
64 | 
65 |     fa_dia = neatnet.FaceArtifacts(gdf, index="diameter_ratio")
66 |     assert 6 < fa_dia.threshold < 9
67 |     assert fa_dia.threshold != fa.threshold
68 | 
69 |     fa = neatnet.FaceArtifacts(gdf, index="isoperimetric_quotient")
70 |     assert 6 < fa.threshold < 9
71 | 
72 |     with pytest.raises(ValueError, match="'banana' is not supported"):
73 |         neatnet.FaceArtifacts(gdf, index="banana")
74 | 
75 |     p1, p2, p3, p4 = (
76 |         shapely.Point(1, 0),
77 |         shapely.Point(2, 0),
78 |         shapely.Point(3, 0),
79 |         shapely.Point(2, 1),
80 |     )
81 |     inverted_t = [
82 |         shapely.LineString((p1, p2)),
83 |         shapely.LineString((p2, p3)),
84 |         shapely.LineString((p2, p4)),
85 |     ]
86 | 
87 |     with pytest.warns(
88 |         UserWarning,
89 |         match=(
90 |             "Input streets could not not be polygonized. "
91 |             "Identification of face artifacts not possible."
92 |         ),
93 |     ):
94 |         neatnet.FaceArtifacts(geopandas.GeoDataFrame(geometry=inverted_t))
95 | 


--------------------------------------------------------------------------------
/neatnet/tests/test_continuity.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | 
  3 | import geopandas.testing
  4 | import momepy
  5 | import pandas
  6 | import pytest
  7 | import shapely
  8 | 
  9 | import neatnet
 10 | 
 11 | 
 12 | @pytest.fixture
 13 | def streets() -> geopandas.GeoDataFrame:
 14 |     """Toy set of 'streets' for testing only."""
 15 |     inita = 2
 16 |     final = 8
 17 |     grid = list(range(inita, final))
 18 |     vert_points = list(zip(grid[:-1], grid[1:], strict=True))
 19 |     hori_points = [(j, i) for i, j in vert_points]
 20 |     vert_lines = [
 21 |         shapely.LineString(i)
 22 |         for i in list(zip(hori_points[:-1], vert_points[1:], strict=True))
 23 |     ]
 24 |     hori_lines = [
 25 |         shapely.LineString(i)
 26 |         for i in list(zip(vert_points[:-1], hori_points[1:], strict=True))
 27 |     ]
 28 |     return geopandas.GeoDataFrame(
 29 |         geometry=(
 30 |             vert_lines
 31 |             + hori_lines
 32 |             + [
 33 |                 shapely.LineString(((4, 5), (3, 6))),
 34 |                 shapely.LineString(((3, 6), (4, 4))),
 35 |                 shapely.LineString(((6, 3), (5, 4))),
 36 |                 shapely.LineString(((3, 6), (3, 4))),
 37 |                 shapely.LineString(((5, 5), (6, 6))),
 38 |                 shapely.LineString(((6, 7), (7, 7))),
 39 |                 shapely.LineString(((7, 6), (7, 7))),
 40 |             ]
 41 |         )
 42 |     )
 43 | 
 44 | 
 45 | def test_continuity(streets):
 46 |     observed_continuity, observed_coins = neatnet.continuity.continuity(streets)
 47 | 
 48 |     assert isinstance(observed_continuity, geopandas.GeoDataFrame)
 49 |     known_continuity = (
 50 |         geopandas.GeoDataFrame(
 51 |             pandas.read_csv(
 52 |                 io.StringIO(
 53 |                     "geometry	coins_group	coins_end	coins_len	coins_count\n"
 54 |                     "LINESTRING (3 2, 3 4)	0	True	4.0	2\n"
 55 |                     "LINESTRING (4 3, 4 5)	1	True	4.0	2\n"
 56 |                     "LINESTRING (5 4, 5 6)	2	True	11.414213562373096	7\n"
 57 |                     "LINESTRING (6 5, 6 7)	2	False	11.414213562373096	7\n"
 58 |                     "LINESTRING (2 3, 4 3)	1	True	4.0	2\n"
 59 |                     "LINESTRING (3 4, 5 4)	3	True	3.414213562373095	2\n"
 60 |                     "LINESTRING (4 5, 6 5)	2	False	11.414213562373096	7\n"
 61 |                     "LINESTRING (5 6, 7 6)	2	False	11.414213562373096	7\n"
 62 |                     "LINESTRING (4 5, 3 6)	2	True	11.414213562373096	7\n"
 63 |                     "LINESTRING (3 6, 4 4)	4	True	2.23606797749979	1\n"
 64 |                     "LINESTRING (6 3, 5 4)	3	True	3.414213562373095	2\n"
 65 |                     "LINESTRING (3 6, 3 4)	0	True	4.0	2\n"
 66 |                     "LINESTRING (5 5, 6 6)	5	True	1.4142135623730951	1\n"
 67 |                     "LINESTRING (6 7, 7 7)	2	False	11.414213562373096	7\n"
 68 |                     "LINESTRING (7 6, 7 7)	2	False	11.414213562373096	7\n"
 69 |                 ),
 70 |                 sep="\t",
 71 |             )
 72 |         )
 73 |         .pipe(lambda df: df.assign(**{"geometry": shapely.from_wkt(df["geometry"])}))
 74 |         .set_geometry("geometry")
 75 |     )
 76 |     geopandas.testing.assert_geodataframe_equal(observed_continuity, known_continuity)
 77 | 
 78 |     assert isinstance(observed_coins, momepy.COINS)
 79 |     assert observed_coins.already_merged
 80 |     assert observed_coins.merging_list == [
 81 |         [0, 11],
 82 |         [1, 4],
 83 |         [2, 3, 6, 7, 8, 13, 14],
 84 |         [5, 10],
 85 |         [9],
 86 |         [12],
 87 |     ]
 88 |     assert len(observed_coins.angle_pairs) == 36
 89 | 
 90 | 
 91 | def test_get_stroke_info(streets):
 92 |     known_strokes = [0, 0, 2, 1, 1, 1, 2]
 93 |     known_c_ = [0, 0, 0, 0, 0, 1, 0]
 94 |     known_e_ = [0, 0, 1, 0, 0, 0, 1]
 95 |     known_s_ = [0, 0, 1, 1, 1, 0, 1]
 96 | 
 97 |     observed = neatnet.continuity.get_stroke_info(
 98 |         neatnet.artifacts.get_artifacts(streets, threshold=1)[0],
 99 |         neatnet.continuity.continuity(streets.copy())[0],
100 |     )
101 | 
102 |     observed_strokes = observed[0]
103 |     observed_c_ = observed[1]
104 |     observed_e_ = observed[2]
105 |     observed_s_ = observed[3]
106 | 
107 |     assert observed_strokes == known_strokes
108 |     assert observed_c_ == known_c_
109 |     assert observed_e_ == known_e_
110 |     assert observed_s_ == known_s_
111 | 


--------------------------------------------------------------------------------
/neatnet/tests/test_gaps.py:
--------------------------------------------------------------------------------
 1 | import geopandas as gpd
 2 | import pytest
 3 | from shapely.geometry import LineString
 4 | 
 5 | import neatnet
 6 | 
 7 | 
 8 | def test_close_gaps():
 9 |     l1 = LineString([(1, 0), (2, 1)])
10 |     l2 = LineString([(2.1, 1), (3, 2)])
11 |     l3 = LineString([(3.1, 2), (4, 0)])
12 |     l4 = LineString([(4.1, 0), (5, 0)])
13 |     l5 = LineString([(5.1, 0), (6, 0)])
14 |     df = gpd.GeoDataFrame(geometry=[l1, l2, l3, l4, l5])
15 | 
16 |     closed = neatnet.close_gaps(df, 0.25)
17 |     assert len(closed) == len(df)
18 | 
19 |     merged = neatnet.remove_interstitial_nodes(closed)
20 |     assert len(merged) == 1
21 |     assert merged.length[0] == pytest.approx(7.0502, rel=1e-3)
22 | 
23 | 
24 | def test_extend_lines():
25 |     l1 = LineString([(1, 0), (1.9, 0)])
26 |     l2 = LineString([(2.1, -1), (2.1, 1)])
27 |     l3 = LineString([(2, 1.1), (3, 1.1)])
28 |     gdf = gpd.GeoDataFrame([1, 2, 3], geometry=[l1, l2, l3])
29 | 
30 |     ext1 = neatnet.extend_lines(gdf, 2)
31 |     assert ext1.length.sum() > gdf.length.sum()
32 |     assert ext1.length.sum() == pytest.approx(4.2, rel=1e-3)
33 | 
34 |     target = gpd.GeoSeries([l2.centroid.buffer(3)])
35 |     ext2 = neatnet.extend_lines(gdf, 3, target=target)
36 | 
37 |     assert ext2.length.sum() > gdf.length.sum()
38 |     assert ext2.length.sum() == pytest.approx(17.3776, rel=1e-3)
39 | 
40 |     barrier = LineString([(2, -1), (2, 1)])
41 |     ext3 = neatnet.extend_lines(gdf, 2, barrier=gpd.GeoSeries([barrier]))
42 | 
43 |     assert ext3.length.sum() > gdf.length.sum()
44 |     assert ext3.length.sum() == pytest.approx(4, rel=1e-3)
45 | 
46 |     ext4 = neatnet.extend_lines(gdf, 2, extension=1)
47 |     assert ext4.length.sum() > gdf.length.sum()
48 |     assert ext4.length.sum() == pytest.approx(10.2, rel=1e-3)
49 | 
50 |     gdf = gpd.GeoDataFrame([1, 2, 3, 4], geometry=[l1, l2, l3, barrier])
51 |     ext5 = neatnet.extend_lines(gdf, 2)
52 |     assert ext5.length.sum() > gdf.length.sum()
53 |     assert ext5.length.sum() == pytest.approx(6.2, rel=1e-3)
54 | 


--------------------------------------------------------------------------------
/neatnet/tests/test_geometry.py:
--------------------------------------------------------------------------------
  1 | import geopandas.testing
  2 | import numpy
  3 | import pandas
  4 | import pytest
  5 | import shapely
  6 | 
  7 | import neatnet
  8 | 
  9 | 
 10 | class TestIsWithin:
 11 |     def setup_method(self):
 12 |         self.polygon = shapely.Polygon(((0, 0), (10, 0), (10, 10), (0, 10), (0, 0)))
 13 | 
 14 |     def test_within_fully(self):
 15 |         line = shapely.LineString(((2, 2), (8, 8)))
 16 | 
 17 |         known = True
 18 |         observed = neatnet.geometry._is_within(line, self.polygon)
 19 | 
 20 |         assert known == observed
 21 | 
 22 |     def test_within_tol(self):
 23 |         line = shapely.LineString(((2, 2), (2, 10.0001)))
 24 | 
 25 |         known = True
 26 |         observed = neatnet.geometry._is_within(line, self.polygon)
 27 | 
 28 |         assert known == observed
 29 | 
 30 |     def test_not_within_tol(self):
 31 |         line = shapely.LineString(((2, 2), (2, 10.001)))
 32 | 
 33 |         known = False
 34 |         observed = neatnet.geometry._is_within(line, self.polygon)
 35 | 
 36 |         assert known == observed
 37 | 
 38 |     def test_within_tol_strict(self):
 39 |         line = shapely.LineString(((2, 2), (2, 10.0000001)))
 40 | 
 41 |         known = True
 42 |         observed = neatnet.geometry._is_within(line, self.polygon, rtol=1e-7)
 43 | 
 44 |         assert known == observed
 45 | 
 46 |     def test_not_within_tol_strict(self):
 47 |         line = shapely.LineString(((2, 2), (2, 10.000001)))
 48 | 
 49 |         known = False
 50 |         observed = neatnet.geometry._is_within(line, self.polygon, rtol=1e-7)
 51 | 
 52 |         assert known == observed
 53 | 
 54 |     def test_within_tol_relaxed(self):
 55 |         line = shapely.LineString(((2, 2), (2, 11)))
 56 | 
 57 |         known = True
 58 |         observed = neatnet.geometry._is_within(line, self.polygon, rtol=1)
 59 | 
 60 |         assert known == observed
 61 | 
 62 |     def test_not_within_tol_relaxed(self):
 63 |         line = shapely.LineString(((2, 2), (2, 12)))
 64 | 
 65 |         known = False
 66 |         observed = neatnet.geometry._is_within(line, self.polygon, rtol=1)
 67 | 
 68 |         assert known == observed
 69 | 
 70 |     def test_not_within(self):
 71 |         line = shapely.LineString(((11, 11), (12, 12)))
 72 | 
 73 |         known = False
 74 |         observed = neatnet.geometry._is_within(line, self.polygon)
 75 | 
 76 |         assert known == observed
 77 | 
 78 | 
 79 | class TestAngleBetween2Lines:
 80 |     def setup_method(self):
 81 |         self.line1 = shapely.LineString(((0, 0), (1, 0)))
 82 |         self.line2 = shapely.LineString(((1, 0), (1, 1)))
 83 |         self.line3 = shapely.LineString(((0, 0), (0, 1)))
 84 |         self.line4 = shapely.LineString(((0, 1), (1, 1)))
 85 | 
 86 |     def test_q1(self):
 87 |         known = 90.0
 88 |         observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line3)
 89 |         assert observed == known
 90 | 
 91 |     def test_q2(self):
 92 |         known = 90.0
 93 |         observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line2)
 94 |         assert observed == known
 95 | 
 96 |     def test_q3(self):
 97 |         known = 90.0
 98 |         observed = neatnet.geometry.angle_between_two_lines(self.line2, self.line4)
 99 |         assert observed == known
100 | 
101 |     def test_q4(self):
102 |         known = 90.0
103 |         observed = neatnet.geometry.angle_between_two_lines(self.line3, self.line4)
104 |         assert observed == known
105 | 
106 |     def test_indistinct(self):
107 |         known = 0.0
108 |         with pytest.warns(
109 |             UserWarning,
110 |             match="Input lines are identical - must be distinct. Returning 0.0.",
111 |         ):
112 |             observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line1)
113 |         assert observed == known
114 | 
115 |     def test_not_adjacent(self):
116 |         known = 0.0
117 |         with pytest.warns(
118 |             UserWarning, match="Input lines do not share a vertex. Returning 0.0."
119 |         ):
120 |             observed = neatnet.geometry.angle_between_two_lines(self.line1, self.line4)
121 |         assert observed == known
122 | 
123 | 
124 | voronoi_skeleton_params = pytest.mark.parametrize(
125 |     "lines_type,as_poly,buffer",
126 |     [
127 |         (list, False, None),
128 |         (list, True, 0.001),
129 |         (numpy.array, False, 0.01),
130 |         (numpy.array, True, 0.1),
131 |         (pandas.Series, False, 1),
132 |         (pandas.Series, True, 2.0),
133 |         (geopandas.GeoSeries, False, 5),
134 |         (geopandas.GeoSeries, True, 10.314),
135 |     ],
136 | )
137 | 
138 | 
139 | class TestVoronoiSkeleton:
140 |     def setup_method(self):
141 |         self.square = [
142 |             shapely.LineString(((0, 0), (1000, 0))),
143 |             shapely.LineString(((1000, 0), (1000, 1000))),
144 |             shapely.LineString(((0, 0), (0, 1000))),
145 |             shapely.LineString(((0, 1000), (1000, 1000))),
146 |         ]
147 |         self.known_square_skeleton_edges = numpy.array(
148 |             [
149 |                 shapely.LineString(((1000, 0), (998, 2), (500, 500))),
150 |                 shapely.LineString(((0, 0), (2, 2), (500, 500))),
151 |                 shapely.LineString(((1000, 1000), (998, 998), (500, 500))),
152 |                 shapely.LineString(((0, 1000), (2, 998), (500, 500))),
153 |             ]
154 |         )
155 |         self.known_square_skeleton_splits = [shapely.Point(0, 0)]
156 |         self.known_square_skeleton_splits_snap_to = [
157 |             shapely.Point(1000, 0),
158 |             shapely.Point(0, 0),
159 |             shapely.Point(0, 1000),
160 |             shapely.Point(1000, 1000),
161 |         ]
162 | 
163 |     @voronoi_skeleton_params
164 |     def test_square(self, lines_type, as_poly, buffer):
165 |         known_edges = self.known_square_skeleton_edges
166 |         known_splits = self.known_square_skeleton_splits
167 | 
168 |         lines = lines_type(self.square)
169 |         poly = pytest.polygonize(lines) if as_poly else None
170 |         observed_edges, observed_splits = neatnet.geometry.voronoi_skeleton(
171 |             lines,
172 |             poly=poly,
173 |             buffer=buffer,
174 |         )
175 | 
176 |         pytest.geom_test(observed_edges, known_edges)
177 |         pytest.geom_test(observed_splits, known_splits)
178 | 
179 |     @voronoi_skeleton_params
180 |     def test_square_snap_to(self, lines_type, as_poly, buffer):
181 |         known_edges = self.known_square_skeleton_edges
182 |         known_splits = self.known_square_skeleton_splits_snap_to
183 | 
184 |         lines = lines_type(self.square)
185 |         poly = pytest.polygonize(lines) if as_poly else None
186 |         observed_edges, observed_splits = neatnet.geometry.voronoi_skeleton(
187 |             lines,
188 |             poly=poly,
189 |             buffer=buffer,
190 |             snap_to=(
191 |                 pytest.polygonize(geopandas.GeoSeries(lines), as_geom=False)
192 |                 .extract_unique_points()
193 |                 .explode()
194 |             ),
195 |         )
196 | 
197 |         pytest.geom_test(observed_edges, known_edges)
198 |         pytest.geom_test(observed_splits, known_splits)
199 | 
200 | 
201 | line_100_900 = shapely.LineString(((1000, 1000), (1000, 9000)))
202 | line_100_120 = shapely.LineString(((1000, 1020), (1020, 1020)))
203 | lines_100_900_100_120 = shapely.MultiLineString((line_100_900, line_100_120))
204 | line_110_900 = shapely.LineString(((1000, 9000), (1100, 9000)))
205 | 
206 | 
207 | def test_remove_sliver():
208 |     known = line_100_900
209 |     observed = neatnet.geometry._remove_sliver(lines_100_900_100_120)
210 |     assert observed == known
211 | 
212 | 
213 | def test_as_parts():
214 |     known = numpy.array([line_100_900, line_100_120, line_110_900])
215 |     observed = neatnet.geometry._as_parts(
216 |         numpy.array([lines_100_900_100_120, line_110_900])
217 |     )
218 |     numpy.testing.assert_array_equal(observed, known)
219 | 
220 | 
221 | @pytest.mark.parametrize("tolerance", [0.1, 1, 10, 100, 1_000, 10_000, 100_000])
222 | def test_consolidate(tolerance):
223 |     known = numpy.array([line_100_900, line_100_120, line_110_900])
224 |     observed = neatnet.geometry._consolidate(
225 |         numpy.array([line_100_900, line_100_120, line_110_900]), tolerance
226 |     )
227 |     numpy.testing.assert_array_equal(observed, known)
228 | 
229 | 
230 | def test_prep_components():
231 |     line1 = shapely.LineString(((1, 1), (1, 2)))
232 |     line2 = shapely.LineString(((1, 2), (2, 2)))
233 |     line3 = shapely.LineString(((3, 0), (3, 3)))
234 | 
235 |     known_labels = pandas.Series(
236 |         [0, 0, 1],
237 |         index=pandas.Index([0, 1, 2], name="focal"),
238 |         name="component labels",
239 |         dtype=numpy.int32,
240 |     )
241 |     known_counts = pandas.Series(
242 |         [2, 1],
243 |         index=pandas.Index([0, 1], name="component labels", dtype=numpy.int32),
244 |         name="count",
245 |         dtype=numpy.int64,
246 |     )
247 |     known_comps = geopandas.GeoDataFrame(
248 |         geometry=[
249 |             shapely.MultiLineString(
250 |                 (
251 |                     shapely.LineString(((1, 1), (1, 2))),
252 |                     shapely.LineString(((1, 2), (2, 2))),
253 |                 )
254 |             ),
255 |             shapely.LineString(((3, 0), (3, 3))),
256 |         ],
257 |         index=pandas.Index([0, 1], name="component labels", dtype=numpy.int32),
258 |     )
259 | 
260 |     observed_labels, observed_counts, observed_comps = (
261 |         neatnet.geometry._prep_components([line1, line2, line3])
262 |     )
263 | 
264 |     pandas.testing.assert_series_equal(observed_labels, known_labels)
265 |     pandas.testing.assert_series_equal(observed_counts, known_counts)
266 |     geopandas.testing.assert_geodataframe_equal(observed_comps, known_comps)
267 | 
268 | 
269 | def test_split_add():
270 |     _x = 1100
271 |     x1, y1 = _x, 0
272 |     x2, y2 = _x, 1000
273 |     sl = shapely.LineString(((x1, y1), (x2, y2)))
274 |     known_splits = [shapely.Point((x2, y2))]
275 |     known_adds = [sl]
276 |     observed_splits, observed_adds = neatnet.geometry._split_add(sl, [], [])
277 |     assert observed_splits == known_splits
278 |     assert observed_adds == known_adds
279 | 
280 | 
281 | class TestSnapToTargets:
282 |     def setup_method(self):
283 |         # edgelines
284 |         line1 = shapely.LineString(((100, 100), (1000, 100)))
285 |         line2 = shapely.LineString(((1000, 100), (1000, 1000)))
286 |         line3 = shapely.LineString(((100, 100), (100, 1000)))
287 |         line4 = shapely.LineString(((100, 1000), (1000, 1000)))
288 |         self.lines = [line1, line2, line3, line4]
289 | 
290 |         # poly
291 |         self.poly = shapely.polygonize(self.lines).buffer(0)
292 | 
293 |         # snap_to
294 |         self.snap_to_1 = (
295 |             geopandas.GeoSeries(self.lines)
296 |             .polygonize()
297 |             .extract_unique_points()
298 |             .explode()
299 |         )
300 | 
301 |     def test_warn(self):
302 |         with pytest.warns(
303 |             UserWarning,
304 |             match=(
305 |                 "Could not create a connection as it would "
306 |                 "lead outside of the artifact."
307 |             ),
308 |         ):
309 |             neatnet.geometry.snap_to_targets(
310 |                 self.lines,
311 |                 self.poly,
312 |                 snap_to=self.snap_to_1,
313 |             )
314 | 
315 |     def test_secondary(self):
316 |         known = ([None], [None])
317 | 
318 |         line1_b = shapely.LineString(((500, 500), (1500, 500)))
319 |         line2_b = shapely.LineString(((1500, 500), (1500, 1500)))
320 |         line3_b = shapely.LineString(((500, 500), (500, 1500)))
321 |         line4_b = shapely.LineString(((500, 1500), (1500, 1500)))
322 |         lines_b = [line1_b, line2_b, line3_b, line4_b]
323 |         snap_to_2 = (
324 |             geopandas.GeoSeries(lines_b).polygonize().extract_unique_points().explode()
325 |         )
326 | 
327 |         observed = neatnet.geometry.snap_to_targets(
328 |             self.lines + lines_b,
329 |             self.poly,
330 |             snap_to=self.snap_to_1,
331 |             secondary_snap_to=snap_to_2,
332 |         )
333 | 
334 |         assert observed == known
335 | 


--------------------------------------------------------------------------------
/neatnet/tests/test_simplify.py:
--------------------------------------------------------------------------------
  1 | import pathlib
  2 | 
  3 | import geopandas
  4 | import momepy
  5 | import numpy
  6 | import pytest
  7 | import shapely
  8 | from pandas.testing import assert_frame_equal, assert_series_equal
  9 | 
 10 | import neatnet
 11 | 
 12 | test_data = pathlib.Path("neatnet", "tests", "data")
 13 | full_fua_data = pathlib.Path("data")
 14 | 
 15 | ci_artifacts = pathlib.Path("ci_artifacts")
 16 | 
 17 | 
 18 | AC = "apalachicola"
 19 | AC_STREETS = geopandas.read_parquet(test_data / f"{AC}_original.parquet")
 20 | AC_EXCLUSION_MASK = geopandas.GeoSeries(
 21 |     [
 22 |         shapely.Polygon(
 23 |             (
 24 |                 (-9461361.807208396, 3469029.2708674935),
 25 |                 (-9461009.046874022, 3469029.2708674935),
 26 |                 (-9461009.046874022, 3469240.1785251377),
 27 |                 (-9461361.807208396, 3469240.1785251377),
 28 |                 (-9461361.807208396, 3469029.2708674935),
 29 |             )
 30 |         ),
 31 |         shapely.Polygon(
 32 |             (
 33 |                 (-9461429.266819818, 3469157.7482423405),
 34 |                 (-9461361.807208396, 3469157.7482423405),
 35 |                 (-9461361.807208396, 3469240.1785251377),
 36 |                 (-9461429.266819818, 3469240.1785251377),
 37 |                 (-9461429.266819818, 3469157.7482423405),
 38 |             )
 39 |         ),
 40 |     ],
 41 |     crs=AC_STREETS.crs,
 42 | )
 43 | 
 44 | 
 45 | @pytest.mark.parametrize(
 46 |     "scenario,tol,known_length",
 47 |     [
 48 |         ("standard", 1.5, 64566.0),
 49 |         ("exclusion_mask", 1.05, 65765.0),
 50 |     ],
 51 | )
 52 | def test_neatify_small(scenario, tol, known_length):
 53 |     original = AC_STREETS.copy()
 54 | 
 55 |     known = geopandas.read_parquet(test_data / f"{AC}_simplified_{scenario}.parquet")
 56 |     exclusion_mask = AC_EXCLUSION_MASK.copy() if scenario == "exclusion_mask" else None
 57 | 
 58 |     observed = neatnet.neatify(original, exclusion_mask=exclusion_mask)
 59 |     observed_length = observed.geometry.length.sum()
 60 | 
 61 |     # storing GH artifacts
 62 |     artifact_dir = ci_artifacts / AC
 63 |     artifact_dir.mkdir(parents=True, exist_ok=True)
 64 |     observed.to_parquet(artifact_dir / f"simplified_{scenario}.parquet")
 65 | 
 66 |     assert pytest.approx(observed_length, rel=0.0001) == known_length
 67 |     assert observed.index.dtype == numpy.dtype("int64")
 68 | 
 69 |     assert observed.shape == known.shape
 70 |     assert_series_equal(known["_status"], observed["_status"])
 71 |     assert_frame_equal(
 72 |         known.drop(columns=["_status", "geometry"]),
 73 |         observed.drop(columns=["_status", "geometry"]),
 74 |     )
 75 | 
 76 |     pytest.geom_test(known, observed, tolerance=tol, aoi=f"{AC}_{scenario}")
 77 | 
 78 | 
 79 | @pytest.mark.parametrize(
 80 |     "aoi,tol,known_length",
 81 |     [
 82 |         ("aleppo_1133", 0.2, 4_361_625),
 83 |         ("auckland_869", 0.3, 1_268_048),
 84 |         ("bucaramanga_4617", 0.2, 1_681_011),
 85 |         ("douala_809", 0.1, 2_961_364),
 86 |         ("liege_1656", 0.3, 2_350_782),
 87 |         ("slc_4881", 0.3, 1_762_456),
 88 |     ],
 89 | )
 90 | def test_neatify_full_fua(aoi, tol, known_length):
 91 |     known = geopandas.read_parquet(full_fua_data / aoi / "simplified.parquet")
 92 |     observed = neatnet.neatify(
 93 |         geopandas.read_parquet(full_fua_data / aoi / "original.parquet")
 94 |     )
 95 |     observed_length = observed.geometry.length.sum()
 96 |     assert "highway" in observed.columns
 97 | 
 98 |     # storing GH artifacts
 99 |     artifact_dir = ci_artifacts / aoi
100 |     artifact_dir.mkdir(parents=True, exist_ok=True)
101 |     observed.to_parquet(artifact_dir / "simplified.parquet")
102 |     pytest.difference_plot(aoi, artifact_dir, known, observed)
103 | 
104 |     assert pytest.approx(observed_length, rel=0.0001) == known_length
105 |     assert observed.index.dtype == numpy.dtype("int64")
106 | 
107 |     if pytest.ubuntu and pytest.env_type != "oldest":
108 |         assert_series_equal(known["_status"], observed["_status"])
109 |         assert_frame_equal(
110 |             known.drop(columns=["_status", "geometry"]),
111 |             observed.drop(columns=["_status", "geometry"]),
112 |         )
113 |         pytest.geom_test(known, observed, tolerance=tol, aoi=aoi)
114 | 
115 | 
116 | @pytest.mark.wuhan
117 | def test_neatify_wuhan(aoi="wuhan_8989", tol=0.3, known_length=4_702_861):
118 |     known = geopandas.read_parquet(full_fua_data / aoi / "simplified.parquet")
119 |     observed = neatnet.neatify(
120 |         geopandas.read_parquet(full_fua_data / aoi / "original.parquet")
121 |     )
122 |     observed_length = observed.geometry.length.sum()
123 |     assert "highway" in observed.columns
124 | 
125 |     # storing GH artifacts
126 |     artifact_dir = ci_artifacts / aoi
127 |     artifact_dir.mkdir(parents=True, exist_ok=True)
128 |     observed.to_parquet(artifact_dir / "simplified.parquet")
129 |     pytest.difference_plot(aoi, artifact_dir, known, observed)
130 | 
131 |     assert pytest.approx(observed_length, rel=0.0001) == known_length
132 |     assert observed.index.dtype == numpy.dtype("int64")
133 | 
134 |     if pytest.ubuntu and pytest.env_type != "oldest":
135 |         assert_series_equal(known["_status"], observed["_status"])
136 |         assert_frame_equal(
137 |             known.drop(columns=["_status", "geometry"]),
138 |             observed.drop(columns=["_status", "geometry"]),
139 |         )
140 |         pytest.geom_test(known, observed, tolerance=tol, aoi=aoi)
141 | 
142 | 
143 | def test_neatify_fallback():
144 |     streets = geopandas.read_file(momepy.datasets.get_path("bubenec"), layer="streets")
145 |     with pytest.warns(UserWarning, match="No threshold for artifact"):
146 |         simple = neatnet.neatify(streets)
147 |         # only topology is fixed
148 |         assert simple.shape == (31, 2)
149 | 
150 | 
151 | class TestCheckCRS:
152 |     def test_projected_street_matching_mask(self):
153 |         assert neatnet.simplify._check_input_crs(AC_STREETS, AC_EXCLUSION_MASK) is None
154 | 
155 |     def test_projected_street_no_mask(self):
156 |         assert neatnet.simplify._check_input_crs(AC_STREETS, None) is None
157 | 
158 |     def test_projected_street_mismatch_mask(self):
159 |         with pytest.raises(
160 |             ValueError,
161 |             match=(
162 |                 "The input `streets` and `exclusion_mask` data are in "
163 |                 "different coordinate reference systems. Reproject and rerun."
164 |             ),
165 |         ):
166 |             neatnet.simplify._check_input_crs(
167 |                 AC_STREETS, AC_EXCLUSION_MASK.to_crs(4326)
168 |             )
169 | 
170 |     def test_no_crs_street_no_mask(self):
171 |         with pytest.warns(
172 |             UserWarning,
173 |             match=(
174 |                 "The input `streets` data does not have an assigned "
175 |                 "coordinate reference system. Assuming a projected CRS in meters."
176 |             ),
177 |         ):
178 |             neatnet.simplify._check_input_crs(
179 |                 AC_STREETS.set_crs(None, allow_override=True), None
180 |             )
181 | 
182 |     def test_projected_street_feet(self):
183 |         with pytest.warns(
184 |             UserWarning,
185 |             match=(
186 |                 "The input `streets` data coordinate reference system is projected "
187 |                 "but not in meters. All `neatnet` defaults assume meters. "
188 |                 "Either reproject and rerun or proceed with caution."
189 |             ),
190 |         ):
191 |             neatnet.simplify._check_input_crs(AC_STREETS.to_crs(6441), None)
192 | 
193 |     def test_geographic_street(self):
194 |         with pytest.raises(
195 |             ValueError,
196 |             match=(
197 |                 "The input `streets` data are not in a projected "
198 |                 "coordinate reference system. Reproject and rerun."
199 |             ),
200 |         ):
201 |             neatnet.simplify._check_input_crs(AC_STREETS.to_crs(4326), None)
202 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | requires = ["setuptools>=61.0", "setuptools_scm[toml]>=6.2"]
  3 | build-backend = "setuptools.build_meta"
  4 | 
  5 | [tool.setuptools_scm]
  6 | 
  7 | [project]
  8 | name = "neatnet"
  9 | dynamic = ["version"]
 10 | authors = [
 11 |    { name = "Martin Fleischmann", email = "martin@martinfleischmann.net" },
 12 |    { name = "Anastassia Vybornova", email = "anvy@itu.dk" },
 13 |    { name = "James D. Gaboardi", email = "jgaboardi@gmail.com" },
 14 | ]
 15 | 
 16 | license = { text = "BSD 3-Clause" }
 17 | description = "Street geometry processing toolkit"
 18 | keywords = [""]
 19 | readme = "README.md"
 20 | classifiers = [
 21 |     "Programming Language :: Python :: 3",
 22 |     "License :: OSI Approved :: BSD License",
 23 |     "Operating System :: OS Independent",
 24 |     "Intended Audience :: Science/Research",
 25 |     "Topic :: Scientific/Engineering :: GIS",
 26 | ]
 27 | requires-python = ">=3.11"
 28 | dependencies = [
 29 |     "esda>=2.6.0",
 30 |     "geopandas>=1.0.1",
 31 |     "libpysal>=4.12.1",
 32 |     "momepy>=0.9.0",
 33 |     "networkx>=3.3",
 34 |     "numpy>=2",
 35 |     "pandas>=2.2.3",
 36 |     "scipy>=1.14.1",
 37 |     "shapely>=2.0.6",
 38 |     "scikit-learn>=1.2.0",
 39 | ]
 40 | 
 41 | [project.urls]
 42 | Home = "https://github.com/uscuni/"
 43 | Repository = "https://github.com/uscuni/neatnet"
 44 | 
 45 | [project.optional-dependencies]
 46 | tests = [
 47 |     "codecov",
 48 |     "coverage",
 49 |     "matplotlib",
 50 |     "mypy>=1.15.0,<2",
 51 |     "pre-commit",
 52 |     "pyarrow>=17.0",
 53 |     "pytest",
 54 |     "pytest-cov",
 55 |     "pytest-xdist",
 56 |     "ruff",
 57 |     "yamllint",
 58 | ]
 59 | 
 60 | docs = [
 61 |     "ipykernel",
 62 |     "ipywidgets",
 63 |     "jupyterlab",
 64 |     "myst_nb",
 65 |     "numpydoc",
 66 |     "sphinx",
 67 |     "sphinxcontrib-bibtex",
 68 |     "sphinx_autosummary_accessors",
 69 |     "sphinx_book_theme",
 70 |     "sphinx_copybutton",
 71 | ]
 72 | all = ["neatnet[tests,docs]"]
 73 | 
 74 | 
 75 | [tool.setuptools.packages.find]
 76 | include = ["neatnet", "neatnet.*"]
 77 | 
 78 | [tool.ruff]
 79 | line-length = 88
 80 | extend-include = ["*.ipynb"]
 81 | 
 82 | [tool.ruff.lint]
 83 | select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"]
 84 | 
 85 | [tool.ruff.lint.per-file-ignores]
 86 |  "*__init__.py" = [
 87 |      "F401",  # imported but unused
 88 |      "F403",  # star import; unable to detect undefined names
 89 |  ]
 90 | 
 91 | [tool.coverage.run]
 92 | source = ["./neatnet"]
 93 | 
 94 | [tool.coverage.report]
 95 | exclude_lines = [
 96 |     "if self.debug:",
 97 |     "pragma: no cover",
 98 |     "raise NotImplementedError",
 99 |     "except ModuleNotFoundError:",
100 |     "except ImportError",
101 | ]
102 | ignore_errors = true
103 | omit = ["neatnet/tests/*"]
104 | 
105 | [tool.pytest.ini_options]
106 | filterwarnings = [
107 |     # this is an internal warning thrown within ``neatnet.geometry.snap_to_targets()``
108 |     'ignore:Could not create a connection*:UserWarning',
109 | ]
110 | markers = [
111 |     'wuhan:Wuhan takes ages to run'
112 | ]
113 | [tool.pixi.project]
114 | channels = ["conda-forge"]
115 | platforms = ["linux-64", "osx-arm64", "osx-64", "win-64"]
116 | 
117 | [tool.pixi.dependencies]
118 | python = "3.13.*"
119 | esda = "*"
120 | geopandas = "*"
121 | libpysal = "*"
122 | momepy = "*"
123 | networkx = "*"
124 | numpy = "*"
125 | osmnx = "*"
126 | pandas = "*"
127 | pyarrow = "*"
128 | pyogrio = "*"
129 | scipy = "*"
130 | shapely = "*"
131 | mypy = ">=1.15.0,<2"
132 | pre-commit = "*"
133 | pytest = "*"
134 | pytest-cov = "*"
135 | pytest-xdist = "*"
136 | ruff = "*"
137 | yamllint = "*"
138 | 
139 | [tool.pixi.pypi-dependencies]
140 | neatnet = { path = ".", editable = true }
141 | 
142 | [tool.pixi.environments]
143 | default = { solve-group = "default" }
144 | tests = { features = ["tests", "docs"], solve-group = "default" }
145 | 
146 | [tool.pixi.tasks]
147 | 


--------------------------------------------------------------------------------