├── .gitignore ├── .vscode └── settings.json ├── Example use.ipynb ├── LICENSE ├── README.md ├── buildingregulariser ├── __init__.py ├── __version__.py ├── coordinator.py ├── geometry_utils.py ├── neighbor_alignment.py └── regularization.py ├── examples ├── 1.png └── 2.png ├── pyproject.toml ├── test data └── input │ ├── test_data.gpkg │ └── test_data_multi_geom.gpkg ├── tests └── test_end_to_end.py └── uv.lock /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | private test data/* 176 | *.gpkg-shm 177 | *.gpkg-wal 178 | *__pycache__* 179 | buildingregulariser.egg-info/* 180 | .DS_Store 181 | test data/input/old/* 182 | test data/output/* -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "arctan", 4 | "chunksize", 5 | "fromiter", 6 | "geodataframe", 7 | "ndarray", 8 | "reproject", 9 | "reprojection", 10 | "segmentize", 11 | "segs", 12 | "sindex" 13 | ] 14 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 DPIRD-DMA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Building Regulariser 2 | 3 | A Python library for regularizing building footprints in geospatial data. This library helps clean up and standardize building polygon geometries by aligning edges to principal directions. Built as an open source alternative to the [ArcGIS Regularize Building Footprint (3D Analyst) tool](https://pro.arcgis.com/en/pro-app/latest/tool-reference/3d-analyst/regularize-building-footprint.htm). 4 | 5 | [![Python](https://img.shields.io/badge/Python-3.9%2B-blue)]() 6 | [![License](https://img.shields.io/badge/License-MIT-green)]() 7 | 8 | ## Example Results 9 | 10 | Before and after regularization: 11 | 12 |
13 | Example 1: Before and After Regularization 14 | Example 2: Before and After Regularization 15 |
16 | 17 | ## Try in Colab 18 | 19 | [![Colab_Button]][Link] 20 | 21 | [Link]: https://colab.research.google.com/drive/1xeFxpQCAybgbNjmopiHZb7_Tz1lv8k6A?usp=sharing 'Try Building Regulariser In Colab' 22 | 23 | [Colab_Button]: https://img.shields.io/badge/Try%20in%20Colab-grey?style=for-the-badge&logo=google-colab 24 | 25 | ## Overview 26 | 27 | Building footprints extracted from remote sensing imagery often contain noise, irregular edges, and geometric inconsistencies. This library provides tools to regularize these footprints by: 28 | 29 | - Aligning edges to principal directions (orthogonal and optional 45-degree angles) 30 | - Converting near-rectangular buildings to perfect rectangles 31 | - Converting near-circular buildings to perfect circles 32 | - Simplifying complex polygons while maintaining their essential shape 33 | - Supporting parallel processing for efficient computation with large datasets 34 | - Fine-tune building alignment with neighboring buildings 35 | 36 | Inspired by [RS-building-regularization](https://github.com/niecongchong/RS-building-regularization), this library takes a geometric approach to building regularization with improvements for usability and integration with the GeoPandas ecosystem. 
37 | 38 | ## Installation 39 | 40 | ```bash 41 | pip install buildingregulariser 42 | ``` 43 | or 44 | ```bash 45 | conda install conda-forge::buildingregulariser 46 | ``` 47 | or 48 | ```bash 49 | uv add buildingregulariser 50 | ``` 51 | ## Quick Start 52 | 53 | ```python 54 | import geopandas as gpd 55 | from buildingregulariser import regularize_geodataframe 56 | 57 | # Load your building footprints 58 | buildings = gpd.read_file("buildings.gpkg") 59 | 60 | # Regularize the building footprints 61 | regularized_buildings = regularize_geodataframe( 62 | buildings, 63 | ) 64 | 65 | # Save the results 66 | regularized_buildings.to_file("regularized_buildings.gpkg") 67 | ``` 68 | 69 | ## Features 70 | 71 | - **GeoDataFrame Integration**: Works seamlessly with GeoPandas GeoDataFrames 72 | - **Polygon Regularization**: Aligns edges to principal directions 73 | - **45-Degree Support**: Optional alignment to 45-degree angles 74 | - **Align with neighboring buildings**: Align each building with neighboring buildings 75 | - **Circle Detection**: Identifies and converts near-circular shapes to perfect circles 76 | - **Edge Simplification**: Reduces the number of vertices while preserving shape 77 | - **Parallel Processing**: Utilizes multiple CPU cores for faster processing of large datasets 78 | 79 | ## Usage Examples 80 | 81 | ### Basic Regularization 82 | 83 | ```python 84 | from buildingregulariser import regularize_geodataframe 85 | import geopandas as gpd 86 | 87 | buildings = gpd.read_file("buildings.gpkg") 88 | regularized = regularize_geodataframe(buildings) 89 | ``` 90 | 91 | ### Fine-tuning Regularization Parameters 92 | 93 | ```python 94 | regularized = regularize_geodataframe( 95 | buildings, 96 | parallel_threshold=2.0, # Higher values allow less edge alignment 97 | simplify_tolerance=0.5, # Controls simplification level, should be 2-3 x the raster pixel size 98 | allow_45_degree=True, # Enable 45-degree angles 99 | allow_circles=True, # Enable circle 
detection 100 | circle_threshold=0.9, # IOU threshold for circle detection 101 | neighbor_alignment=True, # After regularization try to align each building with neighboring buildings 102 | neighbor_search_distance=100.0, # The search distance around each building to find neighbors 103 | neighbor_max_rotation=10, # The maximum rotation allowed to align with neighbors 104 | ) 105 | ``` 106 | 107 | ## Parameters 108 | 109 | - **geodataframe**: Input GeoDataFrame with polygon geometries 110 | - **parallel_threshold**: Distance threshold for handling parallel lines (default: 1.0) 111 | - **simplify**: If True, applies simplification to the geometry (default: True) 112 | - **simplify_tolerance**: Tolerance for simplification (default: 0.5) 113 | - **allow_45_degree**: If True, allows edges to be oriented at 45-degree angles (default: True) 114 | - **diagonal_threshold_reduction**: Used to reduce the chance of diagonal edges being generated, can be from 0 to 22.5 (default: 15.0) 115 | - **allow_circles**: If True, detects and converts near-circular shapes to perfect circles (default: True) 116 | - **circle_threshold**: Intersection over Union (IoU) threshold for circle detection (default: 0.9) 117 | - **num_cores**: Number of CPU cores to use for parallel processing (default: 0, which uses all available cores) 118 | - **include_metadata**: Include the main direction, IOU, perimeter and aligned_direction (if used) in output gdf 119 | - **neighbor_alignment**: If True, try to align each building with neighboring buildings (default: False) 120 | - **neighbor_search_distance**: The distance to find neighboring buildings (default: 100.0) 121 | - **neighbor_max_rotation**: The maximum allowable rotation to align with neighbors (default: 10) 122 | 123 | 124 | ## Returns 125 | 126 | - A new GeoDataFrame with regularized polygon geometries 127 | 128 | ## How It Works 129 | 130 | 1. **Edge Analysis**: Analyzes each polygon to identify principal directions 131 | 2.
**Edge Orientation**: Aligns edges to be parallel, perpendicular, or at 45 degrees to the main direction 132 | 3. **Circle Detection**: Optionally identifies shapes that are nearly circular and converts them to perfect circles 133 | 4. **Edge Connection**: Ensures proper connectivity between oriented edges 134 | 5. **Angle Enforcement**: Post-processing to ensure target angles are precisely maintained 135 | 6. **Neighbor Alignment**: Optionally align each building with neighboring buildings, via rotation around centroid. 136 | 137 | ## License 138 | 139 | This project is licensed under the MIT License 140 | 141 | ## Acknowledgments 142 | 143 | This library was inspired by the [RS-building-regularization](https://github.com/niecongchong/RS-building-regularization) project, with improvements for integration with the GeoPandas ecosystem and enhanced regularization algorithms. -------------------------------------------------------------------------------- /buildingregulariser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Polygon Regularization Package 3 | 4 | A package for regularizing polygons by aligning edges to principal directions. 
5 | """ 6 | 7 | from .__version__ import __version__ 8 | from .coordinator import regularize_geodataframe 9 | 10 | # Package-wide exports 11 | __all__ = [ 12 | "regularize_geodataframe", 13 | "__version__", 14 | ] 15 | -------------------------------------------------------------------------------- /buildingregulariser/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.2" 2 | -------------------------------------------------------------------------------- /buildingregulariser/coordinator.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from multiprocessing import Pool, cpu_count 3 | from typing import Optional, Union 4 | 5 | import geopandas as gpd 6 | import pandas as pd 7 | import pyproj 8 | 9 | from .neighbor_alignment import align_with_neighbor_polygons 10 | from .regularization import regularize_single_polygon 11 | 12 | 13 | def cleanup_geometry( 14 | result_geodataframe: gpd.GeoDataFrame, simplify_tolerance: float 15 | ) -> gpd.GeoDataFrame: 16 | """ 17 | Cleans up geometries in a GeoDataFrame. 18 | 19 | Removes empty geometries, attempts to remove small slivers using buffer 20 | operations, and simplifies geometries to remove redundant vertices. 21 | 22 | Parameters: 23 | ----------- 24 | result_geodataframe : geopandas.GeoDataFrame 25 | GeoDataFrame with geometries to clean. 26 | simplify_tolerance : float 27 | Tolerance used for simplification and determining buffer size 28 | for sliver removal. 29 | 30 | Returns: 31 | -------- 32 | geopandas.GeoDataFrame 33 | GeoDataFrame with cleaned geometries. 
34 | """ 35 | # Filter out None results from processing errors 36 | result_geodataframe = result_geodataframe[~result_geodataframe.geometry.is_empty] 37 | result_geodataframe = result_geodataframe[result_geodataframe.geometry.notna()] 38 | 39 | if result_geodataframe.empty: 40 | return result_geodataframe # Return early if GDF is empty 41 | 42 | # Define buffer size based on simplify tolerance 43 | buffer_size = simplify_tolerance / 50 44 | 45 | # Attempt to remove small slivers using a sequence of buffer operations 46 | # Positive buffer -> negative buffer -> positive buffer 47 | result_geodataframe["geometry"] = result_geodataframe.geometry.buffer( 48 | buffer_size, cap_style="square", join_style="mitre" 49 | ) 50 | result_geodataframe["geometry"] = result_geodataframe.geometry.buffer( 51 | buffer_size * -2, cap_style="square", join_style="mitre" 52 | ) 53 | result_geodataframe["geometry"] = result_geodataframe.geometry.buffer( 54 | buffer_size, cap_style="square", join_style="mitre" 55 | ) 56 | 57 | # Remove any geometries that became empty after buffering 58 | result_geodataframe = result_geodataframe[~result_geodataframe.geometry.is_empty] 59 | 60 | if result_geodataframe.empty: 61 | return result_geodataframe # Return early if GDF is empty 62 | 63 | # Simplify to remove collinear vertices introduced by buffering/regularization 64 | # Use a small tolerance related to the buffer size 65 | result_geodataframe["geometry"] = result_geodataframe.geometry.simplify( 66 | tolerance=buffer_size, preserve_topology=True 67 | ) 68 | # Final check for empty geometries after simplification 69 | result_geodataframe = result_geodataframe[~result_geodataframe.geometry.is_empty] 70 | 71 | return result_geodataframe 72 | 73 | 74 | def regularize_geodataframe( 75 | geodataframe: gpd.GeoDataFrame, 76 | parallel_threshold: float = 1.0, 77 | target_crs: Optional[Union[str, pyproj.CRS]] = None, 78 | simplify: bool = True, 79 | simplify_tolerance: float = 0.5, 80 | allow_45_degree: 
bool = True, 81 | diagonal_threshold_reduction: float = 15, 82 | allow_circles: bool = True, 83 | circle_threshold: float = 0.9, 84 | num_cores: int = 0, 85 | include_metadata: bool = False, 86 | neighbor_alignment: bool = False, 87 | neighbor_search_distance: float = 100.0, 88 | neighbor_max_rotation: float = 10, 89 | ) -> gpd.GeoDataFrame: 90 | """ 91 | Regularizes polygon geometries in a GeoDataFrame by aligning edges. 92 | 93 | Aligns edges to be parallel or perpendicular (optionally also 45 degrees) 94 | to their main direction. Handles reprojection, initial simplification, 95 | regularization, geometry cleanup, and parallel processing. 96 | 97 | Parameters: 98 | ----------- 99 | geodataframe : geopandas.GeoDataFrame 100 | Input GeoDataFrame with polygon or multipolygon geometries. 101 | parallel_threshold : float, optional 102 | Distance threshold for merging nearly parallel adjacent edges during 103 | regularization. Specified in the same units as the input GeoDataFrame's CRS. Defaults to 1.0. 104 | target_crs : str or pyproj.CRS, optional 105 | CRS to reproject the input GeoDataFrame to before regularization. 106 | If None, no reprojection is performed. Defaults to None. 107 | simplify : bool, optional 108 | If True, applies initial simplification to the geometry before 109 | regularization. Defaults to True. 110 | simplify_tolerance : float, optional 111 | Tolerance for the initial simplification step (if `simplify` is True). 112 | Also used for geometry cleanup steps. Specified in the same units as the input GeoDataFrame's CRS. Defaults to 0.5. 113 | allow_45_degree : bool, optional 114 | If True, allows edges to be oriented at 45-degree angles relative 115 | to the main direction during regularization. Defaults to True. 116 | diagonal_threshold_reduction : float, optional 117 | Reduction factor in degrees to reduce the likelihood of diagonal 118 | edges being created. larger values reduce the likelihood of diagonal edges. 
Possible values are 0 - 22.5 degrees. 119 | Defaults to 15 degrees. 120 | allow_circles : bool, optional 121 | If True, attempts to detect polygons that are nearly circular and 122 | replaces them with perfect circles. Defaults to True. 123 | circle_threshold : float, optional 124 | Intersection over Union (IoU) threshold used for circle detection 125 | (if `allow_circles` is True). Value between 0 and 1. Defaults to 0.9. 126 | num_cores : int, optional 127 | Number of CPU cores to use for parallel processing. If 1, processing 128 | is done sequentially. Defaults to 0 (all available cores). 129 | include_metadata : bool, optional 130 | If True, includes metadata about the regularization process in the 131 | output GeoDataFrame. Defaults to False. 132 | neighbor_alignment : bool, optional 133 | If True, aligns the polygons with their neighbors after regularization. 134 | Defaults to False. 135 | neighbor_search_distance : float, optional 136 | Search radius used to identify neighboring polygons for alignment (if `align_with_neighbors` is True). 137 | Specified in the same units as the input GeoDataFrame's CRS. Defaults to 100.0. 138 | neighbor_max_rotation : float, optional 139 | Direction threshold for aligning with neighbors (if 140 | `align_with_neighbors` is True). Defaults to 10 degrees. 141 | 142 | Returns: 143 | -------- 144 | geopandas.GeoDataFrame 145 | A new GeoDataFrame with regularized polygon geometries. Original 146 | attributes are preserved. Geometries that failed processing might be 147 | dropped. 
148 | """ 149 | # Make a copy to avoid modifying the original GeoDataFrame 150 | result_geodataframe = geodataframe.copy() 151 | # Explode the geometries to process them individually 152 | result_geodataframe = result_geodataframe.explode(ignore_index=True) 153 | 154 | if target_crs is not None: 155 | # Reproject to the target CRS if specified 156 | result_geodataframe = result_geodataframe.to_crs(target_crs) 157 | # Split gdf into chunks for parallel processing 158 | # Determine number of jobs 159 | if num_cores <= 0: 160 | num_cores = cpu_count() 161 | 162 | partial_regularize_single_polygon = partial( 163 | regularize_single_polygon, 164 | parallel_threshold=parallel_threshold, 165 | allow_45_degree=allow_45_degree, 166 | diagonal_threshold_reduction=diagonal_threshold_reduction, 167 | allow_circles=allow_circles, 168 | circle_threshold=circle_threshold, 169 | include_metadata=include_metadata, 170 | simplify=simplify, 171 | simplify_tolerance=simplify_tolerance, 172 | ) 173 | 174 | # Sequential processing 175 | if num_cores == 1: 176 | processed_data = [ 177 | partial_regularize_single_polygon(geometry) 178 | for geometry in result_geodataframe["geometry"] 179 | ] 180 | else: 181 | with Pool(num_cores) as p: 182 | processed_data = p.map( 183 | partial_regularize_single_polygon, 184 | result_geodataframe["geometry"], 185 | ) 186 | 187 | results_df = pd.DataFrame(processed_data) 188 | result_geodataframe["geometry"] = results_df["geometry"] 189 | result_geodataframe["iou"] = results_df["iou"] 190 | result_geodataframe["main_direction"] = results_df["main_direction"] 191 | 192 | # Clean up the resulting geometries (remove slivers) 193 | result_geodataframe = cleanup_geometry( 194 | result_geodataframe=result_geodataframe, simplify_tolerance=simplify_tolerance 195 | ) 196 | 197 | # Return result_geodataframe 198 | if neighbor_alignment: 199 | result_geodataframe = align_with_neighbor_polygons( 200 | gdf=result_geodataframe, 201 | 
buffer_size=neighbor_search_distance, 202 | max_rotation=neighbor_max_rotation, 203 | include_metadata=include_metadata, 204 | num_cores=num_cores, 205 | ) 206 | 207 | if not include_metadata: 208 | # Extract metadata columns from the results DataFrame 209 | try_to_drop_cols = [ 210 | "iou", 211 | "main_direction", 212 | "perimeter", 213 | "aligned_direction", 214 | ] 215 | for col in try_to_drop_cols: 216 | if col in result_geodataframe.columns: 217 | result_geodataframe.drop(columns=col, inplace=True) 218 | 219 | return result_geodataframe 220 | -------------------------------------------------------------------------------- /buildingregulariser/geometry_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Tuple, Union 3 | 4 | import numpy as np 5 | 6 | 7 | def calculate_distance( 8 | point_1: np.ndarray, 9 | point_2: np.ndarray, 10 | ) -> float: 11 | """ 12 | Calculate Euclidean distance between two points. 13 | 14 | Parameters: 15 | ----------- 16 | point_1 : np.ndarray 17 | First point coordinates 18 | point_2 : np.ndarray 19 | Second point coordinates 20 | 21 | Returns: 22 | -------- 23 | float 24 | Euclidean distance 25 | """ 26 | # return np.sqrt(np.sum(np.power((point_1 - point_2), 2))) 27 | dx, dy = point_1[0] - point_2[0], point_1[1] - point_2[1] 28 | return math.hypot(dx, dy) # fastest for scalar calls 29 | 30 | 31 | def calculate_azimuth_angle(start_point: np.ndarray, end_point: np.ndarray) -> float: 32 | """ 33 | Calculate azimuth angle of the line from start_point to end_point (in degrees). 34 | Angle is measured clockwise from the positive x-axis. 
# -------- buildingregulariser/geometry_utils.py (full module, reformatted) --------
import math
from typing import List, Tuple, Union

import numpy as np


def calculate_distance(
    point_1: np.ndarray,
    point_2: np.ndarray,
) -> float:
    """
    Calculate the Euclidean distance between two 2D points.

    Parameters:
    -----------
    point_1 : np.ndarray
        First point coordinates (x, y)
    point_2 : np.ndarray
        Second point coordinates (x, y)

    Returns:
    --------
    float
        Euclidean distance
    """
    dx = point_1[0] - point_2[0]
    dy = point_1[1] - point_2[1]
    # math.hypot on scalars is faster than vectorized alternatives here
    return math.hypot(dx, dy)


def calculate_azimuth_angle(start_point: np.ndarray, end_point: np.ndarray) -> float:
    """
    Calculate the azimuth angle of the line from start_point to end_point.

    The angle is measured counter-clockwise from the positive x-axis
    (standard mathematical convention of atan2), normalized to [0, 360).

    Parameters:
    -----------
    start_point : np.ndarray
        Starting point coordinates
    end_point : np.ndarray
        Ending point coordinates

    Returns:
    --------
    float
        Angle in degrees in the range [0, 360)
    """
    dx = end_point[0] - start_point[0]
    dy = end_point[1] - start_point[1]
    angle_radians = math.atan2(dy, dx)
    angle_degrees = math.degrees(angle_radians)
    # atan2 yields (-180, 180]; fold negatives into [0, 360)
    return angle_degrees % 360


def create_line_equation(
    point1: np.ndarray,
    point2: np.ndarray,
) -> Tuple[float, float, float]:
    """
    Create the equation of the line through two points.

    Returns coefficients (A, B, C) such that the line satisfies
    A*x + B*y = C. (Note: this is the negated-constant form of
    A*x + B*y + C0 = 0, matching how calculate_line_intersection and
    calculate_parallel_line_distance consume the triple.)

    Parameters:
    -----------
    point1, point2 : np.ndarray
        Two points defining the line

    Returns:
    --------
    tuple
        Coefficients (A, B, C) where A*x + B*y = C
    """
    A = point1[1] - point2[1]
    B = point2[0] - point1[0]
    C = point1[0] * point2[1] - point2[0] * point1[1]
    return A, B, -C


def calculate_line_intersection(
    line1: Tuple[float, float, float],
    line2: Tuple[float, float, float],
) -> Union[Tuple[float, float], None]:
    """
    Calculate the intersection point of two lines via Cramer's rule.

    Parameters:
    -----------
    line1, line2 : tuple
        Line coefficients (A, B, C) where A*x + B*y = C

    Returns:
    --------
    tuple or None
        Coordinates of the intersection point, or None if the lines are
        parallel (zero determinant).
    """
    D = line1[0] * line2[1] - line1[1] * line2[0]
    Dx = line1[2] * line2[1] - line1[1] * line2[2]
    Dy = line1[0] * line2[2] - line1[2] * line2[0]
    if D != 0:
        return Dx / D, Dy / D
    return None


def calculate_parallel_line_distance(
    line1: Tuple[float, float, float],
    line2: Tuple[float, float, float],
) -> float:
    """
    Calculate the distance between two (nearly) parallel lines.

    Each line is given as (A, B, C) with A*x + B*y = C. Both equations are
    normalized by the norm of their (A, B) coefficients; the distance is then
    the absolute difference of the signed offsets from the origin. A previous
    implementation normalized by the x-coefficient (A + eps), which lost
    precision whenever A was near zero (near-horizontal lines); normalizing
    by hypot(A, B) is exact for every orientation.

    Parameters:
    -----------
    line1, line2 : tuple
        Line coefficients (A, B, C) where A*x + B*y = C

    Returns:
    --------
    float
        Distance between the lines
    """
    A1, B1, C1 = line1
    A2, B2, C2 = line2
    eps = 1e-10  # guards against degenerate (zero-length) line definitions

    # Flip line2 if its normal points the opposite way, so the signed
    # offsets are directly comparable.
    if A1 * A2 + B1 * B2 < 0:
        A2, B2, C2 = -A2, -B2, -C2

    norm1 = math.hypot(A1, B1) + eps
    norm2 = math.hypot(A2, B2) + eps
    return abs(C1 / norm1 - C2 / norm2)


def project_point_to_line(
    point_x: float,
    point_y: float,
    line_x1: float,
    line_y1: float,
    line_x2: float,
    line_y2: float,
) -> Tuple[float, float]:
    """
    Project a point orthogonally onto a line.

    Parameters:
    -----------
    point_x, point_y : float
        Coordinates of the point to project
    line_x1, line_y1, line_x2, line_y2 : float
        Coordinates of two points defining the line

    Returns:
    --------
    Tuple[float, float]
        Coordinates of the projected point
    """
    eps = 1e-10  # avoids division by zero for degenerate (zero-length) lines
    dx = line_x2 - line_x1
    dy = line_y2 - line_y1
    denom = dx * dx + dy * dy + eps

    x = (
        point_x * dx * dx
        + point_y * dy * dx
        + (line_x1 * line_y2 - line_x2 * line_y1) * dy
    ) / denom

    y = (
        point_x * dx * dy
        + point_y * dy * dy
        + (line_x2 * line_y1 - line_x1 * line_y2) * dx
    ) / denom

    return (x, y)


def rotate_point(
    point: np.ndarray,
    center: np.ndarray,
    angle_degrees: float,
) -> Tuple[float, float]:
    """
    Rotate a point clockwise around a center point.

    (For a positive angle the rotation is clockwise in standard axes:
    x' = x*cos + y*sin, y' = y*cos - x*sin.)

    Parameters:
    -----------
    point : np.ndarray
        Point to rotate
    center : np.ndarray
        Center of rotation
    angle_degrees : float
        Rotation angle in degrees

    Returns:
    --------
    tuple
        Rotated point coordinates
    """
    x, y = point
    center_x, center_y = center
    angle_radians = math.radians(angle_degrees)

    # Translate point to origin
    translated_x = x - center_x
    translated_y = y - center_y

    # Clockwise rotation
    rotated_x = translated_x * math.cos(angle_radians) + translated_y * math.sin(
        angle_radians
    )
    rotated_y = translated_y * math.cos(angle_radians) - translated_x * math.sin(
        angle_radians
    )

    # Translate back
    return (rotated_x + center_x, rotated_y + center_y)


def rotate_edge(
    start_point: np.ndarray, end_point: np.ndarray, rotation_angle: float
) -> List[np.ndarray]:
    """
    Rotate an edge around its midpoint by the given angle.

    Both sign branches of the original implementation reduced to rotating
    by -rotation_angle (for a negative angle, abs(angle) == -angle), so a
    single call suffices. The zero case is short-circuited to avoid
    needless floating-point round-trips.

    Parameters:
    -----------
    start_point : numpy.ndarray
        Start point of the edge
    end_point : numpy.ndarray
        End point of the edge
    rotation_angle : float
        Angle to rotate by in degrees

    Returns:
    --------
    list
        List containing the rotated start and end points
    """
    if rotation_angle == 0:
        return [np.array(start_point), np.array(end_point)]

    midpoint = (start_point + end_point) / 2
    rotated_start = rotate_point(start_point, midpoint, -rotation_angle)
    rotated_end = rotate_point(end_point, midpoint, -rotation_angle)
    return [np.array(rotated_start), np.array(rotated_end)]
# -------- /buildingregulariser/neighbor_alignment.py (continues below) --------
def process_row(
    idx: int,
    buffer_size: float,
    max_rotation: float,
    gdf: gpd.GeoDataFrame,
) -> dict[str, Any]:
    """
    Align one polygon's orientation with the dominant direction of its neighbours.

    Finds every polygon intersecting a buffer around the target polygon, builds a
    perimeter-weighted tally of neighbour directions (and the directions
    perpendicular to them), and rotates the target polygon onto the heaviest
    candidate direction that lies within ``max_rotation`` degrees of its own.

    Parameters:
    -----------
    idx : int
        Index of the polygon row in the GeoDataFrame.
    buffer_size : float
        Distance used to define the neighbourhood search area around the polygon.
    max_rotation : float
        Maximum allowed rotation (in degrees) from the current to the proposed direction.
    gdf : gpd.GeoDataFrame
        The full GeoDataFrame containing all polygons and required attributes:
        - 'geometry': polygon geometry
        - 'main_direction': original orientation angle
        - 'perimeter': polygon perimeter (used as weight)

    Returns:
    --------
    dict
        {'idx': the processed row index,
         'geometry': original or rotated polygon geometry,
         'aligned_direction': the direction used for alignment}
    """
    row = gdf.iloc[idx]

    # Cheap spatial-index pass to find everything touching the buffer zone
    neighbourhood = row.geometry.buffer(buffer_size)
    neighbours = gdf.iloc[gdf.sindex.query(neighbourhood, predicate="intersects")]

    # Tally each direction and its perpendicular, weighted by perimeter
    candidate_directions = pd.concat(
        [neighbours["main_direction"], 90 - neighbours["main_direction"]]
    )
    candidate_weights = pd.concat([neighbours["perimeter"], neighbours["perimeter"]])
    weight_per_direction = candidate_weights.groupby(candidate_directions).sum()

    # Heaviest directions first
    ranked_directions = sorted(
        weight_per_direction.to_dict().items(),
        key=lambda item: item[1],
        reverse=True,
    )

    outcome = {
        "idx": idx,
        "geometry": row.geometry,
        "aligned_direction": row.main_direction,
    }

    # Accept the first of the top four candidates within the rotation budget
    for candidate_direction, _weight in ranked_directions[:4]:
        delta = row.main_direction - candidate_direction
        if abs(delta) <= max_rotation:
            outcome["aligned_direction"] = candidate_direction
            outcome["geometry"] = rotate(row.geometry, -delta, origin="centroid")
            break

    return outcome
def align_with_neighbor_polygons(
    gdf: gpd.GeoDataFrame,
    num_cores: int,
    buffer_size: float,
    max_rotation: float,
    include_metadata: bool,
) -> gpd.GeoDataFrame:
    """
    Aligns the orientation of polygons in a GeoDataFrame based on their neighbors' dominant direction.

    Multi-part geometries are first exploded into single-part rows. Each polygon is
    then evaluated in parallel: a buffer is used to identify neighboring polygons,
    which are used to infer a dominant direction. If a suitable direction is found
    within the angular threshold, the polygon is rotated to match it.

    Parameters:
    -----------
    gdf : gpd.GeoDataFrame
        Input GeoDataFrame with 'geometry' and 'main_direction' columns.
    num_cores : int
        Number of processes to use for parallel processing.
    buffer_size : float
        Buffer distance for determining neighborhoods.
    max_rotation : float
        Maximum rotation angle allowed for alignment (in degrees).
    include_metadata : bool
        Whether to retain intermediate columns such as 'aligned_direction' and 'perimeter'.

    Returns:
    --------
    gpd.GeoDataFrame
        A copy of the (exploded) GeoDataFrame with aligned geometries. Intermediate
        metadata columns are included only if `include_metadata` is True.
    """
    # Explode multi-part geometries so each row holds a single polygon, then
    # add the helper columns process_row() expects.
    gdf = gdf.explode(ignore_index=True).copy()
    gdf["aligned_direction"] = gdf["main_direction"].copy()
    gdf["perimeter"] = gdf.geometry.length

    process_row_partial = partial(
        process_row,
        buffer_size=buffer_size,
        max_rotation=max_rotation,
        gdf=gdf,
    )

    # Work out chunksize for pool.map
    row_count = len(gdf)
    chunksize = min(max(row_count // num_cores, 1), 5000)

    with Pool(processes=num_cores) as pool:
        results = pool.map(process_row_partial, range(row_count), chunksize=chunksize)

    # Write the aligned geometries/directions back onto the frame
    for result in results:
        idx = result["idx"]
        gdf.at[idx, "geometry"] = result["geometry"]
        gdf.at[idx, "aligned_direction"] = result["aligned_direction"]

    # Drop intermediate columns unless the caller asked to keep them
    if not include_metadata:
        gdf = gdf.drop(columns=["aligned_direction", "perimeter"])

    return gdf
def find_nearest_target_angle(
    current_azimuth: float, main_direction: float, allow_45_degree: bool
) -> float:
    """
    Return the allowed azimuth (0-360) closest to ``current_azimuth``.

    Allowed azimuths are the main direction plus multiples of 90 degrees,
    and additionally the 45-degree diagonals when ``allow_45_degree`` is set.
    """
    # Signed deviation from the main direction, normalized to [-180, 180]
    deviation = (current_azimuth - main_direction + 180) % 360 - 180

    # Candidate offsets from the main direction (180/-180 are equivalent;
    # 225/270/315 appear here as -135/-90/-45)
    if allow_45_degree:
        offsets = [0.0, 45.0, 90.0, 135.0, 180.0, -45.0, -90.0, -135.0]
    else:
        offsets = [0.0, 90.0, 180.0, -90.0]

    def angular_gap(offset: float) -> float:
        # Shortest angle between the deviation and this offset
        return abs((deviation - offset + 180) % 360 - 180)

    best_offset = min(offsets, key=angular_gap)

    # Final target azimuth, normalized to [0, 360)
    return (main_direction + best_offset + 360) % 360
def enforce_angles_post_process(
    points: List[np.ndarray],
    main_direction: int,
    allow_45_degree: bool,
    angle_tolerance: float = 0.1,
    max_iterations: int = 2,
) -> List[np.ndarray]:
    """
    Iteratively nudge vertices so every segment hits an allowed target angle.

    Runs multiple passes, since adjusting one segment moves the start point of
    the next one.

    Parameters:
    -----------
    points : list[np.ndarray]
        Polygon vertices, assumed NOT closed (last point != first point), N >= 3.
    main_direction : float
        The main direction angle in degrees (0-360).
    allow_45_degree : bool
        Whether to allow 45-degree angles.
    angle_tolerance : float
        Allowable deviation from the target angle in degrees. Default 0.1.
    max_iterations : int
        Maximum number of full adjustment passes. Default 2.

    Returns:
    --------
    list[np.ndarray]
        List of adjusted vertices (N points).
    """
    if len(points) < 3:
        # Not enough points to form segments
        return points

    vertices = [p.copy() for p in points]  # never mutate the caller's points
    vertex_count = len(vertices)

    for _ in range(max_iterations):
        adjusted = False  # did this pass move anything?

        for start_idx in range(vertex_count):
            end_idx = (start_idx + 1) % vertex_count  # wrap to close the ring
            segment_start = vertices[start_idx]
            segment_end = vertices[end_idx]

            # Coincident points have no defined azimuth; leave them alone
            if calculate_distance(segment_start, segment_end) < 1e-7:
                continue

            azimuth = calculate_azimuth_angle(segment_start, segment_end)
            target = find_nearest_target_angle(
                azimuth, main_direction, allow_45_degree
            )

            # Shortest signed rotation (degrees) from current to target azimuth
            needed_rotation = (target - azimuth + 180) % 360 - 180

            # Only rotate when meaningfully off-target, to prevent jitter
            if abs(needed_rotation) > angle_tolerance:
                adjusted = True
                # rotate_point applies -angle for positive input and
                # abs(angle) for negative input — i.e. always -needed_rotation
                moved_end = rotate_point(segment_end, segment_start, -needed_rotation)
                # Update in place so the next segment sees the new endpoint
                vertices[end_idx] = np.array(moved_end)

        # Converged: nothing moved this pass
        if not adjusted:
            break

    return vertices
def regularize_coordinate_array(
    coordinates: np.ndarray,
    parallel_threshold: float,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
    angle_enforcement_tolerance: float = 0.1,
) -> Tuple[np.ndarray, float]:
    """
    Regularize polygon coordinates by aligning edges to be either parallel
    or perpendicular (or 45 deg) to the main direction, with a
    post-processing step to enforce angles.

    Parameters:
    -----------
    coordinates : numpy.ndarray
        Array of coordinates for a polygon ring (shape: n x 2).
        Assumed closed (first point == last point).
    parallel_threshold : float
        Distance threshold for considering parallel lines as needing connection.
    allow_45_degree : bool
        If True, allows 45-degree orientations relative to the main direction.
    diagonal_threshold_reduction : float
        Angle in degrees to subtract from the 45-degree snapping thresholds,
        making diagonal (45°) orientations less likely.
    angle_enforcement_tolerance : float
        Maximum allowed deviation (degrees) from target angle in the final output.
        Default is 0.1 degrees.

    Returns:
    --------
    tuple
        (regularized coordinate array (m x 2, closed: first == last),
        main direction angle in degrees). On failure the original
        coordinates and a direction of 0.0 are returned.
    """
    # Need at least 3 unique points + closing point for a polygon
    if len(coordinates) < 4:
        warnings.warn("Not enough coordinates to regularize. Returning original.")
        return coordinates, 0.0

    # Remove duplicate closing point for processing, if present
    if np.allclose(coordinates[0], coordinates[-1]):
        processing_coords = coordinates[:-1]
    else:
        processing_coords = coordinates  # Assume it wasn't closed

    if len(processing_coords) < 3:
        warnings.warn(
            "Not enough unique coordinates to regularize. Returning original."
        )
        return coordinates, 0.0  # Return original closed coords

    # Analyze edges (non-closed ring) to find the dominant structural direction
    edge_data = analyze_edges(processing_coords)

    # Rotate each edge onto its snapped orientation
    oriented_edges, edge_orientations = orient_edges(
        processing_coords,
        edge_data,
        allow_45_degree=allow_45_degree,
        diagonal_threshold_reduction=diagonal_threshold_reduction,
    )

    # Connect the rotated edges back into a single ring of points
    initial_regularized_points = connect_regularized_edges(
        oriented_edges, edge_orientations, parallel_threshold
    )

    if not initial_regularized_points or len(initial_regularized_points) < 3:
        warnings.warn("Regularization resulted in too few points. Returning original.")
        return coordinates, 0.0

    # Iteratively nudge vertices until every segment hits its target angle
    final_regularized_points_list = enforce_angles_post_process(
        points=initial_regularized_points,
        main_direction=edge_data["main_direction"],
        allow_45_degree=allow_45_degree,
        angle_tolerance=angle_enforcement_tolerance,
    )

    if not final_regularized_points_list or len(final_regularized_points_list) < 3:
        warnings.warn(
            "Angle enforcement resulted in too few points. Returning original."
        )
        return coordinates, 0.0

    # Stack the points into an array and explicitly close the ring for Shapely
    final_coords_array = np.array(final_regularized_points_list)
    closed_final_coords = np.vstack([final_coords_array, final_coords_array[0]])

    return closed_final_coords, edge_data["main_direction"]
def analyze_edges(
    coordinates: np.ndarray, coarse_bin_size: int = 5, fine_bin_size: int = 1
) -> dict[str, Any]:
    """
    Analyze edges to determine azimuth angles and the main structural direction.

    Parameters:
    -----------
    coordinates : np.ndarray
        Polygon coordinates (shape: N x 2), assumed NOT closed.
    coarse_bin_size : int
        Size of the coarse histogram bin in degrees. Default 5.
    fine_bin_size : int
        Size of the fine histogram bin in degrees. Default 1.

    Returns:
    --------
    dict
        Dictionary containing:
        - azimuth_angles: array of absolute edge angles (degrees)
        - edge_indices: array of [start_idx, end_idx] pairs for each edge
        - main_direction: dominant structure orientation (degrees)
    """

    def empty_result() -> dict[str, Any]:
        return {
            "azimuth_angles": np.array([]),
            "edge_indices": np.array([]),
            "main_direction": 0,
        }

    if len(coordinates) < 3:
        return empty_result()

    def smooth(hist: np.ndarray) -> np.ndarray:
        # Weighted 3-tap smoothing; every read comes from the input histogram,
        # never from already-smoothed values.
        out = hist.copy()
        out[1:-1] = (2 * hist[1:-1] + hist[:-2] + hist[2:]) / 4
        out[0] = (2 * hist[0] + hist[1]) / 3
        out[-1] = (2 * hist[-1] + hist[-2]) / 3
        return out

    def weighted_histogram(
        angles: np.ndarray,
        bin_size: float,
        weights: np.ndarray,
        num_bins_override=None,
        do_smooth: bool = True,
    ) -> np.ndarray:
        num_bins = (
            int(90 / bin_size) if num_bins_override is None else num_bins_override
        )
        bin_idx = np.minimum(np.floor(angles / bin_size).astype(int), num_bins - 1)
        hist = np.bincount(bin_idx, weights=weights, minlength=num_bins)
        return smooth(hist) if do_smooth else hist

    def dominant_symmetric_bin(hist: np.ndarray) -> int:
        # Average the histogram with its mirror image, take the two strongest
        # symmetric candidates, and keep the one heavier in the raw histogram.
        symmetric = (hist + hist[::-1]) / 2
        runner_up, top = np.argsort(symmetric)[-2:]
        return runner_up if hist[runner_up] > hist[top] else top

    # Edge vectors from consecutive vertices (wrapping last -> first)
    vectors = np.roll(coordinates, -1, axis=0) - coordinates
    edge_lengths = np.linalg.norm(vectors, axis=1)

    # Drop degenerate (near zero-length) edges
    keep = edge_lengths > 1e-9
    if not np.any(keep):
        return empty_result()

    vectors = vectors[keep]
    lengths = edge_lengths[keep]
    azimuth_angles = (np.degrees(np.arctan2(vectors[:, 1], vectors[:, 0])) + 360) % 360
    # Fold direction into [0, 90): edges differing by 90/180 deg are equivalent
    orthogonal_angles = (azimuth_angles % 180) % 90

    vertex_ids = np.arange(len(coordinates))
    edge_indices = np.stack(
        [vertex_ids, (vertex_ids + 1) % len(coordinates)], axis=1
    )[keep]

    coarse_bins = weighted_histogram(orthogonal_angles, coarse_bin_size, lengths)
    fine_bins = weighted_histogram(
        orthogonal_angles, fine_bin_size, lengths, num_bins_override=90
    )

    if np.sum(coarse_bins) == 0:
        refined_angle = 0
    else:
        # Coarse pass picks a window; fine pass refines inside it
        coarse_bin = dominant_symmetric_bin(coarse_bins)
        window_start = coarse_bin * coarse_bin_size
        window_end = window_start + coarse_bin_size
        fine_bin = dominant_symmetric_bin(fine_bins[window_start:window_end])

        # Center of the selected fine bin
        bin_center = window_start + fine_bin + fine_bin_size / 2

        # Round the angle toward the heavier neighbouring fine bin
        if fine_bin == 0:
            refined_angle = math.floor(bin_center)
        elif fine_bin == (window_end - window_start - 1):
            refined_angle = math.ceil(bin_center)
        else:
            left = fine_bins[window_start + fine_bin - 1]
            right = fine_bins[window_start + fine_bin + 1]
            if right > left:
                refined_angle = math.ceil(bin_center)
            else:
                refined_angle = math.floor(bin_center)

    return {
        "azimuth_angles": azimuth_angles,
        "edge_indices": edge_indices,
        "main_direction": refined_angle,
    }
def get_orientation_and_rotation(
    diff_angle: float,
    main_direction: float,
    azimuth: float,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
    tolerance: float = 1e-9,
) -> Tuple[int, float]:
    """
    Snap an edge to the nearest allowed orientation and compute its rotation.

    Parameters:
    -----------
    diff_angle : float
        Shortest signed angle from main_direction to the edge azimuth, in [-180, 180].
    main_direction : float
        Dominant polygon direction in degrees.
    azimuth : float
        Absolute azimuth of the edge in degrees.
    allow_45_degree : bool
        If True, edges may also snap to the 45/135 degree diagonals.
    diagonal_threshold_reduction : float
        Degrees subtracted from the 22.5-degree half-window for diagonal
        snapping, making 45-degree orientations less likely.
    tolerance : float
        Numeric tolerance for detecting a degenerate 0/180 offset.

    Returns:
    --------
    tuple
        (orientation_code, rotation_angle) where orientation_code is
        0 = parallel/anti-parallel, 1 = perpendicular, 2 = 45/225 diagonal,
        3 = 135/315 diagonal, and rotation_angle is the signed rotation in
        degrees (normalized to [-180, 180]) that aligns the edge.
    """
    target_offset = 0.0  # Desired angle relative to main_direction (0, 45, 90, ...)
    orientation_code = 0

    if allow_45_degree:
        mod180 = diff_angle % 180

        # Distance from the edge to each family of orientations
        dist_to_0 = min(abs(mod180), abs(mod180 - 180))
        dist_to_90 = abs(mod180 - 90)
        dist_to_45 = min(abs(mod180 - 45), abs(mod180 - 135))

        # Shrinking the snap window by diagonal_threshold_reduction makes
        # diagonals less likely than the orthogonal orientations
        if dist_to_45 <= (22.5 - diagonal_threshold_reduction):
            # Close enough to 45/135/225/315 degrees
            angle_mod = diff_angle % 90
            if angle_mod < 45:
                target_offset = (diff_angle // 90) * 90 + 45
            else:
                target_offset = (diff_angle // 90 + 1) * 90 - 45

            # Use modulo 180 to differentiate between 45/225 and 135/315
            normalized_angle = (main_direction + target_offset) % 180
            if 0 <= normalized_angle < 90:
                orientation_code = 2  # 45/225 degrees
            else:
                orientation_code = 3  # 135/315 degrees
        elif dist_to_0 <= dist_to_90:
            # Closer to 0/180 degrees
            target_offset = round(diff_angle / 180.0) * 180.0
            orientation_code = 0
        else:
            # Closer to 90/270 degrees
            target_offset = round(diff_angle / 90.0) * 90.0
            if abs(target_offset % 180) < tolerance:
                # Rounding collapsed to 0 or 180; force to +/-90
                target_offset = 90.0 if diff_angle > 0 else -90.0
            orientation_code = 1

    else:  # Snap only to the nearest multiple of 90 degrees
        if abs(diff_angle) < 45.0:
            # Closer to parallel/anti-parallel: snap to 0 or 180
            target_offset = round(diff_angle / 180.0) * 180.0
            orientation_code = 0
        else:
            # Closer to perpendicular: snap to +90 or -90
            target_offset = round(diff_angle / 90.0) * 90.0
            if abs(target_offset % 180) < tolerance:
                # Rounding collapsed to 0 or 180; force to +/-90
                target_offset = 90.0 if diff_angle > 0 else -90.0
            orientation_code = 1

    # Signed rotation that brings the edge azimuth onto the snapped target
    rotation_angle = (main_direction + target_offset - azimuth + 180) % 360 - 180
    return orientation_code, rotation_angle
def orient_edges(
    simplified_coordinates: np.ndarray,
    edge_data: dict,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
) -> Tuple[np.ndarray, List[int]]:
    """
    Orient edges to be parallel or perpendicular (or optionally 45 degrees)
    to the main direction determined by angle distribution analysis.

    Parameters:
    -----------
    simplified_coordinates : numpy.ndarray
        Simplified polygon coordinates (shape: n x 2, assumed closed).
    edge_data : dict
        Dictionary containing edge analysis data ('azimuth_angles', 'edge_indices',
        'main_direction').
    allow_45_degree : bool
        If True, allows edges to be oriented at 45-degree angles relative
        to the main direction.
    diagonal_threshold_reduction : float
        Angle in degrees to subtract from the 45-degree snapping thresholds,
        making diagonal (45°) orientations less likely.

    Returns:
    --------
    tuple
        Tuple containing:
        - oriented_edges (numpy.ndarray): Array of [start, end] points for each oriented edge.
        - edge_orientations (list): List of orientation codes for each edge.
          - 0: Parallel or anti-parallel (0, 180 deg relative to main_direction)
          - 1: Perpendicular (90, 270 deg relative to main_direction)
          - 2: Diagonal (45, 225 deg relative to main_direction) - only if allow_45_degree
          - 3: Diagonal (135, 315 deg relative to main_direction) - only if allow_45_degree
    """
    oriented_edges = []
    edge_orientations = []

    azimuth_angles = edge_data["azimuth_angles"]
    edge_indices = edge_data["edge_indices"]
    main_direction = edge_data["main_direction"]

    for azimuth, (start_idx, end_idx) in zip(azimuth_angles, edge_indices):
        # Shortest signed angle from main_direction to the edge azimuth, [-180, 180]
        diff_angle = (azimuth - main_direction + 180) % 360 - 180

        orientation_code, rotation_angle = get_orientation_and_rotation(
            diff_angle=diff_angle,
            main_direction=main_direction,
            azimuth=azimuth,
            allow_45_degree=allow_45_degree,
            diagonal_threshold_reduction=diagonal_threshold_reduction,
        )

        start_point = np.array(simplified_coordinates[start_idx], dtype=float)
        end_point = np.array(simplified_coordinates[end_idx], dtype=float)

        # Rotate the edge about its midpoint onto the snapped orientation
        oriented_edges.append(rotate_edge(start_point, end_point, rotation_angle))
        edge_orientations.append(orientation_code)

    return np.array(oriented_edges, dtype=float), edge_orientations
def connect_regularized_edges(
    oriented_edges: np.ndarray, edge_orientations: list, parallel_threshold: float
) -> List[np.ndarray]:
    """
    Connect oriented edges to form a regularized polygon.

    Parameters:
    -----------
    oriented_edges : numpy.ndarray
        Array of oriented edges
    edge_orientations : list
        List of edge orientation codes (0=parallel, 1=perpendicular, ...)
    parallel_threshold : float
        Distance threshold for considering parallel lines as needing connection

    Returns:
    --------
    list
        List of regularized points forming the polygon
    """
    regularized_points: List[np.ndarray] = []
    edge_count = len(oriented_edges)

    # Walk every edge pair, wrapping so the last edge connects to the first
    for i in range(edge_count):
        j = (i + 1) % edge_count

        current_start = oriented_edges[i][0]
        current_end = oriented_edges[i][1]
        following_start = oriented_edges[j][0]
        following_end = oriented_edges[j][1]

        if edge_orientations[i] != edge_orientations[j]:
            # Different orientations: intersect the two edge lines
            regularized_points.append(
                handle_perpendicular_edges(
                    current_start, current_end, following_start, following_end
                )
            )
        else:
            # Same orientation: merge or bridge the parallel edges
            regularized_points.extend(
                handle_parallel_edges(
                    current_start,
                    current_end,
                    following_start,
                    following_end,
                    parallel_threshold,
                    j,
                    oriented_edges,
                )
            )

    return regularized_points
def handle_perpendicular_edges(
    current_edge_start: np.ndarray,
    current_edge_end: np.ndarray,
    next_edge_start: np.ndarray,
    next_edge_end: np.ndarray,
) -> np.ndarray:
    """
    Return the corner point where two non-parallel edges meet.

    Parameters:
    -----------
    current_edge_start : numpy.ndarray
        Start point of current edge
    current_edge_end : numpy.ndarray
        End point of current edge
    next_edge_start : numpy.ndarray
        Start point of next edge
    next_edge_end : numpy.ndarray
        End point of next edge

    Returns:
    --------
    numpy.ndarray
        Intersection point of the two edge lines, or the end of the current
        edge if the lines turn out to be parallel.
    """
    first_line = create_line_equation(current_edge_start, current_edge_end)
    second_line = create_line_equation(next_edge_start, next_edge_end)

    crossing = calculate_line_intersection(first_line, second_line)

    # Parallel lines yield no intersection (shouldn't occur here since the
    # orientations differ); fall back to the current edge's endpoint.
    return np.array(crossing) if crossing else current_edge_end
def handle_parallel_edges(
    current_edge_start: np.ndarray,
    current_edge_end: np.ndarray,
    next_edge_start: np.ndarray,
    next_edge_end: np.ndarray,
    parallel_threshold: float,
    next_index: int,
    oriented_edges: np.ndarray,
) -> List[np.ndarray]:
    """
    Connect two same-orientation (parallel) edges.

    Nearby parallel edges are merged by projecting the next edge onto the
    current edge's line (mutating ``oriented_edges[next_index]`` in place);
    distant ones are bridged with a short connecting segment.

    Parameters:
    -----------
    current_edge_start : numpy.ndarray
        Start point of current edge
    current_edge_end : numpy.ndarray
        End point of current edge
    next_edge_start : numpy.ndarray
        Start point of next edge
    next_edge_end : numpy.ndarray
        End point of next edge
    parallel_threshold : float
        Distance threshold for considering parallel lines as needing connection
    next_index : int
        Index of the next edge
    oriented_edges : numpy.ndarray
        Array of all oriented edges (updated in place when edges are merged)

    Returns:
    --------
    list
        List of points to add to the regularized polygon
    """
    gap = calculate_parallel_line_distance(
        create_line_equation(current_edge_start, current_edge_end),
        create_line_equation(next_edge_start, next_edge_end),
    )

    if gap < parallel_threshold:
        # Close enough: slide the next edge onto the current edge's line
        shifted_start = np.array(
            project_point_to_line(
                next_edge_start[0],
                next_edge_start[1],
                current_edge_start[0],
                current_edge_start[1],
                current_edge_end[0],
                current_edge_end[1],
            )
        )
        shifted_end = np.array(
            project_point_to_line(
                next_edge_end[0],
                next_edge_end[1],
                current_edge_start[0],
                current_edge_start[1],
                current_edge_end[0],
                current_edge_end[1],
            )
        )
        # Persist the shifted edge so later iterations see it
        oriented_edges[next_index][0] = shifted_start
        oriented_edges[next_index][1] = shifted_end
        return [shifted_start]

    # Too far apart: bridge the edges with a short perpendicular segment
    midpoint = (current_edge_end + next_edge_start) / 2
    bridge_on_current = project_point_to_line(
        midpoint[0],
        midpoint[1],
        current_edge_start[0],
        current_edge_start[1],
        current_edge_end[0],
        current_edge_end[1],
    )
    bridge_on_next = project_point_to_line(
        midpoint[0],
        midpoint[1],
        next_edge_start[0],
        next_edge_start[1],
        next_edge_end[0],
        next_edge_end[1],
    )
    return [np.array(bridge_on_current), np.array(bridge_on_next)]
def preprocess_polygon(
    polygon: Polygon,
    simplify: bool,
    simplify_tolerance: float,
) -> Polygon:
    """
    Optionally simplify a polygon, then densify its boundary.

    Parameters:
    -----------
    polygon : shapely.geometry.Polygon
        Input polygon.
    simplify : bool
        If True, apply topology-preserving simplification first.
    simplify_tolerance : float
        Simplification tolerance; the boundary is then segmentized with a
        maximum segment length of 5x this value.

    Returns:
    --------
    shapely.geometry.Polygon
        The preprocessed polygon, or the original polygon when
        simplification produced an unusable geometry.
    """
    if simplify:
        simplified = polygon.simplify(
            tolerance=simplify_tolerance, preserve_topology=True
        )
        # Fall back to the original geometry if simplification collapsed it
        # (BUG FIX: the check must be on the simplified result, not the input)
        if simplified.is_empty:
            return polygon
        if isinstance(simplified, Polygon):
            polygon = simplified
        else:
            # Simplification changed the geometry type; keep the original
            return polygon

    # Densify so later per-edge analysis has regularly spaced vertices
    polygon = polygon.segmentize(max_segment_length=simplify_tolerance * 5)

    return polygon
def regularize_single_polygon(
    polygon: Polygon,
    parallel_threshold: float,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
    allow_circles: bool,
    circle_threshold: float,
    include_metadata: bool,
    simplify: bool,
    simplify_tolerance: float,
) -> dict[str, Any]:
    """
    Regularize a Shapely polygon by aligning edges to principal directions.

    Parameters:
    -----------
    polygon : shapely.geometry.Polygon
        Input polygon to regularize
    parallel_threshold : float
        Distance threshold for parallel line handling
    allow_45_degree : bool
        If True, allows 45-degree orientations relative to the main direction
    diagonal_threshold_reduction : float
        Reduction factor in degrees to reduce the likelihood of diagonal
        edges being created
    allow_circles : bool
        If True, attempts to detect polygons that are nearly circular and
        replaces them with perfect circles
    circle_threshold : float
        Intersection over Union (IoU) threshold used for circle detection.
        Value between 0 and 1.
    include_metadata : bool
        If True, computes the IoU between input and output polygons
        for the 'iou' field of the result.
    simplify : bool
        If True, simplify the polygon before regularization.
    simplify_tolerance : float
        Tolerance used for simplification and boundary segmentization.

    Returns:
    --------
    dict
        Dictionary with keys:
        - 'geometry': regularized polygon (or the original on failure)
        - 'iou': IoU of input vs output (0 unless include_metadata is True)
        - 'main_direction': dominant direction used for the exterior ring
    """
    if not isinstance(polygon, Polygon):
        # Return unmodified if not a polygon
        warnings.warn(
            f"Unsupported geometry type: {type(polygon)}. Returning original."
        )
        return {"geometry": polygon, "iou": 0, "main_direction": 0}

    preprocessed = preprocess_polygon(
        polygon,
        simplify=simplify,
        simplify_tolerance=simplify_tolerance,
    ).buffer(0)

    # buffer(0) may return an empty or non-Polygon geometry (e.g. MultiPolygon);
    # bail out early rather than failing on `.exterior` below.
    if preprocessed.is_empty or not isinstance(preprocessed, Polygon):
        warnings.warn(
            "Preprocessing produced an unusable geometry. Returning original."
        )
        return {"geometry": polygon, "iou": 0, "main_direction": 0}

    polygon = preprocessed

    exterior_coordinates = np.array(polygon.exterior.coords)

    regularized_exterior, main_direction = regularize_coordinate_array(
        coordinates=exterior_coordinates,
        parallel_threshold=parallel_threshold,
        allow_45_degree=allow_45_degree,
        diagonal_threshold_reduction=diagonal_threshold_reduction,
    )

    if allow_circles:
        # Compare against a perfect circle of equal area centred on the centroid
        radius = np.sqrt(polygon.area / np.pi)
        perfect_circle = polygon.centroid.buffer(radius, quad_segs=42)
        iou = (
            perfect_circle.intersection(polygon).area
            / perfect_circle.union(polygon).area
        )
        if iou > circle_threshold:
            # Close enough to circular: use the circle outline instead
            regularized_exterior = np.array(perfect_circle.exterior.coords, dtype=float)

    # Regularize interior rings (holes) with the same settings
    regularized_interiors: List[np.ndarray] = []
    for interior in polygon.interiors:
        interior_coordinates = np.array(interior.coords)
        regularized_interior, _ = regularize_coordinate_array(
            coordinates=interior_coordinates,
            parallel_threshold=parallel_threshold,
            allow_45_degree=allow_45_degree,
            diagonal_threshold_reduction=diagonal_threshold_reduction,
        )
        regularized_interiors.append(regularized_interior)

    # Create new polygon
    try:
        # Convert coordinates to LinearRings
        exterior_ring = LinearRing(regularized_exterior)
        interior_rings = [LinearRing(r) for r in regularized_interiors]

        # buffer(0) repairs any self-intersections introduced by snapping
        regularized_polygon = Polygon(exterior_ring, interior_rings).buffer(0)
        if include_metadata:
            final_iou = (
                regularized_polygon.intersection(polygon).area
                / regularized_polygon.union(polygon).area
            )
        else:
            final_iou = 0

        return {
            "geometry": regularized_polygon,
            "iou": final_iou,
            "main_direction": main_direction,
        }
    except Exception as e:
        # If there's an error creating the polygon, return the original
        warnings.warn(f"Error creating regularized polygon: {e}. Returning original.")
        return {"geometry": polygon, "iou": 0, "main_direction": 0}
geospatial data. This library helps clean up and standardize building polygon geometries by aligning edges to principal directions." 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | dependencies = [ 12 | "geopandas>=1.0.0", 13 | "numpy>=2.0.0", 14 | ] 15 | [tool.setuptools.dynamic] 16 | version = {attr = "buildingregulariser.__version__.__version__"} 17 | 18 | 19 | [dependency-groups] 20 | dev = [ 21 | "folium>=0.19.5", 22 | "ipykernel>=6.29.5", 23 | "mapclassify>=2.8.1", 24 | "matplotlib>=3.9.4", 25 | "pytest>=8.3.5", 26 | "rasterio>=1.4.3", 27 | ] 28 | [[tool.uv.index]] 29 | name = "testpypi" 30 | url = "https://test.pypi.org/simple/" 31 | publish-url = "https://test.pypi.org/legacy/" 32 | explicit = true 33 | -------------------------------------------------------------------------------- /test data/input/test_data.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DPIRD-DMA/Building-Regulariser/6583bdd1b91ebe4ec0b2c4d44203e8557f2baeff/test data/input/test_data.gpkg -------------------------------------------------------------------------------- /test data/input/test_data_multi_geom.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DPIRD-DMA/Building-Regulariser/6583bdd1b91ebe4ec0b2c4d44203e8557f2baeff/test data/input/test_data_multi_geom.gpkg -------------------------------------------------------------------------------- /tests/test_end_to_end.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any 3 | 4 | import geopandas as gpd 5 | import pytest 6 | from shapely.geometry.base import BaseGeometry 7 | 8 | from buildingregulariser import regularize_geodataframe 9 | 10 | cwd = Path(__file__).parent 11 | OUTPUT_FILE = cwd.parent / "test data/output/test output.gpkg" 12 | if OUTPUT_FILE.exists(): 13 | OUTPUT_FILE.unlink() 14 | 
INPUT_FILE = cwd.parent / "test data/input/test_data.gpkg" 15 | DEFAULT_PARAMS = dict( 16 | parallel_threshold=1.0, 17 | simplify=True, 18 | simplify_tolerance=0.5, 19 | allow_45_degree=True, 20 | diagonal_threshold_reduction=15, 21 | allow_circles=True, 22 | circle_threshold=0.9, 23 | num_cores=1, 24 | include_metadata=False, 25 | neighbor_alignment=False, 26 | neighbor_search_distance=100, 27 | neighbor_max_rotation=10, 28 | ) 29 | 30 | assert INPUT_FILE.exists(), f"Test file missing: {INPUT_FILE}" 31 | input_gdf = gpd.read_file(INPUT_FILE) 32 | 33 | 34 | # --- Geometry Quality Checks --- 35 | def iou(poly1: BaseGeometry, poly2: BaseGeometry) -> float: 36 | inter = poly1.intersection(poly2).area 37 | union = poly1.union(poly2).area 38 | return inter / union if union != 0 else 0 39 | 40 | 41 | def check_geometry_quality(inputs, outputs, iou_threshold=0.4, perimeter_tolerance=0.6): 42 | assert not outputs.empty 43 | assert outputs.geometry.is_valid.all() 44 | assert outputs.geometry.notnull().all() 45 | assert len(inputs) == len( 46 | outputs 47 | ), f"Row count mismatch: {len(inputs)} != {len(outputs)}" 48 | 49 | for idx, (i_geom, o_geom) in enumerate(zip(inputs.geometry, outputs.geometry)): 50 | assert i_geom.intersects(o_geom), f"No intersection for feature {idx}" 51 | 52 | overlap_iou = iou(i_geom, o_geom) 53 | assert ( 54 | overlap_iou >= iou_threshold 55 | ), f"Low IoU for feature {idx}: {overlap_iou:.2f}" 56 | 57 | in_perim = i_geom.length 58 | out_perim = o_geom.length 59 | min_perim = in_perim * (1 - perimeter_tolerance) 60 | max_perim = in_perim * (1 + perimeter_tolerance) 61 | 62 | assert ( 63 | min_perim <= out_perim <= max_perim 64 | ), f"Perimeter out of range for feature {idx}: {out_perim:.2f} (expected {min_perim:.2f}–{max_perim:.2f})" 65 | 66 | 67 | # --- Parametrized Tests --- 68 | @pytest.mark.parametrize( 69 | "param,values", 70 | [ 71 | ("parallel_threshold", [0.5, 2.0, 5.0]), 72 | ("simplify", [True, False]), 73 | ("simplify_tolerance", [0.3, 
1.0, 3.0]), 74 | ("allow_45_degree", [True, False]), 75 | ("diagonal_threshold_reduction", [0, 22.5, 45]), 76 | ("allow_circles", [True, False]), 77 | ("circle_threshold", [0.5, 0.75, 0.99]), 78 | ("num_cores", [0, 1, 4]), 79 | ("include_metadata", [False, True]), 80 | ("neighbor_alignment", [False, True]), 81 | ("neighbor_search_distance", [0, 100, 350]), 82 | ("neighbor_max_rotation", [0, 22.5, 45]), 83 | ], 84 | ) 85 | def test_regularize_param_variants(param, values): 86 | for val in values: 87 | config: dict[str, Any] = DEFAULT_PARAMS.copy() 88 | if param in { 89 | "simplify", 90 | "allow_45_degree", 91 | "allow_circles", 92 | "include_metadata", 93 | "neighbor_alignment", 94 | }: 95 | config[param] = bool(val) 96 | elif param in {"num_cores"}: 97 | config[param] = int(val) 98 | else: 99 | config[param] = val 100 | result = regularize_geodataframe(geodataframe=input_gdf.copy(), **config) 101 | layer_name = f"{param}_{str(val).replace('.', '_')}" 102 | result.to_file(OUTPUT_FILE, layer=layer_name, driver="GPKG") 103 | print(f"Saved layer '{layer_name}' to {OUTPUT_FILE}") 104 | check_geometry_quality(input_gdf, result) 105 | --------------------------------------------------------------------------------