├── .gitignore
├── .vscode
└── settings.json
├── Example use.ipynb
├── LICENSE
├── README.md
├── buildingregulariser
├── __init__.py
├── __version__.py
├── coordinator.py
├── geometry_utils.py
├── neighbor_alignment.py
└── regularization.py
├── examples
├── 1.png
└── 2.png
├── pyproject.toml
├── test data
└── input
│ ├── test_data.gpkg
│ └── test_data_multi_geom.gpkg
├── tests
└── test_end_to_end.py
└── uv.lock
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # Ruff stuff:
171 | .ruff_cache/
172 |
173 | # PyPI configuration file
174 | .pypirc
175 | private test data/*
176 | *.gpkg-shm
177 | *.gpkg-wal
178 | *__pycache__*
179 | buildingregulariser.egg-info/*
180 | .DS_Store
181 | test data/input/old/*
182 | test data/output/*
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "cSpell.words": [
3 | "arctan",
4 | "chunksize",
5 | "fromiter",
6 | "geodataframe",
7 | "ndarray",
8 | "reproject",
9 | "reprojection",
10 | "segmentize",
11 | "segs",
12 | "sindex"
13 | ]
14 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 DPIRD-DMA
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Building Regulariser
2 |
3 | A Python library for regularizing building footprints in geospatial data. This library helps clean up and standardize building polygon geometries by aligning edges to principal directions. Built as an open source alternative to the [ArcGIS Regularize Building Footprint (3D Analyst) tool](https://pro.arcgis.com/en/pro-app/latest/tool-reference/3d-analyst/regularize-building-footprint.htm).
4 |
5 | []()
6 | []()
7 |
8 | ## Example Results
9 |
10 | Before and after regularization:
11 |
12 |
13 |

14 |

15 |
16 |
17 | ## Try in Colab
18 |
19 | [![Colab_Button]][Link]
20 |
21 | [Link]: https://colab.research.google.com/drive/1xeFxpQCAybgbNjmopiHZb7_Tz1lv8k6A?usp=sharing 'Try Building Regulariser In Colab'
22 |
23 | [Colab_Button]: https://img.shields.io/badge/Try%20in%20Colab-grey?style=for-the-badge&logo=google-colab
24 |
25 | ## Overview
26 |
27 | Building footprints extracted from remote sensing imagery often contain noise, irregular edges, and geometric inconsistencies. This library provides tools to regularize these footprints by:
28 |
29 | - Aligning edges to principal directions (orthogonal and optional 45-degree angles)
30 | - Converting near-rectangular buildings to perfect rectangles
31 | - Converting near-circular buildings to perfect circles
32 | - Simplifying complex polygons while maintaining their essential shape
33 | - Supporting parallel processing for efficient computation with large datasets
34 | - Fine-tune building alignment with neighboring buildings
35 |
36 | Inspired by [RS-building-regularization](https://github.com/niecongchong/RS-building-regularization), this library takes a geometric approach to building regularization with improvements for usability and integration with the GeoPandas ecosystem.
37 |
38 | ## Installation
39 |
40 | ```bash
41 | pip install buildingregulariser
42 | ```
43 | or
44 | ```bash
45 | conda install conda-forge::buildingregulariser
46 | ```
47 | or
48 | ```bash
49 | uv add buildingregulariser
50 | ```
51 | ## Quick Start
52 |
53 | ```python
54 | import geopandas as gpd
55 | from buildingregulariser import regularize_geodataframe
56 |
57 | # Load your building footprints
58 | buildings = gpd.read_file("buildings.gpkg")
59 |
60 | # Regularize the building footprints
61 | regularized_buildings = regularize_geodataframe(
62 | buildings,
63 | )
64 |
65 | # Save the results
66 | regularized_buildings.to_file("regularized_buildings.gpkg")
67 | ```
68 |
69 | ## Features
70 |
71 | - **GeoDataFrame Integration**: Works seamlessly with GeoPandas GeoDataFrames
72 | - **Polygon Regularization**: Aligns edges to principal directions
73 | - **45-Degree Support**: Optional alignment to 45-degree angles
74 | - **Align with neighboring buildings**: Align each building with neighboring buildings
75 | - **Circle Detection**: Identifies and converts near-circular shapes to perfect circles
76 | - **Edge Simplification**: Reduces the number of vertices while preserving shape
77 | - **Parallel Processing**: Utilizes multiple CPU cores for faster processing of large datasets
78 |
79 | ## Usage Examples
80 |
81 | ### Basic Regularization
82 |
83 | ```python
84 | from buildingregulariser import regularize_geodataframe
85 | import geopandas as gpd
86 |
87 | buildings = gpd.read_file("buildings.gpkg")
88 | regularized = regularize_geodataframe(buildings)
89 | ```
90 |
91 | ### Fine-tuning Regularization Parameters
92 |
93 | ```python
94 | regularized = regularize_geodataframe(
95 | buildings,
96 | parallel_threshold=2.0, # Higher values allow less edge alignment
97 | simplify_tolerance=0.5, # Controls simplification level, should be 2-3 x the raster pixel size
98 | allow_45_degree=True, # Enable 45-degree angles
99 | allow_circles=True, # Enable circle detection
100 |     circle_threshold=0.9,    # IOU threshold for circle detection
101 |     neighbor_alignment=True, # After regularization try to align each building with neighboring buildings
102 |     neighbor_search_distance=100.0, # The search distance around each building to find neighbors
103 |     neighbor_max_rotation=10, # The maximum rotation allowed to align with neighbors
104 | )
105 | ```
106 |
107 | ## Parameters
108 |
109 | - **geodataframe**: Input GeoDataFrame with polygon geometries
110 | - **parallel_threshold**: Distance threshold for handling parallel lines (default: 1.0)
111 | - **simplify**: If True, applies simplification to the geometry (default: True)
112 | - **simplify_tolerance**: Tolerance for simplification (default: 0.5)
113 | - **allow_45_degree**: If True, allows edges to be oriented at 45-degree angles (default: True)
114 | - **diagonal_threshold_reduction**: Used to reduce the chance of diagonal edges being generated, can be from 0 to 22.5 (default: 15.0)
115 | - **allow_circles**: If True, detects and converts near-circular shapes to perfect circles (default: True)
116 | - **circle_threshold**: Intersection over Union (IoU) threshold for circle detection (default: 0.9)
117 | - **num_cores**: Number of CPU cores to use for parallel processing (default: 0, which uses all available cores)
118 | - **include_metadata**: Include the main direction, IOU, perimeter and aligned_direction (if used) in output gdf
119 | - **neighbor_alignment**: If True, try to align each building with neighboring buildings (default: False)
120 | - **neighbor_search_distance**: The distance to find neighboring buildings (default: 100.0)
121 | - **neighbor_max_rotation**: The maximum allowable rotation to align with neighbors (default: 10)
122 |
123 |
124 | ## Returns
125 |
126 | - A new GeoDataFrame with regularized polygon geometries
127 |
128 | ## How It Works
129 |
130 | 1. **Edge Analysis**: Analyzes each polygon to identify principal directions
131 | 2. **Edge Orientation**: Aligns edges to be parallel, perpendicular, or at 45 degrees to the main direction
132 | 3. **Circle Detection**: Optionally identifies shapes that are nearly circular and converts them to perfect circles
133 | 4. **Edge Connection**: Ensures proper connectivity between oriented edges
134 | 5. **Angle Enforcement**: Post-processing to ensure target angles are precisely maintained
135 | 6. **Neighbor Alignment**: Optionally align each building with neighboring buildings, via rotation around centroid.
136 |
137 | ## License
138 |
139 | This project is licensed under the MIT License
140 |
141 | ## Acknowledgments
142 |
143 | This library was inspired by the [RS-building-regularization](https://github.com/niecongchong/RS-building-regularization) project, with improvements for integration with the GeoPandas ecosystem and enhanced regularization algorithms.
--------------------------------------------------------------------------------
/buildingregulariser/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Polygon Regularization Package
3 |
4 | A package for regularizing polygons by aligning edges to principal directions.
5 | """
6 |
7 | from .__version__ import __version__
8 | from .coordinator import regularize_geodataframe
9 |
10 | # Package-wide exports
11 | __all__ = [
12 | "regularize_geodataframe",
13 | "__version__",
14 | ]
15 |
--------------------------------------------------------------------------------
/buildingregulariser/__version__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.2"
2 |
--------------------------------------------------------------------------------
/buildingregulariser/coordinator.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | from multiprocessing import Pool, cpu_count
3 | from typing import Optional, Union
4 |
5 | import geopandas as gpd
6 | import pandas as pd
7 | import pyproj
8 |
9 | from .neighbor_alignment import align_with_neighbor_polygons
10 | from .regularization import regularize_single_polygon
11 |
12 |
def cleanup_geometry(
    result_geodataframe: gpd.GeoDataFrame, simplify_tolerance: float
) -> gpd.GeoDataFrame:
    """
    Clean up geometries in a GeoDataFrame.

    Drops empty or missing geometries, removes small slivers with a
    grow / shrink / grow buffer sequence, and simplifies away redundant
    vertices introduced by earlier processing.

    Parameters:
    -----------
    result_geodataframe : geopandas.GeoDataFrame
        GeoDataFrame with geometries to clean.
    simplify_tolerance : float
        Tolerance that drives both the sliver-removal buffer size and
        the final simplification.

    Returns:
    --------
    geopandas.GeoDataFrame
        GeoDataFrame with cleaned geometries.
    """
    gdf = result_geodataframe

    # Drop rows whose geometry is empty or missing (e.g. failed processing)
    gdf = gdf[~gdf.geometry.is_empty]
    gdf = gdf[gdf.geometry.notna()]
    if gdf.empty:
        return gdf

    # Buffer size is a small fraction of the simplify tolerance
    buffer_size = simplify_tolerance / 50

    # Sliver removal: grow, shrink past the original, grow back
    for factor in (1, -2, 1):
        gdf["geometry"] = gdf.geometry.buffer(
            buffer_size * factor, cap_style="square", join_style="mitre"
        )

    # Buffering can collapse small geometries to empty
    gdf = gdf[~gdf.geometry.is_empty]
    if gdf.empty:
        return gdf

    # Remove collinear vertices left behind by buffering/regularization
    gdf["geometry"] = gdf.geometry.simplify(
        tolerance=buffer_size, preserve_topology=True
    )

    # Final guard against geometries emptied by simplification
    return gdf[~gdf.geometry.is_empty]
72 |
73 |
def regularize_geodataframe(
    geodataframe: gpd.GeoDataFrame,
    parallel_threshold: float = 1.0,
    target_crs: Optional[Union[str, pyproj.CRS]] = None,
    simplify: bool = True,
    simplify_tolerance: float = 0.5,
    allow_45_degree: bool = True,
    diagonal_threshold_reduction: float = 15,
    allow_circles: bool = True,
    circle_threshold: float = 0.9,
    num_cores: int = 0,
    include_metadata: bool = False,
    neighbor_alignment: bool = False,
    neighbor_search_distance: float = 100.0,
    neighbor_max_rotation: float = 10,
) -> gpd.GeoDataFrame:
    """
    Regularize polygon geometries in a GeoDataFrame by aligning edges.

    Edges are made parallel or perpendicular (optionally also at 45
    degrees) to each polygon's main direction. Handles optional
    reprojection, simplification, sliver cleanup, neighbor alignment,
    and parallel processing.

    Parameters:
    -----------
    geodataframe : geopandas.GeoDataFrame
        Input GeoDataFrame with polygon or multipolygon geometries.
    parallel_threshold : float, optional
        Distance threshold for merging nearly parallel adjacent edges,
        in CRS units. Defaults to 1.0.
    target_crs : str or pyproj.CRS, optional
        If given, the input is reprojected to this CRS before
        regularization. Defaults to None (no reprojection).
    simplify : bool, optional
        If True, simplify each geometry before regularization.
        Defaults to True.
    simplify_tolerance : float, optional
        Tolerance for simplification and cleanup, in CRS units.
        Defaults to 0.5.
    allow_45_degree : bool, optional
        If True, edges may also be oriented at 45 degrees to the main
        direction. Defaults to True.
    diagonal_threshold_reduction : float, optional
        Reduction in degrees that lowers the likelihood of diagonal
        edges; valid range 0 - 22.5. Defaults to 15.
    allow_circles : bool, optional
        If True, near-circular polygons are replaced with perfect
        circles. Defaults to True.
    circle_threshold : float, optional
        IoU threshold for circle detection, between 0 and 1.
        Defaults to 0.9.
    num_cores : int, optional
        CPU cores for parallel processing; 1 runs sequentially.
        Defaults to 0 (all available cores).
    include_metadata : bool, optional
        If True, keep regularization metadata columns in the output.
        Defaults to False.
    neighbor_alignment : bool, optional
        If True, rotate polygons after regularization to align with
        their neighbors. Defaults to False.
    neighbor_search_distance : float, optional
        Search radius for finding neighbors (if `neighbor_alignment`
        is True), in CRS units. Defaults to 100.0.
    neighbor_max_rotation : float, optional
        Maximum rotation in degrees when aligning with neighbors (if
        `neighbor_alignment` is True). Defaults to 10.

    Returns:
    --------
    geopandas.GeoDataFrame
        New GeoDataFrame with regularized polygon geometries. Original
        attributes are preserved; geometries that failed processing may
        be dropped.
    """
    # Work on an exploded copy so every polygon part is handled on its own
    working = geodataframe.copy().explode(ignore_index=True)

    if target_crs is not None:
        working = working.to_crs(target_crs)

    # 0 (or negative) means "use every available core"
    if num_cores <= 0:
        num_cores = cpu_count()

    worker = partial(
        regularize_single_polygon,
        parallel_threshold=parallel_threshold,
        allow_45_degree=allow_45_degree,
        diagonal_threshold_reduction=diagonal_threshold_reduction,
        allow_circles=allow_circles,
        circle_threshold=circle_threshold,
        include_metadata=include_metadata,
        simplify=simplify,
        simplify_tolerance=simplify_tolerance,
    )

    # Sequential path avoids process-pool overhead for num_cores == 1
    if num_cores == 1:
        processed = list(map(worker, working["geometry"]))
    else:
        with Pool(num_cores) as pool:
            processed = pool.map(worker, working["geometry"])

    # Indexes align: both frames use a fresh RangeIndex after explode
    results_df = pd.DataFrame(processed)
    for column in ("geometry", "iou", "main_direction"):
        working[column] = results_df[column]

    # Remove slivers and redundant vertices
    working = cleanup_geometry(
        result_geodataframe=working, simplify_tolerance=simplify_tolerance
    )

    if neighbor_alignment:
        working = align_with_neighbor_polygons(
            gdf=working,
            buffer_size=neighbor_search_distance,
            max_rotation=neighbor_max_rotation,
            include_metadata=include_metadata,
            num_cores=num_cores,
        )

    if not include_metadata:
        # Strip any metadata columns that made it into the result
        metadata_columns = ["iou", "main_direction", "perimeter", "aligned_direction"]
        present = [col for col in metadata_columns if col in working.columns]
        working = working.drop(columns=present)

    return working
220 |
--------------------------------------------------------------------------------
/buildingregulariser/geometry_utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import List, Tuple, Union
3 |
4 | import numpy as np
5 |
6 |
def calculate_distance(
    point_1: np.ndarray,
    point_2: np.ndarray,
) -> float:
    """
    Return the Euclidean distance between two 2D points.

    Parameters:
    -----------
    point_1 : np.ndarray
        First point coordinates
    point_2 : np.ndarray
        Second point coordinates

    Returns:
    --------
    float
        Euclidean distance
    """
    # math.hypot is the fastest option for scalar (non-vectorized) calls
    return math.hypot(point_1[0] - point_2[0], point_1[1] - point_2[1])
29 |
30 |
def calculate_azimuth_angle(start_point: np.ndarray, end_point: np.ndarray) -> float:
    """
    Return the angle of the line from start_point to end_point, in degrees.

    The angle is measured from the positive x-axis and normalized to
    the range [0, 360).

    Parameters:
    -----------
    start_point : np.ndarray
        Starting point coordinates
    end_point : np.ndarray
        Ending point coordinates

    Returns:
    --------
    float
        Angle in degrees in the range [0, 360)
    """
    delta_x = end_point[0] - start_point[0]
    delta_y = end_point[1] - start_point[1]
    # atan2 yields (-180, 180]; the modulo maps it into [0, 360)
    return math.degrees(math.atan2(delta_y, delta_x)) % 360
53 |
54 |
def create_line_equation(
    point1: np.ndarray,
    point2: np.ndarray,
) -> Tuple[float, float, float]:
    """
    Build the coefficients of the line through two points.

    The returned tuple (A, B, C) satisfies A*x + B*y = C for every
    point (x, y) on the line (note: equals C, not "+ C = 0").

    Parameters:
    -----------
    point1, point2 : np.ndarray
        Two points defining the line

    Returns:
    --------
    tuple
        Coefficients (A, B, C) with A*x + B*y = C
    """
    coeff_a = point1[1] - point2[1]
    coeff_b = point2[0] - point1[0]
    # Right-hand side: negated cross product of the two points
    coeff_c = point2[0] * point1[1] - point1[0] * point2[1]
    return coeff_a, coeff_b, coeff_c
76 |
77 |
def calculate_line_intersection(
    line1: Tuple[float, float, float],
    line2: Tuple[float, float, float],
) -> Union[Tuple[float, float], None]:
    """
    Return the intersection point of two lines, or None if parallel.

    Lines are coefficient tuples (A, B, C) with A*x + B*y = C, as
    produced by `create_line_equation`. Solved with Cramer's rule.

    Parameters:
    -----------
    line1, line2 : tuple
        Line coefficients (A, B, C)

    Returns:
    --------
    tuple or None
        Coordinates of the intersection point, or None for parallel lines
    """
    a1, b1, c1 = line1
    a2, b2, c2 = line2

    determinant = a1 * b2 - b1 * a2
    if determinant == 0:
        # Zero determinant: the lines are parallel (or coincident)
        return None

    x = (c1 * b2 - b1 * c2) / determinant
    y = (a1 * c2 - c1 * a2) / determinant
    return x, y
104 |
105 |
def calculate_parallel_line_distance(
    line1: Tuple[float, float, float],
    line2: Tuple[float, float, float],
) -> float:
    """
    Calculate the distance between two parallel lines.

    Lines are coefficient tuples (A, B, C) satisfying A*x + B*y = C,
    as produced by `create_line_equation`.

    The previous implementation normalized by (A + eps), which raised
    ZeroDivisionError when A == -eps and lost precision for horizontal
    or near-horizontal lines (A close to 0). This version uses the
    exact normalized form: each line is scaled so (A, B) is a unit
    normal, making C the signed distance from the origin.

    Parameters:
    -----------
    line1, line2 : tuple
        Line coefficients (A, B, C) with A*x + B*y = C

    Returns:
    --------
    float
        Distance between the lines (0.0 for degenerate inputs where
        A == B == 0, which do not describe a line)
    """
    a1, b1, c1 = line1
    a2, b2, c2 = line2

    norm1 = math.hypot(a1, b1)
    norm2 = math.hypot(a2, b2)
    if norm1 == 0 or norm2 == 0:
        # Degenerate input: (0, 0, C) is not a line
        return 0.0

    # The normals may point in opposite directions (lines built from
    # reversed point order); flip the second signed distance so both
    # are measured on the same side of the origin.
    sign = 1.0 if (a1 * a2 + b1 * b2) >= 0 else -1.0
    return abs(c1 / norm1 - sign * (c2 / norm2))
136 |
137 |
138 | def project_point_to_line(
139 | point_x: float,
140 | point_y: float,
141 | line_x1: float,
142 | line_y1: float,
143 | line_x2: float,
144 | line_y2: float,
145 | ) -> Tuple[float, float]:
146 | """
147 | Project a point onto a line.
148 |
149 | Parameters:
150 | -----------
151 | point_x, point_y : float
152 | Coordinates of the point to project
153 | line_x1, line_y1, line_x2, line_y2 : float
154 | Coordinates of two points defining the line
155 |
156 | Returns:
157 | --------
158 | Tuple[float, float]
159 | Coordinates of the projected point
160 | """
161 | eps = 1e-10
162 | dx = line_x2 - line_x1
163 | dy = line_y2 - line_y1
164 | denom = dx * dx + dy * dy + eps
165 |
166 | x = (
167 | point_x * dx * dx
168 | + point_y * dy * dx
169 | + (line_x1 * line_y2 - line_x2 * line_y1) * dy
170 | ) / denom
171 |
172 | y = (
173 | point_x * dx * dy
174 | + point_y * dy * dy
175 | + (line_x2 * line_y1 - line_x1 * line_y2) * dx
176 | ) / denom
177 |
178 | return (x, y)
179 |
180 |
def rotate_point(
    point: np.ndarray,
    center: np.ndarray,
    angle_degrees: float,
) -> Tuple[float, float]:
    """
    Rotate a point clockwise around a center point.

    Parameters:
    -----------
    point : np.ndarray
        Point to rotate
    center : np.ndarray
        Center of rotation
    angle_degrees : float
        Rotation angle in degrees (positive rotates clockwise)

    Returns:
    --------
    tuple
        Rotated point coordinates
    """
    angle = math.radians(angle_degrees)
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)

    # Offset from the rotation center
    offset_x = point[0] - center[0]
    offset_y = point[1] - center[1]

    # Clockwise rotation matrix applied to the offset
    rotated_x = offset_x * cos_a + offset_y * sin_a
    rotated_y = offset_y * cos_a - offset_x * sin_a

    # Shift back to the original frame
    return (rotated_x + center[0], rotated_y + center[1])
224 |
225 |
def rotate_edge(
    start_point: np.ndarray, end_point: np.ndarray, rotation_angle: float
) -> List[np.ndarray]:
    """
    Rotate an edge around its midpoint by the given angle.

    `rotate_point` rotates clockwise for positive angles, so the sign
    is negated here: a positive `rotation_angle` rotates the edge
    counter-clockwise.

    Note: the original implementation had separate branches for
    positive and negative angles, but they were identical — for a
    negative angle, abs(rotation_angle) == -rotation_angle — so both
    reduce to rotating by -rotation_angle. The branches are collapsed.

    Parameters:
    -----------
    start_point : numpy.ndarray
        Start point of the edge
    end_point : numpy.ndarray
        End point of the edge
    rotation_angle : float
        Angle to rotate by in degrees

    Returns:
    --------
    list
        List containing the rotated start and end points
    """
    # Zero rotation: return the points unchanged (preserves input dtype)
    if rotation_angle == 0:
        return [np.array(start_point), np.array(end_point)]

    midpoint = (start_point + end_point) / 2
    return [
        np.array(rotate_point(start_point, midpoint, -rotation_angle)),
        np.array(rotate_point(end_point, midpoint, -rotation_angle)),
    ]
259 |
--------------------------------------------------------------------------------
/buildingregulariser/neighbor_alignment.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from functools import partial
3 | from multiprocessing import Pool
4 | from typing import Any
5 |
6 | import geopandas as gpd
7 | import pandas as pd
8 | from shapely.affinity import rotate
9 |
10 |
def process_row(
    idx: int,
    buffer_size: float,
    max_rotation: float,
    gdf: gpd.GeoDataFrame,
) -> dict[str, Any]:
    """
    Align one polygon's orientation with its neighbours' dominant direction.

    Neighbours are found within `buffer_size` of the polygon. Each
    neighbour votes for its own main direction and for 90 minus that
    direction, weighted by perimeter. If one of the top-weighted
    directions is within `max_rotation` degrees of the polygon's own
    direction, the polygon is rotated about its centroid to match it.

    Parameters:
    -----------
    idx : int
        Index of the polygon row in the GeoDataFrame.
    buffer_size : float
        Search distance used to define the neighbourhood around the polygon.
    max_rotation : float
        Maximum allowed rotation (in degrees) from the current to the
        proposed direction.
    gdf : gpd.GeoDataFrame
        Full GeoDataFrame with the required columns:
        - 'geometry': polygon geometry
        - 'main_direction': original orientation angle
        - 'perimeter': polygon perimeter (used as the vote weight)

    Returns:
    --------
    dict
        Keys: 'idx' (processed row index), 'geometry' (original or
        rotated polygon), 'aligned_direction' (direction used for
        alignment, or the original direction if none qualified).
    """
    row = gdf.iloc[idx]
    search_area = row.geometry.buffer(buffer_size)

    # Spatial index narrows the candidate set before any heavy geometry work
    candidate_positions = gdf.sindex.query(search_area, predicate="intersects")
    neighbors = gdf.iloc[candidate_positions]

    # Each neighbour votes for its main direction and its complement
    # (90 - direction), weighted by perimeter
    directions = pd.concat(
        [neighbors["main_direction"], 90 - neighbors["main_direction"]]
    )
    weights = pd.concat([neighbors["perimeter"], neighbors["perimeter"]])
    weight_by_direction = weights.groupby(directions).sum()

    # Heaviest directions first; stable sort keeps the ascending-direction
    # order (from groupby) for equal weights
    ranked = weight_by_direction.sort_values(ascending=False, kind="stable")

    # Default: keep the polygon as-is
    result = {
        "idx": idx,
        "geometry": row.geometry,
        "aligned_direction": row.main_direction,
    }

    # Accept the first of the top four directions within the rotation budget
    for candidate_direction in ranked.index[:4]:
        delta = row.main_direction - candidate_direction
        if abs(delta) <= max_rotation:
            result["aligned_direction"] = candidate_direction
            result["geometry"] = rotate(row.geometry, -delta, origin="centroid")
            break

    return result
91 |
92 |
def align_with_neighbor_polygons(
    gdf: gpd.GeoDataFrame,
    num_cores: int,
    buffer_size: float,
    max_rotation: float,
    include_metadata: bool,
) -> gpd.GeoDataFrame:
    """
    Aligns the orientation of polygons in a GeoDataFrame based on their neighbors' dominant direction.

    Each polygon is evaluated in parallel. A buffer is used to identify neighboring polygons,
    which are then used to infer a dominant direction. If a suitable direction is found within
    a defined angular threshold, the polygon is rotated to match it.

    Parameters:
    -----------
    gdf : gpd.GeoDataFrame
        Input GeoDataFrame with 'geometry' and 'main_direction' columns.
    num_cores : int
        Number of processes to use for parallel processing.
    buffer_size : float
        Buffer distance for determining neighborhoods.
    max_rotation : float
        Maximum rotation angle allowed for alignment (in degrees).
    include_metadata : bool
        Whether to retain intermediate columns such as 'aligned_direction' and 'perimeter'.

    Returns:
    --------
    gpd.GeoDataFrame
        A copy of the original GeoDataFrame with aligned geometries. Intermediate metadata columns
        are included only if `include_metadata` is True.
    """
    # Explode multi-part geometries so each row holds exactly one polygon
    gdf = gdf.explode(ignore_index=True).copy()
    gdf["aligned_direction"] = gdf["main_direction"].copy()
    # Perimeter is the voting weight used by process_row
    gdf["perimeter"] = gdf.geometry.length

    process_row_partial = partial(
        process_row,
        buffer_size=buffer_size,
        max_rotation=max_rotation,
        gdf=gdf,
    )

    # Balance per-task scheduling overhead against worker utilisation
    row_count = len(gdf)
    chunksize = min(max(row_count // num_cores, 1), 5000)

    with Pool(processes=num_cores) as pool:
        results = pool.map(process_row_partial, range(row_count), chunksize=chunksize)

    # Write the (possibly rotated) geometries back into the frame
    for result in results:
        idx = result["idx"]
        gdf.at[idx, "geometry"] = result["geometry"]
        gdf.at[idx, "aligned_direction"] = result["aligned_direction"]

    # Drop intermediate columns unless the caller asked to keep them
    if not include_metadata:
        gdf = gdf.drop(columns=["aligned_direction", "perimeter"])

    return gdf
158 |
--------------------------------------------------------------------------------
/buildingregulariser/regularization.py:
--------------------------------------------------------------------------------
1 | import math
2 | import warnings
3 | from typing import Any, List, Tuple
4 |
5 | import numpy as np
6 | from shapely.geometry import LinearRing, Polygon
7 |
8 | from .geometry_utils import (
9 | calculate_azimuth_angle,
10 | calculate_distance,
11 | calculate_line_intersection,
12 | calculate_parallel_line_distance,
13 | create_line_equation,
14 | project_point_to_line,
15 | rotate_edge,
16 | rotate_point,
17 | )
18 |
19 |
def find_nearest_target_angle(
    current_azimuth: float, main_direction: float, allow_45_degree: bool
) -> float:
    """
    Return the allowed target azimuth (degrees, [0, 360)) closest to the input.

    Targets are the main direction plus multiples of 90 degrees, with the
    45-degree diagonals added when `allow_45_degree` is True.
    """
    # Candidate offsets relative to the main direction
    if allow_45_degree:
        # 225/270/315 are represented as -135/-90/-45
        candidate_offsets = [0.0, 45.0, 90.0, 135.0, 180.0, -45.0, -90.0, -135.0]
    else:
        candidate_offsets = [0.0, 90.0, 180.0, -90.0]

    # Signed difference from the main direction, normalized to [-180, 180]
    relative_angle = (current_azimuth - main_direction + 180) % 360 - 180

    # Shortest angular distance between the relative angle and a given offset
    def angular_gap(offset: float) -> float:
        return abs((relative_angle - offset + 180) % 360 - 180)

    # min() keeps the first candidate on ties, matching the original scan order
    best_offset = min(candidate_offsets, key=angular_gap)

    # Absolute target azimuth, normalized to [0, 360)
    return (main_direction + best_offset + 360) % 360
54 |
55 |
def enforce_angles_post_process(
    points: List[np.ndarray],
    main_direction: float,
    allow_45_degree: bool,
    angle_tolerance: float = 0.1,
    max_iterations: int = 2,
) -> List[np.ndarray]:
    """
    Adjusts vertices iteratively to enforce target angles for each segment.

    Runs multiple passes because rotating one segment's endpoint changes the
    azimuth of the adjacent segment.

    Parameters:
    -----------
    points : list[np.ndarray]
        List of numpy arrays representing polygon vertices. Assumed NOT closed
        (last point != first point). Length N >= 3.
    main_direction : float
        The main direction angle in degrees (0-360).
    allow_45_degree : bool
        Whether to allow 45-degree angles.
    angle_tolerance : float
        Allowable deviation from target angle in degrees.
        Default is 0.1 degrees.
    max_iterations : int
        Maximum number of full passes to adjust angles.
        Default is 2 iterations.

    Returns:
    --------
    list[np.ndarray]
        List of adjusted vertices (N points).
    """
    if len(points) < 3:
        return points  # Not enough points to form segments

    adjusted_points = [p.copy() for p in points]  # Work on a copy
    num_points = len(adjusted_points)

    for _ in range(max_iterations):
        changed = False  # Track whether this pass adjusted anything

        for i in range(num_points):
            p1 = adjusted_points[i]
            p2_idx = (i + 1) % num_points  # Wrap around for the end point index
            p2 = adjusted_points[p2_idx]

            # Coincident points have an undefined azimuth; skip this segment
            if calculate_distance(p1, p2) < 1e-7:
                continue

            current_azimuth = calculate_azimuth_angle(p1, p2)
            target_azimuth = find_nearest_target_angle(
                current_azimuth, main_direction, allow_45_degree
            )

            # Shortest rotation needed to reach the target, in (-180, 180]
            rotation_diff = (target_azimuth - current_azimuth + 180) % 360 - 180

            # Only rotate if the deviation exceeds the tolerance, to avoid jitter
            if abs(rotation_diff) > angle_tolerance:
                changed = True

                # The original code branched on the sign of rotation_diff, but
                # both arms reduce to rotating by -rotation_diff
                # (abs(x) == -x for negative x), so a single call suffices.
                new_p2 = rotate_point(p2, p1, -rotation_diff)

                # Update the endpoint so the next segment sees the new position
                adjusted_points[p2_idx] = np.array(new_p2)

        # Converged: no point moved beyond tolerance in this pass
        if not changed:
            break

    # Return the list of N adjusted unique points
    return adjusted_points
142 |
143 |
def regularize_coordinate_array(
    coordinates: np.ndarray,
    parallel_threshold: float,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
    angle_enforcement_tolerance: float = 0.1,
) -> Tuple[np.ndarray, float]:
    """
    Regularize a closed polygon ring so every edge is parallel, perpendicular
    (or optionally diagonal at 45 degrees) to the ring's dominant direction.

    Pipeline: analyze edge azimuths -> snap each edge to a target orientation
    -> reconnect the snapped edges -> enforce the target angles in a final
    post-processing pass.

    Parameters:
    -----------
    coordinates : numpy.ndarray
        Ring coordinates (n x 2), assumed closed (first point == last point).
    parallel_threshold : float
        Distance threshold below which nearby parallel edges are merged.
    allow_45_degree : bool
        If True, edges may also snap to 45-degree diagonals.
    diagonal_threshold_reduction : float
        Degrees subtracted from the 45-degree snapping zone, making
        diagonal orientations less likely.
    angle_enforcement_tolerance : float
        Maximum allowed deviation (degrees) from target angles in the output.
        Default is 0.1 degrees.

    Returns:
    --------
    tuple[numpy.ndarray, float]
        (closed regularized ring, main direction in degrees). On failure the
        original coordinates are returned with a direction of 0.0.
    """
    # A closed ring needs at least 3 unique vertices plus the closing vertex
    if len(coordinates) < 4:
        warnings.warn("Not enough coordinates to regularize. Returning original.")
        return coordinates, 0.0

    # Work on an open ring: drop the duplicated closing vertex when present
    if np.allclose(coordinates[0], coordinates[-1]):
        open_ring = coordinates[:-1]
    else:
        open_ring = coordinates

    if len(open_ring) < 3:
        warnings.warn(
            "Not enough unique coordinates to regularize. Returning original."
        )
        return coordinates, 0.0

    # Dominant direction and per-edge azimuths from the open ring
    edge_data = analyze_edges(open_ring)

    # Snap every edge onto its target orientation
    oriented_edges, edge_orientations = orient_edges(
        open_ring,
        edge_data,
        allow_45_degree=allow_45_degree,
        diagonal_threshold_reduction=diagonal_threshold_reduction,
    )

    # Rebuild the outline by intersecting/bridging consecutive snapped edges
    candidate_points = connect_regularized_edges(
        oriented_edges, edge_orientations, parallel_threshold
    )

    if not candidate_points or len(candidate_points) < 3:
        warnings.warn("Regularization resulted in too few points. Returning original.")
        return coordinates, 0.0

    # Final pass: force every segment onto its target angle
    snapped_points = enforce_angles_post_process(
        points=candidate_points,
        main_direction=edge_data["main_direction"],
        allow_45_degree=allow_45_degree,
        angle_tolerance=angle_enforcement_tolerance,
    )

    if not snapped_points or len(snapped_points) < 3:
        warnings.warn(
            "Angle enforcement resulted in too few points. Returning original."
        )
        return coordinates, 0.0

    # Re-close the ring explicitly so Shapely accepts it
    ring = np.array(snapped_points)
    return np.vstack([ring, ring[0]]), edge_data["main_direction"]
237 |
238 |
def analyze_edges(
    coordinates: np.ndarray, coarse_bin_size: int = 5, fine_bin_size: int = 1
) -> dict[str, Any]:
    """
    Analyze edges to determine azimuth angles and the dominant direction.

    Edges are folded into [0, 90) (parallel and perpendicular edges vote for
    the same direction), binned into a length-weighted, smoothed histogram at
    coarse resolution, and the winning coarse bin is then refined at fine
    resolution.

    Parameters:
    -----------
    coordinates : np.ndarray
        Polygon coordinates (shape: N x 2), assumed NOT closed.
    coarse_bin_size : int
        Width (degrees) of the coarse histogram bins. Default 5.
    fine_bin_size : int
        Width (degrees) of the fine histogram bins. Default 1.

    Returns:
    --------
    dict
        Dictionary containing:
        - azimuth_angles: array of absolute edge angles (degrees)
        - edge_indices: array of [start_idx, end_idx] pairs for each edge
        - main_direction: dominant structure orientation (degrees)
    """
    empty_result = {
        "azimuth_angles": np.array([]),
        "edge_indices": np.array([]),
        "main_direction": 0,
    }
    if len(coordinates) < 3:
        return empty_result

    def _smooth(hist: np.ndarray) -> np.ndarray:
        # 1-2-1 smoothing inside, 2-1 weighting at the two ends;
        # all reads come from the unsmoothed input
        out = hist.copy()
        out[1:-1] = (2 * hist[1:-1] + hist[:-2] + hist[2:]) / 4
        out[0] = (2 * hist[0] + hist[1]) / 3
        out[-1] = (2 * hist[-1] + hist[-2]) / 3
        return out

    def _binned_weights(
        angles: np.ndarray,
        bin_size: float,
        weights: np.ndarray,
        num_bins_override=None,
        smooth: bool = True,
    ) -> np.ndarray:
        num_bins = (
            int(90 / bin_size) if num_bins_override is None else num_bins_override
        )
        # Clamp into range so an angle of exactly 90 lands in the last bin
        bin_idx = np.minimum(np.floor(angles / bin_size).astype(int), num_bins - 1)
        hist = np.bincount(bin_idx, weights=weights, minlength=num_bins)
        return _smooth(hist) if smooth else hist

    def _dominant_bin(hist: np.ndarray) -> int:
        # Average the histogram with its mirror, take the top two symmetric
        # candidates, and keep whichever is stronger in the original histogram
        symmetric = (hist + hist[::-1]) / 2
        candidate_a, candidate_b = np.argsort(symmetric)[-2:]
        return candidate_a if hist[candidate_a] > hist[candidate_b] else candidate_b

    # Edge vectors from each vertex to the next (wrapping around)
    deltas = np.roll(coordinates, -1, axis=0) - coordinates
    all_lengths = np.linalg.norm(deltas, axis=1)

    # Discard degenerate (near-zero length) edges
    keep = all_lengths > 1e-9
    if not np.any(keep):
        return empty_result

    deltas = deltas[keep]
    lengths = all_lengths[keep]
    azimuth_angles = (np.degrees(np.arctan2(deltas[:, 1], deltas[:, 0])) + 360) % 360
    # Fold into [0, 90): direction-insensitive and axis-insensitive
    folded_angles = (azimuth_angles % 180) % 90

    vertex_count = len(coordinates)
    pair_indices = np.column_stack(
        [np.arange(vertex_count), (np.arange(vertex_count) + 1) % vertex_count]
    )
    edge_indices = pair_indices[keep]

    coarse_hist = _binned_weights(folded_angles, coarse_bin_size, lengths)
    fine_hist = _binned_weights(
        folded_angles, fine_bin_size, lengths, num_bins_override=90
    )

    if np.sum(coarse_hist) == 0:
        main_direction = 0
    else:
        # Step 1: locate the dominant coarse bin
        coarse_winner = _dominant_bin(coarse_hist)
        window_start = coarse_winner * coarse_bin_size
        window_end = window_start + coarse_bin_size

        # Step 2: refine within the coarse bin's fine-resolution window
        local_bin = _dominant_bin(fine_hist[window_start:window_end])
        center = window_start + local_bin + fine_bin_size / 2

        # Round the bin center toward the heavier neighbouring fine bin
        if local_bin == 0:
            main_direction = math.floor(center)
        elif local_bin == window_end - window_start - 1:
            main_direction = math.ceil(center)
        else:
            left_weight = fine_hist[window_start + local_bin - 1]
            right_weight = fine_hist[window_start + local_bin + 1]
            if right_weight > left_weight:
                main_direction = math.ceil(center)
            else:
                main_direction = math.floor(center)

    return {
        "azimuth_angles": azimuth_angles,
        "edge_indices": edge_indices,
        "main_direction": main_direction,
    }
392 |
393 |
def get_orientation_and_rotation(
    diff_angle: float,
    main_direction: float,
    azimuth: float,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
    tolerance: float = 1e-9,
) -> Tuple[int, float]:
    """
    Decide which target orientation an edge should snap to and how far to rotate it.

    Parameters:
    -----------
    diff_angle : float
        Signed angular difference between the edge azimuth and the main
        direction, in [-180, 180].
    main_direction : float
        Dominant structural direction of the polygon (degrees).
    azimuth : float
        Absolute azimuth of the edge (degrees).
    allow_45_degree : bool
        If True, the edge may also snap to 45-degree diagonals.
    diagonal_threshold_reduction : float
        Degrees subtracted from the 22.5-degree diagonal snapping zone,
        making 45-degree snaps less likely.
    tolerance : float
        Numeric tolerance used when checking whether a rounded offset
        collapsed onto 0/180 degrees.

    Returns:
    --------
    tuple[int, float]
        (orientation_code, rotation_angle) where orientation_code is
        0 = parallel/anti-parallel, 1 = perpendicular,
        2 = 45/225-degree diagonal, 3 = 135/315-degree diagonal,
        and rotation_angle is the signed rotation (degrees) to apply.
    """
    target_offset = 0.0  # The desired angle relative to main_direction (0, 45, 90 etc.)
    orientation_code = 0

    if allow_45_degree:
        # Calculate how close we are to each of the key orientations
        mod180 = diff_angle % 180

        dist_to_0 = min(abs(mod180), abs(mod180 - 180))
        # Bug fix: the original duplicated the same argument in a min() call;
        # within [0, 180) the distance to the 90-degree family is simply this.
        dist_to_90 = abs(mod180 - 90)
        dist_to_45 = min(abs(mod180 - 45), abs(mod180 - 135))

        # Apply down-weighting to 45-degree angles: this shrinks the zone
        # where angles snap to the diagonals
        if dist_to_45 <= (22.5 - diagonal_threshold_reduction):
            # Close enough to 45/135/225/315 degrees
            angle_mod = diff_angle % 90
            if angle_mod < 45:
                target_offset = (diff_angle // 90) * 90 + 45
            else:
                target_offset = (diff_angle // 90 + 1) * 90 - 45

            # Use modulo 180 to differentiate between the 45/225 and
            # 135/315 diagonal families
            normalized_angle = (main_direction + target_offset) % 180
            if 0 <= normalized_angle < 90:
                orientation_code = 2  # 45/225 degrees
            else:
                orientation_code = 3  # 135/315 degrees
        elif dist_to_0 <= dist_to_90:
            # Closer to 0/180 degrees
            target_offset = round(diff_angle / 180.0) * 180.0
            orientation_code = 0
        else:
            # Closer to 90/270 degrees
            target_offset = round(diff_angle / 90.0) * 90.0
            if abs(target_offset % 180) < tolerance:
                # Rounding collapsed onto 0/180; force a true perpendicular
                target_offset = 90.0 if diff_angle > 0 else -90.0
            orientation_code = 1

    else:  # Snap only to the nearest 90 degrees
        if abs(diff_angle) < 45.0:  # Closer to parallel/anti-parallel (0 or 180)
            target_offset = round(diff_angle / 180.0) * 180.0
            orientation_code = 0
        else:  # Closer to perpendicular (+90 or -90/270)
            target_offset = round(diff_angle / 90.0) * 90.0
            if abs(target_offset % 180) < tolerance:
                # Rounding collapsed onto 0/180; force a true perpendicular
                target_offset = 90.0 if diff_angle > 0 else -90.0
            orientation_code = 1

    # Signed rotation taking the edge azimuth onto the target, in (-180, 180]
    rotation_angle = (main_direction + target_offset - azimuth + 180) % 360 - 180
    return orientation_code, rotation_angle
459 |
460 |
def orient_edges(
    simplified_coordinates: np.ndarray,
    edge_data: dict,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
) -> Tuple[np.ndarray, List[int]]:
    """
    Orient edges to be parallel or perpendicular (or optionally 45 degrees)
    to the main direction determined by angle distribution analysis.

    Parameters:
    -----------
    simplified_coordinates : numpy.ndarray
        Simplified polygon coordinates (shape: n x 2, assumed closed).
    edge_data : dict
        Dictionary containing edge analysis data ('azimuth_angles', 'edge_indices',
        'main_direction').
    allow_45_degree : bool
        If True, allows edges to be oriented at 45-degree angles relative
        to the main direction.
    diagonal_threshold_reduction : float
        Angle in degrees to subtract from the 45-degree snapping thresholds,
        making diagonal (45°) orientations less likely.

    Returns:
    --------
    tuple
        Tuple containing:
        - oriented_edges (numpy.ndarray): Array of [start, end] points for each oriented edge.
        - edge_orientations (list): List of orientation codes for each edge.
            - 0: Parallel or anti-parallel (0, 180 deg relative to main_direction)
            - 1: Perpendicular (90, 270 deg relative to main_direction)
            - 2: Diagonal (45, 225 deg relative to main_direction) - only if allow_45_degree
            - 3: Diagonal (135, 315 deg relative to main_direction) - only if allow_45_degree
    """
    oriented_edges = []
    edge_orientations = []

    azimuth_angles = edge_data["azimuth_angles"]
    edge_indices = edge_data["edge_indices"]
    main_direction = edge_data["main_direction"]

    for azimuth, (start_idx, end_idx) in zip(azimuth_angles, edge_indices):
        # Shortest signed angle from the main direction to this edge, in [-180, 180]
        diff_angle = (azimuth - main_direction + 180) % 360 - 180

        orientation_code, rotation_angle = get_orientation_and_rotation(
            diff_angle=diff_angle,
            main_direction=main_direction,
            azimuth=azimuth,
            allow_45_degree=allow_45_degree,
            diagonal_threshold_reduction=diagonal_threshold_reduction,
        )

        start_point = np.array(simplified_coordinates[start_idx], dtype=float)
        end_point = np.array(simplified_coordinates[end_idx], dtype=float)

        # Rotate the edge so it matches the chosen target orientation
        rotated_edge = rotate_edge(start_point, end_point, rotation_angle)

        oriented_edges.append(rotated_edge)
        edge_orientations.append(orientation_code)

    return np.array(oriented_edges, dtype=float), edge_orientations
531 |
532 |
def connect_regularized_edges(
    oriented_edges: np.ndarray, edge_orientations: list, parallel_threshold: float
) -> List[np.ndarray]:
    """
    Stitch the oriented edges back into a single regularized outline.

    Consecutive edges with different orientation codes are joined at their
    line intersection; consecutive edges with the same code are treated as
    parallel and either merged or bridged.

    Parameters:
    -----------
    oriented_edges : numpy.ndarray
        Array of oriented edges.
    edge_orientations : list
        Orientation code per edge (see orient_edges).
    parallel_threshold : float
        Distance threshold below which nearby parallel edges are merged.

    Returns:
    --------
    list
        Points forming the regularized polygon outline.
    """
    outline = []
    edge_count = len(oriented_edges)

    # Every edge is connected to its successor, wrapping from last back to first
    for i in range(edge_count):
        j = (i + 1) % edge_count

        cur_start, cur_end = oriented_edges[i][0], oriented_edges[i][1]
        nxt_start, nxt_end = oriented_edges[j][0], oriented_edges[j][1]

        if edge_orientations[i] != edge_orientations[j]:
            # Differently-oriented edges meet at a well-defined intersection
            outline.append(
                handle_perpendicular_edges(cur_start, cur_end, nxt_start, nxt_end)
            )
        else:
            # Same orientation: merge or bridge the parallel pair
            outline.extend(
                handle_parallel_edges(
                    cur_start,
                    cur_end,
                    nxt_start,
                    nxt_end,
                    parallel_threshold,
                    j,
                    oriented_edges,
                )
            )

    return outline
589 |
590 |
def handle_perpendicular_edges(
    current_edge_start: np.ndarray,
    current_edge_end: np.ndarray,
    next_edge_start: np.ndarray,
    next_edge_end: np.ndarray,
) -> np.ndarray:
    """
    Join two non-parallel edges at the intersection of their infinite lines.

    Parameters:
    -----------
    current_edge_start, current_edge_end : numpy.ndarray
        Endpoints of the current edge.
    next_edge_start, next_edge_end : numpy.ndarray
        Endpoints of the next edge.

    Returns:
    --------
    numpy.ndarray
        Intersection point of the two edge lines; falls back to the current
        edge's end point if the lines turn out to be parallel.
    """
    crossing = calculate_line_intersection(
        create_line_equation(current_edge_start, current_edge_end),
        create_line_equation(next_edge_start, next_edge_end),
    )

    # Parallel lines yield no intersection (shouldn't happen here, but be safe)
    if not crossing:
        return current_edge_end

    return np.array(crossing)
627 |
628 |
def handle_parallel_edges(
    current_edge_start: np.ndarray,
    current_edge_end: np.ndarray,
    next_edge_start: np.ndarray,
    next_edge_end: np.ndarray,
    parallel_threshold: float,
    next_index: int,
    oriented_edges: np.ndarray,
) -> List[np.ndarray]:
    """
    Join two consecutive parallel edges.

    If the two parallel lines are closer than `parallel_threshold`, the next
    edge is projected onto the current edge's line so the pair becomes
    collinear; in that case `oriented_edges[next_index]` is updated in place.
    Otherwise a short connector is inserted through the midpoint of the gap.

    Parameters:
    -----------
    current_edge_start, current_edge_end : numpy.ndarray
        Endpoints of the current edge.
    next_edge_start, next_edge_end : numpy.ndarray
        Endpoints of the next edge.
    parallel_threshold : float
        Distance below which the two parallel lines are merged.
    next_index : int
        Index of the next edge in `oriented_edges`.
    oriented_edges : numpy.ndarray
        Array of all oriented edges; mutated in place in the merge case.

    Returns:
    --------
    list
        Points to append to the regularized polygon outline.
    """
    gap = calculate_parallel_line_distance(
        create_line_equation(current_edge_start, current_edge_end),
        create_line_equation(next_edge_start, next_edge_end),
    )

    if gap < parallel_threshold:
        # Merge: slide both endpoints of the next edge onto the current line
        shifted_start = np.array(
            project_point_to_line(
                next_edge_start[0],
                next_edge_start[1],
                current_edge_start[0],
                current_edge_start[1],
                current_edge_end[0],
                current_edge_end[1],
            )
        )
        oriented_edges[next_index][0] = shifted_start
        oriented_edges[next_index][1] = np.array(
            project_point_to_line(
                next_edge_end[0],
                next_edge_end[1],
                current_edge_start[0],
                current_edge_start[1],
                current_edge_end[0],
                current_edge_end[1],
            )
        )
        return [shifted_start]

    # Keep both edges and bridge them through the midpoint of the gap
    midpoint = (current_edge_end + next_edge_start) / 2
    bridge_on_current = project_point_to_line(
        midpoint[0],
        midpoint[1],
        current_edge_start[0],
        current_edge_start[1],
        current_edge_end[0],
        current_edge_end[1],
    )
    bridge_on_next = project_point_to_line(
        midpoint[0],
        midpoint[1],
        next_edge_start[0],
        next_edge_start[1],
        next_edge_end[0],
        next_edge_end[1],
    )
    return [np.array(bridge_on_current), np.array(bridge_on_next)]
718 |
719 |
def preprocess_polygon(
    polygon: Polygon,
    simplify: bool,
    simplify_tolerance: float,
) -> Polygon:
    """
    Optionally simplify a polygon, then densify its boundary for edge analysis.

    Parameters:
    -----------
    polygon : shapely.geometry.Polygon
        Input polygon.
    simplify : bool
        If True, apply topology-preserving simplification first.
    simplify_tolerance : float
        Simplification tolerance; also scales the segmentize length.

    Returns:
    --------
    shapely.geometry.Polygon
        The preprocessed polygon. If simplification degrades the geometry
        (empty result or a non-Polygon type), the original polygon is
        returned unchanged.
    """
    if simplify:
        simplified = polygon.simplify(
            tolerance=simplify_tolerance, preserve_topology=True
        )
        # Bug fix: guard on the *simplified* geometry. The original code
        # tested the input polygon here, so an empty simplification result
        # was silently adopted and propagated downstream.
        if simplified.is_empty:
            return polygon
        if isinstance(simplified, Polygon):
            polygon = simplified
        else:
            return polygon

    # Densify edges so later per-edge analysis has enough vertices to work with
    polygon = polygon.segmentize(max_segment_length=simplify_tolerance * 5)

    return polygon
741 |
742 |
def regularize_single_polygon(
    polygon: Polygon,
    parallel_threshold: float,
    allow_45_degree: bool,
    diagonal_threshold_reduction: float,
    allow_circles: bool,
    circle_threshold: float,
    include_metadata: bool,
    simplify: bool,
    simplify_tolerance: float,
) -> dict[str, Any]:
    """
    Regularize a Shapely polygon by aligning edges to principal directions

    Parameters:
    -----------
    polygon : shapely.geometry.Polygon
        Input polygon to regularize
    parallel_threshold : float
        Distance threshold for parallel line handling
    allow_45_degree : bool
        If True, allows 45-degree orientations relative to the main direction
    diagonal_threshold_reduction : float
        Reduction factor in degrees to reduce the likelihood of diagonal
        edges being created
    allow_circles : bool
        If True, attempts to detect polygons that are nearly circular and
        replaces them with perfect circles
    circle_threshold : float
        Intersection over Union (IoU) threshold used for circle detection
        Value between 0 and 1.
    include_metadata : bool
        If True, computes the IoU between the input and the regularized
        polygon and includes it in the output.
    simplify : bool
        If True, the polygon is simplified during preprocessing
    simplify_tolerance : float
        Tolerance used for simplification/segmentization in preprocessing

    Returns:
    --------
    dict[str, Any]
        Dictionary with keys:
        - "geometry": the regularized shapely Polygon
        - "iou": IoU between input and output (0 unless include_metadata)
        - "main_direction": main direction of the exterior ring in degrees
    """
    if not isinstance(polygon, Polygon):
        # Return unmodified if not a polygon
        warnings.warn(
            f"Unsupported geometry type: {type(polygon)}. Returning original."
        )
        return {"geometry": polygon, "iou": 0, "main_direction": 0}

    cleaned = preprocess_polygon(
        polygon,
        simplify=simplify,
        simplify_tolerance=simplify_tolerance,
    ).buffer(0)

    # buffer(0) may split a self-intersecting ring into a MultiPolygon or
    # collapse a degenerate ring to an empty geometry; keep the largest part
    # so the exterior/interior handling below stays valid.
    if cleaned.geom_type == "MultiPolygon":
        cleaned = max(cleaned.geoms, key=lambda part: part.area)
    if not isinstance(cleaned, Polygon) or cleaned.is_empty:
        warnings.warn("Polygon degenerated during preprocessing. Returning original.")
        return {"geometry": polygon, "iou": 0, "main_direction": 0}
    polygon = cleaned

    # Exterior ring coordinates are already closed (first point == last point)
    exterior_coordinates = np.array(polygon.exterior.coords)

    regularized_exterior, main_direction = regularize_coordinate_array(
        coordinates=exterior_coordinates,
        parallel_threshold=parallel_threshold,
        allow_45_degree=allow_45_degree,
        diagonal_threshold_reduction=diagonal_threshold_reduction,
    )

    # area > 0 guard avoids a zero-radius buffer (and a 0/0 IoU) for
    # near-degenerate polygons
    if allow_circles and polygon.area > 0:
        # Compare against the equal-area circle centered on the centroid
        radius = np.sqrt(polygon.area / np.pi)
        perfect_circle = polygon.centroid.buffer(radius, quad_segs=42)
        # Check if the polygon is close to a circle using iou
        iou = (
            perfect_circle.intersection(polygon).area
            / perfect_circle.union(polygon).area
        )
        if iou > circle_threshold:
            # If the polygon is close to a circle, return the perfect circle
            regularized_exterior = np.array(perfect_circle.exterior.coords, dtype=float)

    # Handle interior rings (holes)
    regularized_interiors: List[np.ndarray] = []
    for interior in polygon.interiors:
        interior_coordinates = np.array(interior.coords)
        regularized_interior, _ = regularize_coordinate_array(
            coordinates=interior_coordinates,
            parallel_threshold=parallel_threshold,
            allow_45_degree=allow_45_degree,
            diagonal_threshold_reduction=diagonal_threshold_reduction,
        )
        regularized_interiors.append(regularized_interior)

    # Create new polygon
    try:
        # Convert coordinates to LinearRings
        exterior_ring = LinearRing(regularized_exterior)
        interior_rings = [LinearRing(r) for r in regularized_interiors]

        # Create regularized polygon; buffer(0) repairs self-intersections
        regularized_polygon = Polygon(exterior_ring, interior_rings).buffer(0)
        if include_metadata:
            # Guard against a zero-area union (degenerate regularization result)
            union_area = regularized_polygon.union(polygon).area
            final_iou = (
                regularized_polygon.intersection(polygon).area / union_area
                if union_area
                else 0
            )
        else:
            final_iou = 0

        return {
            "geometry": regularized_polygon,
            "iou": final_iou,
            "main_direction": main_direction,
        }
    except Exception as e:
        # If there's an error creating the polygon, return the original
        warnings.warn(f"Error creating regularized polygon: {e}. Returning original.")
        return {"geometry": polygon, "iou": 0, "main_direction": 0}
854 |
--------------------------------------------------------------------------------
/examples/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DPIRD-DMA/Building-Regulariser/6583bdd1b91ebe4ec0b2c4d44203e8557f2baeff/examples/1.png
--------------------------------------------------------------------------------
/examples/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DPIRD-DMA/Building-Regulariser/6583bdd1b91ebe4ec0b2c4d44203e8557f2baeff/examples/2.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "buildingregulariser"
7 | dynamic = ["version"]
8 | description = "A Python library for regularizing building footprints in geospatial data. This library helps clean up and standardize building polygon geometries by aligning edges to principal directions."
9 | readme = "README.md"
10 | requires-python = ">=3.9"
11 | dependencies = [
12 | "geopandas>=1.0.0",
13 | "numpy>=2.0.0",
14 | ]
15 | [tool.setuptools.dynamic]
16 | version = {attr = "buildingregulariser.__version__.__version__"}
17 |
18 |
19 | [dependency-groups]
20 | dev = [
21 | "folium>=0.19.5",
22 | "ipykernel>=6.29.5",
23 | "mapclassify>=2.8.1",
24 | "matplotlib>=3.9.4",
25 | "pytest>=8.3.5",
26 | "rasterio>=1.4.3",
27 | ]
28 | [[tool.uv.index]]
29 | name = "testpypi"
30 | url = "https://test.pypi.org/simple/"
31 | publish-url = "https://test.pypi.org/legacy/"
32 | explicit = true
33 |
--------------------------------------------------------------------------------
/test data/input/test_data.gpkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DPIRD-DMA/Building-Regulariser/6583bdd1b91ebe4ec0b2c4d44203e8557f2baeff/test data/input/test_data.gpkg
--------------------------------------------------------------------------------
/test data/input/test_data_multi_geom.gpkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DPIRD-DMA/Building-Regulariser/6583bdd1b91ebe4ec0b2c4d44203e8557f2baeff/test data/input/test_data_multi_geom.gpkg
--------------------------------------------------------------------------------
/tests/test_end_to_end.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Any
3 |
4 | import geopandas as gpd
5 | import pytest
6 | from shapely.geometry.base import BaseGeometry
7 |
8 | from buildingregulariser import regularize_geodataframe
9 |
# Resolve paths relative to this test file so pytest can run from any cwd.
cwd = Path(__file__).parent
OUTPUT_FILE = cwd.parent / "test data/output/test output.gpkg"
# Start each test session with a fresh output GeoPackage.
if OUTPUT_FILE.exists():
    OUTPUT_FILE.unlink()
INPUT_FILE = cwd.parent / "test data/input/test_data.gpkg"
# Baseline keyword arguments for regularize_geodataframe; each parametrized
# test copies this dict and overrides exactly one parameter.
DEFAULT_PARAMS = dict(
    parallel_threshold=1.0,
    simplify=True,
    simplify_tolerance=0.5,
    allow_45_degree=True,
    diagonal_threshold_reduction=15,
    allow_circles=True,
    circle_threshold=0.9,
    num_cores=1,
    include_metadata=False,
    neighbor_alignment=False,
    neighbor_search_distance=100,
    neighbor_max_rotation=10,
)

assert INPUT_FILE.exists(), f"Test file missing: {INPUT_FILE}"
# Loaded once at import time; tests pass a copy to avoid mutating the fixture.
input_gdf = gpd.read_file(INPUT_FILE)
32 |
33 |
34 | # --- Geometry Quality Checks ---
def iou(poly1: BaseGeometry, poly2: BaseGeometry) -> float:
    """Return the intersection-over-union of two geometries (0 for a zero-area union)."""
    union_area = poly1.union(poly2).area
    if union_area == 0:
        return 0
    return poly1.intersection(poly2).area / union_area
39 |
40 |
def check_geometry_quality(inputs, outputs, iou_threshold=0.4, perimeter_tolerance=0.6):
    """Assert that regularized outputs stay geometrically close to their inputs.

    Verifies non-empty, valid, non-null output geometries, matching row
    counts, and — per feature — intersection with the input, a minimum IoU,
    and a perimeter within the given relative tolerance.
    """
    assert not outputs.empty
    assert outputs.geometry.is_valid.all()
    assert outputs.geometry.notnull().all()
    assert len(inputs) == len(
        outputs
    ), f"Row count mismatch: {len(inputs)} != {len(outputs)}"

    for idx, (in_geom, out_geom) in enumerate(zip(inputs.geometry, outputs.geometry)):
        assert in_geom.intersects(out_geom), f"No intersection for feature {idx}"

        overlap = iou(in_geom, out_geom)
        assert overlap >= iou_threshold, f"Low IoU for feature {idx}: {overlap:.2f}"

        # Perimeter must stay within ±perimeter_tolerance of the input's.
        out_perim = out_geom.length
        lower_bound = in_geom.length * (1 - perimeter_tolerance)
        upper_bound = in_geom.length * (1 + perimeter_tolerance)
        assert lower_bound <= out_perim <= upper_bound, (
            f"Perimeter out of range for feature {idx}: {out_perim:.2f} "
            f"(expected {lower_bound:.2f}–{upper_bound:.2f})"
        )
65 |
66 |
67 | # --- Parametrized Tests ---
@pytest.mark.parametrize(
    "param,values",
    [
        ("parallel_threshold", [0.5, 2.0, 5.0]),
        ("simplify", [True, False]),
        ("simplify_tolerance", [0.3, 1.0, 3.0]),
        ("allow_45_degree", [True, False]),
        ("diagonal_threshold_reduction", [0, 22.5, 45]),
        ("allow_circles", [True, False]),
        ("circle_threshold", [0.5, 0.75, 0.99]),
        ("num_cores", [0, 1, 4]),
        ("include_metadata", [False, True]),
        ("neighbor_alignment", [False, True]),
        ("neighbor_search_distance", [0, 100, 350]),
        ("neighbor_max_rotation", [0, 22.5, 45]),
    ],
)
def test_regularize_param_variants(param, values):
    """Sweep each parameter over its values, saving and quality-checking every run."""
    # Parameters whose values must be coerced to bool before the call.
    boolean_params = {
        "simplify",
        "allow_45_degree",
        "allow_circles",
        "include_metadata",
        "neighbor_alignment",
    }
    for val in values:
        config: dict[str, Any] = DEFAULT_PARAMS.copy()
        if param in boolean_params:
            coerced = bool(val)
        elif param == "num_cores":
            coerced = int(val)
        else:
            coerced = val
        config[param] = coerced

        result = regularize_geodataframe(geodataframe=input_gdf.copy(), **config)

        # Persist each variant as its own GPKG layer for manual inspection.
        layer_name = f"{param}_{str(val).replace('.', '_')}"
        result.to_file(OUTPUT_FILE, layer=layer_name, driver="GPKG")
        print(f"Saved layer '{layer_name}' to {OUTPUT_FILE}")
        check_geometry_quality(input_gdf, result)
105 |
--------------------------------------------------------------------------------