├── tests ├── __init__.py ├── data │ └── test_delete_metadata.xml ├── test_relation.py ├── test_way.py ├── test_node.py ├── conftest.py ├── test_augmenteddiff_continuous.py ├── test_api.py ├── test_adiff.py ├── test_osm.py ├── test_osmchange.py └── test_augmenteddiff.py ├── .python-version ├── src ├── __init__.py └── osmdiff │ ├── osm │ ├── __init__.py │ └── osm.py │ ├── settings.py │ ├── __init__.py │ ├── config.py │ ├── osmchange.py │ └── augmenteddiff.py ├── .gitignore ├── .vscode └── settings.json ├── docs ├── api │ ├── utils.md │ ├── osm.md │ ├── config.md │ ├── augmenteddiff.md │ ├── osmchange.md │ └── continuous.md ├── index.md ├── examples │ └── index.md └── getting-started.md ├── LICENSE ├── examples ├── continuous.py ├── try.py ├── api │ └── app.py └── geo_interface.py ├── mkdocs.yml ├── pyproject.toml ├── CHANGELOG.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12.9 2 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # This marks the src directory as a Python package 2 | -------------------------------------------------------------------------------- /src/osmdiff/osm/__init__.py: -------------------------------------------------------------------------------- 1 | from .osm import Node, OSMObject, Relation, Way 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | **/__pycache__/ 3 | **/*.egg-info 4 | .pytest_cache 5 | .venv/ 6 | build/ 7 | dist/ 8 | .pdm-python 9 | .aider* 10 | site/ 11 | .vscode/ 12 | .coverage 13 | htmlcov/ -------------------------------------------------------------------------------- /src/osmdiff/settings.py: -------------------------------------------------------------------------------- 1 | DEFAULT_OVERPASS_URL = "http://overpass-api.de/api" # URL for Overpass API 2 | DEFAULT_REPLICATION_URL = ( 3 | "https://planet.openstreetmap.org/replication" # URL for OSM replication API 4 | ) 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true, 7 | "python.testing.pytestPath": "pytest" 8 | } -------------------------------------------------------------------------------- /tests/data/test_delete_metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/api/utils.md: -------------------------------------------------------------------------------- 1 | # Utilities 2 | 3 | ## Settings 4 | 5 | ::: osmdiff.settings.DEFAULT_OVERPASS_URL 6 | options: 7 | heading_level: 2 8 | show_source: true 9 | 10 | ::: osmdiff.settings.DEFAULT_REPLICATION_URL 11 | options: 12 | heading_level: 2 13 | show_source: true 14 | 15 | ## Version Information 16 | 17 | ::: osmdiff.__version__ 18 | options: 19 | heading_level: 2 20 | show_source: true 21 | -------------------------------------------------------------------------------- /src/osmdiff/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | osmdiff is a Python library for working with OpenStreetMap changesets and diffs. 3 | 4 | It provides classes for working with OpenStreetMap changesets and diffs, and 5 | includes a parser for the OpenStreetMap changeset API. 6 | """ 7 | 8 | from .augmenteddiff import AugmentedDiff, ContinuousAugmentedDiff 9 | from .osm import Node, Relation, Way 10 | from .osmchange import OSMChange 11 | 12 | __version__ = "0.4.6" 13 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # OSMDiff Documentation 2 | 3 | OSMDiff provides Python tools for working with OpenStreetMap change data: 4 | 5 | - Track real-time map edits 6 | - Process historical changes 7 | - Monitor specific geographic areas 8 | - Analyze contributor patterns 9 | 10 | ## Quick Links 11 | 12 | - [Getting Started](/getting-started) - First steps with OSMDiff 13 | - [API Reference](/api/augmenteddiff) - Detailed documentation 14 | - [Examples](/examples) - More advanced usage patterns 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017-2022 Martijn van Exel 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /docs/api/osm.md: -------------------------------------------------------------------------------- 1 | # OSM Data Structures 2 | 3 | This module provides classes for working with OpenStreetMap (OSM) data elements. 4 | 5 | ## Base Class 6 | 7 | ::: osmdiff.osm.OSMObject 8 | options: 9 | show_root_heading: true 10 | show_source: true 11 | 12 | ## OSM Elements 13 | 14 | ### Node 15 | 16 | ::: osmdiff.osm.Node 17 | options: 18 | show_root_heading: true 19 | show_source: true 20 | 21 | ### Way 22 | 23 | ::: osmdiff.osm.Way 24 | options: 25 | show_root_heading: true 26 | show_source: true 27 | 28 | ### Relation 29 | 30 | ::: osmdiff.osm.Relation 31 | options: 32 | show_root_heading: true 33 | show_source: true 34 | 35 | -------------------------------------------------------------------------------- /examples/continuous.py: -------------------------------------------------------------------------------- 1 | from osmdiff import ContinuousAugmentedDiff 2 | 3 | # Create continuous fetcher for London area 4 | fetcher = ContinuousAugmentedDiff( 5 | minlon=-0.489, 6 | minlat=51.28, 7 | maxlon=0.236, 8 | maxlat=51.686, 9 | min_interval=30, # Check at least every 30 seconds 10 | max_interval=120, # Back off up to 120 seconds if no changes 11 | ) 12 | 13 | # Process changes as they come in 14 | for diff in fetcher: 15 | print(f"\nNew changes in diff {diff.sequence_number}:") 16 | print(f" Created: {len(diff.create)} objects") 17 | print(f" Modified: {len(diff.modify)} objects") 18 | print(f" Deleted: {len(diff.delete)} objects") 19 | 20 | # Example: Track new amenities 21 | for obj in diff.create: 22 | if "amenity" in obj.tags: 23 | print(f"New amenity: {obj.tags['amenity']}") 24 | -------------------------------------------------------------------------------- /docs/api/config.md: -------------------------------------------------------------------------------- 1 | # Configuration Reference 2 | 3 | ## API Configuration 4 | 5 | ::: osmdiff.config.API_CONFIG 6 | options: 7 | heading_level: 2 8 | show_source: true 9 | 10 | ## AugmentedDiff Defaults 11 | 12 | ::: osmdiff.config.AUGMENTED_DIFF_CONFIG 13 | options: 14 | heading_level: 2 15 | show_source: true 16 | 17 | ## HTTP Settings 18 | 19 | ::: osmdiff.config.DEFAULT_HEADERS 20 | options: 21 | heading_level: 2 22 | show_source: true 23 | 24 | ::: osmdiff.config.USER_AGENT 25 | options: 26 | heading_level: 2 27 | show_source: true 28 | 29 | ## Overriding Configuration 30 | 31 | ```python 32 | from osmdiff import AugmentedDiff 33 | from osmdiff.config import API_CONFIG 34 | 35 | # Modify configuration before use 36 | API_CONFIG["overpass"]["timeout"] = 60 # Increase timeout 37 | 38 | adiff = AugmentedDiff() # Will use updated configuration 39 | ``` 40 | -------------------------------------------------------------------------------- /examples/try.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from osmdiff import AugmentedDiff, OSMChange 4 | from osmdiff.osm import Node, Relation, Way 5 | from pathlib import Path 6 | 7 | osm_change_file = Path("tests", "data", "test_osmchange.xml") 8 | print(osm_change_file) 9 | 10 | # absolute path 11 | osm_change_file = osm_change_file.resolve() 12 | print(osm_change_file) 13 | 14 | 15 | r = OSMChange() 16 | r.get_state() 17 | r.retrieve() 18 | print(r) 19 | 20 | r = OSMChange(file=osm_change_file) 21 | print(r) 22 | 23 | a = AugmentedDiff(file=osm_change_file) 24 | print(a) 25 | 26 | a = AugmentedDiff( 27 | # minlon=-160.0, 28 | # minlat=20.0, 29 | # maxlon=-80.0, 30 | # maxlat=60.0, 31 | ) 32 | a._get_current_id() 33 | a.retrieve() 34 | print(a) 35 | 36 | # n = Node() 37 | # w = Way() 38 | # r = Relation() 39 | 40 | # r = replication.OSMChange(frequency="hour") 41 | 42 | # print(r.sequence_number) 43 | # r.get(r.sequence_number) 44 | 45 | # r = replication.OSMChange(frequency="day") 46 | 47 | # print(r.sequence_number) 48 | # r.get(r.sequence_number) 49 | -------------------------------------------------------------------------------- /tests/test_relation.py: -------------------------------------------------------------------------------- 1 | from osmdiff import Relation 2 | from osmdiff.osm import OSMObject 3 | from typing_extensions import assert_type 4 | 5 | 6 | def test_relation_init(): 7 | relation = Relation() 8 | assert isinstance(relation, Relation) 9 | assert isinstance(relation, OSMObject) 10 | assert isinstance(relation.attribs, dict) 11 | assert isinstance(relation.tags, dict) 12 | assert len(relation.tags) == 0 13 | assert len(relation.attribs) == 0 14 | assert isinstance(relation.members, list) 15 | assert len(relation.members) == 0 16 | 17 | def test_relation_from_xml(): 18 | import xml.etree.ElementTree as ET 19 | xml = '' 20 | elem = ET.fromstring(xml) 21 | relation = Relation.from_xml(elem) 22 | assert isinstance(relation, Relation) 23 | assert relation.attribs["id"] == "1" 24 | assert relation.tags["type"] == "multipolygon" 25 | assert len(relation.members) == 1 26 | 27 | # Optionally, add test for geo interface if implemented 28 | -------------------------------------------------------------------------------- /tests/test_way.py: -------------------------------------------------------------------------------- 1 | from osmdiff import Way 2 | from osmdiff.osm import OSMObject 3 | from typing_extensions import assert_type 4 | 5 | 6 | def test_way_init(): 7 | way = Way() 8 | assert isinstance(way, Way) 9 | assert isinstance(way, OSMObject) 10 | assert isinstance(way.attribs, dict) 11 | assert isinstance(way.tags, dict) 12 | assert len(way.tags) == 0 13 | assert len(way.attribs) == 0 14 | assert isinstance(way.nodes, list) 15 | assert len(way.nodes) == 0 16 | 17 | def test_way_is_closed(): 18 | way = Way() 19 | way.nodes = [1, 2, 1] 20 | assert way.is_closed() is True 21 | way.nodes = [1, 2, 3] 22 | assert way.is_closed() is False 23 | 24 | def test_way_from_xml(): 25 | import xml.etree.ElementTree as ET 26 | xml = '' 27 | elem = ET.fromstring(xml) 28 | way = Way.from_xml(elem) 29 | assert isinstance(way, Way) 30 | assert way.attribs["id"] == "1" 31 | assert way.tags["highway"] == "residential" 32 | 33 | # Optionally, add test for geo interface if implemented 34 | -------------------------------------------------------------------------------- /examples/api/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.middleware.cors import CORSMiddleware 3 | import uvicorn 4 | from typing import Optional 5 | from osmdiff import AugmentedDiff 6 | 7 | 8 | app = FastAPI() 9 | 10 | origins = [ 11 | "http://localhost:3000", 12 | "http://127.0.0.1:3000", 13 | ] 14 | 15 | app.add_middleware( 16 | CORSMiddleware, 17 | allow_origins=origins, 18 | allow_credentials=True, 19 | allow_methods=["*"], 20 | allow_headers=["*"], 21 | ) 22 | 23 | 24 | @app.get("/") 25 | async def root(): 26 | return {"message": "Hello World"} 27 | 28 | 29 | @app.get("/items/{item_id}") 30 | async def read_item(item_id: int, q: Optional[str] = None): 31 | return {"item_id": item_id, "q": q} 32 | 33 | 34 | @app.get("/adiff/{sequence_number}") 35 | async def get_augmented_diff(sequence_number: int): 36 | adiff = AugmentedDiff(sequence_number=sequence_number) 37 | adiff.retrieve() 38 | return {"create": adiff._create, "modify": adiff._modify, "delete": adiff._delete} 39 | 40 | 41 | if __name__ == "__main__": 42 | uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True) 43 | -------------------------------------------------------------------------------- /tests/test_node.py: -------------------------------------------------------------------------------- 1 | from osmdiff import Node 2 | from osmdiff.osm import OSMObject 3 | from typing_extensions import assert_type 4 | 5 | 6 | def test_node_init(): 7 | node = Node() 8 | assert isinstance(node, Node) 9 | assert isinstance(node, OSMObject) 10 | assert isinstance(node.attribs, dict) 11 | assert isinstance(node.tags, dict) 12 | assert len(node.attribs) == 0 13 | assert len(node.tags) == 0 14 | assert node.lat == 0.0 15 | assert node.lon == 0.0 16 | 17 | def test_node_geo_interface_and_equality(): 18 | node1 = Node(attribs={"lat": 10.0, "lon": 20.0}) 19 | node2 = Node(attribs={"lat": 10.0, "lon": 20.0}) 20 | node3 = Node(attribs={"lat": 10.1, "lon": 20.1}) 21 | gi = node1.__geo_interface__ 22 | assert gi["type"] == "Point" 23 | assert gi["coordinates"] == [node1.lon, node1.lat] 24 | assert node1 == node2 25 | assert node1 != node3 26 | 27 | def test_node_from_xml(): 28 | import xml.etree.ElementTree as ET 29 | xml = '' 30 | elem = ET.fromstring(xml) 31 | node = Node.from_xml(elem) 32 | assert isinstance(node, Node) 33 | assert node.attribs["id"] == "1" 34 | assert node.attribs["lat"] == "10.0" 35 | assert node.tags["name"] == "TestNode" 36 | -------------------------------------------------------------------------------- /examples/geo_interface.py: -------------------------------------------------------------------------------- 1 | from shapely.geometry import shape 2 | from osmdiff.osm import Node, Way, Relation 3 | 4 | # Create a node with coordinates 5 | node = Node() 6 | node.attribs = {"lon": "-122.4", "lat": "37.7"} 7 | 8 | # Convert to Shapely Point using geo_interface 9 | point = shape(node.__geo_interface__) 10 | print(f"Node as Point: {point}") # POINT (-122.4 37.7) 11 | 12 | # Create a way with nodes 13 | way = Way() 14 | way.nodes = [ 15 | Node(attribs={"lon": "-122.4", "lat": "37.7"}), 16 | Node(attribs={"lon": "-122.4", "lat": "37.8"}), 17 | Node(attribs={"lon": "-122.5", "lat": "37.8"}), 18 | Node(attribs={"lon": "-122.4", "lat": "37.7"}), # Closing the loop 19 | ] 20 | 21 | # Convert to Shapely Polygon using geo_interface 22 | polygon = shape(way.__geo_interface__) 23 | print( 24 | f"Way as Polygon: {polygon}" 25 | ) # POLYGON ((-122.4 37.7, -122.4 37.8, -122.5 37.8, -122.4 37.7)) 26 | 27 | # Create a relation with members 28 | relation = Relation() 29 | relation.members = [way, node] 30 | 31 | # Convert to Shapely GeometryCollection using geo_interface 32 | collection = shape(relation.__geo_interface__) 33 | print( 34 | f"Relation as Collection: {collection}" 35 | ) # GEOMETRYCOLLECTION (POLYGON ((-122.4 37.7, -122.4 37.8, -122.5 37.8, -122.4 37.7)), POINT (-122.4 37.7)) 36 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: OSMDiff 2 | theme: 3 | name: material 4 | palette: 5 | - scheme: default 6 | primary: indigo 7 | accent: indigo 8 | toggle: 9 | icon: material/brightness-7 10 | name: Switch to dark mode 11 | - scheme: slate 12 | primary: indigo 13 | accent: indigo 14 | toggle: 15 | icon: material/brightness-4 16 | name: Switch to light mode 17 | 18 | plugins: 19 | - search 20 | - mkdocstrings: 21 | default_handler: python 22 | handlers: 23 | python: 24 | paths: [src] 25 | options: 26 | docstring_style: google 27 | show_source: true 28 | show_root_heading: false 29 | heading_level: 2 30 | show_submodules: true 31 | members_order: alphabetical 32 | merge_init_into_class: true 33 | 34 | nav: 35 | - Home: index.md 36 | - Getting Started: getting-started.md 37 | - Examples: examples/index.md 38 | - API Reference: 39 | - OSM: api/osm.md 40 | - OSMChange: api/osmchange.md 41 | - AugmentedDiff: api/augmenteddiff.md 42 | - ContinuousAugmentedDiff: api/continuous.md 43 | markdown_extensions: 44 | - pymdownx.highlight: 45 | anchor_linenums: true 46 | - toc: 47 | permalink: "#" 48 | - smarty 49 | - sane_lists 50 | -------------------------------------------------------------------------------- /docs/api/augmenteddiff.md: -------------------------------------------------------------------------------- 1 | # AugmentedDiff 2 | 3 | Core class for retrieving and parsing OpenStreetMap augmented diffs. 4 | 5 | For continuous monitoring of changes, see [ContinuousAugmentedDiff](continuous.md). 6 | 7 | ## Features 8 | 9 | - Single diff retrieval 10 | - Bounding box filtering 11 | - Automatic sequence number handling 12 | - Context manager support 13 | 14 | ## Basic Usage 15 | 16 | ```python 17 | from osmdiff import AugmentedDiff 18 | 19 | # Create with bounding box for London 20 | adiff = AugmentedDiff( 21 | minlon=-0.489, 22 | minlat=51.28, 23 | maxlon=0.236, 24 | maxlat=51.686 25 | ) 26 | 27 | # Retrieve and process changes 28 | status = adiff.retrieve() 29 | if status == 200: 30 | print(f"Created: {len(adiff.create)} features") 31 | print(f"Modified: {len(adiff.modify)} features") 32 | print(f"Deleted: {len(adiff.delete)} features") 33 | ``` 34 | 35 | ## API Reference 36 | 37 | ::: osmdiff.augmenteddiff.AugmentedDiff 38 | options: 39 | heading_level: 2 40 | show_source: true 41 | members: 42 | - __init__ 43 | - get_state 44 | - retrieve 45 | - sequence_number 46 | - timestamp 47 | - remarks 48 | - actions 49 | - __repr__ 50 | - __enter__ 51 | - __exit__ 52 | 53 | ## See Also 54 | 55 | - [ContinuousAugmentedDiff](continuous.md) - For continuous monitoring 56 | - [OSMChange](osmchange.md) - For standard OSM changesets 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "osmdiff" 3 | dynamic = ["version"] 4 | authors = [{ name = "Martijn van Exel", email = "m@rtijn.org" }] 5 | maintainers = [{ name = "Martijn van Exel", email = "m@rtijn.org" }] 6 | description = "A read-only interface to OpenStreetMap change APIs and files" 7 | keywords = ["openstreetmap", "osm", "diff", "changeset", "api"] 8 | readme = "README.md" 9 | requires-python = ">=3.9" 10 | classifiers = [ 11 | "Programming Language :: Python :: 3", 12 | "Operating System :: OS Independent", 13 | ] 14 | dependencies = ["python-dateutil>=2.9.0.post0", "requests>=2.32.2"] 15 | license = "MIT" 16 | 17 | [project.urls] 18 | "Homepage" = "https://git.sr.ht/~mvexel/osmdiff" 19 | "Bug Tracker" = "https://todo.sr.ht/~mvexel/tracker?search=label%3Aosmdiff" 20 | 21 | [tool.setuptools.dynamic] 22 | version = { attr = "osmdiff.__version__" } 23 | 24 | [tool.pytest.ini_options] 25 | markers = ["integration: mark a test as an integration test"] 26 | addopts = "--cov=src/osmdiff --cov-report=term-missing --cov-report=html --cov-fail-under=85" 27 | 28 | [tool.pyright] 29 | venvPath = "." 30 | venv = ".venv" 31 | 32 | [dependency-groups] 33 | dev = [ 34 | "mkdocs>=1.6.1", 35 | "mkdocs-material>=9.5.50", 36 | "mkdocs-material-extensions>=1.3.1", 37 | "mkdocstrings[python]>=0.26.1", 38 | "pytest>=8.3.4", 39 | "typing-extensions>=4.12.2", 40 | ] 41 | 42 | test = ["pytest>=7.0.0", "pytest-cov>=3.0.0", "requests-mock>=1.9.3"] 43 | 44 | examples = ["fastapi>=0.115.12", "uvicorn>=0.34.2", "shapely>=2.1.1"] 45 | -------------------------------------------------------------------------------- /docs/api/osmchange.md: -------------------------------------------------------------------------------- 1 | # OSMChange 2 | 3 | Core class for retrieving and parsing OpenStreetMap changesets in OSMChange format. 4 | 5 | For working with the Augmented Diff format, we have [AugmentedDiff](augmenteddiff.md) and [ContinuousAugmentedDiff](continuous.md). 6 | 7 | ## Features 8 | 9 | - Retrieves changesets from OSM replication servers 10 | - Parses OSMChange XML format 11 | - Handles create/modify/delete actions 12 | - Supports both remote and local file sources 13 | - Context manager support 14 | - Sequence number management 15 | 16 | ## Basic Usage 17 | 18 | ```python 19 | from osmdiff import OSMChange 20 | 21 | # Create with sequence number 22 | osm_change = OSMChange(sequence_number=12345) 23 | 24 | # Retrieve and process changes 25 | status = osm_change.retrieve() 26 | if status == 200: 27 | creations = osm_change.actions["create"] 28 | modifications = osm_change.actions["modify"] 29 | deletions = osm_change.actions["delete"] 30 | print(f"Created: {len(creations)} features") 31 | print(f"Modified: {len(modifications)} features") 32 | print(f"Deleted: {len(deletions)} features") 33 | ``` 34 | 35 | ## API Reference 36 | 37 | ::: osmdiff.osmchange.OSMChange 38 | options: 39 | heading_level: 2 40 | show_source: true 41 | members: 42 | - __init__ 43 | - get_state 44 | - retrieve 45 | - sequence_number 46 | - frequency 47 | - actions 48 | - __repr__ 49 | - __enter__ 50 | - __exit__ 51 | 52 | ## See Also 53 | 54 | - [AugmentedDiff](augmenteddiff.md) - For augmented diffs with additional metadata 55 | 56 | -------------------------------------------------------------------------------- /docs/api/continuous.md: -------------------------------------------------------------------------------- 1 | # ContinuousAugmentedDiff 2 | 3 | Iterator for continuous monitoring of OpenStreetMap changes using augmented diffs. 4 | 5 | Builds on [AugmentedDiff](augmenteddiff.md) to provide automatic polling with backoff. 6 | 7 | ## Features 8 | 9 | - Continuous monitoring 10 | - Automatic sequence number tracking 11 | - Exponential backoff during errors 12 | - Configurable polling intervals 13 | - Bounding box filtering 14 | 15 | ## Basic Usage 16 | 17 | ```python 18 | from osmdiff import ContinuousAugmentedDiff 19 | 20 | # Monitor London area 21 | monitor = ContinuousAugmentedDiff( 22 | minlon=-0.489, 23 | minlat=51.28, 24 | maxlon=0.236, 25 | maxlat=51.686 26 | ) 27 | 28 | for changes in monitor: # Runs indefinitely 29 | print(f"Changeset {changes.sequence_number}:") 30 | print(f" New: {len(changes.create)}") 31 | print(f" Modified: {len(changes.modify)}") 32 | ``` 33 | 34 | ## Advanced Configuration 35 | 36 | ```python 37 | monitor = ContinuousAugmentedDiff( 38 | minlon=-0.489, 39 | minlat=51.28, 40 | maxlon=0.236, 41 | maxlat=51.686, 42 | min_interval=60, # Minimum 1 minute between checks 43 | max_interval=300 # Maximum 5 minutes during backoff 44 | ) 45 | ``` 46 | 47 | ## API Reference 48 | 49 | ::: osmdiff.augmenteddiff.ContinuousAugmentedDiff 50 | options: 51 | heading_level: 2 52 | show_source: true 53 | members: 54 | - __init__ 55 | - __iter__ 56 | - __next__ 57 | 58 | ## See Also 59 | 60 | - [AugmentedDiff](augmenteddiff.md) - For single diff retrieval 61 | - [OSMChange](osmchange.md) - For standard changesets 62 | -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Here are some examples of how to use the OSMDiff library. 4 | 5 | ## Basic Augmented Diff Usage 6 | 7 | ```python 8 | from osmdiff import AugmentedDiff 9 | 10 | # Create an AugmentedDiff instance for a specific area 11 | ad = AugmentedDiff( 12 | minlon=-0.489, # London bounding box 13 | minlat=51.28, 14 | maxlon=0.236, 15 | maxlat=51.686 16 | ) 17 | 18 | # Get current state and retrieve changes 19 | ad.get_state() 20 | status = ad.retrieve() 21 | 22 | if status == 200: 23 | print(f"Changes retrieved:") 24 | print(f" Created: {len(ad.create)}") 25 | print(f" Modified: {len(ad.modify)}") 26 | print(f" Deleted: {len(ad.delete)}") 27 | ``` 28 | 29 | ## Continuous Monitoring 30 | 31 | For continuous monitoring of changes, use the ContinuousAugmentedDiff class: 32 | 33 | ```python 34 | from osmdiff import ContinuousAugmentedDiff 35 | 36 | # Create continuous fetcher for London area 37 | fetcher = ContinuousAugmentedDiff( 38 | minlon=-0.489, 39 | minlat=51.28, 40 | maxlon=0.236, 41 | maxlat=51.686, 42 | min_interval=30, # Check at least every 30 seconds 43 | max_interval=120 # Back off up to 120 seconds if no changes 44 | ) 45 | 46 | # Process changes as they come in 47 | for diff in fetcher: 48 | print(f"\nNew changes in diff {diff.sequence_number}:") 49 | print(f" Created: {len(diff.create)} objects") 50 | print(f" Modified: {len(diff.modify)} objects") 51 | print(f" Deleted: {len(diff.delete)} objects") 52 | 53 | # Process specific changes 54 | for obj in diff.create: 55 | if "amenity" in obj.tags: 56 | print(f"New amenity: {obj.tags['amenity']}") 57 | ``` 58 | 59 | -------------------------------------------------------------------------------- /src/osmdiff/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration settings for the osmdiff package. 3 | 4 | This module contains all the configuration settings for the osmdiff package's API interactions. 5 | It provides default values for API endpoints, timeouts, and request headers. 6 | 7 | Configuration Structure: 8 | - API_CONFIG: Contains settings for different API endpoints (Overpass, OSM, Nominatim) 9 | - AUGMENTED_DIFF_CONFIG: Default parameters for AugmentedDiff operations 10 | - DEFAULT_HEADERS: Standard HTTP headers used across all API requests 11 | 12 | Example: 13 | from osmdiff.config import API_CONFIG, DEFAULT_HEADERS 14 | 15 | # Get OSM API base URL 16 | osm_url = API_CONFIG["osm"]["base_url"] 17 | 18 | # Use default headers in requests 19 | response = requests.get(url, headers=DEFAULT_HEADERS) 20 | """ 21 | 22 | # Default API URLs and settings for different services 23 | API_CONFIG = { 24 | "overpass": { 25 | "base_url": "http://overpass-api.de/api/augmented_diff?id={sequence_number}", # sic 26 | "state_url": "https://overpass-api.de/api/augmented_diff_status", 27 | "timeout": 30, # Default timeout in seconds 28 | }, 29 | "osm": { 30 | "base_url": "https://api.openstreetmap.org/api/0.6", 31 | "timeout": 30, 32 | }, 33 | "nominatim": { 34 | "base_url": "https://nominatim.openstreetmap.org", 35 | "timeout": 30, 36 | }, 37 | } 38 | 39 | # Default parameters for AugmentedDiff operations 40 | AUGMENTED_DIFF_CONFIG = { 41 | "minlon": None, # Minimum longitude for bounding box 42 | "minlat": None, # Minimum latitude for bounding box 43 | "maxlon": None, # Maximum longitude for bounding box 44 | "maxlat": None, # Maximum latitude for bounding box 45 | "timestamp": None, # Timestamp for diff operations 46 | } 47 | 48 | # User agent string following OSM API guidelines 49 | # https://operations.osmfoundation.org/policies/api/ 50 | USER_AGENT = "osmdiff/1.0" # Replace with your actual user agent 51 | 52 | # Standard headers used in all API requests 53 | DEFAULT_HEADERS = {"User-Agent": USER_AGENT, "Accept": "application/json, text/xml"} 54 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.4.6 (2025-05-04) 4 | 5 | **This version fixes some critical bugs in 0.4.5, existing users should upgrade immediately** 6 | 7 | ### ✅ Testing Improvements 8 | - Added comprehensive tests for OSM object geo interfaces (Node, Way, Relation) 9 | - Added coordinate validation tests for Node class 10 | - Added tests for Member class parsing and geo interface 11 | - Improved test coverage for Way.length() method 12 | - Added equality comparison tests for Node objects 13 | 14 | ### 🐛 Bug Fixes 15 | - Fixed critical bugs in `AugmentedDiff` and `ContinuousAugmentedDiff` 16 | - state fetching 17 | - iteration 18 | - Fixed coordinate validation in Node class to properly handle edge cases 19 | - Improved error messages for invalid coordinates 20 | 21 | ### 📖 Documentation 22 | - Added more detailed docstrings for geo interface methods 23 | - Improved examples in OSM object class documentation 24 | 25 | ## v0.4.5 (2025-05-03) 26 | 27 | ### 🚀 Features 28 | - Add `ContinuousAugmentedDiff` to package exports 29 | - Add continuous augmented-diff fetcher with back-off strategy 30 | - Add `actions` property to `OSMChange` and `AugmentedDiff` 31 | - Update Overpass API base URL to the new endpoint 32 | - Provide `__geo_interface__` example 33 | 34 | ### 🐛 Bug Fixes 35 | - Properly return deleted features (fixes #43) 36 | - Correct state-parsing and response handling in diff APIs 37 | - Capture metadata for deleted objects in `AugmentedDiff` parser 38 | - Fix various API-test mocks (raw streams, gzipped responses, missing imports) 39 | - Clean up test assertions and fixtures for all API scenarios 40 | 41 | ### ♻️ Refactoring 42 | - Convert `test_augmenteddiff.py` to pytest + fixtures 43 | - Switch Overpass base-URL code to use a sequence template 44 | - Switch from PDM to hatchling build system 45 | - Enforce Black formatting 46 | 47 | ### 📖 Documentation 48 | - Add "Continuous Augmented Diff" section to README 49 | - Update docstring & class docs for `ContinuousAugmentedDiff` 50 | 51 | ### ✅ Testing Improvements 52 | - Continue increasing overall test coverage 53 | - Add tests for continuous augmented-diff, metadata capture & plumbbin 54 | - Improve API tests with better mocks, error cases & assertions 55 | -------------------------------------------------------------------------------- /docs/getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting Started with OSMDiff 2 | 3 | OSMDiff helps you work with OpenStreetMap change data. OpenStreetMap (OSM) is a collaborative map that's constantly updated by volunteers. These updates come in different formats: 4 | 5 | - **Augmented Diffs**: Detailed changes including metadata about who made changes and why 6 | - **OSMChange**: Standard format for basic create/modify/delete operations 7 | 8 | ## Installation 9 | 10 | ```bash 11 | pip install osmdiff 12 | ``` 13 | 14 | ## Basic Usage 15 | 16 | Track changes in a specific area (here using London as an example): 17 | 18 | ```python 19 | from osmdiff import AugmentedDiff 20 | 21 | # Create an AugmentedDiff instance for London 22 | ad = AugmentedDiff( 23 | minlon=-0.489, # West 24 | minlat=51.28, # South 25 | maxlon=0.236, # East 26 | maxlat=51.686 # North 27 | ) 28 | 29 | # Get current state and retrieve changes 30 | ad.get_state() 31 | status = ad.retrieve() 32 | 33 | if status == 200: 34 | print(f"Changes retrieved:") 35 | print(f" Created: {len(ad.create)}") 36 | print(f" Modified: {len(ad.modify)}") 37 | print(f" Deleted: {len(ad.delete)}") 38 | ``` 39 | 40 | ## Continuous Monitoring 41 | 42 | For real-time monitoring of changes: 43 | 44 | ```python 45 | from osmdiff import ContinuousAugmentedDiff 46 | 47 | # Create continuous fetcher for London area 48 | fetcher = ContinuousAugmentedDiff( 49 | minlon=-0.489, 50 | minlat=51.28, 51 | maxlon=0.236, 52 | maxlat=51.686, 53 | min_interval=30, # Check at least every 30 seconds 54 | max_interval=120 # Back off up to 120 seconds if no changes 55 | ) 56 | 57 | # Process changes as they come in 58 | for diff in fetcher: 59 | print(f"\nNew changes in diff {diff.sequence_number}:") 60 | print(f" Created: {len(diff.create)} objects") 61 | print(f" Modified: {len(diff.modify)} objects") 62 | print(f" Deleted: {len(diff.delete)} objects") 63 | 64 | # Example: Track new amenities 65 | for obj in diff.create: 66 | if "amenity" in obj.tags: 67 | print(f"New amenity: {obj.tags['amenity']}") 68 | ``` 69 | 70 | ## Next Steps 71 | 72 | - Learn about [AugmentedDiff API](/api/augmenteddiff) 73 | - Explore [OSMChange format](/api/osmchange) 74 | - See [OSM Objects](/api/osm) you can work with 75 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import xml.etree.ElementTree as ET 3 | from unittest.mock import Mock 4 | 5 | 6 | @pytest.fixture 7 | def mock_osm_api_response(): 8 | """Standard mock response for OSM API calls""" 9 | return { 10 | "version": "0.6", 11 | "generator": "OpenStreetMap server", 12 | "elements": [ 13 | { 14 | "type": "node", 15 | "id": 123, 16 | "lat": 51.5, 17 | "lon": -0.1, 18 | "tags": {"amenity": "cafe"} 19 | } 20 | ] 21 | } 22 | 23 | @pytest.fixture 24 | def mock_osm_api(monkeypatch): 25 | """Mock requests.get for OSM API calls""" 26 | mock = Mock() 27 | mock.get.return_value.status_code = 200 28 | mock.get.return_value.json.return_value = { 29 | "version": "0.6", 30 | "generator": "OpenStreetMap server", 31 | "elements": [] 32 | } 33 | monkeypatch.setattr("requests.get", mock.get) 34 | return mock 35 | 36 | @pytest.fixture 37 | def mock_adiff_response(): 38 | """Mock response for AugmentedDiff API calls""" 39 | return b''' 40 | 41 | 42 | 43 | 44 | 45 | ''' 46 | 47 | 48 | # Define a fixture to load the XML file 49 | @pytest.fixture 50 | def osmchange_xml_obj(): 51 | with open("tests/data/test_osmchange.xml", "r") as fh: 52 | return ET.parse(fh) 53 | 54 | 55 | # Path to the changeset XML file 56 | @pytest.fixture 57 | def osmchange_file_path(): 58 | return "tests/data/test_osmchange.xml" 59 | 60 | 61 | # Path to the augmented diff XML file 62 | @pytest.fixture 63 | def adiff_file_path(): 64 | return "tests/data/test_adiff.xml" 65 | 66 | 67 | @pytest.fixture 68 | def api_config(): 69 | return { 70 | "base_url": "https://api.openstreetmap.org/api/0.6", 71 | "timeout": 30, 72 | "headers": {"Content-Type": "application/xml", "Accept": "application/xml"}, 73 | } 74 | 75 | 76 | @pytest.fixture 77 | def create_test_changeset(): 78 | def _create_changeset(id="12345", user="testuser"): 79 | return f""" 80 | 81 | 82 | 83 | 84 | 85 | """ 86 | 87 | return _create_changeset 88 | -------------------------------------------------------------------------------- /tests/test_augmenteddiff_continuous.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from osmdiff import ContinuousAugmentedDiff, AugmentedDiff 3 | from unittest.mock import patch, MagicMock 4 | 5 | 6 | class TestContinuousAugmentedDiff: 7 | @pytest.fixture 8 | def mock_state_sequence(self): 9 | # Simulate state endpoint returning increasing sequence numbers 10 | return [12345, 12346, 12347] 11 | 12 | @pytest.fixture 13 | def mock_adiff_response(self): 14 | xml_content = """\n\n\n\n\n\n""".strip() 15 | mock_response = MagicMock() 16 | mock_response.status_code = 200 17 | mock_response.text = xml_content 18 | mock_response.content = xml_content.encode() 19 | mock_response.raw = MagicMock() 20 | return mock_response 21 | 22 | def test_iterator_yields_augmented_diff( 23 | self, mock_state_sequence, mock_adiff_response 24 | ): 25 | # Patch get_state and retrieve, and patch time.sleep to avoid real delays 26 | with ( 27 | patch.object(AugmentedDiff, "get_state", side_effect=mock_state_sequence), 28 | patch.object(AugmentedDiff, "retrieve", return_value=200), 29 | patch("time.sleep", return_value=None), 30 | ): 31 | 32 | fetcher = ContinuousAugmentedDiff(min_interval=0, max_interval=0) 33 | gen = iter(fetcher) 34 | diff = next(gen) 35 | assert isinstance(diff, AugmentedDiff) 36 | assert diff.sequence_number == 12345 37 | 38 | # Next sequence increases again, another diff is yielded 39 | diff2 = next(gen) 40 | assert isinstance(diff2, AugmentedDiff) 41 | assert diff2.sequence_number == 12346 42 | 43 | def test_iterator_handles_backoff(self, mock_state_sequence, mock_adiff_response): 44 | # Simulate get_state returning None (API error) first, then a valid sequence 45 | with ( 46 | patch.object(AugmentedDiff, "get_state", side_effect=[None, 12345, 12346]), 47 | patch.object(AugmentedDiff, "retrieve", return_value=200), 48 | patch("time.sleep", return_value=None), 49 | ): 50 | 51 | fetcher = ContinuousAugmentedDiff(min_interval=0, max_interval=0) 52 | gen = iter(fetcher) 53 | # First call to get_state returns None, so it should backoff and retry 54 | diff = next(gen) 55 | assert isinstance(diff, AugmentedDiff) 56 | assert diff.sequence_number == 12345 57 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from osmdiff import AugmentedDiff, OSMChange 3 | from typing_extensions import assert_type 4 | from unittest.mock import patch, MagicMock 5 | import requests 6 | 7 | 8 | class TestApi: 9 | """Tests for OSM API integration.""" 10 | 11 | @pytest.fixture 12 | def mock_osm_state_response(self): 13 | """Fixture providing a mock OSM state API response.""" 14 | from io import BytesIO 15 | 16 | xml_content = """ 17 | 18 | 19 | 12345 20 | 2024-01-01T00:00:00Z 21 | 22 | """ 23 | 24 | mock_response = MagicMock(spec=requests.Response) 25 | mock_response.status_code = 200 26 | mock_response.text = xml_content 27 | mock_response.content = xml_content.encode() 28 | 29 | # Create a raw attribute with a read method 30 | mock_raw = BytesIO(xml_content.encode()) 31 | mock_raw.decode_content = True 32 | mock_response.raw = mock_raw 33 | 34 | return mock_response 35 | 36 | @pytest.fixture 37 | def mock_osm_diff_response(self): 38 | """Fixture providing a mock OSM diff response.""" 39 | from io import BytesIO 40 | 41 | xml_content = """ 42 | 43 | 44 | 45 | 46 | """ 47 | 48 | mock_response = MagicMock(spec=requests.Response) 49 | mock_response.status_code = 200 50 | mock_response.text = xml_content 51 | mock_response.content = xml_content.encode() 52 | 53 | # Create a raw attribute with a read method 54 | mock_raw = BytesIO(xml_content.encode()) 55 | mock_raw.decode_content = True 56 | mock_response.raw = mock_raw 57 | 58 | return mock_response 59 | 60 | @pytest.fixture 61 | def mock_adiff_response(self): 62 | """Fixture providing a mock Augmented Diff response.""" 63 | from io import BytesIO 64 | 65 | xml_content = """ 66 | 67 | 68 | 69 | 70 | 71 | """ 72 | 73 | mock_response = MagicMock(spec=requests.Response) 74 | mock_response.status_code = 200 75 | mock_response.text = xml_content 76 | mock_response.content = xml_content.encode() 77 | 78 | # Create a raw attribute with a read method 79 | mock_raw = BytesIO(xml_content.encode()) 80 | mock_raw.decode_content = True 81 | mock_response.raw = mock_raw 82 | 83 | return mock_response 84 | 85 | @pytest.mark.integration 86 | def test_osm_diff_api_state(self, mock_osm_state_response): 87 | """Test getting state from OSM API returns valid sequence number.""" 88 | with patch( 89 | "osmdiff.osmchange.requests.get", return_value=mock_osm_state_response 90 | ): 91 | osm_change = OSMChange() 92 | osm_change.base_url = "http://example.com/api" 93 | state = osm_change.get_state() 94 | assert state is True 95 | assert osm_change.sequence_number == 12345 96 | assert isinstance(osm_change.sequence_number, int) 97 | 98 | @pytest.mark.integration 99 | def test_osm_diff_retrieve(self, mock_osm_diff_response): 100 | """Test retrieving OSM diff returns successful status.""" 101 | with patch("requests.get", return_value=mock_osm_diff_response): 102 | osm_change = OSMChange(sequence_number=12345) 103 | status = osm_change.retrieve() 104 | assert status == 200 105 | assert hasattr(osm_change, "actions") 106 | assert len(osm_change.actions["create"]) > 0 107 | 108 | @pytest.mark.integration 109 | def test_api_error_handling(self): 110 | """Test API error conditions are properly handled.""" 111 | mock_response = MagicMock(spec=requests.Response) 112 | mock_response.status_code = 500 113 | 114 | with patch("requests.get", return_value=mock_response): 115 | with pytest.raises(Exception): 116 | osm_change = OSMChange() 117 | osm_change.retrieve() 118 | -------------------------------------------------------------------------------- /tests/test_adiff.py: -------------------------------------------------------------------------------- 1 | from osmdiff import Node, AugmentedDiff, Relation, Way 2 | from typing_extensions import assert_type 3 | from unittest.mock import patch, Mock 4 | from io import StringIO, BytesIO 5 | 6 | 7 | class TestAugmentedDiff: 8 | "tests for AugmentedDiff class" 9 | 10 | def test_init_augmenteddiff(self): 11 | "Test AugmentedDiff init" 12 | augmenteddiff = AugmentedDiff() 13 | assert_type(augmenteddiff, AugmentedDiff) 14 | assert_type(augmenteddiff.create, list) 15 | assert_type(augmenteddiff.modify, list) 16 | assert_type(augmenteddiff.delete, list) 17 | assert len(augmenteddiff.create) == 0 18 | assert len(augmenteddiff.modify) == 0 19 | assert len(augmenteddiff.delete) == 0 20 | 21 | def test_set_sequencenumber(self): 22 | "Sequence number is not defined by default but can be set manually" 23 | augmented_diff = AugmentedDiff() 24 | assert not augmented_diff.sequence_number 25 | augmented_diff.sequence_number = 12345 26 | assert augmented_diff.sequence_number == 12345 27 | augmented_diff.sequence_number = "12345" 28 | assert augmented_diff.sequence_number == 12345 29 | 30 | @patch("osmdiff.augmenteddiff.requests.get") 31 | def test_read_changeset_from_xml_file( 32 | self, mock_get, adiff_file_path, mock_adiff_response 33 | ): 34 | """Test initializing from an XML object with mocked response""" 35 | mock_get.return_value.status_code = 200 36 | mock_get.return_value.raw = BytesIO(mock_adiff_response) 37 | mock_get.return_value.raw.decode_content = True 38 | 39 | adiff = AugmentedDiff(file=adiff_file_path) 40 | 41 | # Verify API call was made if file is remote 42 | if adiff_file_path.startswith("http"): 43 | mock_get.assert_called_once() 44 | 45 | # Test that objects were parsed correctly 46 | assert len(adiff.create) > 0 47 | assert len(adiff.modify) >= 0 # Some diffs may only have creates 48 | assert len(adiff.delete) >= 0 # Some diffs may only have creates 49 | 50 | # Test created object structure 51 | if adiff.create: 52 | created_obj = adiff.create[0] 53 | assert isinstance(created_obj, (Node, Way, Relation)) 54 | assert hasattr(created_obj, "attribs") 55 | assert hasattr(created_obj, "tags") 56 | 57 | # Test modified object structure 58 | if adiff.modify: 59 | modified = adiff.modify[0] 60 | assert set(modified.keys()) == {"old", "new"} 61 | assert isinstance(modified["old"], (Node, Way, Relation)) 62 | assert isinstance(modified["new"], (Node, Way, Relation)) 63 | 64 | # Test deleted object structure 65 | if adiff.delete: 66 | deleted_obj = adiff.delete[0] 67 | assert "old" in deleted_obj 68 | assert isinstance(deleted_obj["old"], (Node, Way, Relation)) 69 | assert "meta" in deleted_obj # Verify metadata exists 70 | 71 | # Test metadata was parsed 72 | assert adiff.timestamp is not None 73 | assert isinstance(adiff.remarks, list) 74 | 75 | def test_auto_increment(self): 76 | "Test auto-increment behavior in retrieve()" 77 | augmented_diff = AugmentedDiff() 78 | augmented_diff.sequence_number = 100 79 | 80 | # Create a minimal valid XML response 81 | xml_content = """ 82 | 83 | The data included in this document is from www.openstreetmap.org 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | """ 97 | 98 | # Mock the requests.get call 99 | with patch("requests.get") as mock_get: 100 | 101 | def get_mock_response(): 102 | mock_response = Mock() 103 | mock_response.status_code = 200 104 | mock_response.raw = StringIO(xml_content) 105 | mock_response.raw.decode_content = True 106 | return mock_response 107 | 108 | mock_get.return_value = get_mock_response() 109 | 110 | # Test auto-increment (default behavior) 111 | augmented_diff.retrieve() 112 | assert augmented_diff.sequence_number == 101 113 | 114 | # Create fresh mock for second call 115 | mock_get.return_value = get_mock_response() 116 | 117 | # Test without auto-increment 118 | augmented_diff.retrieve(auto_increment=False) 119 | assert augmented_diff.sequence_number == 101 120 | 121 | # Create fresh mock for third call 122 | mock_get.return_value = get_mock_response() 123 | 124 | # Test with auto-increment again 125 | augmented_diff.retrieve() 126 | assert augmented_diff.sequence_number == 102 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # osmdiff 2 | 3 | ## We are on Codeberg! 4 | If you are reading this notice on Github, please point your bookmarks and git remotes at the `osmdiff` repo [on Codeberg](https://codeberg.org/mvexel/osmdiff) instead. This project will not be updated on Github. 5 | 6 | --- 7 | 8 | A read-only interface to OpenStreetMap change APIs and files. See also [pyosm](https://github.com/iandees/pyosm) which can do similar things. 9 | 10 | ## Documentation 11 | 12 | 📚 Comprehensive documentation is available at [mvexel.github.io/osmdiff](https://mvexel.github.io/osmdiff/) 13 | 14 | ## Python Version Support 15 | 16 | This module has been tested with Python 3.9 - 3.12. Use at your own risk with other versions. 17 | 18 | ## Installing 19 | 20 | `pip install osmdiff` 21 | 22 | ## Usage 23 | 24 | See the [documentation](https://mvexel.github.io/osmdiff/) for more details. 25 | 26 | ### Reading 27 | 28 | Retrieve the latest replication diff from the OSM API: 29 | 30 | ```python 31 | >>> from osmdiff import OSMChange 32 | >>> o = OSMChange(frequency="minute") # minute is the default frequency 33 | >>> o.get_state() # retrieve current sequence ID 34 | >>> o.sequence_number 35 | 2704451 36 | >>> o.retrieve() # retrieve from API 37 | >>> o 38 | OSMChange (677 created, 204 modified, 14 deleted) 39 | ``` 40 | 41 | Read a replication diff from a file: 42 | 43 | ```python 44 | >>> from osmdiff import OSMChange 45 | >>> o = OSMChange(file="test_osmchange.xml") 46 | >>> o 47 | OSMChange (831 created, 368 modified, 3552 deleted) 48 | ``` 49 | 50 | Retrieve the latest Augmented Diff from Overpass: 51 | 52 | ```python 53 | >>> from osmdiff import AugmentedDiff 54 | >>> a = AugmentedDiff() 55 | >>> a.get_state() 56 | >>> a.sequence_number 57 | 2715051 58 | >>> a.retrieve() 59 | >>> a 60 | AugmentedDiff (747 created, 374 modified, 55 deleted) 61 | ``` 62 | 63 | Read an augmented diff file: 64 | 65 | ```python 66 | >>> from osmdiff import AugmentedDiff 67 | >>> a = AugmentedDiff(file="test_adiff.xml") 68 | >>> a 69 | AugmentedDiff (2329 created, 677 modified, 39 deleted) 70 | ``` 71 | 72 | ### Inspect contents 73 | 74 | Get all the things that `chris66` has created: 75 | 76 | ``` 77 | >>> [n for n in a.create if n.attribs["user"] == "chris66"] 78 | [Node 5221564287, Node 5221564288, Node 5221564289, Node 5221564290, Node 5221564291, Node 5221564292, Node 5221564293, Node 5221564294, Node 5221564295, Node 5221564296, Node 5221564297, Node 5221564298, Node 5221564299, Node 5221564301, Node 5221564302, Node 5221564303, Node 5221564304, Way 539648222 (5 nodes), Way 539648223 (5 nodes), Way 539648323 (5 nodes)] 79 | ``` 80 | 81 | Get all `residential` ways that were modified: 82 | 83 | ```python 84 | >>> [n["new"] for n in a.modify if type(n["new"]) == Way and n["new"].tags.get("highway") == "residential"] 85 | [Way 34561958 (3 nodes), Way 53744484 (6 nodes), Way 53744485 (6 nodes), Way 122650942 (3 nodes), Way 283221266 (4 nodes), Way 344272652 (5 nodes), Way 358243999 (13 nodes), Way 410489319 (5 nodes), Way 452218081 (10 nodes)] 86 | ``` 87 | 88 | Get all ways that were changed to `residential` from something else: 89 | 90 | ```python 91 | >>> [n["new"] for n in a.modify if type(n["new"]) == Way and n["new"].tags.get("highway") == "residential" and n["old"].tags["highway"] != "residential"] 92 | [Way 410489319 (5 nodes), Way 452218081 (10 nodes)] 93 | ``` 94 | 95 | Inspect details: 96 | 97 | ```python 98 | >>> w = [n["new"] for n in a.modify if n["new"].attribs["id"] == "452218081"] 99 | >>> w 100 | [Way 452218081 (10 nodes)] 101 | >>> w[0] 102 | Way 452218081 (10 nodes) 103 | >>> w[0].tags 104 | {'highway': 'residential'} 105 | >>> w[0].attribs 106 | {'id': '452218081', 'version': '2', 'timestamp': '2017-11-10T13:52:01Z', 'changeset': '53667190', 'uid': '2352517', 'user': 'carths81'} 107 | >>> w[0].attribs 108 | {'id': '452218081', 'version': '2', 'timestamp': '2017-11-10T13:52:01Z', 'changeset': '53667190', 'uid': '2352517', 'user': 'carths81'} 109 | >>> w[0].bounds 110 | ['12.8932677', '43.3575917', '12.8948117', '43.3585947'] 111 | ``` 112 | 113 | ### Iterating 114 | 115 | To continuously iterate over AugmentedDiff objects, use `ContinuousAugmentedDiff`: 116 | 117 | ```python 118 | >>> for a in ContinuousAugmentedDiff(): 119 | ... print(a) 120 | ``` 121 | 122 | This will iterate indefinitely, printing each AugmentedDiff as it is retrieved. 123 | 124 | You can also use it in a loop: 125 | 126 | ```python 127 | for a in ContinuousAugmentedDiff(): 128 | if a.sequence_number > 123456: 129 | break 130 | print(a) 131 | ``` 132 | 133 | ## Configuration 134 | 135 | The osmdiff package uses a centralized configuration system in `src/osmdiff/config.py`. This includes: 136 | 137 | ### API Configuration 138 | Default settings for API endpoints and timeouts: 139 | 140 | ```python 141 | API_CONFIG = { 142 | "overpass": {"base_url": "...", "timeout": 30}, 143 | "osm": {"base_url": "...", "timeout": 30}, 144 | "nominatim": {"base_url": "...", "timeout": 30} 145 | } 146 | ``` 147 | 148 | ### Request Headers 149 | Standard headers used in all API requests: 150 | ```python 151 | DEFAULT_HEADERS = { 152 | "User-Agent": "osmdiff/1.0", 153 | "Accept": "application/json, text/xml" 154 | } 155 | ``` 156 | 157 | ### Customizing Configuration 158 | You can override any configuration value at runtime by passing parameters to the respective class constructors: 159 | 160 | ```python 161 | from osmdiff import OSMChange 162 | 163 | # Override default URL and timeout 164 | change = OSMChange( 165 | url="https://custom-api.example.com", 166 | timeout=60 167 | ) 168 | ``` 169 | 170 | ## Community 171 | 172 | Join the conversation and get help: 173 | 174 | - [OpenStreetMap Community Forum](https://community.openstreetmap.org/) - Please mention me (`mvexel`) 175 | - [OSM Slack](https://osmus.slack.com/) - Join the #dev channel 176 | 177 | Please be respectful and follow the [OpenStreetMap Code of Conduct](https://wiki.openstreetmap.org/wiki/Code_of_conduct) in all community interactions. 178 | 179 | ## Contributing 180 | 181 | I welcome your contributions in code, documentation and suggestions for enhancements. 182 | 183 | If you find `osmdiff` useful, or you use it in commercial software, please consider sponsoring this project. 184 | -------------------------------------------------------------------------------- /tests/test_osm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from osmdiff.osm.osm import Member, OSMObject, Way, Relation, Node 3 | 4 | 5 | def test_osmobject_init_defaults(): 6 | obj = OSMObject() 7 | assert obj.tags == {} 8 | assert obj.attribs == {} 9 | assert obj.bounds is None 10 | 11 | 12 | def test_osmobject_init_with_values(): 13 | tags = {"amenity": "cafe"} 14 | attribs = {"id": "123", "version": "1"} 15 | bounds = [0.0, 1.0, 2.0, 3.0] 16 | obj = OSMObject(tags=tags, attribs=attribs, bounds=bounds) 17 | assert obj.tags == tags 18 | assert obj.attribs == attribs 19 | assert obj.bounds == bounds 20 | 21 | 22 | def test_osmobject_repr(): 23 | obj = OSMObject(attribs={"id": "42"}) 24 | assert "OSMObject 42" in repr(obj) 25 | 26 | 27 | import xml.etree.ElementTree as ET 28 | 29 | 30 | def test_osmobject_parse_tags_and_bounds(): 31 | xml = """""" 32 | elem = ET.fromstring(xml) 33 | obj = OSMObject() 34 | obj._parse_tags(elem) 35 | assert obj.tags["amenity"] == "cafe" 36 | obj._parse_bounds(elem) 37 | assert obj.bounds == ["0", "1", "2", "3"] 38 | 39 | 40 | def test_osmobject_to_dict_and_json(): 41 | obj = OSMObject(tags={"foo": "bar"}, attribs={"id": "1"}, bounds=[0, 1, 2, 3]) 42 | d = obj.to_dict() 43 | assert d["tags"] == {"foo": "bar"} 44 | assert d["id"] == "1" 45 | assert d["bounds"] == [0, 1, 2, 3] 46 | j = obj.to_json() 47 | assert '"foo": "bar"' in j 48 | 49 | 50 | def test_osmobject_from_file(tmp_path): 51 | xml = '' 52 | file_path = tmp_path / "test.xml" 53 | file_path.write_text(xml) 54 | obj = OSMObject.from_file(str(file_path)) 55 | assert isinstance(obj, OSMObject) 56 | assert obj.attribs["id"] == "1" 57 | 58 | 59 | def test_osmobject_init_with_values(): 60 | tags = {"amenity": "cafe"} 61 | attribs = {"id": "123", "version": "1"} 62 | bounds = [0.0, 1.0, 2.0, 3.0] 63 | obj = OSMObject(tags=tags, attribs=attribs, bounds=bounds) 64 | assert obj.tags == tags 65 | assert obj.attribs == attribs 66 | assert obj.bounds == bounds 67 | 68 | 69 | def test_osmobject_repr(): 70 | obj = OSMObject(attribs={"id": "42"}) 71 | assert "OSMObject 42" in repr(obj) 72 | way = Way(attribs={"id": "99"}) 73 | way.nodes = [1, 2, 3] 74 | assert "Way 99 (3 nodes)" in repr(way) 75 | rel = Relation(attribs={"id": "7"}) 76 | rel.members = [1, 2] 77 | assert "Relation 7 (2 members)" in repr(rel) 78 | 79 | 80 | def test_way_is_closed(): 81 | way = Way() 82 | way.nodes = [1, 2, 1] 83 | assert way.is_closed() is True 84 | way.nodes = [1, 2, 3] 85 | assert way.is_closed() is False 86 | 87 | 88 | def test_node_geo_interface_and_equality(): 89 | """Test Node geo interface and equality.""" 90 | node1 = Node(attribs={"lon": "1", "lat": "2"}) 91 | node2 = Node(attribs={"lon": "1", "lat": "2"}) 92 | node3 = Node(attribs={"lon": "3", "lat": "4"}) 93 | 94 | assert node1.__geo_interface__ == {"type": "Point", "coordinates": [1, 2]} 95 | assert node1 == node2 96 | assert node1 != node3 97 | assert node1 != "not a node" 98 | 99 | 100 | def test_node_invalid_coords(): 101 | """Test Node coordinate validation.""" 102 | with pytest.raises(ValueError): 103 | node = Node(attribs={"lon": "181", "lat": "0"}) # Invalid lon 104 | lon = node.lon 105 | with pytest.raises(ValueError): 106 | node = Node(attribs={"lon": "0", "lat": "91"}) # Invalid lat 107 | lat = node.lat 108 | 109 | with pytest.raises(ValueError): 110 | node = Node(attribs={"lon": "-181", "lat": "0"}) # Invalid lon 111 | lon = node.lon 112 | with pytest.raises(ValueError): 113 | node = Node(attribs={"lon": "0", "lat": "-91"}) # Invalid lat 114 | lat = node.lat 115 | 116 | 117 | def test_way_geo_interface(): 118 | """Test Way geo interface.""" 119 | way = Way() 120 | way.nodes = [ 121 | Node(attribs={"lon": "0", "lat": "0"}), 122 | Node(attribs={"lon": "1", "lat": "0"}), 123 | Node(attribs={"lon": "1", "lat": "1"}), 124 | ] 125 | # Open way should be LineString 126 | assert way.__geo_interface__ == { 127 | "type": "LineString", 128 | "coordinates": [[0, 0], [1, 0], [1, 1]], 129 | } 130 | 131 | # Closed way should be Polygon 132 | way.nodes.append(Node(attribs={"lon": "0", "lat": "0"})) 133 | assert way.__geo_interface__ == { 134 | "type": "Polygon", 135 | "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 0]]], 136 | } 137 | 138 | 139 | def test_relation_geo_interface(): 140 | """Test Relation geo interface.""" 141 | relation = Relation() 142 | relation.members = [ 143 | Node(attribs={"lon": "0.0", "lat": "0.0"}), 144 | Way( 145 | nodes=[ 146 | Node(attribs={"lon": "1.0", "lat": "0.0"}), 147 | Node(attribs={"lon": "1.0", "lat": "1.0"}), 148 | ] 149 | ), 150 | ] 151 | assert relation.__geo_interface__ == { 152 | "type": "GeometryCollection", 153 | "geometries": [ 154 | {"type": "Point", "coordinates": [0.0, 0.0]}, 155 | {"type": "LineString", "coordinates": [[1.0, 0.0], [1.0, 1.0]]}, 156 | ], 157 | } 158 | node1 = Node(attribs={"lat": 10.0, "lon": 20.0}) 159 | node2 = Node(attribs={"lat": 10.0, "lon": 20.0}) 160 | node3 = Node(attribs={"lat": 10.1, "lon": 20.1}) 161 | # __geo_interface__ property 162 | gi = node1.__geo_interface__ 163 | assert gi["type"] == "Point" 164 | assert gi["coordinates"] == [node1.lon, node1.lat] 165 | # __eq__ 166 | assert node1 == node2 167 | assert node1 != node3 168 | 169 | 170 | def test_way_init_defaults(): 171 | way = Way() 172 | assert way.tags == {} 173 | assert way.attribs == {} 174 | assert way.bounds is None 175 | assert way.nodes == [] 176 | 177 | 178 | def test_way_init_with_values(): 179 | tags = {"amenity": "cafe"} 180 | attribs = {"id": "123", "version": "1"} 181 | bounds = [0.0, 1.0, 2.0, 3.0] 182 | nodes = ["1", "2", "3"] 183 | way = Way(tags=tags, attribs=attribs, bounds=bounds) 184 | way.nodes = nodes 185 | assert way.tags == tags 186 | assert way.attribs == attribs 187 | assert way.bounds == bounds 188 | assert way.nodes == nodes 189 | 190 | 191 | def test_relation_init_defaults(): 192 | relation = Relation() 193 | assert relation.tags == {} 194 | assert relation.attribs == {} 195 | assert relation.bounds is None 196 | assert relation.members == [] 197 | 198 | 199 | def test_relation_init_with_values(): 200 | tags = {"amenity": "cafe"} 201 | attribs = {"id": "123", "version": "1"} 202 | bounds = [0.0, 1.0, 2.0, 3.0] 203 | members = ["1", "2", "3"] 204 | relation = Relation(tags=tags, attribs=attribs, bounds=bounds) 205 | relation.members = members 206 | assert relation.tags == tags 207 | assert relation.attribs == attribs 208 | assert relation.bounds == bounds 209 | assert relation.members == members 210 | 211 | 212 | def test_member_class(): 213 | """Test Member class methods.""" 214 | member = Member() 215 | elem = ET.Element("member", {"type": "node", "ref": "123", "role": "point"}) 216 | member._parse_attributes(elem) 217 | assert member.__geo_interface__ == { 218 | "type": "Feature", 219 | "geometry": None, 220 | "properties": {"type": "node", "ref": 123, "role": "point"}, 221 | } 222 | -------------------------------------------------------------------------------- /src/osmdiff/osmchange.py: -------------------------------------------------------------------------------- 1 | from gzip import GzipFile 2 | from posixpath import join as urljoin 3 | from typing import Optional 4 | from xml.etree import ElementTree 5 | 6 | import requests 7 | 8 | from osmdiff.config import API_CONFIG, DEFAULT_HEADERS 9 | from osmdiff.osm import OSMObject 10 | 11 | 12 | class OSMChange(object): 13 | """Handles OpenStreetMap changesets in OSMChange format. 14 | 15 | Args: 16 | url: Base URL of OSM replication server 17 | frequency: Replication frequency ('minute', 'hour', or 'day') 18 | file: Path to local OSMChange XML file 19 | sequence_number: Sequence number of the diff 20 | timeout: Request timeout in seconds 21 | 22 | Note: 23 | Follows the OSM replication protocol. 24 | """ 25 | 26 | def __init__( 27 | self, 28 | url: Optional[str] = None, 29 | frequency: str = "minute", 30 | file: Optional[str] = None, 31 | sequence_number: Optional[int] = None, 32 | timeout: Optional[int] = None, 33 | ): 34 | # Initialize with defaults from config 35 | self.base_url = url or API_CONFIG["osm"]["base_url"] 36 | self.timeout = timeout or API_CONFIG["osm"]["timeout"] 37 | 38 | self.create = [] 39 | self.modify = [] 40 | self.delete = [] 41 | 42 | if file: 43 | with open(file, "r") as fh: 44 | xml = ElementTree.iterparse(fh, events=("start", "end")) 45 | self._parse_xml(xml) 46 | else: 47 | self._frequency = frequency 48 | self._sequence_number = sequence_number 49 | 50 | def get_state(self) -> bool: 51 | """ 52 | Retrieve the current state from the OSM API. 53 | 54 | Returns: 55 | bool: True if state was successfully retrieved, False otherwise 56 | 57 | Raises: 58 | requests.RequestException: If the API request fails 59 | """ 60 | state_url = urljoin(self.base_url, "api/0.6/changesets/state") 61 | response = requests.get( 62 | state_url, timeout=self.timeout, headers=DEFAULT_HEADERS 63 | ) 64 | if response.status_code != 200: 65 | return False 66 | 67 | # Parse XML response 68 | root = ElementTree.fromstring(response.content) 69 | state = root.find("state") 70 | if state is not None: 71 | seq = state.find("sequenceNumber") 72 | if seq is not None and seq.text: 73 | self._sequence_number = int(seq.text) 74 | return True 75 | return False 76 | 77 | def _build_sequence_url(self) -> str: 78 | seqno = str(self._sequence_number).zfill(9) 79 | url = urljoin( 80 | self.base_url, 81 | self._frequency, 82 | seqno[:3], 83 | seqno[3:6], 84 | "{}{}".format(seqno[6:], ".osc.gz"), 85 | ) 86 | return url 87 | 88 | def _parse_xml(self, xml) -> None: 89 | for event, elem in xml: 90 | if elem.tag in ("create", "modify", "delete"): 91 | self._build_action(elem) 92 | 93 | def _build_action(self, elem: ElementTree.Element) -> None: 94 | """ 95 | Build OSM objects from XML elements and add them to the appropriate list. 96 | 97 | Args: 98 | elem (ElementTree.Element): XML element containing OSM objects 99 | """ 100 | for thing in elem: 101 | o = OSMObject.from_xml(thing) 102 | getattr(self, elem.tag).append(o) # Use getattr instead of __getattribute__ 103 | 104 | def retrieve(self, clear_cache: bool = False, timeout: Optional[int] = None) -> int: 105 | """ 106 | Retrieve the OSM diff corresponding to the OSMChange sequence_number. 107 | 108 | Parameters: 109 | clear_cache (bool): clear the cache 110 | timeout (int): request timeout 111 | 112 | Returns: 113 | int: HTTP status code 114 | 115 | Raises: 116 | Exception: If an invalid sequence number is provided 117 | """ 118 | if not self._sequence_number: 119 | raise Exception("invalid sequence number") 120 | if clear_cache: 121 | self.create, self.modify, self.delete = ([], [], []) 122 | try: 123 | r = requests.get( 124 | self._build_sequence_url(), 125 | stream=True, 126 | timeout=timeout or self.timeout, 127 | headers=DEFAULT_HEADERS, 128 | ) 129 | if r.status_code != 200: 130 | return r.status_code 131 | # Handle both gzipped and plain XML responses 132 | content = r.content 133 | if content.startswith(b"\x1f\x8b"): # Gzip magic number 134 | gzfile = GzipFile(fileobj=r.raw) 135 | xml = ElementTree.iterparse(gzfile, events=("start", "end")) 136 | else: 137 | xml = ElementTree.iterparse(r.raw, events=("start", "end")) 138 | self._parse_xml(xml) 139 | return r.status_code 140 | except ConnectionError: 141 | # FIXME catch this? 142 | return 0 143 | 144 | @classmethod 145 | def from_xml(cls, xml: ElementTree.Element) -> "OSMChange": 146 | """ 147 | Initialize OSMChange object from an XML object. 148 | 149 | If you used this method before version 0.3, please note that this 150 | method now takes an XML object. If you want to initialize from a file,\ 151 | use the from_xml_file method. 152 | 153 | Parameters: 154 | xml (ElementTree.Element): XML object 155 | 156 | Returns: 157 | OSMChange: OSMChange object 158 | """ 159 | new_osmchange_obj = cls() 160 | new_osmchange_obj._parse_xml(xml) 161 | return new_osmchange_obj 162 | 163 | @classmethod 164 | def from_xml_file(cls, path) -> "OSMChange": 165 | """ 166 | Initialize OSMChange object from an XML file. 167 | 168 | Parameters: 169 | path (str): path to the XML file 170 | 171 | Returns: 172 | OSMChange: OSMChange object 173 | """ 174 | with open(path, "r") as fh: 175 | xml = ElementTree.iterparse(fh, events=("start", "end")) 176 | return cls.from_xml(xml) 177 | 178 | @property 179 | def sequence_number(self) -> int: 180 | return self._sequence_number 181 | 182 | @sequence_number.setter 183 | def sequence_number(self, value): 184 | try: 185 | # value can be none 186 | if value is None: 187 | self._sequence_number = None 188 | return 189 | self._sequence_number = int(value) 190 | except ValueError: 191 | raise ValueError( 192 | "sequence_number must be an integer or parsable as an integer" 193 | ) 194 | 195 | @property 196 | def frequency(self) -> str: 197 | return self._frequency 198 | 199 | @frequency.setter 200 | def frequency(self, f: str) -> None: 201 | """ 202 | Set the frequency for OSM changes. 203 | 204 | Args: 205 | f (str): Frequency ('minute', 'hour', or 'day') 206 | 207 | Raises: 208 | ValueError: If frequency is not one of the valid options 209 | """ 210 | VALID_FREQUENCIES = {"minute", "hour", "day"} 211 | if f not in VALID_FREQUENCIES: 212 | raise ValueError( 213 | f"Frequency must be one of: {', '.join(VALID_FREQUENCIES)}" 214 | ) 215 | self._frequency = f 216 | 217 | @property 218 | def actions(self): 219 | """Get all actions combined in a single list.""" 220 | return {"create": self.create, "modify": self.modify, "delete": self.delete} 221 | 222 | def __repr__(self): 223 | return "OSMChange ({create} created, {modify} modified, \ 224 | {delete} deleted)".format( 225 | create=len(self.create), modify=len(self.modify), delete=len(self.delete) 226 | ) 227 | 228 | def __enter__(self): 229 | return self 230 | 231 | def __exit__(self, exc_type, exc_val, exc_tb): 232 | """Clear all changes when exiting context.""" 233 | self.create.clear() 234 | self.modify.clear() 235 | self.delete.clear() 236 | -------------------------------------------------------------------------------- /tests/test_osmchange.py: -------------------------------------------------------------------------------- 1 | import io 2 | import gzip 3 | import pytest 4 | from osmdiff import Node, OSMChange, Relation, Way 5 | from typing_extensions import assert_type 6 | from unittest.mock import patch, MagicMock 7 | 8 | 9 | class TestOSMChange: 10 | "tests for OSMChange object" 11 | 12 | def test_init_osmchange(self): 13 | "Test OSMChange init" 14 | osmchange = OSMChange() 15 | assert_type(osmchange, OSMChange) 16 | assert_type(osmchange.create, list) 17 | assert_type(osmchange.modify, list) 18 | assert_type(osmchange.delete, list) 19 | assert len(osmchange.create) == 0 20 | assert len(osmchange.modify) == 0 21 | assert len(osmchange.delete) == 0 22 | 23 | def test_set_sequencenumber(self): 24 | "Sequence number is not defined by default but can be set manually" 25 | osm_change = OSMChange() 26 | assert not osm_change.sequence_number 27 | osm_change.sequence_number = 12345 28 | assert osm_change.sequence_number == 12345 29 | osm_change.sequence_number = "12345" 30 | assert osm_change.sequence_number == 12345 31 | 32 | @patch('osmdiff.osmchange.requests.get') 33 | def test_read_changeset_from_xml_file(self, mock_get, osmchange_file_path): 34 | """Test initializing from an XML object with mocked response""" 35 | # Mock the response if testing remote file 36 | if osmchange_file_path.startswith('http'): 37 | mock_get.return_value.status_code = 200 38 | with open("tests/data/test_osmchange.xml", "rb") as f: 39 | mock_get.return_value.content = f.read() 40 | 41 | osmchange = OSMChange.from_xml_file(osmchange_file_path) 42 | 43 | # Verify API call was made if file is remote 44 | if osmchange_file_path.startswith('http'): 45 | mock_get.assert_called_once() 46 | 47 | # Test counts 48 | assert len(osmchange.create) > 0 49 | assert len(osmchange.modify) >= 0 50 | assert len(osmchange.delete) >= 0 51 | 52 | # Test object types 53 | nodes_created = [o for o in osmchange.create if isinstance(o, Node)] 54 | ways_created = [o for o in osmchange.create if isinstance(o, Way)] 55 | rels_created = [o for o in osmchange.create if isinstance(o, Relation)] 56 | 57 | # Verify all created objects are accounted for 58 | assert len(nodes_created + ways_created + rels_created) == len(osmchange.create) 59 | 60 | # Test object attributes 61 | if nodes_created: 62 | node = nodes_created[0] 63 | assert hasattr(node, 'lat') 64 | assert hasattr(node, 'lon') 65 | 66 | @patch('osmdiff.osmchange.requests.get') 67 | def test_get_state_success(self, mock_get): 68 | # Simulate a valid state response with sequenceNumber 69 | xml = b'''123''' 70 | mock_get.return_value.status_code = 200 71 | mock_get.return_value.content = xml 72 | oc = OSMChange() 73 | assert oc.get_state() is True 74 | assert oc.sequence_number == 123 75 | 76 | @patch('osmdiff.osmchange.requests.get') 77 | def test_get_state_missing_seq(self, mock_get): 78 | # Simulate state response without sequenceNumber 79 | xml = b'''''' 80 | mock_get.return_value.status_code = 200 81 | mock_get.return_value.content = xml 82 | oc = OSMChange() 83 | assert oc.get_state() is False 84 | 85 | @patch('osmdiff.osmchange.requests.get') 86 | def test_get_state_fail(self, mock_get): 87 | mock_get.return_value.status_code = 404 88 | oc = OSMChange() 89 | assert oc.get_state() is False 90 | 91 | @patch('osmdiff.osmchange.requests.get') 92 | def test_retrieve_non_200(self, mock_get): 93 | oc = OSMChange(sequence_number=1) 94 | mock_get.return_value.status_code = 404 95 | mock_get.return_value.content = b'' 96 | status = oc.retrieve() 97 | assert status == 404 98 | 99 | @patch('osmdiff.osmchange.requests.get') 100 | def test_retrieve_gzip(self, mock_get): 101 | # Simulate a gzip-compressed XML response 102 | xml = b'' 103 | gzipped = gzip.compress(xml) 104 | mock_get.return_value.status_code = 200 105 | mock_get.return_value.content = gzipped 106 | mock_get.return_value.raw = io.BytesIO(gzipped) 107 | oc = OSMChange(sequence_number=1) 108 | status = oc.retrieve() 109 | assert status == 200 110 | 111 | @patch('osmdiff.osmchange.requests.get', side_effect=ConnectionError) 112 | def test_retrieve_connection_error(self, mock_get): 113 | oc = OSMChange(sequence_number=1) 114 | status = oc.retrieve() 115 | assert status == 0 116 | 117 | @patch('osmdiff.osmchange.requests.get') 118 | def test_retrieve_clear_cache(self, mock_get): 119 | oc = OSMChange(sequence_number=1) 120 | oc.create = [1] 121 | oc.modify = [2] 122 | oc.delete = [3] 123 | mock_get.return_value.status_code = 404 124 | mock_get.return_value.content = b'' 125 | oc.retrieve(clear_cache=True) 126 | assert oc.create == [] and oc.modify == [] and oc.delete == [] 127 | 128 | def test_sequence_number_setter_and_errors(self): 129 | oc = OSMChange() 130 | oc.sequence_number = 42 131 | assert oc.sequence_number == 42 132 | oc.sequence_number = None 133 | assert oc.sequence_number is None 134 | with pytest.raises(ValueError): 135 | oc.sequence_number = 'notanumber' 136 | 137 | def test_frequency_setter_and_errors(self): 138 | oc = OSMChange() 139 | oc.frequency = 'hour' 140 | assert oc.frequency == 'hour' 141 | with pytest.raises(ValueError): 142 | oc.frequency = 'invalid' 143 | 144 | def test_actions_property(self): 145 | oc = OSMChange() 146 | oc.create = [1] 147 | oc.modify = [2] 148 | oc.delete = [3] 149 | acts = oc.actions 150 | assert acts['create'] == [1] 151 | assert acts['modify'] == [2] 152 | assert acts['delete'] == [3] 153 | 154 | def test_repr(self): 155 | oc = OSMChange() 156 | oc.create = [1,2] 157 | oc.modify = [3] 158 | oc.delete = [] 159 | r = repr(oc) 160 | assert '2 created' in r and '1 modified' in r 161 | 162 | def test_context_manager_exit_clears(self): 163 | oc = OSMChange() 164 | oc.create = [1] 165 | oc.modify = [2] 166 | oc.delete = [3] 167 | with oc: 168 | pass 169 | assert oc.create == [] and oc.modify == [] and oc.delete == [] 170 | 171 | @patch('builtins.open', side_effect=FileNotFoundError) 172 | def test_init_else_branch(self, mock_open): 173 | # Should set _frequency and _sequence_number if file is not provided 174 | oc = OSMChange(frequency='hour', sequence_number=42) 175 | assert oc._frequency == 'hour' 176 | assert oc._sequence_number == 42 177 | # Also cover the case where both are left default 178 | oc2 = OSMChange() 179 | assert hasattr(oc2, '_frequency') 180 | assert hasattr(oc2, '_sequence_number') 181 | assert oc2._frequency == 'minute' 182 | assert oc2._sequence_number is None 183 | # And the case where only frequency is set 184 | oc3 = OSMChange(frequency='day') 185 | assert oc3._frequency == 'day' 186 | assert oc3._sequence_number is None 187 | # And only sequence_number is set 188 | oc4 = OSMChange(sequence_number=99) 189 | assert oc4._frequency == 'minute' 190 | assert oc4._sequence_number == 99 191 | 192 | def test_retrieve_raises_on_missing_sequence_number(self): 193 | oc = OSMChange() 194 | with pytest.raises(Exception) as exc: 195 | oc.retrieve() 196 | assert "invalid sequence number" in str(exc.value) 197 | 198 | @patch('osmdiff.osmchange.requests.get') 199 | def test_retrieve_non_gzip_xml(self, mock_get): 200 | xml = b'' 201 | mock_get.return_value.status_code = 200 202 | mock_get.return_value.content = xml 203 | mock_get.return_value.raw = io.BytesIO(xml) 204 | oc = OSMChange(sequence_number=1) 205 | status = oc.retrieve() 206 | assert status == 200 207 | -------------------------------------------------------------------------------- /tests/test_augmenteddiff.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from osmdiff import AugmentedDiff 3 | from unittest.mock import patch, MagicMock 4 | import requests 5 | from io import BytesIO 6 | 7 | 8 | class TestAugmentedDiff: 9 | """Tests for AugmentedDiff class.""" 10 | 11 | def test_init(self): 12 | """Test AugmentedDiff initialization.""" 13 | from osmdiff import AugmentedDiff 14 | 15 | adiff = AugmentedDiff(sequence_number=12345) 16 | assert adiff.sequence_number == 12345 17 | 18 | def test_get_state_errors(self): 19 | """Test AugmentedDiff.get_state error handling and edge cases.""" 20 | from osmdiff import AugmentedDiff 21 | import requests 22 | from unittest.mock import patch, MagicMock 23 | 24 | # Non-200 response 25 | mock_response = MagicMock() 26 | mock_response.status_code = 404 27 | mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError 28 | with patch("requests.get", return_value=mock_response): 29 | with pytest.raises(requests.exceptions.HTTPError) as excinfo: 30 | AugmentedDiff.get_state() 31 | 32 | def test_retrieve_exceptions_and_clear_cache(self): 33 | """Test retrieve() for missing sequence_number, clear_cache, and non-200 status.""" 34 | from osmdiff import AugmentedDiff 35 | from unittest.mock import patch, MagicMock 36 | 37 | # Missing sequence_number 38 | adiff = AugmentedDiff() 39 | with patch("requests.get"): 40 | try: 41 | adiff.retrieve() 42 | except Exception as e: 43 | assert "invalid sequence number" in str(e) 44 | else: 45 | assert False, "Exception not raised for missing sequence_number" 46 | 47 | # clear_cache should clear lists 48 | adiff = AugmentedDiff(sequence_number=1) 49 | adiff.create = [1] 50 | adiff.modify = [2] 51 | adiff.delete = [3] 52 | import io 53 | 54 | mock_response = MagicMock() 55 | mock_response.status_code = 200 56 | mock_response.raw = io.BytesIO(b"") 57 | mock_response.raw.decode_content = True 58 | mock_response.content = b"" 59 | with patch("requests.get", return_value=mock_response): 60 | adiff.retrieve(clear_cache=True) 61 | assert adiff.create == [] 62 | assert adiff.modify == [] 63 | assert adiff.delete == [] 64 | 65 | # Non-200 HTTP status 66 | adiff = AugmentedDiff(sequence_number=1) 67 | mock_response = MagicMock() 68 | mock_response.status_code = 404 69 | mock_response.raw = MagicMock() 70 | mock_response.raw.decode_content = True 71 | with patch("requests.get", return_value=mock_response): 72 | status = adiff.retrieve() 73 | assert status == 404 74 | 75 | def test_parse_stream_meta_tag(self): 76 | """Test that meta tag in XML sets timestamp attribute.""" 77 | from osmdiff import AugmentedDiff 78 | import io 79 | 80 | xml = """""" 81 | adiff = AugmentedDiff() 82 | adiff._parse_stream(io.StringIO(xml)) 83 | assert hasattr(adiff, "timestamp") 84 | 85 | def test_sequence_number_setter_and_repr(self): 86 | """Test sequence_number setter error and __repr__ output.""" 87 | from osmdiff import AugmentedDiff 88 | 89 | adiff = AugmentedDiff(sequence_number=1) 90 | # Valid int 91 | adiff.sequence_number = 42 92 | assert adiff.sequence_number == 42 93 | # Valid string 94 | adiff.sequence_number = "43" 95 | assert adiff.sequence_number == 43 96 | # Invalid value 97 | try: 98 | adiff.sequence_number = "notanumber" 99 | except ValueError as e: 100 | assert "sequence_number must be an integer" in str(e) 101 | else: 102 | assert False, "ValueError not raised for invalid sequence_number" 103 | # __repr__ 104 | r = repr(adiff) 105 | assert ( 106 | "AugmentedDiff" in r 107 | and "created" in r 108 | and "modified" in r 109 | and "deleted" in r 110 | ) 111 | 112 | def test_context_manager_clears_lists(self): 113 | """Test that __enter__ returns self and __exit__ clears lists.""" 114 | from osmdiff import AugmentedDiff 115 | 116 | adiff = AugmentedDiff(sequence_number=1) 117 | adiff.create = [1] 118 | adiff.modify = [2] 119 | adiff.delete = [3] 120 | with adiff as a: 121 | assert a is adiff 122 | assert adiff.create == [1] 123 | # After context exit, lists should be cleared 124 | assert adiff.create == [] 125 | assert adiff.modify == [] 126 | assert adiff.delete == [] 127 | 128 | def test_bbox_validation(self): 129 | """Test that invalid bounding boxes raise an Exception.""" 130 | from osmdiff import AugmentedDiff 131 | 132 | # Valid bbox should NOT raise 133 | AugmentedDiff(minlon=5, minlat=10, maxlon=10, maxlat=20) 134 | # Invalid bbox: maxlon <= minlon (all nonzero) 135 | with pytest.raises(Exception, match="invalid bbox"): 136 | AugmentedDiff(minlon=10, minlat=10, maxlon=5, maxlat=20) 137 | # Invalid bbox: maxlat <= minlat (all nonzero) 138 | with pytest.raises(Exception, match="invalid bbox"): 139 | AugmentedDiff(minlon=5, minlat=20, maxlon=10, maxlat=10) 140 | 141 | @pytest.fixture 142 | def mock_adiff_response(self): 143 | """Fixture providing a mock Augmented Diff response.""" 144 | xml_content = """ 145 | 146 | 147 | 148 | 149 | 150 | """.strip() 151 | 152 | mock_response = MagicMock(spec=requests.Response) 153 | mock_response.status_code = 200 154 | mock_response.text = xml_content 155 | mock_response.content = xml_content.encode() 156 | mock_response.raw = BytesIO(xml_content.encode()) 157 | return mock_response 158 | 159 | @pytest.fixture 160 | def mock_timeout_response(self): 161 | """Fixture providing a mock timeout response.""" 162 | mock_response = MagicMock(spec=requests.Response) 163 | mock_response.status_code = 200 164 | mock_response.raise_for_status.side_effect = requests.exceptions.ReadTimeout( 165 | "Timeout" 166 | ) 167 | 168 | # Add raw attribute that will raise timeout when read 169 | mock_raw = MagicMock() 170 | mock_raw.read.side_effect = requests.exceptions.ReadTimeout("Timeout") 171 | mock_raw.decode_content = True 172 | mock_response.raw = mock_raw 173 | 174 | return mock_response 175 | 176 | @pytest.fixture 177 | def augmented_diff(self): 178 | """Fixture providing a basic AugmentedDiff instance.""" 179 | return AugmentedDiff(sequence_number=12345) 180 | 181 | def test_delete_metadata(self, augmented_diff): 182 | """Test that metadata is captured for deleted objects.""" 183 | with open("tests/data/test_delete_metadata.xml", "r") as f: 184 | adiff = AugmentedDiff(file=f.name) 185 | 186 | assert len(adiff.delete) == 1 187 | deletion = adiff.delete[0] 188 | 189 | # Check the metadata is present 190 | assert "meta" in deletion 191 | assert deletion["meta"]["user"] == "TestUser" 192 | assert deletion["meta"]["uid"] == "12345" 193 | assert deletion["meta"]["changeset"] == "67890" 194 | assert deletion["meta"]["timestamp"] == "2024-01-28T12:00:00Z" 195 | 196 | # Check the old object is present 197 | assert "old" in deletion 198 | assert deletion["old"].attribs["id"] == "123" 199 | assert deletion["old"].attribs["lat"] == "51.5" 200 | assert deletion["old"].attribs["lon"] == "-0.1" 201 | assert deletion["old"].tags["amenity"] == "cafe" 202 | 203 | def test_timeout_retry_success( 204 | self, augmented_diff, mock_adiff_response, mock_timeout_response 205 | ): 206 | """Test successful retry after timeout.""" 207 | with patch( 208 | "requests.get", side_effect=[mock_timeout_response, mock_adiff_response] 209 | ) as mock_get: 210 | status = augmented_diff.retrieve() 211 | assert status == 200 212 | assert mock_get.call_count == 2 # Verify it retried once 213 | 214 | def test_multiple_timeouts(self, augmented_diff, mock_timeout_response): 215 | """Test max retries on consecutive timeouts.""" 216 | with patch("requests.get", return_value=mock_timeout_response) as mock_get: 217 | with pytest.raises(requests.exceptions.ReadTimeout): 218 | augmented_diff.retrieve() 219 | assert mock_get.call_count == 3 # Verify it tried 3 times 220 | 221 | def test_consecutive_sequence_numbers(self, augmented_diff): 222 | """Test auto-increment of sequence numbers.""" 223 | 224 | def new_mock_response(): 225 | xml_content = """\n\n\n\n\n\n""".strip() 226 | mock_response = MagicMock(spec=requests.Response) 227 | mock_response.status_code = 200 228 | mock_response.text = xml_content 229 | mock_response.content = xml_content.encode() 230 | mock_response.raw = BytesIO(xml_content.encode()) 231 | return mock_response 232 | 233 | with patch( 234 | "requests.get", side_effect=[new_mock_response(), new_mock_response()] 235 | ) as mock_get: 236 | # Retrieve first diff 237 | status1 = augmented_diff.retrieve(auto_increment=True) 238 | assert status1 == 200 239 | assert augmented_diff.sequence_number == 12346 240 | initial_create_count = len(augmented_diff.create) 241 | 242 | # Retrieve next diff 243 | status2 = augmented_diff.retrieve(auto_increment=True) 244 | assert status2 == 200 245 | assert augmented_diff.sequence_number == 12347 246 | assert len(augmented_diff.create) == initial_create_count * 2 247 | assert mock_get.call_count == 2 248 | -------------------------------------------------------------------------------- /src/osmdiff/osm/osm.py: -------------------------------------------------------------------------------- 1 | """ 2 | # OSM Objects 3 | 4 | This module provides classes for working with OpenStreetMap data in the application. 5 | 6 | ::: osmdiff.osm.Member 7 | 8 | ::: osmdiff.osm.OSMNode 9 | 10 | ::: osmdiff.osm.OSMWay 11 | 12 | ::: osmdiff.osm.OSMRelation 13 | 14 | ::: osmdiff.osm.OSMData 15 | 16 | ## Overview 17 | 18 | This module contains base classes for OSM objects: 19 | - OSMObject: Base class for all OSM elements 20 | - Node: Represents OSM nodes with lat/lon coordinates 21 | - Way: Represents OSM ways (sequences of nodes) 22 | - Relation: Represents OSM relations (collections of objects) 23 | 24 | ## Example 25 | 26 | ```python 27 | from osmdiff.osm import Node, Way, Relation 28 | ``` 29 | 30 | Each OSM object has: 31 | - tags: Dictionary of key-value tag pairs 32 | - attribs: Dictionary of XML attributes (id, version, etc.) 33 | - bounds: Optional bounding box [minlon, minlat, maxlon, maxlat] 34 | 35 | The objects can be created from XML elements using the from_xml() classmethod. 36 | 37 | Example: 38 | # Create a node 39 | node = Node() 40 | node.attribs = { 41 | "id": "123", 42 | "version": "2", 43 | "lat": "37.7", 44 | "lon": "-122.4" 45 | } 46 | node.tags = { 47 | "amenity": "cafe", 48 | "name": "Joe's Coffee" 49 | } 50 | 51 | # Create a way 52 | way = Way() 53 | way.attribs = { 54 | "id": "456", 55 | "version": "1" 56 | } 57 | way.nodes = ["123", "124", "125"] # List of node IDs 58 | way.tags = { 59 | "highway": "residential", 60 | "name": "Oak Street" 61 | } 62 | 63 | # Create a relation 64 | rel = Relation() 65 | rel.attribs = { 66 | "id": "789", 67 | "version": "1" 68 | } 69 | rel.members = [ 70 | {"type": "way", "ref": "456", "role": "outer"}, 71 | {"type": "way", "ref": "457", "role": "inner"} 72 | ] 73 | rel.tags = { 74 | "type": "multipolygon", 75 | "landuse": "park" 76 | } 77 | ``` 78 | 79 | # Access __geo_interface__ for GeoJSON compatibility 80 | 81 | See https://gist.github.com/sgillies/2217756 for more details. 82 | 83 | ```python 84 | print(node.__geo_interface__) # {"type": "Point", "coordinates": [-0.1, 51.5]} 85 | ``` 86 | """ 87 | 88 | from typing import Dict, Any, List 89 | from xml.etree import ElementTree 90 | from xml.etree.ElementTree import Element 91 | import json 92 | 93 | 94 | class OSMObject: 95 | """Base class for all OpenStreetMap elements (nodes, ways, relations). 96 | 97 | Args: 98 | tags: Key-value tag dictionary 99 | attribs: XML attributes dictionary 100 | bounds: Optional bounding box coordinates [minlon, minlat, maxlon, maxlat] 101 | 102 | Note: 103 | This is an abstract base class - use Node, Way or Relation for concrete elements. 104 | """ 105 | 106 | def __init__( 107 | self, 108 | tags: Dict[str, str] = {}, 109 | attribs: Dict[str, str] = {}, 110 | bounds: List[float] = [], 111 | ) -> None: 112 | """Initialize an empty OSM object.""" 113 | self.tags = tags or {} 114 | self.attribs = attribs or {} 115 | self.bounds = bounds or None 116 | 117 | def __repr__(self) -> str: 118 | """ 119 | String representation of the OSM object. 120 | 121 | Returns: 122 | str: Object type and ID, with additional info for ways/relations 123 | """ 124 | out = "{type} {id}".format(type=type(self).__name__, id=self.attribs.get("id")) 125 | if type(self) == Way: 126 | out += " ({ways} nodes)".format(ways=len(self.nodes)) 127 | if type(self) == Relation: 128 | out += " ({mem} members)".format(mem=len(self.members)) 129 | return out 130 | 131 | def _parse_tags(self, elem: Element) -> None: 132 | """ 133 | Parse tags from XML element. 134 | 135 | Args: 136 | elem: XML element containing tag elements 137 | """ 138 | for tagelem in elem.findall("tag"): 139 | self.tags[tagelem.attrib["k"]] = tagelem.attrib["v"] 140 | 141 | def _parse_bounds(self, elem: Element) -> None: 142 | """ 143 | Parse bounds from XML element. 144 | 145 | Args: 146 | elem: XML element containing bounds element 147 | """ 148 | be = elem.find("bounds") 149 | if be is not None: 150 | self.bounds = [ 151 | be.attrib["minlon"], 152 | be.attrib["minlat"], 153 | be.attrib["maxlon"], 154 | be.attrib["maxlat"], 155 | ] 156 | 157 | @classmethod 158 | def from_xml(cls, elem: Element) -> "OSMObject": 159 | """ 160 | Create OSM object from XML element. 161 | 162 | Args: 163 | elem: XML element representing an OSM object 164 | 165 | Returns: 166 | OSMObject: Appropriate subclass instance 167 | 168 | Raises: 169 | ValueError: If XML element is invalid 170 | TypeError: If element type is unknown 171 | """ 172 | if elem is None: 173 | raise ValueError("XML element cannot be None") 174 | 175 | osmtype = "" 176 | if elem.tag == "member": 177 | osmtype = elem.attrib.get("type") 178 | if not osmtype: 179 | raise ValueError("Member element missing type attribute") 180 | else: 181 | osmtype = elem.tag 182 | 183 | if osmtype not in ("node", "nd", "way", "relation"): 184 | raise TypeError(f"Unknown OSM element type: {osmtype}") 185 | 186 | o = OSMObject() 187 | if osmtype in ("node", "nd"): 188 | o = Node() 189 | elif osmtype == "way": 190 | o = Way() 191 | o._parse_nodes(elem) 192 | elif osmtype == "relation": 193 | o = Relation() 194 | o._parse_members(elem) 195 | else: 196 | pass 197 | o.attribs = elem.attrib 198 | o.osmtype = str(o.__class__.__name__).lower()[0] 199 | o._parse_tags(elem) 200 | o._parse_bounds(elem) 201 | return o 202 | 203 | def to_dict(self) -> Dict[str, Any]: 204 | """ 205 | Convert object to dictionary. 206 | 207 | Returns: 208 | Dict[str, Any]: Dictionary representation 209 | """ 210 | return { 211 | "type": self.__class__.__name__, 212 | "id": self.attribs.get("id"), 213 | "tags": self.tags, 214 | "bounds": self.bounds, 215 | } 216 | 217 | def to_json(self) -> str: 218 | """ 219 | Convert object to JSON string. 220 | 221 | Returns: 222 | str: JSON representation 223 | """ 224 | return json.dumps(self.to_dict()) 225 | 226 | @classmethod 227 | def from_file(cls, filename: str) -> "OSMObject": 228 | """ 229 | Create object from XML file. 230 | 231 | Args: 232 | filename: Path to XML file 233 | 234 | Returns: 235 | OSMObject: Parsed object 236 | """ 237 | with open(filename, "r") as f: 238 | tree = ElementTree.parse(f) 239 | return cls.from_xml(tree.getroot()) 240 | 241 | 242 | class Node(OSMObject): 243 | """OpenStreetMap node (geographic point feature). 244 | 245 | Implements __geo_interface__ for GeoJSON compatibility as a Point feature. 246 | Coordinates must be valid (-180<=lon<=180, -90<=lat<=90). 247 | """ 248 | 249 | def __init__( 250 | self, 251 | tags: Dict[str, str] = {}, 252 | attribs: Dict[str, str] = {}, 253 | bounds: List[float] = [], 254 | ) -> None: 255 | super().__init__(tags, attribs, bounds) 256 | 257 | def _validate_coords(self) -> None: 258 | """Validate node coordinates.""" 259 | lon = float(self.attribs.get("lon", 0)) 260 | lat = float(self.attribs.get("lat", 0)) 261 | if not -90 <= lat <= 90: 262 | raise ValueError(f"Invalid latitude: {lat}") 263 | if not -180 <= lon <= 180: 264 | raise ValueError(f"Invalid longitude: {lon}") 265 | 266 | @property 267 | def lon(self) -> float: 268 | """Get longitude value.""" 269 | self._validate_coords() 270 | return float(self.attribs.get("lon", 0)) 271 | 272 | @property 273 | def lat(self) -> float: 274 | """Get latitude value.""" 275 | self._validate_coords() 276 | return float(self.attribs.get("lat", 0)) 277 | 278 | def _geo_interface(self) -> dict: 279 | """ 280 | GeoJSON-compatible interface. 281 | 282 | Returns: 283 | dict: GeoJSON Point geometry 284 | """ 285 | return {"type": "Point", "coordinates": [self.lon, self.lat]} 286 | 287 | __geo_interface__ = property(_geo_interface) 288 | 289 | def __eq__(self, other) -> bool: 290 | """ 291 | Check if two nodes are equal. 292 | 293 | Args: 294 | other (OSMObject): Another OSMObject object 295 | 296 | Returns: 297 | bool: True if nodes have same coordinates 298 | """ 299 | if not isinstance(other, Node): 300 | return False 301 | return self.lon == other.lon and self.lat == other.lat 302 | 303 | 304 | class Way(OSMObject): 305 | """Represents an OSM way (linear feature). 306 | 307 | Implements __geo_interface__ for GeoJSON compatibility as either: 308 | - LineString for open ways 309 | - Polygon for closed ways 310 | """ 311 | 312 | def __init__( 313 | self, 314 | tags: Dict[str, str] = {}, 315 | attribs: Dict[str, str] = {}, 316 | bounds: List[float] = [], 317 | nodes: List[Node] = [], 318 | ) -> None: 319 | """Initialize a Way object.""" 320 | self.tags = tags or {} 321 | self.attribs = attribs or {} 322 | self.nodes = nodes or [] 323 | super().__init__(tags, attribs, bounds) 324 | 325 | def is_closed(self) -> bool: 326 | """ 327 | Check if the way forms a closed loop. 328 | 329 | Returns: 330 | bool: True if first and last nodes are identical 331 | """ 332 | return bool(self.nodes and self.nodes[0] == self.nodes[-1]) 333 | 334 | def length(self) -> None: 335 | """ 336 | Calculate approximate length in meters. 337 | 338 | Returns: 339 | float: Length of way in meters (not implemented) 340 | """ 341 | # Implementation using haversine formula 342 | pass 343 | 344 | def _parse_nodes(self, elem: Element): 345 | """ 346 | Parse nodes from XML element. 347 | 348 | Args: 349 | elem: XML element containing nd elements 350 | """ 351 | for node in elem.findall("nd"): 352 | self.nodes.append(OSMObject.from_xml(node)) 353 | 354 | def _geo_interface(self) -> dict: 355 | """ 356 | GeoJSON-compatible interface. 357 | 358 | Returns: 359 | dict: GeoJSON LineString or Polygon geometry 360 | """ 361 | geom_type = "Polygon" if self.is_closed() else "LineString" 362 | coordinates = [[n.lon, n.lat] for n in self.nodes] 363 | 364 | # For Polygon, we need to wrap coordinates in an extra list 365 | if geom_type == "Polygon": 366 | coordinates = [coordinates] 367 | 368 | return {"type": geom_type, "coordinates": coordinates} 369 | 370 | __geo_interface__ = property(_geo_interface) 371 | 372 | 373 | class Relation(OSMObject): 374 | """ 375 | Represents an OSM relation (collection of features). 376 | 377 | ## Attributes 378 | members (list): List of member objects 379 | __geo_interface__ (dict): GeoJSON-compatible interface, see https://gist.github.com/sgillies/2217756 for more details. 380 | 381 | ## Example 382 | ```python 383 | relation = Relation() 384 | relation.members = [Way(), Node()] # Add members 385 | print(relation.__geo_interface__["type"]) # "FeatureCollection" 386 | ``` 387 | """ 388 | 389 | def __init__( 390 | self, 391 | tags: Dict[str, str] = {}, 392 | attribs: Dict[str, str] = {}, 393 | bounds: List[float] = [], 394 | ) -> None: 395 | """Initialize a Relation object.""" 396 | tags = tags or {} 397 | attribs = attribs or {} 398 | super().__init__(tags, attribs, bounds) 399 | self.members = [] 400 | 401 | def _parse_members(self, elem: Element): 402 | """ 403 | Parse members from XML element. 404 | 405 | Args: 406 | elem: XML element containing member elements 407 | """ 408 | for member in elem.findall("member"): 409 | self.members.append(OSMObject.from_xml(member)) 410 | 411 | def _geo_interface(self) -> dict: 412 | """ 413 | GeoJSON-compatible interface. 414 | 415 | Returns: 416 | dict: GeoJSON GeometryCollection 417 | """ 418 | return { 419 | "type": "GeometryCollection", 420 | "geometries": [m.__geo_interface__ for m in self.members], 421 | } 422 | 423 | __geo_interface__ = property(_geo_interface) 424 | 425 | 426 | class Member(OSMObject): 427 | """ 428 | Represents an OSM member (a feature within a relation). 429 | """ 430 | 431 | def __init__(self): 432 | """Initialize an empty member.""" 433 | self.type = None 434 | self.ref = None 435 | self.role = None 436 | super().__init__() 437 | 438 | def _parse_attributes(self, elem: Element): 439 | """ 440 | Parse member attributes from XML element. 441 | 442 | Args: 443 | elem: XML element containing member attributes 444 | """ 445 | self.type = elem.get("type") 446 | self.ref = int(elem.get("ref")) 447 | self.role = elem.get("role") 448 | 449 | def _geo_interface(self): 450 | """ 451 | GeoJSON-compatible interface. 452 | 453 | Returns: 454 | dict: GeoJSON Feature 455 | """ 456 | return { 457 | "type": "Feature", 458 | "geometry": None, 459 | "properties": {"type": self.type, "ref": self.ref, "role": self.role}, 460 | } 461 | 462 | __geo_interface__ = property(_geo_interface) 463 | -------------------------------------------------------------------------------- /src/osmdiff/augmenteddiff.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from datetime import datetime 4 | from typing import Optional 5 | from xml.etree import ElementTree 6 | 7 | import requests 8 | from dateutil import parser 9 | 10 | from osmdiff.settings import DEFAULT_OVERPASS_URL 11 | 12 | from .config import API_CONFIG, DEFAULT_HEADERS 13 | from .osm import OSMObject 14 | 15 | 16 | class AugmentedDiff: 17 | """An Augmented Diff representation for OpenStreetMap changes. 18 | 19 | Handles retrieval and parsing of OpenStreetMap augmented diffs containing detailed 20 | changes to OSM data (creations, modifications, deletions). 21 | 22 | Args: 23 | minlon: Minimum longitude of bounding box (WGS84) 24 | minlat: Minimum latitude of bounding box (WGS84) 25 | maxlon: Maximum longitude of bounding box (WGS84) 26 | maxlat: Maximum latitude of bounding box (WGS84) 27 | file: Path to local augmented diff XML file 28 | sequence_number: Sequence number of the diff 29 | base_url: Override default Overpass API URL 30 | timeout: Request timeout in seconds 31 | 32 | Note: 33 | The bounding box coordinates should be in WGS84 (EPSG:4326) format. 34 | """ 35 | 36 | base_url = DEFAULT_OVERPASS_URL 37 | minlon = None 38 | minlat = None 39 | maxlon = None 40 | maxlat = None 41 | 42 | def __init__( 43 | self, 44 | minlon: Optional[float] = None, 45 | minlat: Optional[float] = None, 46 | maxlon: Optional[float] = None, 47 | maxlat: Optional[float] = None, 48 | file: Optional[str] = None, 49 | sequence_number: Optional[int] = None, 50 | timestamp: Optional[datetime] = None, 51 | base_url: Optional[str] = None, 52 | timeout: Optional[int] = None, 53 | ) -> None: 54 | # Initialize with defaults from config 55 | self.base_url = base_url or API_CONFIG["overpass"]["base_url"] 56 | self.timeout = timeout or API_CONFIG["overpass"]["timeout"] 57 | 58 | # Initialize other config values 59 | self.minlon = minlon 60 | self.minlat = minlat 61 | self.maxlon = maxlon 62 | self.maxlat = maxlat 63 | self.timestamp = timestamp 64 | self._remarks = [] 65 | self._sequence_number = None 66 | self._create = [] 67 | self._modify = [] 68 | self._delete = [] 69 | if file: 70 | with open(file, "r") as file_handle: 71 | self._parse_stream(file_handle) 72 | else: 73 | self.sequence_number = sequence_number 74 | if minlon and minlat and maxlon and maxlat: 75 | if maxlon > minlon and maxlat > minlat: 76 | self.minlon = minlon 77 | self.minlat = minlat 78 | self.maxlon = maxlon 79 | self.maxlat = maxlat 80 | else: 81 | raise Exception("invalid bbox.") 82 | self._logger = logging.getLogger(__name__) 83 | 84 | @classmethod 85 | def get_state( 86 | cls, base_url: Optional[str] = None, timeout: Optional[int] = None 87 | ) -> Optional[dict]: 88 | """Get the current sequence number from the Overpass API. 89 | 90 | Args: 91 | base_url: Override default Overpass API URL (deprecated) 92 | timeout: Optional override for request timeout 93 | 94 | Returns: 95 | int: Sequence number 96 | """ 97 | state_url = API_CONFIG["overpass"]["state_url"] 98 | response = requests.get( 99 | state_url, timeout=timeout or 5, headers=DEFAULT_HEADERS 100 | ) 101 | response.raise_for_status() 102 | return_dict = {"sequence_number": int(response.text), "timestamp": None} 103 | return return_dict 104 | 105 | def _build_action(self, elem): 106 | """Parse an action element from an augmented diff. 107 | 108 | Actions in augmented diffs are ordered: nodes first, then ways, then relations. 109 | Within each type, elements are ordered by ID. 110 | """ 111 | action_type = elem.attrib["type"] 112 | 113 | if action_type == "create": 114 | for child in elem: 115 | osm_obj = OSMObject.from_xml(child) 116 | self._create.append(osm_obj) 117 | elif action_type == "modify": 118 | old = elem.find("old") 119 | new = elem.find("new") 120 | if old is not None and new is not None: 121 | osm_obj_old = None 122 | osm_obj_new = None 123 | for child in old: 124 | osm_obj_old = OSMObject.from_xml(child) 125 | for child in new: 126 | osm_obj_new = OSMObject.from_xml(child) 127 | if osm_obj_old and osm_obj_new: 128 | self._modify.append({"old": osm_obj_old, "new": osm_obj_new}) 129 | elif action_type == "delete": 130 | old = elem.find("old") 131 | new = elem.find("new") 132 | osm_obj_old = None 133 | osm_obj_new = None 134 | if old is not None: 135 | for child in old: 136 | osm_obj_old = OSMObject.from_xml(child) 137 | if new is not None: 138 | for child in new: 139 | osm_obj_new = OSMObject.from_xml(child) 140 | if osm_obj_old is not None or osm_obj_new is not None: 141 | # Store both old and new, and optionally meta info 142 | deletion_info = { 143 | "old": osm_obj_old, 144 | "new": osm_obj_new, 145 | "meta": elem.attrib.copy(), 146 | } 147 | self._delete.append(deletion_info) 148 | 149 | def _parse_stream(self, stream): 150 | for event, elem in ElementTree.iterparse(stream): 151 | if elem.tag == "remark": 152 | self._remarks.append(elem.text) 153 | if elem.tag == "meta": 154 | timestamp = parser.parse(elem.attrib.get("osm_base")) 155 | self.timestamp = timestamp 156 | if elem.tag == "action": 157 | self._build_action(elem) 158 | 159 | def retrieve( 160 | self, 161 | clear_cache: bool = False, 162 | timeout: Optional[int] = None, 163 | auto_increment: bool = True, 164 | max_retries: int = 3, 165 | ) -> int: 166 | """Retrieve the Augmented diff corresponding to the sequence_number. 167 | 168 | Args: 169 | clear_cache: Whether to clear existing data before retrieval. 170 | timeout: Request timeout in seconds. 171 | auto_increment: Whether to automatically increment sequence number after retrieval. 172 | max_retries: Maximum number of retry attempts for failed requests. 173 | 174 | Returns: 175 | HTTP status code of the request (200 for success) 176 | 177 | Raises: 178 | Exception: If sequence_number is not set 179 | requests.exceptions.RequestException: If all retry attempts fail 180 | """ 181 | if not self.sequence_number: 182 | raise Exception("invalid sequence number") 183 | 184 | if clear_cache: 185 | self._create, self._modify, self._delete = ([], [], []) 186 | 187 | url = self.base_url.format(sequence_number=self.sequence_number) 188 | 189 | self._logger.info(f"Retrieving diff {self.sequence_number} from {url}") 190 | 191 | # Store current data before making request 192 | prev_create = self._create.copy() 193 | prev_modify = self._modify.copy() 194 | prev_delete = self._delete.copy() 195 | 196 | # Use a longer timeout if none specified 197 | request_timeout = ( 198 | timeout or self.timeout or 120 199 | ) # 2 minutes default, this will still fail for very large diffs, like 12346 200 | 201 | for attempt in range(max_retries): 202 | try: 203 | # Exponential backoff between retries 204 | if attempt > 0: 205 | time.sleep(2**attempt) # 2, 4, 8 seconds... 206 | 207 | r = requests.get( 208 | url, stream=True, timeout=request_timeout, headers=DEFAULT_HEADERS 209 | ) 210 | 211 | if r.status_code != 200: 212 | return r.status_code 213 | 214 | r.raw.decode_content = True 215 | 216 | # Clear current lists but keep previous data 217 | self._create, self._modify, self._delete = ([], [], []) 218 | 219 | # Parse new data 220 | self._parse_stream(r.raw) 221 | 222 | # Merge with previous data 223 | self._create = prev_create + self._create 224 | self._modify = prev_modify + self._modify 225 | self._delete = prev_delete + self._delete 226 | 227 | # Automatically increment sequence number after successful retrieval 228 | if auto_increment: 229 | self.sequence_number += 1 230 | 231 | return r.status_code 232 | 233 | except ( 234 | requests.exceptions.ReadTimeout, 235 | requests.exceptions.ConnectionError, 236 | ) as e: 237 | if attempt == max_retries - 1: # Last attempt 238 | raise 239 | continue 240 | 241 | return 0 # Should never reach here due to raise in except block 242 | 243 | @property 244 | def create(self) -> list: 245 | """Get the list of created objects from the augmented diff.""" 246 | return self._create 247 | 248 | @create.setter 249 | def create(self, value: list) -> None: 250 | self._create = value 251 | 252 | @property 253 | def modify(self) -> list: 254 | """Get the list of modified objects from the augmented diff.""" 255 | return self._modify 256 | 257 | @modify.setter 258 | def modify(self, value: list) -> None: 259 | self._modify = value 260 | 261 | @property 262 | def delete(self) -> list: 263 | """Get the list of deleted objects from the augmented diff.""" 264 | return self._delete 265 | 266 | @delete.setter 267 | def delete(self, value: list) -> None: 268 | self._delete = value 269 | 270 | @property 271 | def remarks(self) -> list: 272 | """Get the list of remarks from the augmented diff. 273 | 274 | Remarks provide additional metadata about the changes in the diff. 275 | """ 276 | return self._remarks 277 | 278 | @property 279 | def timestamp(self) -> datetime: 280 | """Get the timestamp of when the changes in this diff were made. 281 | 282 | Returns: 283 | datetime: The timestamp parsed from the diff metadata 284 | """ 285 | return self._timestamp 286 | 287 | @timestamp.setter 288 | def timestamp(self, value: datetime) -> None: 289 | """Set the timestamp for this diff. 290 | 291 | Args: 292 | value: The new timestamp to set 293 | """ 294 | self._timestamp = value 295 | 296 | @property 297 | def sequence_number(self) -> int | None: 298 | """Get the sequence number identifying this diff. 299 | 300 | Sequence numbers increment monotonically and uniquely identify each diff. 301 | """ 302 | return self._sequence_number 303 | 304 | @sequence_number.setter 305 | def sequence_number(self, value: int) -> None: 306 | try: 307 | # value can be none 308 | if value is None: 309 | self._sequence_number = None 310 | return 311 | self._sequence_number = int(value) 312 | except ValueError: 313 | raise ValueError( 314 | "sequence_number must be an integer or parsable as an integer" 315 | ) 316 | 317 | @property 318 | def actions(self): 319 | """Get all actions combined in a single list.""" 320 | return {"create": self._create, "modify": self._modify, "delete": self._delete} 321 | 322 | def __repr__(self): 323 | return """AugmentedDiff ({create} created, {modify} modified, {delete} deleted)""".format( 324 | create=len(self._create), 325 | modify=len(self._modify), 326 | delete=len(self._delete), 327 | ) 328 | 329 | def __enter__(self): 330 | return self 331 | 332 | def __exit__(self, exc_type, exc_val, exc_tb): 333 | self._create.clear() 334 | self._modify.clear() 335 | self._delete.clear() 336 | 337 | 338 | class ContinuousAugmentedDiff: 339 | """Iterator for continuously fetching augmented diffs with backoff. 340 | 341 | Yields AugmentedDiff objects as new diffs become available. 342 | 343 | Args: 344 | minlon: Minimum longitude of bounding box 345 | minlat: Minimum latitude of bounding box 346 | maxlon: Maximum longitude of bounding box 347 | maxlat: Maximum latitude of bounding box 348 | base_url: Override default Overpass API URL 349 | timeout: Request timeout in seconds 350 | min_interval: Minimum seconds between checks (default: 30) 351 | max_interval: Maximum seconds between checks (default: 120) 352 | """ 353 | 354 | def __init__( 355 | self, 356 | minlon: Optional[float] = None, 357 | minlat: Optional[float] = None, 358 | maxlon: Optional[float] = None, 359 | maxlat: Optional[float] = None, 360 | base_url: Optional[str] = None, 361 | timeout: Optional[int] = None, 362 | min_interval: int = 30, 363 | max_interval: int = 120, 364 | ): 365 | self.bbox = (minlon, minlat, maxlon, maxlat) 366 | self.base_url = base_url 367 | self.timeout = timeout 368 | self.min_interval = min_interval 369 | self.max_interval = max_interval 370 | 371 | self._current_sequence = None 372 | self._current_interval = min_interval 373 | self._last_check = None 374 | self._logger = logging.getLogger(__name__) 375 | 376 | def _wait_for_next_check(self) -> None: 377 | """Wait appropriate time before next check, using exponential backoff.""" 378 | now = datetime.now() 379 | if self._last_check: 380 | elapsed = (now - self._last_check).total_seconds() 381 | wait_time = max(0, self._current_interval - elapsed) 382 | if wait_time > 0: 383 | time.sleep(wait_time) 384 | 385 | self._last_check = datetime.now() 386 | 387 | def _backoff(self) -> None: 388 | """Increase check interval, up to max_interval.""" 389 | self._current_interval = min(self._current_interval * 2, self.max_interval) 390 | 391 | def _reset_backoff(self) -> None: 392 | """Reset check interval to minimum.""" 393 | self._current_interval = self.min_interval 394 | 395 | def __iter__(self): 396 | return self 397 | 398 | def __next__(self) -> AugmentedDiff: 399 | while True: 400 | self._wait_for_next_check() 401 | 402 | # check if we have a newer sequence on the remote 403 | newest_remote = AugmentedDiff.get_state(timeout=self.timeout) 404 | 405 | # if we don't have a local sequence number yet, set it 406 | if self._current_sequence is None: 407 | self._current_sequence = newest_remote 408 | 409 | # if we do, proceed ony if the remote is newer 410 | elif self._current_sequence >= newest_remote: 411 | continue 412 | 413 | # Create diff object for new sequence 414 | diff = AugmentedDiff( 415 | minlon=self.bbox[0], 416 | minlat=self.bbox[1], 417 | maxlon=self.bbox[2], 418 | maxlat=self.bbox[3], 419 | sequence_number=self._current_sequence, 420 | base_url=self.base_url, 421 | timeout=self.timeout, 422 | ) 423 | 424 | # Try to retrieve the diff 425 | try: 426 | status = diff.retrieve(auto_increment=False) 427 | if status != 200: 428 | self._logger.warning(f"Failed to retrieve diff: HTTP {status}") 429 | self._backoff() 430 | continue 431 | 432 | # Success! Reset backoff and update sequence 433 | self._reset_backoff() 434 | self._current_sequence += 1 435 | return diff 436 | 437 | except Exception as e: 438 | self._logger.warning(f"Error retrieving diff: {e}") 439 | self._backoff() 440 | continue 441 | --------------------------------------------------------------------------------