├── tests
├── __init__.py
├── data
│ └── test_delete_metadata.xml
├── test_relation.py
├── test_way.py
├── test_node.py
├── conftest.py
├── test_augmenteddiff_continuous.py
├── test_api.py
├── test_adiff.py
├── test_osm.py
├── test_osmchange.py
└── test_augmenteddiff.py
├── .python-version
├── src
├── __init__.py
└── osmdiff
│ ├── osm
│ ├── __init__.py
│ └── osm.py
│ ├── settings.py
│ ├── __init__.py
│ ├── config.py
│ ├── osmchange.py
│ └── augmenteddiff.py
├── .gitignore
├── .vscode
└── settings.json
├── docs
├── api
│ ├── utils.md
│ ├── osm.md
│ ├── config.md
│ ├── augmenteddiff.md
│ ├── osmchange.md
│ └── continuous.md
├── index.md
├── examples
│ └── index.md
└── getting-started.md
├── LICENSE
├── examples
├── continuous.py
├── try.py
├── api
│ └── app.py
└── geo_interface.py
├── mkdocs.yml
├── pyproject.toml
├── CHANGELOG.md
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12.9
2 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # This marks the src directory as a Python package
2 |
--------------------------------------------------------------------------------
/src/osmdiff/osm/__init__.py:
--------------------------------------------------------------------------------
1 | from .osm import Node, OSMObject, Relation, Way
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | **/__pycache__/
3 | **/*.egg-info
4 | .pytest_cache
5 | .venv/
6 | build/
7 | dist/
8 | .pdm-python
9 | .aider*
10 | site/
11 | .vscode/
12 | .coverage
13 | htmlcov/
--------------------------------------------------------------------------------
/src/osmdiff/settings.py:
--------------------------------------------------------------------------------
1 | DEFAULT_OVERPASS_URL = "http://overpass-api.de/api" # URL for Overpass API
2 | DEFAULT_REPLICATION_URL = (
3 | "https://planet.openstreetmap.org/replication" # URL for OSM replication API
4 | )
5 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.testing.pytestArgs": [
3 | "tests"
4 | ],
5 | "python.testing.unittestEnabled": false,
6 | "python.testing.pytestEnabled": true,
7 | "python.testing.pytestPath": "pytest"
8 | }
--------------------------------------------------------------------------------
/tests/data/test_delete_metadata.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/api/utils.md:
--------------------------------------------------------------------------------
1 | # Utilities
2 |
3 | ## Settings
4 |
5 | ::: osmdiff.settings.DEFAULT_OVERPASS_URL
6 | options:
7 | heading_level: 2
8 | show_source: true
9 |
10 | ::: osmdiff.settings.DEFAULT_REPLICATION_URL
11 | options:
12 | heading_level: 2
13 | show_source: true
14 |
15 | ## Version Information
16 |
17 | ::: osmdiff.__version__
18 | options:
19 | heading_level: 2
20 | show_source: true
21 |
--------------------------------------------------------------------------------
/src/osmdiff/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | osmdiff is a Python library for working with OpenStreetMap changesets and diffs.
3 |
4 | It provides classes for working with OpenStreetMap changesets and diffs, and
5 | includes a parser for the OpenStreetMap changeset API.
6 | """
7 |
8 | from .augmenteddiff import AugmentedDiff, ContinuousAugmentedDiff
9 | from .osm import Node, Relation, Way
10 | from .osmchange import OSMChange
11 |
12 | __version__ = "0.4.6"
13 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # OSMDiff Documentation
2 |
3 | OSMDiff provides Python tools for working with OpenStreetMap change data:
4 |
5 | - Track real-time map edits
6 | - Process historical changes
7 | - Monitor specific geographic areas
8 | - Analyze contributor patterns
9 |
10 | ## Quick Links
11 |
12 | - [Getting Started](getting-started.md) - First steps with OSMDiff
13 | - [API Reference](api/augmenteddiff.md) - Detailed documentation
14 | - [Examples](examples/index.md) - More advanced usage patterns
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2017-2022 Martijn van Exel
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/docs/api/osm.md:
--------------------------------------------------------------------------------
1 | # OSM Data Structures
2 |
3 | This module provides classes for working with OpenStreetMap (OSM) data elements.
4 |
5 | ## Base Class
6 |
7 | ::: osmdiff.osm.OSMObject
8 | options:
9 | show_root_heading: true
10 | show_source: true
11 |
12 | ## OSM Elements
13 |
14 | ### Node
15 |
16 | ::: osmdiff.osm.Node
17 | options:
18 | show_root_heading: true
19 | show_source: true
20 |
21 | ### Way
22 |
23 | ::: osmdiff.osm.Way
24 | options:
25 | show_root_heading: true
26 | show_source: true
27 |
28 | ### Relation
29 |
30 | ::: osmdiff.osm.Relation
31 | options:
32 | show_root_heading: true
33 | show_source: true
34 |
35 |
--------------------------------------------------------------------------------
/examples/continuous.py:
--------------------------------------------------------------------------------
1 | from osmdiff import ContinuousAugmentedDiff
2 |
3 | # Create continuous fetcher for London area
4 | fetcher = ContinuousAugmentedDiff(
5 | minlon=-0.489,
6 | minlat=51.28,
7 | maxlon=0.236,
8 | maxlat=51.686,
9 | min_interval=30, # Check at least every 30 seconds
10 | max_interval=120, # Back off up to 120 seconds if no changes
11 | )
12 |
13 | # Process changes as they come in
14 | for diff in fetcher:
15 | print(f"\nNew changes in diff {diff.sequence_number}:")
16 | print(f" Created: {len(diff.create)} objects")
17 | print(f" Modified: {len(diff.modify)} objects")
18 | print(f" Deleted: {len(diff.delete)} objects")
19 |
20 | # Example: Track new amenities
21 | for obj in diff.create:
22 | if "amenity" in obj.tags:
23 | print(f"New amenity: {obj.tags['amenity']}")
24 |
--------------------------------------------------------------------------------
/docs/api/config.md:
--------------------------------------------------------------------------------
1 | # Configuration Reference
2 |
3 | ## API Configuration
4 |
5 | ::: osmdiff.config.API_CONFIG
6 | options:
7 | heading_level: 2
8 | show_source: true
9 |
10 | ## AugmentedDiff Defaults
11 |
12 | ::: osmdiff.config.AUGMENTED_DIFF_CONFIG
13 | options:
14 | heading_level: 2
15 | show_source: true
16 |
17 | ## HTTP Settings
18 |
19 | ::: osmdiff.config.DEFAULT_HEADERS
20 | options:
21 | heading_level: 2
22 | show_source: true
23 |
24 | ::: osmdiff.config.USER_AGENT
25 | options:
26 | heading_level: 2
27 | show_source: true
28 |
29 | ## Overriding Configuration
30 |
31 | ```python
32 | from osmdiff import AugmentedDiff
33 | from osmdiff.config import API_CONFIG
34 |
35 | # Modify configuration before use
36 | API_CONFIG["overpass"]["timeout"] = 60 # Increase timeout
37 |
38 | adiff = AugmentedDiff() # Will use updated configuration
39 | ```
40 |
--------------------------------------------------------------------------------
/examples/try.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from osmdiff import AugmentedDiff, OSMChange
4 | from osmdiff.osm import Node, Relation, Way
5 | from pathlib import Path
6 |
7 | osm_change_file = Path("tests", "data", "test_osmchange.xml")
8 | print(osm_change_file)
9 |
10 | # absolute path
11 | osm_change_file = osm_change_file.resolve()
12 | print(osm_change_file)
13 |
14 |
15 | r = OSMChange()
16 | r.get_state()
17 | r.retrieve()
18 | print(r)
19 |
20 | r = OSMChange(file=osm_change_file)
21 | print(r)
22 |
23 | a = AugmentedDiff(file=osm_change_file)
24 | print(a)
25 |
26 | a = AugmentedDiff(
27 | # minlon=-160.0,
28 | # minlat=20.0,
29 | # maxlon=-80.0,
30 | # maxlat=60.0,
31 | )
32 | a._get_current_id()
33 | a.retrieve()
34 | print(a)
35 |
36 | # n = Node()
37 | # w = Way()
38 | # r = Relation()
39 |
40 | # r = replication.OSMChange(frequency="hour")
41 |
42 | # print(r.sequence_number)
43 | # r.get(r.sequence_number)
44 |
45 | # r = replication.OSMChange(frequency="day")
46 |
47 | # print(r.sequence_number)
48 | # r.get(r.sequence_number)
49 |
--------------------------------------------------------------------------------
/tests/test_relation.py:
--------------------------------------------------------------------------------
1 | from osmdiff import Relation
2 | from osmdiff.osm import OSMObject
3 | from typing_extensions import assert_type
4 |
5 |
6 | def test_relation_init():
7 | relation = Relation()
8 | assert isinstance(relation, Relation)
9 | assert isinstance(relation, OSMObject)
10 | assert isinstance(relation.attribs, dict)
11 | assert isinstance(relation.tags, dict)
12 | assert len(relation.tags) == 0
13 | assert len(relation.attribs) == 0
14 | assert isinstance(relation.members, list)
15 | assert len(relation.members) == 0
16 |
17 | def test_relation_from_xml():
18 | import xml.etree.ElementTree as ET
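19 | xml = '<relation id="1"><tag k="type" v="multipolygon"/><member type="way" ref="2" role="outer"/></relation>'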
20 | elem = ET.fromstring(xml)
21 | relation = Relation.from_xml(elem)
22 | assert isinstance(relation, Relation)
23 | assert relation.attribs["id"] == "1"
24 | assert relation.tags["type"] == "multipolygon"
25 | assert len(relation.members) == 1
26 |
27 | # Optionally, add test for geo interface if implemented
28 |
--------------------------------------------------------------------------------
/tests/test_way.py:
--------------------------------------------------------------------------------
1 | from osmdiff import Way
2 | from osmdiff.osm import OSMObject
3 | from typing_extensions import assert_type
4 |
5 |
6 | def test_way_init():
7 | way = Way()
8 | assert isinstance(way, Way)
9 | assert isinstance(way, OSMObject)
10 | assert isinstance(way.attribs, dict)
11 | assert isinstance(way.tags, dict)
12 | assert len(way.tags) == 0
13 | assert len(way.attribs) == 0
14 | assert isinstance(way.nodes, list)
15 | assert len(way.nodes) == 0
16 |
17 | def test_way_is_closed():
18 | way = Way()
19 | way.nodes = [1, 2, 1]
20 | assert way.is_closed() is True
21 | way.nodes = [1, 2, 3]
22 | assert way.is_closed() is False
23 |
24 | def test_way_from_xml():
25 | import xml.etree.ElementTree as ET
26 | xml = '<way id="1"><tag k="highway" v="residential"/></way>'
27 | elem = ET.fromstring(xml)
28 | way = Way.from_xml(elem)
29 | assert isinstance(way, Way)
30 | assert way.attribs["id"] == "1"
31 | assert way.tags["highway"] == "residential"
32 |
33 | # Optionally, add test for geo interface if implemented
34 |
--------------------------------------------------------------------------------
/examples/api/app.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI
2 | from fastapi.middleware.cors import CORSMiddleware
3 | import uvicorn
4 | from typing import Optional
5 | from osmdiff import AugmentedDiff
6 |
7 |
8 | app = FastAPI()
9 |
10 | origins = [
11 | "http://localhost:3000",
12 | "http://127.0.0.1:3000",
13 | ]
14 |
15 | app.add_middleware(
16 | CORSMiddleware,
17 | allow_origins=origins,
18 | allow_credentials=True,
19 | allow_methods=["*"],
20 | allow_headers=["*"],
21 | )
22 |
23 |
24 | @app.get("/")
25 | async def root():
26 | return {"message": "Hello World"}
27 |
28 |
29 | @app.get("/items/{item_id}")
30 | async def read_item(item_id: int, q: Optional[str] = None):
31 | return {"item_id": item_id, "q": q}
32 |
33 |
34 | @app.get("/adiff/{sequence_number}")
35 | async def get_augmented_diff(sequence_number: int):
36 | adiff = AugmentedDiff(sequence_number=sequence_number)
37 | adiff.retrieve()
38 | return {"create": adiff._create, "modify": adiff._modify, "delete": adiff._delete}
39 |
40 |
41 | if __name__ == "__main__":
42 | uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
43 |
--------------------------------------------------------------------------------
/tests/test_node.py:
--------------------------------------------------------------------------------
1 | from osmdiff import Node
2 | from osmdiff.osm import OSMObject
3 | from typing_extensions import assert_type
4 |
5 |
6 | def test_node_init():
7 | node = Node()
8 | assert isinstance(node, Node)
9 | assert isinstance(node, OSMObject)
10 | assert isinstance(node.attribs, dict)
11 | assert isinstance(node.tags, dict)
12 | assert len(node.attribs) == 0
13 | assert len(node.tags) == 0
14 | assert node.lat == 0.0
15 | assert node.lon == 0.0
16 |
17 | def test_node_geo_interface_and_equality():
18 | node1 = Node(attribs={"lat": 10.0, "lon": 20.0})
19 | node2 = Node(attribs={"lat": 10.0, "lon": 20.0})
20 | node3 = Node(attribs={"lat": 10.1, "lon": 20.1})
21 | gi = node1.__geo_interface__
22 | assert gi["type"] == "Point"
23 | assert gi["coordinates"] == [node1.lon, node1.lat]
24 | assert node1 == node2
25 | assert node1 != node3
26 |
27 | def test_node_from_xml():
28 | import xml.etree.ElementTree as ET
29 | xml = '<node id="1" lat="10.0" lon="20.0"><tag k="name" v="TestNode"/></node>'
30 | elem = ET.fromstring(xml)
31 | node = Node.from_xml(elem)
32 | assert isinstance(node, Node)
33 | assert node.attribs["id"] == "1"
34 | assert node.attribs["lat"] == "10.0"
35 | assert node.tags["name"] == "TestNode"
36 |
--------------------------------------------------------------------------------
/examples/geo_interface.py:
--------------------------------------------------------------------------------
1 | from shapely.geometry import shape
2 | from osmdiff.osm import Node, Way, Relation
3 |
4 | # Create a node with coordinates
5 | node = Node()
6 | node.attribs = {"lon": "-122.4", "lat": "37.7"}
7 |
8 | # Convert to Shapely Point using geo_interface
9 | point = shape(node.__geo_interface__)
10 | print(f"Node as Point: {point}") # POINT (-122.4 37.7)
11 |
12 | # Create a way with nodes
13 | way = Way()
14 | way.nodes = [
15 | Node(attribs={"lon": "-122.4", "lat": "37.7"}),
16 | Node(attribs={"lon": "-122.4", "lat": "37.8"}),
17 | Node(attribs={"lon": "-122.5", "lat": "37.8"}),
18 | Node(attribs={"lon": "-122.4", "lat": "37.7"}), # Closing the loop
19 | ]
20 |
21 | # Convert to Shapely Polygon using geo_interface
22 | polygon = shape(way.__geo_interface__)
23 | print(
24 | f"Way as Polygon: {polygon}"
25 | ) # POLYGON ((-122.4 37.7, -122.4 37.8, -122.5 37.8, -122.4 37.7))
26 |
27 | # Create a relation with members
28 | relation = Relation()
29 | relation.members = [way, node]
30 |
31 | # Convert to Shapely GeometryCollection using geo_interface
32 | collection = shape(relation.__geo_interface__)
33 | print(
34 | f"Relation as Collection: {collection}"
35 | ) # GEOMETRYCOLLECTION (POLYGON ((-122.4 37.7, -122.4 37.8, -122.5 37.8, -122.4 37.7)), POINT (-122.4 37.7))
36 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: OSMDiff
2 | theme:
3 | name: material
4 | palette:
5 | - scheme: default
6 | primary: indigo
7 | accent: indigo
8 | toggle:
9 | icon: material/brightness-7
10 | name: Switch to dark mode
11 | - scheme: slate
12 | primary: indigo
13 | accent: indigo
14 | toggle:
15 | icon: material/brightness-4
16 | name: Switch to light mode
17 |
18 | plugins:
19 | - search
20 | - mkdocstrings:
21 | default_handler: python
22 | handlers:
23 | python:
24 | paths: [src]
25 | options:
26 | docstring_style: google
27 | show_source: true
28 | show_root_heading: false
29 | heading_level: 2
30 | show_submodules: true
31 | members_order: alphabetical
32 | merge_init_into_class: true
33 |
34 | nav:
35 | - Home: index.md
36 | - Getting Started: getting-started.md
37 | - Examples: examples/index.md
38 | - API Reference:
39 | - OSM: api/osm.md
40 | - OSMChange: api/osmchange.md
41 | - AugmentedDiff: api/augmenteddiff.md
42 | - ContinuousAugmentedDiff: api/continuous.md
43 | markdown_extensions:
44 | - pymdownx.highlight:
45 | anchor_linenums: true
46 | - toc:
47 | permalink: "#"
48 | - smarty
49 | - sane_lists
50 |
--------------------------------------------------------------------------------
/docs/api/augmenteddiff.md:
--------------------------------------------------------------------------------
1 | # AugmentedDiff
2 |
3 | Core class for retrieving and parsing OpenStreetMap augmented diffs.
4 |
5 | For continuous monitoring of changes, see [ContinuousAugmentedDiff](continuous.md).
6 |
7 | ## Features
8 |
9 | - Single diff retrieval
10 | - Bounding box filtering
11 | - Automatic sequence number handling
12 | - Context manager support
13 |
14 | ## Basic Usage
15 |
16 | ```python
17 | from osmdiff import AugmentedDiff
18 |
19 | # Create with bounding box for London
20 | adiff = AugmentedDiff(
21 | minlon=-0.489,
22 | minlat=51.28,
23 | maxlon=0.236,
24 | maxlat=51.686
25 | )
26 |
27 | # Retrieve and process changes
28 | status = adiff.retrieve()
29 | if status == 200:
30 | print(f"Created: {len(adiff.create)} features")
31 | print(f"Modified: {len(adiff.modify)} features")
32 | print(f"Deleted: {len(adiff.delete)} features")
33 | ```
34 |
35 | ## API Reference
36 |
37 | ::: osmdiff.augmenteddiff.AugmentedDiff
38 | options:
39 | heading_level: 2
40 | show_source: true
41 | members:
42 | - __init__
43 | - get_state
44 | - retrieve
45 | - sequence_number
46 | - timestamp
47 | - remarks
48 | - actions
49 | - __repr__
50 | - __enter__
51 | - __exit__
52 |
53 | ## See Also
54 |
55 | - [ContinuousAugmentedDiff](continuous.md) - For continuous monitoring
56 | - [OSMChange](osmchange.md) - For standard OSM changesets
57 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "osmdiff"
3 | dynamic = ["version"]
4 | authors = [{ name = "Martijn van Exel", email = "m@rtijn.org" }]
5 | maintainers = [{ name = "Martijn van Exel", email = "m@rtijn.org" }]
6 | description = "A read-only interface to OpenStreetMap change APIs and files"
7 | keywords = ["openstreetmap", "osm", "diff", "changeset", "api"]
8 | readme = "README.md"
9 | requires-python = ">=3.9"
10 | classifiers = [
11 | "Programming Language :: Python :: 3",
12 | "Operating System :: OS Independent",
13 | ]
14 | dependencies = ["python-dateutil>=2.9.0.post0", "requests>=2.32.2"]
15 | license = "Apache-2.0"
16 |
17 | [project.urls]
18 | "Homepage" = "https://git.sr.ht/~mvexel/osmdiff"
19 | "Bug Tracker" = "https://todo.sr.ht/~mvexel/tracker?search=label%3Aosmdiff"
20 |
21 | [tool.setuptools.dynamic]
22 | version = { attr = "osmdiff.__version__" }
23 |
24 | [tool.pytest.ini_options]
25 | markers = ["integration: mark a test as an integration test"]
26 | addopts = "--cov=src/osmdiff --cov-report=term-missing --cov-report=html --cov-fail-under=85"
27 |
28 | [tool.pyright]
29 | venvPath = "."
30 | venv = ".venv"
31 |
32 | [dependency-groups]
33 | dev = [
34 | "mkdocs>=1.6.1",
35 | "mkdocs-material>=9.5.50",
36 | "mkdocs-material-extensions>=1.3.1",
37 | "mkdocstrings[python]>=0.26.1",
38 | "pytest>=8.3.4",
39 | "typing-extensions>=4.12.2",
40 | ]
41 |
42 | test = ["pytest>=7.0.0", "pytest-cov>=3.0.0", "requests-mock>=1.9.3"]
43 |
44 | examples = ["fastapi>=0.115.12", "uvicorn>=0.34.2", "shapely>=2.1.1"]
45 |
--------------------------------------------------------------------------------
/docs/api/osmchange.md:
--------------------------------------------------------------------------------
1 | # OSMChange
2 |
3 | Core class for retrieving and parsing OpenStreetMap changesets in OSMChange format.
4 |
5 | For working with the Augmented Diff format, we have [AugmentedDiff](augmenteddiff.md) and [ContinuousAugmentedDiff](continuous.md).
6 |
7 | ## Features
8 |
9 | - Retrieves changesets from OSM replication servers
10 | - Parses OSMChange XML format
11 | - Handles create/modify/delete actions
12 | - Supports both remote and local file sources
13 | - Context manager support
14 | - Sequence number management
15 |
16 | ## Basic Usage
17 |
18 | ```python
19 | from osmdiff import OSMChange
20 |
21 | # Create with sequence number
22 | osm_change = OSMChange(sequence_number=12345)
23 |
24 | # Retrieve and process changes
25 | status = osm_change.retrieve()
26 | if status == 200:
27 | creations = osm_change.actions["create"]
28 | modifications = osm_change.actions["modify"]
29 | deletions = osm_change.actions["delete"]
30 | print(f"Created: {len(creations)} features")
31 | print(f"Modified: {len(modifications)} features")
32 | print(f"Deleted: {len(deletions)} features")
33 | ```
34 |
35 | ## API Reference
36 |
37 | ::: osmdiff.osmchange.OSMChange
38 | options:
39 | heading_level: 2
40 | show_source: true
41 | members:
42 | - __init__
43 | - get_state
44 | - retrieve
45 | - sequence_number
46 | - frequency
47 | - actions
48 | - __repr__
49 | - __enter__
50 | - __exit__
51 |
52 | ## See Also
53 |
54 | - [AugmentedDiff](augmenteddiff.md) - For augmented diffs with additional metadata
55 |
56 |
--------------------------------------------------------------------------------
/docs/api/continuous.md:
--------------------------------------------------------------------------------
1 | # ContinuousAugmentedDiff
2 |
3 | Iterator for continuous monitoring of OpenStreetMap changes using augmented diffs.
4 |
5 | Builds on [AugmentedDiff](augmenteddiff.md) to provide automatic polling with backoff.
6 |
7 | ## Features
8 |
9 | - Continuous monitoring
10 | - Automatic sequence number tracking
11 | - Exponential backoff during errors
12 | - Configurable polling intervals
13 | - Bounding box filtering
14 |
15 | ## Basic Usage
16 |
17 | ```python
18 | from osmdiff import ContinuousAugmentedDiff
19 |
20 | # Monitor London area
21 | monitor = ContinuousAugmentedDiff(
22 | minlon=-0.489,
23 | minlat=51.28,
24 | maxlon=0.236,
25 | maxlat=51.686
26 | )
27 |
28 | for changes in monitor: # Runs indefinitely
29 | print(f"Changeset {changes.sequence_number}:")
30 | print(f" New: {len(changes.create)}")
31 | print(f" Modified: {len(changes.modify)}")
32 | ```
33 |
34 | ## Advanced Configuration
35 |
36 | ```python
37 | monitor = ContinuousAugmentedDiff(
38 | minlon=-0.489,
39 | minlat=51.28,
40 | maxlon=0.236,
41 | maxlat=51.686,
42 | min_interval=60, # Minimum 1 minute between checks
43 | max_interval=300 # Maximum 5 minutes during backoff
44 | )
45 | ```
46 |
47 | ## API Reference
48 |
49 | ::: osmdiff.augmenteddiff.ContinuousAugmentedDiff
50 | options:
51 | heading_level: 2
52 | show_source: true
53 | members:
54 | - __init__
55 | - __iter__
56 | - __next__
57 |
58 | ## See Also
59 |
60 | - [AugmentedDiff](augmenteddiff.md) - For single diff retrieval
61 | - [OSMChange](osmchange.md) - For standard changesets
62 |
--------------------------------------------------------------------------------
/docs/examples/index.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | Here are some examples of how to use the OSMDiff library.
4 |
5 | ## Basic Augmented Diff Usage
6 |
7 | ```python
8 | from osmdiff import AugmentedDiff
9 |
10 | # Create an AugmentedDiff instance for a specific area
11 | ad = AugmentedDiff(
12 | minlon=-0.489, # London bounding box
13 | minlat=51.28,
14 | maxlon=0.236,
15 | maxlat=51.686
16 | )
17 |
18 | # Get current state and retrieve changes
19 | ad.get_state()
20 | status = ad.retrieve()
21 |
22 | if status == 200:
23 | print(f"Changes retrieved:")
24 | print(f" Created: {len(ad.create)}")
25 | print(f" Modified: {len(ad.modify)}")
26 | print(f" Deleted: {len(ad.delete)}")
27 | ```
28 |
29 | ## Continuous Monitoring
30 |
31 | For continuous monitoring of changes, use the ContinuousAugmentedDiff class:
32 |
33 | ```python
34 | from osmdiff import ContinuousAugmentedDiff
35 |
36 | # Create continuous fetcher for London area
37 | fetcher = ContinuousAugmentedDiff(
38 | minlon=-0.489,
39 | minlat=51.28,
40 | maxlon=0.236,
41 | maxlat=51.686,
42 | min_interval=30, # Check at least every 30 seconds
43 | max_interval=120 # Back off up to 120 seconds if no changes
44 | )
45 |
46 | # Process changes as they come in
47 | for diff in fetcher:
48 | print(f"\nNew changes in diff {diff.sequence_number}:")
49 | print(f" Created: {len(diff.create)} objects")
50 | print(f" Modified: {len(diff.modify)} objects")
51 | print(f" Deleted: {len(diff.delete)} objects")
52 |
53 | # Process specific changes
54 | for obj in diff.create:
55 | if "amenity" in obj.tags:
56 | print(f"New amenity: {obj.tags['amenity']}")
57 | ```
58 |
59 |
--------------------------------------------------------------------------------
/src/osmdiff/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration settings for the osmdiff package.
3 |
4 | This module contains all the configuration settings for the osmdiff package's API interactions.
5 | It provides default values for API endpoints, timeouts, and request headers.
6 |
7 | Configuration Structure:
8 | - API_CONFIG: Contains settings for different API endpoints (Overpass, OSM, Nominatim)
9 | - AUGMENTED_DIFF_CONFIG: Default parameters for AugmentedDiff operations
10 | - DEFAULT_HEADERS: Standard HTTP headers used across all API requests
11 |
12 | Example:
13 | from osmdiff.config import API_CONFIG, DEFAULT_HEADERS
14 |
15 | # Get OSM API base URL
16 | osm_url = API_CONFIG["osm"]["base_url"]
17 |
18 | # Use default headers in requests
19 | response = requests.get(url, headers=DEFAULT_HEADERS)
20 | """
21 |
22 | # Default API URLs and settings for different services
23 | API_CONFIG = {
24 | "overpass": {
25 | "base_url": "http://overpass-api.de/api/augmented_diff?id={sequence_number}", # sic
26 | "state_url": "https://overpass-api.de/api/augmented_diff_status",
27 | "timeout": 30, # Default timeout in seconds
28 | },
29 | "osm": {
30 | "base_url": "https://api.openstreetmap.org/api/0.6",
31 | "timeout": 30,
32 | },
33 | "nominatim": {
34 | "base_url": "https://nominatim.openstreetmap.org",
35 | "timeout": 30,
36 | },
37 | }
38 |
39 | # Default parameters for AugmentedDiff operations
40 | AUGMENTED_DIFF_CONFIG = {
41 | "minlon": None, # Minimum longitude for bounding box
42 | "minlat": None, # Minimum latitude for bounding box
43 | "maxlon": None, # Maximum longitude for bounding box
44 | "maxlat": None, # Maximum latitude for bounding box
45 | "timestamp": None, # Timestamp for diff operations
46 | }
47 |
48 | # User agent string following OSM API guidelines
49 | # https://operations.osmfoundation.org/policies/api/
50 | USER_AGENT = "osmdiff/1.0" # Replace with your actual user agent
51 |
52 | # Standard headers used in all API requests
53 | DEFAULT_HEADERS = {"User-Agent": USER_AGENT, "Accept": "application/json, text/xml"}
54 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## v0.4.6 (2025-05-04)
4 |
5 | **This version fixes some critical bugs in 0.4.5; existing users should upgrade immediately.**
6 |
7 | ### ✅ Testing Improvements
8 | - Added comprehensive tests for OSM object geo interfaces (Node, Way, Relation)
9 | - Added coordinate validation tests for Node class
10 | - Added tests for Member class parsing and geo interface
11 | - Improved test coverage for Way.length() method
12 | - Added equality comparison tests for Node objects
13 |
14 | ### 🐛 Bug Fixes
15 | - Fixed critical bugs in `AugmentedDiff` and `ContinuousAugmentedDiff`
16 | - state fetching
17 | - iteration
18 | - Fixed coordinate validation in Node class to properly handle edge cases
19 | - Improved error messages for invalid coordinates
20 |
21 | ### 📖 Documentation
22 | - Added more detailed docstrings for geo interface methods
23 | - Improved examples in OSM object class documentation
24 |
25 | ## v0.4.5 (2025-05-03)
26 |
27 | ### 🚀 Features
28 | - Add `ContinuousAugmentedDiff` to package exports
29 | - Add continuous augmented-diff fetcher with back-off strategy
30 | - Add `actions` property to `OSMChange` and `AugmentedDiff`
31 | - Update Overpass API base URL to the new endpoint
32 | - Provide `__geo_interface__` example
33 |
34 | ### 🐛 Bug Fixes
35 | - Properly return deleted features (fixes #43)
36 | - Correct state-parsing and response handling in diff APIs
37 | - Capture metadata for deleted objects in `AugmentedDiff` parser
38 | - Fix various API-test mocks (raw streams, gzipped responses, missing imports)
39 | - Clean up test assertions and fixtures for all API scenarios
40 |
41 | ### ♻️ Refactoring
42 | - Convert `test_augmenteddiff.py` to pytest + fixtures
43 | - Switch Overpass base-URL code to use a sequence template
44 | - Switch from PDM to hatchling build system
45 | - Enforce Black formatting
46 |
47 | ### 📖 Documentation
48 | - Add "Continuous Augmented Diff" section to README
49 | - Update docstring & class docs for `ContinuousAugmentedDiff`
50 |
51 | ### ✅ Testing Improvements
52 | - Continue increasing overall test coverage
53 | - Add tests for continuous augmented-diff, metadata capture & plumbing
54 | - Improve API tests with better mocks, error cases & assertions
55 |
--------------------------------------------------------------------------------
/docs/getting-started.md:
--------------------------------------------------------------------------------
1 | # Getting Started with OSMDiff
2 |
3 | OSMDiff helps you work with OpenStreetMap change data. OpenStreetMap (OSM) is a collaborative map that's constantly updated by volunteers. These updates come in different formats:
4 |
5 | - **Augmented Diffs**: Detailed changes including metadata about who made changes and why
6 | - **OSMChange**: Standard format for basic create/modify/delete operations
7 |
8 | ## Installation
9 |
10 | ```bash
11 | pip install osmdiff
12 | ```
13 |
14 | ## Basic Usage
15 |
16 | Track changes in a specific area (here using London as an example):
17 |
18 | ```python
19 | from osmdiff import AugmentedDiff
20 |
21 | # Create an AugmentedDiff instance for London
22 | ad = AugmentedDiff(
23 | minlon=-0.489, # West
24 | minlat=51.28, # South
25 | maxlon=0.236, # East
26 | maxlat=51.686 # North
27 | )
28 |
29 | # Get current state and retrieve changes
30 | ad.get_state()
31 | status = ad.retrieve()
32 |
33 | if status == 200:
34 | print(f"Changes retrieved:")
35 | print(f" Created: {len(ad.create)}")
36 | print(f" Modified: {len(ad.modify)}")
37 | print(f" Deleted: {len(ad.delete)}")
38 | ```
39 |
40 | ## Continuous Monitoring
41 |
42 | For real-time monitoring of changes:
43 |
44 | ```python
45 | from osmdiff import ContinuousAugmentedDiff
46 |
47 | # Create continuous fetcher for London area
48 | fetcher = ContinuousAugmentedDiff(
49 | minlon=-0.489,
50 | minlat=51.28,
51 | maxlon=0.236,
52 | maxlat=51.686,
53 | min_interval=30, # Check at least every 30 seconds
54 | max_interval=120 # Back off up to 120 seconds if no changes
55 | )
56 |
57 | # Process changes as they come in
58 | for diff in fetcher:
59 | print(f"\nNew changes in diff {diff.sequence_number}:")
60 | print(f" Created: {len(diff.create)} objects")
61 | print(f" Modified: {len(diff.modify)} objects")
62 | print(f" Deleted: {len(diff.delete)} objects")
63 |
64 | # Example: Track new amenities
65 | for obj in diff.create:
66 | if "amenity" in obj.tags:
67 | print(f"New amenity: {obj.tags['amenity']}")
68 | ```
69 |
70 | ## Next Steps
71 |
72 | - Learn about [AugmentedDiff API](api/augmenteddiff.md)
73 | - Explore [OSMChange format](api/osmchange.md)
74 | - See [OSM Objects](api/osm.md) you can work with
75 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import xml.etree.ElementTree as ET
3 | from unittest.mock import Mock
4 |
5 |
@pytest.fixture
def mock_osm_api_response():
    """Return a canned OSM API JSON payload containing a single cafe node."""
    cafe_node = {
        "type": "node",
        "id": 123,
        "lat": 51.5,
        "lon": -0.1,
        "tags": {"amenity": "cafe"},
    }
    payload = {
        "version": "0.6",
        "generator": "OpenStreetMap server",
        "elements": [cafe_node],
    }
    return payload
22 |
@pytest.fixture
def mock_osm_api(monkeypatch):
    """Patch ``requests.get`` with a mock that answers 200 and an empty element list."""
    empty_payload = {
        "version": "0.6",
        "generator": "OpenStreetMap server",
        "elements": [],
    }
    api = Mock()
    fake_response = api.get.return_value
    fake_response.status_code = 200
    fake_response.json.return_value = empty_payload
    # Tests receive the parent mock so they can inspect ``api.get`` calls.
    monkeypatch.setattr("requests.get", api.get)
    return api
35 |
36 | @pytest.fixture
37 | def mock_adiff_response():
38 | """Mock response for AugmentedDiff API calls"""
39 | return b'''
40 |
41 |
42 |
43 |
44 |
45 | '''
46 |
47 |
48 | # Define a fixture to load the XML file
@pytest.fixture
def osmchange_xml_obj():
    """Parse the sample OSMChange file and return the resulting ElementTree."""
    with open("tests/data/test_osmchange.xml", "r") as xml_file:
        tree = ET.parse(xml_file)
    return tree
53 |
54 |
55 | # Path to the changeset XML file
@pytest.fixture
def osmchange_file_path():
    """Return the path of the sample OSMChange XML file."""
    sample_path = "tests/data/test_osmchange.xml"
    return sample_path
59 |
60 |
61 | # Path to the augmented diff XML file
@pytest.fixture
def adiff_file_path():
    """Return the path of the sample augmented diff XML file."""
    sample_path = "tests/data/test_adiff.xml"
    return sample_path
65 |
66 |
@pytest.fixture
def api_config():
    """Return a sample API configuration: base URL, timeout and XML headers."""
    xml_headers = {"Content-Type": "application/xml", "Accept": "application/xml"}
    config = {
        "base_url": "https://api.openstreetmap.org/api/0.6",
        "timeout": 30,
        "headers": xml_headers,
    }
    return config
74 |
75 |
76 | @pytest.fixture
77 | def create_test_changeset():
78 | def _create_changeset(id="12345", user="testuser"):
79 | return f"""
80 |
81 |
82 |
83 |
84 |
85 | """
86 |
87 | return _create_changeset
88 |
--------------------------------------------------------------------------------
/tests/test_augmenteddiff_continuous.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from osmdiff import ContinuousAugmentedDiff, AugmentedDiff
3 | from unittest.mock import patch, MagicMock
4 |
5 |
6 | class TestContinuousAugmentedDiff:
7 | @pytest.fixture
8 | def mock_state_sequence(self):
9 | # Simulate state endpoint returning increasing sequence numbers
10 | return [12345, 12346, 12347]
11 |
12 | @pytest.fixture
13 | def mock_adiff_response(self):
14 | xml_content = """\n\n\n\n\n\n""".strip()
15 | mock_response = MagicMock()
16 | mock_response.status_code = 200
17 | mock_response.text = xml_content
18 | mock_response.content = xml_content.encode()
19 | mock_response.raw = MagicMock()
20 | return mock_response
21 |
22 | def test_iterator_yields_augmented_diff(
23 | self, mock_state_sequence, mock_adiff_response
24 | ):
25 | # Patch get_state and retrieve, and patch time.sleep to avoid real delays
26 | with (
27 | patch.object(AugmentedDiff, "get_state", side_effect=mock_state_sequence),
28 | patch.object(AugmentedDiff, "retrieve", return_value=200),
29 | patch("time.sleep", return_value=None),
30 | ):
31 |
32 | fetcher = ContinuousAugmentedDiff(min_interval=0, max_interval=0)
33 | gen = iter(fetcher)
34 | diff = next(gen)
35 | assert isinstance(diff, AugmentedDiff)
36 | assert diff.sequence_number == 12345
37 |
38 | # Next sequence increases again, another diff is yielded
39 | diff2 = next(gen)
40 | assert isinstance(diff2, AugmentedDiff)
41 | assert diff2.sequence_number == 12346
42 |
43 | def test_iterator_handles_backoff(self, mock_state_sequence, mock_adiff_response):
44 | # Simulate get_state returning None (API error) first, then a valid sequence
45 | with (
46 | patch.object(AugmentedDiff, "get_state", side_effect=[None, 12345, 12346]),
47 | patch.object(AugmentedDiff, "retrieve", return_value=200),
48 | patch("time.sleep", return_value=None),
49 | ):
50 |
51 | fetcher = ContinuousAugmentedDiff(min_interval=0, max_interval=0)
52 | gen = iter(fetcher)
53 | # First call to get_state returns None, so it should backoff and retry
54 | diff = next(gen)
55 | assert isinstance(diff, AugmentedDiff)
56 | assert diff.sequence_number == 12345
57 |
--------------------------------------------------------------------------------
/tests/test_api.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from osmdiff import AugmentedDiff, OSMChange
3 | from typing_extensions import assert_type
4 | from unittest.mock import patch, MagicMock
5 | import requests
6 |
7 |
8 | class TestApi:
9 | """Tests for OSM API integration."""
10 |
11 | @pytest.fixture
12 | def mock_osm_state_response(self):
13 | """Fixture providing a mock OSM state API response."""
14 | from io import BytesIO
15 |
16 | xml_content = """
17 |
18 |
19 | 12345
20 | 2024-01-01T00:00:00Z
21 |
22 | """
23 |
24 | mock_response = MagicMock(spec=requests.Response)
25 | mock_response.status_code = 200
26 | mock_response.text = xml_content
27 | mock_response.content = xml_content.encode()
28 |
29 | # Create a raw attribute with a read method
30 | mock_raw = BytesIO(xml_content.encode())
31 | mock_raw.decode_content = True
32 | mock_response.raw = mock_raw
33 |
34 | return mock_response
35 |
36 | @pytest.fixture
37 | def mock_osm_diff_response(self):
38 | """Fixture providing a mock OSM diff response."""
39 | from io import BytesIO
40 |
41 | xml_content = """
42 |
43 |
44 |
45 |
46 | """
47 |
48 | mock_response = MagicMock(spec=requests.Response)
49 | mock_response.status_code = 200
50 | mock_response.text = xml_content
51 | mock_response.content = xml_content.encode()
52 |
53 | # Create a raw attribute with a read method
54 | mock_raw = BytesIO(xml_content.encode())
55 | mock_raw.decode_content = True
56 | mock_response.raw = mock_raw
57 |
58 | return mock_response
59 |
60 | @pytest.fixture
61 | def mock_adiff_response(self):
62 | """Fixture providing a mock Augmented Diff response."""
63 | from io import BytesIO
64 |
65 | xml_content = """
66 |
67 |
68 |
69 |
70 |
71 | """
72 |
73 | mock_response = MagicMock(spec=requests.Response)
74 | mock_response.status_code = 200
75 | mock_response.text = xml_content
76 | mock_response.content = xml_content.encode()
77 |
78 | # Create a raw attribute with a read method
79 | mock_raw = BytesIO(xml_content.encode())
80 | mock_raw.decode_content = True
81 | mock_response.raw = mock_raw
82 |
83 | return mock_response
84 |
85 | @pytest.mark.integration
86 | def test_osm_diff_api_state(self, mock_osm_state_response):
87 | """Test getting state from OSM API returns valid sequence number."""
88 | with patch(
89 | "osmdiff.osmchange.requests.get", return_value=mock_osm_state_response
90 | ):
91 | osm_change = OSMChange()
92 | osm_change.base_url = "http://example.com/api"
93 | state = osm_change.get_state()
94 | assert state is True
95 | assert osm_change.sequence_number == 12345
96 | assert isinstance(osm_change.sequence_number, int)
97 |
98 | @pytest.mark.integration
99 | def test_osm_diff_retrieve(self, mock_osm_diff_response):
100 | """Test retrieving OSM diff returns successful status."""
101 | with patch("requests.get", return_value=mock_osm_diff_response):
102 | osm_change = OSMChange(sequence_number=12345)
103 | status = osm_change.retrieve()
104 | assert status == 200
105 | assert hasattr(osm_change, "actions")
106 | assert len(osm_change.actions["create"]) > 0
107 |
108 | @pytest.mark.integration
109 | def test_api_error_handling(self):
110 | """Test API error conditions are properly handled."""
111 | mock_response = MagicMock(spec=requests.Response)
112 | mock_response.status_code = 500
113 |
114 | with patch("requests.get", return_value=mock_response):
115 | with pytest.raises(Exception):
116 | osm_change = OSMChange()
117 | osm_change.retrieve()
118 |
--------------------------------------------------------------------------------
/tests/test_adiff.py:
--------------------------------------------------------------------------------
1 | from osmdiff import Node, AugmentedDiff, Relation, Way
2 | from typing_extensions import assert_type
3 | from unittest.mock import patch, Mock
4 | from io import StringIO, BytesIO
5 |
6 |
7 | class TestAugmentedDiff:
8 | "tests for AugmentedDiff class"
9 |
10 | def test_init_augmenteddiff(self):
11 | "Test AugmentedDiff init"
12 | augmenteddiff = AugmentedDiff()
13 | assert_type(augmenteddiff, AugmentedDiff)
14 | assert_type(augmenteddiff.create, list)
15 | assert_type(augmenteddiff.modify, list)
16 | assert_type(augmenteddiff.delete, list)
17 | assert len(augmenteddiff.create) == 0
18 | assert len(augmenteddiff.modify) == 0
19 | assert len(augmenteddiff.delete) == 0
20 |
21 | def test_set_sequencenumber(self):
22 | "Sequence number is not defined by default but can be set manually"
23 | augmented_diff = AugmentedDiff()
24 | assert not augmented_diff.sequence_number
25 | augmented_diff.sequence_number = 12345
26 | assert augmented_diff.sequence_number == 12345
27 | augmented_diff.sequence_number = "12345"
28 | assert augmented_diff.sequence_number == 12345
29 |
30 | @patch("osmdiff.augmenteddiff.requests.get")
31 | def test_read_changeset_from_xml_file(
32 | self, mock_get, adiff_file_path, mock_adiff_response
33 | ):
34 | """Test initializing from an XML object with mocked response"""
35 | mock_get.return_value.status_code = 200
36 | mock_get.return_value.raw = BytesIO(mock_adiff_response)
37 | mock_get.return_value.raw.decode_content = True
38 |
39 | adiff = AugmentedDiff(file=adiff_file_path)
40 |
41 | # Verify API call was made if file is remote
42 | if adiff_file_path.startswith("http"):
43 | mock_get.assert_called_once()
44 |
45 | # Test that objects were parsed correctly
46 | assert len(adiff.create) > 0
47 | assert len(adiff.modify) >= 0 # Some diffs may only have creates
48 | assert len(adiff.delete) >= 0 # Some diffs may only have creates
49 |
50 | # Test created object structure
51 | if adiff.create:
52 | created_obj = adiff.create[0]
53 | assert isinstance(created_obj, (Node, Way, Relation))
54 | assert hasattr(created_obj, "attribs")
55 | assert hasattr(created_obj, "tags")
56 |
57 | # Test modified object structure
58 | if adiff.modify:
59 | modified = adiff.modify[0]
60 | assert set(modified.keys()) == {"old", "new"}
61 | assert isinstance(modified["old"], (Node, Way, Relation))
62 | assert isinstance(modified["new"], (Node, Way, Relation))
63 |
64 | # Test deleted object structure
65 | if adiff.delete:
66 | deleted_obj = adiff.delete[0]
67 | assert "old" in deleted_obj
68 | assert isinstance(deleted_obj["old"], (Node, Way, Relation))
69 | assert "meta" in deleted_obj # Verify metadata exists
70 |
71 | # Test metadata was parsed
72 | assert adiff.timestamp is not None
73 | assert isinstance(adiff.remarks, list)
74 |
75 | def test_auto_increment(self):
76 | "Test auto-increment behavior in retrieve()"
77 | augmented_diff = AugmentedDiff()
78 | augmented_diff.sequence_number = 100
79 |
80 | # Create a minimal valid XML response
81 | xml_content = """
82 |
83 | The data included in this document is from www.openstreetmap.org
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 | """
97 |
98 | # Mock the requests.get call
99 | with patch("requests.get") as mock_get:
100 |
101 | def get_mock_response():
102 | mock_response = Mock()
103 | mock_response.status_code = 200
104 | mock_response.raw = StringIO(xml_content)
105 | mock_response.raw.decode_content = True
106 | return mock_response
107 |
108 | mock_get.return_value = get_mock_response()
109 |
110 | # Test auto-increment (default behavior)
111 | augmented_diff.retrieve()
112 | assert augmented_diff.sequence_number == 101
113 |
114 | # Create fresh mock for second call
115 | mock_get.return_value = get_mock_response()
116 |
117 | # Test without auto-increment
118 | augmented_diff.retrieve(auto_increment=False)
119 | assert augmented_diff.sequence_number == 101
120 |
121 | # Create fresh mock for third call
122 | mock_get.return_value = get_mock_response()
123 |
124 | # Test with auto-increment again
125 | augmented_diff.retrieve()
126 | assert augmented_diff.sequence_number == 102
127 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # osmdiff
2 |
3 | ## We are on Codeberg!
4 | If you are reading this notice on Github, please point your bookmarks and git remotes at the `osmdiff` repo [on Codeberg](https://codeberg.org/mvexel/osmdiff) instead. This project will not be updated on Github.
5 |
6 | ---
7 |
8 | A read-only interface to OpenStreetMap change APIs and files. See also [pyosm](https://github.com/iandees/pyosm) which can do similar things.
9 |
10 | ## Documentation
11 |
12 | 📚 Comprehensive documentation is available at [mvexel.github.io/osmdiff](https://mvexel.github.io/osmdiff/)
13 |
14 | ## Python Version Support
15 |
16 | This module has been tested with Python 3.9 - 3.12. Use at your own risk with other versions.
17 |
18 | ## Installing
19 |
20 | `pip install osmdiff`
21 |
22 | ## Usage
23 |
24 | See the [documentation](https://mvexel.github.io/osmdiff/) for more details.
25 |
26 | ### Reading
27 |
28 | Retrieve the latest replication diff from the OSM API:
29 |
30 | ```python
31 | >>> from osmdiff import OSMChange
32 | >>> o = OSMChange(frequency="minute") # minute is the default frequency
33 | >>> o.get_state() # retrieve current sequence ID
34 | >>> o.sequence_number
35 | 2704451
36 | >>> o.retrieve() # retrieve from API
37 | >>> o
38 | OSMChange (677 created, 204 modified, 14 deleted)
39 | ```
40 |
41 | Read a replication diff from a file:
42 |
43 | ```python
44 | >>> from osmdiff import OSMChange
45 | >>> o = OSMChange(file="test_osmchange.xml")
46 | >>> o
47 | OSMChange (831 created, 368 modified, 3552 deleted)
48 | ```
49 |
50 | Retrieve the latest Augmented Diff from Overpass:
51 |
52 | ```python
53 | >>> from osmdiff import AugmentedDiff
54 | >>> a = AugmentedDiff()
55 | >>> a.get_state()
56 | >>> a.sequence_number
57 | 2715051
58 | >>> a.retrieve()
59 | >>> a
60 | AugmentedDiff (747 created, 374 modified, 55 deleted)
61 | ```
62 |
63 | Read an augmented diff file:
64 |
65 | ```python
66 | >>> from osmdiff import AugmentedDiff
67 | >>> a = AugmentedDiff(file="test_adiff.xml")
68 | >>> a
69 | AugmentedDiff (2329 created, 677 modified, 39 deleted)
70 | ```
71 |
72 | ### Inspect contents
73 |
74 | Get all the things that `chris66` has created:
75 |
76 | ```python
77 | >>> [n for n in a.create if n.attribs["user"] == "chris66"]
78 | [Node 5221564287, Node 5221564288, Node 5221564289, Node 5221564290, Node 5221564291, Node 5221564292, Node 5221564293, Node 5221564294, Node 5221564295, Node 5221564296, Node 5221564297, Node 5221564298, Node 5221564299, Node 5221564301, Node 5221564302, Node 5221564303, Node 5221564304, Way 539648222 (5 nodes), Way 539648223 (5 nodes), Way 539648323 (5 nodes)]
79 | ```
80 |
81 | Get all `residential` ways that were modified:
82 |
83 | ```python
84 | >>> [n["new"] for n in a.modify if type(n["new"]) == Way and n["new"].tags.get("highway") == "residential"]
85 | [Way 34561958 (3 nodes), Way 53744484 (6 nodes), Way 53744485 (6 nodes), Way 122650942 (3 nodes), Way 283221266 (4 nodes), Way 344272652 (5 nodes), Way 358243999 (13 nodes), Way 410489319 (5 nodes), Way 452218081 (10 nodes)]
86 | ```
87 |
88 | Get all ways that were changed to `residential` from something else:
89 |
90 | ```python
91 | >>> [n["new"] for n in a.modify if type(n["new"]) == Way and n["new"].tags.get("highway") == "residential" and n["old"].tags.get("highway") != "residential"]
92 | [Way 410489319 (5 nodes), Way 452218081 (10 nodes)]
93 | ```
94 |
95 | Inspect details:
96 |
97 | ```python
98 | >>> w = [n["new"] for n in a.modify if n["new"].attribs["id"] == "452218081"]
99 | >>> w
100 | [Way 452218081 (10 nodes)]
101 | >>> w[0]
102 | Way 452218081 (10 nodes)
103 | >>> w[0].tags
104 | {'highway': 'residential'}
105 | >>> w[0].attribs
106 | {'id': '452218081', 'version': '2', 'timestamp': '2017-11-10T13:52:01Z', 'changeset': '53667190', 'uid': '2352517', 'user': 'carths81'}
107 | >>> w[0].attribs
108 | {'id': '452218081', 'version': '2', 'timestamp': '2017-11-10T13:52:01Z', 'changeset': '53667190', 'uid': '2352517', 'user': 'carths81'}
109 | >>> w[0].bounds
110 | ['12.8932677', '43.3575917', '12.8948117', '43.3585947']
111 | ```
112 |
113 | ### Iterating
114 |
115 | To continuously iterate over AugmentedDiff objects, use `ContinuousAugmentedDiff`:
116 |
117 | ```python
118 | >>> for a in ContinuousAugmentedDiff():
119 | ... print(a)
120 | ```
121 |
122 | This will iterate indefinitely, printing each AugmentedDiff as it is retrieved.
123 |
124 | You can also use it in a loop:
125 |
126 | ```python
127 | for a in ContinuousAugmentedDiff():
128 | if a.sequence_number > 123456:
129 | break
130 | print(a)
131 | ```
132 |
133 | ## Configuration
134 |
135 | The osmdiff package uses a centralized configuration system in `src/osmdiff/config.py`. This includes:
136 |
137 | ### API Configuration
138 | Default settings for API endpoints and timeouts:
139 |
140 | ```python
141 | API_CONFIG = {
142 | "overpass": {"base_url": "...", "timeout": 30},
143 | "osm": {"base_url": "...", "timeout": 30},
144 | "nominatim": {"base_url": "...", "timeout": 30}
145 | }
146 | ```
147 |
148 | ### Request Headers
149 | Standard headers used in all API requests:
150 | ```python
151 | DEFAULT_HEADERS = {
152 | "User-Agent": "osmdiff/1.0",
153 | "Accept": "application/json, text/xml"
154 | }
155 | ```
156 |
157 | ### Customizing Configuration
158 | You can override any configuration value at runtime by passing parameters to the respective class constructors:
159 |
160 | ```python
161 | from osmdiff import OSMChange
162 |
163 | # Override default URL and timeout
164 | change = OSMChange(
165 | url="https://custom-api.example.com",
166 | timeout=60
167 | )
168 | ```
169 |
170 | ## Community
171 |
172 | Join the conversation and get help:
173 |
174 | - [OpenStreetMap Community Forum](https://community.openstreetmap.org/) - Please mention me (`mvexel`)
175 | - [OSM Slack](https://osmus.slack.com/) - Join the #dev channel
176 |
177 | Please be respectful and follow the [OpenStreetMap Code of Conduct](https://wiki.openstreetmap.org/wiki/Code_of_conduct) in all community interactions.
178 |
179 | ## Contributing
180 |
181 | I welcome your contributions in code, documentation and suggestions for enhancements.
182 |
183 | If you find `osmdiff` useful, or you use it in commercial software, please consider sponsoring this project.
184 |
--------------------------------------------------------------------------------
/tests/test_osm.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from osmdiff.osm.osm import Member, OSMObject, Way, Relation, Node
3 |
4 |
def test_osmobject_init_defaults():
    """A bare OSMObject starts with empty tags and attribs and no bounds."""
    blank = OSMObject()
    assert blank.bounds is None
    assert blank.attribs == {}
    assert blank.tags == {}
10 |
11 |
def test_osmobject_init_with_values():
    """Values passed to the constructor are exposed unchanged on the object."""
    given = {
        "tags": {"amenity": "cafe"},
        "attribs": {"id": "123", "version": "1"},
        "bounds": [0.0, 1.0, 2.0, 3.0],
    }
    obj = OSMObject(**given)
    assert obj.tags == given["tags"]
    assert obj.attribs == given["attribs"]
    assert obj.bounds == given["bounds"]
20 |
21 |
def test_osmobject_repr():
    """The repr of an OSMObject mentions its class name and id."""
    rendered = repr(OSMObject(attribs={"id": "42"}))
    assert "OSMObject 42" in rendered
25 |
26 |
27 | import xml.etree.ElementTree as ET
28 |
29 |
30 | def test_osmobject_parse_tags_and_bounds():
31 | xml = """"""
32 | elem = ET.fromstring(xml)
33 | obj = OSMObject()
34 | obj._parse_tags(elem)
35 | assert obj.tags["amenity"] == "cafe"
36 | obj._parse_bounds(elem)
37 | assert obj.bounds == ["0", "1", "2", "3"]
38 |
39 |
def test_osmobject_to_dict_and_json():
    """to_dict() exposes tags, id and bounds; to_json() serialises the tags."""
    obj = OSMObject(tags={"foo": "bar"}, attribs={"id": "1"}, bounds=[0, 1, 2, 3])

    as_dict = obj.to_dict()
    assert as_dict["tags"] == {"foo": "bar"}
    assert as_dict["id"] == "1"
    assert as_dict["bounds"] == [0, 1, 2, 3]

    as_json = obj.to_json()
    assert '"foo": "bar"' in as_json
48 |
49 |
50 | def test_osmobject_from_file(tmp_path):
51 | xml = ''
52 | file_path = tmp_path / "test.xml"
53 | file_path.write_text(xml)
54 | obj = OSMObject.from_file(str(file_path))
55 | assert isinstance(obj, OSMObject)
56 | assert obj.attribs["id"] == "1"
57 |
58 |
def test_way_and_relation_repr():
    """Way and Relation reprs report the id plus the node / member count.

    This replaces two functions that reused the names
    ``test_osmobject_init_with_values`` and ``test_osmobject_repr`` already
    defined earlier in this module. Redefining a name at module level
    silently shadows the earlier test, so only one of each pair was ever
    collected. The init test was an exact duplicate and the OSMObject repr
    assertion is already covered by ``test_osmobject_repr``; only the Way and
    Relation assertions were unique, and they now live under their own name.
    """
    way = Way(attribs={"id": "99"})
    way.nodes = [1, 2, 3]
    assert "Way 99 (3 nodes)" in repr(way)

    rel = Relation(attribs={"id": "7"})
    rel.members = [1, 2]
    assert "Relation 7 (2 members)" in repr(rel)
78 |
79 |
def test_way_is_closed():
    """A way is closed exactly when its first and last node coincide."""
    ring = Way()
    ring.nodes = [1, 2, 1]
    closed = ring.is_closed()
    assert closed is True

    ring.nodes = [1, 2, 3]
    closed = ring.is_closed()
    assert closed is False
86 |
87 |
def test_node_geo_interface_and_equality():
    """Nodes expose a GeoJSON Point and compare equal by coordinates."""
    here = Node(attribs={"lon": "1", "lat": "2"})
    same_place = Node(attribs={"lon": "1", "lat": "2"})
    elsewhere = Node(attribs={"lon": "3", "lat": "4"})

    expected_point = {"type": "Point", "coordinates": [1, 2]}
    assert here.__geo_interface__ == expected_point

    assert here == same_place
    assert here != elsewhere
    # Comparing against a non-Node must be unequal.
    assert here != "not a node"
98 |
99 |
def test_node_invalid_coords():
    """Out-of-range longitude or latitude must raise ValueError."""
    out_of_range = (
        ({"lon": "181", "lat": "0"}, "lon"),  # Invalid lon
        ({"lon": "0", "lat": "91"}, "lat"),  # Invalid lat
        ({"lon": "-181", "lat": "0"}, "lon"),  # Invalid lon
        ({"lon": "0", "lat": "-91"}, "lat"),  # Invalid lat
    )
    for attribs, coordinate in out_of_range:
        with pytest.raises(ValueError):
            # Either construction or the property access may be what raises;
            # both stay inside the context, as in the hand-written version.
            getattr(Node(attribs=attribs), coordinate)
115 |
116 |
def test_way_geo_interface():
    """An open way maps to a LineString, a closed one to a Polygon."""
    corners = [("0", "0"), ("1", "0"), ("1", "1")]
    way = Way()
    way.nodes = [Node(attribs={"lon": lon, "lat": lat}) for lon, lat in corners]

    # Open way should be LineString
    open_geometry = way.__geo_interface__
    assert open_geometry == {
        "type": "LineString",
        "coordinates": [[0, 0], [1, 0], [1, 1]],
    }

    # Closed way should be Polygon
    closing_node = Node(attribs={"lon": "0", "lat": "0"})
    way.nodes.append(closing_node)
    closed_geometry = way.__geo_interface__
    assert closed_geometry == {
        "type": "Polygon",
        "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 0]]],
    }
137 |
138 |
def test_relation_geo_interface():
    """A relation maps to a GeometryCollection of its members' geometries.

    The second half additionally checks the Node geo interface and equality
    using float-valued coordinates.
    """
    point_member = Node(attribs={"lon": "0.0", "lat": "0.0"})
    line_member = Way(
        nodes=[
            Node(attribs={"lon": "1.0", "lat": "0.0"}),
            Node(attribs={"lon": "1.0", "lat": "1.0"}),
        ]
    )
    relation = Relation()
    relation.members = [point_member, line_member]

    expected = {
        "type": "GeometryCollection",
        "geometries": [
            {"type": "Point", "coordinates": [0.0, 0.0]},
            {"type": "LineString", "coordinates": [[1.0, 0.0], [1.0, 1.0]]},
        ],
    }
    assert relation.__geo_interface__ == expected

    first = Node(attribs={"lat": 10.0, "lon": 20.0})
    twin = Node(attribs={"lat": 10.0, "lon": 20.0})
    other = Node(attribs={"lat": 10.1, "lon": 20.1})

    # __geo_interface__ property
    geo = first.__geo_interface__
    assert geo["type"] == "Point"
    assert geo["coordinates"] == [first.lon, first.lat]

    # __eq__
    assert first == twin
    assert first != other
168 |
169 |
def test_way_init_defaults():
    """A bare Way has no tags, attribs, bounds or nodes."""
    empty_way = Way()
    assert empty_way.nodes == []
    assert empty_way.bounds is None
    assert empty_way.attribs == {}
    assert empty_way.tags == {}
176 |
177 |
def test_way_init_with_values():
    """Constructor values and later-assigned nodes are exposed unchanged."""
    given = {
        "tags": {"amenity": "cafe"},
        "attribs": {"id": "123", "version": "1"},
        "bounds": [0.0, 1.0, 2.0, 3.0],
    }
    node_refs = ["1", "2", "3"]

    way = Way(**given)
    way.nodes = node_refs

    assert way.tags == given["tags"]
    assert way.attribs == given["attribs"]
    assert way.bounds == given["bounds"]
    assert way.nodes == node_refs
189 |
190 |
def test_relation_init_defaults():
    """A bare Relation has no tags, attribs, bounds or members."""
    empty_relation = Relation()
    assert empty_relation.members == []
    assert empty_relation.bounds is None
    assert empty_relation.attribs == {}
    assert empty_relation.tags == {}
197 |
198 |
def test_relation_init_with_values():
    """Constructor values and later-assigned members are exposed unchanged."""
    given = {
        "tags": {"amenity": "cafe"},
        "attribs": {"id": "123", "version": "1"},
        "bounds": [0.0, 1.0, 2.0, 3.0],
    }
    member_refs = ["1", "2", "3"]

    relation = Relation(**given)
    relation.members = member_refs

    assert relation.tags == given["tags"]
    assert relation.attribs == given["attribs"]
    assert relation.bounds == given["bounds"]
    assert relation.members == member_refs
210 |
211 |
def test_member_class():
    """A Member parsed from XML is rendered as a geometry-less Feature."""
    member_xml = ET.Element("member", {"type": "node", "ref": "123", "role": "point"})
    member = Member()
    member._parse_attributes(member_xml)

    expected_feature = {
        "type": "Feature",
        "geometry": None,
        "properties": {"type": "node", "ref": 123, "role": "point"},
    }
    assert member.__geo_interface__ == expected_feature
222 |
--------------------------------------------------------------------------------
/src/osmdiff/osmchange.py:
--------------------------------------------------------------------------------
1 | from gzip import GzipFile
2 | from posixpath import join as urljoin
3 | from typing import Optional
4 | from xml.etree import ElementTree
5 |
6 | import requests
7 |
8 | from osmdiff.config import API_CONFIG, DEFAULT_HEADERS
9 | from osmdiff.osm import OSMObject
10 |
11 |
12 | class OSMChange(object):
13 | """Handles OpenStreetMap changesets in OSMChange format.
14 |
15 | Args:
16 | url: Base URL of OSM replication server
17 | frequency: Replication frequency ('minute', 'hour', or 'day')
18 | file: Path to local OSMChange XML file
19 | sequence_number: Sequence number of the diff
20 | timeout: Request timeout in seconds
21 |
22 | Note:
23 | Follows the OSM replication protocol.
24 | """
25 |
26 | def __init__(
27 | self,
28 | url: Optional[str] = None,
29 | frequency: str = "minute",
30 | file: Optional[str] = None,
31 | sequence_number: Optional[int] = None,
32 | timeout: Optional[int] = None,
33 | ):
34 | # Initialize with defaults from config
35 | self.base_url = url or API_CONFIG["osm"]["base_url"]
36 | self.timeout = timeout or API_CONFIG["osm"]["timeout"]
37 |
38 | self.create = []
39 | self.modify = []
40 | self.delete = []
41 |
42 | if file:
43 | with open(file, "r") as fh:
44 | xml = ElementTree.iterparse(fh, events=("start", "end"))
45 | self._parse_xml(xml)
46 | else:
47 | self._frequency = frequency
48 | self._sequence_number = sequence_number
49 |
50 | def get_state(self) -> bool:
51 | """
52 | Retrieve the current state from the OSM API.
53 |
54 | Returns:
55 | bool: True if state was successfully retrieved, False otherwise
56 |
57 | Raises:
58 | requests.RequestException: If the API request fails
59 | """
60 | state_url = urljoin(self.base_url, "api/0.6/changesets/state")
61 | response = requests.get(
62 | state_url, timeout=self.timeout, headers=DEFAULT_HEADERS
63 | )
64 | if response.status_code != 200:
65 | return False
66 |
67 | # Parse XML response
68 | root = ElementTree.fromstring(response.content)
69 | state = root.find("state")
70 | if state is not None:
71 | seq = state.find("sequenceNumber")
72 | if seq is not None and seq.text:
73 | self._sequence_number = int(seq.text)
74 | return True
75 | return False
76 |
77 | def _build_sequence_url(self) -> str:
78 | seqno = str(self._sequence_number).zfill(9)
79 | url = urljoin(
80 | self.base_url,
81 | self._frequency,
82 | seqno[:3],
83 | seqno[3:6],
84 | "{}{}".format(seqno[6:], ".osc.gz"),
85 | )
86 | return url
87 |
88 | def _parse_xml(self, xml) -> None:
89 | for event, elem in xml:
90 | if elem.tag in ("create", "modify", "delete"):
91 | self._build_action(elem)
92 |
93 | def _build_action(self, elem: ElementTree.Element) -> None:
94 | """
95 | Build OSM objects from XML elements and add them to the appropriate list.
96 |
97 | Args:
98 | elem (ElementTree.Element): XML element containing OSM objects
99 | """
100 | for thing in elem:
101 | o = OSMObject.from_xml(thing)
102 | getattr(self, elem.tag).append(o) # Use getattr instead of __getattribute__
103 |
104 | def retrieve(self, clear_cache: bool = False, timeout: Optional[int] = None) -> int:
105 | """
106 | Retrieve the OSM diff corresponding to the OSMChange sequence_number.
107 |
108 | Parameters:
109 | clear_cache (bool): clear the cache
110 | timeout (int): request timeout
111 |
112 | Returns:
113 | int: HTTP status code
114 |
115 | Raises:
116 | Exception: If an invalid sequence number is provided
117 | """
118 | if not self._sequence_number:
119 | raise Exception("invalid sequence number")
120 | if clear_cache:
121 | self.create, self.modify, self.delete = ([], [], [])
122 | try:
123 | r = requests.get(
124 | self._build_sequence_url(),
125 | stream=True,
126 | timeout=timeout or self.timeout,
127 | headers=DEFAULT_HEADERS,
128 | )
129 | if r.status_code != 200:
130 | return r.status_code
131 | # Handle both gzipped and plain XML responses
132 | content = r.content
133 | if content.startswith(b"\x1f\x8b"): # Gzip magic number
134 | gzfile = GzipFile(fileobj=r.raw)
135 | xml = ElementTree.iterparse(gzfile, events=("start", "end"))
136 | else:
137 | xml = ElementTree.iterparse(r.raw, events=("start", "end"))
138 | self._parse_xml(xml)
139 | return r.status_code
140 | except ConnectionError:
141 | # FIXME catch this?
142 | return 0
143 |
144 | @classmethod
145 | def from_xml(cls, xml: ElementTree.Element) -> "OSMChange":
146 | """
147 | Initialize OSMChange object from an XML object.
148 |
149 | If you used this method before version 0.3, please note that this
150 | method now takes an XML object. If you want to initialize from a file,\
151 | use the from_xml_file method.
152 |
153 | Parameters:
154 | xml (ElementTree.Element): XML object
155 |
156 | Returns:
157 | OSMChange: OSMChange object
158 | """
159 | new_osmchange_obj = cls()
160 | new_osmchange_obj._parse_xml(xml)
161 | return new_osmchange_obj
162 |
@classmethod
def from_xml_file(cls, path) -> "OSMChange":
    """
    Initialize OSMChange object from an XML file.

    The file is opened in binary mode so that the XML parser decodes the
    bytes according to the document's own encoding declaration instead of
    the platform's default text encoding.

    Parameters:
        path (str): path to the XML file

    Returns:
        OSMChange: OSMChange object
    """
    with open(path, "rb") as fh:
        xml = ElementTree.iterparse(fh, events=("start", "end"))
        # iterparse reads lazily, so the events must be consumed while the
        # file is still open -- keep the return inside the ``with`` block.
        return cls.from_xml(xml)
177 |
@property
def sequence_number(self) -> int:
    """Return the stored sequence number (``None`` when none is set)."""
    return self._sequence_number

@sequence_number.setter
def sequence_number(self, value):
    """
    Store a sequence number, coercing it with ``int()``.

    Args:
        value: ``None`` to clear the sequence number, otherwise anything
            ``int()`` accepts (for example ``12345`` or ``"12345"``)

    Raises:
        ValueError: If ``int(value)`` fails with a ValueError
    """
    # None is a legitimate "not set" value and bypasses conversion.
    if value is None:
        self._sequence_number = None
        return
    try:
        parsed = int(value)
    except ValueError:
        raise ValueError(
            "sequence_number must be an integer or parsable as an integer"
        )
    self._sequence_number = parsed
194 |
@property
def frequency(self) -> str:
    """Return the frequency value stored on this object."""
    return self._frequency

@frequency.setter
def frequency(self, f: str) -> None:
    """
    Set the frequency for OSM changes.

    Args:
        f (str): Frequency ('minute', 'hour', or 'day')

    Raises:
        ValueError: If frequency is not one of the valid options
    """
    # An ordered tuple (not a set) keeps the error message deterministic:
    # set iteration order for strings varies between interpreter runs.
    valid_frequencies = ("minute", "hour", "day")
    if f not in valid_frequencies:
        raise ValueError(
            f"Frequency must be one of: {', '.join(valid_frequencies)}"
        )
    self._frequency = f
216 |
@property
def actions(self):
    """Return the create, modify and delete lists in a dict keyed by action name."""
    return dict(create=self.create, modify=self.modify, delete=self.delete)
221 |
def __repr__(self):
    """
    Summarize the change set as counts of created, modified and deleted items.

    Returns:
        str: e.g. ``"OSMChange (2 created, 1 modified, 0 deleted)"``
    """
    # Adjacent f-strings instead of a backslash continuation inside the
    # literal, so source indentation can never leak into the output.
    return (
        f"OSMChange ({len(self.create)} created, "
        f"{len(self.modify)} modified, {len(self.delete)} deleted)"
    )
227 |
def __enter__(self):
    """Enter the ``with`` block; the object itself is the context value."""
    return self
230 |
def __exit__(self, exc_type, exc_val, exc_tb):
    """Empty the create, modify and delete lists in place on context exit.

    Nothing is returned, so an exception raised inside the ``with`` block
    is not suppressed.
    """
    for action in ("create", "modify", "delete"):
        getattr(self, action).clear()
236 |
--------------------------------------------------------------------------------
/tests/test_osmchange.py:
--------------------------------------------------------------------------------
1 | import io
2 | import gzip
3 | import pytest
4 | from osmdiff import Node, OSMChange, Relation, Way
5 | from typing_extensions import assert_type
6 | from unittest.mock import patch, MagicMock
7 |
8 |
class TestOSMChange:
    "tests for OSMChange object"

    def test_init_osmchange(self):
        "Test OSMChange init"
        osmchange = OSMChange()
        # typing_extensions.assert_type is a static-analysis helper that does
        # nothing at runtime, so the types are checked with isinstance here.
        assert isinstance(osmchange, OSMChange)
        assert isinstance(osmchange.create, list)
        assert isinstance(osmchange.modify, list)
        assert isinstance(osmchange.delete, list)
        assert len(osmchange.create) == 0
        assert len(osmchange.modify) == 0
        assert len(osmchange.delete) == 0

    def test_set_sequencenumber(self):
        "Sequence number is not defined by default but can be set manually"
        osm_change = OSMChange()
        assert not osm_change.sequence_number
        osm_change.sequence_number = 12345
        assert osm_change.sequence_number == 12345
        osm_change.sequence_number = "12345"
        assert osm_change.sequence_number == 12345

    @patch('osmdiff.osmchange.requests.get')
    def test_read_changeset_from_xml_file(self, mock_get, osmchange_file_path):
        """Test initializing from an XML object with mocked response"""
        # Mock the response if testing remote file
        if osmchange_file_path.startswith('http'):
            mock_get.return_value.status_code = 200
            with open("tests/data/test_osmchange.xml", "rb") as f:
                mock_get.return_value.content = f.read()

        osmchange = OSMChange.from_xml_file(osmchange_file_path)

        # Verify API call was made if file is remote
        if osmchange_file_path.startswith('http'):
            mock_get.assert_called_once()

        # Test counts
        assert len(osmchange.create) > 0
        assert len(osmchange.modify) >= 0
        assert len(osmchange.delete) >= 0

        # Test object types
        nodes_created = [o for o in osmchange.create if isinstance(o, Node)]
        ways_created = [o for o in osmchange.create if isinstance(o, Way)]
        rels_created = [o for o in osmchange.create if isinstance(o, Relation)]

        # Verify all created objects are accounted for
        assert len(nodes_created + ways_created + rels_created) == len(osmchange.create)

        # Test object attributes
        if nodes_created:
            node = nodes_created[0]
            assert hasattr(node, 'lat')
            assert hasattr(node, 'lon')

    @patch('osmdiff.osmchange.requests.get')
    def test_get_state_success(self, mock_get):
        "get_state() returns True and stores the sequence number on success"
        # Simulate a valid state response with sequenceNumber
        xml = b'''123'''
        mock_get.return_value.status_code = 200
        mock_get.return_value.content = xml
        oc = OSMChange()
        assert oc.get_state() is True
        assert oc.sequence_number == 123

    @patch('osmdiff.osmchange.requests.get')
    def test_get_state_missing_seq(self, mock_get):
        "get_state() returns False when the response has no sequence number"
        # Simulate state response without sequenceNumber
        xml = b''''''
        mock_get.return_value.status_code = 200
        mock_get.return_value.content = xml
        oc = OSMChange()
        assert oc.get_state() is False

    @patch('osmdiff.osmchange.requests.get')
    def test_get_state_fail(self, mock_get):
        "get_state() returns False on a non-200 response"
        mock_get.return_value.status_code = 404
        oc = OSMChange()
        assert oc.get_state() is False

    @patch('osmdiff.osmchange.requests.get')
    def test_retrieve_non_200(self, mock_get):
        "retrieve() hands back the HTTP status code of a failed request"
        oc = OSMChange(sequence_number=1)
        mock_get.return_value.status_code = 404
        mock_get.return_value.content = b''
        status = oc.retrieve()
        assert status == 404

    @patch('osmdiff.osmchange.requests.get')
    def test_retrieve_gzip(self, mock_get):
        "retrieve() accepts a gzip-compressed response body"
        # Simulate a gzip-compressed XML response
        xml = b''
        gzipped = gzip.compress(xml)
        mock_get.return_value.status_code = 200
        mock_get.return_value.content = gzipped
        mock_get.return_value.raw = io.BytesIO(gzipped)
        oc = OSMChange(sequence_number=1)
        status = oc.retrieve()
        assert status == 200

    @patch('osmdiff.osmchange.requests.get', side_effect=ConnectionError)
    def test_retrieve_connection_error(self, mock_get):
        "retrieve() returns 0 when the request raises ConnectionError"
        oc = OSMChange(sequence_number=1)
        status = oc.retrieve()
        assert status == 0

    @patch('osmdiff.osmchange.requests.get')
    def test_retrieve_clear_cache(self, mock_get):
        "retrieve(clear_cache=True) empties the lists even if the request fails"
        oc = OSMChange(sequence_number=1)
        oc.create = [1]
        oc.modify = [2]
        oc.delete = [3]
        mock_get.return_value.status_code = 404
        mock_get.return_value.content = b''
        oc.retrieve(clear_cache=True)
        assert oc.create == [] and oc.modify == [] and oc.delete == []

    def test_sequence_number_setter_and_errors(self):
        "sequence_number accepts ints and None and rejects unparsable strings"
        oc = OSMChange()
        oc.sequence_number = 42
        assert oc.sequence_number == 42
        oc.sequence_number = None
        assert oc.sequence_number is None
        with pytest.raises(ValueError):
            oc.sequence_number = 'notanumber'

    def test_frequency_setter_and_errors(self):
        "frequency accepts a valid option and rejects anything else"
        oc = OSMChange()
        oc.frequency = 'hour'
        assert oc.frequency == 'hour'
        with pytest.raises(ValueError):
            oc.frequency = 'invalid'

    def test_actions_property(self):
        "actions exposes the three lists keyed by action name"
        oc = OSMChange()
        oc.create = [1]
        oc.modify = [2]
        oc.delete = [3]
        acts = oc.actions
        assert acts['create'] == [1]
        assert acts['modify'] == [2]
        assert acts['delete'] == [3]

    def test_repr(self):
        "repr() reports the number of created and modified items"
        oc = OSMChange()
        oc.create = [1,2]
        oc.modify = [3]
        oc.delete = []
        r = repr(oc)
        assert '2 created' in r and '1 modified' in r

    def test_context_manager_exit_clears(self):
        "Leaving the with-block clears all three lists"
        oc = OSMChange()
        oc.create = [1]
        oc.modify = [2]
        oc.delete = [3]
        with oc:
            pass
        assert oc.create == [] and oc.modify == [] and oc.delete == []

    @patch('builtins.open', side_effect=FileNotFoundError)
    def test_init_else_branch(self, mock_open):
        "Constructor stores frequency and sequence_number, with defaults"
        # Should set _frequency and _sequence_number if file is not provided
        oc = OSMChange(frequency='hour', sequence_number=42)
        assert oc._frequency == 'hour'
        assert oc._sequence_number == 42
        # Also cover the case where both are left default
        oc2 = OSMChange()
        assert hasattr(oc2, '_frequency')
        assert hasattr(oc2, '_sequence_number')
        assert oc2._frequency == 'minute'
        assert oc2._sequence_number is None
        # And the case where only frequency is set
        oc3 = OSMChange(frequency='day')
        assert oc3._frequency == 'day'
        assert oc3._sequence_number is None
        # And only sequence_number is set
        oc4 = OSMChange(sequence_number=99)
        assert oc4._frequency == 'minute'
        assert oc4._sequence_number == 99

    def test_retrieve_raises_on_missing_sequence_number(self):
        "retrieve() raises when no sequence number has been set"
        oc = OSMChange()
        with pytest.raises(Exception) as exc:
            oc.retrieve()
        assert "invalid sequence number" in str(exc.value)

    @patch('osmdiff.osmchange.requests.get')
    def test_retrieve_non_gzip_xml(self, mock_get):
        "retrieve() accepts a plain (non-gzip) response body"
        xml = b''
        mock_get.return_value.status_code = 200
        mock_get.return_value.content = xml
        mock_get.return_value.raw = io.BytesIO(xml)
        oc = OSMChange(sequence_number=1)
        status = oc.retrieve()
        assert status == 200
207 |
--------------------------------------------------------------------------------
/tests/test_augmenteddiff.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from osmdiff import AugmentedDiff
3 | from unittest.mock import patch, MagicMock
4 | import requests
5 | from io import BytesIO
6 |
7 |
class TestAugmentedDiff:
    """Tests for AugmentedDiff class."""

    def test_init(self):
        """Test AugmentedDiff initialization."""
        from osmdiff import AugmentedDiff

        adiff = AugmentedDiff(sequence_number=12345)
        assert adiff.sequence_number == 12345

    def test_get_state_errors(self):
        """Test AugmentedDiff.get_state error handling and edge cases."""
        from osmdiff import AugmentedDiff
        import requests
        from unittest.mock import patch, MagicMock

        # Non-200 response
        mock_response = MagicMock()
        mock_response.status_code = 404
        # raise_for_status() is what surfaces the HTTP error to the caller
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError
        with patch("requests.get", return_value=mock_response):
            with pytest.raises(requests.exceptions.HTTPError) as excinfo:
                AugmentedDiff.get_state()

    def test_retrieve_exceptions_and_clear_cache(self):
        """Test retrieve() for missing sequence_number, clear_cache, and non-200 status."""
        from osmdiff import AugmentedDiff
        from unittest.mock import patch, MagicMock

        # Missing sequence_number
        adiff = AugmentedDiff()
        with patch("requests.get"):
            # try/except/else: the else branch fails the test when retrieve()
            # returns normally instead of raising.
            try:
                adiff.retrieve()
            except Exception as e:
                assert "invalid sequence number" in str(e)
            else:
                assert False, "Exception not raised for missing sequence_number"

        # clear_cache should clear lists
        adiff = AugmentedDiff(sequence_number=1)
        adiff.create = [1]
        adiff.modify = [2]
        adiff.delete = [3]
        import io

        # Response with an empty body, so nothing is re-added after clearing
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.raw = io.BytesIO(b"")
        mock_response.raw.decode_content = True
        mock_response.content = b""
        with patch("requests.get", return_value=mock_response):
            adiff.retrieve(clear_cache=True)
            assert adiff.create == []
            assert adiff.modify == []
            assert adiff.delete == []

        # Non-200 HTTP status
        adiff = AugmentedDiff(sequence_number=1)
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_response.raw = MagicMock()
        mock_response.raw.decode_content = True
        with patch("requests.get", return_value=mock_response):
            status = adiff.retrieve()
            assert status == 404

    def test_parse_stream_meta_tag(self):
        """Test that meta tag in XML sets timestamp attribute."""
        from osmdiff import AugmentedDiff
        import io

        xml = """"""
        adiff = AugmentedDiff()
        # _parse_stream is fed a text stream here (StringIO), not bytes
        adiff._parse_stream(io.StringIO(xml))
        assert hasattr(adiff, "timestamp")

    def test_sequence_number_setter_and_repr(self):
        """Test sequence_number setter error and __repr__ output."""
        from osmdiff import AugmentedDiff

        adiff = AugmentedDiff(sequence_number=1)
        # Valid int
        adiff.sequence_number = 42
        assert adiff.sequence_number == 42
        # Valid string
        adiff.sequence_number = "43"
        assert adiff.sequence_number == 43
        # Invalid value
        try:
            adiff.sequence_number = "notanumber"
        except ValueError as e:
            assert "sequence_number must be an integer" in str(e)
        else:
            assert False, "ValueError not raised for invalid sequence_number"
        # __repr__
        r = repr(adiff)
        assert (
            "AugmentedDiff" in r
            and "created" in r
            and "modified" in r
            and "deleted" in r
        )

    def test_context_manager_clears_lists(self):
        """Test that __enter__ returns self and __exit__ clears lists."""
        from osmdiff import AugmentedDiff

        adiff = AugmentedDiff(sequence_number=1)
        adiff.create = [1]
        adiff.modify = [2]
        adiff.delete = [3]
        with adiff as a:
            assert a is adiff
            # Lists are untouched while still inside the context
            assert adiff.create == [1]
        # After context exit, lists should be cleared
        assert adiff.create == []
        assert adiff.modify == []
        assert adiff.delete == []

    def test_bbox_validation(self):
        """Test that invalid bounding boxes raise an Exception."""
        from osmdiff import AugmentedDiff

        # Valid bbox should NOT raise
        AugmentedDiff(minlon=5, minlat=10, maxlon=10, maxlat=20)
        # Invalid bbox: maxlon <= minlon (all nonzero)
        with pytest.raises(Exception, match="invalid bbox"):
            AugmentedDiff(minlon=10, minlat=10, maxlon=5, maxlat=20)
        # Invalid bbox: maxlat <= minlat (all nonzero)
        with pytest.raises(Exception, match="invalid bbox"):
            AugmentedDiff(minlon=5, minlat=20, maxlon=10, maxlat=10)

    @pytest.fixture
    def mock_adiff_response(self):
        """Fixture providing a mock Augmented Diff response."""
        xml_content = """





""".strip()

        # The same payload is exposed through .text, .content and .raw so
        # the code under test can read it through any of them.
        mock_response = MagicMock(spec=requests.Response)
        mock_response.status_code = 200
        mock_response.text = xml_content
        mock_response.content = xml_content.encode()
        mock_response.raw = BytesIO(xml_content.encode())
        return mock_response

    @pytest.fixture
    def mock_timeout_response(self):
        """Fixture providing a mock timeout response."""
        mock_response = MagicMock(spec=requests.Response)
        mock_response.status_code = 200
        mock_response.raise_for_status.side_effect = requests.exceptions.ReadTimeout(
            "Timeout"
        )

        # Add raw attribute that will raise timeout when read
        mock_raw = MagicMock()
        mock_raw.read.side_effect = requests.exceptions.ReadTimeout("Timeout")
        mock_raw.decode_content = True
        mock_response.raw = mock_raw

        return mock_response

    @pytest.fixture
    def augmented_diff(self):
        """Fixture providing a basic AugmentedDiff instance."""
        return AugmentedDiff(sequence_number=12345)

    def test_delete_metadata(self, augmented_diff):
        """Test that metadata is captured for deleted objects."""
        # The file is opened only to obtain its name; AugmentedDiff reads it
        # again itself through the ``file`` argument.
        with open("tests/data/test_delete_metadata.xml", "r") as f:
            adiff = AugmentedDiff(file=f.name)

        assert len(adiff.delete) == 1
        deletion = adiff.delete[0]

        # Check the metadata is present
        assert "meta" in deletion
        assert deletion["meta"]["user"] == "TestUser"
        assert deletion["meta"]["uid"] == "12345"
        assert deletion["meta"]["changeset"] == "67890"
        assert deletion["meta"]["timestamp"] == "2024-01-28T12:00:00Z"

        # Check the old object is present
        assert "old" in deletion
        assert deletion["old"].attribs["id"] == "123"
        assert deletion["old"].attribs["lat"] == "51.5"
        assert deletion["old"].attribs["lon"] == "-0.1"
        assert deletion["old"].tags["amenity"] == "cafe"

    def test_timeout_retry_success(
        self, augmented_diff, mock_adiff_response, mock_timeout_response
    ):
        """Test successful retry after timeout."""
        # side_effect list: the first call gets the timeout response, the
        # second call gets the good response.
        with patch(
            "requests.get", side_effect=[mock_timeout_response, mock_adiff_response]
        ) as mock_get:
            status = augmented_diff.retrieve()
            assert status == 200
            assert mock_get.call_count == 2  # Verify it retried once

    def test_multiple_timeouts(self, augmented_diff, mock_timeout_response):
        """Test max retries on consecutive timeouts."""
        with patch("requests.get", return_value=mock_timeout_response) as mock_get:
            with pytest.raises(requests.exceptions.ReadTimeout):
                augmented_diff.retrieve()
            assert mock_get.call_count == 3  # Verify it tried 3 times

    def test_consecutive_sequence_numbers(self, augmented_diff):
        """Test auto-increment of sequence numbers."""

        def new_mock_response():
            # A fresh response per call: the BytesIO in .raw is consumed
            # once it has been read.
            xml_content = """\n\n\n\n\n\n""".strip()
            mock_response = MagicMock(spec=requests.Response)
            mock_response.status_code = 200
            mock_response.text = xml_content
            mock_response.content = xml_content.encode()
            mock_response.raw = BytesIO(xml_content.encode())
            return mock_response

        with patch(
            "requests.get", side_effect=[new_mock_response(), new_mock_response()]
        ) as mock_get:
            # Retrieve first diff
            status1 = augmented_diff.retrieve(auto_increment=True)
            assert status1 == 200
            assert augmented_diff.sequence_number == 12346
            initial_create_count = len(augmented_diff.create)

            # Retrieve next diff
            status2 = augmented_diff.retrieve(auto_increment=True)
            assert status2 == 200
            assert augmented_diff.sequence_number == 12347
            # Results accumulate across calls when the cache is not cleared
            assert len(augmented_diff.create) == initial_create_count * 2
            assert mock_get.call_count == 2
248 |
--------------------------------------------------------------------------------
/src/osmdiff/osm/osm.py:
--------------------------------------------------------------------------------
1 | """
2 | # OSM Objects
3 |
4 | This module provides classes for working with OpenStreetMap data in the application.
5 |
::: osmdiff.osm.OSMObject

::: osmdiff.osm.Node

::: osmdiff.osm.Way

::: osmdiff.osm.Relation

::: osmdiff.osm.osm.Member
15 |
16 | ## Overview
17 |
18 | This module contains base classes for OSM objects:
19 | - OSMObject: Base class for all OSM elements
20 | - Node: Represents OSM nodes with lat/lon coordinates
21 | - Way: Represents OSM ways (sequences of nodes)
22 | - Relation: Represents OSM relations (collections of objects)
23 |
24 | ## Example
25 |
26 | ```python
27 | from osmdiff.osm import Node, Way, Relation
28 | ```
29 |
30 | Each OSM object has:
31 | - tags: Dictionary of key-value tag pairs
32 | - attribs: Dictionary of XML attributes (id, version, etc.)
33 | - bounds: Optional bounding box [minlon, minlat, maxlon, maxlat]
34 |
35 | The objects can be created from XML elements using the from_xml() classmethod.
36 |
Example:

```python
38 | # Create a node
39 | node = Node()
40 | node.attribs = {
41 | "id": "123",
42 | "version": "2",
43 | "lat": "37.7",
44 | "lon": "-122.4"
45 | }
46 | node.tags = {
47 | "amenity": "cafe",
48 | "name": "Joe's Coffee"
49 | }
50 |
51 | # Create a way
52 | way = Way()
53 | way.attribs = {
54 | "id": "456",
55 | "version": "1"
56 | }
57 | way.nodes = ["123", "124", "125"] # List of node IDs
58 | way.tags = {
59 | "highway": "residential",
60 | "name": "Oak Street"
61 | }
62 |
63 | # Create a relation
64 | rel = Relation()
65 | rel.attribs = {
66 | "id": "789",
67 | "version": "1"
68 | }
69 | rel.members = [
70 | {"type": "way", "ref": "456", "role": "outer"},
71 | {"type": "way", "ref": "457", "role": "inner"}
72 | ]
73 | rel.tags = {
74 | "type": "multipolygon",
75 | "landuse": "park"
76 | }
77 | ```
78 |
79 | # Access __geo_interface__ for GeoJSON compatibility
80 |
81 | See https://gist.github.com/sgillies/2217756 for more details.
82 |
83 | ```python
print(node.__geo_interface__)  # {"type": "Point", "coordinates": [-122.4, 37.7]}
85 | ```
86 | """
87 |
import json
from typing import Any, Dict, List, Optional
from xml.etree import ElementTree
from xml.etree.ElementTree import Element
92 |
93 |
class OSMObject:
    """Base class for all OpenStreetMap elements (nodes, ways, relations).

    Args:
        tags: Key-value tag dictionary
        attribs: XML attributes dictionary
        bounds: Optional bounding box coordinates [minlon, minlat, maxlon, maxlat]

    Note:
        This is an abstract base class - use Node, Way or Relation for concrete elements.
    """

    def __init__(
        self,
        tags: Optional[Dict[str, str]] = None,
        attribs: Optional[Dict[str, str]] = None,
        bounds: Optional[List[float]] = None,
    ) -> None:
        """Initialize an OSM object.

        Missing or empty ``tags``/``attribs`` become fresh empty dicts, and
        a missing or empty ``bounds`` becomes ``None``.
        """
        # ``or`` (rather than an ``is None`` test) keeps the long-standing
        # behavior that an empty container is treated like "not given".
        self.tags = tags or {}
        self.attribs = attribs or {}
        self.bounds = bounds or None

    def __repr__(self) -> str:
        """
        String representation of the OSM object.

        Returns:
            str: Object type and ID, with additional info for ways/relations
        """
        out = f"{type(self).__name__} {self.attribs.get('id')}"
        if isinstance(self, Way):
            out += f" ({len(self.nodes)} nodes)"
        if isinstance(self, Relation):
            out += f" ({len(self.members)} members)"
        return out

    def _parse_tags(self, elem: Element) -> None:
        """
        Parse tags from XML element.

        Each direct ``tag`` child contributes one ``k`` -> ``v`` entry to
        ``self.tags``.

        Args:
            elem: XML element containing tag elements
        """
        for tagelem in elem.findall("tag"):
            self.tags[tagelem.attrib["k"]] = tagelem.attrib["v"]

    def _parse_bounds(self, elem: Element) -> None:
        """
        Parse bounds from XML element.

        When a ``bounds`` child exists, ``self.bounds`` is set to
        ``[minlon, minlat, maxlon, maxlat]`` using the attribute values as
        they appear in the XML (strings); otherwise it is left untouched.

        Args:
            elem: XML element containing bounds element
        """
        be = elem.find("bounds")
        if be is not None:
            self.bounds = [
                be.attrib["minlon"],
                be.attrib["minlat"],
                be.attrib["maxlon"],
                be.attrib["maxlat"],
            ]

    @classmethod
    def from_xml(cls, elem: Element) -> "OSMObject":
        """
        Create OSM object from XML element.

        ``node`` and ``nd`` elements become a Node, ``way`` a Way and
        ``relation`` a Relation. For a ``member`` element the object type
        is taken from its ``type`` attribute.

        Args:
            elem: XML element representing an OSM object

        Returns:
            OSMObject: Appropriate subclass instance

        Raises:
            ValueError: If XML element is invalid
            TypeError: If element type is unknown
        """
        if elem is None:
            raise ValueError("XML element cannot be None")

        if elem.tag == "member":
            osmtype = elem.attrib.get("type")
            if not osmtype:
                raise ValueError("Member element missing type attribute")
        else:
            osmtype = elem.tag

        if osmtype in ("node", "nd"):
            o = Node()
        elif osmtype == "way":
            o = Way()
            o._parse_nodes(elem)
        elif osmtype == "relation":
            o = Relation()
            o._parse_members(elem)
        else:
            raise TypeError(f"Unknown OSM element type: {osmtype}")

        # Copy the attributes: sharing the element's own dict would let a
        # later Element.clear() (common with iterparse) empty this object's
        # attribs as well.
        o.attribs = dict(elem.attrib)
        # One-letter type code derived from the class name: "n", "w" or "r".
        o.osmtype = type(o).__name__.lower()[0]
        o._parse_tags(elem)
        o._parse_bounds(elem)
        return o

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert object to dictionary.

        Returns:
            Dict[str, Any]: Dictionary with the class name ("type"), the
            "id" attribute, the tags and the bounds
        """
        return {
            "type": self.__class__.__name__,
            "id": self.attribs.get("id"),
            "tags": self.tags,
            "bounds": self.bounds,
        }

    def to_json(self) -> str:
        """
        Convert object to JSON string.

        Returns:
            str: JSON representation of ``to_dict()``
        """
        return json.dumps(self.to_dict())

    @classmethod
    def from_file(cls, filename: str) -> "OSMObject":
        """
        Create object from XML file.

        The root element of the document is passed to ``from_xml``.

        Args:
            filename: Path to XML file

        Returns:
            OSMObject: Parsed object
        """
        # Passing the path lets ElementTree open the file in binary mode, so
        # the document's declared encoding is used rather than the locale's.
        tree = ElementTree.parse(filename)
        return cls.from_xml(tree.getroot())
240 |
241 |
class Node(OSMObject):
    """OpenStreetMap node (geographic point feature).

    Implements __geo_interface__ for GeoJSON compatibility as a Point feature.
    Coordinates must be valid (-180<=lon<=180, -90<=lat<=90); they are
    checked each time ``lon`` or ``lat`` is read.
    """

    def __init__(
        self,
        tags: Optional[Dict[str, str]] = None,
        attribs: Optional[Dict[str, str]] = None,
        bounds: Optional[List[float]] = None,
    ) -> None:
        """Initialize a Node; all arguments are forwarded to OSMObject."""
        super().__init__(tags, attribs, bounds)

    def _validate_coords(self) -> None:
        """Validate node coordinates.

        A missing ``lon`` or ``lat`` attribute is treated as 0.

        Raises:
            ValueError: If latitude or longitude is out of range (or the
                attribute cannot be converted to float)
        """
        lon = float(self.attribs.get("lon", 0))
        lat = float(self.attribs.get("lat", 0))
        if not -90 <= lat <= 90:
            raise ValueError(f"Invalid latitude: {lat}")
        if not -180 <= lon <= 180:
            raise ValueError(f"Invalid longitude: {lon}")

    @property
    def lon(self) -> float:
        """Get longitude value (0 when the attribute is missing)."""
        self._validate_coords()
        return float(self.attribs.get("lon", 0))

    @property
    def lat(self) -> float:
        """Get latitude value (0 when the attribute is missing)."""
        self._validate_coords()
        return float(self.attribs.get("lat", 0))

    def _geo_interface(self) -> dict:
        """
        GeoJSON-compatible interface.

        Returns:
            dict: GeoJSON Point geometry with ``[lon, lat]`` coordinates
        """
        return {"type": "Point", "coordinates": [self.lon, self.lat]}

    __geo_interface__ = property(_geo_interface)

    def __eq__(self, other) -> bool:
        """
        Check if two nodes are equal.

        Args:
            other (OSMObject): Another OSMObject object

        Returns:
            bool: True if nodes have same coordinates; comparison with a
            non-Node is delegated to the other operand and ends up False
        """
        if not isinstance(other, Node):
            return NotImplemented
        return self.lon == other.lon and self.lat == other.lat

    # Equality depends on mutable attributes, so nodes are deliberately
    # unhashable. Defining __eq__ already implies this; spelled out here so
    # it is a visible decision rather than a side effect.
    __hash__ = None
302 |
303 |
class Way(OSMObject):
    """Represents an OSM way (linear feature).

    Implements __geo_interface__ for GeoJSON compatibility as either:
    - LineString for open ways
    - Polygon for closed ways
    """

    def __init__(
        self,
        tags: Optional[Dict[str, str]] = None,
        attribs: Optional[Dict[str, str]] = None,
        bounds: Optional[List[float]] = None,
        nodes: Optional[List["Node"]] = None,
    ) -> None:
        """Initialize a Way object.

        Args:
            tags: Key-value tag dictionary
            attribs: XML attributes dictionary
            bounds: Optional bounding box coordinates
            nodes: Ordered list of the way's nodes (empty when omitted)
        """
        super().__init__(tags, attribs, bounds)
        self.nodes = nodes or []

    @staticmethod
    def _node_ref(node):
        """Return the ``ref`` attribute of a way node, or None if it has none."""
        attribs = getattr(node, "attribs", None)
        return attribs.get("ref") if isinstance(attribs, dict) else None

    def is_closed(self) -> bool:
        """
        Check if the way forms a closed loop.

        When both end nodes carry a ``ref`` attribute (as nodes parsed from
        ``nd`` elements do), the refs are compared. This matters because an
        ``nd`` element without lat/lon yields a node whose coordinates both
        default to 0, so comparing coordinates would report every such way
        as closed. Without refs, the end nodes are compared with ``==``.

        Returns:
            bool: True if first and last nodes are identical
        """
        if not self.nodes:
            return False
        first, last = self.nodes[0], self.nodes[-1]
        first_ref = self._node_ref(first)
        last_ref = self._node_ref(last)
        if first_ref is not None and last_ref is not None:
            return first_ref == last_ref
        return bool(first == last)

    def length(self) -> None:
        """
        Calculate approximate length in meters.

        Not implemented yet: this is a placeholder that does nothing and
        returns None.
        """
        # Implementation using haversine formula
        pass

    def _parse_nodes(self, elem: Element):
        """
        Parse nodes from XML element.

        Each direct ``nd`` child is converted with ``OSMObject.from_xml``
        and appended to ``self.nodes`` in document order.

        Args:
            elem: XML element containing nd elements
        """
        for node in elem.findall("nd"):
            self.nodes.append(OSMObject.from_xml(node))

    def _geo_interface(self) -> dict:
        """
        GeoJSON-compatible interface.

        Returns:
            dict: GeoJSON LineString or Polygon geometry
        """
        geom_type = "Polygon" if self.is_closed() else "LineString"
        coordinates = [[n.lon, n.lat] for n in self.nodes]

        # For Polygon, we need to wrap coordinates in an extra list
        if geom_type == "Polygon":
            coordinates = [coordinates]

        return {"type": geom_type, "coordinates": coordinates}

    __geo_interface__ = property(_geo_interface)
371 |
372 |
class Relation(OSMObject):
    """
    Represents an OSM relation (collection of features).

    ## Attributes
        members (list): List of member objects
        __geo_interface__ (dict): GeoJSON-compatible interface, see https://gist.github.com/sgillies/2217756 for more details.

    ## Example
    ```python
    relation = Relation()
    relation.members = [Way(), Node()]  # Add members
    print(relation.__geo_interface__["type"])  # "GeometryCollection"
    ```
    """

    def __init__(
        self,
        tags: Optional[Dict[str, str]] = None,
        attribs: Optional[Dict[str, str]] = None,
        bounds: Optional[List[float]] = None,
    ) -> None:
        """Initialize a Relation object with an empty member list."""
        super().__init__(tags, attribs, bounds)
        self.members = []

    def _parse_members(self, elem: Element):
        """
        Parse members from XML element.

        Each direct ``member`` child is converted with
        ``OSMObject.from_xml`` and appended to ``self.members``.

        Args:
            elem: XML element containing member elements
        """
        for member in elem.findall("member"):
            self.members.append(OSMObject.from_xml(member))

    def _geo_interface(self) -> dict:
        """
        GeoJSON-compatible interface.

        Returns:
            dict: GeoJSON GeometryCollection holding each member's own
            ``__geo_interface__``
        """
        return {
            "type": "GeometryCollection",
            "geometries": [m.__geo_interface__ for m in self.members],
        }

    __geo_interface__ = property(_geo_interface)
424 |
425 |
class Member(OSMObject):
    """
    A single member entry of an OSM relation.

    Holds the member's type, the referenced object's id and its role.
    """

    def __init__(self):
        """Create a member whose type, ref and role are all unset."""
        super().__init__()
        self.type = None
        self.ref = None
        self.role = None

    def _parse_attributes(self, elem: Element):
        """
        Read the member's attributes from an XML element.

        ``type`` and ``role`` are stored as found; ``ref`` is converted
        to int.

        Args:
            elem: XML element containing member attributes
        """
        read = elem.get
        self.type = read("type")
        self.ref = int(read("ref"))
        self.role = read("role")

    def _geo_interface(self):
        """
        GeoJSON-compatible interface.

        Returns:
            dict: GeoJSON Feature without geometry whose properties carry
            the member's type, ref and role
        """
        properties = {"type": self.type, "ref": self.ref, "role": self.role}
        return {"type": "Feature", "geometry": None, "properties": properties}

    __geo_interface__ = property(_geo_interface)
463 |
--------------------------------------------------------------------------------
/src/osmdiff/augmenteddiff.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 | from datetime import datetime
4 | from typing import Optional
5 | from xml.etree import ElementTree
6 |
7 | import requests
8 | from dateutil import parser
9 |
10 | from osmdiff.settings import DEFAULT_OVERPASS_URL
11 |
12 | from .config import API_CONFIG, DEFAULT_HEADERS
13 | from .osm import OSMObject
14 |
15 |
class AugmentedDiff:
    """An Augmented Diff representation for OpenStreetMap changes.

    Handles retrieval and parsing of OpenStreetMap augmented diffs containing detailed
    changes to OSM data (creations, modifications, deletions).

    Args:
        minlon: Minimum longitude of bounding box (WGS84)
        minlat: Minimum latitude of bounding box (WGS84)
        maxlon: Maximum longitude of bounding box (WGS84)
        maxlat: Maximum latitude of bounding box (WGS84)
        file: Path to local augmented diff XML file. When given, the file is
            parsed immediately and ``sequence_number`` is ignored.
        sequence_number: Sequence number of the diff
        timestamp: Initial timestamp; replaced by the ``osm_base`` value of a
            ``meta`` element whenever a diff is parsed
        base_url: Override default Overpass API URL
        timeout: Request timeout in seconds

    Raises:
        ValueError: If all four bounding box values are given but do not
            describe a box with ``maxlon > minlon`` and ``maxlat > minlat``.

    Note:
        The bounding box coordinates should be in WGS84 (EPSG:4326) format.
    """

    # Class-level fallbacks; __init__ overrides all of them per instance.
    base_url = DEFAULT_OVERPASS_URL
    minlon = None
    minlat = None
    maxlon = None
    maxlat = None

    def __init__(
        self,
        minlon: Optional[float] = None,
        minlat: Optional[float] = None,
        maxlon: Optional[float] = None,
        maxlat: Optional[float] = None,
        file: Optional[str] = None,
        sequence_number: Optional[int] = None,
        timestamp: Optional[datetime] = None,
        base_url: Optional[str] = None,
        timeout: Optional[int] = None,
    ) -> None:
        # Created first so the logger exists before any parsing happens.
        self._logger = logging.getLogger(__name__)

        # Initialize with defaults from config
        self.base_url = base_url or API_CONFIG["overpass"]["base_url"]
        self.timeout = timeout or API_CONFIG["overpass"]["timeout"]

        self.minlon = minlon
        self.minlat = minlat
        self.maxlon = maxlon
        self.maxlat = maxlat
        self.timestamp = timestamp
        self._remarks = []
        self._sequence_number = None
        self._create = []
        self._modify = []
        self._delete = []

        if file:
            # Binary mode lets the XML parser honour the encoding declared in
            # the document instead of relying on the platform default.
            with open(file, "rb") as file_handle:
                self._parse_stream(file_handle)
            return

        self.sequence_number = sequence_number

        # Compare against None rather than relying on truthiness: 0 is a
        # perfectly valid coordinate and must not disable the validation.
        bbox = (minlon, minlat, maxlon, maxlat)
        if all(coord is not None for coord in bbox):
            if not (maxlon > minlon and maxlat > minlat):
                raise ValueError("invalid bbox.")

    @classmethod
    def get_state(
        cls, base_url: Optional[str] = None, timeout: Optional[int] = None
    ) -> Optional[dict]:
        """Get the current sequence number from the Overpass API.

        Args:
            base_url: Ignored; kept for backward compatibility (deprecated).
                The state URL is always read from ``API_CONFIG``.
            timeout: Optional override for request timeout (defaults to 5 seconds)

        Returns:
            dict: ``{"sequence_number": <int>, "timestamp": None}``

        Raises:
            requests.exceptions.HTTPError: If the server answers with an error status
            ValueError: If the response body is not an integer
        """
        state_url = API_CONFIG["overpass"]["state_url"]
        response = requests.get(
            state_url, timeout=timeout or 5, headers=DEFAULT_HEADERS
        )
        response.raise_for_status()
        return {"sequence_number": int(response.text), "timestamp": None}

    @staticmethod
    def _last_object(container):
        """Return the object built from the last child of ``container``, or None.

        Every child is passed through ``OSMObject.from_xml``; the result of the
        last one is kept. ``None`` is returned for a missing or empty container.
        """
        osm_obj = None
        if container is not None:
            for child in container:
                osm_obj = OSMObject.from_xml(child)
        return osm_obj

    def _build_action(self, elem):
        """Parse an action element from an augmented diff.

        Actions in augmented diffs are ordered: nodes first, then ways, then relations.
        Within each type, elements are ordered by ID.

        ``create`` actions append each child object to the create list.
        ``modify`` actions append an ``{"old", "new"}`` dict when both versions
        are present. ``delete`` actions append an ``{"old", "new", "meta"}`` dict
        when at least one version is present. Other action types are ignored.

        Args:
            elem: The ``action`` XML element

        Raises:
            KeyError: If the element has no ``type`` attribute
        """
        action_type = elem.attrib["type"]

        if action_type == "create":
            for child in elem:
                self._create.append(OSMObject.from_xml(child))
        elif action_type == "modify":
            old = elem.find("old")
            new = elem.find("new")
            if old is not None and new is not None:
                osm_obj_old = self._last_object(old)
                osm_obj_new = self._last_object(new)
                if osm_obj_old and osm_obj_new:
                    self._modify.append({"old": osm_obj_old, "new": osm_obj_new})
        elif action_type == "delete":
            osm_obj_old = self._last_object(elem.find("old"))
            osm_obj_new = self._last_object(elem.find("new"))
            if osm_obj_old is not None or osm_obj_new is not None:
                # Store both old and new, plus the action's own attributes
                self._delete.append(
                    {
                        "old": osm_obj_old,
                        "new": osm_obj_new,
                        "meta": elem.attrib.copy(),
                    }
                )

    def _parse_stream(self, stream):
        """Parse an augmented diff XML document from a file-like object.

        Collects ``remark`` texts, takes the timestamp from the ``osm_base``
        attribute of ``meta`` elements and dispatches every ``action`` element
        to ``_build_action``.

        Args:
            stream: File-like object yielding the XML document
        """
        for _event, elem in ElementTree.iterparse(stream):
            if elem.tag == "remark":
                self._remarks.append(elem.text)
            elif elem.tag == "meta":
                osm_base = elem.attrib.get("osm_base")
                # A meta element without osm_base carries no timestamp; keep
                # the current one instead of failing on parse(None).
                if osm_base is not None:
                    self.timestamp = parser.parse(osm_base)
            elif elem.tag == "action":
                self._build_action(elem)

    def _fetch_and_parse(self, url, request_timeout):
        """Download one diff and parse it into fresh lists.

        The instance's action lists are swapped out only while parsing and are
        always put back, so a failure never loses previously collected data.

        Returns:
            tuple: ``(status_code, parsed)`` where ``parsed`` is ``None`` for a
            non-200 response, otherwise a ``(create, modify, delete)`` tuple of
            lists holding only the newly parsed actions.
        """
        r = requests.get(
            url, stream=True, timeout=request_timeout, headers=DEFAULT_HEADERS
        )
        try:
            if r.status_code != 200:
                return r.status_code, None

            r.raw.decode_content = True

            previous = (self._create, self._modify, self._delete)
            self._create, self._modify, self._delete = ([], [], [])
            try:
                self._parse_stream(r.raw)
                parsed = (self._create, self._modify, self._delete)
            finally:
                self._create, self._modify, self._delete = previous
            return r.status_code, parsed
        finally:
            # A streamed response keeps its connection until explicitly closed.
            r.close()

    def retrieve(
        self,
        clear_cache: bool = False,
        timeout: Optional[int] = None,
        auto_increment: bool = True,
        max_retries: int = 3,
    ) -> int:
        """Retrieve the Augmented diff corresponding to the sequence_number.

        Newly parsed actions are appended to the ones already held by this
        object (unless ``clear_cache`` is set). If a request or parse fails, the
        previously held actions are left untouched.

        Args:
            clear_cache: Whether to clear existing data before retrieval.
            timeout: Request timeout in seconds.
            auto_increment: Whether to automatically increment sequence number after retrieval.
            max_retries: Maximum number of retry attempts for failed requests.

        Returns:
            HTTP status code of the request (200 for success). ``0`` is returned
            only when ``max_retries`` is less than 1 and no request is made.

        Raises:
            Exception: If sequence_number is not set
            requests.exceptions.RequestException: If all retry attempts fail
        """
        if not self.sequence_number:
            raise Exception("invalid sequence number")

        if clear_cache:
            self._create, self._modify, self._delete = ([], [], [])

        url = self.base_url.format(sequence_number=self.sequence_number)

        self._logger.info(f"Retrieving diff {self.sequence_number} from {url}")

        # 2 minutes default, this will still fail for very large diffs, like 12346
        request_timeout = timeout or self.timeout or 120

        for attempt in range(max_retries):
            # Exponential backoff between retries
            if attempt > 0:
                time.sleep(2**attempt)  # 2, 4, 8 seconds...

            try:
                status_code, parsed = self._fetch_and_parse(url, request_timeout)
            except (
                requests.exceptions.ReadTimeout,
                requests.exceptions.ConnectionError,
            ):
                if attempt == max_retries - 1:  # Last attempt
                    raise
                continue

            if parsed is None:
                return status_code

            # Merge with previous data
            new_create, new_modify, new_delete = parsed
            self._create = self._create + new_create
            self._modify = self._modify + new_modify
            self._delete = self._delete + new_delete

            # Automatically increment sequence number after successful retrieval
            if auto_increment:
                self.sequence_number += 1

            return status_code

        return 0  # Only reachable when max_retries < 1

    @property
    def create(self) -> list:
        """Get the list of created objects from the augmented diff."""
        return self._create

    @create.setter
    def create(self, value: list) -> None:
        self._create = value

    @property
    def modify(self) -> list:
        """Get the list of modified objects from the augmented diff.

        Each entry is a dict with ``"old"`` and ``"new"`` objects.
        """
        return self._modify

    @modify.setter
    def modify(self, value: list) -> None:
        self._modify = value

    @property
    def delete(self) -> list:
        """Get the list of deleted objects from the augmented diff.

        Each entry is a dict with ``"old"``, ``"new"`` and ``"meta"`` keys.
        """
        return self._delete

    @delete.setter
    def delete(self, value: list) -> None:
        self._delete = value

    @property
    def remarks(self) -> list:
        """Get the list of remarks from the augmented diff.

        Holds the text of every ``remark`` element found while parsing.
        """
        return self._remarks

    @property
    def timestamp(self) -> Optional[datetime]:
        """Get the timestamp of this diff.

        Returns:
            datetime: The timestamp passed to the constructor or parsed from
            the diff metadata, ``None`` if neither is available
        """
        return self._timestamp

    @timestamp.setter
    def timestamp(self, value: Optional[datetime]) -> None:
        """Set the timestamp for this diff.

        Args:
            value: The new timestamp to set
        """
        self._timestamp = value

    @property
    def sequence_number(self) -> int | None:
        """Get the sequence number identifying this diff.

        Sequence numbers increment monotonically and uniquely identify each diff.
        """
        return self._sequence_number

    @sequence_number.setter
    def sequence_number(self, value: Optional[int]) -> None:
        """Set the sequence number; ``None`` unsets it.

        Raises:
            ValueError: If the value cannot be parsed as an integer
        """
        if value is None:
            self._sequence_number = None
            return
        try:
            self._sequence_number = int(value)
        except ValueError as err:
            raise ValueError(
                "sequence_number must be an integer or parsable as an integer"
            ) from err

    @property
    def actions(self):
        """Get all actions as a dict keyed by ``create``, ``modify`` and ``delete``."""
        return {"create": self._create, "modify": self._modify, "delete": self._delete}

    def __repr__(self):
        return (
            f"AugmentedDiff ({len(self._create)} created, "
            f"{len(self._modify)} modified, {len(self._delete)} deleted)"
        )

    def __enter__(self):
        """Return the diff itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Empty the create, modify and delete lists when the ``with`` block ends."""
        self._create.clear()
        self._modify.clear()
        self._delete.clear()
336 |
337 |
class ContinuousAugmentedDiff:
    """Iterator for continuously fetching augmented diffs with backoff.

    Yields AugmentedDiff objects as new diffs become available. Iteration
    never ends on its own; each ``next()`` call blocks until a diff has been
    retrieved successfully.

    Args:
        minlon: Minimum longitude of bounding box
        minlat: Minimum latitude of bounding box
        maxlon: Maximum longitude of bounding box
        maxlat: Maximum latitude of bounding box
        base_url: Override default Overpass API URL
        timeout: Request timeout in seconds
        min_interval: Minimum seconds between checks (default: 30)
        max_interval: Maximum seconds between checks (default: 120)
    """

    def __init__(
        self,
        minlon: Optional[float] = None,
        minlat: Optional[float] = None,
        maxlon: Optional[float] = None,
        maxlat: Optional[float] = None,
        base_url: Optional[str] = None,
        timeout: Optional[int] = None,
        min_interval: int = 30,
        max_interval: int = 120,
    ):
        self.bbox = (minlon, minlat, maxlon, maxlat)
        self.base_url = base_url
        self.timeout = timeout
        self.min_interval = min_interval
        self.max_interval = max_interval

        # Next sequence number to fetch; None until the remote state is known.
        self._current_sequence = None
        self._current_interval = min_interval
        self._last_check = None
        self._logger = logging.getLogger(__name__)

    def _wait_for_next_check(self) -> None:
        """Sleep until the current interval has passed since the last check.

        The very first check happens immediately.
        """
        if self._last_check:
            elapsed = (datetime.now() - self._last_check).total_seconds()
            wait_time = self._current_interval - elapsed
            if wait_time > 0:
                time.sleep(wait_time)

        self._last_check = datetime.now()

    def _backoff(self) -> None:
        """Increase check interval, up to max_interval."""
        self._current_interval = min(self._current_interval * 2, self.max_interval)

    def _reset_backoff(self) -> None:
        """Reset check interval to minimum."""
        self._current_interval = self.min_interval

    def __iter__(self):
        return self

    def __next__(self) -> "AugmentedDiff":
        """Block until the next diff has been retrieved and return it.

        Retrieval failures are logged and retried with an increasing interval.
        """
        while True:
            self._wait_for_next_check()

            # check if we have a newer sequence on the remote
            state = AugmentedDiff.get_state(timeout=self.timeout)
            # get_state() returns a dict; a bare sequence number is tolerated too.
            if isinstance(state, dict):
                newest_remote = state.get("sequence_number")
            else:
                newest_remote = state

            if newest_remote is None:
                self._logger.warning("Remote state did not contain a sequence number")
                self._backoff()
                continue

            # if we don't have a local sequence number yet, set it
            if self._current_sequence is None:
                self._current_sequence = newest_remote

            # if we do, proceed only if the remote is newer
            # NOTE(review): with ">=" the diff whose number equals the remote's
            # newest is fetched only after the remote has moved past it -
            # confirm that this one-diff lag is intended.
            elif self._current_sequence >= newest_remote:
                continue

            # Create diff object for new sequence
            diff = AugmentedDiff(
                minlon=self.bbox[0],
                minlat=self.bbox[1],
                maxlon=self.bbox[2],
                maxlat=self.bbox[3],
                sequence_number=self._current_sequence,
                base_url=self.base_url,
                timeout=self.timeout,
            )

            # Try to retrieve the diff
            try:
                status = diff.retrieve(auto_increment=False)
            except Exception as e:
                # Keep the iterator alive: log, slow down and try again.
                self._logger.warning(f"Error retrieving diff: {e}")
                self._backoff()
                continue

            if status != 200:
                self._logger.warning(f"Failed to retrieve diff: HTTP {status}")
                self._backoff()
                continue

            # Success! Reset backoff and update sequence
            self._reset_backoff()
            self._current_sequence += 1
            return diff
441 |
--------------------------------------------------------------------------------