├── gpq_downloader
├── icons
│ ├── parquet-download.png
│ └── parquet-download.svg
├── tests
│ ├── data
│ │ ├── geoparquet_with_metadata.parquet
│ │ └── non_geoparquet_with_geometry.parquet
│ ├── test_logger.py
│ ├── test_dialog.py
│ ├── test_integration.py
│ ├── conftest.py
│ ├── test_validation.py
│ ├── test_worker.py
│ ├── test_utils.py
│ ├── create_test_data.py
│ ├── test_plugin.py
│ └── test_non_geoparquet.py
├── logger.py
├── data
│ ├── formats.json
│ └── presets.json
├── metadata.txt
├── __init__.py
├── dialog.py
├── plugin.py
└── utils.py
├── pyproject.toml
├── .github
└── workflows
│ └── tests.yml
├── make_release.sh
├── README.md
├── .gitignore
└── LICENSE
/gpq_downloader/icons/parquet-download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/icons/parquet-download.png
--------------------------------------------------------------------------------
/gpq_downloader/tests/data/geoparquet_with_metadata.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/tests/data/geoparquet_with_metadata.parquet
--------------------------------------------------------------------------------
/gpq_downloader/tests/data/non_geoparquet_with_geometry.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/tests/data/non_geoparquet_with_geometry.parquet
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_logger.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from gpq_downloader.logger import log
3 |
def test_logger_basic():
    """Smoke-test ``log`` with the default level and with explicit levels 1 and 2."""
    text = "Test message"
    log(text)
    for level in (1, 2):
        log(text, level)
9 |
def test_logger_levels():
    """Call ``log`` once for each of the integer levels 0, 1 and 2."""
    # The position of each message in the tuple is the level it is logged at.
    messages = ("Info message", "Warning message", "Error message")
    for level, text in enumerate(messages):
        log(text, level)
--------------------------------------------------------------------------------
/gpq_downloader/logger.py:
--------------------------------------------------------------------------------
1 | from qgis.core import Qgis, QgsMessageLog
2 |
3 |
def log(message: str, level_in: int = 0):
    """Write ``message`` to the QGIS message log under the "GeoParquet Downloader" tag.

    Args:
        message: Value to log; it is converted with ``str()`` before logging.
        level_in: Integer severity selector. ``1`` maps to ``Warning`` and
            ``2`` to ``Critical``; ``0`` and every other value map to ``Info``.
    """
    if level_in == 1:
        level = Qgis.MessageLevel.Warning
    elif level_in == 2:
        level = Qgis.MessageLevel.Critical
    else:
        # 0 and any unrecognised value both fall back to Info.
        level = Qgis.MessageLevel.Info

    QgsMessageLog.logMessage(str(message), "GeoParquet Downloader", level)
15 |
--------------------------------------------------------------------------------
/gpq_downloader/data/formats.json:
--------------------------------------------------------------------------------
1 | {
2 | "GeoParquet (*.parquet)": {
3 | "extension": ".parquet",
4 | "format_options": "(FORMAT 'parquet', COMPRESSION 'ZSTD')"
5 | },
6 | "GeoPackage (*.gpkg)": {
7 | "extension": ".gpkg",
8 | "format_options": "(FORMAT GDAL, DRIVER 'GPKG', SRS 'EPSG:4326')"
9 | },
10 | "FlatGeobuf (*.fgb)": {
11 | "extension": ".fgb",
12 | "format_options": "(FORMAT GDAL, DRIVER 'FlatGeobuf', SRS 'EPSG:4326')"
13 | },
14 | "GeoJSON (*.geojson)": {
15 | "extension": ".geojson",
16 | "format_options": "(FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326')"
17 | }
18 | }
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_dialog.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import MagicMock, patch
3 | from qgis.PyQt.QtWidgets import QDialog
4 | from qgis.PyQt.QtCore import Qt
5 |
6 | from gpq_downloader.dialog import DataSourceDialog
7 |
def test_dialog_initialization(qgs_app, mock_iface):
    """A freshly constructed dialog exists and keeps the iface it was given."""
    dlg = DataSourceDialog(None, mock_iface)
    assert dlg is not None
    assert dlg.iface == mock_iface
13 |
def test_dialog_radio_buttons(qgs_app, mock_iface):
    """Checking one source radio button leaves the other two unchecked."""
    dlg = DataSourceDialog(None, mock_iface)

    def radio_states():
        # Checked state of the (overture, sourcecoop, osm) radio buttons.
        radios = (dlg.overture_radio, dlg.sourcecoop_radio, dlg.osm_radio)
        return [radio.isChecked() for radio in radios]

    # Select Overture explicitly, since it might not be the default.
    dlg.overture_radio.setChecked(True)
    overture, sourcecoop, osm = radio_states()
    assert overture
    assert not sourcecoop
    assert not osm

    # Switching to Source Cooperative must deselect Overture.
    dlg.sourcecoop_radio.setChecked(True)
    overture, sourcecoop, osm = radio_states()
    assert not overture
    assert sourcecoop
    assert not osm
31 |
@patch('gpq_downloader.dialog.QgsSettings')
def test_dialog_settings_saved(mock_settings, qgs_app, mock_iface):
    """Saving checkbox states instantiates the (patched) ``QgsSettings``."""
    dlg = DataSourceDialog(None, mock_iface)
    dlg.save_checkbox_states()

    # The patched class must have been called at least once.
    mock_settings.assert_called()
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | import sys
4 | from qgis.core import QgsProject, QgsVectorLayer
5 | from qgis.PyQt.QtWidgets import QApplication
6 |
7 | from gpq_downloader.plugin import QgisPluginGeoParquet
8 |
@pytest.mark.skipif(
    not os.environ.get('RUN_INTEGRATION_TESTS'),
    reason="Integration tests not enabled",
)
def test_plugin_load(qgs_app, mock_iface):
    """The plugin can be constructed and ``initGui`` creates its action."""
    gpq_plugin = QgisPluginGeoParquet(mock_iface)
    assert gpq_plugin is not None

    # Initialise the GUI side of the plugin.
    gpq_plugin.initGui()

    # An action must exist once the GUI has been initialised.
    assert gpq_plugin.action is not None
20 |
@pytest.mark.skipif(
    not os.environ.get('RUN_INTEGRATION_TESTS'),
    reason="Integration tests not enabled",
)
def test_plugin_unload(qgs_app, mock_iface):
    """After ``unload`` the plugin holds no worker and no worker thread."""
    gpq_plugin = QgisPluginGeoParquet(mock_iface)
    gpq_plugin.initGui()

    gpq_plugin.unload()

    # Cleanup is considered successful when both references are cleared.
    assert gpq_plugin.worker is None
    assert gpq_plugin.worker_thread is None
33 |
@pytest.mark.skipif(
    not os.environ.get('RUN_INTEGRATION_TESTS'),
    reason="Integration tests not enabled",
)
def test_plugin_download_dir(qgs_app, mock_iface):
    """Constructing the plugin leaves an existing download directory behind."""
    gpq_plugin = QgisPluginGeoParquet(mock_iface)

    # The path must exist and must be a directory.
    assert gpq_plugin.download_dir.exists()
    assert gpq_plugin.download_dir.is_dir()
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=42", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "gpq_downloader"
7 | version = "0.8.5"
8 | description = "QGIS plugin for downloading and processing GeoParquet files"
9 | readme = "README.md"
10 | requires-python = ">=3.7"
11 | license = {text = "GPL-2.0-or-later"}
12 | authors = [
13 | {name = "Chris Holmes", email = "cholmes@9eo.org"}
14 | ]
15 | dependencies = [
16 | "duckdb>=1.1.0",
17 | ]
18 |
19 | [project.urls]
20 | "Homepage" = "https://github.com/cholmes/qgis_plugin_gpq_downloader"
21 | "Bug Tracker" = "https://github.com/cholmes/qgis_plugin_gpq_downloader/issues"
22 |
23 | [tool.setuptools]
24 | packages = ["gpq_downloader"]
25 |
26 | [tool.pytest.ini_options]
27 | testpaths = ["gpq_downloader/tests"]
28 | python_files = "test_*.py"
29 | addopts = "--cov=gpq_downloader"
30 |
31 | [tool.coverage.run]
32 | source = ["gpq_downloader"]
33 | omit = ["gpq_downloader/tests/*"]
34 |
35 | [tool.coverage.report]
36 | exclude_lines = [
37 | "pragma: no cover",
38 | "def __repr__",
39 | "raise NotImplementedError",
40 | "if __name__ == .__main__.:",
41 | "pass",
42 | "raise ImportError",
43 | ]
44 |
45 | [tool.black]
46 | line-length = 88
47 | target-version = ['py37', 'py38', 'py39', 'py310']
48 | include = '\.pyi?$'
49 |
50 | [project.optional-dependencies]
51 | dev = [
52 | "pytest>=7.4.0",
53 | "pytest-qt>=4.2.0",
54 | "pytest-mock>=3.11.1",
55 | "pytest-cov>=4.1.0",
56 | "pyarrow>=10.0.0",
57 | "black>=23.3.0",
58 | ]
59 |
60 | [tool.setuptools.package-data]
61 | gpq_downloader = ["data/*.json"]
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 | container:
13 | image: qgis/qgis:release-3_34 # QGIS docker image with QGIS 3.34
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 |
18 | - name: Install Python dependencies
19 | run: |
20 | python3 -m pip install --upgrade pip
21 | pip3 install pytest pytest-qt
22 | # Install in development mode to ensure data files are available
23 | pip3 install -e .[dev]
24 |
25 | - name: Debug package installation
26 | run: |
27 | # Print out installed package location
28 | python3 -c "import gpq_downloader; print(gpq_downloader.__file__)"
29 | # Check if data directory exists
30 | ls -la $(python3 -c "import gpq_downloader; import os; print(os.path.dirname(gpq_downloader.__file__))")/data || echo "Data directory not found"
31 |
32 | - name: Create data directory if missing
33 | run: |
34 | # Create data directory if it doesn't exist
35 | PACKAGE_DIR=$(python3 -c "import gpq_downloader; import os; print(os.path.dirname(gpq_downloader.__file__))")
36 | mkdir -p $PACKAGE_DIR/data
37 | # If presets.json doesn't exist, create a minimal version
38 | if [ ! -f "$PACKAGE_DIR/data/presets.json" ]; then
39 | echo '{"datasets": {}}' > $PACKAGE_DIR/data/presets.json
40 | echo "Created minimal presets.json file at $PACKAGE_DIR/data/presets.json"
41 | fi
42 |
43 | - name: Run tests with xvfb
44 | run: |
45 | # Make sure xvfb is installed in container
46 | apt-get update && apt-get install -y xvfb
47 |
48 | # Run tests with virtual display
49 | xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" pytest
--------------------------------------------------------------------------------
/make_release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
# Read the plugin version from gpq_downloader/metadata.txt into the global
# VERSION variable. Returns 0 when a non-empty version was found, otherwise
# prints a warning and returns 1.
get_version_from_metadata() {
    if [ ! -f "gpq_downloader/metadata.txt" ]; then
        echo "Warning: Could not extract version from metadata.txt"
        return 1
    fi

    # Take the value after "version=" and strip all whitespace from it.
    VERSION=$(grep "^version=" gpq_downloader/metadata.txt | cut -d'=' -f2 | tr -d '[:space:]')
    if [ -z "$VERSION" ]; then
        echo "Warning: Could not extract version from metadata.txt"
        return 1
    fi

    echo "Found version $VERSION in metadata.txt"
    return 0
}
15 |
# Resolve the release version: an explicit command line argument wins, then
# the version recorded in metadata.txt, and finally today's date.
if [ -n "$1" ]; then
    VERSION=$1
    echo "Using provided version: $VERSION"
elif ! get_version_from_metadata; then
    VERSION=$(date +"%Y%m%d")
    echo "Using date-based version: $VERSION"
fi

ZIP_FILENAME="gpq_downloader_${VERSION}.zip"

# Remember where we started explicitly instead of relying on OLDPWD.
ORIG_DIR=$(pwd)

TEMP_DIR=$(mktemp -d) || {
    echo "Error: could not create a temporary directory" >&2
    exit 1
}

# Always remove the staging directory, even when a later step fails.
trap 'rm -rf "${TEMP_DIR}"' EXIT

echo "Creating release zip: ${ZIP_FILENAME}"

# Stage a copy of the plugin under the directory name used inside the zip.
echo "Creating temporary directory with renamed plugin..."
cp -r gpq_downloader/ "${TEMP_DIR}/qgis_plugin_gpq_downloader" || {
    echo "Error: could not copy gpq_downloader/ (run this script from the repository root)" >&2
    exit 1
}

# Copy LICENSE file if it exists
if [ -f "LICENSE" ]; then
    echo "Copying LICENSE file..."
    cp LICENSE "${TEMP_DIR}/qgis_plugin_gpq_downloader/"
else
    echo "Warning: LICENSE file not found"
fi

# Zip from inside the staging directory so archive paths start at the plugin
# folder. Abort if the cd fails: zip must never run in the caller's directory.
cd "${TEMP_DIR}" || {
    echo "Error: could not enter ${TEMP_DIR}" >&2
    exit 1
}

# Create zip file excluding unwanted files
echo "Creating zip file..."
zip -r "${ZIP_FILENAME}" qgis_plugin_gpq_downloader/ \
    -x "*.DS_Store" "*.gitignore" "*/.git/*" "*/__pycache__/*" "*.pyc" "*.pyo" "*.zip" "*/tests/*" || {
    echo "Error: zip failed" >&2
    exit 1
}

# Move the zip file back to the original directory
mv "${ZIP_FILENAME}" "${ORIG_DIR}/" || {
    echo "Error: could not move ${ZIP_FILENAME} to ${ORIG_DIR}" >&2
    exit 1
}

# Return to the starting directory; the EXIT trap removes the staging directory.
cd "${ORIG_DIR}" || exit 1

echo "Release zip created: ${ZIP_FILENAME}"
echo "You can now upload this file to the QGIS Plugin Repository."
--------------------------------------------------------------------------------
/gpq_downloader/metadata.txt:
--------------------------------------------------------------------------------
1 | [general]
2 | name=GeoParquet Downloader (Overture, Source Coop & Custom Cloud)
3 | qgisMinimumVersion=3.16
4 | qgisMaximumVersion=4.99.0
5 | version=0.8.5
6 | supportsQt6=yes
7 | icon=icons/parquet-download.png
8 | description=Plugin for downloading GeoParquet data from cloud sources.
9 | about=This plugin connects to cloud-based GeoParquet data and downloads the portion in the current viewport.
10 |
11 | The plugin comes with pre-configured sources for Overture
12 | Maps, Source Cooperative, and you can enter the location
13 | of any online GeoParquet file or partition. It works best with
14 | the bbox struct from GeoParquet 1.1, but any GeoParquet file
15 | will work. You can save the output data as GeoParquet,
16 | GeoPackage, DuckDB, FlatGeobuf, or GeoJSON.
17 |
18 | The plugin does not require that your QGIS supports
19 | GeoParquet, as you can download data as GeoPackage, but
20 | GeoParquet generally works better (faster and better nested
21 | data). Most Windows installations come with it, and for Mac
22 | and Linux you can install via conda. For information on
23 | installing GeoParquet support see the wiki page at https://github.com/cholmes/qgis_plugin_gpq_downloader/wiki/Installing-GeoParquet-Support-in-QGIS
24 |
25 | The plugin depends on DuckDB, which should be installed
26 | automatically when you install the plugin. If you have issues
27 | with DuckDB installing please file an issue on the GitHub issue tracker.
28 |
29 | tags=geoparquet,parquet,overture,source cooperative,cloud,duckdb,geopackage
30 |
31 | # credits and contact
32 | author=Chris Holmes
33 | email=cholmes@9eo.org
34 | homepage=https://github.com/cholmes/qgis_plugin_gpq_downloader/
35 | repository=https://github.com/cholmes/qgis_plugin_gpq_downloader/
36 | tracker=https://github.com/cholmes/qgis_plugin_gpq_downloader/issues
37 |
38 | [dependencies]
39 | pip_dependencies=duckdb>=1.1.0
--------------------------------------------------------------------------------
/gpq_downloader/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pytest
4 | from qgis.core import QgsApplication, QgsCoordinateReferenceSystem, QgsRectangle
5 | from qgis.PyQt.QtCore import QCoreApplication, QObject
6 | from qgis.PyQt.QtWidgets import QMainWindow
7 |
8 | # Add the parent directory to sys.path
9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10 |
# Session-wide QGIS application
@pytest.fixture(scope="session")
def qgs_app():
    """Yield an initialised ``QgsApplication`` and shut it down afterwards.

    The application is built once per test session with an empty argument
    list and ``False`` as its second constructor argument.
    """
    app = QgsApplication([], False)
    app.initQgis()
    yield app
    # Runs after the last test of the session has finished.
    app.exitQgis()
19 |
# Mock iface
class MockIface(QObject):
    """Minimal stand-in for the QGIS ``iface`` object used by the tests.

    It provides a mock canvas, a main window and toolbar-icon bookkeeping.
    """

    def __init__(self):
        super().__init__()
        self.canvas = MockCanvas()
        self._window = QMainWindow()
        # Actions registered through addToolBarIcon, in insertion order.
        self.toolbar_icons = []

    def mapCanvas(self):
        """Return the mock canvas."""
        return self.canvas

    def mainWindow(self):
        """Return the window created for this mock."""
        return self._window

    def addToolBarIcon(self, action):
        """Record ``action`` as a toolbar icon."""
        self.toolbar_icons.append(action)

    def removeToolBarIcon(self, action):
        """Forget ``action``; removing an unknown action is a no-op."""
        try:
            self.toolbar_icons.remove(action)
        except ValueError:
            # The action was never registered: nothing to remove.
            pass
42 |
class MockCanvas:
    """Map-canvas stand-in with fixed map settings and a fixed extent."""

    def __init__(self):
        self.settings = MockMapSettings()

    def mapSettings(self):
        """Return the mock map settings created in ``__init__``."""
        return self.settings

    def extent(self):
        """Return a new ``QgsRectangle(0, 0, 1, 1)`` on every call."""
        unit_extent = QgsRectangle(0, 0, 1, 1)
        return unit_extent
52 |
class MockMapSettings:
    """Map-settings stand-in whose destination CRS is always EPSG:4326."""

    def destinationCrs(self):
        """Return a new ``QgsCoordinateReferenceSystem`` for "EPSG:4326"."""
        crs = QgsCoordinateReferenceSystem("EPSG:4326")
        return crs
56 |
@pytest.fixture
def mock_iface():
    """Provide a fresh ``MockIface`` instance for each test."""
    iface = MockIface()
    return iface
61 |
# Sample test data
@pytest.fixture
def sample_bbox():
    """Provide the bounding box ``QgsRectangle(1, 2, 3, 4)``."""
    bbox = QgsRectangle(1, 2, 3, 4)
    return bbox
67 |
@pytest.fixture
def sample_validation_results():
    """Validation results for a dataset that has a ``bbox`` struct column."""
    # (column name, column type); every row shares the same trailing fields.
    columns = [
        ("id", "INTEGER"),
        ("name", "VARCHAR"),
        ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)"),
        ("geometry", "GEOMETRY"),
    ]
    schema = [(name, col_type, "YES", None, None, None) for name, col_type in columns]
    return {
        "has_bbox": True,
        "bbox_column": "bbox",
        "geometry_column": "geometry",
        "schema": schema,
    }
82 |
@pytest.fixture
def sample_validation_results_no_bbox():
    """Validation results for a dataset without any bbox column."""
    # (column name, column type); every row shares the same trailing fields.
    columns = [
        ("id", "INTEGER"),
        ("name", "VARCHAR"),
        ("geometry", "GEOMETRY"),
    ]
    schema = [(name, col_type, "YES", None, None, None) for name, col_type in columns]
    return {
        "has_bbox": False,
        "bbox_column": None,
        "geometry_column": "geometry",
        "schema": schema,
    }
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_validation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import MagicMock, patch
3 | import json
4 | import os
5 |
6 | from gpq_downloader.utils import ValidationWorker
7 |
@patch("duckdb.connect")
def test_validation_worker_with_bbox(mock_connect, mock_iface, sample_bbox):
    """ValidationWorker reports ``has_bbox`` when the schema has a bbox column."""
    # Rows the mocked connection returns from execute(...).fetchall().
    schema_rows = [
        ("id", "INTEGER", "YES", None, None, None),
        ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None),
        ("geometry", "GEOMETRY", "YES", None, None, None),
    ]
    fake_conn = MagicMock()
    fake_conn.execute.return_value.fetchall.return_value = schema_rows
    mock_connect.return_value = fake_conn

    # One entry per emission of the ``finished`` signal.
    received_results = []

    def on_finished(success, message, results):
        received_results.append(results)

    worker = ValidationWorker("https://example.com/test.parquet", mock_iface, sample_bbox)
    worker.finished.connect(on_finished)

    # Run with an empty preset table.
    with patch.object(worker, 'PRESET_DATASETS', {}):
        worker.run()

    assert received_results
    validation_results = received_results[-1]
    assert validation_results["has_bbox"] is True
    assert validation_results["bbox_column"] == "bbox"
41 |
@patch("duckdb.connect")
def test_validation_worker_without_bbox(mock_connect, mock_iface, sample_bbox):
    """ValidationWorker warns and reports no bbox when the schema lacks one."""
    # Rows the mocked connection returns from execute(...).fetchall().
    schema_rows = [
        ("id", "INTEGER", "YES", None, None, None),
        ("geometry", "GEOMETRY", "YES", None, None, None),
    ]
    fake_conn = MagicMock()
    fake_conn.execute.return_value.fetchall.return_value = schema_rows
    mock_connect.return_value = fake_conn

    # Mutable record of what the worker's signals delivered.
    seen = {"finished": False, "warned": False, "results": None}

    def on_finished(success, message, results):
        seen["finished"] = True
        seen["results"] = results
        print(f"Received validation results: {results}")  # debug output

    def on_warning():
        seen["warned"] = True
        print("Warning signal received")  # debug output

    worker = ValidationWorker("https://example.com/test.parquet", mock_iface, sample_bbox)
    worker.finished.connect(on_finished)
    worker.needs_bbox_warning.connect(on_warning)

    # Run with an empty preset table.
    with patch.object(worker, 'PRESET_DATASETS', {}):
        worker.run()

    validation_results = seen["results"]
    assert seen["finished"], "Finished signal was not emitted"
    assert validation_results is not None, "No validation results received"
    assert "has_bbox" in validation_results, f"has_bbox not in validation_results: {validation_results}"
    assert validation_results["has_bbox"] is False
    assert validation_results["bbox_column"] is None
    assert seen["warned"], "Warning signal was not emitted"
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_worker.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import MagicMock, patch
3 | import os
4 | from qgis.PyQt.QtCore import QObject
5 |
6 | from gpq_downloader.utils import Worker
7 |
class MockResult:
    """Query-result stand-in that serves rows from an in-memory sequence."""

    def __init__(self, data):
        self.data = data

    def fetchall(self):
        """Return the stored rows object unchanged."""
        return self.data

    def fetchone(self):
        """Return the first stored row, or ``None`` when there are no rows."""
        if not self.data:
            return None
        return self.data[0]
17 |
class MockConnection:
    """Database-connection stand-in that records queries and returns canned rows."""

    def __init__(self, schema_data=None, count_result=1):
        # A falsy ``schema_data`` (None or empty) is replaced by a new empty list.
        self.schema_data = schema_data if schema_data else []
        self.count_result = count_result
        # Every query passed to execute(), in call order.
        self.executed_queries = []

    def execute(self, query):
        """Record ``query`` and return a ``MockResult`` chosen by its text.

        Queries containing "DESCRIBE" get the schema rows, queries containing
        "COUNT" get a single one-column row holding ``count_result``, and all
        other queries get no rows. "DESCRIBE" takes precedence over "COUNT".
        """
        self.executed_queries.append(query)
        if "DESCRIBE" in query:
            rows = self.schema_data
        elif "COUNT" in query:
            rows = [(self.count_result,)]
        else:
            rows = []
        return MockResult(rows)

    def commit(self):
        """Do nothing."""
        return None

    def close(self):
        """Do nothing."""
        return None
37 |
@pytest.fixture
def schema_with_bbox():
    """Schema rows for a table with id, bbox struct and geometry columns."""
    # Fields that follow the column name and type in every row.
    row_tail = ("YES", None, None, None)
    return [
        ("id", "INTEGER") + row_tail,
        ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)") + row_tail,
        ("geometry", "GEOMETRY") + row_tail,
    ]
45 |
@pytest.fixture
def schema_without_bbox():
    """Schema rows for a table with only id and geometry columns."""
    # Fields that follow the column name and type in every row.
    row_tail = ("YES", None, None, None)
    return [
        ("id", "INTEGER") + row_tail,
        ("geometry", "GEOMETRY") + row_tail,
    ]
52 |
@patch("duckdb.connect")
def test_worker_run_with_bbox(mock_connect, mock_iface, sample_bbox, tmp_path, sample_validation_results, schema_with_bbox):
    """Worker.run filters on the bbox struct when validation found a bbox column."""
    fake_conn = MockConnection(schema_data=schema_with_bbox)
    mock_connect.return_value = fake_conn

    # Messages emitted through the worker's ``progress`` signal.
    progress_messages = []

    def record_progress(msg):
        progress_messages.append(msg)

    output_path = os.path.join(tmp_path, "output.gpkg")
    worker = Worker(
        "https://example.com/test.parquet",
        sample_bbox,
        output_path,
        mock_iface,
        sample_validation_results,
    )
    worker.progress.connect(record_progress)

    worker.run()

    # At least one executed query must filter on the bbox struct's xmin field.
    bbox_query_found = any(
        '"bbox".xmin BETWEEN' in query for query in fake_conn.executed_queries
    )

    assert bbox_query_found, "Should use bbox in the query"
    assert any("Downloading" in msg for msg in progress_messages)
86 |
@patch("duckdb.connect")
def test_worker_run_without_bbox(mock_connect, mock_iface, sample_bbox, tmp_path, sample_validation_results_no_bbox, schema_without_bbox):
    """Worker.run uses ST_Intersects when validation found no bbox column."""
    fake_conn = MockConnection(schema_data=schema_without_bbox)
    mock_connect.return_value = fake_conn

    # Messages emitted through the worker's ``progress`` signal.
    progress_messages = []

    def record_progress(msg):
        progress_messages.append(msg)

    output_path = os.path.join(tmp_path, "output.gpkg")
    worker = Worker(
        "https://example.com/test.parquet",
        sample_bbox,
        output_path,
        mock_iface,
        sample_validation_results_no_bbox,
    )
    worker.progress.connect(record_progress)

    worker.run()

    # At least one executed query must use ST_Intersects.
    st_intersects_found = any(
        'ST_Intersects' in query for query in fake_conn.executed_queries
    )

    assert st_intersects_found, "Should use ST_Intersects in the query when no bbox column"
    assert any("Downloading" in msg for msg in progress_messages)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GeoParquet Downloader for QGIS
2 |
3 | This repo contains a QGIS plugin for downloading GeoParquet data from cloud sources, including Overture Maps, Source Cooperative, and the ability to enter the location of any online GeoParquet file or partition. Only the data within the user's current viewport is downloaded, as GeoParquet, GeoPackage, DuckDB, FlatGeobuf or GeoJSON.
4 |
5 | 
6 |
7 |
8 | The core idea is that GeoParquet can act more like a 'server', letting users download only the data they need, if you add a bit more smarts to the client. So this plugin uses [DuckDB](https://duckdb.org/) but abstracts all the details of forming the right queries to external sources, so users can just pick the data they want and pull it down with ease. And with GeoPackage output users don't even need to know anything about GeoParquet. More info is on the [plugin homepage](https://plugins.qgis.org/plugins/qgis_plugin_gpq_downloader/).
9 |
10 |
11 | ## Installation
12 |
13 | The easiest way to install the plugin file is to use the QGIS plugin manager. Just go to `Plugins > Manage and Install Plugins`, click
14 | the 'install' tab and search for 'GeoParquet Downloader'. Click on 'Install Plugin' and it will install. Alternatively you can download the zip file from
15 | one of the [releases](https://github.com/cholmes/qgis_plugin_gpq_downloader/releases) and 'install from zip' in QGIS. For the plugin to work DuckDB
16 | needs to be installed. As of version 0.3 the plugin should try to automatically install DuckDB, but it doesn't work reliably. If you installed but don't see the
17 | icon below then it's likely because DuckDB isn't there.
18 |
19 | If the installation of DuckDB doesn't work, then on Windows you can use the [QDuckDB plugin](https://oslandia.gitlab.io/qgis/qduckdb/) which includes a precompiled binary.
20 | They also document how to install DuckDB on [Linux](https://oslandia.gitlab.io/qgis/qduckdb/usage/installation.html#linux) and
21 | [Mac OS/X](https://oslandia.gitlab.io/qgis/qduckdb/usage/installation.html#macos). If you're on Mac we recommend trying
22 | the [QGIS 4.0 mac build preview](https://github.com/opengisch/qgis-notarize/) which ships with DuckDB.
23 |
24 | See [metadata.txt](gpq_downloader/metadata.txt) for more installation notes.
25 |
26 | ## Usage
27 |
28 | The plugin will install 1 button on the "Plugin" QGIS toolbar, that you might have to enable through `View > Toolbars > Plugins`:
29 |
30 | 
31 |
32 | It opens a dialog box that lets you select Overture, Source Cooperative, Hugging Face or 'custom' - where you
33 | can enter the location of any GeoParquet file or partition online.
34 |
35 |
36 |
37 |
38 | To use it move to an area where you'd like to download data and then select which layer you'd like to download. From there you can choose the output format (GeoParquet, GeoPackage, DuckDB, GeoJSON or FlatGeobuf) and the location to download the data to.
39 |
40 | Downloads can sometimes take a while, especially if the data provider hasn't optimized their GeoParquet files very well, or if you're downloading an area with a lot of data. Overture is one of the faster ones for now; others may take a minute or two. But it should almost always be faster than trying to figure out exactly which files you need and downloading them manually.
41 |
42 | For now we only support downloading into the current viewport, but hope to [improve that](https://github.com/cholmes/qgis_plugin_gpq_downloader/issues/10). Note also that right now only lat/long data is supported, but we hope to [support other coordinate reference systems](https://github.com/cholmes/qgis_plugin_gpq_downloader/issues/102) as well.
43 |
44 | If your QGIS doesn't have GeoParquet support you'll get a warning dialog after the data download completes. The GeoParquet file will be there, but it won't automatically open on the map. We definitely recommend getting your QGIS working with GeoParquet, as the format is faster and handles nested attributes better. See [Installing GeoParquet Support in QGIS](https://github.com/cholmes/qgis_plugin_gpq_downloader/wiki/Installing-GeoParquet-Support-in-QGIS) for more details.
45 |
46 |
47 | ## Contributing
48 |
49 | This plugin has been made entirely with AI coding tools (primarily Cursor with claude-3.5-sonnet). Contributions are very welcome, both from more experienced python developers who can help clean up the code and add missing features, and from anyone who wants a place to do AI-assisted coding that (hopefully) actually gets widely used.
50 |
51 | I'm interested in exploring open source collaboration in the age of AI coding tools, especially working with less experienced developers who'd like to contribute, so don't hesitate to jump in with AI-assisted pull requests.
52 |
53 | And any help on ideas/feedback, documentation, testing, promoting, etc. is very welcome!
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | *.zip
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 |
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
111 | .pdm.toml
112 | .pdm-python
113 | .pdm-build/
114 |
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116 | __pypackages__/
117 |
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 |
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 |
138 | # Rope project settings
139 | .ropeproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
152 | # pytype static type analyzer
153 | .pytype/
154 |
155 | # Cython debug symbols
156 | cython_debug/
157 |
158 | # mac stuff
159 | .DS_Store
160 |
161 | # PyCharm
162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164 | # and can be added to the global gitignore or merged into this file. For a more nuclear
165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166 | #.idea/
167 |
168 | # --> This is the recommended way
169 |
170 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
171 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
172 |
173 | # User-specific stuff
174 | .idea/**/workspace.xml
175 | .idea/**/tasks.xml
176 | .idea/**/usage.statistics.xml
177 | .idea/**/dictionaries
178 | .idea/**/shelf
179 |
180 | # AWS User-specific
181 | .idea/**/aws.xml
182 |
183 | # Generated files
184 | .idea/**/contentModel.xml
185 |
186 | # Sensitive or high-churn files
187 | .idea/**/dataSources/
188 | .idea/**/dataSources.ids
189 | .idea/**/dataSources.local.xml
190 | .idea/**/sqlDataSources.xml
191 | .idea/**/dynamic.xml
192 | .idea/**/uiDesigner.xml
193 | .idea/**/dbnavigator.xml
194 |
195 | # Gradle
196 | .idea/**/gradle.xml
197 | .idea/**/libraries
198 |
199 | # Gradle and Maven with auto-import
200 | # When using Gradle or Maven with auto-import, you should exclude module files,
201 | # since they will be recreated, and may cause churn. Uncomment if using
202 | # auto-import.
203 | # .idea/artifacts
204 | # .idea/compiler.xml
205 | # .idea/jarRepositories.xml
206 | # .idea/modules.xml
207 | # .idea/*.iml
208 | # .idea/modules
209 | # *.iml
210 | # *.ipr
211 |
212 | # CMake
213 | cmake-build-*/
214 |
215 | # Mongo Explorer plugin
216 | .idea/**/mongoSettings.xml
217 |
218 | # File-based project format
219 | *.iws
220 |
221 | # IntelliJ
222 | out/
223 |
224 | # mpeltonen/sbt-idea plugin
225 | .idea_modules/
226 |
227 | # JIRA plugin
228 | atlassian-ide-plugin.xml
229 |
230 | # Cursive Clojure plugin
231 | .idea/replstate.xml
232 |
233 | # SonarLint plugin
234 | .idea/sonarlint/
235 |
236 | # Crashlytics plugin (for Android Studio and IntelliJ)
237 | com_crashlytics_export_strings.xml
238 | crashlytics.properties
239 | crashlytics-build.properties
240 | fabric.properties
241 |
242 | # Editor-based Rest Client
243 | .idea/httpRequests
244 |
245 | # Android studio 3.1+ serialized cache file
246 | .idea/caches/build_file_checksums.ser
247 |
248 | .claude/
--------------------------------------------------------------------------------
/gpq_downloader/data/presets.json:
--------------------------------------------------------------------------------
1 | {
2 | "overture": {
3 | "buildings": {
4 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=buildings/type=building/*",
5 | "info_url": "https://docs.overturemaps.org/reference/buildings",
6 | "needs_validation": false
7 | },
8 | "places": {
9 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=places/type=place/*",
10 | "info_url": "https://docs.overturemaps.org/reference/places",
11 | "needs_validation": false
12 | },
13 | "transportation": {
14 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=transportation/type=segment/*",
15 | "info_url": "https://docs.overturemaps.org/reference/transportation",
16 | "needs_validation": false
17 | },
18 | "addresses": {
19 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=addresses/type=*/*",
20 | "info_url": "https://docs.overturemaps.org/reference/addresses",
21 | "needs_validation": false
22 | },
23 | "base": {
24 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=base/type={subtype}/*",
25 | "info_url": "https://docs.overturemaps.org/reference/base",
26 | "needs_validation": false,
27 | "subtypes": [
28 | "infrastructure",
29 | "land",
30 | "land_cover",
31 | "land_use",
32 | "water",
33 | "bathymetry"
34 | ]
35 | },
36 | "divisions": {
37 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=divisions/type=division_area/*",
38 | "info_url": "https://docs.overturemaps.org/reference/administrative",
39 | "needs_validation": false
40 | }
41 | },
42 | "source_cooperative": {
43 | "vida_buildings": {
44 | "url": "s3://us-west-2.opendata.source.coop/vida/google-microsoft-osm-open-buildings/geoparquet/by_country/*/*.parquet",
45 | "info_url": "https://source.coop/vida/google-microsoft-osm-open-buildings",
46 | "needs_validation": false,
47 | "display_name": "VIDA Google/Microsoft/OSM Buildings"
48 | },
49 | "microsoft_ml_roads": {
50 | "url": "s3://us-west-2.opendata.source.coop/nlebovits/microsoft-ml-road-detections/by_country/*/*.parquet",
51 | "info_url": "https://source.coop/nlebovits/microsoft-ml-road-detections",
52 | "needs_validation": false,
53 | "display_name": "Microsoft ML Road Detections"
54 | },
55 | "globalbuildingatlas": {
56 | "url": "s3://us-west-2.opendata.source.coop/tge-labs/globalbuildingatlas-lod1/*.parquet",
57 | "info_url": "https://source.coop/tge-labs/globalbuildingatlas-lod1",
58 | "needs_validation": false,
59 | "display_name": "GlobalBuildingAtlas"
60 | },
61 | "openbuildingmap": {
62 | "url": "s3://us-west-2.opendata.source.coop/tge-labs/openbuildingmap/*.parquet",
63 | "info_url": "https://source.coop/tge-labs/openbuildingmap",
64 | "needs_validation": false,
65 | "display_name": "OpenBuildingMap"
66 | },
67 | "fsq_places_fused": {
68 | "url": "s3://us-west-2.opendata.source.coop/fused/fsq-os-places/2025-02-06/places/*.parquet",
69 | "info_url": "https://source.coop/fused/fsq-os-places",
70 | "needs_validation": false,
71 | "display_name": "Foursquare Open Source Places - Fused-partitioned"
72 | },
73 | "us_structures": {
74 | "url": "s3://us-west-2.opendata.source.coop/wherobots/usa-structures/geoparquet/*.parquet",
75 | "info_url": "https://source.coop/wherobots/usa-structures/geoparquet",
76 | "needs_validation": false,
77 | "display_name": "US Structures from ORNL by Wherobots"
78 | },
79 | "planet_eu_boundaries": {
80 | "url": "https://data.source.coop/planet/eu-field-boundaries/field_boundaries.parquet",
81 | "info_url": "https://source.coop/planet/eu-field-boundaries",
82 | "needs_validation": false,
83 | "display_name": "Planet EU Field Boundaries (2022)"
84 | },
85 | "usda_crop": {
86 | "url": "https://data.source.coop/fiboa/us-usda-cropland/us_usda_cropland.parquet",
87 | "info_url": "https://source.coop/fiboa/us-usda-cropland",
88 | "needs_validation": false,
89 | "display_name": "USDA Crop Sequence Boundaries"
90 | },
91 | "nhd_flowlines": {
92 | "url": "https://data.source.coop/cholmes/nhd/NHDFlowline.parquet",
93 | "info_url": "https://source.coop/cholmes/nhd",
94 | "needs_validation": true,
95 | "display_name": "NHD Flowlines (experimental)"
96 | }
97 | },
98 | "openstreetmap": {
99 | "buildings": {
100 | "url": "https://data.openstreetmap.us/layercake/buildings.parquet",
101 | "info_url": "https://data.openstreetmap.us/",
102 | "needs_validation": false
103 | },
104 | "boundaries": {
105 | "url": "https://data.openstreetmap.us/layercake/boundaries.parquet",
106 | "info_url": "https://data.openstreetmap.us/",
107 | "needs_validation": false
108 | },
109 | "highways": {
110 | "url": "https://data.openstreetmap.us/layercake/highways.parquet",
111 | "info_url": "https://data.openstreetmap.us/",
112 | "needs_validation": false
113 | },
114 | "settlements": {
115 | "url": "https://data.openstreetmap.us/layercake/settlements.parquet",
116 | "info_url": "https://data.openstreetmap.us/",
117 | "needs_validation": false
118 | }
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/gpq_downloader/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 | import subprocess
4 | import sys
5 | import shutil
6 | from qgis.PyQt.QtWidgets import QProgressBar, QMessageBox
7 | from qgis.PyQt.QtCore import QCoreApplication, QTimer
8 | from qgis.core import QgsTask, QgsApplication, QgsSettings
9 | from qgis.utils import iface, loadPlugin, startPlugin, unloadPlugin, plugins
10 |
11 | from . import logger
12 |
13 | # Global flag to track installation status
14 | _duckdb_ready = False
15 |
16 |
class DuckDBInstallerTask(QgsTask):
    """QGIS task that installs or upgrades DuckDB with pip.

    ``run`` shells out to ``python -m pip install`` and records the outcome
    in ``success`` / ``message`` / ``exception``; ``finished`` then reports
    the outcome on the QGIS message bar, updates the module-level
    ``_duckdb_ready`` flag and, on success, invokes ``callback``.
    """

    def __init__(self, callback):
        """Create the task.

        Args:
            callback: Optional zero-argument callable invoked from
                ``finished`` after a successful installation. May be falsy.
        """
        super().__init__("Installing DuckDB", QgsTask.CanCancel)
        self.success = False
        self.message = ""
        self.exception = None
        self.callback = callback

    def run(self):
        """Run pip to install/upgrade DuckDB.

        Returns:
            True when pip exits successfully, False on any failure (the
            error is stored in ``self.exception`` / ``self.message``).
        """
        try:
            logger.log("Starting DuckDB installation...")
            system_name = platform.system()
            if system_name == "Windows":
                # Use the python.exe that sits next to sys.executable
                # (presumably sys.executable is not the interpreter inside
                # QGIS on Windows - TODO confirm).
                py_path = os.path.join(os.path.dirname(sys.executable), "python.exe")
            elif system_name == "Darwin":
                qgis_bin = os.path.dirname(sys.executable)
                possible_paths = [
                    os.path.join(qgis_bin, "python3"),
                    os.path.join(qgis_bin, "bin", "python3"),
                    os.path.join(qgis_bin, "Resources", "python", "bin", "python3"),
                ]
                # First candidate that exists wins; fall back to sys.executable.
                py_path = next(
                    (path for path in possible_paths if os.path.exists(path)),
                    sys.executable,
                )
            else:
                py_path = sys.executable

            # --upgrade plus an explicit minimum version: without them pip
            # treats an already-installed (but too old) DuckDB as satisfied
            # and the "needs upgrade" path would never actually upgrade.
            subprocess.check_call(
                [
                    py_path,
                    "-m",
                    "pip",
                    "install",
                    "--user",
                    "--upgrade",
                    "duckdb>=1.1.0",
                ]
            )

            import importlib

            # Let the import system notice the freshly installed package.
            importlib.invalidate_caches()

            self.success = True
            self.message = "DuckDB installed successfully"
            return True

        except subprocess.CalledProcessError as e:
            self.exception = e
            self.message = f"Pip install failed: {str(e)}"
            # Same log level (2) as the generic failure branch below.
            logger.log(f"Installation failed with error: {str(e)}", 2)
            return False
        except Exception as e:
            self.exception = e
            self.message = f"Failed to install/upgrade DuckDB: {str(e)}"
            logger.log(f"Installation failed with error: {str(e)}", 2)
            return False

    def finished(self, result):
        """Report the outcome and update the module-level readiness flag.

        Args:
            result: Value returned by ``run`` (per the QgsTask API).
        """
        global _duckdb_ready
        msg_bar = iface.messageBar()
        # Clears whatever widgets are currently on the message bar
        # (e.g. the progress widget pushed by ensure_duckdb).
        msg_bar.clearWidgets()

        if result and self.success:
            try:
                import duckdb

                self.message = f"DuckDB {duckdb.__version__} installed successfully"
            except ImportError:
                # Keep the generic success message set by run().
                pass
            msg_bar.pushSuccess("Success", self.message)
            logger.log(self.message)
            _duckdb_ready = True
            if self.callback:
                self.callback()
        else:
            msg_bar.pushCritical("Error", self.message)
            logger.log(self.message)
            _duckdb_ready = False
93 |
94 |
def ensure_duckdb(callback=None):
    """Make sure DuckDB >= 1.1.0 is importable, installing it if necessary.

    If a suitable DuckDB is already importable, ``_duckdb_ready`` is set,
    ``callback`` is invoked immediately and True is returned. Otherwise a
    ``DuckDBInstallerTask`` is queued on the QGIS task manager (the task
    receives ``callback``) and a progress widget is shown on the message bar.

    Args:
        callback: Optional zero-argument callable run once DuckDB is ready.

    Returns:
        True if DuckDB is already usable or the installer task was queued;
        False if setting up the installer task failed.
    """
    global _duckdb_ready

    try:
        import duckdb

        version = duckdb.__version__
        from packaging import version as version_parser

        if version_parser.parse(version) >= version_parser.parse("1.1.0"):
            logger.log(f"DuckDB {version} already installed")
            _duckdb_ready = True
            if callback:
                callback()
            return True
        else:
            logger.log(f"DuckDB {version} found but needs upgrade to 1.1.0+", 2)
            # Reuse the "not installed" path below to trigger the installer.
            raise ImportError("Version too old")

    except ImportError:
        logger.log("DuckDB not found or needs upgrade, attempting to install/upgrade...", 2)
        # Bound up-front so the error handler can tell whether the message
        # bar was ever obtained.
        msg_bar = None
        try:
            msg_bar = iface.messageBar()
            progress = QProgressBar()
            # min == max == 0 puts the bar in indeterminate ("busy") mode.
            progress.setMinimum(0)
            progress.setMaximum(0)
            progress.setValue(0)

            msg = msg_bar.createMessage("Installing DuckDB...")
            msg.layout().addWidget(progress)
            msg_bar.pushWidget(msg)
            QCoreApplication.processEvents()

            task = DuckDBInstallerTask(callback)
            QgsApplication.taskManager().addTask(task)

            def check_status():
                """Poll the task once a second; re-trigger it while queued."""
                try:
                    status = task.status()
                except RuntimeError:
                    # The underlying task object has been deleted; stop polling.
                    return

                if status == QgsTask.Queued:
                    try:
                        QgsApplication.taskManager().triggerTask(task)
                    except RuntimeError:
                        logger.log("Failed to trigger task, object likely deleted")
                        return
                    QTimer.singleShot(1000, check_status)
                elif status == QgsTask.Running:
                    QTimer.singleShot(1000, check_status)
                elif status == QgsTask.Complete:
                    logger.log("Task completed")

            # Start checking status after a short delay.
            QTimer.singleShot(100, check_status)

            return True

        except Exception as e:
            if msg_bar is not None:
                msg_bar.clearWidgets()
                # pushCritical takes only (title, message); the previous
                # extra positional argument made this handler raise itself.
                msg_bar.pushCritical("Error", f"Failed to install/upgrade DuckDB: {str(e)}")
            logger.log(f"Failed to setup task with error: {str(e)}", 2)
            logger.log(f"Error type: {type(e)}", 2)
            import traceback

            logger.log(f"Traceback: {traceback.format_exc()}", 2)
            return False
181 |
182 |
def classFactory(iface):
    """Build the plugin object for the given QGIS interface.

    The plugin module is imported lazily, only when this factory is called.
    """
    from . import plugin as plugin_module

    plugin_cls = plugin_module.QgisPluginGeoParquet
    return plugin_cls(iface)
187 |
188 |
189 |
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import MagicMock, patch
3 | import os
4 | from qgis.core import QgsRectangle, QgsCoordinateReferenceSystem
5 | from pathlib import Path
6 |
7 | from gpq_downloader.utils import (
8 | transform_bbox_to_4326,
9 | Worker,
10 | ValidationWorker
11 | )
12 |
13 | # Add new test for file size estimation
def test_estimate_file_size(mock_iface, sample_bbox, tmp_path):
    """estimate_file_size yields a positive float for a GeoJSON target."""
    # Stub connection: consecutive fetchone() calls return the row count
    # first and the average feature size second.
    fetchone_results = [(1000,), (2000.0,)]
    conn = MagicMock()
    conn.execute.return_value.fetchone.side_effect = fetchone_results

    geojson_path = str(tmp_path / "test.geojson")
    validation = {"has_bbox": True, "bbox_column": "bbox"}
    worker = Worker(
        "https://example.com/test.parquet",
        sample_bbox,
        geojson_path,
        mock_iface,
        validation,
    )

    size = worker.estimate_file_size(conn, "test_table")

    assert size > 0
    assert isinstance(size, float)
36 |
37 | # Add test for process_schema_columns
def test_process_schema_columns():
    """process_schema_columns emits the expected SQL wrapper per column type."""
    worker = Worker(
        "https://example.com/test.parquet",
        QgsRectangle(0, 0, 1, 1),
        "test.parquet",
        MagicMock(),
        {"has_bbox": True},
    )

    # (column name, column type) pairs; the remaining schema fields are
    # identical for every row.
    typed_columns = [
        ("id", "INTEGER"),
        ("tags", "MAP(VARCHAR, VARCHAR)"),
        ("names", "STRUCT(primary VARCHAR)"),
        ("categories", "VARCHAR[]"),
        ("small_num", "UTINYINT"),
        ("geometry", "GEOMETRY"),
    ]
    schema_result = [
        (col_name, col_type, "YES", None, None, None)
        for col_name, col_type in typed_columns
    ]

    columns = worker.process_schema_columns(schema_result)

    assert len(columns) == 6

    # Expected SQL fragment keyed by position in the returned list.
    expected_fragments = {
        1: 'TO_JSON("tags")',
        2: 'TO_JSON("names")',
        3: 'array_to_string("categories"',
        4: 'CAST("small_num" AS INTEGER)',
    }
    for position, fragment in expected_fragments.items():
        assert fragment in columns[position]
66 |
67 | # Add test for ValidationWorker metadata parsing
@patch('duckdb.connect')
def test_validation_worker_metadata_parsing(mock_connect, mock_iface):
    """check_bbox_metadata finds a bbox column in GeoParquet 'geo' metadata."""
    # Single key/value metadata row whose "geo" payload declares a bbox covering.
    geo_row = (
        b"geo",
        b'{"columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"covering":{"bbox":{"xmin":[0],"ymin":[0],"xmax":[1],"ymax":[1]}}}}}',
    )
    conn = MagicMock()
    conn.execute.return_value.fetchall.return_value = [geo_row]
    mock_connect.return_value = conn

    extent = QgsRectangle(0, 0, 1, 1)
    worker = ValidationWorker("https://example.com/test.parquet", mock_iface, extent)

    assert worker.check_bbox_metadata(conn) is not None
87 |
88 | # Add test for needs_validation method
def test_validation_worker_needs_validation():
    """needs_validation depends on which kind of URL the worker points at.

    Three cases are exercised in sequence on the same worker: a custom URL
    (validation needed), an Overture S3 URL (not needed), and a URL listed in
    PRESET_DATASETS with ``needs_validation`` set to False (not needed).
    """
    worker = ValidationWorker(
        "https://example.com/test.parquet",
        MagicMock(),
        QgsRectangle(0, 0, 1, 1)
    )

    # Custom URL: validation is required.
    assert worker.needs_validation()

    # Overture URL: no validation.
    worker.dataset_url = "s3://overturemaps-us-west-2/release/2025-10-22.0/theme=buildings"
    assert not worker.needs_validation()

    # Preset Source Cooperative entry that opts out of validation.
    worker.PRESET_DATASETS = {
        "source_cooperative": {
            "test_dataset": {
                "url": "https://example.com/test.parquet",
                "needs_validation": False
            }
        }
    }
    worker.dataset_url = "https://example.com/test.parquet"
    assert not worker.needs_validation()
115 |
116 | # Add test for transform_bbox_to_4326 with invalid inputs
def test_transform_bbox_invalid_inputs(qgs_app):
    """transform_bbox_to_4326 behaviour for missing or invalid arguments."""
    unit_box = QgsRectangle(0, 0, 1, 1)
    wgs84 = QgsCoordinateReferenceSystem("EPSG:4326")

    # A missing extent yields None.
    no_extent_result = transform_bbox_to_4326(None, wgs84)
    assert no_extent_result is None

    # A missing CRS yields None as well.
    no_crs_result = transform_bbox_to_4326(unit_box, None)
    assert no_crs_result is None

    # A default-constructed CRS is invalid, yet a rectangle still comes back.
    blank_crs = QgsCoordinateReferenceSystem()
    assert not blank_crs.isValid()
    assert isinstance(transform_bbox_to_4326(QgsRectangle(0, 0, 1, 1), blank_crs), QgsRectangle)
130 |
131 | # Add test for Worker initialization with layer name
def test_worker_initialization_with_layer_name(mock_iface, sample_bbox, tmp_path):
    """A Worker built with layer_name stores it and starts with clean flags."""
    target_path = str(tmp_path / "test.parquet")
    init_kwargs = {"layer_name": "Test Layer"}

    worker = Worker(
        "https://example.com/test.parquet",
        sample_bbox,
        target_path,
        mock_iface,
        {"has_bbox": True},
        **init_kwargs,
    )

    assert worker.layer_name == "Test Layer"
    # Neither flag may be set on a freshly created worker.
    assert not worker.size_warning_accepted
    assert not worker.killed
146 |
def test_transform_bbox_to_4326(qgs_app):
    """transform_bbox_to_4326 reprojects from 3857 and leaves 4326 x-min intact."""
    # Case 1: a Web Mercator (EPSG:3857) box must come back with changed coordinates.
    mercator_box = QgsRectangle(1000000, 2000000, 1010000, 2010000)
    reprojected = transform_bbox_to_4326(
        mercator_box, QgsCoordinateReferenceSystem("EPSG:3857")
    )
    assert isinstance(reprojected, QgsRectangle)
    assert reprojected.xMinimum() != mercator_box.xMinimum()

    # Case 2: a box that is already in EPSG:4326 keeps its minimum x value.
    wgs84_box = QgsRectangle(1, 2, 3, 4)
    unchanged = transform_bbox_to_4326(
        wgs84_box, QgsCoordinateReferenceSystem("EPSG:4326")
    )
    assert unchanged.xMinimum() == wgs84_box.xMinimum()
164 |
def test_worker_initialization(mock_iface, sample_bbox, tmp_path, sample_validation_results):
    """Worker stores its constructor arguments and starts un-killed."""
    url = "https://example.com/test.parquet"
    gpkg_path = os.path.join(tmp_path, "output.gpkg")

    worker = Worker(url, sample_bbox, gpkg_path, mock_iface, sample_validation_results)

    # Each constructor argument is exposed under the matching attribute.
    assert worker.dataset_url == url
    assert worker.extent == sample_bbox
    assert worker.output_file == gpkg_path
    assert worker.validation_results == sample_validation_results
    assert worker.killed is False
180 |
def test_validation_worker_initialization(mock_iface, sample_bbox):
    """ValidationWorker keeps the URL and extent it was given and is not killed."""
    url = "https://example.com/test.parquet"

    validator = ValidationWorker(url, mock_iface, sample_bbox)

    assert validator.dataset_url == url
    assert validator.extent == sample_bbox
    assert validator.killed is False
192 |
def test_transform_bbox_with_none(qgs_app):
    """Passing None for both extent and CRS returns None."""
    assert transform_bbox_to_4326(None, None) is None
197 |
@patch('duckdb.connect')
def test_worker_error_handling(mock_connect, mock_iface, sample_bbox, tmp_path):
    """A failing duckdb.connect surfaces through the worker's error signal."""
    mock_connect.side_effect = Exception("Test error")

    # Holds the most recent message delivered by the error signal.
    captured = {"message": None}

    def remember_error(msg):
        captured["message"] = msg

    parquet_path = str(tmp_path / "test.parquet")
    validation = {"has_bbox": True, "bbox_column": "bbox"}
    worker = Worker(
        "https://example.com/test.parquet",
        sample_bbox,
        parquet_path,
        mock_iface,
        validation,
    )
    worker.error.connect(remember_error)

    worker.run()

    assert captured["message"] is not None
    assert "Test error" in captured["message"]
--------------------------------------------------------------------------------
/gpq_downloader/tests/create_test_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Create test data files for the GPQ Downloader plugin tests.
4 |
5 | This module can create:
6 | 1. Non-GeoParquet compliant parquet files (compatible parquet as per spec)
7 | 2. Standard GeoParquet files (with proper metadata)
8 | 3. Other test data as needed
9 | """
10 |
11 | import pyarrow as pa
12 | import pyarrow.parquet as pq
13 | from shapely.geometry import LineString
14 | from shapely import wkb
15 | import pandas as pd
16 |
17 |
def create_non_geoparquet_file(output_path="non_geoparquet_with_geometry.parquet"):
    """Write a parquet file with a WKB geometry column and no 'geo' metadata.

    Seven sample LineStrings (approximate San Francisco street segments) are
    written together with id/name/type/length columns. After writing, the
    file is re-opened and the presence of a 'geo' metadata key is printed.

    Args:
        output_path: Destination path of the parquet file.

    Returns:
        The ``output_path`` that was written.
    """
    # One record per street segment: (name, type, length_m, vertices).
    streets = [
        ("Market St", "major_street", 250.5,
         [(-122.4194, 37.7749), (-122.4184, 37.7759), (-122.4174, 37.7769)]),
        ("Mission St", "major_street", 220.3,
         [(-122.4180, 37.7600), (-122.4170, 37.7610), (-122.4160, 37.7620)]),
        ("Geary Blvd", "boulevard", 180.7,
         [(-122.4650, 37.7810), (-122.4640, 37.7810), (-122.4630, 37.7810)]),
        ("Van Ness Ave", "avenue", 200.9,
         [(-122.4220, 37.7750), (-122.4220, 37.7760), (-122.4220, 37.7770)]),
        ("Embarcadero", "waterfront", 190.1,
         [(-122.3950, 37.7950), (-122.3940, 37.7940), (-122.3930, 37.7930)]),
        # Lombard Street: the famous crooked part.
        ("Lombard St", "tourist_street", 150.2,
         [(-122.4186, 37.8021), (-122.4176, 37.8020), (-122.4166, 37.8019)]),
        ("GG Park Trail", "park_path", 210.4,
         [(-122.4820, 37.7700), (-122.4810, 37.7700), (-122.4800, 37.7700)]),
    ]

    # Column-oriented view of the records; geometry is serialised to WKB.
    data = {
        'id': list(range(1, len(streets) + 1)),
        'name': [street[0] for street in streets],
        'type': [street[1] for street in streets],
        'length_m': [street[2] for street in streets],
        'geometry': [wkb.dumps(LineString(street[3])) for street in streets],
    }

    df = pd.DataFrame(data)

    # Deliberately no geo metadata is attached to the table before writing.
    pq.write_table(pa.Table.from_pandas(df), output_path)

    print(f"Created non-GeoParquet file: {output_path}")
    print(f"Columns: {list(data.keys())}")
    print(f"Rows: {len(df)}")

    # Re-read the file and report whether a "geo" metadata key is present.
    file_metadata = pq.ParquetFile(output_path).metadata
    if file_metadata.metadata:
        decoded = {
            key.decode(): value.decode()
            for key, value in file_metadata.metadata.items()
        }
        print(f"Has 'geo' metadata: {'geo' in decoded}")
    else:
        print("No metadata present")

    return output_path
101 |
102 |
def create_geoparquet_file(output_path="geoparquet_with_metadata.parquet"):
    """Write a parquet file carrying GeoParquet 'geo' metadata.

    The same seven sample LineStrings and attribute columns as the
    non-GeoParquet fixture are written, and a ``geo`` key holding a JSON
    document (version 1.0.0, WKB encoding, EPSG:4326 CRS) is added to the
    schema metadata. After writing, the file is re-opened and the presence
    of the 'geo' key is printed.

    Args:
        output_path: Destination path of the parquet file.

    Returns:
        The ``output_path`` that was written.
    """
    import json

    # One record per street segment: (name, type, length_m, vertices).
    streets = [
        ("Market St", "major_street", 250.5,
         [(-122.4194, 37.7749), (-122.4184, 37.7759), (-122.4174, 37.7769)]),
        ("Mission St", "major_street", 220.3,
         [(-122.4180, 37.7600), (-122.4170, 37.7610), (-122.4160, 37.7620)]),
        ("Geary Blvd", "boulevard", 180.7,
         [(-122.4650, 37.7810), (-122.4640, 37.7810), (-122.4630, 37.7810)]),
        ("Van Ness Ave", "avenue", 200.9,
         [(-122.4220, 37.7750), (-122.4220, 37.7760), (-122.4220, 37.7770)]),
        ("Embarcadero", "waterfront", 190.1,
         [(-122.3950, 37.7950), (-122.3940, 37.7940), (-122.3930, 37.7930)]),
        ("Lombard St", "tourist_street", 150.2,
         [(-122.4186, 37.8021), (-122.4176, 37.8020), (-122.4166, 37.8019)]),
        ("GG Park Trail", "park_path", 210.4,
         [(-122.4820, 37.7700), (-122.4810, 37.7700), (-122.4800, 37.7700)]),
    ]

    data = {
        'id': list(range(1, len(streets) + 1)),
        'name': [street[0] for street in streets],
        'type': [street[1] for street in streets],
        'length_m': [street[2] for street in streets],
        'geometry': [wkb.dumps(LineString(street[3])) for street in streets],
    }

    df = pd.DataFrame(data)
    table = pa.Table.from_pandas(df)

    # CRS description for WGS 84 / EPSG:4326, embedded in the geo metadata.
    wgs84_crs = {
        "$schema": "https://proj.org/schemas/v0.6/projjson.schema.json",
        "type": "GeographicCRS",
        "name": "WGS 84",
        "datum": {
            "type": "GeodeticReferenceFrame",
            "name": "World Geodetic System 1984",
            "ellipsoid": {
                "name": "WGS 84",
                "semi_major_axis": 6378137,
                "inverse_flattening": 298.257223563,
            },
        },
        "coordinate_system": {
            "subtype": "ellipsoidal",
            "axis": [
                {
                    "name": "Geodetic longitude",
                    "abbreviation": "Lon",
                    "direction": "east",
                    "unit": "degree",
                },
                {
                    "name": "Geodetic latitude",
                    "abbreviation": "Lat",
                    "direction": "north",
                    "unit": "degree",
                },
            ],
        },
        "id": {
            "authority": "EPSG",
            "code": 4326,
        },
    }
    geo_metadata = {
        "version": "1.0.0",
        "primary_column": "geometry",
        "columns": {
            "geometry": {
                "encoding": "WKB",
                "geometry_types": ["LineString"],
                "crs": wgs84_crs,
            },
        },
    }

    # Keep whatever schema metadata already exists and add the "geo" entry.
    schema_metadata = table.schema.metadata or {}
    schema_metadata[b'geo'] = json.dumps(geo_metadata).encode('utf-8')
    table = table.replace_schema_metadata(schema_metadata)

    pq.write_table(table, output_path)

    print(f"Created GeoParquet file: {output_path}")
    print(f"Columns: {list(data.keys())}")
    print(f"Rows: {len(df)}")

    # Re-read the file and report whether the "geo" key made it to disk.
    file_metadata = pq.ParquetFile(output_path).metadata
    has_geo = bool(file_metadata.metadata and b'geo' in file_metadata.metadata)
    print(f"Has 'geo' metadata: {has_geo}")

    return output_path
223 |
224 |
if __name__ == "__main__":
    import os

    # Fixtures are written to a "data" directory next to this script;
    # the directory is created when missing.
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    os.makedirs(data_dir, exist_ok=True)

    # (creator function, target path) for each fixture, in creation order.
    fixture_jobs = [
        (create_non_geoparquet_file,
         os.path.join(data_dir, 'non_geoparquet_with_geometry.parquet')),
        (create_geoparquet_file,
         os.path.join(data_dir, 'geoparquet_with_metadata.parquet')),
    ]

    separator = "-" * 50
    print("Creating test data files...")
    print(separator)
    for make_fixture, target_path in fixture_jobs:
        make_fixture(target_path)
        print(separator)
    print("Test data creation complete!")
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_plugin.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import datetime
3 | from unittest.mock import MagicMock, patch, call
4 | from qgis.PyQt.QtWidgets import QAction, QProgressDialog, QMessageBox, QFileDialog, QDialog, QVBoxLayout, QLabel
5 | from qgis.core import QgsProject, QgsVectorLayer, QgsSettings, QgsCoordinateReferenceSystem, QgsRectangle
6 | from pathlib import Path
7 | from pytestqt import qtbot
8 |
9 | from gpq_downloader.plugin import QgisPluginGeoParquet
10 | from gpq_downloader.dialog import DataSourceDialog
11 |
def test_plugin_run_with_active_download(qgs_app, mock_iface):
    """run() shows a single warning when a download thread is still running."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # Simulate an in-flight download: a worker plus a thread reporting "running".
    busy_thread = MagicMock()
    busy_thread.isRunning.return_value = True
    plugin.worker = MagicMock()
    plugin.worker_thread = busy_thread

    with patch('gpq_downloader.plugin.QMessageBox.warning') as warning_box:
        plugin.run()

    warning_box.assert_called_once()
    # Second positional argument of the warning call must mention the download.
    positional_args = warning_box.call_args[0]
    assert "Download in Progress" in positional_args[1]
23 |
@patch('gpq_downloader.plugin.DataSourceDialog')
def test_plugin_run_dialog_rejected(mock_dialog, qgs_app, mock_iface):
    """run() must not create a worker or thread when the dialog is rejected."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # The patched dialog class hands back an instance whose exec() is "Rejected".
    rejected_dialog = MagicMock()
    rejected_dialog.exec.return_value = QDialog.Rejected
    mock_dialog.return_value = rejected_dialog

    plugin.run()

    rejected_dialog.exec.assert_called_once()
    for leftover in (plugin.worker, plugin.worker_thread):
        assert leftover is None
39 |
@patch('gpq_downloader.plugin.QgsSettings')
@patch('gpq_downloader.plugin.QFileDialog.getSaveFileName')
@patch('gpq_downloader.plugin.DataSourceDialog')
def test_plugin_run_with_download(mock_dialog, mock_save_dialog, mock_settings, qgs_app, mock_iface, tmp_path):
    """run() must ask for a save location once the dialog has been accepted."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # Accepted dialog offering a single Overture-style URL.
    accepted_dialog = MagicMock()
    accepted_dialog.exec.return_value = QDialog.Accepted
    accepted_dialog.get_urls.return_value = ["https://example.com/test.parquet?theme=buildings"]
    accepted_dialog.overture_radio.isChecked.return_value = True
    mock_dialog.return_value = accepted_dialog

    # The save dialog "returns" a path inside pytest's temporary directory.
    chosen_path = str(tmp_path / "test.parquet")
    mock_save_dialog.return_value = (chosen_path, "GeoParquet (*.parquet)")

    # QgsSettings() yields a plain mock instance.
    mock_settings.return_value = MagicMock()

    # Freeze the timestamp and stub out queue processing so nothing is
    # actually downloaded.
    with patch('gpq_downloader.plugin.datetime') as mock_datetime, \
            patch.object(plugin, 'process_download_queue'):
        mock_datetime.datetime.now.return_value.strftime.return_value = "20230101_120000"
        plugin.run()

    mock_save_dialog.assert_called_once()
71 |
def test_plugin_handle_error(qgs_app, mock_iface):
    """handle_error() must raise a critical message box and close the progress dialog."""
    error_msg = "Test error"
    plugin = QgisPluginGeoParquet(mock_iface)
    plugin.progress_dialog = MagicMock()

    with patch('gpq_downloader.plugin.QMessageBox.critical') as critical_box:
        plugin.handle_error(error_msg)

        critical_box.assert_called_once()
        second_arg = critical_box.call_args[0][1]
        # Accept either the generic "Error" text or one embedding the message.
        assert second_arg == "Error" or error_msg in second_arg
        plugin.progress_dialog.close.assert_called_once()
83 |
def test_plugin_update_progress(qgs_app, mock_iface):
    """update_progress() must forward its text to the progress dialog label."""
    plugin = QgisPluginGeoParquet(mock_iface)
    plugin.progress_dialog = MagicMock()

    label_text = "Test progress"
    plugin.update_progress(label_text)

    plugin.progress_dialog.setLabelText.assert_called_once_with(label_text)
91 |
def test_plugin_cancel_download(qgs_app, mock_iface):
    """cancel_download() must kill the worker and run thread cleanup."""
    plugin = QgisPluginGeoParquet(mock_iface)
    plugin.worker, plugin.worker_thread = MagicMock(), MagicMock()

    # cleanup_thread is replaced so the call can be observed.
    with patch.object(plugin, 'cleanup_thread') as cleanup_spy:
        plugin.cancel_download()

        plugin.worker.kill.assert_called_once()
        cleanup_spy.assert_called_once()
103 |
@patch('gpq_downloader.plugin.QgsVectorLayer')
def test_plugin_load_layer_success(mock_vector_layer, qgs_app, mock_iface):
    """load_layer() must add a valid layer to the current project."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # QgsVectorLayer(...) yields a layer that reports itself as valid.
    valid_layer = MagicMock()
    valid_layer.isValid.return_value = True
    mock_vector_layer.return_value = valid_layer

    # QgsProject.instance() yields a mock project to record the call on.
    project = MagicMock()
    with patch('gpq_downloader.plugin.QgsProject.instance', return_value=project):
        plugin.load_layer("test.gpkg")

        project.addMapLayer.assert_called_once_with(valid_layer)
120 |
@patch('gpq_downloader.plugin.QgsVectorLayer')
def test_plugin_load_layer_invalid(mock_vector_layer, qgs_app, mock_iface):
    """load_layer() must report a critical error when the layer is invalid."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # QgsVectorLayer(...) yields a layer that reports itself as invalid.
    broken_layer = MagicMock()
    broken_layer.isValid.return_value = False
    mock_vector_layer.return_value = broken_layer

    with patch('gpq_downloader.plugin.QMessageBox.critical') as critical_box:
        plugin.load_layer("test.gpkg")

        critical_box.assert_called_once()
        positional_args = critical_box.call_args[0]
        # First argument is the main window; second is either the generic
        # "Error" text or one that names the file.
        assert positional_args[0] == mock_iface.mainWindow()
        assert positional_args[1] == "Error" or "test.gpkg" in positional_args[1]
136 |
def test_plugin_show_info(qgs_app, mock_iface):
    """show_info() must open an information box parented to the main window."""
    test_message = "Test info"
    plugin = QgisPluginGeoParquet(mock_iface)

    with patch('gpq_downloader.plugin.QMessageBox.information') as info_box:
        plugin.show_info(test_message)

        info_box.assert_called_once()
        positional_args = info_box.call_args[0]
        assert positional_args[0] == mock_iface.mainWindow()
        # Accept either the generic "Success" text or one embedding the message.
        assert positional_args[1] == "Success" or test_message in positional_args[1]
147 |
def test_plugin_initialization(qgs_app, mock_iface):
    """A freshly constructed plugin keeps the iface, has no worker, and has a Path download_dir."""
    fresh_plugin = QgisPluginGeoParquet(mock_iface)

    assert fresh_plugin.iface == mock_iface
    # No download has been started yet.
    assert fresh_plugin.worker is None
    assert fresh_plugin.worker_thread is None
    assert isinstance(fresh_plugin.download_dir, Path)
155 |
def test_plugin_init_gui(qgs_app, mock_iface):
    """initGui() must create the plugin action and register it as a toolbar icon."""
    plugin = QgisPluginGeoParquet(mock_iface)
    plugin.initGui()

    # The action exists and carries the expected label.
    created_action = plugin.action
    assert isinstance(created_action, QAction)
    assert created_action.text() == "Download GeoParquet Data"

    # Exactly one toolbar icon was registered, and it is that action.
    assert len(mock_iface.toolbar_icons) == 1
    assert mock_iface.toolbar_icons[0] == plugin.action
168 |
def test_plugin_unload(qgs_app, mock_iface):
    """unload() must remove the toolbar icon that initGui() added."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # Register the icon first and confirm it is there.
    plugin.initGui()
    assert len(mock_iface.toolbar_icons) == 1

    # Pretend a worker thread exists but is idle.
    idle_thread = MagicMock()
    idle_thread.isRunning.return_value = False
    plugin.worker_thread = idle_thread

    plugin.unload()

    # The icon must be gone afterwards.
    assert len(mock_iface.toolbar_icons) == 0
186 |
@patch('gpq_downloader.plugin.QThread')
def test_plugin_cleanup_thread(mock_thread, qgs_app, mock_iface):
    """cleanup_thread() must reset both the worker and its thread to None."""
    plugin = QgisPluginGeoParquet(mock_iface)
    plugin.worker, plugin.worker_thread = MagicMock(), MagicMock()

    plugin.cleanup_thread()

    for leftover in (plugin.worker, plugin.worker_thread):
        assert leftover is None
197 |
def test_handle_validation_complete_success(qgs_app, mock_iface, qtbot):
    """Successful validation plus a chosen filename must start download_and_save once."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # Real QDialog (registered with qtbot) carrying the attributes the
    # handler is given: a checked Overture radio button and a theme combo box.
    fake_dialog = QDialog()
    qtbot.addWidget(fake_dialog)
    fake_dialog.overture_radio = MagicMock(**{"isChecked.return_value": True})
    # Any theme other than "base".
    fake_dialog.overture_combo = MagicMock(**{"currentText.return_value": "castle"})

    # A real rectangle is used for the extent rather than a MagicMock.
    dummy_extent = QgsRectangle(0, 0, 10, 10)

    # The save dialog "returns" a filename, i.e. the user clicked Save.
    save_choice = ("test_output.parquet", "GeoParquet (*.parquet)")
    with patch('qgis.PyQt.QtWidgets.QFileDialog.getSaveFileName',
               return_value=save_choice) as save_dialog:
        plugin.download_and_save = MagicMock()

        plugin.handle_validation_complete(
            success=True,
            message="",
            validation_results={},
            url="https://example.com/test.parquet",
            extent=dummy_extent,
            dialog=fake_dialog
        )

        save_dialog.assert_called_once()
        plugin.download_and_save.assert_called_once()
233 |
def test_handle_validation_complete_cancel(qgs_app, mock_iface, qtbot):
    """Cancelling the save dialog must start no download and show no warning."""
    plugin = QgisPluginGeoParquet(mock_iface)

    # Real QDialog (registered with qtbot) carrying a checked Overture radio
    # button and a theme combo box.
    fake_dialog = QDialog()
    qtbot.addWidget(fake_dialog)
    fake_dialog.overture_radio = MagicMock(**{"isChecked.return_value": True})
    fake_dialog.overture_combo = MagicMock(**{"currentText.return_value": "castle"})

    # A real rectangle is used for the extent rather than a MagicMock.
    dummy_extent = QgsRectangle(0, 0, 10, 10)

    # Empty strings from the save dialog mean the user cancelled it.
    with patch('qgis.PyQt.QtWidgets.QFileDialog.getSaveFileName',
               return_value=("", "")) as save_dialog, \
            patch('gpq_downloader.plugin.QMessageBox.warning') as warning_box:
        plugin.download_and_save = MagicMock()

        plugin.handle_validation_complete(
            success=True,
            message="",
            validation_results={},
            url="https://example.com/test.parquet",
            extent=dummy_extent,
            dialog=fake_dialog
        )

        save_dialog.assert_called_once()
        plugin.download_and_save.assert_not_called()
        # A cancelled save is not an error, so no warning is expected.
        warning_box.assert_not_called()
269 |
def test_handle_validation_complete_failure(qgs_app, mock_iface):
    """Failed validation must surface its message in a "Validation Error" warning."""
    plugin = QgisPluginGeoParquet(mock_iface)
    failure_kwargs = dict(
        success=False,
        message="Validation failed",
        validation_results={},
        url="https://example.com/test.parquet",
        extent=MagicMock(),
        dialog=MagicMock(),
    )

    with patch('gpq_downloader.plugin.QMessageBox.warning') as warning_box:
        plugin.handle_validation_complete(**failure_kwargs)

        warning_box.assert_called_once_with(
            mock_iface.mainWindow(), "Validation Error", "Validation failed"
        )
283 |
def test_create_progress_dialog(qgs_app, mock_iface):
    """create_progress_dialog() must apply the given window title and label text."""
    plugin = QgisPluginGeoParquet(mock_iface)

    dialog = plugin.create_progress_dialog("Test Title", "Test Message")

    shown = (dialog.windowTitle(), dialog.labelText())
    assert shown[0] == "Test Title"
    assert shown[1] == "Test Message"
290 |
def test_setup_worker(qgs_app, mock_iface):
    """setup_worker() must return a worker/thread pair with the inputs stored on the worker."""
    plugin = QgisPluginGeoParquet(mock_iface)
    # setup_worker is called with a progress dialog already in place.
    plugin.progress_dialog = MagicMock()

    dataset_url = "https://example.com/test.parquet"
    extent = MagicMock()
    output_file = "output.parquet"
    validation_results = {"has_bbox": True}

    created = plugin.setup_worker(dataset_url, extent, output_file, validation_results)
    worker, worker_thread = created

    assert worker is not None
    assert worker_thread is not None
    # Each input must be stored unchanged on the worker.
    assert worker.dataset_url == dataset_url
    assert worker.extent == extent
    assert worker.output_file == output_file
    assert worker.validation_results == validation_results
--------------------------------------------------------------------------------
/gpq_downloader/icons/parquet-download.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gpq_downloader/tests/test_non_geoparquet.py:
--------------------------------------------------------------------------------
1 | """Tests for handling non-GeoParquet compliant parquet files with geometry columns."""
2 |
3 | import pytest
4 | from pathlib import Path
5 | from unittest.mock import MagicMock, patch, call
6 | import pyarrow.parquet as pq
7 | import duckdb
8 | import tempfile
9 | import os
10 |
11 | from gpq_downloader.utils import Worker, ValidationWorker
12 |
13 |
class TestNonGeoParquetHandling:
    """Test handling of parquet files with geometry but no geo metadata."""

    @staticmethod
    def _make_execute_stub(schema_rows, row_count, executed=None):
        """Build a ``side_effect`` for a mocked DuckDB ``connection.execute``.

        Args:
            schema_rows: Rows returned by ``fetchall()`` for queries containing
                ``DESCRIBE SELECT``.
            row_count: Value wrapped in a 1-tuple and returned by ``fetchone()``
                for queries containing ``SELECT COUNT(*)``.
            executed: Optional list; when given, every query string is
                appended to it so the test can inspect what was run.

        Returns:
            A one-argument callable that returns a fresh ``MagicMock`` result
            for each query. Any other query (INSTALL, LOAD, CREATE TABLE, ...)
            yields an empty ``fetchall()`` and a ``None`` ``fetchone()``.
        """
        def fake_execute(query):
            if executed is not None:
                executed.append(query)
            result = MagicMock()
            if "DESCRIBE SELECT" in query:
                result.fetchall.return_value = list(schema_rows)
            elif "SELECT COUNT(*)" in query:
                result.fetchone.return_value = (row_count,)
            else:
                result.fetchall.return_value = []
                result.fetchone.return_value = None
            return result

        return fake_execute

    @pytest.fixture
    def test_data_path(self):
        """Path to test data directory."""
        return Path(__file__).parent / "data"

    @pytest.fixture
    def non_geoparquet_file(self, test_data_path):
        """Path to non-GeoParquet test file."""
        return test_data_path / "non_geoparquet_with_geometry.parquet"

    def test_non_geoparquet_file_exists(self, non_geoparquet_file):
        """Verify test file exists and has expected structure."""
        assert non_geoparquet_file.exists(), f"Test file not found: {non_geoparquet_file}"

        # Verify file structure
        pf = pq.ParquetFile(non_geoparquet_file)
        column_names = [field.name for field in pf.schema]
        for expected_column in ("geometry", "id", "name"):
            assert expected_column in column_names

        # Verify no geo metadata. Keys are compared as bytes so that values
        # never need to be decoded just to test for the presence of one key.
        metadata = pf.metadata.metadata or {}
        assert b"geo" not in metadata

    @patch('gpq_downloader.utils.transform_bbox_to_4326')
    @patch('gpq_downloader.utils.duckdb.connect')
    def test_worker_handles_non_geoparquet(self, mock_connect, mock_transform_bbox, non_geoparquet_file, tmp_path):
        """Test that Worker can process non-GeoParquet files with geometry."""
        from qgis.core import QgsRectangle

        # Mock connection whose execute() answers schema and count queries.
        mock_conn = MagicMock()
        mock_connect.return_value = mock_conn
        mock_conn.execute.side_effect = self._make_execute_stub(
            schema_rows=[
                ('id', 'BIGINT', 'YES', None, None, None),
                ('name', 'VARCHAR', 'YES', None, None, None),
                ('type', 'VARCHAR', 'YES', None, None, None),
                ('length_m', 'DOUBLE', 'YES', None, None, None),
                # Geometry as BLOB, not WKB_BLOB
                ('geometry', 'BLOB', 'YES', None, None, None),
            ],
            row_count=7,
        )

        # transform_bbox_to_4326 returns a proper bbox (global extent).
        mock_transform_bbox.return_value = QgsRectangle(-180, -90, 180, 90)

        output_file = tmp_path / "test_output.parquet"

        # Mock iface
        mock_iface = MagicMock()
        mock_iface.mapCanvas.return_value.mapSettings.return_value.destinationCrs.return_value = MagicMock()

        # Mock validation results
        validation_results = {
            'has_geometry': True,
            'geometry_type': 'BLOB',
            'total_features': 7
        }

        worker = Worker(
            dataset_url=f"file://{non_geoparquet_file}",
            extent=None,
            output_file=str(output_file),
            iface=mock_iface,
            validation_results=validation_results
        )

        # Mock signals
        worker.progress = MagicMock()
        worker.error = MagicMock()
        worker.finished = MagicMock()

        worker.run()

        # Verify spatial extension was loaded
        executed_sql = [recorded[0][0] for recorded in mock_conn.execute.call_args_list]
        assert any("INSTALL spatial" in sql for sql in executed_sql)
        assert any("LOAD spatial" in sql for sql in executed_sql)

        # Verify no errors
        worker.error.emit.assert_not_called()

        # Verify finished signal was emitted
        worker.finished.emit.assert_called_once()

    @patch('gpq_downloader.utils.transform_bbox_to_4326')
    @patch('gpq_downloader.utils.duckdb.connect')
    def test_non_geoparquet_spatial_query(self, mock_connect, mock_transform_bbox, non_geoparquet_file, tmp_path):
        """Test spatial filtering works without bbox column."""
        from qgis.core import QgsRectangle

        mock_conn = MagicMock()
        mock_connect.return_value = mock_conn

        # Track all queries
        queries_executed = []
        mock_conn.execute.side_effect = self._make_execute_stub(
            schema_rows=[
                ('geometry', 'BLOB', 'YES', None, None, None),
                ('id', 'BIGINT', 'YES', None, None, None),
                ('name', 'VARCHAR', 'YES', None, None, None),
            ],
            row_count=5,
            executed=queries_executed,
        )

        # transform_bbox_to_4326 returns the same area as the extent below.
        mock_transform_bbox.return_value = QgsRectangle(-122.5, 37.7, -122.4, 37.8)

        output_file = tmp_path / "test_output.parquet"

        # Mock iface
        mock_iface = MagicMock()
        mock_iface.mapCanvas.return_value.mapSettings.return_value.destinationCrs.return_value = MagicMock()

        # Extent for bbox filter (SF area)
        extent = QgsRectangle(-122.5, 37.7, -122.4, 37.8)

        # Mock validation results
        validation_results = {
            'has_geometry': True,
            'geometry_type': 'BLOB',
            'total_features': 5
        }

        worker = Worker(
            dataset_url=f"file://{non_geoparquet_file}",
            extent=extent,
            output_file=str(output_file),
            iface=mock_iface,
            validation_results=validation_results
        )

        # Mock signals
        worker.progress = MagicMock()
        worker.error = MagicMock()
        worker.finished = MagicMock()

        worker.run()

        # For BLOB geometry columns, spatial filtering happens after conversion
        # So we should see the conversion happening in a separate step
        conversion_query = any(
            "ST_GeomFromWKB" in query and "CREATE TABLE" in query
            for query in queries_executed
        )
        assert conversion_query, f"Expected geometry conversion for BLOB column. Queries: {queries_executed}"

    def test_duckdb_reads_non_geoparquet(self, non_geoparquet_file):
        """Test that DuckDB can actually read the non-GeoParquet file with spatial extension."""
        conn = duckdb.connect()
        # try/finally so the connection is released even when an assertion
        # or query fails part-way through.
        try:
            # Load spatial extension
            conn.execute("INSTALL spatial;")
            conn.execute("LOAD spatial;")

            # Read the file
            query = f"SELECT * FROM read_parquet('{non_geoparquet_file}')"
            result = conn.execute(query).fetchall()

            # Should have 7 rows
            assert len(result) == 7

            # Test geometry column can be converted from WKB
            geom_query = f"""
                SELECT
                    id,
                    name,
                    ST_AsText(ST_GeomFromWKB(geometry)) as geom_wkt
                FROM read_parquet('{non_geoparquet_file}')
                LIMIT 1
            """
            geom_result = conn.execute(geom_query).fetchone()

            assert geom_result is not None
            assert geom_result[0] == 1  # id
            assert geom_result[1] == 'Market St'  # name
            assert 'LINESTRING' in geom_result[2]  # geometry as WKT
        finally:
            conn.close()

    @pytest.mark.integration
    @pytest.mark.skipif(
        os.environ.get('SKIP_INTEGRATION_TESTS', 'false').lower() == 'true',
        reason="Skipping integration tests"
    )
    @patch('gpq_downloader.utils.transform_bbox_to_4326')
    def test_end_to_end_remote_non_geoparquet(self, mock_transform_bbox):
        """End-to-end test downloading and processing remote non-geoparquet file."""
        from qgis.core import QgsRectangle

        dataset_url = "https://data.source.coop/cholmes/aois/non_geoparquet_with_geometry.parquet"

        # Create a temporary directory for output
        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = os.path.join(temp_dir, "test_output.parquet")

            # Mock iface and the canvas -> settings -> CRS chain hanging off it.
            mock_iface = MagicMock()
            mock_canvas = MagicMock()
            mock_settings = MagicMock()
            mock_crs = MagicMock()
            mock_iface.mapCanvas.return_value = mock_canvas
            mock_canvas.mapSettings.return_value = mock_settings
            mock_settings.destinationCrs.return_value = mock_crs
            mock_crs.authid.return_value = "EPSG:4326"

            # Create extent for filtering (San Francisco area)
            extent = QgsRectangle(-122.5, 37.7, -122.4, 37.8)

            # Mock transform_bbox_to_4326 to return the same extent (already in 4326)
            mock_transform_bbox.return_value = extent

            # Run validation manually to get results.
            # Since we can't easily test the actual ValidationWorker with signals,
            # we validate using duckdb directly.
            conn = duckdb.connect()
            try:
                conn.execute("INSTALL spatial;")
                conn.execute("LOAD spatial;")

                # Get schema
                schema_query = f"DESCRIBE SELECT * FROM read_parquet('{dataset_url}')"
                schema = conn.execute(schema_query).fetchall()

                # Reading key/value metadata is best-effort: a DuckDB failure
                # here simply means "no geo metadata found". Only DuckDB
                # errors are swallowed so that interrupts and programming
                # errors still surface.
                try:
                    metadata_query = f"SELECT key, value FROM parquet_kv_metadata('{dataset_url}')"
                    metadata_results = conn.execute(metadata_query).fetchall()
                except duckdb.Error:
                    metadata_results = []
            finally:
                conn.close()

            # Check for geometry column: first column named 'geometry' or
            # containing 'geom'.
            geometry_column = next(
                (
                    col_name
                    for col_name, *_ in schema
                    if col_name == 'geometry' or 'geom' in col_name.lower()
                ),
                None,
            )
            has_geometry = geometry_column is not None

            # Check for bbox metadata. Would need more parsing to get the
            # actual bbox column, so it stays None.
            has_bbox = any(key == b"geo" for key, _value in metadata_results)
            bbox_column = None

            # Create validation results based on our checks
            validation_results = {
                'has_geometry': has_geometry,
                'geometry_column': geometry_column,
                'has_bbox': has_bbox,
                'bbox_column': bbox_column,
                'schema': schema
            }

            # Now run the worker with validation results
            worker = Worker(
                dataset_url=dataset_url,
                extent=extent,
                output_file=output_file,
                iface=mock_iface,
                validation_results=validation_results
            )

            # Mock signals for worker
            worker.finished = MagicMock()
            worker.error = MagicMock()
            worker.progress = MagicMock()
            worker.percent = MagicMock()
            worker.info = MagicMock()
            worker.load_layer = MagicMock()
            worker.file_size_warning = MagicMock()

            worker.run()

            # Any emitted error fails the test. pytest.fail is used instead of
            # ``assert False`` so the check survives ``python -O``.
            if worker.error.emit.called:
                error_message = worker.error.emit.call_args[0][0]
                print(f"Worker encountered error: {error_message}")
                pytest.fail(f"Worker should not encounter spatial extension error: {error_message}")

            # Check finished signal was emitted
            worker.finished.emit.assert_called_once()

            # Verify output file was created
            assert os.path.exists(output_file)

            # Verify the output is readable and carries geometry
            conn = duckdb.connect()
            try:
                conn.execute("INSTALL spatial;")
                conn.execute("LOAD spatial;")

                # Check we can read the output file
                result = conn.execute(f"SELECT COUNT(*) FROM read_parquet('{output_file}')").fetchone()
                assert result[0] > 0  # Should have filtered some features

                # Check geometry column exists
                schema_result = conn.execute(f"DESCRIBE SELECT * FROM read_parquet('{output_file}')").fetchall()
                column_names = [row[0] for row in schema_result]
                assert 'geometry' in column_names

                # Find out what type the geometry column has
                geom_col_type = next(
                    (row[1] for row in schema_result if row[0] == 'geometry'),
                    None,
                )

                # If it's already GEOMETRY type, don't use ST_GeomFromWKB
                if geom_col_type and 'GEOMETRY' in geom_col_type.upper():
                    geom_result = conn.execute(f"""
                        SELECT ST_AsText(geometry) as wkt
                        FROM read_parquet('{output_file}')
                        LIMIT 1
                    """).fetchone()
                else:
                    # It's still BLOB, so convert it
                    geom_result = conn.execute(f"""
                        SELECT ST_AsText(ST_GeomFromWKB(geometry)) as wkt
                        FROM read_parquet('{output_file}')
                        LIMIT 1
                    """).fetchone()

                assert geom_result is not None
                assert 'LINESTRING' in geom_result[0] or 'POINT' in geom_result[0] or 'POLYGON' in geom_result[0]
            finally:
                conn.close()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense, or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free
248 | Software Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | <one line to give the program's name and a brief idea of what it does.>
294 | Copyright (C) <year> <name of author>
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | <signature of Ty Coon>, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
--------------------------------------------------------------------------------
/gpq_downloader/dialog.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import requests
4 |
5 | from qgis.PyQt.QtWidgets import (
6 | QMessageBox,
7 | QDialog,
8 | QVBoxLayout,
9 | QHBoxLayout,
10 | QLabel,
11 | QLineEdit,
12 | QPushButton,
13 | QComboBox,
14 | QProgressDialog,
15 | QRadioButton,
16 | QStackedWidget,
17 | QWidget,
18 | QCheckBox,
19 | )
20 | from qgis.PyQt.QtCore import pyqtSignal, Qt, QThread
21 | from qgis.core import QgsSettings
22 | import os
23 | from .utils import ValidationWorker
24 |
25 |
26 | class DataSourceDialog(QDialog):
27 | validation_complete = pyqtSignal(bool, str, dict)
28 |
def __init__(self, parent=None, iface=None):
    """Build the dialog used to choose a GeoParquet data source.

    Loads the bundled ``data/presets.json``, creates one page per source
    (Custom URL, Overture Maps, Source Cooperative, OpenStreetMap) inside
    a stacked widget, wires the signals and restores saved checkbox states.

    Args:
        parent: Optional parent widget forwarded to ``QDialog``.
        iface: QGIS interface object, stored on ``self.iface``.
    """
    super().__init__(parent)
    self.iface = iface
    self.validation_thread = None
    self.validation_worker = None
    self.progress_message = None
    # Defined up front so cleanup code never has to probe for the attribute.
    self.progress_dialog = None
    self.requires_validation = True
    self.setWindowTitle("GeoParquet Data Source")
    self.setMinimumWidth(500)

    base_path = os.path.dirname(os.path.abspath(__file__))
    presets_path = os.path.join(base_path, "data", "presets.json")
    # Read as UTF-8 explicitly instead of relying on the locale default.
    with open(presets_path, "r", encoding="utf-8") as f:
        self.PRESET_DATASETS = json.load(f)

    layout = QVBoxLayout()

    # Source selector: one radio button per data source, side by side.
    radio_layout = QHBoxLayout()
    self.overture_radio = QRadioButton("Overture Maps")
    self.sourcecoop_radio = QRadioButton("Source Cooperative")
    self.osm_radio = QRadioButton("OpenStreetMap")
    self.custom_radio = QRadioButton("Custom URL")
    for radio in (
        self.overture_radio,
        self.sourcecoop_radio,
        self.osm_radio,
        self.custom_radio,
    ):
        radio_layout.addWidget(radio)
        # Persist the chosen source whenever a radio button is released.
        radio.released.connect(self.save_radio_button_state)
    layout.addLayout(radio_layout)

    # Spacing between the radio buttons and the page content.
    layout.addSpacing(10)

    # The insertion order fixes the stack indexes used by the radio
    # handlers below: 0 custom, 1 Overture, 2 Source Cooperative, 3 OSM.
    self.stack = QStackedWidget()
    pages = (
        self._build_custom_page(),
        self._build_overture_page(),
        self._build_sourcecoop_page(),
        self._build_osm_page(),
    )
    for page in pages:
        self.stack.addWidget(page)
    layout.addWidget(self.stack)

    # OK / Cancel buttons.
    button_layout = QHBoxLayout()
    self.ok_button = QPushButton("OK")
    self.cancel_button = QPushButton("Cancel")
    button_layout.addWidget(self.ok_button)
    button_layout.addWidget(self.cancel_button)
    layout.addLayout(button_layout)

    self.setLayout(layout)

    # Switch the visible page when a radio button is toggled.
    self.custom_radio.toggled.connect(lambda: self.stack.setCurrentIndex(0))
    self.overture_radio.toggled.connect(lambda: self.stack.setCurrentIndex(1))
    self.sourcecoop_radio.toggled.connect(lambda: self.stack.setCurrentIndex(2))
    self.osm_radio.toggled.connect(lambda: self.stack.setCurrentIndex(3))
    self.ok_button.clicked.connect(self.validate_and_accept)
    self.cancel_button.clicked.connect(self.reject)

    # Show the link for the initially selected Source Cooperative dataset.
    self.update_sourcecoop_link(self.sourcecoop_combo.currentText())

    # Restore saved states before hooking up the save handlers.
    self.load_checkbox_states()

    # Save the checkbox states whenever any checkbox is toggled.
    for checkbox in self.overture_checkboxes.values():
        checkbox.toggled.connect(self.save_checkbox_states)
    for checkbox in self.base_subtype_checkboxes.values():
        checkbox.toggled.connect(self.save_checkbox_states)
    for checkbox in self.osm_checkboxes.values():
        checkbox.toggled.connect(self.save_checkbox_states)

    # Also save when the OK button is clicked.
    self.ok_button.clicked.connect(self.save_checkbox_states)


def _build_custom_page(self):
    """Create the Custom URL page holding the ``url_input`` line edit."""
    page = QWidget()
    page_layout = QVBoxLayout()
    self.url_input = QLineEdit()
    self.url_input.setPlaceholderText(
        "Enter URL to Parquet file or folder (s3:// or https://)"
    )
    page_layout.addWidget(self.url_input)
    page.setLayout(page_layout)
    return page


def _build_overture_page(self):
    """Create the Overture Maps page: theme checkboxes plus Base subtypes."""
    page = QWidget()
    page_layout = QVBoxLayout()
    overture_presets = self.PRESET_DATASETS['overture']

    # One checkbox per theme in a single row; 'base' gets its own section.
    theme_row = QHBoxLayout()
    self.overture_checkboxes = {}
    for key in overture_presets:
        if key == 'base':
            continue
        checkbox = QCheckBox(key.title())
        self.overture_checkboxes[key] = checkbox
        theme_row.addWidget(checkbox)
    page_layout.addLayout(theme_row)

    # Base section: the checkbox with its subtype row underneath.
    base_group = QWidget()
    base_layout = QVBoxLayout()
    base_layout.setContentsMargins(0, 10, 0, 0)  # top margin

    self.base_checkbox = QCheckBox("Base")
    self.overture_checkboxes['base'] = self.base_checkbox
    base_layout.addWidget(self.base_checkbox)

    self.base_subtype_widget = QWidget()
    subtype_row = QHBoxLayout()
    subtype_row.setContentsMargins(20, 0, 0, 0)  # left margin for indentation

    self.base_subtype_checkboxes = {}
    subtype_display_names = {
        'infrastructure': 'Infrastructure',
        'land': 'Land',
        'land_cover': 'Land Cover',
        'land_use': 'Land Use',
        'water': 'Water',
        'bathymetry': 'Bathymetry'
    }
    for subtype in overture_presets['base']['subtypes']:
        # Derive a label for subtypes missing from the table above rather
        # than failing with KeyError when the presets gain a new subtype.
        label = subtype_display_names.get(
            subtype, subtype.replace('_', ' ').title()
        )
        checkbox = QCheckBox(label)
        self.base_subtype_checkboxes[subtype] = checkbox
        subtype_row.addWidget(checkbox)

    self.base_subtype_widget.setLayout(subtype_row)
    self.base_subtype_widget.hide()

    base_layout.addWidget(self.base_subtype_widget)
    base_group.setLayout(base_layout)
    page_layout.addWidget(base_group)

    # Toggling Base shows/hides the subtype row and resizes the dialog.
    self.base_checkbox.toggled.connect(self.base_subtype_widget.setVisible)
    self.base_checkbox.toggled.connect(
        lambda checked: self.adjust_dialog_width(checked, 100)
    )

    page.setLayout(page_layout)
    return page


def _build_sourcecoop_page(self):
    """Create the Source Cooperative page: dataset combo box and link label."""
    page = QWidget()
    page_layout = QVBoxLayout()

    self.sourcecoop_combo = QComboBox()
    self.sourcecoop_combo.addItems(
        sorted([
            dataset["display_name"]
            for dataset in self.PRESET_DATASETS["source_cooperative"].values()
        ], key=str.lower)
    )
    page_layout.addWidget(self.sourcecoop_combo)

    self.sourcecoop_link = QLabel()
    self.sourcecoop_link.setOpenExternalLinks(True)
    self.sourcecoop_link.setWordWrap(True)
    page_layout.addWidget(self.sourcecoop_link)

    # Refresh the link whenever another dataset is selected.
    self.sourcecoop_combo.currentTextChanged.connect(self.update_sourcecoop_link)
    page.setLayout(page_layout)
    return page


def _build_osm_page(self):
    """Create the OpenStreetMap page: one checkbox per preset plus a label."""
    page = QWidget()
    page_layout = QVBoxLayout()

    checkbox_row = QHBoxLayout()
    self.osm_checkboxes = {}
    for key in self.PRESET_DATASETS['openstreetmap']:
        checkbox = QCheckBox(key.title())
        self.osm_checkboxes[key] = checkbox
        checkbox_row.addWidget(checkbox)
    page_layout.addLayout(checkbox_row)

    self.osm_link = QLabel()
    self.osm_link.setText(
        'Data from LayerCake GeoParquet files'
    )
    self.osm_link.setOpenExternalLinks(True)
    self.osm_link.setWordWrap(True)
    page_layout.addWidget(self.osm_link)

    page.setLayout(page_layout)
    return page
242 |
def save_radio_button_state(self) -> None:
    """Store the label of the checked source radio button in the settings.

    The buttons are probed in the order custom, Overture, Source
    Cooperative, OpenStreetMap; when none is checked the custom button's
    label is stored.
    """
    candidates = (
        self.custom_radio,
        self.overture_radio,
        self.sourcecoop_radio,
        self.osm_radio,
    )
    chosen = next(
        (radio for radio in candidates if radio.isChecked()),
        self.custom_radio,
    )

    settings = QgsSettings()
    settings.setValue(
        "gpq_downloader/radio_selection",
        chosen.text(),
        section=QgsSettings.Plugins,
    )
260 |
def handle_overture_selection(self, text):
    """Show the base subtype widget only when *text* equals ``"Base"``."""
    is_base = text == "Base"
    self.base_subtype_widget.setVisible(is_base)
264 |
def validate_and_accept(self):
    """Validate the current selection and accept the dialog when usable.

    Preset sources (Overture, OpenStreetMap, Source Cooperative) are
    accepted as soon as at least one dataset is selected. A custom URL
    must start with a supported scheme and is then checked by a
    ``ValidationWorker`` running on a ``QThread`` while a progress
    dialog is shown; the dialog is accepted from the worker's result
    handlers, not from here.
    """
    try:
        urls = self.get_urls()
    except requests.RequestException as error:
        # get_urls() performs an HTTP request for Overture; report a
        # network failure instead of letting it escape from the slot.
        QMessageBox.warning(
            self,
            "Validation Error",
            f"Could not retrieve dataset information: {error}",
        )
        return

    if not urls:
        QMessageBox.warning(self, "Validation Error", "Please select at least one dataset")
        return

    # For Overture and OSM datasets, we know they're valid so we can skip validation
    if self.overture_radio.isChecked() or self.osm_radio.isChecked():
        self.accept()
        return

    # For other preset sources, we can skip validation
    if not self.custom_radio.isChecked():
        self.accept()
        return

    # Custom URL: check the scheme first, then validate in the background.
    allowed_prefixes = ('http://', 'https://', 's3://', 'file://', 'hf://')
    if not all(candidate.startswith(allowed_prefixes) for candidate in urls):
        QMessageBox.warning(self, "Validation Error",
                            "URL must start with http://, https://, s3://, hf://, or file://")
        return

    # get_urls() yields exactly one entry for a custom URL; bind it
    # explicitly rather than reusing a variable leaked from a loop.
    url = urls[0]

    # Create progress dialog for validation
    self.progress_dialog = QProgressDialog("Validating URL...", "Cancel", 0, 0, self)
    self.progress_dialog.setWindowModality(Qt.WindowModality.WindowModal)
    self.progress_dialog.canceled.connect(self.cancel_validation)

    # Create validation worker and move it to its own thread
    self.validation_worker = ValidationWorker(url, self.iface, self.iface.mapCanvas().extent())
    self.validation_thread = QThread()
    self.validation_worker.moveToThread(self.validation_thread)

    # Connect signals
    self.validation_thread.started.connect(self.validation_worker.run)
    self.validation_worker.progress.connect(self.progress_dialog.setLabelText)
    self.validation_worker.finished.connect(
        lambda success, message, results: self.handle_validation_result(
            success, message, results
        )
    )
    self.validation_worker.needs_bbox_warning.connect(self.show_bbox_warning)

    # Start validation; exec() blocks until the progress dialog closes.
    self.validation_thread.start()
    self.progress_dialog.exec()
313 |
def handle_validation_result(self, success, message, validation_results):
    """Tear down validation state and report the outcome.

    On failure a warning box with *message* is shown and
    ``validation_complete`` is emitted with ``False``. On success the
    signal is emitted with ``True`` and the dialog is accepted.
    """
    self.cleanup_validation()

    if not success:
        QMessageBox.warning(self, "Validation Error", message)
        self.validation_complete.emit(False, message, validation_results)
        return

    self.validation_complete.emit(True, message, validation_results)
    self.accept()
324 |
def cancel_validation(self):
    """Flag the running worker (if any) as killed, then clean up."""
    worker = self.validation_worker
    if worker:
        # The worker exposes a ``killed`` flag; set it before teardown.
        worker.killed = True
    self.cleanup_validation()
330 |
def cleanup_validation(self):
    """Release the progress dialog, worker and thread used for validation.

    Each resource is handled only when currently set, and its attribute
    is reset to ``None`` afterwards.
    """
    # The attribute may not exist yet, hence getattr with a default.
    dialog = getattr(self, 'progress_dialog', None)
    if dialog:
        dialog.close()
        self.progress_dialog = None

    worker = self.validation_worker
    if worker:
        worker.deleteLater()
        self.validation_worker = None

    thread = self.validation_thread
    if thread:
        # Ask the thread to stop and block until it has finished.
        thread.quit()
        thread.wait()
        thread.deleteLater()
        self.validation_thread = None
346 |
def closeEvent(self, event):
    """Clean up validation resources, then defer to the base close handling."""
    # Release validation resources before the base class handles the event.
    self.cleanup_validation()
    super().closeEvent(event)
351 |
def get_urls(self):
    """Return the list of dataset URLs for the current selection.

    * Custom URL: a one-element list with the stripped text of the URL
      field (possibly an empty string).
    * Overture Maps: one URL per checked theme, built from the theme's
      ``url_template`` and the latest release name fetched over HTTP.
      The "base" theme contributes one URL per checked subtype.
    * Source Cooperative: the URL of the dataset selected in the combo
      box, or an empty list when no preset matches.
    * OpenStreetMap: the URL of every checked layer.

    Returns:
        list[str]: The selected URLs; empty when nothing is selected.

    Raises:
        requests.RequestException: If the Overture release index cannot
            be fetched (connection error, timeout or HTTP error status).
    """
    if self.custom_radio.isChecked():
        return [self.url_input.text().strip()]

    if self.overture_radio.isChecked():
        selected_themes = [
            theme
            for theme, checkbox in self.overture_checkboxes.items()
            if checkbox.isChecked()
        ]
        # Nothing ticked: skip the network round trip entirely.
        if not selected_themes:
            return []

        # A timeout keeps an unreachable host from blocking the caller
        # forever; raise_for_status() turns HTTP errors into exceptions.
        response = requests.get(
            'https://labs.overturemaps.org/data/releases.json', timeout=10
        )
        response.raise_for_status()
        latest_release = response.json()['latest']

        # Themes whose type name is not the theme minus trailing 's'.
        type_overrides = {
            "transportation": "segment",
            "divisions": "division_area",
            "addresses": "*",
        }

        urls = []
        for theme in selected_themes:
            template = self.PRESET_DATASETS['overture'][theme]['url_template']
            if theme == "base":
                # Base expands to one URL per checked subtype.
                urls.extend(
                    template.format(subtype=subtype, release=latest_release)
                    for subtype, subtype_checkbox in self.base_subtype_checkboxes.items()
                    if subtype_checkbox.isChecked()
                )
                continue
            type_str = type_overrides.get(theme, theme.rstrip('s'))
            urls.append(template.format(subtype=type_str, release=latest_release))
        return urls

    if self.sourcecoop_radio.isChecked():
        selection = self.sourcecoop_combo.currentText()
        dataset = next(
            (dataset for dataset in self.PRESET_DATASETS['source_cooperative'].values()
             if dataset['display_name'] == selection),
            None,
        )
        return [dataset['url']] if dataset else []

    if self.osm_radio.isChecked():
        return [
            self.PRESET_DATASETS['openstreetmap'][layer]['url']
            for layer, checkbox in self.osm_checkboxes.items()
            if checkbox.isChecked()
        ]

    return []
389 |
def update_sourcecoop_link(self, selection):
    """Update the info link label for the selected Source Cooperative dataset.

    Args:
        selection: Display name of the dataset chosen in the combo box.

    The label shows a "View dataset info" hyperlink to the dataset's
    ``info_url`` when the preset defines one, and is cleared otherwise.
    """
    # Find the dataset by display_name
    dataset = next(
        (dataset for dataset in self.PRESET_DATASETS['source_cooperative'].values()
         if dataset['display_name'] == selection),
        None,
    )
    if dataset and 'info_url' in dataset:
        info_url = dataset['info_url']
        # The label is set to open external links, so the text has to be
        # an anchor that actually carries the dataset's info URL.
        self.sourcecoop_link.setText(
            f'<a href="{info_url}">View dataset info</a>'
        )
    else:
        self.sourcecoop_link.setText("")
401 |
402 |
def show_bbox_warning(self):
    """Ask whether to continue with a dataset that has no bbox column.

    Any open progress dialog is closed first. Answering "No" emits
    ``validation_complete`` with ``False``; any other answer emits it
    with ``True`` and accepts the dialog.
    """
    # Dismiss the progress dialog, if one exists, before prompting.
    pending = getattr(self, "progress_dialog", None)
    if pending:
        pending.close()
        self.progress_dialog = None

    buttons = QMessageBox.StandardButton
    answer = QMessageBox.warning(
        self,
        "No bbox Column Detected",
        "This dataset doesn't have a bbox column, which means downloads will be slower. "
        "GeoParquet 1.1 files with a bbox column work much better - tell your data provider to upgrade!\n\n"
        "Do you want to continue with the download?",
        buttons.Yes | buttons.No,
        buttons.No,
    )

    # Results reported to listeners when no bbox column is available.
    validation_results = {
        "has_bbox": False,
        "schema": None,
        "bbox_column": None,
        "geometry_column": "geometry",
    }

    if answer == buttons.No:
        self.validation_complete.emit(
            False, "Download cancelled by user.", validation_results
        )
        return

    # The user chose to continue: report success and close the dialog.
    self.validation_complete.emit(
        True, "Validation successful", validation_results
    )
    self.accept()
431 |
def adjust_dialog_width(self, checked, width):
    """Widen the dialog by *width* when *checked*, otherwise narrow it by *width*."""
    delta = width if checked else -width
    self.resize(self.width() + delta, self.height())
438 |
def save_checkbox_states(self) -> None:
    """Write the checked state of every dataset checkbox to the settings.

    Overture themes, base subtypes and OSM layers are stored in the
    plugins section, each under its own key prefix.
    """
    groups = (
        ("gpq_downloader/checkbox_", self.overture_checkboxes),
        ("gpq_downloader/base_subtype_checkbox_", self.base_subtype_checkboxes),
        ("gpq_downloader/osm_checkbox_", self.osm_checkboxes),
    )
    for prefix, checkboxes in groups:
        for key, checkbox in checkboxes.items():
            QgsSettings().setValue(
                f"{prefix}{key}",
                checkbox.isChecked(),
                section=QgsSettings.Plugins,
            )
463 |
def load_checkbox_states(self) -> None:
    """Restore every dataset checkbox from the settings.

    Missing values default to unchecked. Afterwards the base subtype
    widget is shown or hidden to match the Base checkbox.
    """
    groups = (
        ("gpq_downloader/checkbox_", self.overture_checkboxes),
        ("gpq_downloader/base_subtype_checkbox_", self.base_subtype_checkboxes),
        ("gpq_downloader/osm_checkbox_", self.osm_checkboxes),
    )
    for prefix, checkboxes in groups:
        for key, checkbox in checkboxes.items():
            stored = QgsSettings().value(
                f"{prefix}{key}",
                False,
                type=bool,
                section=QgsSettings.Plugins,
            )
            checkbox.setChecked(stored)

    # Keep the subtype row in sync with the restored Base checkbox.
    base_selected = self.base_checkbox.isChecked()
    self.base_subtype_widget.setVisible(base_selected)
497 |
def on_validation_finished(self, success, message, results):
    """Placeholder for handling validation results; currently does nothing.

    TODO: decide how ``results`` should be stored, or remove this stub.
    """
    return None
502 |
--------------------------------------------------------------------------------
/gpq_downloader/plugin.py:
--------------------------------------------------------------------------------
1 | from qgis.PyQt.QtWidgets import (
2 | QAction,
3 | QFileDialog,
4 | QMessageBox,
5 | QDialog,
6 | QVBoxLayout,
7 | QHBoxLayout,
8 | QLabel,
9 | QPushButton,
10 | QComboBox,
11 | QProgressDialog,
12 | QCheckBox,
13 | QWidget,
14 | QLineEdit,
15 | )
16 | from qgis.PyQt.QtGui import QIcon
17 | from qgis.PyQt.QtCore import Qt, QThread
18 | from qgis.core import QgsProject, QgsVectorLayer, QgsSettings
19 | import os
20 | import datetime
21 | from pathlib import Path
22 |
23 | from .dialog import DataSourceDialog
24 | from .utils import Worker
25 |
26 |
27 | class QgisPluginGeoParquet:
def __init__(self, iface):
    """Remember the QGIS interface and prepare the default download folder.

    Args:
        iface: The QGIS interface object the plugin talks to.
    """
    self.iface = iface

    # No download is running and no toolbar action exists yet.
    self.worker = None
    self.worker_thread = None
    self.action = None
    self.output_file = None

    # Save dialogs start in ~/Downloads; create the folder when it is missing.
    downloads = Path.home() / "Downloads"
    self.download_dir = downloads
    downloads.mkdir(parents=True, exist_ok=True)
38 |
def initGui(self):
    """Create the toolbar action (icon + tooltip) that triggers ``run``."""
    plugin_dir = os.path.dirname(os.path.abspath(__file__))
    icon = QIcon(os.path.join(plugin_dir, "icons", "parquet-download.svg"))

    self.action = action = QAction(
        icon, "Download GeoParquet Data", self.iface.mainWindow()
    )
    action.setToolTip("Download GeoParquet Data")
    action.triggered.connect(self.run)

    # Make the action reachable from the QGIS toolbar.
    self.iface.addToolBarIcon(action)
51 |
def unload(self):
    """Release the worker thread and remove the toolbar action.

    While a download thread is still running nothing is torn down; the
    user is shown a warning instead and the method returns early.
    """
    thread = self.worker_thread
    download_running = thread and thread.isRunning()
    if download_running:
        QMessageBox.warning(
            self.iface.mainWindow(),
            "Download in Progress",
            "Please wait for any downloads to complete before unloading the plugin."
        )
        return

    self.cleanup_thread()
    # Take the plugin's action off the toolbar again.
    self.iface.removeToolBarIcon(self.action)
64 |
def run(self, default_source=None):
    """Open the data source dialog and start downloading what the user picked.

    For every URL returned by the dialog a save-file dialog is shown with a
    generated default file name. Cancelling any of those dialogs aborts the
    whole run. The collected ``(url, output_file)`` pairs are then handed to
    ``process_download_queue`` together with the current map canvas extent.

    Args:
        default_source: Not used by this method.
    """
    already_downloading = (
        self.worker is not None
        and self.worker_thread is not None
        and self.worker_thread.isRunning()
    )
    if already_downloading:
        QMessageBox.warning(
            self.iface.mainWindow(),
            "Download in Progress",
            "A download is already in progress. Please wait for it to complete before starting a new download."
        )
        return

    # Forget whatever worker was left over from an earlier download.
    self.worker = None
    self.worker_thread = None

    dialog = DataSourceDialog(self.iface.mainWindow(), self.iface)

    # Re-select the radio button whose label was stored last time.
    selected_name = QgsSettings().value("gpq_downloader/radio_selection", section=QgsSettings.Plugins)
    radios = (
        dialog.overture_radio,
        dialog.sourcecoop_radio,
        dialog.osm_radio,
        dialog.custom_radio,
    )
    for radio in radios:
        if radio.text() == selected_name:
            radio.setChecked(True)
    if not selected_name:
        dialog.overture_radio.setChecked(True)

    if dialog.exec() != QDialog.DialogCode.Accepted:
        return

    urls = dialog.get_urls()
    extent = self.iface.mapCanvas().extent()

    # Characters cleaned out of Source Cooperative dataset names.
    name_cleanup = str.maketrans({' ': '_', '/': '_', '(': None, ')': None})

    # Ask for every target file first; downloading starts afterwards.
    download_queue = []
    for url in urls:
        stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        subject = 'dataset'

        if dialog.overture_radio.isChecked():
            # Theme (and, for base, the type) are read out of the URL itself.
            theme = url.split('theme=')[1].split('/')[0]
            subject = theme
            filename = f"overture_{theme}_{stamp}.parquet"
            if 'type=' in url and theme == 'base':
                type_str = url.split('type=')[1].split('/')[0]
                filename = f"overture_base_{type_str}_{stamp}.parquet"
        elif dialog.sourcecoop_radio.isChecked():
            dataset_name = dialog.sourcecoop_combo.currentText()
            clean_name = dataset_name.lower().translate(name_cleanup)
            filename = f"sourcecoop_{clean_name}_{stamp}.parquet"
        elif dialog.osm_radio.isChecked():
            # The last path segment of the URL names the layer.
            layer_name = url.rsplit('/', 1)[-1].replace('.parquet', '')
            filename = f"osm_{layer_name}_{stamp}.parquet"
        else:
            filename = f"custom_download_{stamp}.parquet"

        output_file, selected_filter = QFileDialog.getSaveFileName(
            self.iface.mainWindow(),
            f"Save Data for {subject}",
            str(self.download_dir / filename),
            "GeoParquet (*.parquet);;DuckDB Database (*.duckdb);;GeoPackage (*.gpkg);;FlatGeobuf (*.fgb);;GeoJSON (*.geojson)"
        )

        if not output_file:
            # A cancelled save dialog abandons the complete run.
            return
        download_queue.append((url, output_file))

    # Downloads are processed one at a time.
    self.process_download_queue(download_queue, extent)
139 |
def handle_validation_complete(
    self, success, message, validation_results, url, extent, dialog
):
    """Handle validation completion and start download if successful.

    On failure the message is shown in a warning box. On success the user
    is asked for a target file (with a generated default name) and, unless
    that dialog is cancelled, the download is started.

    Args:
        success: Whether validation succeeded.
        message: Text shown to the user when validation failed.
        validation_results: Passed through to ``download_and_save``.
        url: Dataset URL to download.
        extent: Extent passed through to ``download_and_save``.
        dialog: The data source dialog whose selection names the file.
    """
    if not success:
        QMessageBox.warning(self.iface.mainWindow(), "Validation Error", message)
        return

    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # Build the suggested file name from the dialog selection.
    if dialog.overture_radio.isChecked():
        theme = dialog.overture_combo.currentText().lower()
        if theme != "base":
            filename = f"overture_{theme}_{stamp}.parquet"
        else:
            subtype = dialog.base_subtype_combo.currentText()
            filename = f"overture_base_{subtype}_{stamp}.parquet"
    elif dialog.sourcecoop_radio.isChecked():
        selection = dialog.sourcecoop_combo.currentText()
        # Spaces and slashes become underscores in the file name.
        safe_name = selection.lower().translate(str.maketrans(" /", "__"))
        filename = f"sourcecoop_{safe_name}_{stamp}.parquet"
    else:
        # Custom URL
        filename = f"custom_download_{stamp}.parquet"

    output_file, selected_filter = QFileDialog.getSaveFileName(
        self.iface.mainWindow(),
        "Save Data",
        str(self.download_dir / filename),
        "GeoParquet (*.parquet);;DuckDB Database (*.duckdb);;GeoPackage (*.gpkg);;FlatGeobuf (*.fgb);;GeoJSON (*.geojson)",
    )

    if not output_file:
        return

    self.output_file = output_file
    self.download_and_save(url, extent, output_file, validation_results)
181 |
def download_and_save(self, dataset_url, extent, output_file, validation_results):
    """Start a single download on a fresh worker thread.

    Args:
        dataset_url: URL of the dataset to download.
        extent: Extent handed to the worker.
        output_file: Path the worker writes to.
        validation_results: Validation results handed to the worker.
    """
    # Get rid of any previous worker/thread before starting over.
    self.cleanup_thread()

    # The dialog must exist before setup_worker wires its cancel signal.
    self.progress_dialog = self.create_progress_dialog("Downloading Data")

    worker, thread = self.setup_worker(
        dataset_url, extent, output_file, validation_results
    )
    self.worker = worker
    self.worker_thread = thread

    self.progress_dialog.show()
    thread.start()
197 |
def handle_error(self, message):
    """Close the progress dialog (if there is one) and show the error.

    Args:
        message: Error text displayed in a critical message box.
    """
    # __init__ does not create progress_dialog, so guard the access the same
    # way update_progress and cleanup_thread do; otherwise an AttributeError
    # here would hide the real error message from the user.
    progress_dialog = getattr(self, "progress_dialog", None)
    if progress_dialog is not None:
        progress_dialog.close()
    QMessageBox.critical(self.iface.mainWindow(), "Error", message)
201 |
def update_progress(self, message):
    """Show ``message`` as the progress dialog label, if a dialog exists."""
    if not hasattr(self, "progress_dialog"):
        return
    self.progress_dialog.setLabelText(message)
205 |
def cancel_download(self):
    """Kill the current worker (if any) and tear down its thread."""
    worker = self.worker
    if worker:
        worker.kill()
    self.cleanup_thread()
210 |
def cleanup_thread(self):
    """Stop the worker thread, drop worker references, close the dialog.

    When a thread exists the worker is killed, the thread is asked to quit
    and waited for, and both references are reset to None. The progress
    dialog, if one was ever created, is closed in any case.
    """
    thread = self.worker_thread
    if thread is not None:
        if self.worker:
            self.worker.kill()
        thread.quit()
        thread.wait()
        self.worker_thread = None
        self.worker = None

    if hasattr(self, "progress_dialog"):
        self.progress_dialog.close()
221 |
def load_layer(self, output_file):
    """Load the layer into QGIS if GeoParquet is supported.

    For ``.parquet`` files a throw-away layer is opened first; when it is
    not valid, an explanatory dialog is shown and nothing is added to the
    project. Otherwise the file is opened through the "ogr" provider under
    its file stem and added to the current project.

    Args:
        output_file: Path of the downloaded file.
    """
    is_parquet = output_file.lower().endswith(".parquet")
    if is_parquet:
        # Probe whether this QGIS build can open GeoParquet at all.
        probe = QgsVectorLayer(output_file, "test", "ogr")
        if not probe.isValid():
            notice = QDialog(self.iface.mainWindow())
            notice.setWindowTitle("GeoParquet Support Not Available")
            notice.setMinimumWidth(400)

            explanation = QLabel(
                "Data has been successfully saved to GeoParquet file.\n\n"
                "Note: Your current QGIS installation does not support reading GeoParquet files directly. You can select GeoPackage for your output format to view immediately.\n\n"
                "To view GeoParquet files in QGIS, you'll need to install QGIS with GDAL 3.8 "
                "or higher with 'libgdal-arrow-parquet'. You can find instructions at:"
            )
            explanation.setWordWrap(True)

            link = QLabel()
            link.setText(
                'Installing GeoParquet Support in QGIS'
            )
            link.setOpenExternalLinks(True)

            ok_button = QPushButton("OK")
            ok_button.clicked.connect(notice.accept)

            # Stack the widgets top to bottom: text, link, button.
            layout = QVBoxLayout()
            for widget in (explanation, link, ok_button):
                layout.addWidget(widget)

            notice.setLayout(layout)
            notice.exec()
            return

    # The layer is named after the file, without its extension.
    layer = QgsVectorLayer(output_file, Path(output_file).stem, "ogr")
    if not layer.isValid():
        QMessageBox.critical(
            self.iface.mainWindow(),
            "Error",
            f"Failed to load the layer from {output_file}",
        )
        return

    QgsProject.instance().addMapLayer(layer)
270 |
def show_info(self, message):
    """Display ``message`` to the user in an information box titled "Success"."""
    parent = self.iface.mainWindow()
    QMessageBox.information(parent, "Success", message)
274 |
def handle_large_file_warning(self, estimated_size):
    """Handle warning about large GeoJSON file size with a more streamlined UI.

    The user can save in another format instead ("Save As..."), proceed with
    GeoJSON anyway, or cancel. The first two restart the download on a fresh
    worker; cancelling moves on to the remaining queue, or cleans up when
    nothing is left.

    Args:
        estimated_size: Estimated output size in MB (values of 1024 and
            above are displayed in GB).
    """
    if getattr(self, 'worker', None) is None:
        QMessageBox.critical(self.iface.mainWindow(), "Error", "Download session lost. Please try again.")
        return

    # Snapshot everything needed from the current worker before it is replaced.
    worker_info = {
        'dataset_url': self.worker.dataset_url,
        'extent': self.worker.extent,
        'iface': self.worker.iface,
        'validation_results': self.worker.validation_results,
        'output_file': self.worker.output_file,
        'remaining_queue': getattr(self.worker, 'remaining_queue', [])
    }

    if hasattr(self, 'progress_dialog') and self.progress_dialog:
        self.progress_dialog.close()

    dialog = QDialog(self.iface.mainWindow())
    dialog.setWindowTitle("Large File Warning")
    dialog.setMinimumWidth(400)
    layout = QVBoxLayout()

    if estimated_size >= 1024:
        size_str = f"{estimated_size/1024:.2f} GB"
    else:
        size_str = f"{estimated_size:.0f} MB"

    msg = QLabel(
        f"The estimated file size is {size_str}. Large GeoJSON files can be slow to process and load.\n\n"
    )
    msg.setWordWrap(True)
    layout.addWidget(msg)

    # Alternative format picker with its "Save As..." button.
    format_group = QVBoxLayout()
    format_group.addWidget(QLabel("Alternative formats (recommended for large datasets):"))

    format_row = QHBoxLayout()
    format_combo = QComboBox()
    format_combo.addItems([
        "FlatGeobuf (*.fgb)",
        "GeoPackage (*.gpkg)",
        "GeoParquet (*.parquet)"
    ])
    format_row.addWidget(format_combo)
    save_button = QPushButton("Save As...")
    format_row.addWidget(save_button)
    format_group.addLayout(format_row)
    layout.addLayout(format_group)

    button_box = QHBoxLayout()
    proceed_button = QPushButton("Proceed with GeoJSON anyway")
    cancel_button = QPushButton("Cancel")
    button_box.addWidget(proceed_button)
    button_box.addWidget(cancel_button)
    layout.addLayout(button_box)

    dialog.setLayout(layout)

    # Result codes: 1 = save in another format, 2 = keep GeoJSON,
    # anything else (reject) = cancel.
    cancel_button.clicked.connect(dialog.reject)
    save_button.clicked.connect(lambda: dialog.done(1))
    proceed_button.clicked.connect(lambda: dialog.done(2))

    while True:
        result = dialog.exec()

        if result == 1:
            selected_format = format_combo.currentText()
            # "FlatGeobuf (*.fgb)" -> ".fgb"
            extension = selected_format.split("*")[1].rstrip(")")
            suggested_file = os.path.splitext(worker_info['output_file'])[0] + extension

            output_file, _ = QFileDialog.getSaveFileName(
                self.iface.mainWindow(),
                "Save Data",
                suggested_file,
                selected_format
            )
            if not output_file:
                # Save dialog cancelled: show the warning dialog again.
                continue

            self.output_file = output_file
            self._restart_download(worker_info, output_file)
            return

        if result == 2:
            self._restart_download(
                worker_info, worker_info['output_file'], size_warning_accepted=True
            )
            return

        # Cancelled: skip this dataset but keep going with the rest.
        if worker_info['remaining_queue']:
            self.process_download_queue(worker_info['remaining_queue'], worker_info['extent'])
        else:
            self.cleanup_thread()
        return

def _restart_download(self, worker_info, output_file, size_warning_accepted=False):
    """Create a fresh progress dialog, worker and thread, then start them.

    Args:
        worker_info: Snapshot of the previous worker's settings, as built by
            ``handle_large_file_warning``.
        output_file: Path the new worker writes to.
        size_warning_accepted: Stored on the new worker as
            ``size_warning_accepted``.
    """
    # NOTE(review): the previous QThread is replaced without quit()/wait(),
    # exactly as before this refactoring; confirm that is safe.
    self.progress_dialog = self.create_progress_dialog()

    remaining_queue = worker_info['remaining_queue']
    extent = worker_info['extent']

    self.worker = Worker(
        worker_info['dataset_url'],
        extent,
        output_file,
        worker_info['iface'],
        worker_info['validation_results']
    )
    self.worker.remaining_queue = remaining_queue
    self.worker.size_warning_accepted = size_warning_accepted
    self.worker_thread = QThread()
    self.worker.moveToThread(self.worker_thread)

    self.worker_thread.started.connect(self.worker.run)
    self.worker.error.connect(self.handle_error)
    self.worker.load_layer.connect(self.load_layer)
    self.worker.info.connect(self.show_info)
    self.worker.file_size_warning.connect(self.handle_large_file_warning)
    self.worker.finished.connect(lambda: self.handle_download_complete(remaining_queue, extent))
    self.worker.progress.connect(self.update_progress)
    self.progress_dialog.canceled.connect(self.cancel_download)

    self.progress_dialog.show()
    self.worker_thread.start()
429 |
def create_progress_dialog(
    self, title="Downloading Data", message="Starting download..."
):
    """Create and return a configured progress dialog.

    Args:
        title: Window title of the dialog.
        message: Initial label text.

    Returns:
        A non-modal QProgressDialog with a "Cancel" button, a 0-0 range and
        a minimum duration of 0, parented to the QGIS main window.
    """
    dlg = QProgressDialog(message, "Cancel", 0, 0, self.iface.mainWindow())
    dlg.setWindowTitle(title)
    dlg.setWindowModality(Qt.WindowModality.NonModal)
    dlg.setMinimumDuration(0)
    return dlg
441 |
def setup_worker(self, dataset_url, extent, output_file, validation_results):
    """Create a worker on its own thread and connect all of its signals.

    ``self.progress_dialog`` must already exist, because its ``canceled``
    signal is connected here.

    Returns:
        The ``(worker, thread)`` pair, which is also stored on ``self``.
    """
    worker = Worker(
        dataset_url, extent, output_file, self.iface, validation_results
    )
    self.worker = worker
    thread = QThread()
    self.worker_thread = thread
    worker.moveToThread(thread)

    # Thread start runs the worker; worker signals feed the plugin's handlers.
    thread.started.connect(worker.run)
    worker.error.connect(self.handle_error)
    worker.load_layer.connect(self.load_layer)
    worker.info.connect(self.show_info)
    worker.file_size_warning.connect(self.handle_large_file_warning)
    worker.finished.connect(self.cleanup_thread)
    worker.progress.connect(self.update_progress)
    self.progress_dialog.canceled.connect(self.cancel_download)

    return worker, thread
461 |
def process_download_queue(self, download_queue, extent):
    """Process downloads sequentially.

    Starts the first entry of the queue on a new worker thread. When that
    worker finishes, ``handle_download_complete`` continues with the rest.

    Args:
        download_queue: Sequence of ``(url, output_file)`` pairs.
        extent: Extent handed to every worker.
    """
    if not download_queue:
        return

    url, output_file = download_queue[0]
    remaining_queue = download_queue[1:]

    is_overture = 'overture' in url

    # Overture URLs carry the theme (and type) used for a display name.
    layer_name = None
    if is_overture and 'theme=' in url:
        theme = url.split('theme=')[1].split('/')[0]
        layer_name = f"Overture {theme.title()}"
        # Base layers also show their subtype. The 'type=' check mirrors
        # run() and avoids an IndexError when the URL has no type part.
        if theme == 'base' and 'type=' in url:
            subtype = url.split('type=')[1].split('/')[0]
            layer_name = f"Overture {theme.title()} - {subtype.title()}"

    # Create validation results (we know Overture URLs are valid)
    validation_results = {'has_bbox': True, 'bbox_column': 'bbox', 'geometry_column': 'geometry'}

    # The known addresses datasets name their geometry column 'geom'.
    if not is_overture and ('addresses.nobbox.pq' in url or 'addresses.pq' in url):
        validation_results['geometry_column'] = 'geom'

    self.progress_dialog = self.create_progress_dialog(
        message="Starting download..." if not layer_name else f"Starting {layer_name} download..."
    )

    # Create worker with layer name
    self.worker = Worker(url, extent, output_file, self.iface, validation_results, layer_name)
    # handle_large_file_warning reads the remaining queue from the worker.
    self.worker.remaining_queue = remaining_queue
    self.worker_thread = QThread()
    self.worker.moveToThread(self.worker_thread)

    self.worker_thread.started.connect(self.worker.run)
    self.worker.error.connect(self.handle_error)
    self.worker.load_layer.connect(self.load_layer)
    self.worker.info.connect(self.show_info)
    self.worker.file_size_warning.connect(self.handle_large_file_warning)
    self.worker.finished.connect(lambda: self.handle_download_complete(remaining_queue, extent))
    self.worker.progress.connect(self.update_progress)
    self.progress_dialog.canceled.connect(self.cancel_download)

    # Show the progress dialog and start the thread
    self.progress_dialog.show()
    self.worker_thread.start()
532 |
def handle_download_complete(self, remaining_queue, extent):
    """Clean up after a finished download and continue with the queue.

    Args:
        remaining_queue: Downloads still to be processed; may be empty.
        extent: Extent passed on to the next download.
    """
    self.cleanup_thread()
    if not remaining_queue:
        return
    # More work is queued: start the next download.
    self.process_download_queue(remaining_queue, extent)
539 |
540 |
def classFactory(iface):
    """Build the plugin object for the given QGIS interface."""
    plugin = QgisPluginGeoParquet(iface)
    return plugin
543 |
--------------------------------------------------------------------------------
/gpq_downloader/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from qgis.core import QgsCoordinateReferenceSystem, QgsCoordinateTransform, QgsProject
4 | from qgis.PyQt.QtCore import pyqtSignal, QObject
5 | import os
6 | import duckdb
7 |
8 | from . import logger
9 |
10 |
def transform_bbox_to_4326(extent, source_crs):
    """
    Reproject a bounding box into EPSG:4326 (WGS84).

    Args:
        extent (QgsRectangle): The bounding box to reproject
        source_crs (QgsCoordinateReferenceSystem): The CRS the extent is in

    Returns:
        QgsRectangle: The extent in EPSG:4326 (returned unchanged when it
        already is in that CRS), or None if either argument is None
    """
    if extent is None or source_crs is None:
        return None

    target_crs = QgsCoordinateReferenceSystem("EPSG:4326")
    needs_reprojection = source_crs != target_crs
    if not needs_reprojection:
        return extent

    reprojector = QgsCoordinateTransform(source_crs, target_crs, QgsProject.instance())
    return reprojector.transformBoundingBox(extent)
32 |
33 |
34 | class Worker(QObject):
35 | finished = pyqtSignal()
36 | error = pyqtSignal(str)
37 | load_layer = pyqtSignal(str)
38 | info = pyqtSignal(str)
39 | progress = pyqtSignal(str)
40 | percent = pyqtSignal(int)
41 | file_size_warning = pyqtSignal(float) # Signal for file size warnings (in MB)
42 |
def __init__(self, dataset_url, extent, output_file, iface, validation_results, layer_name=None):
    """Store the parameters of one download job.

    Args:
        dataset_url: URL of the parquet dataset to read.
        extent: Extent used to limit the download.
        output_file: Path of the file to write.
        iface: The QGIS interface object.
        validation_results: Dict of validation results for the dataset.
        layer_name: Optional display name used in progress messages.
    """
    super().__init__()

    # What to download and where to put it.
    self.dataset_url = dataset_url
    self.output_file = output_file
    self.extent = extent
    self.layer_name = layer_name

    self.iface = iface
    self.validation_results = validation_results

    # Flags always start out False on a new worker.
    self.killed = False
    self.size_warning_accepted = False
54 |
def get_bbox_info_from_metadata(self, conn):
    """Read GeoParquet metadata to find bbox column info.

    Queries the file's key/value metadata through DuckDB, parses the ``geo``
    entry as JSON and returns its ``covering.bbox`` object. A top-level
    ``covering`` is checked first, then the per-column metadata, preferring
    ``primary_column``.

    Args:
        conn: DuckDB connection used to run ``parquet_kv_metadata``.

    Returns:
        The ``covering.bbox`` object from the ``geo`` metadata, or None when
        the file has no such entry or it cannot be parsed.
    """
    import traceback

    self.progress.emit("Checking for bbox metadata...")

    # Double single quotes so the URL cannot break out of the SQL literal.
    safe_url = self.dataset_url.replace("'", "''")
    metadata_query = f"SELECT key, value FROM parquet_kv_metadata('{safe_url}')"
    metadata_results = conn.execute(metadata_query).fetchall()

    for key, value in metadata_results:
        if key not in (b"geo", "geo"):
            continue
        try:
            raw = value.decode() if isinstance(value, (bytes, bytearray)) else value
            # Parse in Python: splicing the JSON into a SQL literal breaks on
            # any single quote, and a dict is needed for the lookups below.
            geo_metadata = json.loads(raw)
            if not isinstance(geo_metadata, dict):
                continue

            candidates = [geo_metadata.get("covering")]

            # GeoParquet keeps "covering" in each column's metadata.
            columns = geo_metadata.get("columns")
            if isinstance(columns, dict):
                primary = geo_metadata.get("primary_column")
                ordered = [columns.get(primary)] if primary in columns else []
                ordered += [meta for name, meta in columns.items() if name != primary]
                candidates += [
                    meta.get("covering") for meta in ordered if isinstance(meta, dict)
                ]

            for covering in candidates:
                if isinstance(covering, dict) and "bbox" in covering:
                    return covering["bbox"]
        except Exception as e:
            # Best effort: log the problem and try the next metadata entry.
            logger.log(f"\nError parsing geo metadata: {str(e)}", 2)
            logger.log(f"Exception type: {type(e)}", 2)
            logger.log(traceback.format_exc(), 2)
            continue
    return None
97 |
98 | def run(self):
99 | try:
100 | layer_info = f" for {self.layer_name}" if self.layer_name else ""
101 | self.progress.emit(f"Connecting to database{layer_info}...")
102 | source_crs = self.iface.mapCanvas().mapSettings().destinationCrs()
103 | bbox = transform_bbox_to_4326(self.extent, source_crs)
104 |
105 | # Log validation results dictionary at the beginning of run
106 | #logger.log(f"Full validation_results at start of run: {self.validation_results}")
107 |
108 | conn = None
109 | try:
110 | # Install and load the spatial extension
111 | self.progress.emit(f"Loading spatial extension{layer_info}...")
112 |
113 | if self.output_file.lower().endswith('.duckdb'):
114 | conn = duckdb.connect(self.output_file) # Connect directly to output file
115 | else:
116 | conn = duckdb.connect()
117 |
118 | conn.execute("INSTALL httpfs;")
119 | conn.execute("INSTALL spatial;")
120 | conn.execute("LOAD httpfs;")
121 | conn.execute("LOAD spatial;")
122 |
123 | # Verify spatial extension is loaded by testing a spatial function
124 | try:
125 | conn.execute("SELECT ST_AsText(ST_GeomFromText('POINT(0 0)'))").fetchone()
126 | except Exception as e:
127 | logger.log(f"Failed to verify spatial extension: {e}")
128 | # Force reload
129 | conn.execute("LOAD spatial;")
130 |
131 | # Get schema early as we need it for both column names and bbox check
132 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{self.dataset_url}')"
133 | schema_result = conn.execute(schema_query).fetchall()
134 | self.validation_results['schema'] = schema_result
135 |
136 | # Log the schema for debugging
137 | #logger.log("Schema in Worker:")
138 | #for row in schema_result:
139 | #logger.log(f"Column: {row[0]}, Type: {row[1]}")
140 |
141 | # If geometry_column is not in validation_results, detect it now
142 | if 'geometry_column' not in self.validation_results:
143 | #logger.log("No geometry_column in validation_results, detecting now")
144 | self.validation_results['geometry_column'] = 'geometry' # Default
145 | geometry_found = False
146 |
147 | for row in schema_result:
148 | col_name = row[0]
149 | col_type = row[1].upper()
150 | #logger.log(f"Checking column {col_name} with type {col_type} for geometry")
151 | if 'GEOMETRY' in col_type or 'GEOGRAPHY' in col_type:
152 | self.validation_results['geometry_column'] = col_name
153 | logger.log(f"Found geometry column by type: {col_name}")
154 | geometry_found = True
155 | break
156 |
157 | if not geometry_found:
158 | # Try a different approach - look for columns
159 | #logger.log("No standard geometry column found, trying alternative detection")
160 | for row in schema_result:
161 | col_name = row[0].lower()
162 | col_name_orig = row[0] # Keep original case
163 | col_type = row[1].upper()
164 |
165 | # Check for common geometry column names
166 | if col_name in ['geometry', 'geom', 'the_geom', 'wkb_geometry']:
167 | self.validation_results['geometry_column'] = col_name_orig
168 | #logger.log(f"Found likely geometry column by name: {col_name_orig}")
169 | geometry_found = True
170 | break
171 | # Also check for BLOB columns with geometry-like names
172 | elif 'BLOB' in col_type and col_name in ['geometry', 'geom', 'the_geom', 'wkb_geometry']:
173 | self.validation_results['geometry_column'] = col_name_orig
174 | logger.log(f"Found WKB BLOB geometry column: {col_name_orig}")
175 | geometry_found = True
176 | break
177 |
178 | #logger.log(f"Final geometry column detection result: {self.validation_results['geometry_column']}")
179 |
180 | table_name = "download_data"
181 |
182 | self.progress.emit(f"Preparing query{layer_info}...")
183 | select_query = "SELECT *"
184 | if not self.output_file.endswith(".parquet"):
185 | # Construct the SELECT clause with array conversion to strings
186 | columns = []
187 | for row in schema_result:
188 | col_name = row[0]
189 | col_type = row[1]
190 |
191 | # Quote the column name to handle special characters
192 | quoted_col_name = f'"{col_name}"'
193 |
194 | if 'STRUCT' in col_type.upper() or 'MAP' in col_type.upper():
195 | columns.append(f"TO_JSON({quoted_col_name}) AS {quoted_col_name}")
196 | elif '[]' in col_type: # Check for array types like VARCHAR[]
197 | columns.append(f"array_to_string({quoted_col_name}, ', ') AS {quoted_col_name}")
198 | elif col_type.upper() == 'UTINYINT':
199 | columns.append(f"CAST({quoted_col_name} AS INTEGER) AS {quoted_col_name}")
200 | elif 'BLOB' in col_type.upper() and col_name == geometry_column:
201 | # For BLOB geometry columns, we'll handle conversion differently
202 | # to avoid spatial function validation issues
203 | columns.append(quoted_col_name)
204 | else:
205 | columns.append(quoted_col_name)
206 |
207 | # Check if this is Overture data and has a names column
208 | has_names_column = any('names' in row[0] for row in schema_result)
209 | if 'overture' in self.dataset_url and has_names_column:
210 | select_query = f'SELECT "names"."primary" as name,{", ".join(columns)}'
211 | else:
212 | select_query = f'SELECT {", ".join(columns)}'
213 |
214 | # First check: Does the schema actually have a bbox column?
215 | has_bbox_in_schema = False
216 | if 'schema' in self.validation_results and self.validation_results['schema']:
217 | for row in self.validation_results['schema']:
218 | if row[0].lower() == 'bbox' and 'struct' in row[1].lower():
219 | has_bbox_in_schema = True
220 | #logger.log("Found actual bbox column in schema")
221 | break
222 |
223 | if not has_bbox_in_schema:
224 | #logger.log("No bbox column found in schema, overriding validation_results")
225 | # Force override incorrect bbox settings if schema doesn't have bbox
226 | self.validation_results['has_bbox'] = False
227 | self.validation_results['bbox_column'] = None
228 |
229 | # Now use the corrected validation_results
230 | bbox_column = self.validation_results.get('bbox_column')
231 | geometry_column = self.validation_results.get('geometry_column', 'geometry')
232 | #logger.log(f"Final bbox_column value: {bbox_column}")
233 | #logger.log(f"Using geometry column: {geometry_column}")
234 |
235 | # Check if geometry column is a BLOB that needs conversion
236 | geometry_col_type = None
237 | for row in schema_result:
238 | if row[0] == geometry_column:
239 | geometry_col_type = row[1].upper()
240 | break
241 |
242 | if bbox_column is not None:
243 | #logger.log(f"Using bbox column for query: {bbox_column}")
244 | where_clause = f"""
245 | WHERE "{bbox_column}".xmin BETWEEN {bbox.xMinimum()} AND {bbox.xMaximum()}
246 | AND "{bbox_column}".ymin BETWEEN {bbox.yMinimum()} AND {bbox.yMaximum()}
247 | """
248 | else:
249 | #logger.log("Using spatial filter instead of bbox")
250 | # If it's a BLOB column, we can't use spatial functions in the initial query
251 | # We'll apply the filter after converting the geometry
252 | if geometry_col_type and 'BLOB' in geometry_col_type:
253 | where_clause = "" # No spatial filter initially for BLOB columns
254 | else:
255 | # For proper geometry columns, we can use spatial filter directly
256 | geometry_expr = f'"{geometry_column}"'
257 | where_clause = f"""
258 | WHERE ST_Intersects(
259 | {geometry_expr},
260 | ST_GeomFromText('POLYGON(({bbox.xMinimum()} {bbox.yMinimum()},
261 | {bbox.xMaximum()} {bbox.yMinimum()},
262 | {bbox.xMaximum()} {bbox.yMaximum()},
263 | {bbox.xMinimum()} {bbox.yMaximum()},
264 | {bbox.xMinimum()} {bbox.yMinimum()}))')
265 | )
266 | """
267 |
268 | # Base query
269 | base_query = f"""
270 | CREATE TABLE {table_name} AS (
271 | {select_query} FROM read_parquet('{self.dataset_url}')
272 | {where_clause}
273 | )
274 | """
275 | self.progress.emit(f"Downloading{layer_info} data...")
276 | logger.log("Executing SQL query:")
277 | logger.log(base_query)
278 |
279 | conn.execute(base_query)
280 |
281 | # If we have a BLOB geometry column, we need to convert it after table creation
282 | # and apply spatial filter if needed
283 | if (geometry_column and geometry_col_type and 'BLOB' in geometry_col_type):
284 | # Create a new table with converted geometry
285 | temp_table = f"{table_name}_converted"
286 |
287 | # Build column list for conversion
288 | convert_columns = []
289 | for col_name, col_type, _, _, _, _ in schema_result:
290 | quoted_col_name = f'"{col_name}"'
291 | if col_name == geometry_column:
292 | convert_columns.append(f"ST_GeomFromWKB({quoted_col_name}) AS {quoted_col_name}")
293 | else:
294 | convert_columns.append(quoted_col_name)
295 |
296 | # Add spatial filter if bbox is available and we didn't filter earlier
297 | spatial_filter = ""
298 | if bbox and not bbox_column: # Only if we didn't filter with bbox column
299 | spatial_filter = f"""
300 | WHERE ST_Intersects(
301 | ST_GeomFromWKB("{geometry_column}"),
302 | ST_GeomFromText('POLYGON(({bbox.xMinimum()} {bbox.yMinimum()},
303 | {bbox.xMaximum()} {bbox.yMinimum()},
304 | {bbox.xMaximum()} {bbox.yMaximum()},
305 | {bbox.xMinimum()} {bbox.yMaximum()},
306 | {bbox.xMinimum()} {bbox.yMinimum()}))')
307 | )
308 | """
309 |
310 | convert_query = f"""
311 | CREATE TABLE {temp_table} AS
312 | SELECT {', '.join(convert_columns)}
313 | FROM {table_name}
314 | {spatial_filter}
315 | """
316 |
317 | conn.execute(convert_query)
318 |
319 | # Drop original and rename
320 | conn.execute(f"DROP TABLE {table_name}")
321 | conn.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
322 |
323 | # Add check for empty results
324 | row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
325 | if row_count == 0:
326 | self.info.emit(f"No data found{layer_info} in the requested area. Check that your map extent overlaps with the data and/or expand your map extent. Skipping to next dataset if available.")
327 | self.finished.emit() # Ensure finished signal is emitted
328 | return
329 |
330 | self.progress.emit(f"Processing{layer_info} data to requested format...")
331 |
332 | file_extension = self.output_file.lower().split('.')[-1]
333 |
334 | if file_extension == 'duckdb':
335 | # Commit the transaction to ensure the data is saved
336 | conn.commit()
337 | if not self.killed:
338 | self.info.emit(
339 | "Data has been successfully saved to DuckDB database.\n\n"
340 | "Note: QGIS does not currently support loading DuckDB files directly."
341 | )
342 | else:
343 | # Check size if exporting to GeoJSON
344 | if self.output_file.lower().endswith('.geojson'):
345 | estimated_size = self.estimate_file_size(conn, table_name)
346 | if estimated_size > 4096 and not self.size_warning_accepted: # 4GB warning threshold
347 | self.file_size_warning.emit(estimated_size)
348 | return
349 |
350 | # Use the geometry column from validation results for the Hilbert sorting
351 | # At this point, if we converted BLOB to geometry, it's already a GEOMETRY type
352 | # So we don't need ST_GeomFromWKB anymore
353 | geometry_expr = f'"{geometry_column}"'
354 | extent_expr = f'"{geometry_column}"'
355 |
356 | copy_query = f"""
357 | COPY (
358 | WITH bbox AS (
359 | SELECT ST_Extent(ST_Extent_Agg({extent_expr}))::BOX_2D AS b
360 | FROM {table_name}
361 | )
362 | SELECT t.*
363 | FROM {table_name} AS t
364 | CROSS JOIN bbox
365 | ORDER BY ST_Hilbert(t.{geometry_expr}, bbox.b)
366 | ) TO '{self.output_file}'
367 | """
368 |
369 | if file_extension == "parquet":
370 | format_options = "(FORMAT 'parquet', COMPRESSION 'ZSTD', COMPRESSION_LEVEL 22);"
371 | elif self.output_file.endswith(".gpkg"):
372 | format_options = "(FORMAT GDAL, DRIVER 'GPKG');"
373 | elif self.output_file.endswith(".fgb"):
374 | format_options = "(FORMAT GDAL, DRIVER 'FlatGeobuf', SRS 'EPSG:4326');"
375 | elif self.output_file.endswith(".geojson"):
376 | format_options = "(FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');"
377 | else:
378 | self.error.emit("Unsupported file format.")
379 |
380 | logger.log("Executing SQL query:")
381 | logger.log(copy_query + format_options)
382 | conn.execute(copy_query + format_options)
383 |
384 |
385 | if self.killed:
386 | return
387 |
388 | if not self.killed:
389 | if self.output_file.lower().endswith('.duckdb'):
390 | self.info.emit(
391 | "Data has been successfully saved to DuckDB database.\n\n"
392 | "Note: QGIS does not currently support loading DuckDB files directly."
393 | )
394 | else:
395 | self.load_layer.emit(self.output_file)
396 | self.finished.emit()
397 |
398 | except Exception as e:
399 | if not self.killed:
400 | # Change error to info if it's a "no data" error
401 | error_str = str(e)
402 | if "No data found" in error_str:
403 | self.info.emit(f"No data found{layer_info} in the requested area for {self.dataset_url}. Skipping to next dataset if available.")
404 | self.finished.emit() # Ensure finished signal is emitted
405 | else:
406 | self.error.emit(error_str)
407 | finally:
408 | if conn:
409 | if not self.output_file.lower().endswith('.duckdb'): # Clean up temporary table
410 | try:
411 | conn.execute(f"DROP TABLE IF EXISTS {table_name}")
412 | except:
413 | pass
414 | conn.close()
415 |
416 | except Exception as e:
417 | if not self.killed:
418 | self.error.emit(str(e))
419 |
def kill(self) -> None:
    """Request cancellation by setting the ``killed`` flag on this worker.

    This method only records the request; it does not interrupt any
    statement that is already executing.
    """
    self.killed = True
422 |
def estimate_file_size(self, conn, table_name, geometry_column="geometry"):
    """Estimate the size in MB of ``table_name`` exported as a GeoJSON FeatureCollection.

    The average serialized feature size is measured on a sample of at most
    100 rows and extrapolated to the full row count, plus a fixed allowance
    for the collection wrapper and one comma between consecutive features.

    Args:
        conn: Open DuckDB connection holding ``table_name``.
        table_name: Table to measure. It is inserted into the SQL as given.
        geometry_column: Name of the geometry column. Defaults to
            ``"geometry"``, which is what this method always assumed before
            the parameter existed.

    Returns:
        Estimated size in megabytes, or 0 when the table is empty, the
        sample yields no size, or the estimate fails (the failure is logged).
    """

    def quote_ident(name):
        # Double-quote identifiers so names with spaces, colons or other
        # punctuation (e.g. "addr:street") stay valid SQL.
        return '"' + str(name).replace('"', '""') + '"'

    def quote_literal(text):
        # Escape embedded single quotes inside a SQL string literal.
        return "'" + str(text).replace("'", "''") + "'"

    try:
        # Get total row count
        row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]

        # Use a small sample so the estimate stays cheap on large tables
        sample_size = min(100, row_count)
        if sample_size <= 0:
            return 0

        column_names = [
            col[0] for col in conn.execute(f"DESCRIBE {table_name}").fetchall()
        ]
        # Every non-geometry column becomes a key/value pair of "properties"
        property_pairs = ", ".join(
            f"{quote_literal(name)}, "
            f"COALESCE(CAST({quote_ident(name)} AS VARCHAR), 'null')"
            for name in column_names
            if name != geometry_column
        )

        sample_query = f"""
            WITH sample AS (
                SELECT * FROM {table_name} LIMIT {sample_size}
            )
            SELECT AVG(LENGTH(
                json_object(
                    'type', 'Feature',
                    'geometry', ST_AsGeoJSON({quote_ident(geometry_column)}),
                    'properties', json_object(
                        {property_pairs}
                    )
                )::VARCHAR
            )) as avg_feature_size
            FROM sample;
        """

        # Get average feature size
        avg_feature_size = conn.execute(sample_query).fetchone()[0]
        if not avg_feature_size:
            return 0

        # Account for GeoJSON overhead
        collection_overhead = 50  # {"type":"FeatureCollection","features":[]}
        comma_overhead = row_count - 1  # Commas between features

        total_estimated_bytes = (
            (row_count * avg_feature_size) + collection_overhead + comma_overhead
        )
        return total_estimated_bytes / (1024 * 1024)  # Convert to MB

    except Exception as e:
        # Best effort: a failed estimate must never abort the export itself.
        logger.log(f"Error estimating file size: {str(e)}", 2)
        return 0
475 |
def process_schema_columns(self, schema_result):
    """Build one SELECT-list expression per column of a schema result.

    Args:
        schema_result: Iterable of rows whose first two items are the column
            name and its type string.

    Returns:
        A list of SQL expressions in schema order. Each column name is
        double-quoted; STRUCT and MAP columns are wrapped in ``TO_JSON``,
        array columns (type containing ``[]``) in ``array_to_string``, and
        UTINYINT columns are cast to INTEGER. All other columns pass through.
    """

    def select_expression(name, type_name):
        ident = f'"{name}"'
        upper_type = type_name.upper()
        # Nested types are checked first, so a STRUCT array is JSON-encoded
        # rather than joined as a string.
        if "STRUCT" in upper_type or "MAP" in upper_type:
            return f"TO_JSON({ident}) AS {ident}"
        if "[]" in type_name:
            return f"array_to_string({ident}, ', ') AS {ident}"
        if upper_type == "UTINYINT":
            return f"CAST({ident} AS INTEGER) AS {ident}"
        return ident

    return [select_expression(row[0], row[1]) for row in schema_result]
497 |
498 |
class ValidationWorker(QObject):
    """Worker that inspects a Parquet dataset for a usable bbox column.

    Signals:
        finished(bool, str, dict): success flag, human-readable message and
            the validation results (``schema``, ``has_bbox``,
            ``bbox_column``, ``geometry_column``).
        progress(str): status text emitted while validating.
        needs_bbox_warning(): emitted when no bbox column could be found.
    """

    finished = pyqtSignal(bool, str, dict)
    progress = pyqtSignal(str)
    needs_bbox_warning = pyqtSignal()

    def __init__(self, dataset_url, iface, extent):
        """Store the validation inputs and load the preset dataset catalogue.

        Args:
            dataset_url: URL or path of the Parquet dataset to validate.
            iface: Stored on the instance; not used by this class itself.
            extent: Stored on the instance; not used by this class itself.
        """
        super().__init__()
        self.dataset_url = dataset_url
        self.iface = iface
        self.extent = extent
        self.killed = False

        base_path = os.path.dirname(os.path.abspath(__file__))
        presets_path = os.path.join(base_path, "data", "presets.json")
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(presets_path, "r", encoding="utf-8") as f:
            self.PRESET_DATASETS = json.load(f)

    def _quoted_dataset_url(self):
        """Return the dataset URL with single quotes doubled for a SQL literal."""
        return str(self.dataset_url).replace("'", "''")

    @staticmethod
    def _bbox_column_from_geo(geo_metadata):
        """Return the bbox covering column named in parsed ``geo`` metadata.

        The ``geometry`` column is consulted first, then the column named by
        ``primary_column``. Returns None when neither declares a
        ``covering.bbox.xmin`` path.
        """
        if not isinstance(geo_metadata, dict):
            return None
        columns = geo_metadata.get("columns")
        if not isinstance(columns, dict):
            return None

        candidates = ["geometry"]
        primary = geo_metadata.get("primary_column")
        if isinstance(primary, str) and primary != "geometry":
            candidates.append(primary)

        for name in candidates:
            try:
                # xmin is a path such as ["bbox", "xmin"]; its first element
                # is the name of the covering column.
                bbox_col = columns[name]["covering"]["bbox"]["xmin"][0]
            except (KeyError, IndexError, TypeError):
                continue
            if isinstance(bbox_col, str) and bbox_col:
                return bbox_col
        return None

    def check_bbox_metadata(self, conn):
        """Check for bbox information in GeoParquet metadata.

        Reads the Parquet key/value metadata through ``conn``, parses the
        ``geo`` entry as JSON and looks up the bbox covering column.

        Args:
            conn: Open DuckDB connection.

        Returns:
            The bbox column name, or None when the metadata has no ``geo``
            entry, the entry cannot be parsed, or it declares no covering.
        """
        metadata_query = (
            f"SELECT key, value FROM parquet_kv_metadata('{self._quoted_dataset_url()}')"
        )
        metadata_results = conn.execute(metadata_query).fetchall()

        for key, value in metadata_results:
            if key not in (b"geo", "geo"):
                continue
            try:
                decoded_value = (
                    value.decode() if isinstance(value, (bytes, bytearray)) else value
                )
                # Parse in Python so quotes inside the metadata cannot break
                # a SQL statement and no DuckDB json extension is required.
                bbox_col = self._bbox_column_from_geo(json.loads(decoded_value))
            except (ValueError, TypeError) as e:
                logger.log(f"\nError parsing geo metadata: {str(e)}", 2)
                logger.log(f"Exception type: {type(e)}", 2)
                import traceback

                logger.log(traceback.format_exc())
                continue
            if bbox_col:
                return bbox_col
        return None

    def run(self):
        """Validate the dataset and report the outcome through signals.

        Always emits ``finished`` exactly once. ``needs_bbox_warning`` is
        emitted first when no bbox column is available, including when
        validation fails with an error before one was found.
        """
        # Initialize validation results with default values
        validation_results = {
            "schema": None,
            "has_bbox": False,
            "bbox_column": None,
            "geometry_column": "geometry",  # Default fallback
        }

        # Bound before the try so the finally clause is safe even when
        # duckdb.connect() itself raises.
        conn = None
        try:
            self.progress.emit("Connecting to data source...")
            conn = duckdb.connect()
            conn.execute("INSTALL spatial;")
            conn.execute("LOAD spatial;")
            conn.execute("INSTALL httpfs;")
            conn.execute("LOAD httpfs;")

            if not self.needs_validation():
                # Presets flagged as not needing validation use a "bbox" column.
                validation_results.update({
                    "has_bbox": True,
                    "bbox_column": "bbox",
                })
                self.finished.emit(True, "Validation successful", validation_results)
                return

            self.progress.emit("Checking data format...")
            schema_query = (
                f"DESCRIBE SELECT * FROM read_parquet('{self._quoted_dataset_url()}')"
            )
            schema_result = conn.execute(schema_query).fetchall()

            # Update validation results with schema
            validation_results["schema"] = schema_result

            # Check for standard bbox column first
            has_bbox = any(
                row[0].lower() == "bbox" and "struct" in row[1].lower()
                for row in schema_result
            )

            if has_bbox:
                validation_results["has_bbox"] = True
                validation_results["bbox_column"] = "bbox"
                self.finished.emit(True, "Validation successful", validation_results)
                return

            # Check metadata for alternative bbox column
            bbox_column = self.check_bbox_metadata(conn)
            if bbox_column:
                validation_results["has_bbox"] = True
                validation_results["bbox_column"] = bbox_column
                self.finished.emit(True, "Validation successful", validation_results)
            else:
                # No bbox column found - emit warning signal first
                self.needs_bbox_warning.emit()
                # Then emit finished signal with no bbox results
                self.finished.emit(True, "Validation with no bbox column", validation_results)

        except Exception as e:
            logger.log(f"Error in ValidationWorker: {str(e)}")
            # Emit warning before error if no bbox was found
            if not validation_results.get("has_bbox"):
                self.needs_bbox_warning.emit()
            # Still emit validation results with default values in case of error
            self.finished.emit(False, f"Error validating source: {str(e)}", validation_results)
        finally:
            if conn is not None:
                conn.close()

    def needs_validation(self):
        """Determine if the dataset needs any validation.

        Returns:
            The ``needs_validation`` value (default True) of the first preset
            whose ``url``, or whose ``url_template`` prefix before the first
            ``{``, occurs in the dataset URL. True for all other datasets.
        """
        for source in self.PRESET_DATASETS.values():
            for dataset in source.values():
                url = dataset.get("url")
                # An empty string is contained in every URL, so skip it.
                if isinstance(url, str) and url and url in self.dataset_url:
                    return dataset.get("needs_validation", True)

                template = dataset.get("url_template")
                if isinstance(template, str):
                    prefix = template.split("{")[0]
                    if prefix and prefix in self.dataset_url:
                        return dataset.get("needs_validation", True)

        # All other datasets need validation
        return True
648 |
--------------------------------------------------------------------------------