├── gpq_downloader ├── icons │ ├── parquet-download.png │ └── parquet-download.svg ├── tests │ ├── data │ │ ├── geoparquet_with_metadata.parquet │ │ └── non_geoparquet_with_geometry.parquet │ ├── test_logger.py │ ├── test_dialog.py │ ├── test_integration.py │ ├── conftest.py │ ├── test_validation.py │ ├── test_worker.py │ ├── test_utils.py │ ├── create_test_data.py │ ├── test_plugin.py │ └── test_non_geoparquet.py ├── logger.py ├── data │ ├── formats.json │ └── presets.json ├── metadata.txt ├── __init__.py ├── dialog.py ├── plugin.py └── utils.py ├── pyproject.toml ├── .github └── workflows │ └── tests.yml ├── make_release.sh ├── README.md ├── .gitignore └── LICENSE /gpq_downloader/icons/parquet-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/icons/parquet-download.png -------------------------------------------------------------------------------- /gpq_downloader/tests/data/geoparquet_with_metadata.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/tests/data/geoparquet_with_metadata.parquet -------------------------------------------------------------------------------- /gpq_downloader/tests/data/non_geoparquet_with_geometry.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/tests/data/non_geoparquet_with_geometry.parquet -------------------------------------------------------------------------------- /gpq_downloader/tests/test_logger.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from gpq_downloader.logger import log 3 | 4 | def test_logger_basic(): 5 | """Test basic logger functionality""" 6 | log("Test message") 7 | log("Test message", 1) 8 | log("Test message", 2) 9 | 10 | def test_logger_levels(): 11 | """Test different logger levels""" 12 | log("Info message", 0) 13 | log("Warning message", 1) 14 | log("Error message", 2) -------------------------------------------------------------------------------- /gpq_downloader/logger.py: -------------------------------------------------------------------------------- 1 | from qgis.core import Qgis, QgsMessageLog 2 | 3 | 4 | def log(message: str, level_in: int = 0): 5 | if level_in == 0: 6 | level = Qgis.MessageLevel.Info 7 | elif level_in == 1: 8 | level = Qgis.MessageLevel.Warning 9 | elif level_in == 2: 10 | level = Qgis.MessageLevel.Critical 11 | else: 12 | level = Qgis.MessageLevel.Info 13 | 14 | QgsMessageLog.logMessage(str(message), "GeoParquet Downloader", level) 15 | -------------------------------------------------------------------------------- /gpq_downloader/data/formats.json: -------------------------------------------------------------------------------- 1 | { 2 | "GeoParquet (*.parquet)": { 3 | "extension": ".parquet", 4 | "format_options": "(FORMAT 'parquet', COMPRESSION 'ZSTD')" 5 | }, 6 | "GeoPackage (*.gpkg)": { 7 | "extension": ".gpkg", 8 | "format_options": "(FORMAT GDAL, DRIVER 'GPKG', SRS 'EPSG:4326')" 9 | }, 10 | "FlatGeobuf (*.fgb)": { 11 | "extension": ".fgb", 12 | "format_options": "(FORMAT GDAL, DRIVER 'FlatGeobuf', SRS 'EPSG:4326')" 13 | }, 14 | "GeoJSON (*.geojson)": { 15 | "extension": ".geojson", 16 | "format_options": "(FORMAT GDAL, DRIVER 'GeoJSON', SRS 
'EPSG:4326')" 17 | } 18 | } -------------------------------------------------------------------------------- /gpq_downloader/tests/test_dialog.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | from qgis.PyQt.QtWidgets import QDialog 4 | from qgis.PyQt.QtCore import Qt 5 | 6 | from gpq_downloader.dialog import DataSourceDialog 7 | 8 | def test_dialog_initialization(qgs_app, mock_iface): 9 | """Test dialog initialization""" 10 | dialog = DataSourceDialog(None, mock_iface) 11 | assert dialog is not None 12 | assert dialog.iface == mock_iface 13 | 14 | def test_dialog_radio_buttons(qgs_app, mock_iface): 15 | """Test radio button functionality""" 16 | dialog = DataSourceDialog(None, mock_iface) 17 | 18 | # Set Overture radio to checked (since it might not be default) 19 | dialog.overture_radio.setChecked(True) 20 | 21 | # Check state after explicitly setting 22 | assert dialog.overture_radio.isChecked() 23 | assert not dialog.sourcecoop_radio.isChecked() 24 | assert not dialog.osm_radio.isChecked() 25 | 26 | # Test switching radio buttons 27 | dialog.sourcecoop_radio.setChecked(True) 28 | assert not dialog.overture_radio.isChecked() 29 | assert dialog.sourcecoop_radio.isChecked() 30 | assert not dialog.osm_radio.isChecked() 31 | 32 | @patch('gpq_downloader.dialog.QgsSettings') 33 | def test_dialog_settings_saved(mock_settings, qgs_app, mock_iface): 34 | """Test that settings are saved""" 35 | dialog = DataSourceDialog(None, mock_iface) 36 | dialog.save_checkbox_states() 37 | mock_settings.assert_called() -------------------------------------------------------------------------------- /gpq_downloader/tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import sys 4 | from qgis.core import QgsProject, QgsVectorLayer 5 | from qgis.PyQt.QtWidgets import QApplication 6 | 7 | from gpq_downloader.plugin import QgisPluginGeoParquet 8 | 9 | @pytest.mark.skipif(not os.environ.get('RUN_INTEGRATION_TESTS'), reason="Integration tests not enabled") 10 | def test_plugin_load(qgs_app, mock_iface): 11 | """Test that plugin loads properly""" 12 | plugin = QgisPluginGeoParquet(mock_iface) 13 | assert plugin is not None 14 | 15 | # Initialize plugin 16 | plugin.initGui() 17 | 18 | # Check that actions were created 19 | assert plugin.action is not None 20 | 21 | @pytest.mark.skipif(not os.environ.get('RUN_INTEGRATION_TESTS'), reason="Integration tests not enabled") 22 | def test_plugin_unload(qgs_app, mock_iface): 23 | """Test that plugin unloads properly""" 24 | plugin = QgisPluginGeoParquet(mock_iface) 25 | plugin.initGui() 26 | 27 | # Unload the plugin 28 | plugin.unload() 29 | 30 | # Check that cleanup was successful 31 | assert plugin.worker is None 32 | assert plugin.worker_thread is None 33 | 34 | @pytest.mark.skipif(not os.environ.get('RUN_INTEGRATION_TESTS'), reason="Integration tests not enabled") 35 | def test_plugin_download_dir(qgs_app, mock_iface): 36 | """Test that plugin creates download directory""" 37 | plugin = QgisPluginGeoParquet(mock_iface) 38 | 39 | # Check that download directory exists 40 | assert plugin.download_dir.exists() 41 | assert plugin.download_dir.is_dir() -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", 
"wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "gpq_downloader" 7 | version = "0.8.5" 8 | description = "QGIS plugin for downloading and processing GeoParquet files" 9 | readme = "README.md" 10 | requires-python = ">=3.7" 11 | license = {text = "GPL-2.0-or-later"} 12 | authors = [ 13 | {name = "Chris Holmes", email = "cholmes@9eo.org"} 14 | ] 15 | dependencies = [ 16 | "duckdb>=1.1.0", 17 | ] 18 | 19 | [project.urls] 20 | "Homepage" = "https://github.com/cholmes/qgis_plugin_gpq_downloader" 21 | "Bug Tracker" = "https://github.com/yourusername/qgis_plugin_gpq_downloader/issues" 22 | 23 | [tool.setuptools] 24 | packages = ["gpq_downloader"] 25 | 26 | [tool.pytest.ini_options] 27 | testpaths = ["gpq_downloader/tests"] 28 | python_files = "test_*.py" 29 | addopts = "--cov=gpq_downloader" 30 | 31 | [tool.coverage.run] 32 | source = ["gpq_downloader"] 33 | omit = ["gpq_downloader/tests/*"] 34 | 35 | [tool.coverage.report] 36 | exclude_lines = [ 37 | "pragma: no cover", 38 | "def __repr__", 39 | "raise NotImplementedError", 40 | "if __name__ == .__main__.:", 41 | "pass", 42 | "raise ImportError", 43 | ] 44 | 45 | [tool.black] 46 | line-length = 88 47 | target-version = ['py37', 'py38', 'py39', 'py310'] 48 | include = '\.pyi?$' 49 | 50 | [project.optional-dependencies] 51 | dev = [ 52 | "pytest>=7.4.0", 53 | "pytest-qt>=4.2.0", 54 | "pytest-mock>=3.11.1", 55 | "pytest-cov>=4.1.0", 56 | "pyarrow>=10.0.0", 57 | "black>=23.3.0", 58 | ] 59 | 60 | [tool.setuptools.package-data] 61 | gpq_downloader = ["data/*.json"] -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | container: 13 | image: qgis/qgis:release-3_34 # QGIS docker image with QGIS 3.34 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Install Python dependencies 19 | run: | 20 | python3 -m pip install --upgrade pip 21 | pip3 install pytest pytest-qt 22 | # Install in development mode to ensure data files are available 23 | pip3 install -e .[dev] 24 | 25 | - name: Debug package installation 26 | run: | 27 | # Print out installed package location 28 | python3 -c "import gpq_downloader; print(gpq_downloader.__file__)" 29 | # Check if data directory exists 30 | ls -la $(python3 -c "import gpq_downloader; import os; print(os.path.dirname(gpq_downloader.__file__))")/data || echo "Data directory not found" 31 | 32 | - name: Create data directory if missing 33 | run: | 34 | # Create data directory if it doesn't exist 35 | PACKAGE_DIR=$(python3 -c "import gpq_downloader; import os; print(os.path.dirname(gpq_downloader.__file__))") 36 | mkdir -p $PACKAGE_DIR/data 37 | # If presets.json doesn't exist, create a minimal version 38 | if [ ! 
-f "$PACKAGE_DIR/data/presets.json" ]; then 39 | echo '{"datasets": {}}' > $PACKAGE_DIR/data/presets.json 40 | echo "Created minimal presets.json file at $PACKAGE_DIR/data/presets.json" 41 | fi 42 | 43 | - name: Run tests with xvfb 44 | run: | 45 | # Make sure xvfb is installed in container 46 | apt-get update && apt-get install -y xvfb 47 | 48 | # Run tests with virtual display 49 | xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" pytest -------------------------------------------------------------------------------- /make_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Function to extract version from metadata.txt 4 | get_version_from_metadata() { 5 | if [ -f "gpq_downloader/metadata.txt" ]; then 6 | VERSION=$(grep "^version=" gpq_downloader/metadata.txt | cut -d'=' -f2 | tr -d '[:space:]') 7 | if [ -n "$VERSION" ]; then 8 | echo "Found version $VERSION in metadata.txt" 9 | return 0 10 | fi 11 | fi 12 | echo "Warning: Could not extract version from metadata.txt" 13 | return 1 14 | } 15 | 16 | # Get version from command line argument or metadata.txt or use date 17 | if [ -n "$1" ]; then 18 | VERSION=$1 19 | echo "Using provided version: $VERSION" 20 | else 21 | if ! get_version_from_metadata; then 22 | VERSION=$(date +"%Y%m%d") 23 | echo "Using date-based version: $VERSION" 24 | fi 25 | fi 26 | 27 | ZIP_FILENAME="gpq_downloader_${VERSION}.zip" 28 | TEMP_DIR=$(mktemp -d) 29 | 30 | echo "Creating release zip: ${ZIP_FILENAME}" 31 | 32 | # Create a temporary directory with the renamed plugin 33 | echo "Creating temporary directory with renamed plugin..." 34 | cp -r gpq_downloader/ "${TEMP_DIR}/qgis_plugin_gpq_downloader" 35 | 36 | # Copy LICENSE file if it exists 37 | if [ -f "LICENSE" ]; then 38 | echo "Copying LICENSE file..." 39 | cp LICENSE "${TEMP_DIR}/qgis_plugin_gpq_downloader/" 40 | else 41 | echo "Warning: LICENSE file not found" 42 | fi 43 | 44 | # Navigate to the temp directory 45 | cd "${TEMP_DIR}" 46 | 47 | # Create zip file excluding unwanted files 48 | echo "Creating zip file..." 49 | zip -r "${ZIP_FILENAME}" qgis_plugin_gpq_downloader/ \ 50 | -x "*.DS_Store" "*.gitignore" "*/.git/*" "*/__pycache__/*" "*.pyc" "*.pyo" "*.zip" "*/tests/*" 51 | 52 | # Move the zip file back to the original directory 53 | mv "${ZIP_FILENAME}" "${OLDPWD}/" 54 | 55 | # Clean up 56 | cd "${OLDPWD}" 57 | rm -rf "${TEMP_DIR}" 58 | 59 | echo "Release zip created: ${ZIP_FILENAME}" 60 | echo "You can now upload this file to the QGIS Plugin Repository." -------------------------------------------------------------------------------- /gpq_downloader/metadata.txt: -------------------------------------------------------------------------------- 1 | [general] 2 | name=GeoParquet Downloader (Overture, Source Coop & Custom Cloud) 3 | qgisMinimumVersion=3.16 4 | qgisMaximumVersion=4.99.0 5 | version=0.8.5 6 | supportsQt6=yes 7 | icon=icons/parquet-download.png 8 | description=Plugin for downloading GeoParquet data from cloud sources. 9 | about=This plugin connects to cloud-based GeoParquet data and downloads the portion in the current viewport. 10 | 11 | The plugin comes with pre-configured sources for Overture 12 | Maps, Source Cooperative, and you can enter the location 13 | of any online GeoParquet file or partition. It works best with 14 | the bbox struct from GeoParquet 1.1, but any GeoParquet file 15 | will work. You can save the output data as GeoParquet, 16 | GeoPackage, DuckDB, FlatGeobuf, or GeoJSON. 
17 | 18 | The plugin does not require that your QGIS supports 19 | GeoParquet, as you can download data as GeoPackage, but 20 | GeoParquet generally works better (faster and better nested 21 | data). Most Windows installations come with it, and for Mac 22 | and Linux you can install via conda. For information on 23 | installing Geoparquet support see this wiki page. 24 | 25 | The plugin depends on DuckDB, which should be installed 26 | automatically when you install the plugin. If you have issues 27 | with DuckDB installing please file an issue on the GitHub issue tracker. 28 | 29 | tags=geoparquet,parquet,overture,source cooperative,cloud,duckdb,geopackage 30 | 31 | # credits and contact 32 | author=Chris Holmes 33 | email=cholmes@9eo.org 34 | homepage=https://github.com/cholmes/qgis_plugin_gpq_downloader/ 35 | repository=https://github.com/cholmes/qgis_plugin_gpq_downloader/ 36 | tracker=https://github.com/cholmes/qgis_plugin_gpq_downloader/issues 37 | 38 | [dependencies] 39 | pip_dependencies=duckdb>=1.1.0 -------------------------------------------------------------------------------- /gpq_downloader/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pytest 4 | from qgis.core import QgsApplication, QgsCoordinateReferenceSystem, QgsRectangle 5 | from qgis.PyQt.QtCore import QCoreApplication, QObject 6 | from qgis.PyQt.QtWidgets import QMainWindow 7 | 8 | # Add the parent directory to sys.path 9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | 11 | # Mock QGIS Application 12 | @pytest.fixture(scope="session") 13 | def qgs_app(): 14 | """QGIS application fixture""" 15 | qgs_app = QgsApplication([], False) 16 | qgs_app.initQgis() 17 | yield qgs_app 18 | qgs_app.exitQgis() 19 | 20 | # Mock iface 21 | class MockIface(QObject): 22 | def __init__(self): 23 | super().__init__() 24 | self.canvas = MockCanvas() 25 | self._window = QMainWindow() 26 | self.toolbar_icons = [] # Add this to track added icons 27 | 28 | def mapCanvas(self): 29 | return self.canvas 30 | 31 | def mainWindow(self): 32 | return self._window 33 | 34 | def addToolBarIcon(self, action): # Add this method 35 | """Mock method for adding toolbar icons""" 36 | self.toolbar_icons.append(action) 37 | 38 | def removeToolBarIcon(self, action): # Add this method too 39 | """Mock method for removing toolbar icons""" 40 | if action in self.toolbar_icons: 41 | self.toolbar_icons.remove(action) 42 | 43 | class MockCanvas: 44 | def __init__(self): 45 | self.settings = MockMapSettings() 46 | 47 | def mapSettings(self): 48 | return self.settings 49 | 50 | def extent(self): 51 | return QgsRectangle(0, 0, 1, 1) 52 | 53 | class MockMapSettings: 54 | def destinationCrs(self): 55 | return QgsCoordinateReferenceSystem("EPSG:4326") 56 | 57 | @pytest.fixture 58 | def mock_iface(): 59 | """Mock iface fixture""" 60 | return MockIface() 61 | 62 | # Sample test data 63 | @pytest.fixture 64 | def sample_bbox(): 65 | """Sample bounding box fixture""" 66 | return QgsRectangle(1, 2, 3, 4) 67 | 68 | @pytest.fixture 69 | def sample_validation_results(): 70 | """Sample validation results fixture""" 71 | return { 72 | "has_bbox": True, 73 | "bbox_column": "bbox", 74 | "geometry_column": "geometry", 75 | "schema": [ 76 | ("id", "INTEGER", "YES", None, None, None), 77 | ("name", "VARCHAR", "YES", None, None, None), 78 | ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None), 79 | ("geometry", 
"GEOMETRY", "YES", None, None, None) 80 | ] 81 | } 82 | 83 | @pytest.fixture 84 | def sample_validation_results_no_bbox(): 85 | """Sample validation results with no bbox fixture""" 86 | return { 87 | "has_bbox": False, 88 | "bbox_column": None, 89 | "geometry_column": "geometry", 90 | "schema": [ 91 | ("id", "INTEGER", "YES", None, None, None), 92 | ("name", "VARCHAR", "YES", None, None, None), 93 | ("geometry", "GEOMETRY", "YES", None, None, None) 94 | ] 95 | } -------------------------------------------------------------------------------- /gpq_downloader/tests/test_validation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | import json 4 | import os 5 | 6 | from gpq_downloader.utils import ValidationWorker 7 | 8 | @patch("duckdb.connect") 9 | def test_validation_worker_with_bbox(mock_connect, mock_iface, sample_bbox): 10 | """Test the validation worker with a dataset that has a bbox column""" 11 | # Setup mock connection 12 | mock_conn = MagicMock() 13 | mock_conn.execute.return_value.fetchall.return_value = [ 14 | ("id", "INTEGER", "YES", None, None, None), 15 | ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None), 16 | ("geometry", "GEOMETRY", "YES", None, None, None) 17 | ] 18 | mock_connect.return_value = mock_conn 19 | 20 | # Setup validation signals 21 | finished_signal_received = False 22 | validation_results = None 23 | 24 | def on_finished(success, message, results): 25 | nonlocal finished_signal_received, validation_results 26 | finished_signal_received = True 27 | validation_results = results 28 | 29 | # Create worker 30 | worker = ValidationWorker("https://example.com/test.parquet", mock_iface, sample_bbox) 31 | worker.finished.connect(on_finished) 32 | 33 | # Mock presets.json to return empty dict 34 | with patch.object(worker, 'PRESET_DATASETS', {}): 35 | worker.run() 36 | 37 | # Check results 38 | assert finished_signal_received 39 | assert validation_results["has_bbox"] is True 40 | assert validation_results["bbox_column"] == "bbox" 41 | 42 | @patch("duckdb.connect") 43 | def test_validation_worker_without_bbox(mock_connect, mock_iface, sample_bbox): 44 | """Test the validation worker with a dataset that has no bbox column""" 45 | # Setup mock connection 46 | mock_conn = MagicMock() 47 | mock_conn.execute.return_value.fetchall.return_value = [ 48 | ("id", "INTEGER", "YES", None, None, None), 49 | ("geometry", "GEOMETRY", "YES", None, None, None) 50 | ] 51 | mock_connect.return_value = mock_conn 52 | 53 | # Setup validation signals 54 | warning_signal_received = False 55 | finished_signal_received = False 56 | validation_results = None 57 | 58 | def on_finished(success, message, results): 59 | nonlocal finished_signal_received, validation_results 60 | finished_signal_received = True 61 | validation_results = results 62 | print(f"Received validation results: {results}") # Add debug print 63 | 64 | def on_warning(): 65 | nonlocal warning_signal_received 66 | warning_signal_received = True 67 | print("Warning signal received") # Add debug print 68 | 69 | # Create worker 70 | worker = ValidationWorker("https://example.com/test.parquet", mock_iface, sample_bbox) 71 | worker.finished.connect(on_finished) 72 | worker.needs_bbox_warning.connect(on_warning) 73 | 74 | # Mock presets.json to return empty dict 75 | with patch.object(worker, 'PRESET_DATASETS', {}): 76 | worker.run() 77 | 78 | # Check results 79 | assert 
finished_signal_received, "Finished signal was not emitted" 80 | assert validation_results is not None, "No validation results received" 81 | assert "has_bbox" in validation_results, f"has_bbox not in validation_results: {validation_results}" 82 | assert validation_results["has_bbox"] is False 83 | assert validation_results["bbox_column"] is None 84 | assert warning_signal_received, "Warning signal was not emitted" -------------------------------------------------------------------------------- /gpq_downloader/tests/test_worker.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | import os 4 | from qgis.PyQt.QtCore import QObject 5 | 6 | from gpq_downloader.utils import Worker 7 | 8 | class MockResult: 9 | def __init__(self, data): 10 | self.data = data 11 | 12 | def fetchall(self): 13 | return self.data 14 | 15 | def fetchone(self): 16 | return self.data[0] if self.data else None 17 | 18 | class MockConnection: 19 | def __init__(self, schema_data=None, count_result=1): 20 | self.schema_data = schema_data or [] 21 | self.count_result = count_result 22 | self.executed_queries = [] 23 | 24 | def execute(self, query): 25 | self.executed_queries.append(query) 26 | if "DESCRIBE" in query: 27 | return MockResult(self.schema_data) 28 | elif "COUNT" in query: 29 | return MockResult([(self.count_result,)]) 30 | return MockResult([]) 31 | 32 | def commit(self): 33 | pass 34 | 35 | def close(self): 36 | pass 37 | 38 | @pytest.fixture 39 | def schema_with_bbox(): 40 | return [ 41 | ("id", "INTEGER", "YES", None, None, None), 42 | ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None), 43 | ("geometry", "GEOMETRY", "YES", None, None, None) 44 | ] 45 | 46 | @pytest.fixture 47 | def schema_without_bbox(): 48 | return [ 49 | ("id", "INTEGER", "YES", None, None, None), 50 | ("geometry", "GEOMETRY", "YES", None, None, None) 51 | ] 52 | 53 | @patch("duckdb.connect") 54 | def test_worker_run_with_bbox(mock_connect, mock_iface, sample_bbox, tmp_path, sample_validation_results, schema_with_bbox): 55 | """Test Worker run method with a bbox column""" 56 | # Setup 57 | mock_conn = MockConnection(schema_data=schema_with_bbox) 58 | mock_connect.return_value = mock_conn 59 | 60 | # Create signals for testing 61 | progress_messages = [] 62 | 63 | # Create worker 64 | worker = Worker( 65 | "https://example.com/test.parquet", 66 | sample_bbox, 67 | os.path.join(tmp_path, "output.gpkg"), 68 | mock_iface, 69 | sample_validation_results 70 | ) 71 | 72 | # Connect to our test slots 73 | worker.progress.connect(lambda msg: progress_messages.append(msg)) 74 | 75 | # Run the worker 76 | worker.run() 77 | 78 | # Check queries 79 | bbox_query_found = False 80 | for query in mock_conn.executed_queries: 81 | if '"bbox".xmin BETWEEN' in query: 82 | bbox_query_found = True 83 | 84 | assert bbox_query_found, "Should use bbox in the query" 85 | assert any("Downloading" in msg for msg in progress_messages) 86 | 87 | @patch("duckdb.connect") 88 | def test_worker_run_without_bbox(mock_connect, mock_iface, sample_bbox, tmp_path, sample_validation_results_no_bbox, schema_without_bbox): 89 | """Test Worker run method without a bbox column""" 90 | # Setup 91 | mock_conn = MockConnection(schema_data=schema_without_bbox) 92 | mock_connect.return_value = mock_conn 93 | 94 | # Create signals for testing 95 | progress_messages = [] 96 | 97 | # Create worker with no bbox 98 | worker = Worker( 99 | 
"https://example.com/test.parquet", 100 | sample_bbox, 101 | os.path.join(tmp_path, "output.gpkg"), 102 | mock_iface, 103 | sample_validation_results_no_bbox 104 | ) 105 | 106 | # Connect to our test slots 107 | worker.progress.connect(lambda msg: progress_messages.append(msg)) 108 | 109 | # Run the worker 110 | worker.run() 111 | 112 | # Check queries 113 | st_intersects_found = False 114 | for query in mock_conn.executed_queries: 115 | if 'ST_Intersects' in query: 116 | st_intersects_found = True 117 | 118 | assert st_intersects_found, "Should use ST_Intersects in the query when no bbox column" 119 | assert any("Downloading" in msg for msg in progress_messages) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GeoParquet Downloader for QGIS 2 | 3 | This repo contains a QGIS plugin for downloading GeoParquet data from cloud sources, including Overture Maps, Source Cooperative, and the ability to enter the location of any online GeoParquet file or partition. Just the user's current viewport then gets downloaded, as GeoParquet, DuckDB or GeoPackage. 4 | 5 | ![gpq-downloader-demo4](https://github.com/user-attachments/assets/10f2a73f-2aa6-45a1-9491-41e63b7fec24) 6 | 7 | 8 | The core idea is that GeoParquet can act more like a 'server', letting users download only the data they need, if you add a bit more smarts to the client. So this plugin uses [DuckDB](https://duckdb.org/) but abstracts all the details of forming the right queries to external sources, so users can just pick the data they want and pull it down with ease. And with GeoPackage output users don't even need to know anything about GeoParquet. More info is on the [plugin homepage](https://plugins.qgis.org/plugins/qgis_plugin_gpq_downloader/). 9 | 10 | 11 | ## Installation 12 | 13 | The easiest way to install the plugin file is to use the QGIS plugin manager. Just go to `Plugins > Manage and Install Plugins`, click 14 | the 'install' tab and search for 'GeoParquet Downloader'. Click on 'Install Plugin' and it will install. Alternatively you can download the zip file from 15 | one of the [releases](https://github.com/cholmes/qgis_plugin_gpq_downloader/releases) and 'install from zip' in QGIS. For the plugin to work DuckDB 16 | needs to be installed. As of version 0.3 the plugin should try to automatically install DuckDB, but it doesn't work reliably. If you installed but don't see the 17 | icon below then it's likely because DuckDB isn't there. 18 | 19 | If the installation of DuckDB doesn't work, then on Windows you can use the [QDuckDB plugin](https://oslandia.gitlab.io/qgis/qduckdb/) which includes a precompiled binary. 20 | They also document how to install DuckDB on [Linux](https://oslandia.gitlab.io/qgis/qduckdb/usage/installation.html#linux) and 21 | [Mac OS/X](https://oslandia.gitlab.io/qgis/qduckdb/usage/installation.html#macos). If you're on Mac we recommend trying 22 | the [QGIS 4.0 mac build preview](https://github.com/opengisch/qgis-notarize/) which ships with DuckDB. 23 | 24 | See [metadata.txt](gpq_downloader/metadata.txt) for more installation notes. 
25 | 26 | ## Usage 27 | 28 | The plugin will install one button on the "Plugin" QGIS toolbar, which you might have to enable through `View > Toolbars > Plugins`: 29 | 30 | ![1_UuUno32b4P_UNUqJZvSPoQ](https://github.com/user-attachments/assets/16003294-9a76-42cb-a740-b5bbd308e484) 31 | 32 | It opens a dialog box that lets you select Overture, Source Cooperative, Hugging Face, or 'custom' - where you 33 | can enter the location of any online GeoParquet file or partition. 34 | 35 | *(screenshot: the data source selection dialog)* 36 | 37 | 38 | To use it, move to an area where you'd like to download data and then select which layer you'd like to download. From there you can choose the output format (GeoParquet, GeoPackage, DuckDB, GeoJSON, or FlatGeobuf) and the location to download the data to. 39 | 40 | Downloads can sometimes take a while, especially if the data provider hasn't optimized their GeoParquet files very well, or if you're downloading an area with a lot of data. Overture is one of the faster ones for now; others may take a minute or two. But it should almost always be faster than trying to figure out exactly which files you need and downloading them manually. 41 | 42 | For now we only support downloading into the current viewport, but hope to [improve that](https://github.com/cholmes/qgis_plugin_gpq_downloader/issues/10). Note also that right now only lat/long data is supported, but we hope to [support other projections](https://github.com/cholmes/qgis_plugin_gpq_downloader/issues/102). 43 | 44 | If your QGIS doesn't have GeoParquet support you'll get a warning dialog after the data download completes. The GeoParquet file will be there, but it won't automatically open on the map. We definitely recommend getting your QGIS working with GeoParquet, as the format is faster and handles nested attributes better. See [Installing GeoParquet Support in QGIS](https://github.com/cholmes/qgis_plugin_gpq_downloader/wiki/Installing-GeoParquet-Support-in-QGIS) for more details. 45 | 46 | 47 | ## Contributing 48 | 49 | This plugin has been made entirely with AI coding tools (primarily Cursor with claude-3.5-sonnet). Contributions are very welcome, both from more experienced Python developers who can help clean up the code and add missing features, and from anyone who wants a place to do AI-assisted coding that (hopefully) actually gets widely used. 50 | 51 | I'm interested in exploring open source collaboration in the age of AI coding tools, especially working with less experienced developers who'd like to contribute, so don't hesitate to jump in with AI-assisted pull requests. 52 | 53 | And any help on ideas/feedback, documentation, testing, promoting, etc. is very welcome! 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | *.zip 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 111 | .pdm.toml 112 | .pdm-python 113 | .pdm-build/ 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # mac stuff 159 | .DS_Store 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | 168 | # --> This is the recommended way 169 | 170 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 171 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 172 | 173 | # User-specific stuff 174 | .idea/**/workspace.xml 175 | .idea/**/tasks.xml 176 | .idea/**/usage.statistics.xml 177 | .idea/**/dictionaries 178 | .idea/**/shelf 179 | 180 | # AWS User-specific 181 | .idea/**/aws.xml 182 | 183 | # Generated files 184 | .idea/**/contentModel.xml 185 | 186 | # Sensitive or high-churn files 187 | .idea/**/dataSources/ 188 | .idea/**/dataSources.ids 189 | .idea/**/dataSources.local.xml 190 | .idea/**/sqlDataSources.xml 191 | .idea/**/dynamic.xml 192 | .idea/**/uiDesigner.xml 193 | .idea/**/dbnavigator.xml 194 | 195 | # Gradle 196 | .idea/**/gradle.xml 197 | .idea/**/libraries 198 | 199 | # Gradle and Maven with auto-import 200 | # When using Gradle or Maven with auto-import, you should exclude module files, 201 | # since they will be recreated, and may cause churn. Uncomment if using 202 | # auto-import. 203 | # .idea/artifacts 204 | # .idea/compiler.xml 205 | # .idea/jarRepositories.xml 206 | # .idea/modules.xml 207 | # .idea/*.iml 208 | # .idea/modules 209 | # *.iml 210 | # *.ipr 211 | 212 | # CMake 213 | cmake-build-*/ 214 | 215 | # Mongo Explorer plugin 216 | .idea/**/mongoSettings.xml 217 | 218 | # File-based project format 219 | *.iws 220 | 221 | # IntelliJ 222 | out/ 223 | 224 | # mpeltonen/sbt-idea plugin 225 | .idea_modules/ 226 | 227 | # JIRA plugin 228 | atlassian-ide-plugin.xml 229 | 230 | # Cursive Clojure plugin 231 | .idea/replstate.xml 232 | 233 | # SonarLint plugin 234 | .idea/sonarlint/ 235 | 236 | # Crashlytics plugin (for Android Studio and IntelliJ) 237 | com_crashlytics_export_strings.xml 238 | crashlytics.properties 239 | crashlytics-build.properties 240 | fabric.properties 241 | 242 | # Editor-based Rest Client 243 | .idea/httpRequests 244 | 245 | # Android studio 3.1+ serialized cache file 246 | .idea/caches/build_file_checksums.ser 247 | 248 | .claude/ -------------------------------------------------------------------------------- /gpq_downloader/data/presets.json: -------------------------------------------------------------------------------- 1 | { 2 | "overture": { 3 | "buildings": { 4 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=buildings/type=building/*", 5 | "info_url": "https://docs.overturemaps.org/reference/buildings", 6 | "needs_validation": false 7 | }, 8 | "places": { 9 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=places/type=place/*", 10 | "info_url": "https://docs.overturemaps.org/reference/places", 11 | "needs_validation": false 12 | }, 13 | "transportation": { 14 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=transportation/type=segment/*", 15 | "info_url": "https://docs.overturemaps.org/reference/transportation", 16 | "needs_validation": false 17 | }, 18 | "addresses": { 19 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=addresses/type=*/*", 20 | "info_url": "https://docs.overturemaps.org/reference/addresses", 21 | "needs_validation": false 22 | }, 23 | "base": { 24 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=base/type={subtype}/*", 25 | "info_url": "https://docs.overturemaps.org/reference/base", 26 | "needs_validation": false, 27 | "subtypes": [ 28 | "infrastructure", 29 | "land", 30 | "land_cover", 31 | 
"land_use", 32 | "water", 33 | "bathymetry" 34 | ] 35 | }, 36 | "divisions": { 37 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=divisions/type=division_area/*", 38 | "info_url": "https://docs.overturemaps.org/reference/administrative", 39 | "needs_validation": false 40 | } 41 | }, 42 | "source_cooperative": { 43 | "vida_buildings": { 44 | "url": "s3://us-west-2.opendata.source.coop/vida/google-microsoft-osm-open-buildings/geoparquet/by_country/*/*.parquet", 45 | "info_url": "https://source.coop/vida/google-microsoft-osm-open-buildings", 46 | "needs_validation": false, 47 | "display_name": "VIDA Google/Microsoft/OSM Buildings" 48 | }, 49 | "microsoft_ml_roads": { 50 | "url": "s3://us-west-2.opendata.source.coop/nlebovits/microsoft-ml-road-detections/by_country/*/*.parquet", 51 | "info_url": "https://source.coop/nlebovits/microsoft-ml-road-detections", 52 | "needs_validation": false, 53 | "display_name": "Microsoft ML Road Detections" 54 | }, 55 | "globalbuildingatlas": { 56 | "url": "s3://us-west-2.opendata.source.coop/tge-labs/globalbuildingatlas-lod1/*.parquet", 57 | "info_url": "https://source.coop/tge-labs/globalbuildingatlas-lod1", 58 | "needs_validation": false, 59 | "display_name": "GlobalBuildingAtlas" 60 | }, 61 | "openbuildingmap": { 62 | "url": "s3://us-west-2.opendata.source.coop/tge-labs/openbuildingmap/*.parquet", 63 | "info_url": "https://source.coop/tge-labs/openbuildingmap", 64 | "needs_validation": false, 65 | "display_name": "OpenBuildingMap" 66 | }, 67 | "fsq_places_fused": { 68 | "url": "s3://us-west-2.opendata.source.coop/fused/fsq-os-places/2025-02-06/places/*.parquet", 69 | "info_url": "https://source.coop/fused/fsq-os-places", 70 | "needs_validation": false, 71 | "display_name": "Foursquare Open Source Places - Fused-partitioned" 72 | }, 73 | "us_structures": { 74 | "url": "s3://us-west-2.opendata.source.coop/wherobots/usa-structures/geoparquet/*.parquet", 75 | "info_url": "https://source.coop/wherobots/usa-structures/geoparquet", 76 | "needs_validation": false, 77 | "display_name": "US Structures from ORNL by Wherobots" 78 | }, 79 | "planet_eu_boundaries": { 80 | "url": "https://data.source.coop/planet/eu-field-boundaries/field_boundaries.parquet", 81 | "info_url": "https://source.coop/planet/eu-field-boundaries", 82 | "needs_validation": false, 83 | "display_name": "Planet EU Field Boundaries (2022)" 84 | }, 85 | "usda_crop": { 86 | "url": "https://data.source.coop/fiboa/us-usda-cropland/us_usda_cropland.parquet", 87 | "info_url": "https://source.coop/fiboa/us-usda-cropland", 88 | "needs_validation": false, 89 | "display_name": "USDA Crop Sequence Boundaries" 90 | }, 91 | "nhd_flowlines": { 92 | "url": "https://data.source.coop/cholmes/nhd/NHDFlowline.parquet", 93 | "info_url": "https://source.coop/cholmes/nhd", 94 | "needs_validation": true, 95 | "display_name": "NHD Flowlines (experimental)" 96 | } 97 | }, 98 | "openstreetmap": { 99 | "buildings": { 100 | "url": "https://data.openstreetmap.us/layercake/buildings.parquet", 101 | "info_url": "https://data.openstreetmap.us/", 102 | "needs_validation": false 103 | }, 104 | "boundaries": { 105 | "url": "https://data.openstreetmap.us/layercake/boundaries.parquet", 106 | "info_url": "https://data.openstreetmap.us/", 107 | "needs_validation": false 108 | }, 109 | "highways": { 110 | "url": "https://data.openstreetmap.us/layercake/highways.parquet", 111 | "info_url": "https://data.openstreetmap.us/", 112 | "needs_validation": false 113 | }, 114 | "settlements": { 115 | "url": 
"https://data.openstreetmap.us/layercake/settlements.parquet", 116 | "info_url": "https://data.openstreetmap.us/", 117 | "needs_validation": false 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /gpq_downloader/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import subprocess 4 | import sys 5 | import shutil 6 | from qgis.PyQt.QtWidgets import QProgressBar, QMessageBox 7 | from qgis.PyQt.QtCore import QCoreApplication, QTimer 8 | from qgis.core import QgsTask, QgsApplication, QgsSettings 9 | from qgis.utils import iface, loadPlugin, startPlugin, unloadPlugin, plugins 10 | 11 | from . import logger 12 | 13 | # Global flag to track installation status 14 | _duckdb_ready = False 15 | 16 | 17 | class DuckDBInstallerTask(QgsTask): 18 | def __init__(self, callback): 19 | # Simple initialization with just CanCancel flag 20 | super().__init__("Installing DuckDB", QgsTask.CanCancel) 21 | self.success = False 22 | self.message = "" 23 | self.exception = None 24 | self.callback = callback 25 | # logger.log("Task initialized") 26 | 27 | def run(self): 28 | # logger.log("Task run method started") 29 | try: 30 | logger.log("Starting DuckDB installation...") 31 | if platform.system() == "Windows": 32 | py_path = os.path.join(os.path.dirname(sys.executable), "python.exe") 33 | elif platform.system() == "Darwin": 34 | qgis_bin = os.path.dirname(sys.executable) 35 | possible_paths = [ 36 | os.path.join(qgis_bin, "python3"), 37 | os.path.join(qgis_bin, "bin", "python3"), 38 | os.path.join(qgis_bin, "Resources", "python", "bin", "python3"), 39 | ] 40 | py_path = next( 41 | (path for path in possible_paths if os.path.exists(path)), 42 | sys.executable, 43 | ) 44 | else: 45 | py_path = sys.executable 46 | 47 | # logger.log(f"Using Python path: {py_path}") 48 | # logger.log(f"Running pip install command...") 49 | 50 | subprocess.check_call([py_path, "-m", "pip", "install", "--user", "duckdb"]) 51 | 52 | # logger.log("Pip install completed, reloading modules...") 53 | import importlib 54 | 55 | importlib.invalidate_caches() 56 | 57 | self.success = True 58 | self.message = "DuckDB installed successfully" 59 | return True 60 | 61 | except subprocess.CalledProcessError as e: 62 | self.exception = e 63 | self.message = f"Pip install failed: {str(e)}" 64 | logger.log(f"Installation failed with error: {str(e)}") 65 | return False 66 | except Exception as e: 67 | self.exception = e 68 | self.message = f"Failed to install/upgrade DuckDB: {str(e)}" 69 | logger.log(f"Installation failed with error: {str(e)}", 2) 70 | return False 71 | 72 | def finished(self, result): 73 | global _duckdb_ready 74 | msg_bar = iface.messageBar() 75 | msg_bar.clearWidgets() 76 | 77 | if result and self.success: 78 | try: 79 | import duckdb 80 | 81 | self.message = f"DuckDB {duckdb.__version__} installed successfully" 82 | except ImportError: 83 | pass 84 | msg_bar.pushSuccess("Success", self.message) 85 | logger.log(self.message) 86 | _duckdb_ready = True 87 | if self.callback: 88 | self.callback() 89 | else: 90 | msg_bar.pushCritical("Error", self.message) 91 | logger.log(self.message) 92 | _duckdb_ready = False 93 | 94 | 95 | def ensure_duckdb(callback=None): 96 | try: 97 | import duckdb 98 | 99 | version = duckdb.__version__ 100 | from packaging import version as version_parser 101 | 102 | if version_parser.parse(version) >= version_parser.parse("1.1.0"): 103 | logger.log(f"DuckDB {version} 
already installed") 104 | global _duckdb_ready 105 | _duckdb_ready = True 106 | if callback: 107 | callback() 108 | return True 109 | else: 110 | logger.log(f"DuckDB {version} found but needs upgrade to 1.1.0+", 2) 111 | raise ImportError("Version too old") 112 | 113 | except ImportError: 114 | logger.log("DuckDB not found or needs upgrade, attempting to install/upgrade...", 2) 115 | try: 116 | msg_bar = iface.messageBar() 117 | progress = QProgressBar() 118 | progress.setMinimum(0) 119 | progress.setMaximum(0) 120 | progress.setValue(0) 121 | 122 | msg = msg_bar.createMessage("Installing DuckDB...") 123 | msg.layout().addWidget(progress) 124 | msg_bar.pushWidget(msg) 125 | QCoreApplication.processEvents() 126 | 127 | # Create and start the task 128 | task = DuckDBInstallerTask(callback) 129 | # logger.log("Created installer task") 130 | 131 | # Get the task manager and add the task 132 | task_manager = QgsApplication.taskManager() 133 | # logger.log(f"Task manager has {task_manager.count()} tasks") 134 | 135 | # Add task and check if it was added successfully 136 | task_manager.addTask(task) 137 | # logger.log(f"Task added successfully: {success}") 138 | 139 | # Check task status 140 | # logger.log(f"Task manager now has {task_manager.count()} tasks") 141 | # logger.log(f"Task description: {task.description()}") 142 | # logger.log(f"Task status: {task.status()}") 143 | 144 | # Schedule periodic status checks with guarded access 145 | def check_status(): 146 | try: 147 | status = task.status() 148 | except RuntimeError: 149 | # logger.log("Task has been deleted, stopping status checks") 150 | return 151 | 152 | # logger.log(f"Current task status: {status}") 153 | if status == QgsTask.Queued: 154 | # logger.log("Task still queued, retriggering...") 155 | try: 156 | QgsApplication.taskManager().triggerTask(task) 157 | except RuntimeError: 158 | logger.log("Failed to trigger task, object likely deleted") 159 | return 160 | QTimer.singleShot(1000, check_status) 161 | elif status == QgsTask.Running: 162 | # logger.log("Task is running") 163 | QTimer.singleShot(1000, check_status) 164 | elif status == QgsTask.Complete: 165 | logger.log("Task completed") 166 | 167 | # Start checking status after a short delay 168 | QTimer.singleShot(100, check_status) 169 | 170 | return True 171 | 172 | except Exception as e: 173 | msg_bar.clearWidgets() 174 | msg_bar.pushCritical("Error", f"Failed to install/upgrade DuckDB: {str(e)}", 2) 175 | logger.log(f"Failed to setup task with error: {str(e)}", 2) 176 | logger.log(f"Error type: {type(e)}", 2) 177 | import traceback 178 | 179 | logger.log(f"Traceback: {traceback.format_exc()}", 2) 180 | return False 181 | 182 | 183 | def classFactory(iface): 184 | """Load the plugin class.""" 185 | from .plugin import QgisPluginGeoParquet 186 | return QgisPluginGeoParquet(iface) 187 | 188 | 189 | -------------------------------------------------------------------------------- /gpq_downloader/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | import os 4 | from qgis.core import QgsRectangle, QgsCoordinateReferenceSystem 5 | from pathlib import Path 6 | 7 | from gpq_downloader.utils import ( 8 | transform_bbox_to_4326, 9 | Worker, 10 | ValidationWorker 11 | ) 12 | 13 | # Add new test for file size estimation 14 | def test_estimate_file_size(mock_iface, sample_bbox, tmp_path): 15 | """Test file size estimation for GeoJSON output""" 16 | # Create mock 
connection and cursor 17 | mock_conn = MagicMock() 18 | mock_conn.execute.return_value.fetchone.side_effect = [ 19 | (1000,), # row count 20 | (2000.0,) # avg feature size 21 | ] 22 | 23 | # Create worker 24 | worker = Worker( 25 | "https://example.com/test.parquet", 26 | sample_bbox, 27 | str(tmp_path / "test.geojson"), 28 | mock_iface, 29 | {"has_bbox": True, "bbox_column": "bbox"} 30 | ) 31 | 32 | # Test size estimation 33 | estimated_size = worker.estimate_file_size(mock_conn, "test_table") 34 | assert estimated_size > 0 35 | assert isinstance(estimated_size, float) 36 | 37 | # Add test for process_schema_columns 38 | def test_process_schema_columns(): 39 | """Test schema column processing for different data types""" 40 | # Create worker 41 | worker = Worker( 42 | "https://example.com/test.parquet", 43 | QgsRectangle(0, 0, 1, 1), 44 | "test.parquet", 45 | MagicMock(), 46 | {"has_bbox": True} 47 | ) 48 | 49 | # Test different column types 50 | schema_result = [ 51 | ("id", "INTEGER", "YES", None, None, None), 52 | ("tags", "MAP(VARCHAR, VARCHAR)", "YES", None, None, None), 53 | ("names", "STRUCT(primary VARCHAR)", "YES", None, None, None), 54 | ("categories", "VARCHAR[]", "YES", None, None, None), 55 | ("small_num", "UTINYINT", "YES", None, None, None), 56 | ("geometry", "GEOMETRY", "YES", None, None, None) 57 | ] 58 | 59 | columns = worker.process_schema_columns(schema_result) 60 | 61 | assert len(columns) == 6 62 | assert 'TO_JSON("tags")' in columns[1] 63 | assert 'TO_JSON("names")' in columns[2] 64 | assert 'array_to_string("categories"' in columns[3] 65 | assert 'CAST("small_num" AS INTEGER)' in columns[4] 66 | 67 | # Add test for ValidationWorker metadata parsing 68 | @patch('duckdb.connect') 69 | def test_validation_worker_metadata_parsing(mock_connect, mock_iface): 70 | """Test GeoParquet metadata parsing in ValidationWorker""" 71 | # Mock connection with metadata 72 | mock_conn = MagicMock() 73 | mock_conn.execute.return_value.fetchall.return_value = [ 74 | (b"geo", b'{"columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"covering":{"bbox":{"xmin":[0],"ymin":[0],"xmax":[1],"ymax":[1]}}}}}') 75 | ] 76 | mock_connect.return_value = mock_conn 77 | 78 | worker = ValidationWorker( 79 | "https://example.com/test.parquet", 80 | mock_iface, 81 | QgsRectangle(0, 0, 1, 1) 82 | ) 83 | 84 | # Test metadata parsing 85 | bbox_column = worker.check_bbox_metadata(mock_conn) 86 | assert bbox_column is not None 87 | 88 | # Add test for needs_validation method 89 | def test_validation_worker_needs_validation(): 90 | """Test needs_validation logic for different URLs""" 91 | worker = ValidationWorker( 92 | "https://example.com/test.parquet", 93 | MagicMock(), 94 | QgsRectangle(0, 0, 1, 1) 95 | ) 96 | 97 | # Test custom URL 98 | assert worker.needs_validation() == True 99 | 100 | # Test Overture URL 101 | worker.dataset_url = "s3://overturemaps-us-west-2/release/2025-10-22.0/theme=buildings" 102 | assert worker.needs_validation() == False 103 | 104 | # Test Source Cooperative URL with validation flag 105 | worker.PRESET_DATASETS = { 106 | "source_cooperative": { 107 | "test_dataset": { 108 | "url": "https://example.com/test.parquet", 109 | "needs_validation": False 110 | } 111 | } 112 | } 113 | worker.dataset_url = "https://example.com/test.parquet" 114 | assert worker.needs_validation() == False 115 | 116 | # Add test for transform_bbox_to_4326 with invalid inputs 117 | def test_transform_bbox_invalid_inputs(qgs_app): 118 | """Test bbox transformation with invalid inputs""" 119 | # 
Test with None extent 120 | assert transform_bbox_to_4326(None, QgsCoordinateReferenceSystem("EPSG:4326")) is None 121 | 122 | # Test with None CRS 123 | assert transform_bbox_to_4326(QgsRectangle(0, 0, 1, 1), None) is None 124 | 125 | # Test with invalid CRS 126 | invalid_crs = QgsCoordinateReferenceSystem() 127 | assert not invalid_crs.isValid() 128 | result = transform_bbox_to_4326(QgsRectangle(0, 0, 1, 1), invalid_crs) 129 | assert isinstance(result, QgsRectangle) 130 | 131 | # Add test for Worker initialization with layer name 132 | def test_worker_initialization_with_layer_name(mock_iface, sample_bbox, tmp_path): 133 | """Test Worker initialization with optional layer name""" 134 | worker = Worker( 135 | "https://example.com/test.parquet", 136 | sample_bbox, 137 | str(tmp_path / "test.parquet"), 138 | mock_iface, 139 | {"has_bbox": True}, 140 | layer_name="Test Layer" 141 | ) 142 | 143 | assert worker.layer_name == "Test Layer" 144 | assert not worker.size_warning_accepted 145 | assert not worker.killed 146 | 147 | def test_transform_bbox_to_4326(qgs_app): 148 | """Test transforming a bounding box to EPSG:4326""" 149 | # Create test bbox in EPSG:3857 150 | source_crs = QgsCoordinateReferenceSystem("EPSG:3857") 151 | input_bbox = QgsRectangle(1000000, 2000000, 1010000, 2010000) 152 | 153 | # Transform 154 | result_bbox = transform_bbox_to_4326(input_bbox, source_crs) 155 | 156 | # Check result is in 4326 157 | assert isinstance(result_bbox, QgsRectangle) 158 | assert result_bbox.xMinimum() != input_bbox.xMinimum() # Values should change after transform 159 | 160 | # Test when already in 4326 (no transformation needed) 161 | already_4326 = QgsRectangle(1, 2, 3, 4) 162 | result = transform_bbox_to_4326(already_4326, QgsCoordinateReferenceSystem("EPSG:4326")) 163 | assert result.xMinimum() == already_4326.xMinimum() 164 | 165 | def test_worker_initialization(mock_iface, sample_bbox, tmp_path, sample_validation_results): 166 | """Test Worker initialization""" 167 | # Create test parameters 168 | dataset_url = "https://example.com/test.parquet" 169 | output_file = os.path.join(tmp_path, "output.gpkg") 170 | 171 | # Initialize worker 172 | worker = Worker(dataset_url, sample_bbox, output_file, mock_iface, sample_validation_results) 173 | 174 | # Check properties 175 | assert worker.dataset_url == dataset_url 176 | assert worker.extent == sample_bbox 177 | assert worker.output_file == output_file 178 | assert worker.validation_results == sample_validation_results 179 | assert worker.killed is False 180 | 181 | def test_validation_worker_initialization(mock_iface, sample_bbox): 182 | """Test ValidationWorker initialization""" 183 | dataset_url = "https://example.com/test.parquet" 184 | 185 | # Initialize validation worker 186 | worker = ValidationWorker(dataset_url, mock_iface, sample_bbox) 187 | 188 | # Check properties 189 | assert worker.dataset_url == dataset_url 190 | assert worker.extent == sample_bbox 191 | assert worker.killed is False 192 | 193 | def test_transform_bbox_with_none(qgs_app): 194 | """Test transform_bbox_to_4326 with None input""" 195 | result = transform_bbox_to_4326(None, None) 196 | assert result is None 197 | 198 | @patch('duckdb.connect') 199 | def test_worker_error_handling(mock_connect, mock_iface, sample_bbox, tmp_path): 200 | """Test Worker error handling""" 201 | mock_connect.side_effect = Exception("Test error") 202 | 203 | # Create signals for testing 204 | error_message = None 205 | def on_error(msg): 206 | nonlocal error_message 207 | error_message = msg 
208 | 209 | # Create worker 210 | worker = Worker( 211 | "https://example.com/test.parquet", 212 | sample_bbox, 213 | str(tmp_path / "test.parquet"), 214 | mock_iface, 215 | {"has_bbox": True, "bbox_column": "bbox"} 216 | ) 217 | worker.error.connect(on_error) 218 | 219 | # Run worker 220 | worker.run() 221 | 222 | assert error_message is not None 223 | assert "Test error" in error_message -------------------------------------------------------------------------------- /gpq_downloader/tests/create_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Create test data files for the GPQ Downloader plugin tests. 4 | 5 | This module can create: 6 | 1. Non-GeoParquet compliant parquet files (compatible parquet as per spec) 7 | 2. Standard GeoParquet files (with proper metadata) 8 | 3. Other test data as needed 9 | """ 10 | 11 | import pyarrow as pa 12 | import pyarrow.parquet as pq 13 | from shapely.geometry import LineString 14 | from shapely import wkb 15 | import pandas as pd 16 | 17 | 18 | def create_non_geoparquet_file(output_path="non_geoparquet_with_geometry.parquet"): 19 | """Create a parquet file with WKB geometry but no GeoParquet metadata.""" 20 | 21 | # Create sample LineString geometries representing street segments in San Francisco 22 | # Using approximate coordinates for real SF streets 23 | geometries = [ 24 | # Market Street segment 25 | LineString([(-122.4194, 37.7749), (-122.4184, 37.7759), (-122.4174, 37.7769)]), 26 | 27 | # Mission Street segment 28 | LineString([(-122.4180, 37.7600), (-122.4170, 37.7610), (-122.4160, 37.7620)]), 29 | 30 | # Geary Boulevard segment 31 | LineString([(-122.4650, 37.7810), (-122.4640, 37.7810), (-122.4630, 37.7810)]), 32 | 33 | # Van Ness Avenue segment 34 | LineString([(-122.4220, 37.7750), (-122.4220, 37.7760), (-122.4220, 37.7770)]), 35 | 36 | # Embarcadero segment 37 | LineString([(-122.3950, 37.7950), (-122.3940, 37.7940), (-122.3930, 37.7930)]), 38 | 39 | # Lombard Street segment (the famous crooked part) 40 | LineString([(-122.4186, 37.8021), (-122.4176, 37.8020), (-122.4166, 37.8019)]), 41 | 42 | # Golden Gate Park path 43 | LineString([(-122.4820, 37.7700), (-122.4810, 37.7700), (-122.4800, 37.7700)]) 44 | ] 45 | 46 | # Convert to WKB 47 | wkb_geometries = [wkb.dumps(geom) for geom in geometries] 48 | 49 | # Create other sample data 50 | data = { 51 | 'id': [1, 2, 3, 4, 5, 6, 7], 52 | 'name': [ 53 | 'Market St', 54 | 'Mission St', 55 | 'Geary Blvd', 56 | 'Van Ness Ave', 57 | 'Embarcadero', 58 | 'Lombard St', 59 | 'GG Park Trail' 60 | ], 61 | 'type': [ 62 | 'major_street', 63 | 'major_street', 64 | 'boulevard', 65 | 'avenue', 66 | 'waterfront', 67 | 'tourist_street', 68 | 'park_path' 69 | ], 70 | 'length_m': [250.5, 220.3, 180.7, 200.9, 190.1, 150.2, 210.4], 71 | 'geometry': wkb_geometries # WKB-encoded geometry column 72 | } 73 | 74 | # Create DataFrame 75 | df = pd.DataFrame(data) 76 | 77 | # Convert to PyArrow Table 78 | # Note: We're explicitly NOT adding any geo metadata 79 | table = pa.Table.from_pandas(df) 80 | 81 | # Write to Parquet file WITHOUT any geo metadata 82 | pq.write_table(table, output_path) 83 | 84 | print(f"Created non-GeoParquet file: {output_path}") 85 | print(f"Columns: {list(data.keys())}") 86 | print(f"Rows: {len(df)}") 87 | 88 | # Verify it has no geo metadata 89 | parquet_file = pq.ParquetFile(output_path) 90 | metadata = parquet_file.metadata 91 | 92 | # Check that there's no "geo" key in the metadata 93 | if metadata.metadata: 
94 | metadata_dict = {k.decode(): v.decode() for k, v in metadata.metadata.items()} 95 | has_geo = 'geo' in metadata_dict 96 | print(f"Has 'geo' metadata: {has_geo}") 97 | else: 98 | print("No metadata present") 99 | 100 | return output_path 101 | 102 | 103 | def create_geoparquet_file(output_path="geoparquet_with_metadata.parquet"): 104 | """Create a proper GeoParquet file with geo metadata.""" 105 | 106 | # Same geometries as non-geoparquet version 107 | geometries = [ 108 | LineString([(-122.4194, 37.7749), (-122.4184, 37.7759), (-122.4174, 37.7769)]), 109 | LineString([(-122.4180, 37.7600), (-122.4170, 37.7610), (-122.4160, 37.7620)]), 110 | LineString([(-122.4650, 37.7810), (-122.4640, 37.7810), (-122.4630, 37.7810)]), 111 | LineString([(-122.4220, 37.7750), (-122.4220, 37.7760), (-122.4220, 37.7770)]), 112 | LineString([(-122.3950, 37.7950), (-122.3940, 37.7940), (-122.3930, 37.7930)]), 113 | LineString([(-122.4186, 37.8021), (-122.4176, 37.8020), (-122.4166, 37.8019)]), 114 | LineString([(-122.4820, 37.7700), (-122.4810, 37.7700), (-122.4800, 37.7700)]) 115 | ] 116 | 117 | # Convert to WKB 118 | wkb_geometries = [wkb.dumps(geom) for geom in geometries] 119 | 120 | # Create data 121 | data = { 122 | 'id': [1, 2, 3, 4, 5, 6, 7], 123 | 'name': [ 124 | 'Market St', 125 | 'Mission St', 126 | 'Geary Blvd', 127 | 'Van Ness Ave', 128 | 'Embarcadero', 129 | 'Lombard St', 130 | 'GG Park Trail' 131 | ], 132 | 'type': [ 133 | 'major_street', 134 | 'major_street', 135 | 'boulevard', 136 | 'avenue', 137 | 'waterfront', 138 | 'tourist_street', 139 | 'park_path' 140 | ], 141 | 'length_m': [250.5, 220.3, 180.7, 200.9, 190.1, 150.2, 210.4], 142 | 'geometry': wkb_geometries 143 | } 144 | 145 | df = pd.DataFrame(data) 146 | table = pa.Table.from_pandas(df) 147 | 148 | # Create GeoParquet metadata 149 | geo_metadata = { 150 | "version": "1.0.0", 151 | "primary_column": "geometry", 152 | "columns": { 153 | "geometry": { 154 | "encoding": "WKB", 155 | "geometry_types": ["LineString"], 156 | "crs": { 157 | "$schema": "https://proj.org/schemas/v0.6/projjson.schema.json", 158 | "type": "GeographicCRS", 159 | "name": "WGS 84", 160 | "datum": { 161 | "type": "GeodeticReferenceFrame", 162 | "name": "World Geodetic System 1984", 163 | "ellipsoid": { 164 | "name": "WGS 84", 165 | "semi_major_axis": 6378137, 166 | "inverse_flattening": 298.257223563 167 | } 168 | }, 169 | "coordinate_system": { 170 | "subtype": "ellipsoidal", 171 | "axis": [ 172 | { 173 | "name": "Geodetic longitude", 174 | "abbreviation": "Lon", 175 | "direction": "east", 176 | "unit": "degree" 177 | }, 178 | { 179 | "name": "Geodetic latitude", 180 | "abbreviation": "Lat", 181 | "direction": "north", 182 | "unit": "degree" 183 | } 184 | ] 185 | }, 186 | "id": { 187 | "authority": "EPSG", 188 | "code": 4326 189 | } 190 | } 191 | } 192 | } 193 | } 194 | 195 | # Convert metadata to JSON string 196 | import json 197 | geo_metadata_str = json.dumps(geo_metadata) 198 | 199 | # Create new metadata with geo key 200 | metadata = table.schema.metadata or {} 201 | metadata[b'geo'] = geo_metadata_str.encode('utf-8') 202 | 203 | # Create new table with metadata 204 | table = table.replace_schema_metadata(metadata) 205 | 206 | # Write GeoParquet file 207 | pq.write_table(table, output_path) 208 | 209 | print(f"Created GeoParquet file: {output_path}") 210 | print(f"Columns: {list(data.keys())}") 211 | print(f"Rows: {len(df)}") 212 | 213 | # Verify it has geo metadata 214 | parquet_file = pq.ParquetFile(output_path) 215 | metadata = parquet_file.metadata 216 | 
217 | if metadata.metadata and b'geo' in metadata.metadata: 218 | print("Has 'geo' metadata: True") 219 | else: 220 | print("Has 'geo' metadata: False") 221 | 222 | return output_path 223 | 224 | 225 | if __name__ == "__main__": 226 | import os 227 | 228 | # Create data directory if it doesn't exist 229 | data_dir = os.path.join(os.path.dirname(__file__), 'data') 230 | os.makedirs(data_dir, exist_ok=True) 231 | 232 | # Create both types of files 233 | non_geo_path = os.path.join(data_dir, 'non_geoparquet_with_geometry.parquet') 234 | geo_path = os.path.join(data_dir, 'geoparquet_with_metadata.parquet') 235 | 236 | print("Creating test data files...") 237 | print("-" * 50) 238 | create_non_geoparquet_file(non_geo_path) 239 | print("-" * 50) 240 | create_geoparquet_file(geo_path) 241 | print("-" * 50) 242 | print("Test data creation complete!") -------------------------------------------------------------------------------- /gpq_downloader/tests/test_plugin.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import datetime 3 | from unittest.mock import MagicMock, patch, call 4 | from qgis.PyQt.QtWidgets import QAction, QProgressDialog, QMessageBox, QFileDialog, QDialog, QVBoxLayout, QLabel 5 | from qgis.core import QgsProject, QgsVectorLayer, QgsSettings, QgsCoordinateReferenceSystem, QgsRectangle 6 | from pathlib import Path 7 | # Note: qtbot is injected as a pytest-qt fixture; importing it from pytestqt is unnecessary 8 | 9 | from gpq_downloader.plugin import QgisPluginGeoParquet 10 | from gpq_downloader.dialog import DataSourceDialog 11 | 12 | def test_plugin_run_with_active_download(qgs_app, mock_iface): 13 | """Test run method when a download is already in progress""" 14 | plugin = QgisPluginGeoParquet(mock_iface) 15 | plugin.worker = MagicMock() 16 | plugin.worker_thread = MagicMock() 17 | plugin.worker_thread.isRunning.return_value = True 18 | 19 | with patch('gpq_downloader.plugin.QMessageBox.warning') as mock_warning: 20 | plugin.run() 21 | mock_warning.assert_called_once() 22 | assert "Download in Progress" in mock_warning.call_args[0][1] 23 | 24 | @patch('gpq_downloader.plugin.DataSourceDialog') 25 | def test_plugin_run_dialog_rejected(mock_dialog, qgs_app, mock_iface): 26 | """Test run method when dialog is rejected""" 27 | plugin = QgisPluginGeoParquet(mock_iface) 28 | 29 | # Setup mock dialog 30 | dialog_instance = MagicMock() 31 | dialog_instance.exec.return_value = QDialog.Rejected 32 | mock_dialog.return_value = dialog_instance 33 | 34 | plugin.run() 35 | 36 | dialog_instance.exec.assert_called_once() 37 | assert plugin.worker is None 38 | assert plugin.worker_thread is None 39 | 40 | @patch('gpq_downloader.plugin.QgsSettings') 41 | @patch('gpq_downloader.plugin.QFileDialog.getSaveFileName') 42 | @patch('gpq_downloader.plugin.DataSourceDialog') 43 | def test_plugin_run_with_download(mock_dialog, mock_save_dialog, mock_settings, qgs_app, mock_iface, tmp_path): 44 | """Test run method with successful download setup""" 45 | plugin = QgisPluginGeoParquet(mock_iface) 46 | 47 | # Setup mock dialog 48 | dialog_instance = MagicMock() 49 | dialog_instance.exec.return_value = QDialog.Accepted 50 | dialog_instance.get_urls.return_value = ["https://example.com/test.parquet?theme=buildings"] 51 | dialog_instance.overture_radio.isChecked.return_value = True 52 | mock_dialog.return_value = dialog_instance 53 | 54 | # Setup mock save dialog 55 | output_file = str(tmp_path / "test.parquet") 56 | mock_save_dialog.return_value = (output_file, "GeoParquet (*.parquet)") 57 | 58 | # Setup mock settings 59
| mock_settings_instance = MagicMock() 60 | mock_settings.return_value = mock_settings_instance 61 | 62 | # Mock datetime to avoid timestamp issues 63 | with patch('gpq_downloader.plugin.datetime') as mock_datetime: 64 | mock_datetime.datetime.now.return_value.strftime.return_value = "20230101_120000" 65 | 66 | # Mock the process_download_queue method to avoid actual processing 67 | with patch.object(plugin, 'process_download_queue'): 68 | plugin.run() 69 | 70 | mock_save_dialog.assert_called_once() 71 | 72 | def test_plugin_handle_error(qgs_app, mock_iface): 73 | """Test error handling""" 74 | plugin = QgisPluginGeoParquet(mock_iface) 75 | plugin.progress_dialog = MagicMock() 76 | error_msg = "Test error" 77 | 78 | with patch('gpq_downloader.plugin.QMessageBox.critical') as mock_critical: 79 | plugin.handle_error(error_msg) 80 | mock_critical.assert_called_once() 81 | assert mock_critical.call_args[0][1] == "Error" or error_msg in mock_critical.call_args[0][1] 82 | plugin.progress_dialog.close.assert_called_once() 83 | 84 | def test_plugin_update_progress(qgs_app, mock_iface): 85 | """Test progress updates""" 86 | plugin = QgisPluginGeoParquet(mock_iface) 87 | plugin.progress_dialog = MagicMock() 88 | 89 | plugin.update_progress("Test progress") 90 | plugin.progress_dialog.setLabelText.assert_called_once_with("Test progress") 91 | 92 | def test_plugin_cancel_download(qgs_app, mock_iface): 93 | """Test download cancellation""" 94 | plugin = QgisPluginGeoParquet(mock_iface) 95 | plugin.worker = MagicMock() 96 | plugin.worker_thread = MagicMock() 97 | 98 | # Patch the cleanup_thread method to verify it's called 99 | with patch.object(plugin, 'cleanup_thread') as mock_cleanup: 100 | plugin.cancel_download() 101 | plugin.worker.kill.assert_called_once() 102 | mock_cleanup.assert_called_once() 103 | 104 | @patch('gpq_downloader.plugin.QgsVectorLayer') 105 | def test_plugin_load_layer_success(mock_vector_layer, qgs_app, mock_iface): 106 | """Test successful layer loading""" 107 | plugin = QgisPluginGeoParquet(mock_iface) 108 | 109 | # Setup mock layer 110 | mock_layer = MagicMock() 111 | mock_layer.isValid.return_value = True 112 | mock_vector_layer.return_value = mock_layer 113 | 114 | # Setup mock project 115 | mock_project = MagicMock() 116 | 117 | with patch('gpq_downloader.plugin.QgsProject.instance', return_value=mock_project): 118 | plugin.load_layer("test.gpkg") 119 | mock_project.addMapLayer.assert_called_once_with(mock_layer) 120 | 121 | @patch('gpq_downloader.plugin.QgsVectorLayer') 122 | def test_plugin_load_layer_invalid(mock_vector_layer, qgs_app, mock_iface): 123 | """Test loading invalid layer""" 124 | plugin = QgisPluginGeoParquet(mock_iface) 125 | 126 | # Setup mock layer 127 | mock_layer = MagicMock() 128 | mock_layer.isValid.return_value = False 129 | mock_vector_layer.return_value = mock_layer 130 | 131 | with patch('gpq_downloader.plugin.QMessageBox.critical') as mock_critical: 132 | plugin.load_layer("test.gpkg") 133 | mock_critical.assert_called_once() 134 | assert mock_critical.call_args[0][0] == mock_iface.mainWindow() 135 | assert mock_critical.call_args[0][1] == "Error" or "test.gpkg" in mock_critical.call_args[0][1] 136 | 137 | def test_plugin_show_info(qgs_app, mock_iface): 138 | """Test info message display""" 139 | plugin = QgisPluginGeoParquet(mock_iface) 140 | test_message = "Test info" 141 | 142 | with patch('gpq_downloader.plugin.QMessageBox.information') as mock_info: 143 | plugin.show_info(test_message) 144 | mock_info.assert_called_once() 145 | assert 
mock_info.call_args[0][0] == mock_iface.mainWindow() 146 | assert mock_info.call_args[0][1] == "Success" or test_message in mock_info.call_args[0][1] 147 | 148 | def test_plugin_initialization(qgs_app, mock_iface): 149 | """Test plugin initialization""" 150 | plugin = QgisPluginGeoParquet(mock_iface) 151 | assert plugin.iface == mock_iface 152 | assert plugin.worker is None 153 | assert plugin.worker_thread is None 154 | assert isinstance(plugin.download_dir, Path) 155 | 156 | def test_plugin_init_gui(qgs_app, mock_iface): 157 | """Test initGui method""" 158 | plugin = QgisPluginGeoParquet(mock_iface) 159 | plugin.initGui() 160 | 161 | # Check that action was created 162 | assert isinstance(plugin.action, QAction) 163 | assert plugin.action.text() == "Download GeoParquet Data" 164 | 165 | # Check that icon was added to toolbar 166 | assert len(mock_iface.toolbar_icons) == 1 167 | assert mock_iface.toolbar_icons[0] == plugin.action 168 | 169 | def test_plugin_unload(qgs_app, mock_iface): 170 | """Test plugin unload""" 171 | plugin = QgisPluginGeoParquet(mock_iface) 172 | plugin.initGui() # Add the icon first 173 | 174 | # Verify icon was added 175 | assert len(mock_iface.toolbar_icons) == 1 176 | 177 | # Mock worker thread to not be running 178 | plugin.worker_thread = MagicMock() 179 | plugin.worker_thread.isRunning.return_value = False 180 | 181 | # Unload plugin 182 | plugin.unload() 183 | 184 | # Check that icon was removed 185 | assert len(mock_iface.toolbar_icons) == 0 186 | 187 | @patch('gpq_downloader.plugin.QThread') 188 | def test_plugin_cleanup_thread(mock_thread, qgs_app, mock_iface): 189 | """Test thread cleanup""" 190 | plugin = QgisPluginGeoParquet(mock_iface) 191 | plugin.worker = MagicMock() 192 | plugin.worker_thread = MagicMock() 193 | 194 | plugin.cleanup_thread() 195 | assert plugin.worker is None 196 | assert plugin.worker_thread is None 197 | 198 | def test_handle_validation_complete_success(qgs_app, mock_iface, qtbot): 199 | plugin = QgisPluginGeoParquet(mock_iface) 200 | 201 | # Create a fake dialog and attach the expected attributes. 202 | fake_dialog = QDialog() 203 | qtbot.addWidget(fake_dialog) 204 | # Fake overture radio button; isChecked() returns True. 205 | fake_radio = MagicMock() 206 | fake_radio.isChecked.return_value = True 207 | fake_dialog.overture_radio = fake_radio 208 | 209 | # Fake overture combo box for theme selection. 210 | fake_combo = MagicMock() 211 | fake_combo.currentText.return_value = "castle" # Any theme other than "base" 212 | fake_dialog.overture_combo = fake_combo 213 | 214 | # Use a valid dummy extent (avoid using a MagicMock) 215 | dummy_extent = QgsRectangle(0, 0, 10, 10) 216 | 217 | # Patch the file dialog: simulate user clicking "Save" by returning a valid filename. 218 | with patch('qgis.PyQt.QtWidgets.QFileDialog.getSaveFileName', 219 | return_value=("test_output.parquet", "GeoParquet (*.parquet)")) as mock_save_dialog: 220 | plugin.download_and_save = MagicMock() 221 | 222 | plugin.handle_validation_complete( 223 | success=True, 224 | message="", 225 | validation_results={}, 226 | url="https://example.com/test.parquet", 227 | extent=dummy_extent, 228 | dialog=fake_dialog 229 | ) 230 | 231 | mock_save_dialog.assert_called_once() 232 | plugin.download_and_save.assert_called_once() 233 | 234 | def test_handle_validation_complete_cancel(qgs_app, mock_iface, qtbot): 235 | plugin = QgisPluginGeoParquet(mock_iface) 236 | 237 | # Create a fake dialog with the same expected attributes. 
238 | fake_dialog = QDialog() 239 | qtbot.addWidget(fake_dialog) 240 | fake_radio = MagicMock() 241 | fake_radio.isChecked.return_value = True 242 | fake_dialog.overture_radio = fake_radio 243 | 244 | fake_combo = MagicMock() 245 | fake_combo.currentText.return_value = "castle" 246 | fake_dialog.overture_combo = fake_combo 247 | 248 | # Use a valid dummy extent instead of a MagicMock. 249 | dummy_extent = QgsRectangle(0, 0, 10, 10) 250 | 251 | # Simulate the file dialog being cancelled by returning empty strings. 252 | with patch('qgis.PyQt.QtWidgets.QFileDialog.getSaveFileName', 253 | return_value=("", "")) as mock_save_dialog: 254 | plugin.download_and_save = MagicMock() 255 | # Optionally, also patch the warning to confirm no warning is shown. 256 | with patch('gpq_downloader.plugin.QMessageBox.warning') as mock_warning: 257 | plugin.handle_validation_complete( 258 | success=True, 259 | message="", 260 | validation_results={}, 261 | url="https://example.com/test.parquet", 262 | extent=dummy_extent, 263 | dialog=fake_dialog 264 | ) 265 | mock_save_dialog.assert_called_once() 266 | plugin.download_and_save.assert_not_called() 267 | # In the cancel case, no warning message is expected. 268 | mock_warning.assert_not_called() 269 | 270 | def test_handle_validation_complete_failure(qgs_app, mock_iface): 271 | plugin = QgisPluginGeoParquet(mock_iface) 272 | 273 | with patch('gpq_downloader.plugin.QMessageBox.warning') as mock_warning: 274 | plugin.handle_validation_complete( 275 | success=False, 276 | message="Validation failed", 277 | validation_results={}, 278 | url="https://example.com/test.parquet", 279 | extent=MagicMock(), 280 | dialog=MagicMock() 281 | ) 282 | mock_warning.assert_called_once_with(mock_iface.mainWindow(), "Validation Error", "Validation failed") 283 | 284 | def test_create_progress_dialog(qgs_app, mock_iface): 285 | plugin = QgisPluginGeoParquet(mock_iface) 286 | progress_dialog = plugin.create_progress_dialog("Test Title", "Test Message") 287 | 288 | assert progress_dialog.windowTitle() == "Test Title" 289 | assert progress_dialog.labelText() == "Test Message" 290 | 291 | def test_setup_worker(qgs_app, mock_iface): 292 | plugin = QgisPluginGeoParquet(mock_iface) 293 | plugin.progress_dialog = MagicMock() # Ensure progress_dialog is initialized 294 | dataset_url = "https://example.com/test.parquet" 295 | extent = MagicMock() 296 | output_file = "output.parquet" 297 | validation_results = {"has_bbox": True} 298 | 299 | worker, worker_thread = plugin.setup_worker(dataset_url, extent, output_file, validation_results) 300 | 301 | assert worker is not None 302 | assert worker_thread is not None 303 | assert worker.dataset_url == dataset_url 304 | assert worker.extent == extent 305 | assert worker.output_file == output_file 306 | assert worker.validation_results == validation_results -------------------------------------------------------------------------------- /gpq_downloader/icons/parquet-download.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpq_downloader/tests/test_non_geoparquet.py: -------------------------------------------------------------------------------- 1 | """Tests for handling non-GeoParquet compliant parquet files with geometry columns.""" 2 | 3 | import pytest 4 | from pathlib import Path 5 | from unittest.mock import MagicMock, patch, call 6 | import pyarrow.parquet as pq 7 | import duckdb 8 | import tempfile 9 | import os 10 | 11 
| from gpq_downloader.utils import Worker, ValidationWorker 12 | 13 | 14 | class TestNonGeoParquetHandling: 15 | """Test handling of parquet files with geometry but no geo metadata.""" 16 | 17 | @pytest.fixture 18 | def test_data_path(self): 19 | """Path to test data directory.""" 20 | return Path(__file__).parent / "data" 21 | 22 | @pytest.fixture 23 | def non_geoparquet_file(self, test_data_path): 24 | """Path to non-GeoParquet test file.""" 25 | return test_data_path / "non_geoparquet_with_geometry.parquet" 26 | 27 | def test_non_geoparquet_file_exists(self, non_geoparquet_file): 28 | """Verify test file exists and has expected structure.""" 29 | assert non_geoparquet_file.exists(), f"Test file not found: {non_geoparquet_file}" 30 | 31 | # Verify file structure 32 | pf = pq.ParquetFile(non_geoparquet_file) 33 | schema = pf.schema 34 | 35 | # Check expected columns 36 | column_names = [field.name for field in schema] 37 | assert "geometry" in column_names 38 | assert "id" in column_names 39 | assert "name" in column_names 40 | 41 | # Verify no geo metadata 42 | metadata = pf.metadata.metadata 43 | if metadata: 44 | metadata_dict = {k.decode(): v.decode() for k, v in metadata.items()} 45 | assert "geo" not in metadata_dict 46 | 47 | @patch('gpq_downloader.utils.transform_bbox_to_4326') 48 | @patch('gpq_downloader.utils.duckdb.connect') 49 | def test_worker_handles_non_geoparquet(self, mock_connect, mock_transform_bbox, non_geoparquet_file, tmp_path): 50 | """Test that Worker can process non-GeoParquet files with geometry.""" 51 | # Mock connection 52 | mock_conn = MagicMock() 53 | mock_connect.return_value = mock_conn 54 | 55 | # Mock execute method to handle spatial extension loading 56 | def mock_execute(query): 57 | result = MagicMock() 58 | if "DESCRIBE SELECT" in query: 59 | # Schema query result 60 | result.fetchall.return_value = [ 61 | ('id', 'BIGINT', 'YES', None, None, None), 62 | ('name', 'VARCHAR', 'YES', None, None, None), 63 | ('type', 'VARCHAR', 'YES', None, None, None), 64 | ('length_m', 'DOUBLE', 'YES', None, None, None), 65 | ('geometry', 'BLOB', 'YES', None, None, None) # Geometry as BLOB, not WKB_BLOB 66 | ] 67 | elif "SELECT COUNT(*)" in query: 68 | # Count query result 69 | result.fetchone.return_value = (7,) 70 | else: 71 | # For other queries (INSTALL, LOAD, CREATE TABLE, etc.) 
72 | result.fetchall.return_value = [] 73 | result.fetchone.return_value = None 74 | return result 75 | 76 | mock_conn.execute.side_effect = mock_execute 77 | 78 | # Mock transform_bbox_to_4326 to return a proper bbox for testing 79 | from qgis.core import QgsRectangle 80 | mock_bbox = QgsRectangle(-180, -90, 180, 90) # Global extent 81 | mock_transform_bbox.return_value = mock_bbox 82 | 83 | # Create worker with temp output file 84 | output_file = tmp_path / "test_output.parquet" 85 | 86 | # Mock iface 87 | mock_iface = MagicMock() 88 | mock_iface.mapCanvas.return_value.mapSettings.return_value.destinationCrs.return_value = MagicMock() 89 | 90 | # Mock validation results 91 | validation_results = { 92 | 'has_geometry': True, 93 | 'geometry_type': 'BLOB', 94 | 'total_features': 7 95 | } 96 | 97 | worker = Worker( 98 | dataset_url=f"file://{non_geoparquet_file}", 99 | extent=None, 100 | output_file=str(output_file), 101 | iface=mock_iface, 102 | validation_results=validation_results 103 | ) 104 | 105 | # Mock signals 106 | worker.progress = MagicMock() 107 | worker.error = MagicMock() 108 | worker.finished = MagicMock() 109 | 110 | # Run worker 111 | worker.run() 112 | 113 | # Verify spatial extension was loaded 114 | execute_calls = [call[0][0] for call in mock_conn.execute.call_args_list] 115 | assert any("INSTALL spatial" in call for call in execute_calls) 116 | assert any("LOAD spatial" in call for call in execute_calls) 117 | 118 | # Verify no errors 119 | worker.error.emit.assert_not_called() 120 | 121 | # Verify finished signal was emitted 122 | worker.finished.emit.assert_called_once() 123 | 124 | @patch('gpq_downloader.utils.transform_bbox_to_4326') 125 | @patch('gpq_downloader.utils.duckdb.connect') 126 | def test_non_geoparquet_spatial_query(self, mock_connect, mock_transform_bbox, non_geoparquet_file, tmp_path): 127 | """Test spatial filtering works without bbox column.""" 128 | mock_conn = MagicMock() 129 | mock_connect.return_value = mock_conn 130 | 131 | # Track all queries 132 | queries_executed = [] 133 | 134 | def mock_execute(query): 135 | queries_executed.append(query) 136 | result = MagicMock() 137 | if "DESCRIBE SELECT" in query: 138 | result.fetchall.return_value = [ 139 | ('geometry', 'BLOB', 'YES', None, None, None), 140 | ('id', 'BIGINT', 'YES', None, None, None), 141 | ('name', 'VARCHAR', 'YES', None, None, None) 142 | ] 143 | elif "SELECT COUNT(*)" in query: 144 | result.fetchone.return_value = (5,) 145 | else: 146 | result.fetchall.return_value = [] 147 | result.fetchone.return_value = None 148 | return result 149 | 150 | mock_conn.execute.side_effect = mock_execute 151 | 152 | # Mock transform_bbox_to_4326 to return the extent 153 | from qgis.core import QgsRectangle 154 | mock_bbox = QgsRectangle(-122.5, 37.7, -122.4, 37.8) 155 | mock_transform_bbox.return_value = mock_bbox 156 | 157 | # Create worker with bbox filter 158 | output_file = tmp_path / "test_output.parquet" 159 | 160 | # Mock iface 161 | mock_iface = MagicMock() 162 | mock_iface.mapCanvas.return_value.mapSettings.return_value.destinationCrs.return_value = MagicMock() 163 | 164 | # Create extent for bbox filter 165 | from qgis.core import QgsRectangle 166 | extent = QgsRectangle(-122.5, 37.7, -122.4, 37.8) # SF area 167 | 168 | # Mock validation results 169 | validation_results = { 170 | 'has_geometry': True, 171 | 'geometry_type': 'BLOB', 172 | 'total_features': 5 173 | } 174 | 175 | worker = Worker( 176 | dataset_url=f"file://{non_geoparquet_file}", 177 | extent=extent, 178 | 
output_file=str(output_file), 179 | iface=mock_iface, 180 | validation_results=validation_results 181 | ) 182 | 183 | # Mock signals 184 | worker.progress = MagicMock() 185 | worker.error = MagicMock() 186 | worker.finished = MagicMock() 187 | 188 | # Run worker 189 | worker.run() 190 | 191 | # For BLOB geometry columns, spatial filtering happens after conversion 192 | # So we should see the conversion happening in a separate step 193 | conversion_query = any( 194 | "ST_GeomFromWKB" in query and "CREATE TABLE" in query 195 | for query in queries_executed 196 | ) 197 | assert conversion_query, f"Expected geometry conversion for BLOB column. Queries: {queries_executed}" 198 | 199 | def test_duckdb_reads_non_geoparquet(self, non_geoparquet_file): 200 | """Test that DuckDB can actually read the non-GeoParquet file with spatial extension.""" 201 | conn = duckdb.connect() 202 | 203 | # Load spatial extension 204 | conn.execute("INSTALL spatial;") 205 | conn.execute("LOAD spatial;") 206 | 207 | # Read the file 208 | query = f"SELECT * FROM read_parquet('{non_geoparquet_file}')" 209 | result = conn.execute(query).fetchall() 210 | 211 | # Should have 7 rows 212 | assert len(result) == 7 213 | 214 | # Test geometry column can be converted from WKB 215 | geom_query = f""" 216 | SELECT 217 | id, 218 | name, 219 | ST_AsText(ST_GeomFromWKB(geometry)) as geom_wkt 220 | FROM read_parquet('{non_geoparquet_file}') 221 | LIMIT 1 222 | """ 223 | geom_result = conn.execute(geom_query).fetchone() 224 | 225 | assert geom_result is not None 226 | assert geom_result[0] == 1 # id 227 | assert geom_result[1] == 'Market St' # name 228 | assert 'LINESTRING' in geom_result[2] # geometry as WKT 229 | 230 | conn.close() 231 | 232 | @pytest.mark.integration 233 | @pytest.mark.skipif( 234 | os.environ.get('SKIP_INTEGRATION_TESTS', 'false').lower() == 'true', 235 | reason="Skipping integration tests" 236 | ) 237 | @patch('gpq_downloader.utils.transform_bbox_to_4326') 238 | def test_end_to_end_remote_non_geoparquet(self, mock_transform_bbox): 239 | """End-to-end test downloading and processing remote non-geoparquet file.""" 240 | dataset_url = "https://data.source.coop/cholmes/aois/non_geoparquet_with_geometry.parquet" 241 | 242 | # Create a temporary directory for output 243 | with tempfile.TemporaryDirectory() as temp_dir: 244 | output_file = os.path.join(temp_dir, "test_output.parquet") 245 | 246 | # Mock iface 247 | mock_iface = MagicMock() 248 | mock_canvas = MagicMock() 249 | mock_settings = MagicMock() 250 | mock_crs = MagicMock() 251 | 252 | # Setup the chain of mocks 253 | mock_iface.mapCanvas.return_value = mock_canvas 254 | mock_canvas.mapSettings.return_value = mock_settings 255 | mock_settings.destinationCrs.return_value = mock_crs 256 | mock_crs.authid.return_value = "EPSG:4326" 257 | 258 | # Create extent for filtering (San Francisco area) 259 | from qgis.core import QgsRectangle 260 | extent = QgsRectangle(-122.5, 37.7, -122.4, 37.8) 261 | 262 | # Mock transform_bbox_to_4326 to return the same extent (already in 4326) 263 | mock_transform_bbox.return_value = extent 264 | 265 | # Run validation manually to get results 266 | # Since we can't easily test the actual ValidationWorker with signals, 267 | # we'll validate using duckdb directly 268 | conn = duckdb.connect() 269 | conn.execute("INSTALL spatial;") 270 | conn.execute("LOAD spatial;") 271 | 272 | # Get schema 273 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{dataset_url}')" 274 | schema = conn.execute(schema_query).fetchall() 275 | 276 | # 
Check for geometry column 277 | has_geometry = False 278 | geometry_column = None 279 | for col_name, col_type, _, _, _, _ in schema: 280 | if col_name == 'geometry' or 'geom' in col_name.lower(): 281 | has_geometry = True 282 | geometry_column = col_name 283 | break 284 | 285 | # Check for bbox metadata 286 | has_bbox = False 287 | bbox_column = None 288 | try: 289 | metadata_query = f"SELECT key, value FROM parquet_kv_metadata('{dataset_url}')" 290 | metadata_results = conn.execute(metadata_query).fetchall() 291 | for key, value in metadata_results: 292 | if key == b"geo": 293 | has_bbox = True # Would need more parsing to get actual bbox column 294 | break 295 | except Exception: 296 | pass # Metadata lookup is best-effort; a missing 'geo' key is fine 297 | 298 | conn.close() 299 | 300 | # Create validation results based on our checks 301 | validation_results = { 302 | 'has_geometry': has_geometry, 303 | 'geometry_column': geometry_column, 304 | 'has_bbox': has_bbox, 305 | 'bbox_column': bbox_column, 306 | 'schema': schema 307 | } 308 | 309 | # Now run the worker with validation results 310 | worker = Worker( 311 | dataset_url=dataset_url, 312 | extent=extent, 313 | output_file=output_file, 314 | iface=mock_iface, 315 | validation_results=validation_results 316 | ) 317 | 318 | # Mock signals for worker 319 | worker.finished = MagicMock() 320 | worker.error = MagicMock() 321 | worker.progress = MagicMock() 322 | worker.percent = MagicMock() 323 | worker.info = MagicMock() 324 | worker.load_layer = MagicMock() 325 | worker.file_size_warning = MagicMock() 326 | 327 | # Run worker 328 | worker.run() 329 | 330 | # The worker should complete without emitting an error; if it does, 331 | # surface the message so the failure is easy to diagnose 332 | if worker.error.emit.called: 333 | error_message = worker.error.emit.call_args[0][0] 334 | # Historically this pointed at the spatial extension not being loaded properly 335 | print(f"Worker encountered error: {error_message}") 336 | # Fail explicitly rather than letting the error slip through 337 | pytest.fail(f"Worker should not encounter spatial extension error: {error_message}") 338 | 339 | # Check finished signal was emitted 340 | worker.finished.emit.assert_called_once() 341 | 342 | # Verify output file was created 343 | assert os.path.exists(output_file) 344 | 345 | # Verify the output is valid GeoParquet 346 | conn = duckdb.connect() 347 | conn.execute("INSTALL spatial;") 348 | conn.execute("LOAD spatial;") 349 | 350 | # Check we can read the output file 351 | result = conn.execute(f"SELECT COUNT(*) FROM read_parquet('{output_file}')").fetchone() 352 | assert result[0] > 0 # Should have filtered some features 353 | 354 | # Check geometry column exists and is valid 355 | schema_result = conn.execute(f"DESCRIBE SELECT * FROM read_parquet('{output_file}')").fetchall() 356 | column_names = [row[0] for row in schema_result] 357 | assert 'geometry' in column_names 358 | 359 | # Check we can read geometry 360 | # First check what type the geometry column is 361 | geom_col_type = None 362 | for row in schema_result: 363 | if row[0] == 'geometry': 364 | geom_col_type = row[1] 365 | break 366 | 367 | # If it's already GEOMETRY type, don't use ST_GeomFromWKB 368 | if geom_col_type and 'GEOMETRY' in geom_col_type.upper(): 369 | geom_result = conn.execute(f""" 370 | SELECT ST_AsText(geometry) as wkt 371 | FROM read_parquet('{output_file}') 372 | LIMIT 1 373 | """).fetchone() 374 | else: 375 | # It's still BLOB, so convert it 376 | geom_result = conn.execute(f""" 377 | SELECT
ST_AsText(ST_GeomFromWKB(geometry)) as wkt 378 | FROM read_parquet('{output_file}') 379 | LIMIT 1 380 | """).fetchone() 381 | 382 | assert geom_result is not None 383 | assert 'LINESTRING' in geom_result[0] or 'POINT' in geom_result[0] or 'POLYGON' in geom_result[0] 384 | 385 | conn.close() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 
58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense, or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free 248 | Software Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License.
-------------------------------------------------------------------------------- /gpq_downloader/dialog.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | from qgis.PyQt.QtWidgets import ( 6 | QMessageBox, 7 | QDialog, 8 | QVBoxLayout, 9 | QHBoxLayout, 10 | QLabel, 11 | QLineEdit, 12 | QPushButton, 13 | QComboBox, 14 | QProgressDialog, 15 | QRadioButton, 16 | QStackedWidget, 17 | QWidget, 18 | QCheckBox, 19 | ) 20 | from qgis.PyQt.QtCore import pyqtSignal, Qt, QThread 21 | from qgis.core import QgsSettings 22 | import os 23 | from .utils import ValidationWorker 24 | 25 | 26 | class DataSourceDialog(QDialog): 27 | validation_complete = pyqtSignal(bool, str, dict) 28 | 29 | def __init__(self, parent=None, iface=None): 30 | super().__init__(parent) 31 | self.iface = iface 32 | self.validation_thread = None 33 | self.validation_worker = None 34 | self.progress_message = None 35 | self.requires_validation = True 36 | self.setWindowTitle("GeoParquet Data Source") 37 | self.setMinimumWidth(500) 38 | 39 | 40 | base_path = os.path.dirname(os.path.abspath(__file__)) 41 | presets_path = os.path.join(base_path, "data", "presets.json") 42 | with open(presets_path, "r") as f: 43 | self.PRESET_DATASETS = json.load(f) 44 | 45 | # Create main layout 46 | layout = QVBoxLayout() 47 | 48 | # Create horizontal layout for radio buttons 49 | radio_layout = QHBoxLayout() 50 | 51 | # Create radio buttons 52 | self.overture_radio = QRadioButton("Overture Maps") 53 | self.sourcecoop_radio = QRadioButton("Source Cooperative") 54 | self.osm_radio = QRadioButton("OpenStreetMap") 55 | self.custom_radio = QRadioButton("Custom URL") 56 | 57 | # Add radio buttons to horizontal layout 58 | radio_layout.addWidget(self.overture_radio) 59 | radio_layout.addWidget(self.sourcecoop_radio) 60 | radio_layout.addWidget(self.osm_radio) 61 | radio_layout.addWidget(self.custom_radio) 62 | 63 | # Connect to save state 64 | self.overture_radio.released.connect(self.save_radio_button_state) 65 | self.sourcecoop_radio.released.connect(self.save_radio_button_state) 66 | self.osm_radio.released.connect(self.save_radio_button_state) 67 | self.custom_radio.released.connect(self.save_radio_button_state) 68 | 69 | # Add radio button layout to main layout 70 | layout.addLayout(radio_layout) 71 | 72 | # Add some spacing between radio buttons and content 73 | layout.addSpacing(10) 74 | 75 | # Create and setup the stacked widget for different options 76 | self.stack = QStackedWidget() 77 | 78 | # Custom URL page 79 | custom_page = QWidget() 80 | custom_layout = QVBoxLayout() 81 | self.url_input = QLineEdit() 82 | self.url_input.setPlaceholderText( 83 | "Enter URL to Parquet file or folder (s3:// or https://)" 84 | ) 85 | custom_layout.addWidget(self.url_input) 86 | custom_page.setLayout(custom_layout) 87 | 88 | # Overture Maps page 89 | overture_page = QWidget() 90 | overture_layout = QVBoxLayout() 91 | 92 | # Create horizontal layout for main checkboxes 93 | checkbox_layout = QHBoxLayout() 94 | 95 | # Create a widget to hold checkboxes 96 | self.overture_checkboxes = {} 97 | for key in self.PRESET_DATASETS['overture'].keys(): 98 | if key != 'base': # Handle base separately 99 | checkbox = QCheckBox(key.title()) 100 | self.overture_checkboxes[key] = checkbox 101 | checkbox_layout.addWidget(checkbox) 102 | 103 | # Add the horizontal checkbox layout to main layout 104 | overture_layout.addLayout(checkbox_layout) 105 | 106 | # Add base layer section 107 | base_group = 
QWidget() 108 | base_layout = QVBoxLayout() 109 | base_layout.setContentsMargins(0, 10, 0, 0) # Add some top margin 110 | 111 | self.base_checkbox = QCheckBox("Base") 112 | self.overture_checkboxes['base'] = self.base_checkbox 113 | base_layout.addWidget(self.base_checkbox) 114 | 115 | # Add base subtype checkboxes 116 | self.base_subtype_widget = QWidget() 117 | base_subtype_layout = QHBoxLayout() # Horizontal layout for subtypes 118 | base_subtype_layout.setContentsMargins(20, 0, 0, 0) # Add left margin for indentation 119 | 120 | # Replace combo box with checkboxes 121 | self.base_subtype_checkboxes = {} 122 | subtype_display_names = { 123 | 'infrastructure': 'Infrastructure', 124 | 'land': 'Land', 125 | 'land_cover': 'Land Cover', 126 | 'land_use': 'Land Use', 127 | 'water': 'Water', 128 | 'bathymetry': 'Bathymetry' 129 | } 130 | 131 | for subtype in self.PRESET_DATASETS['overture']['base']['subtypes']: 132 | checkbox = QCheckBox(subtype_display_names[subtype]) 133 | self.base_subtype_checkboxes[subtype] = checkbox 134 | base_subtype_layout.addWidget(checkbox) 135 | 136 | self.base_subtype_widget.setLayout(base_subtype_layout) 137 | self.base_subtype_widget.hide() 138 | 139 | base_layout.addWidget(self.base_subtype_widget) 140 | base_group.setLayout(base_layout) 141 | overture_layout.addWidget(base_group) 142 | 143 | # Connect base checkbox to show/hide subtype checkboxes and resize dialog 144 | self.base_checkbox.toggled.connect(self.base_subtype_widget.setVisible) 145 | self.base_checkbox.toggled.connect(lambda checked: self.adjust_dialog_width(checked, 100)) 146 | 147 | 148 | overture_page.setLayout(overture_layout) 149 | 150 | # Source Cooperative page 151 | sourcecoop_page = QWidget() 152 | sourcecoop_layout = QVBoxLayout() 153 | self.sourcecoop_combo = QComboBox() 154 | self.sourcecoop_combo.addItems( 155 | sorted([ 156 | dataset["display_name"] 157 | for dataset in self.PRESET_DATASETS["source_cooperative"].values() 158 | ], key=str.lower) 159 | ) 160 | sourcecoop_layout.addWidget(self.sourcecoop_combo) 161 | 162 | # Add link label 163 | self.sourcecoop_link = QLabel() 164 | self.sourcecoop_link.setOpenExternalLinks(True) 165 | self.sourcecoop_link.setWordWrap(True) 166 | sourcecoop_layout.addWidget(self.sourcecoop_link) 167 | 168 | # Connect combo box change to update link 169 | self.sourcecoop_combo.currentTextChanged.connect(self.update_sourcecoop_link) 170 | sourcecoop_page.setLayout(sourcecoop_layout) 171 | 172 | # OpenStreetMap page 173 | osm_page = QWidget() 174 | osm_layout = QVBoxLayout() 175 | 176 | # Create horizontal layout for checkboxes 177 | osm_checkbox_layout = QHBoxLayout() 178 | 179 | # Create checkboxes for OSM datasets 180 | self.osm_checkboxes = {} 181 | for key in self.PRESET_DATASETS['openstreetmap'].keys(): 182 | checkbox = QCheckBox(key.title()) 183 | self.osm_checkboxes[key] = checkbox 184 | osm_checkbox_layout.addWidget(checkbox) 185 | 186 | # Add the horizontal checkbox layout to main layout 187 | osm_layout.addLayout(osm_checkbox_layout) 188 | 189 | # Add link label for LayerCake info 190 | self.osm_link = QLabel() 191 | self.osm_link.setText( 192 | 'Data from LayerCake GeoParquet files' 193 | ) 194 | self.osm_link.setOpenExternalLinks(True) 195 | self.osm_link.setWordWrap(True) 196 | osm_layout.addWidget(self.osm_link) 197 | 198 | osm_page.setLayout(osm_layout) 199 | 200 | # Add pages to stack 201 | self.stack.addWidget(custom_page) 202 | self.stack.addWidget(overture_page) 203 | self.stack.addWidget(sourcecoop_page) 204 | 
self.stack.addWidget(osm_page) 205 | 206 | layout.addWidget(self.stack) 207 | 208 | # Buttons 209 | button_layout = QHBoxLayout() 210 | self.ok_button = QPushButton("OK") 211 | self.cancel_button = QPushButton("Cancel") 212 | button_layout.addWidget(self.ok_button) 213 | button_layout.addWidget(self.cancel_button) 214 | layout.addLayout(button_layout) 215 | 216 | self.setLayout(layout) 217 | 218 | # Connect signals 219 | self.custom_radio.toggled.connect(lambda: self.stack.setCurrentIndex(0)) 220 | self.overture_radio.toggled.connect(lambda: self.stack.setCurrentIndex(1)) 221 | self.sourcecoop_radio.toggled.connect(lambda: self.stack.setCurrentIndex(2)) 222 | self.osm_radio.toggled.connect(lambda: self.stack.setCurrentIndex(3)) 223 | self.ok_button.clicked.connect(self.validate_and_accept) 224 | self.cancel_button.clicked.connect(self.reject) 225 | 226 | # Add after setting up the sourcecoop_combo 227 | self.update_sourcecoop_link(self.sourcecoop_combo.currentText()) 228 | 229 | # Load checkbox states during initialization 230 | self.load_checkbox_states() 231 | 232 | # Connect each checkbox to save its state when toggled 233 | for checkbox in self.overture_checkboxes.values(): 234 | checkbox.toggled.connect(self.save_checkbox_states) 235 | for checkbox in self.base_subtype_checkboxes.values(): 236 | checkbox.toggled.connect(self.save_checkbox_states) 237 | for checkbox in self.osm_checkboxes.values(): 238 | checkbox.toggled.connect(self.save_checkbox_states) 239 | 240 | # Ensure to call save_checkbox_states when the dialog is accepted 241 | self.ok_button.clicked.connect(self.save_checkbox_states) 242 | 243 | def save_radio_button_state(self) -> None: 244 | if self.custom_radio.isChecked(): 245 | button_name = self.custom_radio.text() 246 | elif self.overture_radio.isChecked(): 247 | button_name = self.overture_radio.text() 248 | elif self.sourcecoop_radio.isChecked(): 249 | button_name = self.sourcecoop_radio.text() 250 | elif self.osm_radio.isChecked(): 251 | button_name = self.osm_radio.text() 252 | else: 253 | button_name = self.custom_radio.text() 254 | 255 | QgsSettings().setValue( 256 | "gpq_downloader/radio_selection", 257 | button_name, 258 | section=QgsSettings.Plugins, 259 | ) 260 | 261 | def handle_overture_selection(self, text): 262 | """Show/hide base subtype combo based on selection""" 263 | self.base_subtype_widget.setVisible(text == "Base") 264 | 265 | def validate_and_accept(self): 266 | """Validate the input and accept the dialog if valid""" 267 | urls = self.get_urls() 268 | if not urls: 269 | QMessageBox.warning(self, "Validation Error", "Please select at least one dataset") 270 | return 271 | 272 | # For Overture and OSM datasets, we know they're valid so we can skip validation 273 | if self.overture_radio.isChecked() or self.osm_radio.isChecked(): 274 | self.accept() 275 | return 276 | 277 | # For custom URLs, do validation 278 | if self.custom_radio.isChecked(): 279 | for url in urls: 280 | if not (url.startswith('http://') or url.startswith('https://') or 281 | url.startswith('s3://') or url.startswith('file://') or url.startswith('hf://')): 282 | QMessageBox.warning(self, "Validation Error", 283 | "URL must start with http://, https://, s3://, hf://, or file://") 284 | return 285 | 286 | # Create progress dialog for validation 287 | self.progress_dialog = QProgressDialog("Validating URL...", "Cancel", 0, 0, self) 288 | self.progress_dialog.setWindowModality(Qt.WindowModality.WindowModal) 289 | self.progress_dialog.canceled.connect(self.cancel_validation) 290 | 
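# --- Editorial note (illustrative, not plugin code) ------------------------
# The block below uses the standard Qt worker-thread pattern: a QObject
# (ValidationWorker) is moved onto a fresh QThread, QThread.started drives
# worker.run, and results come back to the GUI thread via signals, so the
# dialog stays responsive during network access. A minimal sketch of the
# pattern, with hypothetical names:
#
#     thread = QThread()
#     worker = SomeWorker()            # any QObject with a run() slot
#     worker.moveToThread(thread)
#     thread.started.connect(worker.run)
#     worker.finished.connect(thread.quit)
#     thread.start()
# ----------------------------------------------------------------------------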
291 | # Create validation worker 292 | self.validation_worker = ValidationWorker(url, self.iface, self.iface.mapCanvas().extent()) 293 | self.validation_thread = QThread() 294 | self.validation_worker.moveToThread(self.validation_thread) 295 | 296 | # Connect signals 297 | self.validation_thread.started.connect(self.validation_worker.run) 298 | self.validation_worker.progress.connect(self.progress_dialog.setLabelText) 299 | self.validation_worker.finished.connect( 300 | lambda success, message, results: self.handle_validation_result( 301 | success, message, results 302 | ) 303 | ) 304 | self.validation_worker.needs_bbox_warning.connect(self.show_bbox_warning) 305 | 306 | # Start validation 307 | self.validation_thread.start() 308 | self.progress_dialog.exec() 309 | return 310 | 311 | # For other preset sources, we can skip validation 312 | self.accept() 313 | 314 | def handle_validation_result(self, success, message, validation_results): 315 | """Handle validation result in the dialog""" 316 | self.cleanup_validation() 317 | 318 | if success: 319 | self.validation_complete.emit(True, message, validation_results) 320 | self.accept() 321 | else: 322 | QMessageBox.warning(self, "Validation Error", message) 323 | self.validation_complete.emit(False, message, validation_results) 324 | 325 | def cancel_validation(self): 326 | """Handle validation cancellation""" 327 | if self.validation_worker: 328 | self.validation_worker.killed = True 329 | self.cleanup_validation() 330 | 331 | def cleanup_validation(self): 332 | """Clean up validation resources""" 333 | if hasattr(self, 'progress_dialog') and self.progress_dialog: 334 | self.progress_dialog.close() 335 | self.progress_dialog = None 336 | 337 | if self.validation_worker: 338 | self.validation_worker.deleteLater() 339 | self.validation_worker = None 340 | 341 | if self.validation_thread: 342 | self.validation_thread.quit() 343 | self.validation_thread.wait() 344 | self.validation_thread.deleteLater() 345 | self.validation_thread = None 346 | 347 | def closeEvent(self, event): 348 | """Handle dialog closing""" 349 | self.cleanup_validation() 350 | super().closeEvent(event) 351 | 352 | def get_urls(self): 353 | """Returns a list of URLs for selected datasets""" 354 | urls = [] 355 | if self.custom_radio.isChecked(): 356 | return [self.url_input.text().strip()] 357 | elif self.overture_radio.isChecked(): 358 | latest_release = requests.get('https://labs.overturemaps.org/data/releases.json').json()['latest'] 359 | 360 | for theme, checkbox in self.overture_checkboxes.items(): 361 | if checkbox.isChecked(): 362 | dataset = self.PRESET_DATASETS['overture'][theme] 363 | if theme == "transportation": 364 | type_str = "segment" 365 | elif theme == "divisions": 366 | type_str = "division_area" 367 | elif theme == "addresses": 368 | type_str = "*" 369 | elif theme == "base": 370 | # Handle multiple base subtypes 371 | for subtype, subtype_checkbox in self.base_subtype_checkboxes.items(): 372 | if subtype_checkbox.isChecked(): 373 | urls.append(dataset['url_template'].format(subtype=subtype, release=latest_release)) 374 | continue # Skip the normal URL append for base 375 | else: 376 | type_str = theme.rstrip('s') # remove trailing 's' for singular form 377 | urls.append(dataset['url_template'].format(subtype=type_str, release=latest_release)) 378 | elif self.sourcecoop_radio.isChecked(): 379 | selection = self.sourcecoop_combo.currentText() 380 | dataset = next((dataset for dataset in self.PRESET_DATASETS['source_cooperative'].values() 381 | if 
dataset['display_name'] == selection), None) 382 | return [dataset['url']] if dataset else [] 383 | elif self.osm_radio.isChecked(): 384 | for layer, checkbox in self.osm_checkboxes.items(): 385 | if checkbox.isChecked(): 386 | dataset = self.PRESET_DATASETS['openstreetmap'][layer] 387 | urls.append(dataset['url']) 388 | return urls 389 | 390 | def update_sourcecoop_link(self, selection): 391 | """Update the link based on the selected dataset""" 392 | # Find the dataset by display_name 393 | dataset = next((dataset for dataset in self.PRESET_DATASETS['source_cooperative'].values() 394 | if dataset['display_name'] == selection), None) 395 | if dataset and 'info_url' in dataset: 396 | self.sourcecoop_link.setText( 397 | f'<a href="{dataset["info_url"]}">View dataset info</a>' 398 | ) 399 | else: 400 | self.sourcecoop_link.setText("") 401 | 402 | 403 | def show_bbox_warning(self): 404 | """Show bbox warning dialog in main thread""" 405 | # Close the progress dialog if it exists 406 | if hasattr(self, "progress_dialog") and self.progress_dialog: 407 | self.progress_dialog.close() 408 | self.progress_dialog = None 409 | 410 | reply = QMessageBox.warning( 411 | self, 412 | "No bbox Column Detected", 413 | "This dataset doesn't have a bbox column, which means downloads will be slower. " 414 | "GeoParquet 1.1 files with a bbox column work much better - tell your data provider to upgrade!\n\n" 415 | "Do you want to continue with the download?", 416 | QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, 417 | QMessageBox.StandardButton.No, 418 | ) 419 | 420 | validation_results = {"has_bbox": False, "schema": None, "bbox_column": None, "geometry_column": "geometry"} 421 | if reply == QMessageBox.StandardButton.No: 422 | self.validation_complete.emit( 423 | False, "Download cancelled by user.", validation_results 424 | ) 425 | else: 426 | # Accept the dialog when user clicks Yes 427 | self.validation_complete.emit( 428 | True, "Validation successful", validation_results 429 | ) 430 | self.accept() 431 | 432 | def adjust_dialog_width(self, checked, width): 433 | """Adjust the dialog width based on the base checkbox state.""" 434 | if checked: 435 | self.resize(self.width() + width, self.height()) 436 | else: 437 | self.resize(self.width() - width, self.height()) 438 | 439 | def save_checkbox_states(self) -> None: 440 | # Save main checkboxes 441 | for key, checkbox in self.overture_checkboxes.items(): 442 | QgsSettings().setValue( 443 | f"gpq_downloader/checkbox_{key}", 444 | checkbox.isChecked(), 445 | section=QgsSettings.Plugins, 446 | ) 447 | 448 | # Save base subtype checkboxes 449 | for key, checkbox in self.base_subtype_checkboxes.items(): 450 | QgsSettings().setValue( 451 | f"gpq_downloader/base_subtype_checkbox_{key}", 452 | checkbox.isChecked(), 453 | section=QgsSettings.Plugins, 454 | ) 455 | 456 | # Save OSM checkboxes 457 | for key, checkbox in self.osm_checkboxes.items(): 458 | QgsSettings().setValue( 459 | f"gpq_downloader/osm_checkbox_{key}", 460 | checkbox.isChecked(), 461 | section=QgsSettings.Plugins, 462 | ) 463 | 464 | def load_checkbox_states(self) -> None: 465 | # Load main checkboxes 466 | for key, checkbox in self.overture_checkboxes.items(): 467 | checked = QgsSettings().value( 468 | f"gpq_downloader/checkbox_{key}", 469 | False, 470 | type=bool, 471 | section=QgsSettings.Plugins, 472 | ) 473 | checkbox.setChecked(checked) 474 | 475 | # Load base subtype checkboxes 476 | for key, checkbox in self.base_subtype_checkboxes.items(): 477 | checked = QgsSettings().value( 478 | 
f"gpq_downloader/base_subtype_checkbox_{key}", 479 | False, 480 | type=bool, 481 | section=QgsSettings.Plugins, 482 | ) 483 | checkbox.setChecked(checked) 484 | 485 | # Load OSM checkboxes 486 | for key, checkbox in self.osm_checkboxes.items(): 487 | checked = QgsSettings().value( 488 | f"gpq_downloader/osm_checkbox_{key}", 489 | False, 490 | type=bool, 491 | section=QgsSettings.Plugins, 492 | ) 493 | checkbox.setChecked(checked) 494 | 495 | # Update base subtype widget visibility based on base checkbox state 496 | self.base_subtype_widget.setVisible(self.base_checkbox.isChecked()) 497 | 498 | def on_validation_finished(self, success, message, results): 499 | # This method should handle the validation results 500 | # Check how it's setting validation_results 501 | pass 502 | -------------------------------------------------------------------------------- /gpq_downloader/plugin.py: -------------------------------------------------------------------------------- 1 | from qgis.PyQt.QtWidgets import ( 2 | QAction, 3 | QFileDialog, 4 | QMessageBox, 5 | QDialog, 6 | QVBoxLayout, 7 | QHBoxLayout, 8 | QLabel, 9 | QPushButton, 10 | QComboBox, 11 | QProgressDialog, 12 | QCheckBox, 13 | QWidget, 14 | QLineEdit, 15 | ) 16 | from qgis.PyQt.QtGui import QIcon 17 | from qgis.PyQt.QtCore import Qt, QThread 18 | from qgis.core import QgsProject, QgsVectorLayer, QgsSettings 19 | import os 20 | import datetime 21 | from pathlib import Path 22 | 23 | from .dialog import DataSourceDialog 24 | from .utils import Worker 25 | 26 | 27 | class QgisPluginGeoParquet: 28 | def __init__(self, iface): 29 | self.iface = iface 30 | self.worker = None 31 | self.worker_thread = None 32 | self.action = None 33 | self.output_file = None 34 | # Create a default downloads directory in user's home directory 35 | self.download_dir = Path.home() / "Downloads" 36 | # Create the directory if it doesn't exist 37 | self.download_dir.mkdir(parents=True, exist_ok=True) 38 | 39 | def initGui(self): 40 | # Create the action with the icon and tooltip 41 | base_path = os.path.dirname(os.path.abspath(__file__)) 42 | icon_path = os.path.join(base_path, "icons", "parquet-download.svg") 43 | self.action = QAction( 44 | QIcon(icon_path), "Download GeoParquet Data", self.iface.mainWindow() 45 | ) 46 | self.action.setToolTip("Download GeoParquet Data") 47 | self.action.triggered.connect(self.run) 48 | 49 | # Add the actions to the toolbar 50 | self.iface.addToolBarIcon(self.action) 51 | 52 | def unload(self): 53 | # Clean up worker and thread when plugin is unloaded 54 | if self.worker_thread and self.worker_thread.isRunning(): 55 | QMessageBox.warning( 56 | self.iface.mainWindow(), 57 | "Download in Progress", 58 | "Please wait for any downloads to complete before unloading the plugin." 59 | ) 60 | return 61 | self.cleanup_thread() 62 | # Remove all actions from the toolbar 63 | self.iface.removeToolBarIcon(self.action) 64 | 65 | def run(self, default_source=None): 66 | # Check if a worker is already running 67 | if self.worker is not None and self.worker_thread is not None and self.worker_thread.isRunning(): 68 | QMessageBox.warning( 69 | self.iface.mainWindow(), 70 | "Download in Progress", 71 | "A download is already in progress. Please wait for it to complete before starting a new download." 
72 | ) 73 | return 74 | 75 | # Reset any existing worker 76 | self.worker = None 77 | self.worker_thread = None 78 | 79 | dialog = DataSourceDialog(self.iface.mainWindow(), self.iface) 80 | 81 | selected_name = QgsSettings().value("gpq_downloader/radio_selection", section=QgsSettings.Plugins) 82 | for button in [dialog.overture_radio, dialog.sourcecoop_radio, dialog.osm_radio, dialog.custom_radio]: 83 | if button.text() == selected_name: 84 | button.setChecked(True) 85 | if not selected_name: 86 | dialog.overture_radio.setChecked(True) 87 | 88 | if dialog.exec() == QDialog.DialogCode.Accepted: 89 | # Get the selected URLs from the dialog 90 | urls = dialog.get_urls() 91 | extent = self.iface.mapCanvas().extent() 92 | 93 | # First, collect all file locations from user 94 | download_queue = [] 95 | for url in urls: 96 | # Get current date for filename 97 | current_date = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') 98 | 99 | # Generate filename based on the URL and source type 100 | if dialog.overture_radio.isChecked(): 101 | # Extract theme from URL 102 | theme = url.split('theme=')[1].split('/')[0] 103 | if 'type=' in url: 104 | type_str = url.split('type=')[1].split('/')[0] 105 | if theme == 'base': 106 | filename = f"overture_base_{type_str}_{current_date}.parquet" 107 | else: 108 | filename = f"overture_{theme}_{current_date}.parquet" 109 | else: 110 | filename = f"overture_{theme}_{current_date}.parquet" 111 | elif dialog.sourcecoop_radio.isChecked(): 112 | dataset_name = dialog.sourcecoop_combo.currentText() 113 | clean_name = dataset_name.lower().replace(' ', '_').replace('/', '_').replace('(', '').replace(')', '') 114 | filename = f"sourcecoop_{clean_name}_{current_date}.parquet" 115 | elif dialog.osm_radio.isChecked(): 116 | # Extract layer name from URL 117 | layer_name = url.split('/')[-1].replace('.parquet', '') 118 | filename = f"osm_{layer_name}_{current_date}.parquet" 119 | else: 120 | filename = f"custom_download_{current_date}.parquet" 121 | 122 | default_save_path = str(self.download_dir / filename) 123 | 124 | # Show save file dialog 125 | output_file, selected_filter = QFileDialog.getSaveFileName( 126 | self.iface.mainWindow(), 127 | f"Save Data for {theme if dialog.overture_radio.isChecked() else 'dataset'}", 128 | default_save_path, 129 | "GeoParquet (*.parquet);;DuckDB Database (*.duckdb);;GeoPackage (*.gpkg);;FlatGeobuf (*.fgb);;GeoJSON (*.geojson)" 130 | ) 131 | 132 | if output_file: 133 | download_queue.append((url, output_file)) 134 | else: 135 | return 136 | 137 | # Now process downloads one at a time 138 | self.process_download_queue(download_queue, extent) 139 | 140 | def handle_validation_complete( 141 | self, success, message, validation_results, url, extent, dialog 142 | ): 143 | """Handle validation completion and start download if successful.""" 144 | if success: 145 | # Get current date for filename 146 | current_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 147 | 148 | # Generate the default filename based on dialog selection 149 | if dialog.overture_radio.isChecked(): 150 | theme = dialog.overture_combo.currentText().lower() 151 | if theme == "base": 152 | subtype = dialog.base_subtype_combo.currentText() 153 | filename = f"overture_base_{subtype}_{current_date}.parquet" 154 | else: 155 | filename = f"overture_{theme}_{current_date}.parquet" 156 | 157 | elif dialog.sourcecoop_radio.isChecked(): 158 | selection = dialog.sourcecoop_combo.currentText() 159 | # Convert display name to safe filename format 160 | safe_name = 
selection.lower().replace(" ", "_").replace("/", "_") 161 | filename = f"sourcecoop_{safe_name}_{current_date}.parquet" 162 | 163 | else: # custom URL 164 | filename = f"custom_download_{current_date}.parquet" 165 | 166 | default_save_path = str(self.download_dir / filename) 167 | 168 | # Show save file dialog 169 | output_file, selected_filter = QFileDialog.getSaveFileName( 170 | self.iface.mainWindow(), 171 | "Save Data", 172 | default_save_path, 173 | "GeoParquet (*.parquet);;DuckDB Database (*.duckdb);;GeoPackage (*.gpkg);;FlatGeobuf (*.fgb);;GeoJSON (*.geojson)", 174 | ) 175 | 176 | if output_file: 177 | self.output_file = output_file 178 | self.download_and_save(url, extent, output_file, validation_results) 179 | else: 180 | QMessageBox.warning(self.iface.mainWindow(), "Validation Error", message) 181 | 182 | def download_and_save(self, dataset_url, extent, output_file, validation_results): 183 | # Ensure we start with a fresh worker 184 | self.cleanup_thread() 185 | 186 | # Create progress dialog 187 | self.progress_dialog = self.create_progress_dialog("Downloading Data") 188 | 189 | # Create worker with validation results 190 | self.worker, self.worker_thread = self.setup_worker( 191 | dataset_url, extent, output_file, validation_results 192 | ) 193 | 194 | # Show the progress dialog and start the thread 195 | self.progress_dialog.show() 196 | self.worker_thread.start() 197 | 198 | def handle_error(self, message): 199 | self.progress_dialog.close() 200 | QMessageBox.critical(self.iface.mainWindow(), "Error", message) 201 | 202 | def update_progress(self, message): 203 | if hasattr(self, "progress_dialog"): 204 | self.progress_dialog.setLabelText(message) 205 | 206 | def cancel_download(self): 207 | if self.worker: 208 | self.worker.kill() 209 | self.cleanup_thread() 210 | 211 | def cleanup_thread(self): 212 | if self.worker_thread is not None: 213 | if self.worker: 214 | self.worker.kill() 215 | self.worker_thread.quit() 216 | self.worker_thread.wait() 217 | self.worker_thread = None 218 | self.worker = None 219 | if hasattr(self, "progress_dialog"): 220 | self.progress_dialog.close() 221 | 222 | def load_layer(self, output_file): 223 | """Load the layer into QGIS if GeoParquet is supported""" 224 | if output_file.lower().endswith(".parquet"): 225 | # Try to create a test layer to check GeoParquet support 226 | test_layer = QgsVectorLayer(output_file, "test", "ogr") 227 | if not test_layer.isValid(): 228 | dialog = QDialog(self.iface.mainWindow()) 229 | dialog.setWindowTitle("GeoParquet Support Not Available") 230 | dialog.setMinimumWidth(400) 231 | 232 | layout = QVBoxLayout() 233 | 234 | message = QLabel( 235 | "Data has been successfully saved to GeoParquet file.\n\n" 236 | "Note: Your current QGIS installation does not support reading GeoParquet files directly. You can select GeoPackage for your output format to view immediately.\n\n" 237 | "To view GeoParquet files in QGIS, you'll need to install QGIS with GDAL 3.8 " 238 | "or higher with 'libgdal-arrow-parquet'. 
You can find instructions at:" 239 | ) 240 | message.setWordWrap(True) 241 | layout.addWidget(message) 242 | 243 | link = QLabel() 244 | link.setText( 245 | 'Installing GeoParquet Support in QGIS' 246 | ) 247 | link.setOpenExternalLinks(True) 248 | layout.addWidget(link) 249 | 250 | button_box = QPushButton("OK") 251 | button_box.clicked.connect(dialog.accept) 252 | layout.addWidget(button_box) 253 | 254 | dialog.setLayout(layout) 255 | dialog.exec() 256 | return 257 | 258 | layer_name = Path(output_file).stem # Get filename without extension 259 | # Create the layer 260 | layer = QgsVectorLayer(output_file, layer_name, "ogr") 261 | if not layer.isValid(): 262 | QMessageBox.critical( 263 | self.iface.mainWindow(), 264 | "Error", 265 | f"Failed to load the layer from {output_file}", 266 | ) 267 | return 268 | # Add the layer to the QGIS project 269 | QgsProject.instance().addMapLayer(layer) 270 | 271 | def show_info(self, message): 272 | """Show an information message to the user""" 273 | QMessageBox.information(self.iface.mainWindow(), "Success", message) 274 | 275 | def handle_large_file_warning(self, estimated_size): 276 | """Handle warning about large GeoJSON file size with a more streamlined UI""" 277 | if not hasattr(self, 'worker') or self.worker is None: 278 | QMessageBox.critical(self.iface.mainWindow(), "Error", "Download session lost. Please try again.") 279 | return 280 | 281 | worker_info = { 282 | 'dataset_url': self.worker.dataset_url, 283 | 'extent': self.worker.extent, 284 | 'iface': self.worker.iface, 285 | 'validation_results': self.worker.validation_results, 286 | 'output_file': self.worker.output_file, 287 | 'size_warning_accepted': False, 288 | 'remaining_queue': getattr(self.worker, 'remaining_queue', []) 289 | } 290 | 291 | if hasattr(self, 'progress_dialog') and self.progress_dialog: 292 | self.progress_dialog.close() 293 | 294 | dialog = QDialog(self.iface.mainWindow()) 295 | dialog.setWindowTitle("Large File Warning") 296 | dialog.setMinimumWidth(400) 297 | layout = QVBoxLayout() 298 | 299 | if estimated_size >= 1024: 300 | size_str = f"{estimated_size/1024:.2f} GB" 301 | else: 302 | size_str = f"{estimated_size:.0f} MB" 303 | 304 | msg = QLabel( 305 | f"The estimated file size is {size_str}. 
Large GeoJSON files can be slow to process and load.\n\n" 306 | ) 307 | msg.setWordWrap(True) 308 | layout.addWidget(msg) 309 | 310 | format_group = QVBoxLayout() 311 | recommended_label = QLabel("Alternative formats (recommended for large datasets):") 312 | format_group.addWidget(recommended_label) 313 | 314 | format_row = QHBoxLayout() 315 | 316 | format_combo = QComboBox() 317 | format_combo.addItems([ 318 | "FlatGeobuf (*.fgb)", 319 | "GeoPackage (*.gpkg)", 320 | "GeoParquet (*.parquet)" 321 | ]) 322 | format_row.addWidget(format_combo) 323 | 324 | save_button = QPushButton("Save As...") 325 | format_row.addWidget(save_button) 326 | 327 | format_group.addLayout(format_row) 328 | layout.addLayout(format_group) 329 | 330 | button_box = QHBoxLayout() 331 | proceed_button = QPushButton("Proceed with GeoJSON anyway") 332 | cancel_button = QPushButton("Cancel") 333 | button_box.addWidget(proceed_button) 334 | button_box.addWidget(cancel_button) 335 | layout.addLayout(button_box) 336 | 337 | dialog.setLayout(layout) 338 | 339 | cancel_button.clicked.connect(dialog.reject) 340 | save_button.clicked.connect(lambda: dialog.done(1)) 341 | proceed_button.clicked.connect(lambda: dialog.done(2)) 342 | 343 | while True: 344 | result = dialog.exec() 345 | if result == 1: 346 | selected_format = format_combo.currentText() 347 | extension = selected_format.split("*")[1].rstrip(")") 348 | 349 | new_output_file = os.path.splitext(worker_info['output_file'])[0] + extension 350 | 351 | output_file, _ = QFileDialog.getSaveFileName( 352 | self.iface.mainWindow(), 353 | "Save Data", 354 | new_output_file, 355 | selected_format 356 | ) 357 | 358 | if output_file: 359 | self.progress_dialog = QProgressDialog("Starting download...", "Cancel", 0, 0, self.iface.mainWindow()) 360 | self.progress_dialog.setWindowTitle("Downloading Data") 361 | self.progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 362 | self.progress_dialog.setMinimumDuration(0) 363 | 364 | self.output_file = output_file 365 | 366 | self.worker = Worker( 367 | worker_info['dataset_url'], 368 | worker_info['extent'], 369 | output_file, 370 | worker_info['iface'], 371 | worker_info['validation_results'] 372 | ) 373 | self.worker.remaining_queue = worker_info['remaining_queue'] 374 | self.worker_thread = QThread() 375 | self.worker.moveToThread(self.worker_thread) 376 | 377 | self.worker_thread.started.connect(self.worker.run) 378 | self.worker.error.connect(self.handle_error) 379 | self.worker.load_layer.connect(self.load_layer) 380 | self.worker.info.connect(self.show_info) 381 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 382 | self.worker.finished.connect(lambda: self.handle_download_complete(worker_info['remaining_queue'], worker_info['extent'])) 383 | self.worker.progress.connect(self.update_progress) 384 | self.progress_dialog.canceled.connect(self.cancel_download) 385 | 386 | self.progress_dialog.show() 387 | self.worker_thread.start() 388 | return 389 | continue 390 | 391 | elif result == 2: 392 | self.progress_dialog = QProgressDialog("Starting download...", "Cancel", 0, 0, self.iface.mainWindow()) 393 | self.progress_dialog.setWindowTitle("Downloading Data") 394 | self.progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 395 | self.progress_dialog.setMinimumDuration(0) 396 | 397 | self.worker = Worker( 398 | worker_info['dataset_url'], 399 | worker_info['extent'], 400 | worker_info['output_file'], 401 | worker_info['iface'], 402 | worker_info['validation_results'] 403 | ) 404 | 
self.worker.remaining_queue = worker_info['remaining_queue'] 405 | self.worker_thread = QThread() 406 | self.worker.moveToThread(self.worker_thread) 407 | 408 | self.worker_thread.started.connect(self.worker.run) 409 | self.worker.error.connect(self.handle_error) 410 | self.worker.load_layer.connect(self.load_layer) 411 | self.worker.info.connect(self.show_info) 412 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 413 | self.worker.finished.connect(lambda: self.handle_download_complete(worker_info['remaining_queue'], worker_info['extent'])) 414 | self.worker.progress.connect(self.update_progress) 415 | self.progress_dialog.canceled.connect(self.cancel_download) 416 | 417 | self.worker.size_warning_accepted = True 418 | 419 | self.progress_dialog.show() 420 | self.worker_thread.start() 421 | return 422 | 423 | else: 424 | if worker_info['remaining_queue']: 425 | self.process_download_queue(worker_info['remaining_queue'], worker_info['extent']) 426 | else: 427 | self.cleanup_thread() 428 | return 429 | 430 | def create_progress_dialog( 431 | self, title="Downloading Data", message="Starting download..." 432 | ): 433 | """Create and return a configured progress dialog""" 434 | progress_dialog = QProgressDialog( 435 | message, "Cancel", 0, 0, self.iface.mainWindow() 436 | ) 437 | progress_dialog.setWindowTitle(title) 438 | progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 439 | progress_dialog.setMinimumDuration(0) 440 | return progress_dialog 441 | 442 | def setup_worker(self, dataset_url, extent, output_file, validation_results): 443 | """Create and setup a worker thread with all connections""" 444 | self.worker = Worker( 445 | dataset_url, extent, output_file, self.iface, validation_results 446 | ) 447 | self.worker_thread = QThread() 448 | self.worker.moveToThread(self.worker_thread) 449 | 450 | # Connect signals 451 | self.worker_thread.started.connect(self.worker.run) 452 | self.worker.error.connect(self.handle_error) 453 | self.worker.load_layer.connect(self.load_layer) 454 | self.worker.info.connect(self.show_info) 455 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 456 | self.worker.finished.connect(self.cleanup_thread) 457 | self.worker.progress.connect(self.update_progress) 458 | self.progress_dialog.canceled.connect(self.cancel_download) 459 | 460 | return self.worker, self.worker_thread 461 | 462 | def process_download_queue(self, download_queue, extent): 463 | """Process downloads sequentially""" 464 | if not download_queue: 465 | return 466 | 467 | # Get the next download 468 | url, output_file = download_queue[0] 469 | remaining_queue = download_queue[1:] 470 | 471 | # Extract layer name from URL for Overture data 472 | layer_name = None 473 | if 'overture' in url: 474 | if 'theme=' in url: 475 | theme = url.split('theme=')[1].split('/')[0] 476 | if theme == 'base': 477 | # For base layers, include the subtype 478 | subtype = url.split('type=')[1].split('/')[0] 479 | layer_name = f"Overture {theme.title()} - {subtype.title()}" 480 | else: 481 | layer_name = f"Overture {theme.title()}" 482 | 483 | # Create validation results (we know Overture URLs are valid) 484 | validation_results = {'has_bbox': True, 'bbox_column': 'bbox', 'geometry_column': 'geometry'} 485 | 486 | # For non-Overture data, try to detect the geometry column name from the URL 487 | if 'overture' not in url: 488 | from . 
import logger 489 | #logger.log(f"Processing URL: {url}") 490 | 491 | # Try to extract dataset name from URL for better logging 492 | dataset_name = url.split('/')[-1].split('?')[0] 493 | #logger.log(f"Dataset name from URL: {dataset_name}") 494 | 495 | # For specific known datasets, set the geometry column 496 | if 'addresses.nobbox.pq' in url or 'addresses.pq' in url: 497 | #logger.log("Detected addresses dataset, setting geometry column to 'geom'") 498 | validation_results['geometry_column'] = 'geom' 499 | 500 | #logger.log(f"Initial validation_results: {validation_results}") 501 | 502 | # Create progress dialog 503 | self.progress_dialog = QProgressDialog( 504 | "Starting download..." if not layer_name else f"Starting {layer_name} download...", 505 | "Cancel", 0, 0, self.iface.mainWindow() 506 | ) 507 | self.progress_dialog.setWindowTitle("Downloading Data") 508 | self.progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 509 | self.progress_dialog.setMinimumDuration(0) 510 | 511 | # Create worker with layer name 512 | self.worker = Worker(url, extent, output_file, self.iface, validation_results, layer_name) 513 | self.worker.remaining_queue = remaining_queue # Store remaining queue in worker 514 | self.worker_thread = QThread() 515 | 516 | # Move worker to thread 517 | self.worker.moveToThread(self.worker_thread) 518 | 519 | # Connect signals 520 | self.worker_thread.started.connect(self.worker.run) 521 | self.worker.error.connect(self.handle_error) 522 | self.worker.load_layer.connect(self.load_layer) 523 | self.worker.info.connect(self.show_info) 524 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 525 | self.worker.finished.connect(lambda: self.handle_download_complete(remaining_queue, extent)) 526 | self.worker.progress.connect(self.update_progress) 527 | self.progress_dialog.canceled.connect(self.cancel_download) 528 | 529 | # Show the progress dialog and start the thread 530 | self.progress_dialog.show() 531 | self.worker_thread.start() 532 | 533 | def handle_download_complete(self, remaining_queue, extent): 534 | """Handle completion of a download and start the next one if any""" 535 | self.cleanup_thread() 536 | if remaining_queue: 537 | # Start the next download 538 | self.process_download_queue(remaining_queue, extent) 539 | 540 | 541 | def classFactory(iface): 542 | return QgisPluginGeoParquet(iface) 543 | -------------------------------------------------------------------------------- /gpq_downloader/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from qgis.core import QgsCoordinateReferenceSystem, QgsCoordinateTransform, QgsProject 4 | from qgis.PyQt.QtCore import pyqtSignal, QObject 5 | import os 6 | import duckdb 7 | 8 | from . 
import logger 9 | 10 | 11 | def transform_bbox_to_4326(extent, source_crs): 12 | """ 13 | Transform a bounding box to EPSG:4326 (WGS84) 14 | 15 | Args: 16 | extent (QgsRectangle): The input extent to transform 17 | source_crs (QgsCoordinateReferenceSystem): The source CRS of the extent 18 | 19 | Returns: 20 | QgsRectangle: The transformed extent in EPSG:4326, or None if inputs are invalid 21 | """ 22 | if extent is None or source_crs is None: 23 | return None 24 | 25 | dest_crs = QgsCoordinateReferenceSystem("EPSG:4326") 26 | 27 | if source_crs != dest_crs: 28 | transform = QgsCoordinateTransform(source_crs, dest_crs, QgsProject.instance()) 29 | extent = transform.transformBoundingBox(extent) 30 | 31 | return extent 32 | 33 | 34 | class Worker(QObject): 35 | finished = pyqtSignal() 36 | error = pyqtSignal(str) 37 | load_layer = pyqtSignal(str) 38 | info = pyqtSignal(str) 39 | progress = pyqtSignal(str) 40 | percent = pyqtSignal(int) 41 | file_size_warning = pyqtSignal(float) # Signal for file size warnings (in MB) 42 | 43 | def __init__(self, dataset_url, extent, output_file, iface, validation_results, layer_name=None): 44 | super().__init__() 45 | self.dataset_url = dataset_url 46 | self.extent = extent 47 | self.output_file = output_file 48 | self.iface = iface 49 | #logger.log(f"Worker __init__ received validation_results: {validation_results}") 50 | self.validation_results = validation_results 51 | self.killed = False 52 | self.layer_name = layer_name # Ensure this is included if needed 53 | self.size_warning_accepted = False # Ensure this is False on initialization 54 | 55 | def get_bbox_info_from_metadata(self, conn): 56 | """Read GeoParquet metadata to find bbox column info""" 57 | self.progress.emit("Checking for bbox metadata...") 58 | metadata_query = ( 59 | f"SELECT key, value FROM parquet_kv_metadata('{self.dataset_url}')" 60 | ) 61 | metadata_results = conn.execute(metadata_query).fetchall() 62 | 63 | for key, value in metadata_results: 64 | if key == b"geo": 65 | try: 66 | decoded_value = value.decode() 67 | #logger.log("\nRaw metadata value:") 68 | #logger.log(decoded_value) 69 | 70 | # Parse JSON using DuckDB's JSON functions 71 | json_query = ( 72 | f"SELECT json_parse('{decoded_value}'::VARCHAR) as json" 73 | ) 74 | #logger.log("\nExecuting JSON query:") 75 | #logger.log(json_query) 76 | 77 | geo_metadata = conn.execute(json_query).fetchone()[0] 78 | #logger.log("\nParsed metadata:") 79 | #logger.log(geo_metadata) 80 | 81 | if geo_metadata and "covering" in geo_metadata: 82 | #logger.log("\nFound covering:") 83 | #logger.log(geo_metadata["covering"]) 84 | if "bbox" in geo_metadata["covering"]: 85 | bbox_info = geo_metadata["covering"]["bbox"] 86 | #logger.log("\nExtracted bbox info:") 87 | #logger.log(bbox_info) 88 | return bbox_info 89 | except Exception as e: 90 | logger.log(f"\nError parsing geo metadata: {str(e)}", 2) 91 | logger.log(f"Exception type: {type(e)}", 2) 92 | import traceback 93 | 94 | logger.log(traceback.format_exc(), 2) 95 | continue 96 | return None 97 | 98 | def run(self): 99 | try: 100 | layer_info = f" for {self.layer_name}" if self.layer_name else "" 101 | self.progress.emit(f"Connecting to database{layer_info}...") 102 | source_crs = self.iface.mapCanvas().mapSettings().destinationCrs() 103 | bbox = transform_bbox_to_4326(self.extent, source_crs) 104 | 105 | # Log validation results dictionary at the beginning of run 106 | #logger.log(f"Full validation_results at start of run: {self.validation_results}") 107 | 108 | conn = None 109 | try: 110 | # 
Install and load the spatial extension 111 | self.progress.emit(f"Loading spatial extension{layer_info}...") 112 | 113 | if self.output_file.lower().endswith('.duckdb'): 114 | conn = duckdb.connect(self.output_file) # Connect directly to output file 115 | else: 116 | conn = duckdb.connect() 117 | 118 | conn.execute("INSTALL httpfs;") 119 | conn.execute("INSTALL spatial;") 120 | conn.execute("LOAD httpfs;") 121 | conn.execute("LOAD spatial;") 122 | 123 | # Verify spatial extension is loaded by testing a spatial function 124 | try: 125 | conn.execute("SELECT ST_AsText(ST_GeomFromText('POINT(0 0)'))").fetchone() 126 | except Exception as e: 127 | logger.log(f"Failed to verify spatial extension: {e}") 128 | # Force reload 129 | conn.execute("LOAD spatial;") 130 | 131 | # Get schema early as we need it for both column names and bbox check 132 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{self.dataset_url}')" 133 | schema_result = conn.execute(schema_query).fetchall() 134 | self.validation_results['schema'] = schema_result 135 | 136 | # Log the schema for debugging 137 | #logger.log("Schema in Worker:") 138 | #for row in schema_result: 139 | #logger.log(f"Column: {row[0]}, Type: {row[1]}") 140 | 141 | # If geometry_column is not in validation_results, detect it now 142 | if 'geometry_column' not in self.validation_results: 143 | #logger.log("No geometry_column in validation_results, detecting now") 144 | self.validation_results['geometry_column'] = 'geometry' # Default 145 | geometry_found = False 146 | 147 | for row in schema_result: 148 | col_name = row[0] 149 | col_type = row[1].upper() 150 | #logger.log(f"Checking column {col_name} with type {col_type} for geometry") 151 | if 'GEOMETRY' in col_type or 'GEOGRAPHY' in col_type: 152 | self.validation_results['geometry_column'] = col_name 153 | logger.log(f"Found geometry column by type: {col_name}") 154 | geometry_found = True 155 | break 156 | 157 | if not geometry_found: 158 | # Try a different approach - look for columns 159 | #logger.log("No standard geometry column found, trying alternative detection") 160 | for row in schema_result: 161 | col_name = row[0].lower() 162 | col_name_orig = row[0] # Keep original case 163 | col_type = row[1].upper() 164 | 165 | # Check for common geometry column names 166 | if col_name in ['geometry', 'geom', 'the_geom', 'wkb_geometry']: 167 | self.validation_results['geometry_column'] = col_name_orig 168 | #logger.log(f"Found likely geometry column by name: {col_name_orig}") 169 | geometry_found = True 170 | break 171 | # Also check for BLOB columns with geometry-like names 172 | elif 'BLOB' in col_type and col_name in ['geometry', 'geom', 'the_geom', 'wkb_geometry']: 173 | self.validation_results['geometry_column'] = col_name_orig 174 | logger.log(f"Found WKB BLOB geometry column: {col_name_orig}") 175 | geometry_found = True 176 | break 177 | 178 | #logger.log(f"Final geometry column detection result: {self.validation_results['geometry_column']}") 179 | 180 | table_name = "download_data" 181 | 182 | self.progress.emit(f"Preparing query{layer_info}...") 183 | select_query = "SELECT *" 184 | if not self.output_file.endswith(".parquet"): 185 | # Construct the SELECT clause with array conversion to strings 186 | columns = [] 187 | for row in schema_result: 188 | col_name = row[0] 189 | col_type = row[1] 190 | 191 | # Quote the column name to handle special characters 192 | quoted_col_name = f'"{col_name}"' 193 | 194 | if 'STRUCT' in col_type.upper() or 'MAP' in col_type.upper(): 195 | 
columns.append(f"TO_JSON({quoted_col_name}) AS {quoted_col_name}") 196 | elif '[]' in col_type: # Check for array types like VARCHAR[] 197 | columns.append(f"array_to_string({quoted_col_name}, ', ') AS {quoted_col_name}") 198 | elif col_type.upper() == 'UTINYINT': 199 | columns.append(f"CAST({quoted_col_name} AS INTEGER) AS {quoted_col_name}") 200 | elif 'BLOB' in col_type.upper() and col_name == geometry_column: 201 | # For BLOB geometry columns, we'll handle conversion differently 202 | # to avoid spatial function validation issues 203 | columns.append(quoted_col_name) 204 | else: 205 | columns.append(quoted_col_name) 206 | 207 | # Check if this is Overture data and has a names column 208 | has_names_column = any('names' in row[0] for row in schema_result) 209 | if 'overture' in self.dataset_url and has_names_column: 210 | select_query = f'SELECT "names"."primary" as name,{", ".join(columns)}' 211 | else: 212 | select_query = f'SELECT {", ".join(columns)}' 213 | 214 | # First check: Does the schema actually have a bbox column? 215 | has_bbox_in_schema = False 216 | if 'schema' in self.validation_results and self.validation_results['schema']: 217 | for row in self.validation_results['schema']: 218 | if row[0].lower() == 'bbox' and 'struct' in row[1].lower(): 219 | has_bbox_in_schema = True 220 | #logger.log("Found actual bbox column in schema") 221 | break 222 | 223 | if not has_bbox_in_schema: 224 | #logger.log("No bbox column found in schema, overriding validation_results") 225 | # Force override incorrect bbox settings if schema doesn't have bbox 226 | self.validation_results['has_bbox'] = False 227 | self.validation_results['bbox_column'] = None 228 | 229 | # Now use the corrected validation_results 230 | bbox_column = self.validation_results.get('bbox_column') 231 | geometry_column = self.validation_results.get('geometry_column', 'geometry') 232 | #logger.log(f"Final bbox_column value: {bbox_column}") 233 | #logger.log(f"Using geometry column: {geometry_column}") 234 | 235 | # Check if geometry column is a BLOB that needs conversion 236 | geometry_col_type = None 237 | for row in schema_result: 238 | if row[0] == geometry_column: 239 | geometry_col_type = row[1].upper() 240 | break 241 | 242 | if bbox_column is not None: 243 | #logger.log(f"Using bbox column for query: {bbox_column}") 244 | where_clause = f""" 245 | WHERE "{bbox_column}".xmin BETWEEN {bbox.xMinimum()} AND {bbox.xMaximum()} 246 | AND "{bbox_column}".ymin BETWEEN {bbox.yMinimum()} AND {bbox.yMaximum()} 247 | """ 248 | else: 249 | #logger.log("Using spatial filter instead of bbox") 250 | # If it's a BLOB column, we can't use spatial functions in the initial query 251 | # We'll apply the filter after converting the geometry 252 | if geometry_col_type and 'BLOB' in geometry_col_type: 253 | where_clause = "" # No spatial filter initially for BLOB columns 254 | else: 255 | # For proper geometry columns, we can use spatial filter directly 256 | geometry_expr = f'"{geometry_column}"' 257 | where_clause = f""" 258 | WHERE ST_Intersects( 259 | {geometry_expr}, 260 | ST_GeomFromText('POLYGON(({bbox.xMinimum()} {bbox.yMinimum()}, 261 | {bbox.xMaximum()} {bbox.yMinimum()}, 262 | {bbox.xMaximum()} {bbox.yMaximum()}, 263 | {bbox.xMinimum()} {bbox.yMaximum()}, 264 | {bbox.xMinimum()} {bbox.yMinimum()}))') 265 | ) 266 | """ 267 | 268 | # Base query 269 | base_query = f""" 270 | CREATE TABLE {table_name} AS ( 271 | {select_query} FROM read_parquet('{self.dataset_url}') 272 | {where_clause} 273 | ) 274 | """ 275 | 
self.progress.emit(f"Downloading{layer_info} data...") 276 | logger.log("Executing SQL query:") 277 | logger.log(base_query) 278 | 279 | conn.execute(base_query) 280 | 281 | # If we have a BLOB geometry column, we need to convert it after table creation 282 | # and apply spatial filter if needed 283 | if (geometry_column and geometry_col_type and 'BLOB' in geometry_col_type): 284 | # Create a new table with converted geometry 285 | temp_table = f"{table_name}_converted" 286 | 287 | # Build column list for conversion 288 | convert_columns = [] 289 | for col_name, col_type, _, _, _, _ in schema_result: 290 | quoted_col_name = f'"{col_name}"' 291 | if col_name == geometry_column: 292 | convert_columns.append(f"ST_GeomFromWKB({quoted_col_name}) AS {quoted_col_name}") 293 | else: 294 | convert_columns.append(quoted_col_name) 295 | 296 | # Add spatial filter if bbox is available and we didn't filter earlier 297 | spatial_filter = "" 298 | if bbox and not bbox_column: # Only if we didn't filter with bbox column 299 | spatial_filter = f""" 300 | WHERE ST_Intersects( 301 | ST_GeomFromWKB("{geometry_column}"), 302 | ST_GeomFromText('POLYGON(({bbox.xMinimum()} {bbox.yMinimum()}, 303 | {bbox.xMaximum()} {bbox.yMinimum()}, 304 | {bbox.xMaximum()} {bbox.yMaximum()}, 305 | {bbox.xMinimum()} {bbox.yMaximum()}, 306 | {bbox.xMinimum()} {bbox.yMinimum()}))') 307 | ) 308 | """ 309 | 310 | convert_query = f""" 311 | CREATE TABLE {temp_table} AS 312 | SELECT {', '.join(convert_columns)} 313 | FROM {table_name} 314 | {spatial_filter} 315 | """ 316 | 317 | conn.execute(convert_query) 318 | 319 | # Drop original and rename 320 | conn.execute(f"DROP TABLE {table_name}") 321 | conn.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}") 322 | 323 | # Add check for empty results 324 | row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] 325 | if row_count == 0: 326 | self.info.emit(f"No data found{layer_info} in the requested area. Check that your map extent overlaps with the data and/or expand your map extent. Skipping to next dataset if available.") 327 | self.finished.emit() # Ensure finished signal is emitted 328 | return 329 | 330 | self.progress.emit(f"Processing{layer_info} data to requested format...") 331 | 332 | file_extension = self.output_file.lower().split('.')[-1] 333 | 334 | if file_extension == 'duckdb': 335 | # Commit the transaction to ensure the data is saved 336 | conn.commit() 337 | if not self.killed: 338 | self.info.emit( 339 | "Data has been successfully saved to DuckDB database.\n\n" 340 | "Note: QGIS does not currently support loading DuckDB files directly." 
341 | ) 342 | else: 343 | # Check size if exporting to GeoJSON 344 | if self.output_file.lower().endswith('.geojson'): 345 | estimated_size = self.estimate_file_size(conn, table_name) 346 | if estimated_size > 4096 and not self.size_warning_accepted: # 4GB warning threshold 347 | self.file_size_warning.emit(estimated_size) 348 | return 349 | 350 | # Use the geometry column from validation results for the Hilbert sorting 351 | # At this point, if we converted BLOB to geometry, it's already a GEOMETRY type 352 | # So we don't need ST_GeomFromWKB anymore 353 | geometry_expr = f'"{geometry_column}"' 354 | extent_expr = f'"{geometry_column}"' 355 | 356 | copy_query = f""" 357 | COPY ( 358 | WITH bbox AS ( 359 | SELECT ST_Extent(ST_Extent_Agg({extent_expr}))::BOX_2D AS b 360 | FROM {table_name} 361 | ) 362 | SELECT t.* 363 | FROM {table_name} AS t 364 | CROSS JOIN bbox 365 | ORDER BY ST_Hilbert(t.{geometry_expr}, bbox.b) 366 | ) TO '{self.output_file}' 367 | """ 368 | 369 | if file_extension == "parquet": 370 | format_options = "(FORMAT 'parquet', COMPRESSION 'ZSTD', COMPRESSION_LEVEL 22);" 371 | elif self.output_file.endswith(".gpkg"): 372 | format_options = "(FORMAT GDAL, DRIVER 'GPKG');" 373 | elif self.output_file.endswith(".fgb"): 374 | format_options = "(FORMAT GDAL, DRIVER 'FlatGeobuf', SRS 'EPSG:4326');" 375 | elif self.output_file.endswith(".geojson"): 376 | format_options = "(FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');" 377 | else: 378 | self.error.emit("Unsupported file format.") 379 | return # format_options is undefined for unsupported formats; bail out before the COPY below 380 | logger.log("Executing SQL query:") 381 | logger.log(copy_query + format_options) 382 | conn.execute(copy_query + format_options) 383 | 384 | 385 | if self.killed: 386 | return 387 | 388 | if not self.killed: 389 | if self.output_file.lower().endswith('.duckdb'): 390 | self.info.emit( 391 | "Data has been successfully saved to DuckDB database.\n\n" 392 | "Note: QGIS does not currently support loading DuckDB files directly." 393 | ) 394 | else: 395 | self.load_layer.emit(self.output_file) 396 | self.finished.emit() 397 | 398 | except Exception as e: 399 | if not self.killed: 400 | # Change error to info if it's a "no data" error 401 | error_str = str(e) 402 | if "No data found" in error_str: 403 | self.info.emit(f"No data found{layer_info} in the requested area for {self.dataset_url}. 
Skipping to next dataset if available.") 404 | self.finished.emit() # Ensure finished signal is emitted 405 | else: 406 | self.error.emit(error_str) 407 | finally: 408 | if conn: 409 | if not self.output_file.lower().endswith('.duckdb'): # Clean up temporary table 410 | try: 411 | conn.execute(f"DROP TABLE IF EXISTS {table_name}") 412 | except: 413 | pass 414 | conn.close() 415 | 416 | except Exception as e: 417 | if not self.killed: 418 | self.error.emit(str(e)) 419 | 420 | def kill(self): 421 | self.killed = True 422 | 423 | def estimate_file_size(self, conn, table_name): 424 | """Estimate the output file size in MB using GeoJSON feature collection structure""" 425 | try: 426 | # Get total row count 427 | row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] 428 | 429 | # Use a smaller sample size for large datasets 430 | sample_size = min(100, row_count) 431 | 432 | if sample_size > 0: 433 | # Create a proper GeoJSON FeatureCollection sample with all properties 434 | sample_query = f""" 435 | WITH sample AS ( 436 | SELECT * FROM {table_name} LIMIT {sample_size} 437 | ) 438 | SELECT AVG(LENGTH( 439 | json_object( 440 | 'type', 'Feature', 441 | 'geometry', ST_AsGeoJSON(geometry), 442 | 'properties', json_object( 443 | {', '.join([ 444 | f"'{col[0]}', COALESCE(CAST({col[0]} AS VARCHAR), 'null')" 445 | for col in conn.execute(f"DESCRIBE {table_name}").fetchall() 446 | if col[0] != 'geometry' 447 | ])} 448 | ) 449 | )::VARCHAR 450 | )) as avg_feature_size 451 | FROM sample; 452 | """ 453 | 454 | # Get average feature size 455 | avg_feature_size = conn.execute(sample_query).fetchone()[0] 456 | 457 | if avg_feature_size: 458 | # Account for GeoJSON overhead 459 | collection_overhead = ( 460 | 50 # {"type":"FeatureCollection","features":[]} 461 | ) 462 | comma_overhead = row_count - 1 # Commas between features 463 | 464 | total_estimated_bytes = ( 465 | (row_count * avg_feature_size) 466 | + collection_overhead 467 | + comma_overhead 468 | ) 469 | return total_estimated_bytes / (1024 * 1024) # Convert to MB 470 | return 0 471 | 472 | except Exception as e: 473 | logger.log(f"Error estimating file size: {str(e)}", 2) 474 | return 0 475 | 476 | def process_schema_columns(self, schema_result): 477 | """Process schema columns and return formatted SELECT clause""" 478 | columns = [] 479 | for row in schema_result: 480 | col_name = row[0] 481 | col_type = row[1] 482 | quoted_col_name = f'"{col_name}"' 483 | 484 | if "STRUCT" in col_type.upper() or "MAP" in col_type.upper(): 485 | columns.append(f"TO_JSON({quoted_col_name}) AS {quoted_col_name}") 486 | elif "[]" in col_type: 487 | columns.append( 488 | f"array_to_string({quoted_col_name}, ', ') AS {quoted_col_name}" 489 | ) 490 | elif col_type.upper() == "UTINYINT": 491 | columns.append( 492 | f"CAST({quoted_col_name} AS INTEGER) AS {quoted_col_name}" 493 | ) 494 | else: 495 | columns.append(quoted_col_name) 496 | return columns 497 | 498 | 499 | class ValidationWorker(QObject): 500 | finished = pyqtSignal(bool, str, dict) 501 | progress = pyqtSignal(str) 502 | needs_bbox_warning = pyqtSignal() 503 | 504 | def __init__(self, dataset_url, iface, extent): 505 | super().__init__() 506 | self.dataset_url = dataset_url 507 | self.iface = iface 508 | self.extent = extent 509 | self.killed = False 510 | 511 | base_path = os.path.dirname(os.path.abspath(__file__)) 512 | presets_path = os.path.join(base_path, "data", "presets.json") 513 | with open(presets_path, "r") as f: 514 | self.PRESET_DATASETS = json.load(f) 515 | 516 | def 
check_bbox_metadata(self, conn): 517 | """Check for bbox information in GeoParquet metadata""" 518 | metadata_query = ( 519 | f"SELECT key, value FROM parquet_kv_metadata('{self.dataset_url}')" 520 | ) 521 | metadata_results = conn.execute(metadata_query).fetchall() 522 | 523 | for key, value in metadata_results: 524 | if key == b"geo": 525 | try: 526 | decoded_value = value.decode() 527 | #logger.log("\nRaw metadata value:") 528 | #logger.log(decoded_value) 529 | 530 | # Install and load JSON extension 531 | conn.execute("INSTALL json;") 532 | conn.execute("LOAD json;") 533 | 534 | # Create a table with the JSON string 535 | conn.execute( 536 | f"CREATE TEMP TABLE temp_json AS SELECT '{decoded_value}' as json_str" 537 | ) 538 | 539 | # Extract the bbox column name using JSON path 540 | # First get the geometry column info which contains the covering 541 | result = conn.execute(""" 542 | SELECT json_str->'$.columns.geometry.covering.bbox.xmin[0]' as bbox_column 543 | FROM temp_json 544 | """).fetchone() 545 | 546 | #logger.log("\nExtracted bbox column name:") 547 | #logger.log(result[0] if result else None) 548 | 549 | if result and result[0]: 550 | # Remove quotes from the result if present 551 | bbox_col = result[0].strip('"') 552 | return bbox_col 553 | 554 | except Exception as e: 555 | logger.log(f"\nError parsing geo metadata: {str(e)}", 2) 556 | logger.log(f"Exception type: {type(e)}", 2) 557 | import traceback 558 | 559 | logger.log(traceback.format_exc()) 560 | finally: 561 | # Clean up temporary table 562 | conn.execute("DROP TABLE IF EXISTS temp_json") 563 | return None 564 | 565 | def run(self): 566 | # Initialize validation results with default values 567 | validation_results = { 568 | "schema": None, 569 | "has_bbox": False, 570 | "bbox_column": None, 571 | "geometry_column": "geometry" # Default fallback 572 | } 573 | 574 | try: 575 | self.progress.emit("Connecting to data source...") 576 | conn = duckdb.connect() 577 | conn.execute("INSTALL spatial;") 578 | conn.execute("LOAD spatial;") 579 | conn.execute("INSTALL httpfs;") 580 | conn.execute("LOAD httpfs;") 581 | 582 | if not self.needs_validation(): 583 | validation_results.update({ 584 | "has_bbox": True, 585 | "bbox_column": "bbox", 586 | }) 587 | self.finished.emit(True, "Validation successful", validation_results) 588 | return 589 | 590 | self.progress.emit("Checking data format...") 591 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{self.dataset_url}')" 592 | schema_result = conn.execute(schema_query).fetchall() 593 | 594 | # Update validation results with schema 595 | validation_results["schema"] = schema_result 596 | 597 | # Check for standard bbox column first 598 | has_bbox = any( 599 | row[0].lower() == "bbox" and "struct" in row[1].lower() 600 | for row in schema_result 601 | ) 602 | 603 | if has_bbox: 604 | validation_results["has_bbox"] = True 605 | validation_results["bbox_column"] = "bbox" 606 | self.finished.emit(True, "Validation successful", validation_results) 607 | else: 608 | # Check metadata for alternative bbox column 609 | bbox_column = self.check_bbox_metadata(conn) 610 | if bbox_column: 611 | validation_results["has_bbox"] = True 612 | validation_results["bbox_column"] = bbox_column 613 | self.finished.emit(True, "Validation successful", validation_results) 614 | else: 615 | # No bbox column found - emit warning signal first 616 | self.needs_bbox_warning.emit() 617 | # Then emit finished signal with no bbox results 618 | self.finished.emit(True, "Validation with no bbox column", 
validation_results) 619 | 620 | except Exception as e: 621 | logger.log(f"Error in ValidationWorker: {str(e)}") 622 | # Emit warning before error if no bbox was found 623 | if not validation_results.get("has_bbox"): 624 | self.needs_bbox_warning.emit() 625 | # Still emit validation results with default values in case of error 626 | self.finished.emit(False, f"Error validating source: {str(e)}", validation_results) 627 | finally: 628 | conn.close() 629 | 630 | def needs_validation(self): 631 | """Determine if the dataset needs any validation""" 632 | # Check if URL matches any preset dataset 633 | for source in self.PRESET_DATASETS.values(): 634 | for dataset in source.values(): 635 | if ( 636 | isinstance(dataset.get("url"), str) 637 | and dataset["url"] in self.dataset_url 638 | ): 639 | return dataset.get("needs_validation", True) 640 | elif ( 641 | isinstance(dataset.get("url_template"), str) 642 | and dataset["url_template"].split("{")[0] in self.dataset_url 643 | ): 644 | return dataset.get("needs_validation", True) 645 | 646 | # All other datasets need validation 647 | return True 648 | --------------------------------------------------------------------------------
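Editorial appendix: a minimal, self-contained sketch of the DuckDB download pattern used in gpq_downloader/utils.py (bbox filter via ST_Intersects, then a Hilbert-ordered COPY). This is illustrative only, not plugin code: the file paths, bbox values, and the column name "geometry" are assumptions.

import duckdb

SRC = "input.parquet"    # hypothetical source (local path or URL)
DST = "clipped.parquet"  # hypothetical output path
xmin, ymin, xmax, ymax = -122.5, 37.6, -122.3, 37.9  # example WGS84 bbox

conn = duckdb.connect()
conn.execute("INSTALL spatial;")
conn.execute("LOAD spatial;")

# Rectangle as a closed WKT ring, matching the polygon built in Worker.run()
wkt = (f"POLYGON(({xmin} {ymin}, {xmax} {ymin}, {xmax} {ymax}, "
       f"{xmin} {ymax}, {xmin} {ymin}))")

# Filter to the bbox, then write rows ordered along a Hilbert curve so that
# spatially adjacent features sit adjacent in the output file.
conn.execute(f"""
    COPY (
        WITH filtered AS (
            SELECT * FROM read_parquet('{SRC}')
            WHERE ST_Intersects(geometry, ST_GeomFromText('{wkt}'))
        ),
        bbox AS (
            SELECT ST_Extent(ST_Extent_Agg(geometry))::BOX_2D AS b
            FROM filtered
        )
        SELECT f.*
        FROM filtered AS f
        CROSS JOIN bbox
        ORDER BY ST_Hilbert(f.geometry, bbox.b)
    ) TO '{DST}' (FORMAT 'parquet', COMPRESSION 'ZSTD')
""")
conn.close()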