├── gpq_downloader ├── icons │ ├── parquet-download.png │ └── parquet-download.svg ├── tests │ ├── data │ │ ├── geoparquet_with_metadata.parquet │ │ └── non_geoparquet_with_geometry.parquet │ ├── test_logger.py │ ├── test_dialog.py │ ├── test_integration.py │ ├── conftest.py │ ├── test_validation.py │ ├── test_worker.py │ ├── test_utils.py │ ├── create_test_data.py │ ├── test_plugin.py │ └── test_non_geoparquet.py ├── logger.py ├── data │ ├── formats.json │ └── presets.json ├── metadata.txt ├── __init__.py ├── dialog.py ├── plugin.py └── utils.py ├── pyproject.toml ├── .github └── workflows │ └── tests.yml ├── make_release.sh ├── README.md ├── .gitignore └── LICENSE /gpq_downloader/icons/parquet-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/icons/parquet-download.png -------------------------------------------------------------------------------- /gpq_downloader/tests/data/geoparquet_with_metadata.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/tests/data/geoparquet_with_metadata.parquet -------------------------------------------------------------------------------- /gpq_downloader/tests/data/non_geoparquet_with_geometry.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cholmes/qgis_plugin_gpq_downloader/HEAD/gpq_downloader/tests/data/non_geoparquet_with_geometry.parquet -------------------------------------------------------------------------------- /gpq_downloader/tests/test_logger.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from gpq_downloader.logger import log 3 | 4 | def test_logger_basic(): 5 | """Test basic logger functionality""" 6 | log("Test message") 7 | log("Test message", 1) 8 | log("Test message", 2) 9 | 10 | def test_logger_levels(): 11 | """Test different logger levels""" 12 | log("Info message", 0) 13 | log("Warning message", 1) 14 | log("Error message", 2) -------------------------------------------------------------------------------- /gpq_downloader/logger.py: -------------------------------------------------------------------------------- 1 | from qgis.core import Qgis, QgsMessageLog 2 | 3 | 4 | def log(message: str, level_in: int = 0): 5 | if level_in == 0: 6 | level = Qgis.MessageLevel.Info 7 | elif level_in == 1: 8 | level = Qgis.MessageLevel.Warning 9 | elif level_in == 2: 10 | level = Qgis.MessageLevel.Critical 11 | else: 12 | level = Qgis.MessageLevel.Info 13 | 14 | QgsMessageLog.logMessage(str(message), "GeoParquet Downloader", level) 15 | -------------------------------------------------------------------------------- /gpq_downloader/data/formats.json: -------------------------------------------------------------------------------- 1 | { 2 | "GeoParquet (*.parquet)": { 3 | "extension": ".parquet", 4 | "format_options": "(FORMAT 'parquet', COMPRESSION 'ZSTD')" 5 | }, 6 | "GeoPackage (*.gpkg)": { 7 | "extension": ".gpkg", 8 | "format_options": "(FORMAT GDAL, DRIVER 'GPKG', SRS 'EPSG:4326')" 9 | }, 10 | "FlatGeobuf (*.fgb)": { 11 | "extension": ".fgb", 12 | "format_options": "(FORMAT GDAL, DRIVER 'FlatGeobuf', SRS 'EPSG:4326')" 13 | }, 14 | "GeoJSON (*.geojson)": { 15 | "extension": ".geojson", 16 | "format_options": "(FORMAT GDAL, DRIVER 'GeoJSON', SRS 
'EPSG:4326')" 17 | } 18 | } -------------------------------------------------------------------------------- /gpq_downloader/tests/test_dialog.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | from qgis.PyQt.QtWidgets import QDialog 4 | from qgis.PyQt.QtCore import Qt 5 | 6 | from gpq_downloader.dialog import DataSourceDialog 7 | 8 | def test_dialog_initialization(qgs_app, mock_iface): 9 | """Test dialog initialization""" 10 | dialog = DataSourceDialog(None, mock_iface) 11 | assert dialog is not None 12 | assert dialog.iface == mock_iface 13 | 14 | def test_dialog_radio_buttons(qgs_app, mock_iface): 15 | """Test radio button functionality""" 16 | dialog = DataSourceDialog(None, mock_iface) 17 | 18 | # Set Overture radio to checked (since it might not be default) 19 | dialog.overture_radio.setChecked(True) 20 | 21 | # Check state after explicitly setting 22 | assert dialog.overture_radio.isChecked() 23 | assert not dialog.sourcecoop_radio.isChecked() 24 | assert not dialog.osm_radio.isChecked() 25 | 26 | # Test switching radio buttons 27 | dialog.sourcecoop_radio.setChecked(True) 28 | assert not dialog.overture_radio.isChecked() 29 | assert dialog.sourcecoop_radio.isChecked() 30 | assert not dialog.osm_radio.isChecked() 31 | 32 | @patch('gpq_downloader.dialog.QgsSettings') 33 | def test_dialog_settings_saved(mock_settings, qgs_app, mock_iface): 34 | """Test that settings are saved""" 35 | dialog = DataSourceDialog(None, mock_iface) 36 | dialog.save_checkbox_states() 37 | mock_settings.assert_called() -------------------------------------------------------------------------------- /gpq_downloader/tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import sys 4 | from qgis.core import QgsProject, QgsVectorLayer 5 | from qgis.PyQt.QtWidgets import QApplication 6 | 7 | from gpq_downloader.plugin import QgisPluginGeoParquet 8 | 9 | @pytest.mark.skipif(not os.environ.get('RUN_INTEGRATION_TESTS'), reason="Integration tests not enabled") 10 | def test_plugin_load(qgs_app, mock_iface): 11 | """Test that plugin loads properly""" 12 | plugin = QgisPluginGeoParquet(mock_iface) 13 | assert plugin is not None 14 | 15 | # Initialize plugin 16 | plugin.initGui() 17 | 18 | # Check that actions were created 19 | assert plugin.action is not None 20 | 21 | @pytest.mark.skipif(not os.environ.get('RUN_INTEGRATION_TESTS'), reason="Integration tests not enabled") 22 | def test_plugin_unload(qgs_app, mock_iface): 23 | """Test that plugin unloads properly""" 24 | plugin = QgisPluginGeoParquet(mock_iface) 25 | plugin.initGui() 26 | 27 | # Unload the plugin 28 | plugin.unload() 29 | 30 | # Check that cleanup was successful 31 | assert plugin.worker is None 32 | assert plugin.worker_thread is None 33 | 34 | @pytest.mark.skipif(not os.environ.get('RUN_INTEGRATION_TESTS'), reason="Integration tests not enabled") 35 | def test_plugin_download_dir(qgs_app, mock_iface): 36 | """Test that plugin creates download directory""" 37 | plugin = QgisPluginGeoParquet(mock_iface) 38 | 39 | # Check that download directory exists 40 | assert plugin.download_dir.exists() 41 | assert plugin.download_dir.is_dir() -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", 
"wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "gpq_downloader" 7 | version = "0.8.5" 8 | description = "QGIS plugin for downloading and processing GeoParquet files" 9 | readme = "README.md" 10 | requires-python = ">=3.7" 11 | license = {text = "GPL-2.0-or-later"} 12 | authors = [ 13 | {name = "Chris Holmes", email = "cholmes@9eo.org"} 14 | ] 15 | dependencies = [ 16 | "duckdb>=1.1.0", 17 | ] 18 | 19 | [project.urls] 20 | "Homepage" = "https://github.com/cholmes/qgis_plugin_gpq_downloader" 21 | "Bug Tracker" = "https://github.com/yourusername/qgis_plugin_gpq_downloader/issues" 22 | 23 | [tool.setuptools] 24 | packages = ["gpq_downloader"] 25 | 26 | [tool.pytest.ini_options] 27 | testpaths = ["gpq_downloader/tests"] 28 | python_files = "test_*.py" 29 | addopts = "--cov=gpq_downloader" 30 | 31 | [tool.coverage.run] 32 | source = ["gpq_downloader"] 33 | omit = ["gpq_downloader/tests/*"] 34 | 35 | [tool.coverage.report] 36 | exclude_lines = [ 37 | "pragma: no cover", 38 | "def __repr__", 39 | "raise NotImplementedError", 40 | "if __name__ == .__main__.:", 41 | "pass", 42 | "raise ImportError", 43 | ] 44 | 45 | [tool.black] 46 | line-length = 88 47 | target-version = ['py37', 'py38', 'py39', 'py310'] 48 | include = '\.pyi?$' 49 | 50 | [project.optional-dependencies] 51 | dev = [ 52 | "pytest>=7.4.0", 53 | "pytest-qt>=4.2.0", 54 | "pytest-mock>=3.11.1", 55 | "pytest-cov>=4.1.0", 56 | "pyarrow>=10.0.0", 57 | "black>=23.3.0", 58 | ] 59 | 60 | [tool.setuptools.package-data] 61 | gpq_downloader = ["data/*.json"] -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | container: 13 | image: qgis/qgis:release-3_34 # QGIS docker image with QGIS 3.34 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Install Python dependencies 19 | run: | 20 | python3 -m pip install --upgrade pip 21 | pip3 install pytest pytest-qt 22 | # Install in development mode to ensure data files are available 23 | pip3 install -e .[dev] 24 | 25 | - name: Debug package installation 26 | run: | 27 | # Print out installed package location 28 | python3 -c "import gpq_downloader; print(gpq_downloader.__file__)" 29 | # Check if data directory exists 30 | ls -la $(python3 -c "import gpq_downloader; import os; print(os.path.dirname(gpq_downloader.__file__))")/data || echo "Data directory not found" 31 | 32 | - name: Create data directory if missing 33 | run: | 34 | # Create data directory if it doesn't exist 35 | PACKAGE_DIR=$(python3 -c "import gpq_downloader; import os; print(os.path.dirname(gpq_downloader.__file__))") 36 | mkdir -p $PACKAGE_DIR/data 37 | # If presets.json doesn't exist, create a minimal version 38 | if [ ! 
-f "$PACKAGE_DIR/data/presets.json" ]; then 39 | echo '{"datasets": {}}' > $PACKAGE_DIR/data/presets.json 40 | echo "Created minimal presets.json file at $PACKAGE_DIR/data/presets.json" 41 | fi 42 | 43 | - name: Run tests with xvfb 44 | run: | 45 | # Make sure xvfb is installed in container 46 | apt-get update && apt-get install -y xvfb 47 | 48 | # Run tests with virtual display 49 | xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" pytest -------------------------------------------------------------------------------- /make_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Function to extract version from metadata.txt 4 | get_version_from_metadata() { 5 | if [ -f "gpq_downloader/metadata.txt" ]; then 6 | VERSION=$(grep "^version=" gpq_downloader/metadata.txt | cut -d'=' -f2 | tr -d '[:space:]') 7 | if [ -n "$VERSION" ]; then 8 | echo "Found version $VERSION in metadata.txt" 9 | return 0 10 | fi 11 | fi 12 | echo "Warning: Could not extract version from metadata.txt" 13 | return 1 14 | } 15 | 16 | # Get version from command line argument or metadata.txt or use date 17 | if [ -n "$1" ]; then 18 | VERSION=$1 19 | echo "Using provided version: $VERSION" 20 | else 21 | if ! get_version_from_metadata; then 22 | VERSION=$(date +"%Y%m%d") 23 | echo "Using date-based version: $VERSION" 24 | fi 25 | fi 26 | 27 | ZIP_FILENAME="gpq_downloader_${VERSION}.zip" 28 | TEMP_DIR=$(mktemp -d) 29 | 30 | echo "Creating release zip: ${ZIP_FILENAME}" 31 | 32 | # Create a temporary directory with the renamed plugin 33 | echo "Creating temporary directory with renamed plugin..." 34 | cp -r gpq_downloader/ "${TEMP_DIR}/qgis_plugin_gpq_downloader" 35 | 36 | # Copy LICENSE file if it exists 37 | if [ -f "LICENSE" ]; then 38 | echo "Copying LICENSE file..." 39 | cp LICENSE "${TEMP_DIR}/qgis_plugin_gpq_downloader/" 40 | else 41 | echo "Warning: LICENSE file not found" 42 | fi 43 | 44 | # Navigate to the temp directory 45 | cd "${TEMP_DIR}" 46 | 47 | # Create zip file excluding unwanted files 48 | echo "Creating zip file..." 49 | zip -r "${ZIP_FILENAME}" qgis_plugin_gpq_downloader/ \ 50 | -x "*.DS_Store" "*.gitignore" "*/.git/*" "*/__pycache__/*" "*.pyc" "*.pyo" "*.zip" "*/tests/*" 51 | 52 | # Move the zip file back to the original directory 53 | mv "${ZIP_FILENAME}" "${OLDPWD}/" 54 | 55 | # Clean up 56 | cd "${OLDPWD}" 57 | rm -rf "${TEMP_DIR}" 58 | 59 | echo "Release zip created: ${ZIP_FILENAME}" 60 | echo "You can now upload this file to the QGIS Plugin Repository." -------------------------------------------------------------------------------- /gpq_downloader/metadata.txt: -------------------------------------------------------------------------------- 1 | [general] 2 | name=GeoParquet Downloader (Overture, Source Coop & Custom Cloud) 3 | qgisMinimumVersion=3.16 4 | qgisMaximumVersion=4.99.0 5 | version=0.8.5 6 | supportsQt6=yes 7 | icon=icons/parquet-download.png 8 | description=Plugin for downloading GeoParquet data from cloud sources. 9 | about=This plugin connects to cloud-based GeoParquet data and downloads the portion in the current viewport. 10 | 11 | The plugin comes with pre-configured sources for Overture 12 | Maps, Source Cooperative, and you can enter the location 13 | of any online GeoParquet file or partition. It works best with 14 | the bbox struct from GeoParquet 1.1, but any GeoParquet file 15 | will work. You can save the output data as GeoParquet, 16 | GeoPackage, DuckDB, FlatGeobuf, or GeoJSON. 
17 | 18 | The plugin does not require that your QGIS supports 19 | GeoParquet, as you can download data as GeoPackage, but 20 | GeoParquet generally works better (faster and better nested 21 | data). Most Windows installations come with it, and for Mac 22 | and Linux you can install via conda. For information on 23 | installing Geoparquet support see this wiki page. 24 | 25 | The plugin depends on DuckDB, which should be installed 26 | automatically when you install the plugin. If you have issues 27 | with DuckDB installing please file an issue on the GitHub issue tracker. 28 | 29 | tags=geoparquet,parquet,overture,source cooperative,cloud,duckdb,geopackage 30 | 31 | # credits and contact 32 | author=Chris Holmes 33 | email=cholmes@9eo.org 34 | homepage=https://github.com/cholmes/qgis_plugin_gpq_downloader/ 35 | repository=https://github.com/cholmes/qgis_plugin_gpq_downloader/ 36 | tracker=https://github.com/cholmes/qgis_plugin_gpq_downloader/issues 37 | 38 | [dependencies] 39 | pip_dependencies=duckdb>=1.1.0 -------------------------------------------------------------------------------- /gpq_downloader/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pytest 4 | from qgis.core import QgsApplication, QgsCoordinateReferenceSystem, QgsRectangle 5 | from qgis.PyQt.QtCore import QCoreApplication, QObject 6 | from qgis.PyQt.QtWidgets import QMainWindow 7 | 8 | # Add the parent directory to sys.path 9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | 11 | # Mock QGIS Application 12 | @pytest.fixture(scope="session") 13 | def qgs_app(): 14 | """QGIS application fixture""" 15 | qgs_app = QgsApplication([], False) 16 | qgs_app.initQgis() 17 | yield qgs_app 18 | qgs_app.exitQgis() 19 | 20 | # Mock iface 21 | class MockIface(QObject): 22 | def __init__(self): 23 | super().__init__() 24 | self.canvas = MockCanvas() 25 | self._window = QMainWindow() 26 | self.toolbar_icons = [] # Add this to track added icons 27 | 28 | def mapCanvas(self): 29 | return self.canvas 30 | 31 | def mainWindow(self): 32 | return self._window 33 | 34 | def addToolBarIcon(self, action): # Add this method 35 | """Mock method for adding toolbar icons""" 36 | self.toolbar_icons.append(action) 37 | 38 | def removeToolBarIcon(self, action): # Add this method too 39 | """Mock method for removing toolbar icons""" 40 | if action in self.toolbar_icons: 41 | self.toolbar_icons.remove(action) 42 | 43 | class MockCanvas: 44 | def __init__(self): 45 | self.settings = MockMapSettings() 46 | 47 | def mapSettings(self): 48 | return self.settings 49 | 50 | def extent(self): 51 | return QgsRectangle(0, 0, 1, 1) 52 | 53 | class MockMapSettings: 54 | def destinationCrs(self): 55 | return QgsCoordinateReferenceSystem("EPSG:4326") 56 | 57 | @pytest.fixture 58 | def mock_iface(): 59 | """Mock iface fixture""" 60 | return MockIface() 61 | 62 | # Sample test data 63 | @pytest.fixture 64 | def sample_bbox(): 65 | """Sample bounding box fixture""" 66 | return QgsRectangle(1, 2, 3, 4) 67 | 68 | @pytest.fixture 69 | def sample_validation_results(): 70 | """Sample validation results fixture""" 71 | return { 72 | "has_bbox": True, 73 | "bbox_column": "bbox", 74 | "geometry_column": "geometry", 75 | "schema": [ 76 | ("id", "INTEGER", "YES", None, None, None), 77 | ("name", "VARCHAR", "YES", None, None, None), 78 | ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None), 79 | ("geometry", 
"GEOMETRY", "YES", None, None, None) 80 | ] 81 | } 82 | 83 | @pytest.fixture 84 | def sample_validation_results_no_bbox(): 85 | """Sample validation results with no bbox fixture""" 86 | return { 87 | "has_bbox": False, 88 | "bbox_column": None, 89 | "geometry_column": "geometry", 90 | "schema": [ 91 | ("id", "INTEGER", "YES", None, None, None), 92 | ("name", "VARCHAR", "YES", None, None, None), 93 | ("geometry", "GEOMETRY", "YES", None, None, None) 94 | ] 95 | } -------------------------------------------------------------------------------- /gpq_downloader/tests/test_validation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | import json 4 | import os 5 | 6 | from gpq_downloader.utils import ValidationWorker 7 | 8 | @patch("duckdb.connect") 9 | def test_validation_worker_with_bbox(mock_connect, mock_iface, sample_bbox): 10 | """Test the validation worker with a dataset that has a bbox column""" 11 | # Setup mock connection 12 | mock_conn = MagicMock() 13 | mock_conn.execute.return_value.fetchall.return_value = [ 14 | ("id", "INTEGER", "YES", None, None, None), 15 | ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None), 16 | ("geometry", "GEOMETRY", "YES", None, None, None) 17 | ] 18 | mock_connect.return_value = mock_conn 19 | 20 | # Setup validation signals 21 | finished_signal_received = False 22 | validation_results = None 23 | 24 | def on_finished(success, message, results): 25 | nonlocal finished_signal_received, validation_results 26 | finished_signal_received = True 27 | validation_results = results 28 | 29 | # Create worker 30 | worker = ValidationWorker("https://example.com/test.parquet", mock_iface, sample_bbox) 31 | worker.finished.connect(on_finished) 32 | 33 | # Mock presets.json to return empty dict 34 | with patch.object(worker, 'PRESET_DATASETS', {}): 35 | worker.run() 36 | 37 | # Check results 38 | assert finished_signal_received 39 | assert validation_results["has_bbox"] is True 40 | assert validation_results["bbox_column"] == "bbox" 41 | 42 | @patch("duckdb.connect") 43 | def test_validation_worker_without_bbox(mock_connect, mock_iface, sample_bbox): 44 | """Test the validation worker with a dataset that has no bbox column""" 45 | # Setup mock connection 46 | mock_conn = MagicMock() 47 | mock_conn.execute.return_value.fetchall.return_value = [ 48 | ("id", "INTEGER", "YES", None, None, None), 49 | ("geometry", "GEOMETRY", "YES", None, None, None) 50 | ] 51 | mock_connect.return_value = mock_conn 52 | 53 | # Setup validation signals 54 | warning_signal_received = False 55 | finished_signal_received = False 56 | validation_results = None 57 | 58 | def on_finished(success, message, results): 59 | nonlocal finished_signal_received, validation_results 60 | finished_signal_received = True 61 | validation_results = results 62 | print(f"Received validation results: {results}") # Add debug print 63 | 64 | def on_warning(): 65 | nonlocal warning_signal_received 66 | warning_signal_received = True 67 | print("Warning signal received") # Add debug print 68 | 69 | # Create worker 70 | worker = ValidationWorker("https://example.com/test.parquet", mock_iface, sample_bbox) 71 | worker.finished.connect(on_finished) 72 | worker.needs_bbox_warning.connect(on_warning) 73 | 74 | # Mock presets.json to return empty dict 75 | with patch.object(worker, 'PRESET_DATASETS', {}): 76 | worker.run() 77 | 78 | # Check results 79 | assert 
finished_signal_received, "Finished signal was not emitted" 80 | assert validation_results is not None, "No validation results received" 81 | assert "has_bbox" in validation_results, f"has_bbox not in validation_results: {validation_results}" 82 | assert validation_results["has_bbox"] is False 83 | assert validation_results["bbox_column"] is None 84 | assert warning_signal_received, "Warning signal was not emitted" -------------------------------------------------------------------------------- /gpq_downloader/tests/test_worker.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | import os 4 | from qgis.PyQt.QtCore import QObject 5 | 6 | from gpq_downloader.utils import Worker 7 | 8 | class MockResult: 9 | def __init__(self, data): 10 | self.data = data 11 | 12 | def fetchall(self): 13 | return self.data 14 | 15 | def fetchone(self): 16 | return self.data[0] if self.data else None 17 | 18 | class MockConnection: 19 | def __init__(self, schema_data=None, count_result=1): 20 | self.schema_data = schema_data or [] 21 | self.count_result = count_result 22 | self.executed_queries = [] 23 | 24 | def execute(self, query): 25 | self.executed_queries.append(query) 26 | if "DESCRIBE" in query: 27 | return MockResult(self.schema_data) 28 | elif "COUNT" in query: 29 | return MockResult([(self.count_result,)]) 30 | return MockResult([]) 31 | 32 | def commit(self): 33 | pass 34 | 35 | def close(self): 36 | pass 37 | 38 | @pytest.fixture 39 | def schema_with_bbox(): 40 | return [ 41 | ("id", "INTEGER", "YES", None, None, None), 42 | ("bbox", "STRUCT(xmin DOUBLE, ymin DOUBLE, xmax DOUBLE, ymax DOUBLE)", "YES", None, None, None), 43 | ("geometry", "GEOMETRY", "YES", None, None, None) 44 | ] 45 | 46 | @pytest.fixture 47 | def schema_without_bbox(): 48 | return [ 49 | ("id", "INTEGER", "YES", None, None, None), 50 | ("geometry", "GEOMETRY", "YES", None, None, None) 51 | ] 52 | 53 | @patch("duckdb.connect") 54 | def test_worker_run_with_bbox(mock_connect, mock_iface, sample_bbox, tmp_path, sample_validation_results, schema_with_bbox): 55 | """Test Worker run method with a bbox column""" 56 | # Setup 57 | mock_conn = MockConnection(schema_data=schema_with_bbox) 58 | mock_connect.return_value = mock_conn 59 | 60 | # Create signals for testing 61 | progress_messages = [] 62 | 63 | # Create worker 64 | worker = Worker( 65 | "https://example.com/test.parquet", 66 | sample_bbox, 67 | os.path.join(tmp_path, "output.gpkg"), 68 | mock_iface, 69 | sample_validation_results 70 | ) 71 | 72 | # Connect to our test slots 73 | worker.progress.connect(lambda msg: progress_messages.append(msg)) 74 | 75 | # Run the worker 76 | worker.run() 77 | 78 | # Check queries 79 | bbox_query_found = False 80 | for query in mock_conn.executed_queries: 81 | if '"bbox".xmin BETWEEN' in query: 82 | bbox_query_found = True 83 | 84 | assert bbox_query_found, "Should use bbox in the query" 85 | assert any("Downloading" in msg for msg in progress_messages) 86 | 87 | @patch("duckdb.connect") 88 | def test_worker_run_without_bbox(mock_connect, mock_iface, sample_bbox, tmp_path, sample_validation_results_no_bbox, schema_without_bbox): 89 | """Test Worker run method without a bbox column""" 90 | # Setup 91 | mock_conn = MockConnection(schema_data=schema_without_bbox) 92 | mock_connect.return_value = mock_conn 93 | 94 | # Create signals for testing 95 | progress_messages = [] 96 | 97 | # Create worker with no bbox 98 | worker = Worker( 99 | 
"https://example.com/test.parquet", 100 | sample_bbox, 101 | os.path.join(tmp_path, "output.gpkg"), 102 | mock_iface, 103 | sample_validation_results_no_bbox 104 | ) 105 | 106 | # Connect to our test slots 107 | worker.progress.connect(lambda msg: progress_messages.append(msg)) 108 | 109 | # Run the worker 110 | worker.run() 111 | 112 | # Check queries 113 | st_intersects_found = False 114 | for query in mock_conn.executed_queries: 115 | if 'ST_Intersects' in query: 116 | st_intersects_found = True 117 | 118 | assert st_intersects_found, "Should use ST_Intersects in the query when no bbox column" 119 | assert any("Downloading" in msg for msg in progress_messages) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GeoParquet Downloader for QGIS 2 | 3 | This repo contains a QGIS plugin for downloading GeoParquet data from cloud sources, including Overture Maps, Source Cooperative, and the ability to enter the location of any online GeoParquet file or partition. Just the user's current viewport then gets downloaded, as GeoParquet, DuckDB or GeoPackage. 4 | 5 | ![gpq-downloader-demo4](https://github.com/user-attachments/assets/10f2a73f-2aa6-45a1-9491-41e63b7fec24) 6 | 7 | 8 | The core idea is that GeoParquet can act more like a 'server', letting users download only the data they need, if you add a bit more smarts to the client. So this plugin uses [DuckDB](https://duckdb.org/) but abstracts all the details of forming the right queries to external sources, so users can just pick the data they want and pull it down with ease. And with GeoPackage output users don't even need to know anything about GeoParquet. More info is on the [plugin homepage](https://plugins.qgis.org/plugins/qgis_plugin_gpq_downloader/). 9 | 10 | 11 | ## Installation 12 | 13 | The easiest way to install the plugin file is to use the QGIS plugin manager. Just go to `Plugins > Manage and Install Plugins`, click 14 | the 'install' tab and search for 'GeoParquet Downloader'. Click on 'Install Plugin' and it will install. Alternatively you can download the zip file from 15 | one of the [releases](https://github.com/cholmes/qgis_plugin_gpq_downloader/releases) and 'install from zip' in QGIS. For the plugin to work DuckDB 16 | needs to be installed. As of version 0.3 the plugin should try to automatically install DuckDB, but it doesn't work reliably. If you installed but don't see the 17 | icon below then it's likely because DuckDB isn't there. 18 | 19 | If the installation of DuckDB doesn't work, then on Windows you can use the [QDuckDB plugin](https://oslandia.gitlab.io/qgis/qduckdb/) which includes a precompiled binary. 20 | They also document how to install DuckDB on [Linux](https://oslandia.gitlab.io/qgis/qduckdb/usage/installation.html#linux) and 21 | [Mac OS/X](https://oslandia.gitlab.io/qgis/qduckdb/usage/installation.html#macos). If you're on Mac we recommend trying 22 | the [QGIS 4.0 mac build preview](https://github.com/opengisch/qgis-notarize/) which ships with DuckDB. 23 | 24 | See [metadata.txt](gpq_downloader/metadata.txt) for more installation notes. 
25 | 26 | ## Usage 27 | 28 | The plugin will install one button on the "Plugin" QGIS toolbar, which you might have to enable through `View > Toolbars > Plugins`: 29 | 30 | ![1_UuUno32b4P_UNUqJZvSPoQ](https://github.com/user-attachments/assets/16003294-9a76-42cb-a740-b5bbd308e484) 31 | 32 | It opens a dialog box that lets you select Overture, Source Cooperative, Hugging Face, or 'custom' - where you 33 | can enter the location of any online GeoParquet file or partition. 34 | 35 | *(screenshot: the data source selection dialog)* 36 | 37 | 38 | To use it, move to an area where you'd like to download data and then select which layer you'd like to download. From there you can choose the output format (GeoParquet, GeoPackage, DuckDB, GeoJSON, or FlatGeobuf) and the location to download the data to. 39 | 40 | Downloads can sometimes take a while, especially if the data provider hasn't optimized their GeoParquet files very well, or if you're downloading an area with a lot of data. Overture is one of the faster ones for now; others may take a minute or two. But it should almost always be faster than trying to figure out exactly which files you need and downloading them manually. 41 | 42 | For now we only support downloading into the current viewport, but hope to [improve that](https://github.com/cholmes/qgis_plugin_gpq_downloader/issues/10). Note also that right now only lat/long data is supported, but we hope to [support other projections](https://github.com/cholmes/qgis_plugin_gpq_downloader/issues/102). 43 | 44 | If your QGIS doesn't have GeoParquet support you'll get a warning dialog after the data download completes. The GeoParquet file will be there, but it won't automatically open on the map. We definitely recommend getting your QGIS working with GeoParquet, as the format is faster and handles nested attributes better. See [Installing GeoParquet Support in QGIS](https://github.com/cholmes/qgis_plugin_gpq_downloader/wiki/Installing-GeoParquet-Support-in-QGIS) for more details. 45 | 46 | 47 | ## Contributing 48 | 49 | This plugin has been made entirely with AI coding tools (primarily Cursor with claude-3.5-sonnet). Contributions are very welcome, both from more experienced Python developers who can help clean up the code and add missing features, and from anyone who wants a place to do AI-assisted coding that (hopefully) actually gets widely used. 50 | 51 | I'm interested in exploring open source collaboration in the age of AI coding tools, especially working with less experienced developers who'd like to contribute, so don't hesitate to jump in with AI-assisted pull requests. 52 | 53 | And any help on ideas/feedback, documentation, testing, promoting, etc. is very welcome! 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | *.zip 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 111 | .pdm.toml 112 | .pdm-python 113 | .pdm-build/ 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # mac stuff 159 | .DS_Store 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | 168 | # --> This is the recommended way 169 | 170 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 171 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 172 | 173 | # User-specific stuff 174 | .idea/**/workspace.xml 175 | .idea/**/tasks.xml 176 | .idea/**/usage.statistics.xml 177 | .idea/**/dictionaries 178 | .idea/**/shelf 179 | 180 | # AWS User-specific 181 | .idea/**/aws.xml 182 | 183 | # Generated files 184 | .idea/**/contentModel.xml 185 | 186 | # Sensitive or high-churn files 187 | .idea/**/dataSources/ 188 | .idea/**/dataSources.ids 189 | .idea/**/dataSources.local.xml 190 | .idea/**/sqlDataSources.xml 191 | .idea/**/dynamic.xml 192 | .idea/**/uiDesigner.xml 193 | .idea/**/dbnavigator.xml 194 | 195 | # Gradle 196 | .idea/**/gradle.xml 197 | .idea/**/libraries 198 | 199 | # Gradle and Maven with auto-import 200 | # When using Gradle or Maven with auto-import, you should exclude module files, 201 | # since they will be recreated, and may cause churn. Uncomment if using 202 | # auto-import. 203 | # .idea/artifacts 204 | # .idea/compiler.xml 205 | # .idea/jarRepositories.xml 206 | # .idea/modules.xml 207 | # .idea/*.iml 208 | # .idea/modules 209 | # *.iml 210 | # *.ipr 211 | 212 | # CMake 213 | cmake-build-*/ 214 | 215 | # Mongo Explorer plugin 216 | .idea/**/mongoSettings.xml 217 | 218 | # File-based project format 219 | *.iws 220 | 221 | # IntelliJ 222 | out/ 223 | 224 | # mpeltonen/sbt-idea plugin 225 | .idea_modules/ 226 | 227 | # JIRA plugin 228 | atlassian-ide-plugin.xml 229 | 230 | # Cursive Clojure plugin 231 | .idea/replstate.xml 232 | 233 | # SonarLint plugin 234 | .idea/sonarlint/ 235 | 236 | # Crashlytics plugin (for Android Studio and IntelliJ) 237 | com_crashlytics_export_strings.xml 238 | crashlytics.properties 239 | crashlytics-build.properties 240 | fabric.properties 241 | 242 | # Editor-based Rest Client 243 | .idea/httpRequests 244 | 245 | # Android studio 3.1+ serialized cache file 246 | .idea/caches/build_file_checksums.ser 247 | 248 | .claude/ -------------------------------------------------------------------------------- /gpq_downloader/data/presets.json: -------------------------------------------------------------------------------- 1 | { 2 | "overture": { 3 | "buildings": { 4 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=buildings/type=building/*", 5 | "info_url": "https://docs.overturemaps.org/reference/buildings", 6 | "needs_validation": false 7 | }, 8 | "places": { 9 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=places/type=place/*", 10 | "info_url": "https://docs.overturemaps.org/reference/places", 11 | "needs_validation": false 12 | }, 13 | "transportation": { 14 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=transportation/type=segment/*", 15 | "info_url": "https://docs.overturemaps.org/reference/transportation", 16 | "needs_validation": false 17 | }, 18 | "addresses": { 19 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=addresses/type=*/*", 20 | "info_url": "https://docs.overturemaps.org/reference/addresses", 21 | "needs_validation": false 22 | }, 23 | "base": { 24 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=base/type={subtype}/*", 25 | "info_url": "https://docs.overturemaps.org/reference/base", 26 | "needs_validation": false, 27 | "subtypes": [ 28 | "infrastructure", 29 | "land", 30 | "land_cover", 31 | 
"land_use", 32 | "water", 33 | "bathymetry" 34 | ] 35 | }, 36 | "divisions": { 37 | "url_template": "s3://overturemaps-us-west-2/release/{release}/theme=divisions/type=division_area/*", 38 | "info_url": "https://docs.overturemaps.org/reference/administrative", 39 | "needs_validation": false 40 | } 41 | }, 42 | "source_cooperative": { 43 | "vida_buildings": { 44 | "url": "s3://us-west-2.opendata.source.coop/vida/google-microsoft-osm-open-buildings/geoparquet/by_country/*/*.parquet", 45 | "info_url": "https://source.coop/vida/google-microsoft-osm-open-buildings", 46 | "needs_validation": false, 47 | "display_name": "VIDA Google/Microsoft/OSM Buildings" 48 | }, 49 | "microsoft_ml_roads": { 50 | "url": "s3://us-west-2.opendata.source.coop/nlebovits/microsoft-ml-road-detections/by_country/*/*.parquet", 51 | "info_url": "https://source.coop/nlebovits/microsoft-ml-road-detections", 52 | "needs_validation": false, 53 | "display_name": "Microsoft ML Road Detections" 54 | }, 55 | "globalbuildingatlas": { 56 | "url": "s3://us-west-2.opendata.source.coop/tge-labs/globalbuildingatlas-lod1/*.parquet", 57 | "info_url": "https://source.coop/tge-labs/globalbuildingatlas-lod1", 58 | "needs_validation": false, 59 | "display_name": "GlobalBuildingAtlas" 60 | }, 61 | "openbuildingmap": { 62 | "url": "s3://us-west-2.opendata.source.coop/tge-labs/openbuildingmap/*.parquet", 63 | "info_url": "https://source.coop/tge-labs/openbuildingmap", 64 | "needs_validation": false, 65 | "display_name": "OpenBuildingMap" 66 | }, 67 | "fsq_places_fused": { 68 | "url": "s3://us-west-2.opendata.source.coop/fused/fsq-os-places/2025-02-06/places/*.parquet", 69 | "info_url": "https://source.coop/fused/fsq-os-places", 70 | "needs_validation": false, 71 | "display_name": "Foursquare Open Source Places - Fused-partitioned" 72 | }, 73 | "us_structures": { 74 | "url": "s3://us-west-2.opendata.source.coop/wherobots/usa-structures/geoparquet/*.parquet", 75 | "info_url": "https://source.coop/wherobots/usa-structures/geoparquet", 76 | "needs_validation": false, 77 | "display_name": "US Structures from ORNL by Wherobots" 78 | }, 79 | "planet_eu_boundaries": { 80 | "url": "https://data.source.coop/planet/eu-field-boundaries/field_boundaries.parquet", 81 | "info_url": "https://source.coop/planet/eu-field-boundaries", 82 | "needs_validation": false, 83 | "display_name": "Planet EU Field Boundaries (2022)" 84 | }, 85 | "usda_crop": { 86 | "url": "https://data.source.coop/fiboa/us-usda-cropland/us_usda_cropland.parquet", 87 | "info_url": "https://source.coop/fiboa/us-usda-cropland", 88 | "needs_validation": false, 89 | "display_name": "USDA Crop Sequence Boundaries" 90 | }, 91 | "nhd_flowlines": { 92 | "url": "https://data.source.coop/cholmes/nhd/NHDFlowline.parquet", 93 | "info_url": "https://source.coop/cholmes/nhd", 94 | "needs_validation": true, 95 | "display_name": "NHD Flowlines (experimental)" 96 | } 97 | }, 98 | "openstreetmap": { 99 | "buildings": { 100 | "url": "https://data.openstreetmap.us/layercake/buildings.parquet", 101 | "info_url": "https://data.openstreetmap.us/", 102 | "needs_validation": false 103 | }, 104 | "boundaries": { 105 | "url": "https://data.openstreetmap.us/layercake/boundaries.parquet", 106 | "info_url": "https://data.openstreetmap.us/", 107 | "needs_validation": false 108 | }, 109 | "highways": { 110 | "url": "https://data.openstreetmap.us/layercake/highways.parquet", 111 | "info_url": "https://data.openstreetmap.us/", 112 | "needs_validation": false 113 | }, 114 | "settlements": { 115 | "url": 
"https://data.openstreetmap.us/layercake/settlements.parquet", 116 | "info_url": "https://data.openstreetmap.us/", 117 | "needs_validation": false 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /gpq_downloader/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import subprocess 4 | import sys 5 | import shutil 6 | from qgis.PyQt.QtWidgets import QProgressBar, QMessageBox 7 | from qgis.PyQt.QtCore import QCoreApplication, QTimer 8 | from qgis.core import QgsTask, QgsApplication, QgsSettings 9 | from qgis.utils import iface, loadPlugin, startPlugin, unloadPlugin, plugins 10 | 11 | from . import logger 12 | 13 | # Global flag to track installation status 14 | _duckdb_ready = False 15 | 16 | 17 | class DuckDBInstallerTask(QgsTask): 18 | def __init__(self, callback): 19 | # Simple initialization with just CanCancel flag 20 | super().__init__("Installing DuckDB", QgsTask.CanCancel) 21 | self.success = False 22 | self.message = "" 23 | self.exception = None 24 | self.callback = callback 25 | # logger.log("Task initialized") 26 | 27 | def run(self): 28 | # logger.log("Task run method started") 29 | try: 30 | logger.log("Starting DuckDB installation...") 31 | if platform.system() == "Windows": 32 | py_path = os.path.join(os.path.dirname(sys.executable), "python.exe") 33 | elif platform.system() == "Darwin": 34 | qgis_bin = os.path.dirname(sys.executable) 35 | possible_paths = [ 36 | os.path.join(qgis_bin, "python3"), 37 | os.path.join(qgis_bin, "bin", "python3"), 38 | os.path.join(qgis_bin, "Resources", "python", "bin", "python3"), 39 | ] 40 | py_path = next( 41 | (path for path in possible_paths if os.path.exists(path)), 42 | sys.executable, 43 | ) 44 | else: 45 | py_path = sys.executable 46 | 47 | # logger.log(f"Using Python path: {py_path}") 48 | # logger.log(f"Running pip install command...") 49 | 50 | subprocess.check_call([py_path, "-m", "pip", "install", "--user", "duckdb"]) 51 | 52 | # logger.log("Pip install completed, reloading modules...") 53 | import importlib 54 | 55 | importlib.invalidate_caches() 56 | 57 | self.success = True 58 | self.message = "DuckDB installed successfully" 59 | return True 60 | 61 | except subprocess.CalledProcessError as e: 62 | self.exception = e 63 | self.message = f"Pip install failed: {str(e)}" 64 | logger.log(f"Installation failed with error: {str(e)}") 65 | return False 66 | except Exception as e: 67 | self.exception = e 68 | self.message = f"Failed to install/upgrade DuckDB: {str(e)}" 69 | logger.log(f"Installation failed with error: {str(e)}", 2) 70 | return False 71 | 72 | def finished(self, result): 73 | global _duckdb_ready 74 | msg_bar = iface.messageBar() 75 | msg_bar.clearWidgets() 76 | 77 | if result and self.success: 78 | try: 79 | import duckdb 80 | 81 | self.message = f"DuckDB {duckdb.__version__} installed successfully" 82 | except ImportError: 83 | pass 84 | msg_bar.pushSuccess("Success", self.message) 85 | logger.log(self.message) 86 | _duckdb_ready = True 87 | if self.callback: 88 | self.callback() 89 | else: 90 | msg_bar.pushCritical("Error", self.message) 91 | logger.log(self.message) 92 | _duckdb_ready = False 93 | 94 | 95 | def ensure_duckdb(callback=None): 96 | try: 97 | import duckdb 98 | 99 | version = duckdb.__version__ 100 | from packaging import version as version_parser 101 | 102 | if version_parser.parse(version) >= version_parser.parse("1.1.0"): 103 | logger.log(f"DuckDB {version} 
already installed") 104 | global _duckdb_ready 105 | _duckdb_ready = True 106 | if callback: 107 | callback() 108 | return True 109 | else: 110 | logger.log(f"DuckDB {version} found but needs upgrade to 1.1.0+", 2) 111 | raise ImportError("Version too old") 112 | 113 | except ImportError: 114 | logger.log("DuckDB not found or needs upgrade, attempting to install/upgrade...", 2) 115 | try: 116 | msg_bar = iface.messageBar() 117 | progress = QProgressBar() 118 | progress.setMinimum(0) 119 | progress.setMaximum(0) 120 | progress.setValue(0) 121 | 122 | msg = msg_bar.createMessage("Installing DuckDB...") 123 | msg.layout().addWidget(progress) 124 | msg_bar.pushWidget(msg) 125 | QCoreApplication.processEvents() 126 | 127 | # Create and start the task 128 | task = DuckDBInstallerTask(callback) 129 | # logger.log("Created installer task") 130 | 131 | # Get the task manager and add the task 132 | task_manager = QgsApplication.taskManager() 133 | # logger.log(f"Task manager has {task_manager.count()} tasks") 134 | 135 | # Add task and check if it was added successfully 136 | task_manager.addTask(task) 137 | # logger.log(f"Task added successfully: {success}") 138 | 139 | # Check task status 140 | # logger.log(f"Task manager now has {task_manager.count()} tasks") 141 | # logger.log(f"Task description: {task.description()}") 142 | # logger.log(f"Task status: {task.status()}") 143 | 144 | # Schedule periodic status checks with guarded access 145 | def check_status(): 146 | try: 147 | status = task.status() 148 | except RuntimeError: 149 | # logger.log("Task has been deleted, stopping status checks") 150 | return 151 | 152 | # logger.log(f"Current task status: {status}") 153 | if status == QgsTask.Queued: 154 | # logger.log("Task still queued, retriggering...") 155 | try: 156 | QgsApplication.taskManager().triggerTask(task) 157 | except RuntimeError: 158 | logger.log("Failed to trigger task, object likely deleted") 159 | return 160 | QTimer.singleShot(1000, check_status) 161 | elif status == QgsTask.Running: 162 | # logger.log("Task is running") 163 | QTimer.singleShot(1000, check_status) 164 | elif status == QgsTask.Complete: 165 | logger.log("Task completed") 166 | 167 | # Start checking status after a short delay 168 | QTimer.singleShot(100, check_status) 169 | 170 | return True 171 | 172 | except Exception as e: 173 | msg_bar.clearWidgets() 174 | msg_bar.pushCritical("Error", f"Failed to install/upgrade DuckDB: {str(e)}", 2) 175 | logger.log(f"Failed to setup task with error: {str(e)}", 2) 176 | logger.log(f"Error type: {type(e)}", 2) 177 | import traceback 178 | 179 | logger.log(f"Traceback: {traceback.format_exc()}", 2) 180 | return False 181 | 182 | 183 | def classFactory(iface): 184 | """Load the plugin class.""" 185 | from .plugin import QgisPluginGeoParquet 186 | return QgisPluginGeoParquet(iface) 187 | 188 | 189 | -------------------------------------------------------------------------------- /gpq_downloader/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import MagicMock, patch 3 | import os 4 | from qgis.core import QgsRectangle, QgsCoordinateReferenceSystem 5 | from pathlib import Path 6 | 7 | from gpq_downloader.utils import ( 8 | transform_bbox_to_4326, 9 | Worker, 10 | ValidationWorker 11 | ) 12 | 13 | # Add new test for file size estimation 14 | def test_estimate_file_size(mock_iface, sample_bbox, tmp_path): 15 | """Test file size estimation for GeoJSON output""" 16 | # Create mock 
connection and cursor 17 | mock_conn = MagicMock() 18 | mock_conn.execute.return_value.fetchone.side_effect = [ 19 | (1000,), # row count 20 | (2000.0,) # avg feature size 21 | ] 22 | 23 | # Create worker 24 | worker = Worker( 25 | "https://example.com/test.parquet", 26 | sample_bbox, 27 | str(tmp_path / "test.geojson"), 28 | mock_iface, 29 | {"has_bbox": True, "bbox_column": "bbox"} 30 | ) 31 | 32 | # Test size estimation 33 | estimated_size = worker.estimate_file_size(mock_conn, "test_table") 34 | assert estimated_size > 0 35 | assert isinstance(estimated_size, float) 36 | 37 | # Add test for process_schema_columns 38 | def test_process_schema_columns(): 39 | """Test schema column processing for different data types""" 40 | # Create worker 41 | worker = Worker( 42 | "https://example.com/test.parquet", 43 | QgsRectangle(0, 0, 1, 1), 44 | "test.parquet", 45 | MagicMock(), 46 | {"has_bbox": True} 47 | ) 48 | 49 | # Test different column types 50 | schema_result = [ 51 | ("id", "INTEGER", "YES", None, None, None), 52 | ("tags", "MAP(VARCHAR, VARCHAR)", "YES", None, None, None), 53 | ("names", "STRUCT(primary VARCHAR)", "YES", None, None, None), 54 | ("categories", "VARCHAR[]", "YES", None, None, None), 55 | ("small_num", "UTINYINT", "YES", None, None, None), 56 | ("geometry", "GEOMETRY", "YES", None, None, None) 57 | ] 58 | 59 | columns = worker.process_schema_columns(schema_result) 60 | 61 | assert len(columns) == 6 62 | assert 'TO_JSON("tags")' in columns[1] 63 | assert 'TO_JSON("names")' in columns[2] 64 | assert 'array_to_string("categories"' in columns[3] 65 | assert 'CAST("small_num" AS INTEGER)' in columns[4] 66 | 67 | # Add test for ValidationWorker metadata parsing 68 | @patch('duckdb.connect') 69 | def test_validation_worker_metadata_parsing(mock_connect, mock_iface): 70 | """Test GeoParquet metadata parsing in ValidationWorker""" 71 | # Mock connection with metadata 72 | mock_conn = MagicMock() 73 | mock_conn.execute.return_value.fetchall.return_value = [ 74 | (b"geo", b'{"columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"covering":{"bbox":{"xmin":[0],"ymin":[0],"xmax":[1],"ymax":[1]}}}}}') 75 | ] 76 | mock_connect.return_value = mock_conn 77 | 78 | worker = ValidationWorker( 79 | "https://example.com/test.parquet", 80 | mock_iface, 81 | QgsRectangle(0, 0, 1, 1) 82 | ) 83 | 84 | # Test metadata parsing 85 | bbox_column = worker.check_bbox_metadata(mock_conn) 86 | assert bbox_column is not None 87 | 88 | # Add test for needs_validation method 89 | def test_validation_worker_needs_validation(): 90 | """Test needs_validation logic for different URLs""" 91 | worker = ValidationWorker( 92 | "https://example.com/test.parquet", 93 | MagicMock(), 94 | QgsRectangle(0, 0, 1, 1) 95 | ) 96 | 97 | # Test custom URL 98 | assert worker.needs_validation() == True 99 | 100 | # Test Overture URL 101 | worker.dataset_url = "s3://overturemaps-us-west-2/release/2025-10-22.0/theme=buildings" 102 | assert worker.needs_validation() == False 103 | 104 | # Test Source Cooperative URL with validation flag 105 | worker.PRESET_DATASETS = { 106 | "source_cooperative": { 107 | "test_dataset": { 108 | "url": "https://example.com/test.parquet", 109 | "needs_validation": False 110 | } 111 | } 112 | } 113 | worker.dataset_url = "https://example.com/test.parquet" 114 | assert worker.needs_validation() == False 115 | 116 | # Add test for transform_bbox_to_4326 with invalid inputs 117 | def test_transform_bbox_invalid_inputs(qgs_app): 118 | """Test bbox transformation with invalid inputs""" 119 | # 
Test with None extent 120 | assert transform_bbox_to_4326(None, QgsCoordinateReferenceSystem("EPSG:4326")) is None 121 | 122 | # Test with None CRS 123 | assert transform_bbox_to_4326(QgsRectangle(0, 0, 1, 1), None) is None 124 | 125 | # Test with invalid CRS 126 | invalid_crs = QgsCoordinateReferenceSystem() 127 | assert not invalid_crs.isValid() 128 | result = transform_bbox_to_4326(QgsRectangle(0, 0, 1, 1), invalid_crs) 129 | assert isinstance(result, QgsRectangle) 130 | 131 | # Add test for Worker initialization with layer name 132 | def test_worker_initialization_with_layer_name(mock_iface, sample_bbox, tmp_path): 133 | """Test Worker initialization with optional layer name""" 134 | worker = Worker( 135 | "https://example.com/test.parquet", 136 | sample_bbox, 137 | str(tmp_path / "test.parquet"), 138 | mock_iface, 139 | {"has_bbox": True}, 140 | layer_name="Test Layer" 141 | ) 142 | 143 | assert worker.layer_name == "Test Layer" 144 | assert not worker.size_warning_accepted 145 | assert not worker.killed 146 | 147 | def test_transform_bbox_to_4326(qgs_app): 148 | """Test transforming a bounding box to EPSG:4326""" 149 | # Create test bbox in EPSG:3857 150 | source_crs = QgsCoordinateReferenceSystem("EPSG:3857") 151 | input_bbox = QgsRectangle(1000000, 2000000, 1010000, 2010000) 152 | 153 | # Transform 154 | result_bbox = transform_bbox_to_4326(input_bbox, source_crs) 155 | 156 | # Check result is in 4326 157 | assert isinstance(result_bbox, QgsRectangle) 158 | assert result_bbox.xMinimum() != input_bbox.xMinimum() # Values should change after transform 159 | 160 | # Test when already in 4326 (no transformation needed) 161 | already_4326 = QgsRectangle(1, 2, 3, 4) 162 | result = transform_bbox_to_4326(already_4326, QgsCoordinateReferenceSystem("EPSG:4326")) 163 | assert result.xMinimum() == already_4326.xMinimum() 164 | 165 | def test_worker_initialization(mock_iface, sample_bbox, tmp_path, sample_validation_results): 166 | """Test Worker initialization""" 167 | # Create test parameters 168 | dataset_url = "https://example.com/test.parquet" 169 | output_file = os.path.join(tmp_path, "output.gpkg") 170 | 171 | # Initialize worker 172 | worker = Worker(dataset_url, sample_bbox, output_file, mock_iface, sample_validation_results) 173 | 174 | # Check properties 175 | assert worker.dataset_url == dataset_url 176 | assert worker.extent == sample_bbox 177 | assert worker.output_file == output_file 178 | assert worker.validation_results == sample_validation_results 179 | assert worker.killed is False 180 | 181 | def test_validation_worker_initialization(mock_iface, sample_bbox): 182 | """Test ValidationWorker initialization""" 183 | dataset_url = "https://example.com/test.parquet" 184 | 185 | # Initialize validation worker 186 | worker = ValidationWorker(dataset_url, mock_iface, sample_bbox) 187 | 188 | # Check properties 189 | assert worker.dataset_url == dataset_url 190 | assert worker.extent == sample_bbox 191 | assert worker.killed is False 192 | 193 | def test_transform_bbox_with_none(qgs_app): 194 | """Test transform_bbox_to_4326 with None input""" 195 | result = transform_bbox_to_4326(None, None) 196 | assert result is None 197 | 198 | @patch('duckdb.connect') 199 | def test_worker_error_handling(mock_connect, mock_iface, sample_bbox, tmp_path): 200 | """Test Worker error handling""" 201 | mock_connect.side_effect = Exception("Test error") 202 | 203 | # Create signals for testing 204 | error_message = None 205 | def on_error(msg): 206 | nonlocal error_message 207 | error_message = msg 
208 | 209 | # Create worker 210 | worker = Worker( 211 | "https://example.com/test.parquet", 212 | sample_bbox, 213 | str(tmp_path / "test.parquet"), 214 | mock_iface, 215 | {"has_bbox": True, "bbox_column": "bbox"} 216 | ) 217 | worker.error.connect(on_error) 218 | 219 | # Run worker 220 | worker.run() 221 | 222 | assert error_message is not None 223 | assert "Test error" in error_message -------------------------------------------------------------------------------- /gpq_downloader/tests/create_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Create test data files for the GPQ Downloader plugin tests. 4 | 5 | This module can create: 6 | 1. Non-GeoParquet compliant parquet files (compatible parquet as per spec) 7 | 2. Standard GeoParquet files (with proper metadata) 8 | 3. Other test data as needed 9 | """ 10 | 11 | import pyarrow as pa 12 | import pyarrow.parquet as pq 13 | from shapely.geometry import LineString 14 | from shapely import wkb 15 | import pandas as pd 16 | 17 | 18 | def create_non_geoparquet_file(output_path="non_geoparquet_with_geometry.parquet"): 19 | """Create a parquet file with WKB geometry but no GeoParquet metadata.""" 20 | 21 | # Create sample LineString geometries representing street segments in San Francisco 22 | # Using approximate coordinates for real SF streets 23 | geometries = [ 24 | # Market Street segment 25 | LineString([(-122.4194, 37.7749), (-122.4184, 37.7759), (-122.4174, 37.7769)]), 26 | 27 | # Mission Street segment 28 | LineString([(-122.4180, 37.7600), (-122.4170, 37.7610), (-122.4160, 37.7620)]), 29 | 30 | # Geary Boulevard segment 31 | LineString([(-122.4650, 37.7810), (-122.4640, 37.7810), (-122.4630, 37.7810)]), 32 | 33 | # Van Ness Avenue segment 34 | LineString([(-122.4220, 37.7750), (-122.4220, 37.7760), (-122.4220, 37.7770)]), 35 | 36 | # Embarcadero segment 37 | LineString([(-122.3950, 37.7950), (-122.3940, 37.7940), (-122.3930, 37.7930)]), 38 | 39 | # Lombard Street segment (the famous crooked part) 40 | LineString([(-122.4186, 37.8021), (-122.4176, 37.8020), (-122.4166, 37.8019)]), 41 | 42 | # Golden Gate Park path 43 | LineString([(-122.4820, 37.7700), (-122.4810, 37.7700), (-122.4800, 37.7700)]) 44 | ] 45 | 46 | # Convert to WKB 47 | wkb_geometries = [wkb.dumps(geom) for geom in geometries] 48 | 49 | # Create other sample data 50 | data = { 51 | 'id': [1, 2, 3, 4, 5, 6, 7], 52 | 'name': [ 53 | 'Market St', 54 | 'Mission St', 55 | 'Geary Blvd', 56 | 'Van Ness Ave', 57 | 'Embarcadero', 58 | 'Lombard St', 59 | 'GG Park Trail' 60 | ], 61 | 'type': [ 62 | 'major_street', 63 | 'major_street', 64 | 'boulevard', 65 | 'avenue', 66 | 'waterfront', 67 | 'tourist_street', 68 | 'park_path' 69 | ], 70 | 'length_m': [250.5, 220.3, 180.7, 200.9, 190.1, 150.2, 210.4], 71 | 'geometry': wkb_geometries # WKB-encoded geometry column 72 | } 73 | 74 | # Create DataFrame 75 | df = pd.DataFrame(data) 76 | 77 | # Convert to PyArrow Table 78 | # Note: We're explicitly NOT adding any geo metadata 79 | table = pa.Table.from_pandas(df) 80 | 81 | # Write to Parquet file WITHOUT any geo metadata 82 | pq.write_table(table, output_path) 83 | 84 | print(f"Created non-GeoParquet file: {output_path}") 85 | print(f"Columns: {list(data.keys())}") 86 | print(f"Rows: {len(df)}") 87 | 88 | # Verify it has no geo metadata 89 | parquet_file = pq.ParquetFile(output_path) 90 | metadata = parquet_file.metadata 91 | 92 | # Check that there's no "geo" key in the metadata 93 | if metadata.metadata: 
94 | metadata_dict = {k.decode(): v.decode() for k, v in metadata.metadata.items()} 95 | has_geo = 'geo' in metadata_dict 96 | print(f"Has 'geo' metadata: {has_geo}") 97 | else: 98 | print("No metadata present") 99 | 100 | return output_path 101 | 102 | 103 | def create_geoparquet_file(output_path="geoparquet_with_metadata.parquet"): 104 | """Create a proper GeoParquet file with geo metadata.""" 105 | 106 | # Same geometries as non-geoparquet version 107 | geometries = [ 108 | LineString([(-122.4194, 37.7749), (-122.4184, 37.7759), (-122.4174, 37.7769)]), 109 | LineString([(-122.4180, 37.7600), (-122.4170, 37.7610), (-122.4160, 37.7620)]), 110 | LineString([(-122.4650, 37.7810), (-122.4640, 37.7810), (-122.4630, 37.7810)]), 111 | LineString([(-122.4220, 37.7750), (-122.4220, 37.7760), (-122.4220, 37.7770)]), 112 | LineString([(-122.3950, 37.7950), (-122.3940, 37.7940), (-122.3930, 37.7930)]), 113 | LineString([(-122.4186, 37.8021), (-122.4176, 37.8020), (-122.4166, 37.8019)]), 114 | LineString([(-122.4820, 37.7700), (-122.4810, 37.7700), (-122.4800, 37.7700)]) 115 | ] 116 | 117 | # Convert to WKB 118 | wkb_geometries = [wkb.dumps(geom) for geom in geometries] 119 | 120 | # Create data 121 | data = { 122 | 'id': [1, 2, 3, 4, 5, 6, 7], 123 | 'name': [ 124 | 'Market St', 125 | 'Mission St', 126 | 'Geary Blvd', 127 | 'Van Ness Ave', 128 | 'Embarcadero', 129 | 'Lombard St', 130 | 'GG Park Trail' 131 | ], 132 | 'type': [ 133 | 'major_street', 134 | 'major_street', 135 | 'boulevard', 136 | 'avenue', 137 | 'waterfront', 138 | 'tourist_street', 139 | 'park_path' 140 | ], 141 | 'length_m': [250.5, 220.3, 180.7, 200.9, 190.1, 150.2, 210.4], 142 | 'geometry': wkb_geometries 143 | } 144 | 145 | df = pd.DataFrame(data) 146 | table = pa.Table.from_pandas(df) 147 | 148 | # Create GeoParquet metadata 149 | geo_metadata = { 150 | "version": "1.0.0", 151 | "primary_column": "geometry", 152 | "columns": { 153 | "geometry": { 154 | "encoding": "WKB", 155 | "geometry_types": ["LineString"], 156 | "crs": { 157 | "$schema": "https://proj.org/schemas/v0.6/projjson.schema.json", 158 | "type": "GeographicCRS", 159 | "name": "WGS 84", 160 | "datum": { 161 | "type": "GeodeticReferenceFrame", 162 | "name": "World Geodetic System 1984", 163 | "ellipsoid": { 164 | "name": "WGS 84", 165 | "semi_major_axis": 6378137, 166 | "inverse_flattening": 298.257223563 167 | } 168 | }, 169 | "coordinate_system": { 170 | "subtype": "ellipsoidal", 171 | "axis": [ 172 | { 173 | "name": "Geodetic longitude", 174 | "abbreviation": "Lon", 175 | "direction": "east", 176 | "unit": "degree" 177 | }, 178 | { 179 | "name": "Geodetic latitude", 180 | "abbreviation": "Lat", 181 | "direction": "north", 182 | "unit": "degree" 183 | } 184 | ] 185 | }, 186 | "id": { 187 | "authority": "EPSG", 188 | "code": 4326 189 | } 190 | } 191 | } 192 | } 193 | } 194 | 195 | # Convert metadata to JSON string 196 | import json 197 | geo_metadata_str = json.dumps(geo_metadata) 198 | 199 | # Create new metadata with geo key 200 | metadata = table.schema.metadata or {} 201 | metadata[b'geo'] = geo_metadata_str.encode('utf-8') 202 | 203 | # Create new table with metadata 204 | table = table.replace_schema_metadata(metadata) 205 | 206 | # Write GeoParquet file 207 | pq.write_table(table, output_path) 208 | 209 | print(f"Created GeoParquet file: {output_path}") 210 | print(f"Columns: {list(data.keys())}") 211 | print(f"Rows: {len(df)}") 212 | 213 | # Verify it has geo metadata 214 | parquet_file = pq.ParquetFile(output_path) 215 | metadata = parquet_file.metadata 216 | 
217 | if metadata.metadata and b'geo' in metadata.metadata: 218 | print("Has 'geo' metadata: True") 219 | else: 220 | print("Has 'geo' metadata: False") 221 | 222 | return output_path 223 | 224 | 225 | if __name__ == "__main__": 226 | import os 227 | 228 | # Create data directory if it doesn't exist 229 | data_dir = os.path.join(os.path.dirname(__file__), 'data') 230 | os.makedirs(data_dir, exist_ok=True) 231 | 232 | # Create both types of files 233 | non_geo_path = os.path.join(data_dir, 'non_geoparquet_with_geometry.parquet') 234 | geo_path = os.path.join(data_dir, 'geoparquet_with_metadata.parquet') 235 | 236 | print("Creating test data files...") 237 | print("-" * 50) 238 | create_non_geoparquet_file(non_geo_path) 239 | print("-" * 50) 240 | create_geoparquet_file(geo_path) 241 | print("-" * 50) 242 | print("Test data creation complete!") -------------------------------------------------------------------------------- /gpq_downloader/tests/test_plugin.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import datetime 3 | from unittest.mock import MagicMock, patch, call 4 | from qgis.PyQt.QtWidgets import QAction, QProgressDialog, QMessageBox, QFileDialog, QDialog, QVBoxLayout, QLabel 5 | from qgis.core import QgsProject, QgsVectorLayer, QgsSettings, QgsCoordinateReferenceSystem, QgsRectangle 6 | from pathlib import Path 7 | # Note: qtbot is injected as a pytest-qt fixture; importing it from pytestqt is unnecessary 8 | 9 | from gpq_downloader.plugin import QgisPluginGeoParquet 10 | from gpq_downloader.dialog import DataSourceDialog 11 | 12 | def test_plugin_run_with_active_download(qgs_app, mock_iface): 13 | """Test run method when a download is already in progress""" 14 | plugin = QgisPluginGeoParquet(mock_iface) 15 | plugin.worker = MagicMock() 16 | plugin.worker_thread = MagicMock() 17 | plugin.worker_thread.isRunning.return_value = True 18 | 19 | with patch('gpq_downloader.plugin.QMessageBox.warning') as mock_warning: 20 | plugin.run() 21 | mock_warning.assert_called_once() 22 | assert "Download in Progress" in mock_warning.call_args[0][1] 23 | 24 | @patch('gpq_downloader.plugin.DataSourceDialog') 25 | def test_plugin_run_dialog_rejected(mock_dialog, qgs_app, mock_iface): 26 | """Test run method when dialog is rejected""" 27 | plugin = QgisPluginGeoParquet(mock_iface) 28 | 29 | # Setup mock dialog 30 | dialog_instance = MagicMock() 31 | dialog_instance.exec.return_value = QDialog.Rejected 32 | mock_dialog.return_value = dialog_instance 33 | 34 | plugin.run() 35 | 36 | dialog_instance.exec.assert_called_once() 37 | assert plugin.worker is None 38 | assert plugin.worker_thread is None 39 | 40 | @patch('gpq_downloader.plugin.QgsSettings') 41 | @patch('gpq_downloader.plugin.QFileDialog.getSaveFileName') 42 | @patch('gpq_downloader.plugin.DataSourceDialog') 43 | def test_plugin_run_with_download(mock_dialog, mock_save_dialog, mock_settings, qgs_app, mock_iface, tmp_path): 44 | """Test run method with successful download setup""" 45 | plugin = QgisPluginGeoParquet(mock_iface) 46 | 47 | # Setup mock dialog 48 | dialog_instance = MagicMock() 49 | dialog_instance.exec.return_value = QDialog.Accepted 50 | dialog_instance.get_urls.return_value = ["https://example.com/test.parquet?theme=buildings"] 51 | dialog_instance.overture_radio.isChecked.return_value = True 52 | mock_dialog.return_value = dialog_instance 53 | 54 | # Setup mock save dialog 55 | output_file = str(tmp_path / "test.parquet") 56 | mock_save_dialog.return_value = (output_file, "GeoParquet (*.parquet)") 57 | 58 | # Setup mock settings 59
| mock_settings_instance = MagicMock() 60 | mock_settings.return_value = mock_settings_instance 61 | 62 | # Mock datetime to avoid timestamp issues 63 | with patch('gpq_downloader.plugin.datetime') as mock_datetime: 64 | mock_datetime.datetime.now.return_value.strftime.return_value = "20230101_120000" 65 | 66 | # Mock the process_download_queue method to avoid actual processing 67 | with patch.object(plugin, 'process_download_queue'): 68 | plugin.run() 69 | 70 | mock_save_dialog.assert_called_once() 71 | 72 | def test_plugin_handle_error(qgs_app, mock_iface): 73 | """Test error handling""" 74 | plugin = QgisPluginGeoParquet(mock_iface) 75 | plugin.progress_dialog = MagicMock() 76 | error_msg = "Test error" 77 | 78 | with patch('gpq_downloader.plugin.QMessageBox.critical') as mock_critical: 79 | plugin.handle_error(error_msg) 80 | mock_critical.assert_called_once() 81 | assert mock_critical.call_args[0][1] == "Error" or error_msg in mock_critical.call_args[0][1] 82 | plugin.progress_dialog.close.assert_called_once() 83 | 84 | def test_plugin_update_progress(qgs_app, mock_iface): 85 | """Test progress updates""" 86 | plugin = QgisPluginGeoParquet(mock_iface) 87 | plugin.progress_dialog = MagicMock() 88 | 89 | plugin.update_progress("Test progress") 90 | plugin.progress_dialog.setLabelText.assert_called_once_with("Test progress") 91 | 92 | def test_plugin_cancel_download(qgs_app, mock_iface): 93 | """Test download cancellation""" 94 | plugin = QgisPluginGeoParquet(mock_iface) 95 | plugin.worker = MagicMock() 96 | plugin.worker_thread = MagicMock() 97 | 98 | # Patch the cleanup_thread method to verify it's called 99 | with patch.object(plugin, 'cleanup_thread') as mock_cleanup: 100 | plugin.cancel_download() 101 | plugin.worker.kill.assert_called_once() 102 | mock_cleanup.assert_called_once() 103 | 104 | @patch('gpq_downloader.plugin.QgsVectorLayer') 105 | def test_plugin_load_layer_success(mock_vector_layer, qgs_app, mock_iface): 106 | """Test successful layer loading""" 107 | plugin = QgisPluginGeoParquet(mock_iface) 108 | 109 | # Setup mock layer 110 | mock_layer = MagicMock() 111 | mock_layer.isValid.return_value = True 112 | mock_vector_layer.return_value = mock_layer 113 | 114 | # Setup mock project 115 | mock_project = MagicMock() 116 | 117 | with patch('gpq_downloader.plugin.QgsProject.instance', return_value=mock_project): 118 | plugin.load_layer("test.gpkg") 119 | mock_project.addMapLayer.assert_called_once_with(mock_layer) 120 | 121 | @patch('gpq_downloader.plugin.QgsVectorLayer') 122 | def test_plugin_load_layer_invalid(mock_vector_layer, qgs_app, mock_iface): 123 | """Test loading invalid layer""" 124 | plugin = QgisPluginGeoParquet(mock_iface) 125 | 126 | # Setup mock layer 127 | mock_layer = MagicMock() 128 | mock_layer.isValid.return_value = False 129 | mock_vector_layer.return_value = mock_layer 130 | 131 | with patch('gpq_downloader.plugin.QMessageBox.critical') as mock_critical: 132 | plugin.load_layer("test.gpkg") 133 | mock_critical.assert_called_once() 134 | assert mock_critical.call_args[0][0] == mock_iface.mainWindow() 135 | assert mock_critical.call_args[0][1] == "Error" or "test.gpkg" in mock_critical.call_args[0][1] 136 | 137 | def test_plugin_show_info(qgs_app, mock_iface): 138 | """Test info message display""" 139 | plugin = QgisPluginGeoParquet(mock_iface) 140 | test_message = "Test info" 141 | 142 | with patch('gpq_downloader.plugin.QMessageBox.information') as mock_info: 143 | plugin.show_info(test_message) 144 | mock_info.assert_called_once() 145 | assert 
mock_info.call_args[0][0] == mock_iface.mainWindow() 146 | assert mock_info.call_args[0][1] == "Success" or test_message in mock_info.call_args[0][1] 147 | 148 | def test_plugin_initialization(qgs_app, mock_iface): 149 | """Test plugin initialization""" 150 | plugin = QgisPluginGeoParquet(mock_iface) 151 | assert plugin.iface == mock_iface 152 | assert plugin.worker is None 153 | assert plugin.worker_thread is None 154 | assert isinstance(plugin.download_dir, Path) 155 | 156 | def test_plugin_init_gui(qgs_app, mock_iface): 157 | """Test initGui method""" 158 | plugin = QgisPluginGeoParquet(mock_iface) 159 | plugin.initGui() 160 | 161 | # Check that action was created 162 | assert isinstance(plugin.action, QAction) 163 | assert plugin.action.text() == "Download GeoParquet Data" 164 | 165 | # Check that icon was added to toolbar 166 | assert len(mock_iface.toolbar_icons) == 1 167 | assert mock_iface.toolbar_icons[0] == plugin.action 168 | 169 | def test_plugin_unload(qgs_app, mock_iface): 170 | """Test plugin unload""" 171 | plugin = QgisPluginGeoParquet(mock_iface) 172 | plugin.initGui() # Add the icon first 173 | 174 | # Verify icon was added 175 | assert len(mock_iface.toolbar_icons) == 1 176 | 177 | # Mock worker thread to not be running 178 | plugin.worker_thread = MagicMock() 179 | plugin.worker_thread.isRunning.return_value = False 180 | 181 | # Unload plugin 182 | plugin.unload() 183 | 184 | # Check that icon was removed 185 | assert len(mock_iface.toolbar_icons) == 0 186 | 187 | @patch('gpq_downloader.plugin.QThread') 188 | def test_plugin_cleanup_thread(mock_thread, qgs_app, mock_iface): 189 | """Test thread cleanup""" 190 | plugin = QgisPluginGeoParquet(mock_iface) 191 | plugin.worker = MagicMock() 192 | plugin.worker_thread = MagicMock() 193 | 194 | plugin.cleanup_thread() 195 | assert plugin.worker is None 196 | assert plugin.worker_thread is None 197 | 198 | def test_handle_validation_complete_success(qgs_app, mock_iface, qtbot): 199 | plugin = QgisPluginGeoParquet(mock_iface) 200 | 201 | # Create a fake dialog and attach the expected attributes. 202 | fake_dialog = QDialog() 203 | qtbot.addWidget(fake_dialog) 204 | # Fake overture radio button; isChecked() returns True. 205 | fake_radio = MagicMock() 206 | fake_radio.isChecked.return_value = True 207 | fake_dialog.overture_radio = fake_radio 208 | 209 | # Fake overture combo box for theme selection. 210 | fake_combo = MagicMock() 211 | fake_combo.currentText.return_value = "castle" # Any theme other than "base" 212 | fake_dialog.overture_combo = fake_combo 213 | 214 | # Use a valid dummy extent (avoid using a MagicMock) 215 | dummy_extent = QgsRectangle(0, 0, 10, 10) 216 | 217 | # Patch the file dialog: simulate user clicking "Save" by returning a valid filename. 218 | with patch('qgis.PyQt.QtWidgets.QFileDialog.getSaveFileName', 219 | return_value=("test_output.parquet", "GeoParquet (*.parquet)")) as mock_save_dialog: 220 | plugin.download_and_save = MagicMock() 221 | 222 | plugin.handle_validation_complete( 223 | success=True, 224 | message="", 225 | validation_results={}, 226 | url="https://example.com/test.parquet", 227 | extent=dummy_extent, 228 | dialog=fake_dialog 229 | ) 230 | 231 | mock_save_dialog.assert_called_once() 232 | plugin.download_and_save.assert_called_once() 233 | 234 | def test_handle_validation_complete_cancel(qgs_app, mock_iface, qtbot): 235 | plugin = QgisPluginGeoParquet(mock_iface) 236 | 237 | # Create a fake dialog with the same expected attributes. 
238 | fake_dialog = QDialog() 239 | qtbot.addWidget(fake_dialog) 240 | fake_radio = MagicMock() 241 | fake_radio.isChecked.return_value = True 242 | fake_dialog.overture_radio = fake_radio 243 | 244 | fake_combo = MagicMock() 245 | fake_combo.currentText.return_value = "castle" 246 | fake_dialog.overture_combo = fake_combo 247 | 248 | # Use a valid dummy extent instead of a MagicMock. 249 | dummy_extent = QgsRectangle(0, 0, 10, 10) 250 | 251 | # Simulate the file dialog being cancelled by returning empty strings. 252 | with patch('qgis.PyQt.QtWidgets.QFileDialog.getSaveFileName', 253 | return_value=("", "")) as mock_save_dialog: 254 | plugin.download_and_save = MagicMock() 255 | # Optionally, also patch the warning to confirm no warning is shown. 256 | with patch('gpq_downloader.plugin.QMessageBox.warning') as mock_warning: 257 | plugin.handle_validation_complete( 258 | success=True, 259 | message="", 260 | validation_results={}, 261 | url="https://example.com/test.parquet", 262 | extent=dummy_extent, 263 | dialog=fake_dialog 264 | ) 265 | mock_save_dialog.assert_called_once() 266 | plugin.download_and_save.assert_not_called() 267 | # In the cancel case, no warning message is expected. 268 | mock_warning.assert_not_called() 269 | 270 | def test_handle_validation_complete_failure(qgs_app, mock_iface): 271 | plugin = QgisPluginGeoParquet(mock_iface) 272 | 273 | with patch('gpq_downloader.plugin.QMessageBox.warning') as mock_warning: 274 | plugin.handle_validation_complete( 275 | success=False, 276 | message="Validation failed", 277 | validation_results={}, 278 | url="https://example.com/test.parquet", 279 | extent=MagicMock(), 280 | dialog=MagicMock() 281 | ) 282 | mock_warning.assert_called_once_with(mock_iface.mainWindow(), "Validation Error", "Validation failed") 283 | 284 | def test_create_progress_dialog(qgs_app, mock_iface): 285 | plugin = QgisPluginGeoParquet(mock_iface) 286 | progress_dialog = plugin.create_progress_dialog("Test Title", "Test Message") 287 | 288 | assert progress_dialog.windowTitle() == "Test Title" 289 | assert progress_dialog.labelText() == "Test Message" 290 | 291 | def test_setup_worker(qgs_app, mock_iface): 292 | plugin = QgisPluginGeoParquet(mock_iface) 293 | plugin.progress_dialog = MagicMock() # Ensure progress_dialog is initialized 294 | dataset_url = "https://example.com/test.parquet" 295 | extent = MagicMock() 296 | output_file = "output.parquet" 297 | validation_results = {"has_bbox": True} 298 | 299 | worker, worker_thread = plugin.setup_worker(dataset_url, extent, output_file, validation_results) 300 | 301 | assert worker is not None 302 | assert worker_thread is not None 303 | assert worker.dataset_url == dataset_url 304 | assert worker.extent == extent 305 | assert worker.output_file == output_file 306 | assert worker.validation_results == validation_results -------------------------------------------------------------------------------- /gpq_downloader/icons/parquet-download.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpq_downloader/tests/test_non_geoparquet.py: -------------------------------------------------------------------------------- 1 | """Tests for handling non-GeoParquet compliant parquet files with geometry columns.""" 2 | 3 | import pytest 4 | from pathlib import Path 5 | from unittest.mock import MagicMock, patch, call 6 | import pyarrow.parquet as pq 7 | import duckdb 8 | import tempfile 9 | import os 10 | 11 
| from gpq_downloader.utils import Worker, ValidationWorker 12 | 13 | 14 | class TestNonGeoParquetHandling: 15 | """Test handling of parquet files with geometry but no geo metadata.""" 16 | 17 | @pytest.fixture 18 | def test_data_path(self): 19 | """Path to test data directory.""" 20 | return Path(__file__).parent / "data" 21 | 22 | @pytest.fixture 23 | def non_geoparquet_file(self, test_data_path): 24 | """Path to non-GeoParquet test file.""" 25 | return test_data_path / "non_geoparquet_with_geometry.parquet" 26 | 27 | def test_non_geoparquet_file_exists(self, non_geoparquet_file): 28 | """Verify test file exists and has expected structure.""" 29 | assert non_geoparquet_file.exists(), f"Test file not found: {non_geoparquet_file}" 30 | 31 | # Verify file structure 32 | pf = pq.ParquetFile(non_geoparquet_file) 33 | schema = pf.schema 34 | 35 | # Check expected columns 36 | column_names = [field.name for field in schema] 37 | assert "geometry" in column_names 38 | assert "id" in column_names 39 | assert "name" in column_names 40 | 41 | # Verify no geo metadata 42 | metadata = pf.metadata.metadata 43 | if metadata: 44 | metadata_dict = {k.decode(): v.decode() for k, v in metadata.items()} 45 | assert "geo" not in metadata_dict 46 | 47 | @patch('gpq_downloader.utils.transform_bbox_to_4326') 48 | @patch('gpq_downloader.utils.duckdb.connect') 49 | def test_worker_handles_non_geoparquet(self, mock_connect, mock_transform_bbox, non_geoparquet_file, tmp_path): 50 | """Test that Worker can process non-GeoParquet files with geometry.""" 51 | # Mock connection 52 | mock_conn = MagicMock() 53 | mock_connect.return_value = mock_conn 54 | 55 | # Mock execute method to handle spatial extension loading 56 | def mock_execute(query): 57 | result = MagicMock() 58 | if "DESCRIBE SELECT" in query: 59 | # Schema query result 60 | result.fetchall.return_value = [ 61 | ('id', 'BIGINT', 'YES', None, None, None), 62 | ('name', 'VARCHAR', 'YES', None, None, None), 63 | ('type', 'VARCHAR', 'YES', None, None, None), 64 | ('length_m', 'DOUBLE', 'YES', None, None, None), 65 | ('geometry', 'BLOB', 'YES', None, None, None) # Geometry as BLOB, not WKB_BLOB 66 | ] 67 | elif "SELECT COUNT(*)" in query: 68 | # Count query result 69 | result.fetchone.return_value = (7,) 70 | else: 71 | # For other queries (INSTALL, LOAD, CREATE TABLE, etc.) 
72 | result.fetchall.return_value = [] 73 | result.fetchone.return_value = None 74 | return result 75 | 76 | mock_conn.execute.side_effect = mock_execute 77 | 78 | # Mock transform_bbox_to_4326 to return a proper bbox for testing 79 | from qgis.core import QgsRectangle 80 | mock_bbox = QgsRectangle(-180, -90, 180, 90) # Global extent 81 | mock_transform_bbox.return_value = mock_bbox 82 | 83 | # Create worker with temp output file 84 | output_file = tmp_path / "test_output.parquet" 85 | 86 | # Mock iface 87 | mock_iface = MagicMock() 88 | mock_iface.mapCanvas.return_value.mapSettings.return_value.destinationCrs.return_value = MagicMock() 89 | 90 | # Mock validation results 91 | validation_results = { 92 | 'has_geometry': True, 93 | 'geometry_type': 'BLOB', 94 | 'total_features': 7 95 | } 96 | 97 | worker = Worker( 98 | dataset_url=f"file://{non_geoparquet_file}", 99 | extent=None, 100 | output_file=str(output_file), 101 | iface=mock_iface, 102 | validation_results=validation_results 103 | ) 104 | 105 | # Mock signals 106 | worker.progress = MagicMock() 107 | worker.error = MagicMock() 108 | worker.finished = MagicMock() 109 | 110 | # Run worker 111 | worker.run() 112 | 113 | # Verify spatial extension was loaded 114 | execute_calls = [call[0][0] for call in mock_conn.execute.call_args_list] 115 | assert any("INSTALL spatial" in call for call in execute_calls) 116 | assert any("LOAD spatial" in call for call in execute_calls) 117 | 118 | # Verify no errors 119 | worker.error.emit.assert_not_called() 120 | 121 | # Verify finished signal was emitted 122 | worker.finished.emit.assert_called_once() 123 | 124 | @patch('gpq_downloader.utils.transform_bbox_to_4326') 125 | @patch('gpq_downloader.utils.duckdb.connect') 126 | def test_non_geoparquet_spatial_query(self, mock_connect, mock_transform_bbox, non_geoparquet_file, tmp_path): 127 | """Test spatial filtering works without bbox column.""" 128 | mock_conn = MagicMock() 129 | mock_connect.return_value = mock_conn 130 | 131 | # Track all queries 132 | queries_executed = [] 133 | 134 | def mock_execute(query): 135 | queries_executed.append(query) 136 | result = MagicMock() 137 | if "DESCRIBE SELECT" in query: 138 | result.fetchall.return_value = [ 139 | ('geometry', 'BLOB', 'YES', None, None, None), 140 | ('id', 'BIGINT', 'YES', None, None, None), 141 | ('name', 'VARCHAR', 'YES', None, None, None) 142 | ] 143 | elif "SELECT COUNT(*)" in query: 144 | result.fetchone.return_value = (5,) 145 | else: 146 | result.fetchall.return_value = [] 147 | result.fetchone.return_value = None 148 | return result 149 | 150 | mock_conn.execute.side_effect = mock_execute 151 | 152 | # Mock transform_bbox_to_4326 to return the extent 153 | from qgis.core import QgsRectangle 154 | mock_bbox = QgsRectangle(-122.5, 37.7, -122.4, 37.8) 155 | mock_transform_bbox.return_value = mock_bbox 156 | 157 | # Create worker with bbox filter 158 | output_file = tmp_path / "test_output.parquet" 159 | 160 | # Mock iface 161 | mock_iface = MagicMock() 162 | mock_iface.mapCanvas.return_value.mapSettings.return_value.destinationCrs.return_value = MagicMock() 163 | 164 | # Create extent for bbox filter 165 | from qgis.core import QgsRectangle 166 | extent = QgsRectangle(-122.5, 37.7, -122.4, 37.8) # SF area 167 | 168 | # Mock validation results 169 | validation_results = { 170 | 'has_geometry': True, 171 | 'geometry_type': 'BLOB', 172 | 'total_features': 5 173 | } 174 | 175 | worker = Worker( 176 | dataset_url=f"file://{non_geoparquet_file}", 177 | extent=extent, 178 | 
output_file=str(output_file), 179 | iface=mock_iface, 180 | validation_results=validation_results 181 | ) 182 | 183 | # Mock signals 184 | worker.progress = MagicMock() 185 | worker.error = MagicMock() 186 | worker.finished = MagicMock() 187 | 188 | # Run worker 189 | worker.run() 190 | 191 | # For BLOB geometry columns, spatial filtering happens after conversion 192 | # So we should see the conversion happening in a separate step 193 | conversion_query = any( 194 | "ST_GeomFromWKB" in query and "CREATE TABLE" in query 195 | for query in queries_executed 196 | ) 197 | assert conversion_query, f"Expected geometry conversion for BLOB column. Queries: {queries_executed}" 198 | 199 | def test_duckdb_reads_non_geoparquet(self, non_geoparquet_file): 200 | """Test that DuckDB can actually read the non-GeoParquet file with spatial extension.""" 201 | conn = duckdb.connect() 202 | 203 | # Load spatial extension 204 | conn.execute("INSTALL spatial;") 205 | conn.execute("LOAD spatial;") 206 | 207 | # Read the file 208 | query = f"SELECT * FROM read_parquet('{non_geoparquet_file}')" 209 | result = conn.execute(query).fetchall() 210 | 211 | # Should have 7 rows 212 | assert len(result) == 7 213 | 214 | # Test geometry column can be converted from WKB 215 | geom_query = f""" 216 | SELECT 217 | id, 218 | name, 219 | ST_AsText(ST_GeomFromWKB(geometry)) as geom_wkt 220 | FROM read_parquet('{non_geoparquet_file}') 221 | LIMIT 1 222 | """ 223 | geom_result = conn.execute(geom_query).fetchone() 224 | 225 | assert geom_result is not None 226 | assert geom_result[0] == 1 # id 227 | assert geom_result[1] == 'Market St' # name 228 | assert 'LINESTRING' in geom_result[2] # geometry as WKT 229 | 230 | conn.close() 231 | 232 | @pytest.mark.integration 233 | @pytest.mark.skipif( 234 | os.environ.get('SKIP_INTEGRATION_TESTS', 'false').lower() == 'true', 235 | reason="Skipping integration tests" 236 | ) 237 | @patch('gpq_downloader.utils.transform_bbox_to_4326') 238 | def test_end_to_end_remote_non_geoparquet(self, mock_transform_bbox): 239 | """End-to-end test downloading and processing remote non-geoparquet file.""" 240 | dataset_url = "https://data.source.coop/cholmes/aois/non_geoparquet_with_geometry.parquet" 241 | 242 | # Create a temporary directory for output 243 | with tempfile.TemporaryDirectory() as temp_dir: 244 | output_file = os.path.join(temp_dir, "test_output.parquet") 245 | 246 | # Mock iface 247 | mock_iface = MagicMock() 248 | mock_canvas = MagicMock() 249 | mock_settings = MagicMock() 250 | mock_crs = MagicMock() 251 | 252 | # Setup the chain of mocks 253 | mock_iface.mapCanvas.return_value = mock_canvas 254 | mock_canvas.mapSettings.return_value = mock_settings 255 | mock_settings.destinationCrs.return_value = mock_crs 256 | mock_crs.authid.return_value = "EPSG:4326" 257 | 258 | # Create extent for filtering (San Francisco area) 259 | from qgis.core import QgsRectangle 260 | extent = QgsRectangle(-122.5, 37.7, -122.4, 37.8) 261 | 262 | # Mock transform_bbox_to_4326 to return the same extent (already in 4326) 263 | mock_transform_bbox.return_value = extent 264 | 265 | # Run validation manually to get results 266 | # Since we can't easily test the actual ValidationWorker with signals, 267 | # we'll validate using duckdb directly 268 | conn = duckdb.connect() 269 | conn.execute("INSTALL spatial;") 270 | conn.execute("LOAD spatial;") 271 | 272 | # Get schema 273 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{dataset_url}')" 274 | schema = conn.execute(schema_query).fetchall() 275 | 276 | # 
Check for geometry column 277 | has_geometry = False 278 | geometry_column = None 279 | for col_name, col_type, _, _, _, _ in schema: 280 | if col_name == 'geometry' or 'geom' in col_name.lower(): 281 | has_geometry = True 282 | geometry_column = col_name 283 | break 284 | 285 | # Check for bbox metadata 286 | has_bbox = False 287 | bbox_column = None 288 | try: 289 | metadata_query = f"SELECT key, value FROM parquet_kv_metadata('{dataset_url}')" 290 | metadata_results = conn.execute(metadata_query).fetchall() 291 | for key, value in metadata_results: 292 | if key == b"geo": 293 | has_bbox = True # Would need more parsing to get actual bbox column 294 | break 295 | except Exception: 296 | pass # Metadata lookup is best-effort; a missing 'geo' key is fine 297 | 298 | conn.close() 299 | 300 | # Create validation results based on our checks 301 | validation_results = { 302 | 'has_geometry': has_geometry, 303 | 'geometry_column': geometry_column, 304 | 'has_bbox': has_bbox, 305 | 'bbox_column': bbox_column, 306 | 'schema': schema 307 | } 308 | 309 | # Now run the worker with validation results 310 | worker = Worker( 311 | dataset_url=dataset_url, 312 | extent=extent, 313 | output_file=output_file, 314 | iface=mock_iface, 315 | validation_results=validation_results 316 | ) 317 | 318 | # Mock signals for worker 319 | worker.finished = MagicMock() 320 | worker.error = MagicMock() 321 | worker.progress = MagicMock() 322 | worker.percent = MagicMock() 323 | worker.info = MagicMock() 324 | worker.load_layer = MagicMock() 325 | worker.file_size_warning = MagicMock() 326 | 327 | # Run worker 328 | worker.run() 329 | 330 | # The worker should complete without emitting an error; if it does, 331 | # surface the message so the failure is easy to diagnose 332 | if worker.error.emit.called: 333 | error_message = worker.error.emit.call_args[0][0] 334 | # Historically this pointed at the spatial extension not being loaded properly 335 | print(f"Worker encountered error: {error_message}") 336 | # Fail explicitly rather than letting the error slip through 337 | pytest.fail(f"Worker should not encounter spatial extension error: {error_message}") 338 | 339 | # Check finished signal was emitted 340 | worker.finished.emit.assert_called_once() 341 | 342 | # Verify output file was created 343 | assert os.path.exists(output_file) 344 | 345 | # Verify the output is valid GeoParquet 346 | conn = duckdb.connect() 347 | conn.execute("INSTALL spatial;") 348 | conn.execute("LOAD spatial;") 349 | 350 | # Check we can read the output file 351 | result = conn.execute(f"SELECT COUNT(*) FROM read_parquet('{output_file}')").fetchone() 352 | assert result[0] > 0 # Should have filtered some features 353 | 354 | # Check geometry column exists and is valid 355 | schema_result = conn.execute(f"DESCRIBE SELECT * FROM read_parquet('{output_file}')").fetchall() 356 | column_names = [row[0] for row in schema_result] 357 | assert 'geometry' in column_names 358 | 359 | # Check we can read geometry 360 | # First check what type the geometry column is 361 | geom_col_type = None 362 | for row in schema_result: 363 | if row[0] == 'geometry': 364 | geom_col_type = row[1] 365 | break 366 | 367 | # If it's already GEOMETRY type, don't use ST_GeomFromWKB 368 | if geom_col_type and 'GEOMETRY' in geom_col_type.upper(): 369 | geom_result = conn.execute(f""" 370 | SELECT ST_AsText(geometry) as wkt 371 | FROM read_parquet('{output_file}') 372 | LIMIT 1 373 | """).fetchone() 374 | else: 375 | # It's still BLOB, so convert it 376 | geom_result = conn.execute(f""" 377 | SELECT
ST_AsText(ST_GeomFromWKB(geometry)) as wkt 378 | FROM read_parquet('{output_file}') 379 | LIMIT 1 380 | """).fetchone() 381 | 382 | assert geom_result is not None 383 | assert 'LINESTRING' in geom_result[0] or 'POINT' in geom_result[0] or 'POLYGON' in geom_result[0] 384 | 385 | conn.close() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 
58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense, or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free 248 | Software Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License.
-------------------------------------------------------------------------------- /gpq_downloader/dialog.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | from qgis.PyQt.QtWidgets import ( 6 | QMessageBox, 7 | QDialog, 8 | QVBoxLayout, 9 | QHBoxLayout, 10 | QLabel, 11 | QLineEdit, 12 | QPushButton, 13 | QComboBox, 14 | QProgressDialog, 15 | QRadioButton, 16 | QStackedWidget, 17 | QWidget, 18 | QCheckBox, 19 | ) 20 | from qgis.PyQt.QtCore import pyqtSignal, Qt, QThread 21 | from qgis.core import QgsSettings 22 | import os 23 | from .utils import ValidationWorker 24 | 25 | 26 | class DataSourceDialog(QDialog): 27 | validation_complete = pyqtSignal(bool, str, dict) 28 | 29 | def __init__(self, parent=None, iface=None): 30 | super().__init__(parent) 31 | self.iface = iface 32 | self.validation_thread = None 33 | self.validation_worker = None 34 | self.progress_message = None 35 | self.requires_validation = True 36 | self.setWindowTitle("GeoParquet Data Source") 37 | self.setMinimumWidth(500) 38 | 39 | 40 | base_path = os.path.dirname(os.path.abspath(__file__)) 41 | presets_path = os.path.join(base_path, "data", "presets.json") 42 | with open(presets_path, "r") as f: 43 | self.PRESET_DATASETS = json.load(f) 44 | 45 | # Create main layout 46 | layout = QVBoxLayout() 47 | 48 | # Create horizontal layout for radio buttons 49 | radio_layout = QHBoxLayout() 50 | 51 | # Create radio buttons 52 | self.overture_radio = QRadioButton("Overture Maps") 53 | self.sourcecoop_radio = QRadioButton("Source Cooperative") 54 | self.osm_radio = QRadioButton("OpenStreetMap") 55 | self.custom_radio = QRadioButton("Custom URL") 56 | 57 | # Add radio buttons to horizontal layout 58 | radio_layout.addWidget(self.overture_radio) 59 | radio_layout.addWidget(self.sourcecoop_radio) 60 | radio_layout.addWidget(self.osm_radio) 61 | radio_layout.addWidget(self.custom_radio) 62 | 63 | # Connect to save state 64 | self.overture_radio.released.connect(self.save_radio_button_state) 65 | self.sourcecoop_radio.released.connect(self.save_radio_button_state) 66 | self.osm_radio.released.connect(self.save_radio_button_state) 67 | self.custom_radio.released.connect(self.save_radio_button_state) 68 | 69 | # Add radio button layout to main layout 70 | layout.addLayout(radio_layout) 71 | 72 | # Add some spacing between radio buttons and content 73 | layout.addSpacing(10) 74 | 75 | # Create and setup the stacked widget for different options 76 | self.stack = QStackedWidget() 77 | 78 | # Custom URL page 79 | custom_page = QWidget() 80 | custom_layout = QVBoxLayout() 81 | self.url_input = QLineEdit() 82 | self.url_input.setPlaceholderText( 83 | "Enter URL to Parquet file or folder (s3:// or https://)" 84 | ) 85 | custom_layout.addWidget(self.url_input) 86 | custom_page.setLayout(custom_layout) 87 | 88 | # Overture Maps page 89 | overture_page = QWidget() 90 | overture_layout = QVBoxLayout() 91 | 92 | # Create horizontal layout for main checkboxes 93 | checkbox_layout = QHBoxLayout() 94 | 95 | # Create a widget to hold checkboxes 96 | self.overture_checkboxes = {} 97 | for key in self.PRESET_DATASETS['overture'].keys(): 98 | if key != 'base': # Handle base separately 99 | checkbox = QCheckBox(key.title()) 100 | self.overture_checkboxes[key] = checkbox 101 | checkbox_layout.addWidget(checkbox) 102 | 103 | # Add the horizontal checkbox layout to main layout 104 | overture_layout.addLayout(checkbox_layout) 105 | 106 | # Add base layer section 107 | base_group = 
QWidget() 108 | base_layout = QVBoxLayout() 109 | base_layout.setContentsMargins(0, 10, 0, 0) # Add some top margin 110 | 111 | self.base_checkbox = QCheckBox("Base") 112 | self.overture_checkboxes['base'] = self.base_checkbox 113 | base_layout.addWidget(self.base_checkbox) 114 | 115 | # Add base subtype checkboxes 116 | self.base_subtype_widget = QWidget() 117 | base_subtype_layout = QHBoxLayout() # Horizontal layout for subtypes 118 | base_subtype_layout.setContentsMargins(20, 0, 0, 0) # Add left margin for indentation 119 | 120 | # Replace combo box with checkboxes 121 | self.base_subtype_checkboxes = {} 122 | subtype_display_names = { 123 | 'infrastructure': 'Infrastructure', 124 | 'land': 'Land', 125 | 'land_cover': 'Land Cover', 126 | 'land_use': 'Land Use', 127 | 'water': 'Water', 128 | 'bathymetry': 'Bathymetry' 129 | } 130 | 131 | for subtype in self.PRESET_DATASETS['overture']['base']['subtypes']: 132 | checkbox = QCheckBox(subtype_display_names[subtype]) 133 | self.base_subtype_checkboxes[subtype] = checkbox 134 | base_subtype_layout.addWidget(checkbox) 135 | 136 | self.base_subtype_widget.setLayout(base_subtype_layout) 137 | self.base_subtype_widget.hide() 138 | 139 | base_layout.addWidget(self.base_subtype_widget) 140 | base_group.setLayout(base_layout) 141 | overture_layout.addWidget(base_group) 142 | 143 | # Connect base checkbox to show/hide subtype checkboxes and resize dialog 144 | self.base_checkbox.toggled.connect(self.base_subtype_widget.setVisible) 145 | self.base_checkbox.toggled.connect(lambda checked: self.adjust_dialog_width(checked, 100)) 146 | 147 | 148 | overture_page.setLayout(overture_layout) 149 | 150 | # Source Cooperative page 151 | sourcecoop_page = QWidget() 152 | sourcecoop_layout = QVBoxLayout() 153 | self.sourcecoop_combo = QComboBox() 154 | self.sourcecoop_combo.addItems( 155 | sorted([ 156 | dataset["display_name"] 157 | for dataset in self.PRESET_DATASETS["source_cooperative"].values() 158 | ], key=str.lower) 159 | ) 160 | sourcecoop_layout.addWidget(self.sourcecoop_combo) 161 | 162 | # Add link label 163 | self.sourcecoop_link = QLabel() 164 | self.sourcecoop_link.setOpenExternalLinks(True) 165 | self.sourcecoop_link.setWordWrap(True) 166 | sourcecoop_layout.addWidget(self.sourcecoop_link) 167 | 168 | # Connect combo box change to update link 169 | self.sourcecoop_combo.currentTextChanged.connect(self.update_sourcecoop_link) 170 | sourcecoop_page.setLayout(sourcecoop_layout) 171 | 172 | # OpenStreetMap page 173 | osm_page = QWidget() 174 | osm_layout = QVBoxLayout() 175 | 176 | # Create horizontal layout for checkboxes 177 | osm_checkbox_layout = QHBoxLayout() 178 | 179 | # Create checkboxes for OSM datasets 180 | self.osm_checkboxes = {} 181 | for key in self.PRESET_DATASETS['openstreetmap'].keys(): 182 | checkbox = QCheckBox(key.title()) 183 | self.osm_checkboxes[key] = checkbox 184 | osm_checkbox_layout.addWidget(checkbox) 185 | 186 | # Add the horizontal checkbox layout to main layout 187 | osm_layout.addLayout(osm_checkbox_layout) 188 | 189 | # Add link label for LayerCake info 190 | self.osm_link = QLabel() 191 | self.osm_link.setText( 192 | 'Data from LayerCake GeoParquet files' 193 | ) 194 | self.osm_link.setOpenExternalLinks(True) 195 | self.osm_link.setWordWrap(True) 196 | osm_layout.addWidget(self.osm_link) 197 | 198 | osm_page.setLayout(osm_layout) 199 | 200 | # Add pages to stack 201 | self.stack.addWidget(custom_page) 202 | self.stack.addWidget(overture_page) 203 | self.stack.addWidget(sourcecoop_page) 204 | 
self.stack.addWidget(osm_page) 205 | 206 | layout.addWidget(self.stack) 207 | 208 | # Buttons 209 | button_layout = QHBoxLayout() 210 | self.ok_button = QPushButton("OK") 211 | self.cancel_button = QPushButton("Cancel") 212 | button_layout.addWidget(self.ok_button) 213 | button_layout.addWidget(self.cancel_button) 214 | layout.addLayout(button_layout) 215 | 216 | self.setLayout(layout) 217 | 218 | # Connect signals 219 | self.custom_radio.toggled.connect(lambda: self.stack.setCurrentIndex(0)) 220 | self.overture_radio.toggled.connect(lambda: self.stack.setCurrentIndex(1)) 221 | self.sourcecoop_radio.toggled.connect(lambda: self.stack.setCurrentIndex(2)) 222 | self.osm_radio.toggled.connect(lambda: self.stack.setCurrentIndex(3)) 223 | self.ok_button.clicked.connect(self.validate_and_accept) 224 | self.cancel_button.clicked.connect(self.reject) 225 | 226 | # Add after setting up the sourcecoop_combo 227 | self.update_sourcecoop_link(self.sourcecoop_combo.currentText()) 228 | 229 | # Load checkbox states during initialization 230 | self.load_checkbox_states() 231 | 232 | # Connect each checkbox to save its state when toggled 233 | for checkbox in self.overture_checkboxes.values(): 234 | checkbox.toggled.connect(self.save_checkbox_states) 235 | for checkbox in self.base_subtype_checkboxes.values(): 236 | checkbox.toggled.connect(self.save_checkbox_states) 237 | for checkbox in self.osm_checkboxes.values(): 238 | checkbox.toggled.connect(self.save_checkbox_states) 239 | 240 | # Ensure to call save_checkbox_states when the dialog is accepted 241 | self.ok_button.clicked.connect(self.save_checkbox_states) 242 | 243 | def save_radio_button_state(self) -> None: 244 | if self.custom_radio.isChecked(): 245 | button_name = self.custom_radio.text() 246 | elif self.overture_radio.isChecked(): 247 | button_name = self.overture_radio.text() 248 | elif self.sourcecoop_radio.isChecked(): 249 | button_name = self.sourcecoop_radio.text() 250 | elif self.osm_radio.isChecked(): 251 | button_name = self.osm_radio.text() 252 | else: 253 | button_name = self.custom_radio.text() 254 | 255 | QgsSettings().setValue( 256 | "gpq_downloader/radio_selection", 257 | button_name, 258 | section=QgsSettings.Plugins, 259 | ) 260 | 261 | def handle_overture_selection(self, text): 262 | """Show/hide base subtype combo based on selection""" 263 | self.base_subtype_widget.setVisible(text == "Base") 264 | 265 | def validate_and_accept(self): 266 | """Validate the input and accept the dialog if valid""" 267 | urls = self.get_urls() 268 | if not urls: 269 | QMessageBox.warning(self, "Validation Error", "Please select at least one dataset") 270 | return 271 | 272 | # For Overture and OSM datasets, we know they're valid so we can skip validation 273 | if self.overture_radio.isChecked() or self.osm_radio.isChecked(): 274 | self.accept() 275 | return 276 | 277 | # For custom URLs, do validation 278 | if self.custom_radio.isChecked(): 279 | for url in urls: 280 | if not (url.startswith('http://') or url.startswith('https://') or 281 | url.startswith('s3://') or url.startswith('file://') or url.startswith('hf://')): 282 | QMessageBox.warning(self, "Validation Error", 283 | "URL must start with http://, https://, s3://, hf://, or file://") 284 | return 285 | 286 | # Create progress dialog for validation 287 | self.progress_dialog = QProgressDialog("Validating URL...", "Cancel", 0, 0, self) 288 | self.progress_dialog.setWindowModality(Qt.WindowModality.WindowModal) 289 | self.progress_dialog.canceled.connect(self.cancel_validation) 290 | 
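# --- Editorial note (illustrative, not plugin code) ------------------------
# The block below uses the standard Qt worker-thread pattern: a QObject
# (ValidationWorker) is moved onto a fresh QThread, QThread.started drives
# worker.run, and results come back to the GUI thread via signals, so the
# dialog stays responsive during network access. A minimal sketch of the
# pattern, with hypothetical names:
#
#     thread = QThread()
#     worker = SomeWorker()            # any QObject with a run() slot
#     worker.moveToThread(thread)
#     thread.started.connect(worker.run)
#     worker.finished.connect(thread.quit)
#     thread.start()
# ----------------------------------------------------------------------------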
291 | # Create validation worker 292 | self.validation_worker = ValidationWorker(url, self.iface, self.iface.mapCanvas().extent()) 293 | self.validation_thread = QThread() 294 | self.validation_worker.moveToThread(self.validation_thread) 295 | 296 | # Connect signals 297 | self.validation_thread.started.connect(self.validation_worker.run) 298 | self.validation_worker.progress.connect(self.progress_dialog.setLabelText) 299 | self.validation_worker.finished.connect( 300 | lambda success, message, results: self.handle_validation_result( 301 | success, message, results 302 | ) 303 | ) 304 | self.validation_worker.needs_bbox_warning.connect(self.show_bbox_warning) 305 | 306 | # Start validation 307 | self.validation_thread.start() 308 | self.progress_dialog.exec() 309 | return 310 | 311 | # For other preset sources, we can skip validation 312 | self.accept() 313 | 314 | def handle_validation_result(self, success, message, validation_results): 315 | """Handle validation result in the dialog""" 316 | self.cleanup_validation() 317 | 318 | if success: 319 | self.validation_complete.emit(True, message, validation_results) 320 | self.accept() 321 | else: 322 | QMessageBox.warning(self, "Validation Error", message) 323 | self.validation_complete.emit(False, message, validation_results) 324 | 325 | def cancel_validation(self): 326 | """Handle validation cancellation""" 327 | if self.validation_worker: 328 | self.validation_worker.killed = True 329 | self.cleanup_validation() 330 | 331 | def cleanup_validation(self): 332 | """Clean up validation resources""" 333 | if hasattr(self, 'progress_dialog') and self.progress_dialog: 334 | self.progress_dialog.close() 335 | self.progress_dialog = None 336 | 337 | if self.validation_worker: 338 | self.validation_worker.deleteLater() 339 | self.validation_worker = None 340 | 341 | if self.validation_thread: 342 | self.validation_thread.quit() 343 | self.validation_thread.wait() 344 | self.validation_thread.deleteLater() 345 | self.validation_thread = None 346 | 347 | def closeEvent(self, event): 348 | """Handle dialog closing""" 349 | self.cleanup_validation() 350 | super().closeEvent(event) 351 | 352 | def get_urls(self): 353 | """Returns a list of URLs for selected datasets""" 354 | urls = [] 355 | if self.custom_radio.isChecked(): 356 | return [self.url_input.text().strip()] 357 | elif self.overture_radio.isChecked(): 358 | latest_release = requests.get('https://labs.overturemaps.org/data/releases.json').json()['latest'] 359 | 360 | for theme, checkbox in self.overture_checkboxes.items(): 361 | if checkbox.isChecked(): 362 | dataset = self.PRESET_DATASETS['overture'][theme] 363 | if theme == "transportation": 364 | type_str = "segment" 365 | elif theme == "divisions": 366 | type_str = "division_area" 367 | elif theme == "addresses": 368 | type_str = "*" 369 | elif theme == "base": 370 | # Handle multiple base subtypes 371 | for subtype, subtype_checkbox in self.base_subtype_checkboxes.items(): 372 | if subtype_checkbox.isChecked(): 373 | urls.append(dataset['url_template'].format(subtype=subtype, release=latest_release)) 374 | continue # Skip the normal URL append for base 375 | else: 376 | type_str = theme.rstrip('s') # remove trailing 's' for singular form 377 | urls.append(dataset['url_template'].format(subtype=type_str, release=latest_release)) 378 | elif self.sourcecoop_radio.isChecked(): 379 | selection = self.sourcecoop_combo.currentText() 380 | dataset = next((dataset for dataset in self.PRESET_DATASETS['source_cooperative'].values() 381 | if 
dataset['display_name'] == selection), None) 382 | return [dataset['url']] if dataset else [] 383 | elif self.osm_radio.isChecked(): 384 | for layer, checkbox in self.osm_checkboxes.items(): 385 | if checkbox.isChecked(): 386 | dataset = self.PRESET_DATASETS['openstreetmap'][layer] 387 | urls.append(dataset['url']) 388 | return urls 389 | 390 | def update_sourcecoop_link(self, selection): 391 | """Update the link based on the selected dataset""" 392 | # Find the dataset by display_name 393 | dataset = next((dataset for dataset in self.PRESET_DATASETS['source_cooperative'].values() 394 | if dataset['display_name'] == selection), None) 395 | if dataset and 'info_url' in dataset: 396 | self.sourcecoop_link.setText( 397 | f'<a href="{dataset["info_url"]}">View dataset info</a>' 398 | ) 399 | else: 400 | self.sourcecoop_link.setText("") 401 | 402 | 403 | def show_bbox_warning(self): 404 | """Show bbox warning dialog in main thread""" 405 | # Close the progress dialog if it exists 406 | if hasattr(self, "progress_dialog") and self.progress_dialog: 407 | self.progress_dialog.close() 408 | self.progress_dialog = None 409 | 410 | reply = QMessageBox.warning( 411 | self, 412 | "No bbox Column Detected", 413 | "This dataset doesn't have a bbox column, which means downloads will be slower. " 414 | "GeoParquet 1.1 files with a bbox column work much better - tell your data provider to upgrade!\n\n" 415 | "Do you want to continue with the download?", 416 | QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, 417 | QMessageBox.StandardButton.No, 418 | ) 419 | 420 | validation_results = {"has_bbox": False, "schema": None, "bbox_column": None, "geometry_column": "geometry"} 421 | if reply == QMessageBox.StandardButton.No: 422 | self.validation_complete.emit( 423 | False, "Download cancelled by user.", validation_results 424 | ) 425 | else: 426 | # Accept the dialog when user clicks Yes 427 | self.validation_complete.emit( 428 | True, "Validation successful", validation_results 429 | ) 430 | self.accept() 431 | 432 | def adjust_dialog_width(self, checked, width): 433 | """Adjust the dialog width based on the base checkbox state.""" 434 | if checked: 435 | self.resize(self.width() + width, self.height()) 436 | else: 437 | self.resize(self.width() - width, self.height()) 438 | 439 | def save_checkbox_states(self) -> None: 440 | # Save main checkboxes 441 | for key, checkbox in self.overture_checkboxes.items(): 442 | QgsSettings().setValue( 443 | f"gpq_downloader/checkbox_{key}", 444 | checkbox.isChecked(), 445 | section=QgsSettings.Plugins, 446 | ) 447 | 448 | # Save base subtype checkboxes 449 | for key, checkbox in self.base_subtype_checkboxes.items(): 450 | QgsSettings().setValue( 451 | f"gpq_downloader/base_subtype_checkbox_{key}", 452 | checkbox.isChecked(), 453 | section=QgsSettings.Plugins, 454 | ) 455 | 456 | # Save OSM checkboxes 457 | for key, checkbox in self.osm_checkboxes.items(): 458 | QgsSettings().setValue( 459 | f"gpq_downloader/osm_checkbox_{key}", 460 | checkbox.isChecked(), 461 | section=QgsSettings.Plugins, 462 | ) 463 | 464 | def load_checkbox_states(self) -> None: 465 | # Load main checkboxes 466 | for key, checkbox in self.overture_checkboxes.items(): 467 | checked = QgsSettings().value( 468 | f"gpq_downloader/checkbox_{key}", 469 | False, 470 | type=bool, 471 | section=QgsSettings.Plugins, 472 | ) 473 | checkbox.setChecked(checked) 474 | 475 | # Load base subtype checkboxes 476 | for key, checkbox in self.base_subtype_checkboxes.items(): 477 | checked = QgsSettings().value( 478 | 
f"gpq_downloader/base_subtype_checkbox_{key}", 479 | False, 480 | type=bool, 481 | section=QgsSettings.Plugins, 482 | ) 483 | checkbox.setChecked(checked) 484 | 485 | # Load OSM checkboxes 486 | for key, checkbox in self.osm_checkboxes.items(): 487 | checked = QgsSettings().value( 488 | f"gpq_downloader/osm_checkbox_{key}", 489 | False, 490 | type=bool, 491 | section=QgsSettings.Plugins, 492 | ) 493 | checkbox.setChecked(checked) 494 | 495 | # Update base subtype widget visibility based on base checkbox state 496 | self.base_subtype_widget.setVisible(self.base_checkbox.isChecked()) 497 | 498 | def on_validation_finished(self, success, message, results): 499 | # This method should handle the validation results 500 | # Check how it's setting validation_results 501 | pass 502 | -------------------------------------------------------------------------------- /gpq_downloader/plugin.py: -------------------------------------------------------------------------------- 1 | from qgis.PyQt.QtWidgets import ( 2 | QAction, 3 | QFileDialog, 4 | QMessageBox, 5 | QDialog, 6 | QVBoxLayout, 7 | QHBoxLayout, 8 | QLabel, 9 | QPushButton, 10 | QComboBox, 11 | QProgressDialog, 12 | QCheckBox, 13 | QWidget, 14 | QLineEdit, 15 | ) 16 | from qgis.PyQt.QtGui import QIcon 17 | from qgis.PyQt.QtCore import Qt, QThread 18 | from qgis.core import QgsProject, QgsVectorLayer, QgsSettings 19 | import os 20 | import datetime 21 | from pathlib import Path 22 | 23 | from .dialog import DataSourceDialog 24 | from .utils import Worker 25 | 26 | 27 | class QgisPluginGeoParquet: 28 | def __init__(self, iface): 29 | self.iface = iface 30 | self.worker = None 31 | self.worker_thread = None 32 | self.action = None 33 | self.output_file = None 34 | # Create a default downloads directory in user's home directory 35 | self.download_dir = Path.home() / "Downloads" 36 | # Create the directory if it doesn't exist 37 | self.download_dir.mkdir(parents=True, exist_ok=True) 38 | 39 | def initGui(self): 40 | # Create the action with the icon and tooltip 41 | base_path = os.path.dirname(os.path.abspath(__file__)) 42 | icon_path = os.path.join(base_path, "icons", "parquet-download.svg") 43 | self.action = QAction( 44 | QIcon(icon_path), "Download GeoParquet Data", self.iface.mainWindow() 45 | ) 46 | self.action.setToolTip("Download GeoParquet Data") 47 | self.action.triggered.connect(self.run) 48 | 49 | # Add the actions to the toolbar 50 | self.iface.addToolBarIcon(self.action) 51 | 52 | def unload(self): 53 | # Clean up worker and thread when plugin is unloaded 54 | if self.worker_thread and self.worker_thread.isRunning(): 55 | QMessageBox.warning( 56 | self.iface.mainWindow(), 57 | "Download in Progress", 58 | "Please wait for any downloads to complete before unloading the plugin." 59 | ) 60 | return 61 | self.cleanup_thread() 62 | # Remove all actions from the toolbar 63 | self.iface.removeToolBarIcon(self.action) 64 | 65 | def run(self, default_source=None): 66 | # Check if a worker is already running 67 | if self.worker is not None and self.worker_thread is not None and self.worker_thread.isRunning(): 68 | QMessageBox.warning( 69 | self.iface.mainWindow(), 70 | "Download in Progress", 71 | "A download is already in progress. Please wait for it to complete before starting a new download." 
72 | ) 73 | return 74 | 75 | # Reset any existing worker 76 | self.worker = None 77 | self.worker_thread = None 78 | 79 | dialog = DataSourceDialog(self.iface.mainWindow(), self.iface) 80 | 81 | selected_name = QgsSettings().value("gpq_downloader/radio_selection", section=QgsSettings.Plugins) 82 | for button in [dialog.overture_radio, dialog.sourcecoop_radio, dialog.osm_radio, dialog.custom_radio]: 83 | if button.text() == selected_name: 84 | button.setChecked(True) 85 | if not selected_name: 86 | dialog.overture_radio.setChecked(True) 87 | 88 | if dialog.exec() == QDialog.DialogCode.Accepted: 89 | # Get the selected URLs from the dialog 90 | urls = dialog.get_urls() 91 | extent = self.iface.mapCanvas().extent() 92 | 93 | # First, collect all file locations from user 94 | download_queue = [] 95 | for url in urls: 96 | # Get current date for filename 97 | current_date = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') 98 | 99 | # Generate filename based on the URL and source type 100 | if dialog.overture_radio.isChecked(): 101 | # Extract theme from URL 102 | theme = url.split('theme=')[1].split('/')[0] 103 | if 'type=' in url: 104 | type_str = url.split('type=')[1].split('/')[0] 105 | if theme == 'base': 106 | filename = f"overture_base_{type_str}_{current_date}.parquet" 107 | else: 108 | filename = f"overture_{theme}_{current_date}.parquet" 109 | else: 110 | filename = f"overture_{theme}_{current_date}.parquet" 111 | elif dialog.sourcecoop_radio.isChecked(): 112 | dataset_name = dialog.sourcecoop_combo.currentText() 113 | clean_name = dataset_name.lower().replace(' ', '_').replace('/', '_').replace('(', '').replace(')', '') 114 | filename = f"sourcecoop_{clean_name}_{current_date}.parquet" 115 | elif dialog.osm_radio.isChecked(): 116 | # Extract layer name from URL 117 | layer_name = url.split('/')[-1].replace('.parquet', '') 118 | filename = f"osm_{layer_name}_{current_date}.parquet" 119 | else: 120 | filename = f"custom_download_{current_date}.parquet" 121 | 122 | default_save_path = str(self.download_dir / filename) 123 | 124 | # Show save file dialog 125 | output_file, selected_filter = QFileDialog.getSaveFileName( 126 | self.iface.mainWindow(), 127 | f"Save Data for {theme if dialog.overture_radio.isChecked() else 'dataset'}", 128 | default_save_path, 129 | "GeoParquet (*.parquet);;DuckDB Database (*.duckdb);;GeoPackage (*.gpkg);;FlatGeobuf (*.fgb);;GeoJSON (*.geojson)" 130 | ) 131 | 132 | if output_file: 133 | download_queue.append((url, output_file)) 134 | else: 135 | return 136 | 137 | # Now process downloads one at a time 138 | self.process_download_queue(download_queue, extent) 139 | 140 | def handle_validation_complete( 141 | self, success, message, validation_results, url, extent, dialog 142 | ): 143 | """Handle validation completion and start download if successful.""" 144 | if success: 145 | # Get current date for filename 146 | current_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 147 | 148 | # Generate the default filename based on dialog selection 149 | if dialog.overture_radio.isChecked(): 150 | theme = dialog.overture_combo.currentText().lower() 151 | if theme == "base": 152 | subtype = dialog.base_subtype_combo.currentText() 153 | filename = f"overture_base_{subtype}_{current_date}.parquet" 154 | else: 155 | filename = f"overture_{theme}_{current_date}.parquet" 156 | 157 | elif dialog.sourcecoop_radio.isChecked(): 158 | selection = dialog.sourcecoop_combo.currentText() 159 | # Convert display name to safe filename format 160 | safe_name = 
selection.lower().replace(" ", "_").replace("/", "_") 161 | filename = f"sourcecoop_{safe_name}_{current_date}.parquet" 162 | 163 | else: # custom URL 164 | filename = f"custom_download_{current_date}.parquet" 165 | 166 | default_save_path = str(self.download_dir / filename) 167 | 168 | # Show save file dialog 169 | output_file, selected_filter = QFileDialog.getSaveFileName( 170 | self.iface.mainWindow(), 171 | "Save Data", 172 | default_save_path, 173 | "GeoParquet (*.parquet);;DuckDB Database (*.duckdb);;GeoPackage (*.gpkg);;FlatGeobuf (*.fgb);;GeoJSON (*.geojson)", 174 | ) 175 | 176 | if output_file: 177 | self.output_file = output_file 178 | self.download_and_save(url, extent, output_file, validation_results) 179 | else: 180 | QMessageBox.warning(self.iface.mainWindow(), "Validation Error", message) 181 | 182 | def download_and_save(self, dataset_url, extent, output_file, validation_results): 183 | # Ensure we start with a fresh worker 184 | self.cleanup_thread() 185 | 186 | # Create progress dialog 187 | self.progress_dialog = self.create_progress_dialog("Downloading Data") 188 | 189 | # Create worker with validation results 190 | self.worker, self.worker_thread = self.setup_worker( 191 | dataset_url, extent, output_file, validation_results 192 | ) 193 | 194 | # Show the progress dialog and start the thread 195 | self.progress_dialog.show() 196 | self.worker_thread.start() 197 | 198 | def handle_error(self, message): 199 | self.progress_dialog.close() 200 | QMessageBox.critical(self.iface.mainWindow(), "Error", message) 201 | 202 | def update_progress(self, message): 203 | if hasattr(self, "progress_dialog"): 204 | self.progress_dialog.setLabelText(message) 205 | 206 | def cancel_download(self): 207 | if self.worker: 208 | self.worker.kill() 209 | self.cleanup_thread() 210 | 211 | def cleanup_thread(self): 212 | if self.worker_thread is not None: 213 | if self.worker: 214 | self.worker.kill() 215 | self.worker_thread.quit() 216 | self.worker_thread.wait() 217 | self.worker_thread = None 218 | self.worker = None 219 | if hasattr(self, "progress_dialog"): 220 | self.progress_dialog.close() 221 | 222 | def load_layer(self, output_file): 223 | """Load the layer into QGIS if GeoParquet is supported""" 224 | if output_file.lower().endswith(".parquet"): 225 | # Try to create a test layer to check GeoParquet support 226 | test_layer = QgsVectorLayer(output_file, "test", "ogr") 227 | if not test_layer.isValid(): 228 | dialog = QDialog(self.iface.mainWindow()) 229 | dialog.setWindowTitle("GeoParquet Support Not Available") 230 | dialog.setMinimumWidth(400) 231 | 232 | layout = QVBoxLayout() 233 | 234 | message = QLabel( 235 | "Data has been successfully saved to GeoParquet file.\n\n" 236 | "Note: Your current QGIS installation does not support reading GeoParquet files directly. You can select GeoPackage for your output format to view immediately.\n\n" 237 | "To view GeoParquet files in QGIS, you'll need to install QGIS with GDAL 3.8 " 238 | "or higher with 'libgdal-arrow-parquet'. 
You can find instructions at:" 239 | ) 240 | message.setWordWrap(True) 241 | layout.addWidget(message) 242 | 243 | link = QLabel() 244 | link.setText( 245 | 'Installing GeoParquet Support in QGIS' 246 | ) 247 | link.setOpenExternalLinks(True) 248 | layout.addWidget(link) 249 | 250 | button_box = QPushButton("OK") 251 | button_box.clicked.connect(dialog.accept) 252 | layout.addWidget(button_box) 253 | 254 | dialog.setLayout(layout) 255 | dialog.exec() 256 | return 257 | 258 | layer_name = Path(output_file).stem # Get filename without extension 259 | # Create the layer 260 | layer = QgsVectorLayer(output_file, layer_name, "ogr") 261 | if not layer.isValid(): 262 | QMessageBox.critical( 263 | self.iface.mainWindow(), 264 | "Error", 265 | f"Failed to load the layer from {output_file}", 266 | ) 267 | return 268 | # Add the layer to the QGIS project 269 | QgsProject.instance().addMapLayer(layer) 270 | 271 | def show_info(self, message): 272 | """Show an information message to the user""" 273 | QMessageBox.information(self.iface.mainWindow(), "Success", message) 274 | 275 | def handle_large_file_warning(self, estimated_size): 276 | """Handle warning about large GeoJSON file size with a more streamlined UI""" 277 | if not hasattr(self, 'worker') or self.worker is None: 278 | QMessageBox.critical(self.iface.mainWindow(), "Error", "Download session lost. Please try again.") 279 | return 280 | 281 | worker_info = { 282 | 'dataset_url': self.worker.dataset_url, 283 | 'extent': self.worker.extent, 284 | 'iface': self.worker.iface, 285 | 'validation_results': self.worker.validation_results, 286 | 'output_file': self.worker.output_file, 287 | 'size_warning_accepted': False, 288 | 'remaining_queue': getattr(self.worker, 'remaining_queue', []) 289 | } 290 | 291 | if hasattr(self, 'progress_dialog') and self.progress_dialog: 292 | self.progress_dialog.close() 293 | 294 | dialog = QDialog(self.iface.mainWindow()) 295 | dialog.setWindowTitle("Large File Warning") 296 | dialog.setMinimumWidth(400) 297 | layout = QVBoxLayout() 298 | 299 | if estimated_size >= 1024: 300 | size_str = f"{estimated_size/1024:.2f} GB" 301 | else: 302 | size_str = f"{estimated_size:.0f} MB" 303 | 304 | msg = QLabel( 305 | f"The estimated file size is {size_str}. 
Large GeoJSON files can be slow to process and load.\n\n" 306 | ) 307 | msg.setWordWrap(True) 308 | layout.addWidget(msg) 309 | 310 | format_group = QVBoxLayout() 311 | recommended_label = QLabel("Alternative formats (recommended for large datasets):") 312 | format_group.addWidget(recommended_label) 313 | 314 | format_row = QHBoxLayout() 315 | 316 | format_combo = QComboBox() 317 | format_combo.addItems([ 318 | "FlatGeobuf (*.fgb)", 319 | "GeoPackage (*.gpkg)", 320 | "GeoParquet (*.parquet)" 321 | ]) 322 | format_row.addWidget(format_combo) 323 | 324 | save_button = QPushButton("Save As...") 325 | format_row.addWidget(save_button) 326 | 327 | format_group.addLayout(format_row) 328 | layout.addLayout(format_group) 329 | 330 | button_box = QHBoxLayout() 331 | proceed_button = QPushButton("Proceed with GeoJSON anyway") 332 | cancel_button = QPushButton("Cancel") 333 | button_box.addWidget(proceed_button) 334 | button_box.addWidget(cancel_button) 335 | layout.addLayout(button_box) 336 | 337 | dialog.setLayout(layout) 338 | 339 | cancel_button.clicked.connect(dialog.reject) 340 | save_button.clicked.connect(lambda: dialog.done(1)) 341 | proceed_button.clicked.connect(lambda: dialog.done(2)) 342 | 343 | while True: 344 | result = dialog.exec() 345 | if result == 1: 346 | selected_format = format_combo.currentText() 347 | extension = selected_format.split("*")[1].rstrip(")") 348 | 349 | new_output_file = os.path.splitext(worker_info['output_file'])[0] + extension 350 | 351 | output_file, _ = QFileDialog.getSaveFileName( 352 | self.iface.mainWindow(), 353 | "Save Data", 354 | new_output_file, 355 | selected_format 356 | ) 357 | 358 | if output_file: 359 | self.progress_dialog = QProgressDialog("Starting download...", "Cancel", 0, 0, self.iface.mainWindow()) 360 | self.progress_dialog.setWindowTitle("Downloading Data") 361 | self.progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 362 | self.progress_dialog.setMinimumDuration(0) 363 | 364 | self.output_file = output_file 365 | 366 | self.worker = Worker( 367 | worker_info['dataset_url'], 368 | worker_info['extent'], 369 | output_file, 370 | worker_info['iface'], 371 | worker_info['validation_results'] 372 | ) 373 | self.worker.remaining_queue = worker_info['remaining_queue'] 374 | self.worker_thread = QThread() 375 | self.worker.moveToThread(self.worker_thread) 376 | 377 | self.worker_thread.started.connect(self.worker.run) 378 | self.worker.error.connect(self.handle_error) 379 | self.worker.load_layer.connect(self.load_layer) 380 | self.worker.info.connect(self.show_info) 381 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 382 | self.worker.finished.connect(lambda: self.handle_download_complete(worker_info['remaining_queue'], worker_info['extent'])) 383 | self.worker.progress.connect(self.update_progress) 384 | self.progress_dialog.canceled.connect(self.cancel_download) 385 | 386 | self.progress_dialog.show() 387 | self.worker_thread.start() 388 | return 389 | continue 390 | 391 | elif result == 2: 392 | self.progress_dialog = QProgressDialog("Starting download...", "Cancel", 0, 0, self.iface.mainWindow()) 393 | self.progress_dialog.setWindowTitle("Downloading Data") 394 | self.progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 395 | self.progress_dialog.setMinimumDuration(0) 396 | 397 | self.worker = Worker( 398 | worker_info['dataset_url'], 399 | worker_info['extent'], 400 | worker_info['output_file'], 401 | worker_info['iface'], 402 | worker_info['validation_results'] 403 | ) 404 | 
self.worker.remaining_queue = worker_info['remaining_queue'] 405 | self.worker_thread = QThread() 406 | self.worker.moveToThread(self.worker_thread) 407 | 408 | self.worker_thread.started.connect(self.worker.run) 409 | self.worker.error.connect(self.handle_error) 410 | self.worker.load_layer.connect(self.load_layer) 411 | self.worker.info.connect(self.show_info) 412 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 413 | self.worker.finished.connect(lambda: self.handle_download_complete(worker_info['remaining_queue'], worker_info['extent'])) 414 | self.worker.progress.connect(self.update_progress) 415 | self.progress_dialog.canceled.connect(self.cancel_download) 416 | 417 | self.worker.size_warning_accepted = True 418 | 419 | self.progress_dialog.show() 420 | self.worker_thread.start() 421 | return 422 | 423 | else: 424 | if worker_info['remaining_queue']: 425 | self.process_download_queue(worker_info['remaining_queue'], worker_info['extent']) 426 | else: 427 | self.cleanup_thread() 428 | return 429 | 430 | def create_progress_dialog( 431 | self, title="Downloading Data", message="Starting download..." 432 | ): 433 | """Create and return a configured progress dialog""" 434 | progress_dialog = QProgressDialog( 435 | message, "Cancel", 0, 0, self.iface.mainWindow() 436 | ) 437 | progress_dialog.setWindowTitle(title) 438 | progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 439 | progress_dialog.setMinimumDuration(0) 440 | return progress_dialog 441 | 442 | def setup_worker(self, dataset_url, extent, output_file, validation_results): 443 | """Create and setup a worker thread with all connections""" 444 | self.worker = Worker( 445 | dataset_url, extent, output_file, self.iface, validation_results 446 | ) 447 | self.worker_thread = QThread() 448 | self.worker.moveToThread(self.worker_thread) 449 | 450 | # Connect signals 451 | self.worker_thread.started.connect(self.worker.run) 452 | self.worker.error.connect(self.handle_error) 453 | self.worker.load_layer.connect(self.load_layer) 454 | self.worker.info.connect(self.show_info) 455 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 456 | self.worker.finished.connect(self.cleanup_thread) 457 | self.worker.progress.connect(self.update_progress) 458 | self.progress_dialog.canceled.connect(self.cancel_download) 459 | 460 | return self.worker, self.worker_thread 461 | 462 | def process_download_queue(self, download_queue, extent): 463 | """Process downloads sequentially""" 464 | if not download_queue: 465 | return 466 | 467 | # Get the next download 468 | url, output_file = download_queue[0] 469 | remaining_queue = download_queue[1:] 470 | 471 | # Extract layer name from URL for Overture data 472 | layer_name = None 473 | if 'overture' in url: 474 | if 'theme=' in url: 475 | theme = url.split('theme=')[1].split('/')[0] 476 | if theme == 'base': 477 | # For base layers, include the subtype 478 | subtype = url.split('type=')[1].split('/')[0] 479 | layer_name = f"Overture {theme.title()} - {subtype.title()}" 480 | else: 481 | layer_name = f"Overture {theme.title()}" 482 | 483 | # Create validation results (we know Overture URLs are valid) 484 | validation_results = {'has_bbox': True, 'bbox_column': 'bbox', 'geometry_column': 'geometry'} 485 | 486 | # For non-Overture data, try to detect the geometry column name from the URL 487 | if 'overture' not in url: 488 | from . 
import logger 489 | #logger.log(f"Processing URL: {url}") 490 | 491 | # Try to extract dataset name from URL for better logging 492 | dataset_name = url.split('/')[-1].split('?')[0] 493 | #logger.log(f"Dataset name from URL: {dataset_name}") 494 | 495 | # For specific known datasets, set the geometry column 496 | if 'addresses.nobbox.pq' in url or 'addresses.pq' in url: 497 | #logger.log("Detected addresses dataset, setting geometry column to 'geom'") 498 | validation_results['geometry_column'] = 'geom' 499 | 500 | #logger.log(f"Initial validation_results: {validation_results}") 501 | 502 | # Create progress dialog 503 | self.progress_dialog = QProgressDialog( 504 | "Starting download..." if not layer_name else f"Starting {layer_name} download...", 505 | "Cancel", 0, 0, self.iface.mainWindow() 506 | ) 507 | self.progress_dialog.setWindowTitle("Downloading Data") 508 | self.progress_dialog.setWindowModality(Qt.WindowModality.NonModal) 509 | self.progress_dialog.setMinimumDuration(0) 510 | 511 | # Create worker with layer name 512 | self.worker = Worker(url, extent, output_file, self.iface, validation_results, layer_name) 513 | self.worker.remaining_queue = remaining_queue # Store remaining queue in worker 514 | self.worker_thread = QThread() 515 | 516 | # Move worker to thread 517 | self.worker.moveToThread(self.worker_thread) 518 | 519 | # Connect signals 520 | self.worker_thread.started.connect(self.worker.run) 521 | self.worker.error.connect(self.handle_error) 522 | self.worker.load_layer.connect(self.load_layer) 523 | self.worker.info.connect(self.show_info) 524 | self.worker.file_size_warning.connect(self.handle_large_file_warning) 525 | self.worker.finished.connect(lambda: self.handle_download_complete(remaining_queue, extent)) 526 | self.worker.progress.connect(self.update_progress) 527 | self.progress_dialog.canceled.connect(self.cancel_download) 528 | 529 | # Show the progress dialog and start the thread 530 | self.progress_dialog.show() 531 | self.worker_thread.start() 532 | 533 | def handle_download_complete(self, remaining_queue, extent): 534 | """Handle completion of a download and start the next one if any""" 535 | self.cleanup_thread() 536 | if remaining_queue: 537 | # Start the next download 538 | self.process_download_queue(remaining_queue, extent) 539 | 540 | 541 | def classFactory(iface): 542 | return QgisPluginGeoParquet(iface) 543 | -------------------------------------------------------------------------------- /gpq_downloader/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from qgis.core import QgsCoordinateReferenceSystem, QgsCoordinateTransform, QgsProject 4 | from qgis.PyQt.QtCore import pyqtSignal, QObject 5 | import os 6 | import duckdb 7 | 8 | from . 
import logger 9 | 10 | 11 | def transform_bbox_to_4326(extent, source_crs): 12 | """ 13 | Transform a bounding box to EPSG:4326 (WGS84) 14 | 15 | Args: 16 | extent (QgsRectangle): The input extent to transform 17 | source_crs (QgsCoordinateReferenceSystem): The source CRS of the extent 18 | 19 | Returns: 20 | QgsRectangle: The transformed extent in EPSG:4326, or None if inputs are invalid 21 | """ 22 | if extent is None or source_crs is None: 23 | return None 24 | 25 | dest_crs = QgsCoordinateReferenceSystem("EPSG:4326") 26 | 27 | if source_crs != dest_crs: 28 | transform = QgsCoordinateTransform(source_crs, dest_crs, QgsProject.instance()) 29 | extent = transform.transformBoundingBox(extent) 30 | 31 | return extent 32 | 33 | 34 | class Worker(QObject): 35 | finished = pyqtSignal() 36 | error = pyqtSignal(str) 37 | load_layer = pyqtSignal(str) 38 | info = pyqtSignal(str) 39 | progress = pyqtSignal(str) 40 | percent = pyqtSignal(int) 41 | file_size_warning = pyqtSignal(float) # Signal for file size warnings (in MB) 42 | 43 | def __init__(self, dataset_url, extent, output_file, iface, validation_results, layer_name=None): 44 | super().__init__() 45 | self.dataset_url = dataset_url 46 | self.extent = extent 47 | self.output_file = output_file 48 | self.iface = iface 49 | #logger.log(f"Worker __init__ received validation_results: {validation_results}") 50 | self.validation_results = validation_results 51 | self.killed = False 52 | self.layer_name = layer_name # Ensure this is included if needed 53 | self.size_warning_accepted = False # Ensure this is False on initialization 54 | 55 | def get_bbox_info_from_metadata(self, conn): 56 | """Read GeoParquet metadata to find bbox column info""" 57 | self.progress.emit("Checking for bbox metadata...") 58 | metadata_query = ( 59 | f"SELECT key, value FROM parquet_kv_metadata('{self.dataset_url}')" 60 | ) 61 | metadata_results = conn.execute(metadata_query).fetchall() 62 | 63 | for key, value in metadata_results: 64 | if key == b"geo": 65 | try: 66 | decoded_value = value.decode() 67 | #logger.log("\nRaw metadata value:") 68 | #logger.log(decoded_value) 69 | 70 | # Parse JSON using DuckDB's JSON functions 71 | json_query = ( 72 | f"SELECT json_parse('{decoded_value}'::VARCHAR) as json" 73 | ) 74 | #logger.log("\nExecuting JSON query:") 75 | #logger.log(json_query) 76 | 77 | geo_metadata = conn.execute(json_query).fetchone()[0] 78 | #logger.log("\nParsed metadata:") 79 | #logger.log(geo_metadata) 80 | 81 | if geo_metadata and "covering" in geo_metadata: 82 | #logger.log("\nFound covering:") 83 | #logger.log(geo_metadata["covering"]) 84 | if "bbox" in geo_metadata["covering"]: 85 | bbox_info = geo_metadata["covering"]["bbox"] 86 | #logger.log("\nExtracted bbox info:") 87 | #logger.log(bbox_info) 88 | return bbox_info 89 | except Exception as e: 90 | logger.log(f"\nError parsing geo metadata: {str(e)}", 2) 91 | logger.log(f"Exception type: {type(e)}", 2) 92 | import traceback 93 | 94 | logger.log(traceback.format_exc(), 2) 95 | continue 96 | return None 97 | 98 | def run(self): 99 | try: 100 | layer_info = f" for {self.layer_name}" if self.layer_name else "" 101 | self.progress.emit(f"Connecting to database{layer_info}...") 102 | source_crs = self.iface.mapCanvas().mapSettings().destinationCrs() 103 | bbox = transform_bbox_to_4326(self.extent, source_crs) 104 | 105 | # Log validation results dictionary at the beginning of run 106 | #logger.log(f"Full validation_results at start of run: {self.validation_results}") 107 | 108 | conn = None 109 | try: 110 | # 
Install and load the spatial extension 111 | self.progress.emit(f"Loading spatial extension{layer_info}...") 112 | 113 | if self.output_file.lower().endswith('.duckdb'): 114 | conn = duckdb.connect(self.output_file) # Connect directly to output file 115 | else: 116 | conn = duckdb.connect() 117 | 118 | conn.execute("INSTALL httpfs;") 119 | conn.execute("INSTALL spatial;") 120 | conn.execute("LOAD httpfs;") 121 | conn.execute("LOAD spatial;") 122 | 123 | # Verify spatial extension is loaded by testing a spatial function 124 | try: 125 | conn.execute("SELECT ST_AsText(ST_GeomFromText('POINT(0 0)'))").fetchone() 126 | except Exception as e: 127 | logger.log(f"Failed to verify spatial extension: {e}") 128 | # Force reload 129 | conn.execute("LOAD spatial;") 130 | 131 | # Get schema early as we need it for both column names and bbox check 132 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{self.dataset_url}')" 133 | schema_result = conn.execute(schema_query).fetchall() 134 | self.validation_results['schema'] = schema_result 135 | 136 | # Log the schema for debugging 137 | #logger.log("Schema in Worker:") 138 | #for row in schema_result: 139 | #logger.log(f"Column: {row[0]}, Type: {row[1]}") 140 | 141 | # If geometry_column is not in validation_results, detect it now 142 | if 'geometry_column' not in self.validation_results: 143 | #logger.log("No geometry_column in validation_results, detecting now") 144 | self.validation_results['geometry_column'] = 'geometry' # Default 145 | geometry_found = False 146 | 147 | for row in schema_result: 148 | col_name = row[0] 149 | col_type = row[1].upper() 150 | #logger.log(f"Checking column {col_name} with type {col_type} for geometry") 151 | if 'GEOMETRY' in col_type or 'GEOGRAPHY' in col_type: 152 | self.validation_results['geometry_column'] = col_name 153 | logger.log(f"Found geometry column by type: {col_name}") 154 | geometry_found = True 155 | break 156 | 157 | if not geometry_found: 158 | # Try a different approach - look for columns 159 | #logger.log("No standard geometry column found, trying alternative detection") 160 | for row in schema_result: 161 | col_name = row[0].lower() 162 | col_name_orig = row[0] # Keep original case 163 | col_type = row[1].upper() 164 | 165 | # Check for common geometry column names 166 | if col_name in ['geometry', 'geom', 'the_geom', 'wkb_geometry']: 167 | self.validation_results['geometry_column'] = col_name_orig 168 | #logger.log(f"Found likely geometry column by name: {col_name_orig}") 169 | geometry_found = True 170 | break 171 | # Also check for BLOB columns with geometry-like names 172 | elif 'BLOB' in col_type and col_name in ['geometry', 'geom', 'the_geom', 'wkb_geometry']: 173 | self.validation_results['geometry_column'] = col_name_orig 174 | logger.log(f"Found WKB BLOB geometry column: {col_name_orig}") 175 | geometry_found = True 176 | break 177 | 178 | #logger.log(f"Final geometry column detection result: {self.validation_results['geometry_column']}") 179 | 180 | table_name = "download_data" 181 | 182 | self.progress.emit(f"Preparing query{layer_info}...") 183 | select_query = "SELECT *" 184 | if not self.output_file.endswith(".parquet"): 185 | # Construct the SELECT clause with array conversion to strings 186 | columns = [] 187 | for row in schema_result: 188 | col_name = row[0] 189 | col_type = row[1] 190 | 191 | # Quote the column name to handle special characters 192 | quoted_col_name = f'"{col_name}"' 193 | 194 | if 'STRUCT' in col_type.upper() or 'MAP' in col_type.upper(): 195 | 
columns.append(f"TO_JSON({quoted_col_name}) AS {quoted_col_name}") 196 | elif '[]' in col_type: # Check for array types like VARCHAR[] 197 | columns.append(f"array_to_string({quoted_col_name}, ', ') AS {quoted_col_name}") 198 | elif col_type.upper() == 'UTINYINT': 199 | columns.append(f"CAST({quoted_col_name} AS INTEGER) AS {quoted_col_name}") 200 | elif 'BLOB' in col_type.upper() and col_name == geometry_column: 201 | # For BLOB geometry columns, we'll handle conversion differently 202 | # to avoid spatial function validation issues 203 | columns.append(quoted_col_name) 204 | else: 205 | columns.append(quoted_col_name) 206 | 207 | # Check if this is Overture data and has a names column 208 | has_names_column = any('names' in row[0] for row in schema_result) 209 | if 'overture' in self.dataset_url and has_names_column: 210 | select_query = f'SELECT "names"."primary" as name,{", ".join(columns)}' 211 | else: 212 | select_query = f'SELECT {", ".join(columns)}' 213 | 214 | # First check: Does the schema actually have a bbox column? 215 | has_bbox_in_schema = False 216 | if 'schema' in self.validation_results and self.validation_results['schema']: 217 | for row in self.validation_results['schema']: 218 | if row[0].lower() == 'bbox' and 'struct' in row[1].lower(): 219 | has_bbox_in_schema = True 220 | #logger.log("Found actual bbox column in schema") 221 | break 222 | 223 | if not has_bbox_in_schema: 224 | #logger.log("No bbox column found in schema, overriding validation_results") 225 | # Force override incorrect bbox settings if schema doesn't have bbox 226 | self.validation_results['has_bbox'] = False 227 | self.validation_results['bbox_column'] = None 228 | 229 | # Now use the corrected validation_results 230 | bbox_column = self.validation_results.get('bbox_column') 231 | geometry_column = self.validation_results.get('geometry_column', 'geometry') 232 | #logger.log(f"Final bbox_column value: {bbox_column}") 233 | #logger.log(f"Using geometry column: {geometry_column}") 234 | 235 | # Check if geometry column is a BLOB that needs conversion 236 | geometry_col_type = None 237 | for row in schema_result: 238 | if row[0] == geometry_column: 239 | geometry_col_type = row[1].upper() 240 | break 241 | 242 | if bbox_column is not None: 243 | #logger.log(f"Using bbox column for query: {bbox_column}") 244 | where_clause = f""" 245 | WHERE "{bbox_column}".xmin BETWEEN {bbox.xMinimum()} AND {bbox.xMaximum()} 246 | AND "{bbox_column}".ymin BETWEEN {bbox.yMinimum()} AND {bbox.yMaximum()} 247 | """ 248 | else: 249 | #logger.log("Using spatial filter instead of bbox") 250 | # If it's a BLOB column, we can't use spatial functions in the initial query 251 | # We'll apply the filter after converting the geometry 252 | if geometry_col_type and 'BLOB' in geometry_col_type: 253 | where_clause = "" # No spatial filter initially for BLOB columns 254 | else: 255 | # For proper geometry columns, we can use spatial filter directly 256 | geometry_expr = f'"{geometry_column}"' 257 | where_clause = f""" 258 | WHERE ST_Intersects( 259 | {geometry_expr}, 260 | ST_GeomFromText('POLYGON(({bbox.xMinimum()} {bbox.yMinimum()}, 261 | {bbox.xMaximum()} {bbox.yMinimum()}, 262 | {bbox.xMaximum()} {bbox.yMaximum()}, 263 | {bbox.xMinimum()} {bbox.yMaximum()}, 264 | {bbox.xMinimum()} {bbox.yMinimum()}))') 265 | ) 266 | """ 267 | 268 | # Base query 269 | base_query = f""" 270 | CREATE TABLE {table_name} AS ( 271 | {select_query} FROM read_parquet('{self.dataset_url}') 272 | {where_clause} 273 | ) 274 | """ 275 | 
self.progress.emit(f"Downloading{layer_info} data...") 276 | logger.log("Executing SQL query:") 277 | logger.log(base_query) 278 | 279 | conn.execute(base_query) 280 | 281 | # If we have a BLOB geometry column, we need to convert it after table creation 282 | # and apply spatial filter if needed 283 | if (geometry_column and geometry_col_type and 'BLOB' in geometry_col_type): 284 | # Create a new table with converted geometry 285 | temp_table = f"{table_name}_converted" 286 | 287 | # Build column list for conversion 288 | convert_columns = [] 289 | for col_name, col_type, _, _, _, _ in schema_result: 290 | quoted_col_name = f'"{col_name}"' 291 | if col_name == geometry_column: 292 | convert_columns.append(f"ST_GeomFromWKB({quoted_col_name}) AS {quoted_col_name}") 293 | else: 294 | convert_columns.append(quoted_col_name) 295 | 296 | # Add spatial filter if bbox is available and we didn't filter earlier 297 | spatial_filter = "" 298 | if bbox and not bbox_column: # Only if we didn't filter with bbox column 299 | spatial_filter = f""" 300 | WHERE ST_Intersects( 301 | ST_GeomFromWKB("{geometry_column}"), 302 | ST_GeomFromText('POLYGON(({bbox.xMinimum()} {bbox.yMinimum()}, 303 | {bbox.xMaximum()} {bbox.yMinimum()}, 304 | {bbox.xMaximum()} {bbox.yMaximum()}, 305 | {bbox.xMinimum()} {bbox.yMaximum()}, 306 | {bbox.xMinimum()} {bbox.yMinimum()}))') 307 | ) 308 | """ 309 | 310 | convert_query = f""" 311 | CREATE TABLE {temp_table} AS 312 | SELECT {', '.join(convert_columns)} 313 | FROM {table_name} 314 | {spatial_filter} 315 | """ 316 | 317 | conn.execute(convert_query) 318 | 319 | # Drop original and rename 320 | conn.execute(f"DROP TABLE {table_name}") 321 | conn.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}") 322 | 323 | # Add check for empty results 324 | row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] 325 | if row_count == 0: 326 | self.info.emit(f"No data found{layer_info} in the requested area. Check that your map extent overlaps with the data and/or expand your map extent. Skipping to next dataset if available.") 327 | self.finished.emit() # Ensure finished signal is emitted 328 | return 329 | 330 | self.progress.emit(f"Processing{layer_info} data to requested format...") 331 | 332 | file_extension = self.output_file.lower().split('.')[-1] 333 | 334 | if file_extension == 'duckdb': 335 | # Commit the transaction to ensure the data is saved 336 | conn.commit() 337 | if not self.killed: 338 | self.info.emit( 339 | "Data has been successfully saved to DuckDB database.\n\n" 340 | "Note: QGIS does not currently support loading DuckDB files directly." 
341 | ) 342 | else: 343 | # Check size if exporting to GeoJSON 344 | if self.output_file.lower().endswith('.geojson'): 345 | estimated_size = self.estimate_file_size(conn, table_name) 346 | if estimated_size > 4096 and not self.size_warning_accepted: # 4GB warning threshold 347 | self.file_size_warning.emit(estimated_size) 348 | return 349 | 350 | # Use the geometry column from validation results for the Hilbert sorting 351 | # At this point, if we converted BLOB to geometry, it's already a GEOMETRY type 352 | # So we don't need ST_GeomFromWKB anymore 353 | geometry_expr = f'"{geometry_column}"' 354 | extent_expr = f'"{geometry_column}"' 355 | 356 | copy_query = f""" 357 | COPY ( 358 | WITH bbox AS ( 359 | SELECT ST_Extent(ST_Extent_Agg({extent_expr}))::BOX_2D AS b 360 | FROM {table_name} 361 | ) 362 | SELECT t.* 363 | FROM {table_name} AS t 364 | CROSS JOIN bbox 365 | ORDER BY ST_Hilbert(t.{geometry_expr}, bbox.b) 366 | ) TO '{self.output_file}' 367 | """ 368 | 369 | if file_extension == "parquet": 370 | format_options = "(FORMAT 'parquet', COMPRESSION 'ZSTD', COMPRESSION_LEVEL 22);" 371 | elif self.output_file.endswith(".gpkg"): 372 | format_options = "(FORMAT GDAL, DRIVER 'GPKG');" 373 | elif self.output_file.endswith(".fgb"): 374 | format_options = "(FORMAT GDAL, DRIVER 'FlatGeobuf', SRS 'EPSG:4326');" 375 | elif self.output_file.endswith(".geojson"): 376 | format_options = "(FORMAT GDAL, DRIVER 'GeoJSON', SRS 'EPSG:4326');" 377 | else: 378 | self.error.emit("Unsupported file format.") 379 | return # format_options is undefined for unsupported formats; bail out before the COPY below 380 | logger.log("Executing SQL query:") 381 | logger.log(copy_query + format_options) 382 | conn.execute(copy_query + format_options) 383 | 384 | 385 | if self.killed: 386 | return 387 | 388 | if not self.killed: 389 | if self.output_file.lower().endswith('.duckdb'): 390 | self.info.emit( 391 | "Data has been successfully saved to DuckDB database.\n\n" 392 | "Note: QGIS does not currently support loading DuckDB files directly." 393 | ) 394 | else: 395 | self.load_layer.emit(self.output_file) 396 | self.finished.emit() 397 | 398 | except Exception as e: 399 | if not self.killed: 400 | # Change error to info if it's a "no data" error 401 | error_str = str(e) 402 | if "No data found" in error_str: 403 | self.info.emit(f"No data found{layer_info} in the requested area for {self.dataset_url}. 
Skipping to next dataset if available.") 404 | self.finished.emit() # Ensure finished signal is emitted 405 | else: 406 | self.error.emit(error_str) 407 | finally: 408 | if conn: 409 | if not self.output_file.lower().endswith('.duckdb'): # Clean up temporary table 410 | try: 411 | conn.execute(f"DROP TABLE IF EXISTS {table_name}") 412 | except: 413 | pass 414 | conn.close() 415 | 416 | except Exception as e: 417 | if not self.killed: 418 | self.error.emit(str(e)) 419 | 420 | def kill(self): 421 | self.killed = True 422 | 423 | def estimate_file_size(self, conn, table_name): 424 | """Estimate the output file size in MB using GeoJSON feature collection structure""" 425 | try: 426 | # Get total row count 427 | row_count = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] 428 | 429 | # Use a smaller sample size for large datasets 430 | sample_size = min(100, row_count) 431 | 432 | if sample_size > 0: 433 | # Create a proper GeoJSON FeatureCollection sample with all properties 434 | sample_query = f""" 435 | WITH sample AS ( 436 | SELECT * FROM {table_name} LIMIT {sample_size} 437 | ) 438 | SELECT AVG(LENGTH( 439 | json_object( 440 | 'type', 'Feature', 441 | 'geometry', ST_AsGeoJSON(geometry), 442 | 'properties', json_object( 443 | {', '.join([ 444 | f"'{col[0]}', COALESCE(CAST({col[0]} AS VARCHAR), 'null')" 445 | for col in conn.execute(f"DESCRIBE {table_name}").fetchall() 446 | if col[0] != 'geometry' 447 | ])} 448 | ) 449 | )::VARCHAR 450 | )) as avg_feature_size 451 | FROM sample; 452 | """ 453 | 454 | # Get average feature size 455 | avg_feature_size = conn.execute(sample_query).fetchone()[0] 456 | 457 | if avg_feature_size: 458 | # Account for GeoJSON overhead 459 | collection_overhead = ( 460 | 50 # {"type":"FeatureCollection","features":[]} 461 | ) 462 | comma_overhead = row_count - 1 # Commas between features 463 | 464 | total_estimated_bytes = ( 465 | (row_count * avg_feature_size) 466 | + collection_overhead 467 | + comma_overhead 468 | ) 469 | return total_estimated_bytes / (1024 * 1024) # Convert to MB 470 | return 0 471 | 472 | except Exception as e: 473 | logger.log(f"Error estimating file size: {str(e)}", 2) 474 | return 0 475 | 476 | def process_schema_columns(self, schema_result): 477 | """Process schema columns and return formatted SELECT clause""" 478 | columns = [] 479 | for row in schema_result: 480 | col_name = row[0] 481 | col_type = row[1] 482 | quoted_col_name = f'"{col_name}"' 483 | 484 | if "STRUCT" in col_type.upper() or "MAP" in col_type.upper(): 485 | columns.append(f"TO_JSON({quoted_col_name}) AS {quoted_col_name}") 486 | elif "[]" in col_type: 487 | columns.append( 488 | f"array_to_string({quoted_col_name}, ', ') AS {quoted_col_name}" 489 | ) 490 | elif col_type.upper() == "UTINYINT": 491 | columns.append( 492 | f"CAST({quoted_col_name} AS INTEGER) AS {quoted_col_name}" 493 | ) 494 | else: 495 | columns.append(quoted_col_name) 496 | return columns 497 | 498 | 499 | class ValidationWorker(QObject): 500 | finished = pyqtSignal(bool, str, dict) 501 | progress = pyqtSignal(str) 502 | needs_bbox_warning = pyqtSignal() 503 | 504 | def __init__(self, dataset_url, iface, extent): 505 | super().__init__() 506 | self.dataset_url = dataset_url 507 | self.iface = iface 508 | self.extent = extent 509 | self.killed = False 510 | 511 | base_path = os.path.dirname(os.path.abspath(__file__)) 512 | presets_path = os.path.join(base_path, "data", "presets.json") 513 | with open(presets_path, "r") as f: 514 | self.PRESET_DATASETS = json.load(f) 515 | 516 | def 
check_bbox_metadata(self, conn): 517 | """Check for bbox information in GeoParquet metadata""" 518 | metadata_query = ( 519 | f"SELECT key, value FROM parquet_kv_metadata('{self.dataset_url}')" 520 | ) 521 | metadata_results = conn.execute(metadata_query).fetchall() 522 | 523 | for key, value in metadata_results: 524 | if key == b"geo": 525 | try: 526 | decoded_value = value.decode() 527 | #logger.log("\nRaw metadata value:") 528 | #logger.log(decoded_value) 529 | 530 | # Install and load JSON extension 531 | conn.execute("INSTALL json;") 532 | conn.execute("LOAD json;") 533 | 534 | # Create a table with the JSON string 535 | conn.execute( 536 | f"CREATE TEMP TABLE temp_json AS SELECT '{decoded_value}' as json_str" 537 | ) 538 | 539 | # Extract the bbox column name using JSON path 540 | # First get the geometry column info which contains the covering 541 | result = conn.execute(""" 542 | SELECT json_str->'$.columns.geometry.covering.bbox.xmin[0]' as bbox_column 543 | FROM temp_json 544 | """).fetchone() 545 | 546 | #logger.log("\nExtracted bbox column name:") 547 | #logger.log(result[0] if result else None) 548 | 549 | if result and result[0]: 550 | # Remove quotes from the result if present 551 | bbox_col = result[0].strip('"') 552 | return bbox_col 553 | 554 | except Exception as e: 555 | logger.log(f"\nError parsing geo metadata: {str(e)}", 2) 556 | logger.log(f"Exception type: {type(e)}", 2) 557 | import traceback 558 | 559 | logger.log(traceback.format_exc()) 560 | finally: 561 | # Clean up temporary table 562 | conn.execute("DROP TABLE IF EXISTS temp_json") 563 | return None 564 | 565 | def run(self): 566 | # Initialize validation results with default values 567 | validation_results = { 568 | "schema": None, 569 | "has_bbox": False, 570 | "bbox_column": None, 571 | "geometry_column": "geometry" # Default fallback 572 | } 573 | 574 | try: 575 | self.progress.emit("Connecting to data source...") 576 | conn = duckdb.connect() 577 | conn.execute("INSTALL spatial;") 578 | conn.execute("LOAD spatial;") 579 | conn.execute("INSTALL httpfs;") 580 | conn.execute("LOAD httpfs;") 581 | 582 | if not self.needs_validation(): 583 | validation_results.update({ 584 | "has_bbox": True, 585 | "bbox_column": "bbox", 586 | }) 587 | self.finished.emit(True, "Validation successful", validation_results) 588 | return 589 | 590 | self.progress.emit("Checking data format...") 591 | schema_query = f"DESCRIBE SELECT * FROM read_parquet('{self.dataset_url}')" 592 | schema_result = conn.execute(schema_query).fetchall() 593 | 594 | # Update validation results with schema 595 | validation_results["schema"] = schema_result 596 | 597 | # Check for standard bbox column first 598 | has_bbox = any( 599 | row[0].lower() == "bbox" and "struct" in row[1].lower() 600 | for row in schema_result 601 | ) 602 | 603 | if has_bbox: 604 | validation_results["has_bbox"] = True 605 | validation_results["bbox_column"] = "bbox" 606 | self.finished.emit(True, "Validation successful", validation_results) 607 | else: 608 | # Check metadata for alternative bbox column 609 | bbox_column = self.check_bbox_metadata(conn) 610 | if bbox_column: 611 | validation_results["has_bbox"] = True 612 | validation_results["bbox_column"] = bbox_column 613 | self.finished.emit(True, "Validation successful", validation_results) 614 | else: 615 | # No bbox column found - emit warning signal first 616 | self.needs_bbox_warning.emit() 617 | # Then emit finished signal with no bbox results 618 | self.finished.emit(True, "Validation with no bbox column", 
validation_results) 619 | 620 | except Exception as e: 621 | logger.log(f"Error in ValidationWorker: {str(e)}") 622 | # Emit warning before error if no bbox was found 623 | if not validation_results.get("has_bbox"): 624 | self.needs_bbox_warning.emit() 625 | # Still emit validation results with default values in case of error 626 | self.finished.emit(False, f"Error validating source: {str(e)}", validation_results) 627 | finally: 628 | conn.close() 629 | 630 | def needs_validation(self): 631 | """Determine if the dataset needs any validation""" 632 | # Check if URL matches any preset dataset 633 | for source in self.PRESET_DATASETS.values(): 634 | for dataset in source.values(): 635 | if ( 636 | isinstance(dataset.get("url"), str) 637 | and dataset["url"] in self.dataset_url 638 | ): 639 | return dataset.get("needs_validation", True) 640 | elif ( 641 | isinstance(dataset.get("url_template"), str) 642 | and dataset["url_template"].split("{")[0] in self.dataset_url 643 | ): 644 | return dataset.get("needs_validation", True) 645 | 646 | # All other datasets need validation 647 | return True 648 | --------------------------------------------------------------------------------
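Editorial appendix: a minimal, self-contained sketch of the DuckDB download pattern used in gpq_downloader/utils.py (bbox filter via ST_Intersects, then a Hilbert-ordered COPY). This is illustrative only, not plugin code: the file paths, bbox values, and the column name "geometry" are assumptions.

import duckdb

SRC = "input.parquet"    # hypothetical source (local path or URL)
DST = "clipped.parquet"  # hypothetical output path
xmin, ymin, xmax, ymax = -122.5, 37.6, -122.3, 37.9  # example WGS84 bbox

conn = duckdb.connect()
conn.execute("INSTALL spatial;")
conn.execute("LOAD spatial;")

# Rectangle as a closed WKT ring, matching the polygon built in Worker.run()
wkt = (f"POLYGON(({xmin} {ymin}, {xmax} {ymin}, {xmax} {ymax}, "
       f"{xmin} {ymax}, {xmin} {ymin}))")

# Filter to the bbox, then write rows ordered along a Hilbert curve so that
# spatially adjacent features sit adjacent in the output file.
conn.execute(f"""
    COPY (
        WITH filtered AS (
            SELECT * FROM read_parquet('{SRC}')
            WHERE ST_Intersects(geometry, ST_GeomFromText('{wkt}'))
        ),
        bbox AS (
            SELECT ST_Extent(ST_Extent_Agg(geometry))::BOX_2D AS b
            FROM filtered
        )
        SELECT f.*
        FROM filtered AS f
        CROSS JOIN bbox
        ORDER BY ST_Hilbert(f.geometry, bbox.b)
    ) TO '{DST}' (FORMAT 'parquet', COMPRESSION 'ZSTD')
""")
conn.close()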