├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .gitmodules ├── LICENSE.txt ├── README.md ├── pyproject.toml ├── src └── datasette_reconcile │ ├── __about__.py │ ├── __init__.py │ ├── reconcile.py │ ├── settings.py │ └── utils.py └── tests ├── __init__.py ├── conftest.py ├── test_reconcile.py ├── test_reconcile_config.py ├── test_reconcile_schema.py └── test_reconcile_utils.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | environment: release 14 | strategy: 15 | max-parallel: 4 16 | matrix: 17 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 18 | steps: 19 | - uses: actions/checkout@v3 20 | with: 21 | submodules: recursive 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | cache: pip 27 | - name: Install dependencies 28 | run: | 29 | pip install -e . 
30 | pip install hatch 31 | - name: Run tests 32 | run: | 33 | hatch run test 34 | lint: 35 | runs-on: ubuntu-latest 36 | environment: release 37 | strategy: 38 | max-parallel: 4 39 | matrix: 40 | python-version: ["3.10"] 41 | steps: 42 | - uses: actions/checkout@v3 43 | - name: Set up Python ${{ matrix.python-version }} 44 | uses: actions/setup-python@v4 45 | with: 46 | python-version: ${{ matrix.python-version }} 47 | cache: pip 48 | - name: Install dependencies 49 | run: | 50 | pip install -e .[lint] 51 | pip install hatch 52 | - name: Run lint 53 | run: | 54 | hatch run lint:style 55 | deploy: 56 | runs-on: ubuntu-latest 57 | environment: release 58 | permissions: 59 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 60 | needs: [test, lint] 61 | steps: 62 | - uses: actions/checkout@v3 63 | - name: Set up Python 64 | uses: actions/setup-python@v4 65 | with: 66 | python-version: "3.10" 67 | cache: pip 68 | - name: Install dependencies 69 | run: | 70 | pip install -e . 71 | pip install hatch 72 | - name: Build 73 | run: | 74 | hatch build 75 | - name: Publish package distributions to PyPI 76 | uses: pypa/gh-action-pypi-publish@release/v1 77 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 4 10 | matrix: 11 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | submodules: recursive 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | cache: pip 21 | - name: Install dependencies 22 | run: | 23 | pip install -e . 
24 | pip install hatch 25 | - name: Run tests 26 | run: | 27 | hatch run test 28 | lint: 29 | runs-on: ubuntu-latest 30 | strategy: 31 | max-parallel: 4 32 | matrix: 33 | python-version: ["3.10"] 34 | steps: 35 | - uses: actions/checkout@v3 36 | - name: Set up Python ${{ matrix.python-version }} 37 | uses: actions/setup-python@v4 38 | with: 39 | python-version: ${{ matrix.python-version }} 40 | cache: pip 41 | - name: Install dependencies 42 | run: | 43 | pip install -e .[lint] 44 | pip install hatch 45 | - name: Run lint 46 | run: | 47 | hatch run lint:style 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | .vscode 11 | db/ 12 | .idea 13 | dist/ 14 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "specs"] 2 | path = specs 3 | url = https://github.com/reconciliation-api/specs.git 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-present David Kane 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 
Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datasette-reconcile 2 | 3 | [![PyPI - Version](https://img.shields.io/pypi/v/datasette-reconcile.svg)](https://pypi.org/project/datasette-reconcile) 4 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/datasette-reconcile.svg)](https://pypi.org/project/datasette-reconcile) 5 | [![Changelog](https://img.shields.io/github/v/release/drkane/datasette-reconcile?include_prereleases&label=changelog)](https://github.com/drkane/datasette-reconcile/releases) 6 | [![Tests](https://github.com/drkane/datasette-reconcile/workflows/Test/badge.svg)](https://github.com/drkane/datasette-reconcile/actions?query=workflow%3ATest) 7 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/drkane/datasette-reconcile/blob/main/LICENSE) 8 | 9 | Adds a reconciliation API endpoint to [Datasette](https://docs.datasette.io/en/stable/), based on the [Reconciliation Service API](https://reconciliation-api.github.io/specs/latest/) specification. 10 | 11 | The reconciliation API is used to match a set of strings to their correct identifiers, to help with disambiguation and consistency in large datasets. For example, the strings "United Kingdom", "United Kingdom of Great Britain and Northern Ireland" and "UK" could all be used to identify the country which has the ISO country code `GB`. 
It is particularly implemented in [OpenRefine](https://openrefine.org/). 12 | 13 | The plugin adds a `/-/reconcile` endpoint to a table served by datasette, which responds based on the Reconciliation Service API specification. In order to activate this endpoint you need to configure the reconciliation service, as described in the [usage](#usage) section. 14 | 15 | ## Installation 16 | 17 | Install this plugin in the same environment as Datasette. 18 | 19 | $ datasette install datasette-reconcile 20 | 21 | ## Usage 22 | 23 | ### Plugin configuration 24 | 25 | The plugin should be configured using Datasette's [`metadata.json`](https://docs.datasette.io/en/stable/metadata.html) file. The configuration can be put at the root, database or table layer of `metadata.json`, for most use cases it will make most sense to configure at the table level. 26 | 27 | Add a `datasette-reconcile` object under `plugins` in `metadata.json`. This should look something like: 28 | 29 | ```json 30 | { 31 | "databases": { 32 | "sf-trees": { 33 | "tables": { 34 | "Street_Tree_List": { 35 | "plugins": { 36 | "datasette-reconcile": { 37 | "id_field": "id", 38 | "name_field": "name", 39 | "type_field": "type", 40 | "type_default": [ 41 | { 42 | "id": "tree", 43 | "name": "Tree" 44 | } 45 | ], 46 | "max_limit": 5, 47 | "service_name": "Tree reconciliation", 48 | "view_url": "https://example.com/trees/{{id}}" 49 | } 50 | } 51 | } 52 | } 53 | } 54 | } 55 | } 56 | ``` 57 | 58 | The only required item in the configuration is `name_field`. This refers to the field in the table which will be searched to match the query text. 59 | 60 | The rest of the configuration items are optional, and are as follows: 61 | 62 | - `id_field`: The field containing the identifier for this entity. If not provided, and there is a primary key set, then the primary key will be used. A primary key of more than one field will give an error. 
63 | - `type_field`: If provided, this field will be used to determine the type of the entity. If not provided, then the `type_default` setting will be used instead. 64 | - `type_default`: If provided, this value will be used as the type of every entity returned. If not provided the default of `Object` will be used for every entity. 65 | - `max_limit`: The maximum number of records that a query can request to return. This is 5 by default. A individual query can request fewer results than this, but it cannot request more. 66 | - `service_name`: The name of the reconciliation service that will appear in the service manifest. If not provided it will take the form ` reconciliation`. 67 | - `identifierSpace`: [Identifier space](https://reconciliation-api.github.io/specs/latest/#identifier-and-schema-spaces) given in the service manifest. If not provided a default of `http://rdf.freebase.com/ns/type.object.id` is used. 68 | - `schemaSpace`: [Schema space](https://reconciliation-api.github.io/specs/latest/#identifier-and-schema-spaces) given in the service manifest. If not provided a default of `http://rdf.freebase.com/ns/type.object.id` is used. 69 | - `view_url`: [URL for a view of an individual entity](https://reconciliation-api.github.io/specs/latest/#dfn-view-template). It must contain the string `{{id}}` which will be replaced with the ID of the entity. If not provided it will use the default datasette view for the entity record (something like `//
/{{id}}`). 70 | 71 | ### Using the endpoint 72 | 73 | Once the plugin is configured for a particular database or table, you can access the reconciliation endpoint using the url `//
/-/reconcile`. 74 | 75 | A simple GET request to `//
/-/reconcile` will return the [Service Manifest](https://reconciliation-api.github.io/specs/latest/#service-manifest) as JSON which reconciliation clients can use to determine how the service is set up. 76 | 77 | A POST request to the same url with the `queries` argument set will trigger the reconciliation process. The `queries` parameter should be a json object in the format described in [the specification](https://reconciliation-api.github.io/specs/latest/#reconciliation-queries). An example set of two queries would look like: 78 | 79 | ```json 80 | { 81 | "q1": { 82 | "query": "Hans-Eberhard Urbaniak" 83 | }, 84 | "q2": { 85 | "query": "Ernst Schwanhold" 86 | } 87 | } 88 | ``` 89 | 90 | The query can optionally be encoded as a `queries` parameter in a GET request. For example: 91 | 92 | ``` 93 | //
/-/reconcile?queries={"q1":{"query":"Hans-Eberhard Urbaniak"},"q2":{"query": "Ernst Schwanhold"}} 94 | ``` 95 | 96 | Various options are available in the query object. Currently the only ones implemented in datasette-reconcile are the mandatory `query` string, and the `limit` option, which must be less than or equal to the value in the `max_limit` configuration option. 97 | 98 | All endpoints that start with `/
/-/reconcile` are configured to send an `Access-Control-Allow-Origin: *` CORS header to allow access [as described in the specification](https://reconciliation-api.github.io/specs/latest/#cross-origin-access). 99 | 100 | JSONP output is not yet supported. 101 | 102 | ### Returned value 103 | 104 | The result of the GET or POST `queries` requests described above is a json object describing potential [reconciliation candidates](https://reconciliation-api.github.io/specs/latest/#reconciliation-query-responses) for each of the queries specified. The result will look something like: 105 | 106 | ```json 107 | { 108 | "q1": { 109 | "result": [ 110 | { 111 | "id": "120333937", 112 | "name": "Urbaniak, Regina", 113 | "score": 53.015232, 114 | "match": false, 115 | "type": [ 116 | { 117 | "id": "person", 118 | "name": "Person" 119 | } 120 | ] 121 | }, 122 | { 123 | "id": "1127147390", 124 | "name": "Urbaniak, Jan", 125 | "score": 52.357353, 126 | "match": false, 127 | "type": [ 128 | { 129 | "id": "person", 130 | "name": "Person" 131 | } 132 | ] 133 | } 134 | ] 135 | }, 136 | "q2": { 137 | "result": [ 138 | { 139 | "id": "123064325", 140 | "name": "Schwanhold, Ernst", 141 | "score": 86.43497, 142 | "match": true, 143 | "type": [ 144 | { 145 | "id": "person", 146 | "name": "Person" 147 | } 148 | ] 149 | }, 150 | { 151 | "id": "116362988X", 152 | "name": "Schwanhold, Nadine", 153 | "score": 62.04763, 154 | "match": false, 155 | "type": [ 156 | { 157 | "id": "person", 158 | "name": "Person" 159 | } 160 | ] 161 | } 162 | ] 163 | } 164 | } 165 | ``` 166 | 167 | ### Behind the scenes 168 | 169 | The reconcile engine works by performing an SQL query against the `name_field` within the specified database table. Where that table has a full text search index implemented, the search will be performed against that index. 
170 | 171 | When a full text search index is present on the table, the SQL query takes the form (based on the search query `test`, note that double quotes are added to facilitate searching - these are not present in the original query): 172 | 173 | ```sql 174 | select , 175 | from
176 | inner join ( 177 | select "rowid", "rank" 178 | from 179 | where MATCH '"test"' 180 | ) as "a" on
."rowid" = a."rowid" 181 | order by a.rank 182 | limit 5 183 | ``` 184 | 185 | If a full text search index is not present, the query looks like this (note that the wildcard `%` is added to either side of the query - these are not present in the original query): 186 | 187 | ```sql 188 | select , 189 | from
190 | where like '%test%' 191 | limit 5 192 | ``` 193 | 194 | ### Extend endpoint 195 | 196 | You can also use the reconciliation API [Data extension service](https://www.w3.org/community/reports/reconciliation/CG-FINAL-specs-0.2-20230410/#data-extension-service) to find additional properties for a set of entities, given an ID. 197 | 198 | Send a GET request to the `//
/-/reconcile/extend/propose` endpoint to find a list of the possible properties you can select. The properties are all the columns in the table (excluding any that have been hidden). An example response would look like: 199 | 200 | ```json 201 | { 202 | "limit": 5, 203 | "type": "Person", 204 | "properties": [ 205 | { 206 | "id": "preferredName", 207 | "name": "preferredName" 208 | }, 209 | { 210 | "id": "professionOrOccupation", 211 | "name": "professionOrOccupation" 212 | }, 213 | { 214 | "id": "wikidataId", 215 | "name": "wikidataId" 216 | } 217 | ] 218 | } 219 | ``` 220 | 221 | Then send a POST request to the `//
/-/reconcile` endpoint with an `extend` argument. The `extend` argument should be a JSON object with a set of `ids` to lookup and `properties` to return. For example: 222 | 223 | ```json 224 | { 225 | "ids": ["10662041X", "1064905412"], 226 | "properties": [ 227 | { 228 | "id": "professionOrOccupation" 229 | }, 230 | { 231 | "id": "wikidataId" 232 | } 233 | ] 234 | } 235 | ``` 236 | 237 | The endpoint will return a result that looks like: 238 | 239 | ```json 240 | { 241 | "meta": [ 242 | { 243 | "id": "professionOrOccupation", 244 | "name": "professionOrOccupation" 245 | }, 246 | { 247 | "id": "wikidataId", 248 | "name": "wikidataId" 249 | } 250 | ], 251 | "rows": { 252 | "10662041X": { 253 | "professionOrOccupation": [ 254 | { 255 | "str": "Doctor" 256 | } 257 | ], 258 | "wikidataId": [ 259 | { 260 | "str": "Q3874347" 261 | } 262 | ] 263 | }, 264 | "1064905412": { 265 | "professionOrOccupation": [ 266 | { 267 | "str": "Architect" 268 | } 269 | ], 270 | "wikidataId": [ 271 | { 272 | "str": "Q3874347" 273 | } 274 | ] 275 | } 276 | } 277 | } 278 | ``` 279 | 280 | ### Suggest endpoints 281 | 282 | You can also use the [suggest endpoints](https://www.w3.org/community/reports/reconciliation/CG-FINAL-specs-0.2-20230410/#suggest-services) to get quick suggestions, for example for an auto-complete dropdown menu. The following endpoints are available: 283 | 284 | - `//
/-/reconcile/suggest/property` - looks up in a list of table columns 285 | - `//
/-/reconcile/suggest/entity` - looks up in a list of table rows 286 | - `//
/-/reconcile/suggest/type` - not currently implemented 287 | 288 | Each endpoint takes a `prefix` argument which can be used in a GET request. For example, the GET request `//
/-/reconcile/suggest/entity?prefix=abc` will produce a response such as: 289 | 290 | ```json 291 | { 292 | "result": [ 293 | { 294 | "name": "abc company limited", 295 | "id": "Q123456" 296 | }, 297 | { 298 | "name": "abc other company limited", 299 | "id": "Q123457" 300 | } 301 | ] 302 | } 303 | ``` 304 | 305 | ## Development 306 | 307 | This plugin uses hatch for build and testing. To set up this plugin locally, first checkout the code. 308 | 309 | You'll need to fetch the git submodules for the tests too: 310 | 311 | git submodule init 312 | git submodule update 313 | 314 | To run the tests: 315 | 316 | hatch run test 317 | 318 | Run tests then report on coverage 319 | 320 | hatch run cov 321 | 322 | Run tests then run a server showing where coverage is missing 323 | 324 | hatch run cov-html 325 | 326 | ### Linting/formatting 327 | 328 | Black and ruff should be run before committing any changes. 329 | 330 | To check for any changes needed: 331 | 332 | hatch run lint:style 333 | 334 | To run any autoformatting possible: 335 | 336 | hatch run lint:fmt 337 | 338 | ### Publish to pypi 339 | 340 | hatch build 341 | hatch publish 342 | git tag v 343 | git push origin v 344 | 345 | ## Acknowledgements 346 | 347 | Thanks for [@simonw](https://github.com/simonw/) for developing datasette and the datasette ecosystem. 348 | 349 | Other contributions from: 350 | 351 | - [@JBPressac](https://github.com/JBPressac/) 352 | - [@nicokant](https://github.com/nicokant/) - implementation of extend service 353 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "datasette-reconcile" 7 | dynamic = ["version"] 8 | description = 'Adds a reconciliation API to Datasette.' 
9 | readme = "README.md" 10 | requires-python = ">=3.8" 11 | license = "MIT" 12 | keywords = [] 13 | authors = [{ name = "David Kane", email = "david@dkane.net" }] 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | "Programming Language :: Python :: Implementation :: CPython", 23 | "Programming Language :: Python :: Implementation :: PyPy", 24 | "Framework :: Datasette", 25 | ] 26 | dependencies = ["datasette", "fuzzywuzzy[speedup]"] 27 | 28 | 29 | [project.optional-dependencies] 30 | test = [ 31 | "coverage[toml]>=6.5", 32 | "pytest", 33 | "pytest-asyncio", 34 | "httpx", 35 | "sqlite-utils", 36 | "jsonschema", 37 | ] 38 | lint = ["mypy>=1.0.0", "ruff>=0.1.8"] 39 | 40 | [project.entry-points.datasette] 41 | reconcile = "datasette_reconcile" 42 | 43 | [project.urls] 44 | Documentation = "https://github.com/drkane/datasette-reconcile#readme" 45 | Issues = "https://github.com/drkane/datasette-reconcile/issues" 46 | Source = "https://github.com/drkane/datasette-reconcile" 47 | CI = "https://github.com/drkane/datasette-reconcile/actions" 48 | Changelog = "https://github.com/drkane/datasette-reconcile/releases" 49 | 50 | [tool.hatch.version] 51 | path = "src/datasette_reconcile/__about__.py" 52 | 53 | [tool.hatch.envs.default] 54 | features = ["test", "lint"] 55 | 56 | [tool.hatch.envs.default.scripts] 57 | test = "pytest {args:tests}" 58 | test-cov = "coverage run -m pytest {args:tests}" 59 | cov-report = ["- coverage combine", "coverage report"] 60 | cov = ["test-cov", "cov-report"] 61 | cov-fail = ["test-cov", "- coverage combine", "coverage report --fail-under=95"] 62 | cov-html = [ 63 | "test-cov", 64 | "- coverage combine", 65 | "coverage report", 66 | "coverage html", 67 | "python -m 
http.server -d htmlcov", 68 | ] 69 | 70 | [[tool.hatch.envs.all.matrix]] 71 | python = ["3.8", "3.9", "3.10", "3.11", "3.12"] 72 | 73 | [tool.hatch.envs.lint] 74 | detached = true 75 | features = ["lint"] 76 | 77 | [tool.hatch.envs.lint.scripts] 78 | typing = "mypy --install-types --non-interactive {args:src/datasette_reconcile tests}" 79 | style = ["ruff {args:.}", "ruff format --check {args:.}"] 80 | fmt = ["ruff format {args:.}", "ruff --fix {args:.}", "style"] 81 | all = ["style", "typing"] 82 | 83 | [tool.ruff] 84 | target-version = "py38" 85 | line-length = 120 86 | select = [ 87 | "A", 88 | "ARG", 89 | "B", 90 | "C", 91 | "DTZ", 92 | "E", 93 | "EM", 94 | "F", 95 | "FBT", 96 | "I", 97 | "ICN", 98 | "ISC", 99 | "N", 100 | "PLC", 101 | "PLE", 102 | "PLR", 103 | "PLW", 104 | "Q", 105 | "RUF", 106 | "S", 107 | "T", 108 | "TID", 109 | "UP", 110 | "W", 111 | "YTT", 112 | ] 113 | ignore = [ 114 | # Allow non-abstract empty methods in abstract base classes 115 | "B027", 116 | # Allow boolean positional values in function calls, like `dict.get(... 
True)` 117 | "FBT003", 118 | # Ignore checks for possible passwords 119 | "S105", 120 | "S106", 121 | "S107", 122 | # Ignore complexity 123 | "C901", 124 | "PLR0911", 125 | "PLR0912", 126 | "PLR0913", 127 | "PLR0915", 128 | ] 129 | unfixable = [ 130 | # Don't touch unused imports 131 | "F401", 132 | ] 133 | 134 | [tool.ruff.isort] 135 | known-first-party = ["datasette_reconcile"] 136 | 137 | [tool.ruff.flake8-tidy-imports] 138 | ban-relative-imports = "all" 139 | 140 | [tool.ruff.per-file-ignores] 141 | # Tests can use magic values, assertions, and relative imports 142 | "tests/**/*" = ["PLR2004", "S101", "TID252"] 143 | 144 | [tool.coverage.run] 145 | source_pkgs = ["datasette_reconcile", "tests"] 146 | branch = true 147 | parallel = true 148 | omit = ["src/datasette_reconcile/__about__.py"] 149 | 150 | [tool.coverage.paths] 151 | datasette_reconcile = [ 152 | "src/datasette_reconcile", 153 | "*/datasette-reconcile/src/datasette_reconcile", 154 | ] 155 | tests = ["tests", "*/datasette-reconcile/tests"] 156 | 157 | [tool.coverage.report] 158 | exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] 159 | -------------------------------------------------------------------------------- /src/datasette_reconcile/__about__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present David Kane 2 | # 3 | # SPDX-License-Identifier: MIT 4 | __version__ = "0.6.3" 5 | -------------------------------------------------------------------------------- /src/datasette_reconcile/__init__.py: -------------------------------------------------------------------------------- 1 | from datasette import hookimpl 2 | 3 | from datasette_reconcile.reconcile import ReconcileAPI 4 | from datasette_reconcile.utils import check_config, check_permissions 5 | 6 | 7 | async def get_api(request, datasette): 8 | database = request.url_vars["db_name"] 9 | table = request.url_vars["db_table"] 10 | db = 
datasette.get_database(database) 11 | 12 | # get plugin configuration 13 | config = datasette.plugin_config("datasette-reconcile", database=database, table=table) 14 | config = await check_config(config, db, table) 15 | 16 | # check user can at least view this table 17 | await check_permissions( 18 | request, 19 | [ 20 | ("view-table", (database, table)), 21 | ("view-database", database), 22 | "view-instance", 23 | ], 24 | datasette, 25 | ) 26 | 27 | # get the reconciliation API and call it 28 | return ReconcileAPI(config, database, table, datasette) 29 | 30 | 31 | async def reconcile(request, datasette): 32 | reconcile_api = await get_api(request, datasette) 33 | return await reconcile_api.reconcile(request) 34 | 35 | 36 | async def properties(request, datasette): 37 | reconcile_api = await get_api(request, datasette) 38 | return await reconcile_api.properties(request) 39 | 40 | 41 | async def suggest_entity(request, datasette): 42 | reconcile_api = await get_api(request, datasette) 43 | return await reconcile_api.suggest_entity(request) 44 | 45 | 46 | async def suggest_property(request, datasette): 47 | reconcile_api = await get_api(request, datasette) 48 | return await reconcile_api.suggest_property(request) 49 | 50 | 51 | async def suggest_type(request, datasette): 52 | reconcile_api = await get_api(request, datasette) 53 | return await reconcile_api.suggest_type(request) 54 | 55 | 56 | @hookimpl 57 | def register_routes(): 58 | return [ 59 | (r"/(?P[^/]+)/(?P[^/]+?)/-/reconcile$", reconcile), 60 | (r"/(?P[^/]+)/(?P[^/]+?)/-/reconcile/extend/propose$", properties), 61 | (r"/(?P[^/]+)/(?P[^/]+?)/-/reconcile/suggest/entity$", suggest_entity), 62 | (r"/(?P[^/]+)/(?P[^/]+?)/-/reconcile/suggest/property$", suggest_property), 63 | (r"/(?P[^/]+)/(?P[^/]+?)/-/reconcile/suggest/type$", suggest_type), 64 | ] 65 | -------------------------------------------------------------------------------- /src/datasette_reconcile/reconcile.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasette.utils import escape_fts, escape_sqlite 4 | from datasette.utils.asgi import Response 5 | from fuzzywuzzy import fuzz 6 | 7 | from datasette_reconcile.settings import ( 8 | DEFAULT_IDENTIFER_SPACE, 9 | DEFAULT_LIMIT, 10 | DEFAULT_SCHEMA_SPACE, 11 | DEFAULT_TYPE, 12 | ) 13 | from datasette_reconcile.utils import get_select_fields, get_view_url 14 | 15 | 16 | class ReconcileAPI: 17 | api_version = "0.2" 18 | 19 | def __init__(self, config, database, table, datasette): 20 | self.config = config 21 | self.database = database 22 | self.db = datasette.get_database(database) 23 | self.table = table 24 | self.datasette = datasette 25 | 26 | async def reconcile(self, request): 27 | """ 28 | Takes a request and returns a response based on the queries. 29 | """ 30 | # work out if we are looking for queries 31 | post_vars = await request.post_vars() 32 | queries = post_vars.get("queries", request.args.get("queries")) 33 | extend = post_vars.get("extend", request.args.get("extend")) 34 | 35 | if queries: 36 | return self._response({q[0]: {"result": q[1]} async for q in self._reconcile_queries(json.loads(queries))}) 37 | elif extend: 38 | response = await self._extend(json.loads(extend)) 39 | return self._response(response) 40 | else: 41 | # if we're not then just return the service specification 42 | return self._response(await self._service_manifest(request)) 43 | 44 | async def properties(self, request): 45 | limit = request.args.get("limit", DEFAULT_LIMIT) 46 | type_ = request.args.get("type", DEFAULT_TYPE) 47 | 48 | return self._response( 49 | { 50 | "limit": limit, 51 | "type": type_, 52 | "properties": [{"id": p["id"], "name": p["name"]} async for p in self._get_properties()], 53 | } 54 | ) 55 | 56 | async def suggest_entity(self, request): 57 | prefix = request.args.get("prefix") 58 | cursor = int(request.args.get("cursor", 0)) 59 | 60 | name_field = 
self.config["name_field"] 61 | id_field = self.config.get("id_field", "id") 62 | query_sql = f""" 63 | select {escape_sqlite(id_field)} as id, {escape_sqlite(name_field)} as name 64 | from {escape_sqlite(self.table)} 65 | where {escape_sqlite(name_field)} like :search_query 66 | limit {DEFAULT_LIMIT} offset {cursor} 67 | """ # noqa: S608 68 | params = {"search_query": f"{prefix}%"} 69 | 70 | return self._response( 71 | {"result": [{"id": r["id"], "name": r["name"]} for r in await self.db.execute(query_sql, params)]} 72 | ) 73 | 74 | async def suggest_property(self, request): 75 | prefix = request.args.get("prefix") 76 | cursor = request.args.get("cursor", 0) 77 | 78 | properties = [ 79 | {"id": p["id"], "name": p["name"]} 80 | async for p in self._get_properties() 81 | if p["name"].startswith(prefix) or p["id"].startswith(prefix) 82 | ][cursor : cursor + DEFAULT_LIMIT] 83 | 84 | return self._response({"result": properties}) 85 | 86 | async def suggest_type(self, request): 87 | prefix = request.args.get("prefix") 88 | 89 | default_type = self.config.get("type_default", [DEFAULT_TYPE]) 90 | type_field = self.config.get("type_field") 91 | if type_field: 92 | query_sql = """ 93 | SELECT CASE WHEN {type_field} IS NULL THEN '{default_type}' ELSE {type_field} END as type 94 | FROM {from_clause} 95 | GROUP BY type 96 | """.format( # noqa: S608 97 | type_field=escape_sqlite(type_field), 98 | default_type=default_type[0]["id"], 99 | from_clause=escape_sqlite(self.table), 100 | ) 101 | types = [ 102 | { 103 | "id": r["type"], 104 | "name": r["type"], 105 | } 106 | for r in await self.db.execute(query_sql) 107 | ] 108 | else: 109 | types = default_type 110 | 111 | return self._response( 112 | { 113 | "result": [ 114 | type_ for type_ in types if prefix.lower() in type_["id"] or prefix.lower() in type_["name"] 115 | ][:DEFAULT_LIMIT] 116 | } 117 | ) 118 | 119 | async def _get_properties(self): 120 | column_descriptions = self.datasette.table_metadata(self.database, 
self.table).get("columns") or {} 121 | for column in await self.db.table_column_details(self.table): 122 | yield { 123 | "id": column.name, 124 | "name": column_descriptions.get(column.name, column.name), 125 | "type": column.type, 126 | } 127 | 128 | def _response(self, response): 129 | return Response.json( 130 | response, 131 | headers={ 132 | "Access-Control-Allow-Origin": "*", 133 | }, 134 | ) 135 | 136 | async def _extend(self, data): 137 | ids = data["ids"] 138 | data_properties = data["properties"] 139 | properties = {p["name"]: p async for p in self._get_properties()} 140 | id_field = self.config.get("id_field", "id") 141 | 142 | select_fields = [id_field] + [p["id"] for p in data_properties] 143 | 144 | query_sql = """ 145 | select {fields} 146 | from {table} 147 | where {where_clause} 148 | """.format( # noqa: S608 149 | table=escape_sqlite(self.table), 150 | where_clause=f"{escape_sqlite(id_field)} in ({','.join(['?'] * len(ids))})", 151 | fields=",".join([escape_sqlite(f) for f in select_fields]), 152 | ) 153 | query_results = await self.db.execute(query_sql, ids) 154 | 155 | rows = {} 156 | for row in query_results: 157 | values = {} 158 | for p in data_properties: 159 | property_ = properties[p["id"]] 160 | if property_["type"] == "INTEGER": 161 | values[p["id"]] = [{"int": row[p["id"]]}] 162 | elif property_["type"] == "FLOAT": 163 | values[p["id"]] = [{"float": row[p["id"]]}] 164 | else: 165 | values[p["id"]] = [{"str": row[p["id"]]}] 166 | 167 | rows[row[id_field]] = values 168 | 169 | response = { 170 | "meta": [{"id": p["id"], "name": properties[p["id"]]["name"]} for p in data_properties], 171 | "rows": rows, 172 | } 173 | 174 | return response 175 | 176 | async def _reconcile_queries(self, queries): 177 | select_fields = get_select_fields(self.config) 178 | for query_id, query in queries.items(): 179 | limit = min( 180 | query.get("limit", self.config.get("max_limit", DEFAULT_LIMIT)), 181 | self.config.get("max_limit", DEFAULT_LIMIT), 182 | ) 

            # Build WHERE clauses incrementally; "1" keeps the joiner simple.
            where_clauses = ["1"]
            from_clause = escape_sqlite(self.table)
            order_by = ""
            params = {}
            if self.config["fts_table"]:
                # NB this will fail if the table name has non-alphanumeric
                # characters in and sqlite3 version < 3.30.0
                # see: https://www.sqlite.org/src/info/00e9a8f2730eb723
                from_clause = """
                {table}
                inner join (
                        SELECT "rowid", "rank"
                        FROM {fts_table}
                        WHERE {fts_table} MATCH :search_query
                ) as "a" on {table}."rowid" = a."rowid"
                """.format(  # noqa: S608
                    table=escape_sqlite(self.table),
                    fts_table=escape_sqlite(self.config["fts_table"]),
                )
                order_by = "order by a.rank"
                params["search_query"] = escape_fts(query["query"])
            else:
                # No FTS table configured: fall back to a LIKE substring
                # match against the configured name field.
                where_clauses.append(
                    "{search_col} like :search_query".format(
                        search_col=escape_sqlite(self.config["name_field"]),
                    )
                )
                params["search_query"] = f"%{query['query']}%"

            # Optional type filter: the spec allows a single value or a list.
            types = query.get("type", [])
            if not isinstance(types, list) and types:
                types = [types]
            type_field = self.config.get("type_field")
            if types and type_field:
                # Values are bound as named parameters, never interpolated.
                type_values = {f"type_value{index}": t for index, t in enumerate(types)}
                where_clauses.append(
                    "{type_field} in ({type_values})".format(
                        type_field=escape_sqlite(type_field),
                        type_values=", ".join([f":{value}" for value in type_values.keys()]),
                    )
                )
                params = {**params, **type_values}

            # Optional property filters: each adds an IN clause with bound params.
            if query.get("properties"):
                for index, prop in enumerate(query["properties"]):
                    if prop["v"]:
                        property_id = prop["pid"]
                        property_values = prop["v"]
                        if not isinstance(property_values, list):
                            property_values = [property_values]
                        property_values = {f"property_value{index}_{i}": v for i, v in enumerate(property_values)}
                        where_clauses.append(
                            "{property_id} in ({property_values})".format(
                                property_id=escape_sqlite(property_id),
                                property_values=", ".join([f":{value}" for value in property_values.keys()]),
                            )
                        )
                        params = {**params, **property_values}

            query_sql = """
                SELECT {select_fields}
                FROM {from_clause}
                WHERE {where_clause} {order_by}
                LIMIT {limit}""".format(  # noqa: S608
                select_fields=",".join([escape_sqlite(f) for f in select_fields]),
                from_clause=from_clause,
                where_clause=" and ".join(where_clauses),
                order_by=order_by,
                limit=limit,
            )
            # Best candidates first: sort by fuzzy-match score, descending.
            query_results = [self._get_query_result(r, query) for r in await self.db.execute(query_sql, params)]
            query_results = sorted(query_results, key=lambda x: -x["score"])
            yield query_id, query_results

    def _get_query_result(self, row, query):
        # Convert one database row into a reconciliation candidate object.
        row = dict(row)

        name = str(row.pop(self.config["name_field"]))
        # Normalised forms used for exact-match detection and fuzzy scoring.
        name_match = str(name).lower().strip()
        query_match = str(query["query"]).lower().strip()

        type_ = self.config.get("type_default", [DEFAULT_TYPE])
        type_field = self.config.get("type_field")
        if type_field and type_field in row:
            # The row's own type value replaces the configured default.
            type_value = row.pop(type_field)
            type_ = [
                {
                    "id": type_value,
                    "name": type_value,
                }
            ]

        id_value = str(row.pop(self.config["id_field"]))

        result = {
            "id": id_value,
            "name": name,
            "type": type_,
            # fuzz.ratio yields a 0-100 similarity between name and query.
            "score": fuzz.ratio(name_match, query_match),
            "match": name_match == query_match,
        }
        if self.config["description_field"]:
            result["description"] = str(row.pop(self.config["description_field"]))

        return result

    async def _service_manifest(self, request):
        # @todo: if type_field is set then get a list of types to use in the "defaultTypes" item below.
        # handle X-FORWARDED-PROTO in Datasette: https://github.com/simonw/datasette/issues/2215
        scheme = request.scheme
        if "x-forwarded-proto" in request.headers:
            scheme = request.headers.get("x-forwarded-proto")

        base_url = f'{scheme}://{request.host}{self.datasette.setting("base_url")}'
        if not base_url.endswith("/"):
            base_url += "/"

        service_url = f"{base_url}{self.database}/{self.table}/-/reconcile"

        # A configured view_url wins; otherwise build one from the table URL.
        view_url = self.config.get("view_url")
        if not view_url:
            view_url = f"{scheme}://{request.host}{get_view_url(self.datasette, self.database, self.table)}"

        properties = self._get_properties()

        manifest = {
            "versions": ["0.1", "0.2"],
            "name": self.config.get(
                "service_name",
                f"{self.database} {self.table} reconciliation",
            ),
            "identifierSpace": self.config.get("identifierSpace", DEFAULT_IDENTIFER_SPACE),
            "schemaSpace": self.config.get("schemaSpace", DEFAULT_SCHEMA_SPACE),
            "defaultTypes": self.config.get("type_default", [DEFAULT_TYPE]),
            "view": {"url": view_url},
            # For API versions 0.1/0.2 the sub-services are advertised with
            # explicit URLs; later drafts use a bare `true` flag instead.
            "extend": {
                "propose_properties": (
                    {
                        "service_url": service_url,
                        "service_path": "/extend/propose",
                    }
                    if self.api_version in ["0.1", "0.2"]
                    else True
                ),
                "property_settings": [
                    {
                        "name": p["id"],
                        "label": p["name"],
                        "type": "number" if p["type"] in ["INTEGER", "FLOAT"] else "text",
                    }
                    async for p in properties
                ],
            },
            "suggest": {
                "entity": (
                    {
                        "service_url": service_url,
                        "service_path": "/suggest/entity",
                    }
                    if self.api_version in ["0.1", "0.2"]
                    else True
                ),
                "type": (
                    {
                        "service_url": service_url,
                        "service_path": "/suggest/type",
                    }
                    if self.api_version in ["0.1", "0.2"]
                    else True
                ),
                "property": (
                    {
                        "service_url": service_url,
                        "service_path": "/suggest/property",
                    }
                    if self.api_version in ["0.1", "0.2"]
                    else True
                ),
            },
        }

        return manifest
--------------------------------------------------------------------------------
/src/datasette_reconcile/settings.py:
--------------------------------------------------------------------------------
# Shared constants for the datasette-reconcile plugin.
DEFAULT_LIMIT = 5
DEFAULT_TYPE = {
    "name": "Object",
    "id": "object",
}
# NOTE(review): "IDENTIFER" is misspelt, but the name is part of the module's
# public API (imported elsewhere), so renaming it would break importers.
DEFAULT_IDENTIFER_SPACE = "http://rdf.freebase.com/ns/type.object.id"
DEFAULT_SCHEMA_SPACE = "http://rdf.freebase.com/ns/type.object.id"
# Minimum sqlite version that safely supports quoted FTS table names.
SQLITE_VERSION_WARNING = (3, 30, 0)
SUPPORTED_API_VERSIONS = ["0.1", "0.2"]
--------------------------------------------------------------------------------
/src/datasette_reconcile/utils.py:
--------------------------------------------------------------------------------
import sqlite3
import warnings

from datasette.utils import HASH_LENGTH
from datasette.utils.asgi import Forbidden, NotFound

from datasette_reconcile.settings import DEFAULT_TYPE, SQLITE_VERSION_WARNING

PERMISSION_TUPLE_SIZE = 2


class ReconcileError(Exception):
    # Raised for invalid datasette-reconcile configuration or requests.
    pass


async def check_permissions(request, permissions, ds):
    "permissions is a list of (action, resource) tuples or 'action' strings"
    "from https://github.com/simonw/datasette/blob/main/datasette/views/base.py#L69"
    for permission in permissions:
        if isinstance(permission, str):
            action = permission
            resource = None
        elif isinstance(permission, (tuple, list)) and len(permission) == PERMISSION_TUPLE_SIZE:
            action, resource = permission
        else:
            msg = f"permission should be string or tuple of two items: {permission!r}"
            raise AssertionError(msg)
        # First explicit allow/deny wins; None means "no opinion", keep checking.
        ok = await ds.permission_allowed(
            request.actor,
            action,
            resource=resource,
            default=None,
        )
        if ok is not None:
            if ok:
                return
            else:
                raise Forbidden(action)


async def check_config(config, db, table):
    # Validate the plugin configuration for `table` and fill in defaults,
    # raising NotFound / ReconcileError / TypeError on bad configuration.
    is_view = bool(await db.get_view_definition(table))
    table_exists = bool(await db.table_exists(table))
    if not is_view and not table_exists:
        msg = f"Table not found: {table}"
        raise NotFound(msg)

    if not config:
        msg = f"datasette-reconcile not configured for table {table} in database {db!s}"
        raise NotFound(msg)

    pks = await db.primary_keys(table)
    if not pks:
        # Tables without an explicit primary key fall back to sqlite's rowid.
        pks = ["rowid"]

    # id_field defaults to a single-column primary key when not configured.
    if "id_field" not in config and len(pks) == 1:
        config["id_field"] = pks[0]
    elif "id_field" not in config:
        msg = "Could not determine an ID field to use"
        raise ReconcileError(msg)
    if "name_field" not in config:
        msg = "Name field must be defined to activate reconciliation"
        raise ReconcileError(msg)
    if "description_field" not in config:
        config["description_field"] = None
    if "type_field" not in config and "type_default" not in config:
        config["type_default"] = [DEFAULT_TYPE]

    if "max_limit" in config and not isinstance(config["max_limit"], int):
        msg = "max_limit in reconciliation config must be an integer"
        raise TypeError(msg)
    if "type_default" in config:
        # type_default must be a list of {"id": str, "name": str} objects.
        if not isinstance(config["type_default"], list):
            msg = "type_default should be a list of objects"
            raise ReconcileError(msg)
        for t in config["type_default"]:
            if not isinstance(t, dict):
                msg = "type_default values should be objects"
                raise ReconcileError(msg)
            if not isinstance(t.get("id"), str):
                msg = "type_default 'id' values should be strings"
                raise ReconcileError(msg)
            if not isinstance(t.get("name"), str):
                msg = "type_default 'name' values should be strings"
                raise ReconcileError(msg)

    if "view_url" in config:
        # The template needs a literal {{id}} placeholder to substitute into.
        if "{{id}}" not in config["view_url"]:
            msg = "View URL must contain {{id}}"
            raise ReconcileError(msg)

    if "fts_table"
not in config: 93 | config["fts_table"] = await db.fts_table(table) 94 | 95 | # let's show a warning if sqlite3 version is less than 3.30.0 96 | # full text search results will fail for < 3.30.0 if the table 97 | # name contains special characters 98 | if config["fts_table"] and ( 99 | ( 100 | sqlite3.sqlite_version_info[0] == SQLITE_VERSION_WARNING[0] 101 | and sqlite3.sqlite_version_info[1] < SQLITE_VERSION_WARNING[1] 102 | ) 103 | or sqlite3.sqlite_version_info[0] < SQLITE_VERSION_WARNING[0] 104 | ): 105 | warnings.warn( 106 | "Full Text Search queries for sqlite3 version < 3.30.0 wil fail if table name contains special characters", 107 | stacklevel=2, 108 | ) 109 | 110 | return config 111 | 112 | 113 | def get_select_fields(config): 114 | select_fields = [config["id_field"], config["name_field"], *config.get("additional_fields", [])] 115 | if config.get("type_field"): 116 | select_fields.append(config["type_field"]) 117 | if config.get("description_field"): 118 | select_fields.append(config["description_field"]) 119 | return select_fields 120 | 121 | 122 | def get_view_url(ds, database, table): 123 | id_str = "{{id}}" 124 | if hasattr(ds, "urls"): 125 | return ds.urls.row(database, table, id_str) 126 | db = ds.databases[database] 127 | base_url = ds.config("base_url") 128 | if ds.config("hash_urls") and db.hash: 129 | return f"{base_url}{database}-{db.hash[:HASH_LENGTH]}/{table}/{id_str}" 130 | else: 131 | return f"{base_url}{database}/{table}/{id_str}" 132 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present David Kane 2 | # 3 | # SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 
import pytest
import sqlite_utils
from datasette.app import Datasette
from referencing import Registry
from referencing.jsonschema import DRAFT7

from datasette_reconcile.settings import SUPPORTED_API_VERSIONS

# Reconciliation spec schemas live in a git submodule checked out at ../specs.
SCHEMA_DIR = os.path.join(
    os.path.dirname(__file__),
    "../specs",
)


def create_db(tmp_path_factory, enable_fts):
    # Build a small throwaway sqlite database of dogs for the test suite.
    db_directory = tmp_path_factory.mktemp("dbs")
    db_path = db_directory / "test.db"
    db = sqlite_utils.Database(db_path)
    db["dogs"].insert_all(
        [
            {"id": 1, "name": "Cleo", "age": 5, "status": "good dog"},
            {"id": 2, "name": "Pancakes", "age": 4, "status": "bad dog"},
            {"id": 3, "name": "Fido", "age": 3, "status": "bad dog"},
            {"id": 4, "name": "Scratch", "age": 3, "status": "good dog"},
            {"id": 5, "name": "Pancakes", "age": 5, "status": "bad dog"},
        ],
        pk="id",
    )

    if enable_fts:
        db["dogs"].enable_fts(["name"])

    return db_path


def plugin_metadata(metadata=None):
    # Datasette metadata dict, optionally embedding datasette-reconcile config.
    to_return = {"databases": {"test": {"tables": {"dogs": {"title": "Some dogs"}}}}}
    if isinstance(metadata, dict):
        to_return["databases"]["test"]["tables"]["dogs"]["plugins"] = {"datasette-reconcile": metadata}
    return to_return


def get_schema(filename):
    # Map supported API version -> parsed JSON schema for `filename`.
    schemas = {}
    for f in os.scandir(SCHEMA_DIR):
        if not f.is_dir():
            continue
        if f.name not in SUPPORTED_API_VERSIONS:
            continue
        schema_path = os.path.join(f.path, "schemas", filename)
        if os.path.exists(schema_path):
            with open(schema_path, encoding="utf8") as schema_file:
                schemas[f.name] = json.load(schema_file)
    return schemas


@pytest.fixture(scope="session")
def ds(tmp_path_factory):
    # Datasette instance over the fixture database, without FTS.
    ds = Datasette([create_db(tmp_path_factory, False)], metadata=plugin_metadata())
    return ds


@pytest.fixture(scope="session")
def db_path(tmp_path_factory):
    return
create_db(tmp_path_factory, False)


@pytest.fixture(scope="session")
def ds_fts(tmp_path_factory):
    # Same as `ds`, but with full-text search enabled on the "name" column.
    ds = Datasette([create_db(tmp_path_factory, True)], metadata=plugin_metadata())
    return ds


@pytest.fixture(scope="session")
def db_path_fts(tmp_path_factory):
    return create_db(tmp_path_factory, True)


def retrieve_schema_from_filesystem(uri: str):
    # Resolve reconciliation-spec schema URIs against the local specs
    # checkout so schema validation never touches the network.
    recon_schema = re.match(
        r"https://reconciliation-api\.github\.io/specs/(.*)/schemas/(.*\.json)",
        uri,
    )
    if recon_schema:
        schema_version = recon_schema.group(1)
        schema_file = recon_schema.group(2)
        return DRAFT7.create_resource(get_schema(schema_file)[schema_version])

    msg = f"Unknown URI {uri}"
    raise ValueError(msg)


registry = Registry(retrieve=retrieve_schema_from_filesystem)


def do_method(client, method, *args, **kwargs):
    # Send the same payload via POST body or GET query-string params, letting
    # tests exercise both transports the reconciliation spec allows.
    if method == "post":
        return client.post(*args, **kwargs)
    kwargs["params"] = kwargs.pop("data")
    return client.get(*args, **kwargs)
--------------------------------------------------------------------------------
/tests/test_reconcile.py:
--------------------------------------------------------------------------------
import json

import httpx
import pytest
from datasette.app import Datasette

from tests.conftest import do_method, plugin_metadata


@pytest.mark.asyncio
async def test_plugin_is_installed():
    app = Datasette([], memory=True).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/-/plugins.json")
        assert 200 == response.status_code
        installed_plugins = {p["name"] for p in response.json()}
        assert "datasette-reconcile" in installed_plugins


@pytest.mark.asyncio
async def test_response_not_configured(db_path):
    # Without plugin config the reconcile endpoint must 404.
    app = Datasette([db_path]).app()
    async with
httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        assert 404 == response.status_code


@pytest.mark.asyncio
async def test_response_without_query(db_path):
    # A GET with no queries returns the service manifest.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        assert 200 == response.status_code
        data = response.json()
        assert "name" in data.keys()
        assert isinstance(data["defaultTypes"], list)
        assert len(data["defaultTypes"]) == 1
        assert data["defaultTypes"][0]["id"] == "object"
        assert data["defaultTypes"][0]["name"] == "Object"
        assert data["view"]["url"].startswith("http")
        assert response.headers["Access-Control-Allow-Origin"] == "*"


# NOTE(review): "servce" in the following test names is a typo for "service";
# left unchanged since renaming identifiers is not a comment-only change.
@pytest.mark.asyncio
async def test_servce_manifest_view_url_default(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        assert 200 == response.status_code
        data = response.json()
        assert data["view"]["url"] == "http://localhost/test/dogs/{{id}}"


@pytest.mark.asyncio
async def test_servce_manifest_https(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("https://localhost/test/dogs/-/reconcile")
        assert 200 == response.status_code
        data = response.json()
        assert data["view"]["url"] == "https://localhost/test/dogs/{{id}}"


@pytest.mark.asyncio
async def test_servce_manifest_x_forwarded_proto_https(db_path):
    # The manifest must honour x-forwarded-proto when building URLs.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile", headers={"x-forwarded-proto": "https"})
        assert 200 == response.status_code
        data = response.json()
        assert data["view"]["url"] == "https://localhost/test/dogs/{{id}}"


@pytest.mark.asyncio
async def test_servce_manifest_view_url_custom(db_path):
    custom_view_url = "https://example.com/{{id}}"
    app = Datasette(
        [db_path],
        metadata=plugin_metadata(
            {
                "name_field": "name",
                "view_url": custom_view_url,
            }
        ),
    ).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        assert 200 == response.status_code
        data = response.json()
        assert data["view"]["url"] == custom_view_url


@pytest.mark.asyncio
async def test_servce_manifest_view_extend(db_path):
    app = Datasette(
        [db_path],
        metadata=plugin_metadata({"name_field": "name"}),
    ).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        assert 200 == response.status_code
        data = response.json()
        assert "extend" in data
        assert data["extend"]["propose_properties"]["service_url"] == "http://localhost/test/dogs/-/reconcile"
        assert data["extend"]["property_settings"][3]["name"] == "status"


@pytest.mark.asyncio
@pytest.mark.parametrize("suggest_type", ["entity", "type", "property"])
async def test_servce_manifest_view_suggest(db_path, suggest_type):
    app = Datasette(
        [db_path],
        metadata=plugin_metadata({"name_field": "name"}),
    ).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        assert 200 == response.status_code
        data = response.json()
        assert "extend" in data
        assert data["suggest"][suggest_type]["service_url"] == "http://localhost/test/dogs/-/reconcile"
        assert data["suggest"][suggest_type]["service_path"] == f"/suggest/{suggest_type}"
        assert len(data["suggest"]) == 3


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries(db_path, method):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 1
        result = data["q0"]["result"][0]
        assert result["id"] == "3"
        assert result["name"] == "Fido"
        assert result["score"] == 100
        assert result["type"] == [
            {
                "name": "Object",
                "id": "object",
            }
        ]
        assert "description" not in result
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_with_properties(db_path, method):
    # A property filter ({"pid": "age", "v": 5}) narrows two Pancakes to one.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "pancakes", "properties": [{"pid": "age", "v": 5}]}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 1
        result = data["q0"]["result"][0]
        assert result["id"] == "5"
        assert result["name"] == "Pancakes"
        assert result["score"] == 100
        assert result["type"] == [
            {
                "name": "Object",
                "id": "object",
            }
        ]
        assert "description" not in result
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_without_properties(db_path, method):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "pancakes"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 2
        result = data["q0"]["result"][0]
        assert result["id"] in ("2", "5")
        assert result["name"] == "Pancakes"
        assert result["score"] == 100
        assert result["type"] == [
            {
                "name": "Object",
                "id": "object",
            }
        ]
        assert "description" not in result
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_no_results(db_path, method):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "abcdef"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 0
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_propose_properties(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/extend/propose?type=object")
        assert 200 == response.status_code
        data = response.json()
        assert len(data["properties"]) == 4
        result = data["properties"][3]
        assert result["name"] == "status"
        assert result["id"] == "status"
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_extend(db_path, method):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        extend = {"extend": json.dumps({"ids": ["1", "2", "3", "4"], "properties": [{"id": "status"}, {"id": "age"}]})}
        response = await do_method(client, method, "http://localhost/test/dogs/-/reconcile", data=extend)
        assert 200 == response.status_code
        data = response.json()

        assert "meta" in data
        assert data["meta"][0]["id"] == "status"
        assert data["meta"][0]["name"] == "status"
        assert "rows" in data

        expect = {
            "1": "good dog",
            "2": "bad dog",
            "3": "bad dog",
            "4": "good dog",
        }

        for key in expect.keys():
            assert data["rows"][key]["status"][0]["str"] == expect[key]

        expect_nums = {
            "1": 5,
            "2": 4,
            "3": 3,
            "4": 3,
        }

        for key in expect_nums.keys():
            assert data["rows"][key]["age"][0]["int"] == expect_nums[key]

        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_entity(db_path):
    app =
Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/entity?prefix=f")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        # NOTE(review): suggest/entity returns the raw integer id here, while
        # the reconcile endpoint stringifies ids — worth confirming this
        # inconsistency is intended.
        assert data["result"][0]["id"] == 3
        assert data["result"][0]["name"] == "Fido"
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_property(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/property?prefix=a")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        assert data["result"][0]["id"] == "age"
        assert data["result"][0]["name"] == "age"
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_type_default(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/type?prefix=obj")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        assert len(data["result"]) == 1
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_type_default_empty(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/type?prefix=xxx")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        assert len(data["result"]) == 0
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_type_empty(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/type?prefix=xxx")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        assert len(data["result"]) == 0
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_type_all(db_path):
    # "dog" matches both "good dog" and "bad dog" status values.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/type?prefix=dog")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        assert len(data["result"]) == 2
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
async def test_response_suggest_type_1(db_path):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/type?prefix=good")
        assert 200 == response.status_code
        data = response.json()

        assert "result" in data
        assert len(data["result"]) == 1
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_post_type(db_path, method):
    # A single (non-list) type value must be accepted.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido", "type": "bad dog"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 1
        result = data["q0"]["result"][0]
        assert result["id"] == "3"
        assert result["name"] == "Fido"
        assert result["score"] == 100
        assert result["type"] == [
            {
                "name": "bad dog",
                "id": "bad dog",
            }
        ]
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_post_type_list(db_path, method):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido", "type": ["bad dog"]}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 1
        result = data["q0"]["result"][0]
        assert result["id"] == "3"
        assert result["name"] == "Fido"
        assert result["score"] == 100
        assert result["type"] == [
            {
                "name": "bad dog",
                "id": "bad dog",
            }
        ]
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_post_type_empty(db_path, method):
    # Fido's status is "bad dog", so filtering on "good dog" yields nothing.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido", "type": ["good dog"]}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 0
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_post_type_not_given(db_path, method):
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "type_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 1
        result = data["q0"]["result"][0]
        assert result["id"] == "3"
        assert result["name"] == "Fido"
        assert result["score"] == 100
        assert result["type"] == [
            {
                "name": "bad dog",
                "id": "bad dog",
            }
        ]
        assert response.headers["Access-Control-Allow-Origin"] == "*"


@pytest.mark.asyncio
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_description_field(db_path, method):
    app = Datasette(
        [db_path],
        metadata=plugin_metadata(
            {
                "name_field": "name",
                "description_field": "status",
            }
        ),
    ).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        assert "q0" in data.keys()
        assert len(data["q0"]["result"]) == 1
        result = data["q0"]["result"][0]
        assert result["id"] == "3"
        assert result["name"] == "Fido"
        assert result["score"] == 100
        assert result["description"] == "bad dog"
        assert response.headers["Access-Control-Allow-Origin"] == "*"
--------------------------------------------------------------------------------
/tests/test_reconcile_config.py:
--------------------------------------------------------------------------------
import pytest
from datasette.utils.asgi import NotFound

from datasette_reconcile.utils import ReconcileError, check_config


@pytest.mark.asyncio
async def test_plugin_configuration_missing(ds):
    with pytest.raises(NotFound, match="datasette-reconcile not configured .*"):
        await check_config({}, ds.get_database("test"), "dogs")


@pytest.mark.asyncio
async def test_plugin_configuration_no_name(ds):
    with pytest.raises(ReconcileError, match="Name field must be defined to activate reconciliation"):
        await check_config({"id_field": "id"}, ds.get_database("test"), "dogs")


@pytest.mark.asyncio
async def test_plugin_configuration_table_not_found(ds):
    with pytest.raises(NotFound, match="Table not found: test"):
        await check_config({"name_field": "name"}, ds.get_database("test"), "test")


@pytest.mark.asyncio
async def test_plugin_configuration_use_pk(ds):
    # With no id_field configured, the single-column primary key is used.
    config = await check_config({"name_field": "name"}, ds.get_database("test"), "dogs")
    assert config["name_field"] == "name"
    assert config["id_field"] == "id"
    assert config["type_default"] == [
        {
            "name": "Object",
            "id": "object",
        }
    ]
    assert "type_field" not in config


@pytest.mark.asyncio
async def test_plugin_configuration_max_limit(ds):
    with pytest.raises(TypeError, match="max_limit in reconciliation config must be an integer"):
        await check_config({"name_field": "name", "max_limit": "BLAH"}, ds.get_database("test"), "dogs")


@pytest.mark.asyncio
async def test_plugin_configuration_type_default(ds):
    with pytest.raises(ReconcileError, match="type_default should be a list of objects"):
        await check_config({"name_field": "name", "type_default": "BLAH"}, ds.get_database("test"), "dogs")
    with pytest.raises(ReconcileError, match="type_default values should be objects"):
        await check_config({"name_field": "name", "type_default": ["BLAH"]}, ds.get_database("test"), "dogs")
    with pytest.raises(ReconcileError, match="type_default 'id' values should be strings"):
        await check_config(
            {"name_field": "name", "type_default": [{"id": 1, "name": "test"}]}, ds.get_database("test"), "dogs"
        )
    with pytest.raises(ReconcileError, match="type_default 'name' values should be strings"):
        await check_config(
            {"name_field": "name", "type_default": [{"name": 1, "id": "test"}]}, ds.get_database("test"), "dogs"
        )


@pytest.mark.asyncio
async def test_plugin_configuration_use_id_field(ds):
    config = await check_config(
        {
            "name_field": "name",
            "id_field": "id",
        },
        ds.get_database("test"),
        "dogs",
    )
    assert config["name_field"] == "name"
    assert config["id_field"] == "id"
    assert config["type_default"] == [
        {
            "name": "Object",
            "id": "object",
        }
    ]
    assert "type_field" not
in config 80 | 81 | 82 | @pytest.mark.asyncio 83 | async def test_plugin_configuration_use_type_field(ds): 84 | config = await check_config( 85 | { 86 | "name_field": "name", 87 | "id_field": "id", 88 | "type_field": [ 89 | { 90 | "name": "Status", 91 | "id": "status", 92 | } 93 | ], 94 | }, 95 | ds.get_database("test"), 96 | "dogs", 97 | ) 98 | assert config["name_field"] == "name" 99 | assert config["id_field"] == "id" 100 | assert config["type_field"] == [ 101 | { 102 | "name": "Status", 103 | "id": "status", 104 | } 105 | ] 106 | assert "type_default" not in config 107 | 108 | 109 | @pytest.mark.asyncio 110 | async def test_plugin_configuration_use_type_default_incorrect(ds): 111 | with pytest.raises(ReconcileError, match="type_default should"): 112 | await check_config( 113 | { 114 | "name_field": "name", 115 | "id_field": "id", 116 | "type_default": "dog", 117 | }, 118 | ds.get_database("test"), 119 | "dogs", 120 | ) 121 | 122 | 123 | @pytest.mark.asyncio 124 | async def test_plugin_configuration_use_type_default(ds): 125 | config = await check_config( 126 | { 127 | "name_field": "name", 128 | "id_field": "id", 129 | "type_default": [ 130 | { 131 | "name": "Dog", 132 | "id": "dog", 133 | } 134 | ], 135 | }, 136 | ds.get_database("test"), 137 | "dogs", 138 | ) 139 | assert config["name_field"] == "name" 140 | assert config["id_field"] == "id" 141 | assert config["type_default"] == [ 142 | { 143 | "name": "Dog", 144 | "id": "dog", 145 | } 146 | ] 147 | assert "type_field" not in config 148 | 149 | 150 | @pytest.mark.asyncio 151 | async def test_plugin_configuration_use_fts_table_none(ds): 152 | config = await check_config( 153 | { 154 | "name_field": "name", 155 | "id_field": "id", 156 | "type_default": [ 157 | { 158 | "name": "Dog", 159 | "id": "dog", 160 | } 161 | ], 162 | }, 163 | ds.get_database("test"), 164 | "dogs", 165 | ) 166 | assert config["fts_table"] is None 167 | 168 | 169 | @pytest.mark.asyncio 170 | async def 
test_plugin_configuration_use_fts_table_config(ds): 171 | config = await check_config( 172 | { 173 | "name_field": "name", 174 | "id_field": "id", 175 | "fts_table": "dog_fts", 176 | "type_default": [ 177 | { 178 | "name": "Dog", 179 | "id": "dog", 180 | } 181 | ], 182 | }, 183 | ds.get_database("test"), 184 | "dogs", 185 | ) 186 | assert config["fts_table"] == "dog_fts" 187 | 188 | 189 | @pytest.mark.asyncio 190 | async def test_plugin_configuration_ignore_fts_table(ds_fts): 191 | config = await check_config( 192 | { 193 | "name_field": "name", 194 | "id_field": "id", 195 | "fts_table": "dog_blah_fts", 196 | "type_default": [ 197 | { 198 | "name": "Dog", 199 | "id": "dog", 200 | } 201 | ], 202 | }, 203 | ds_fts.get_database("test"), 204 | "dogs", 205 | ) 206 | assert config["fts_table"] == "dog_blah_fts" 207 | 208 | 209 | @pytest.mark.asyncio 210 | async def test_plugin_configuration_use_fts_table(ds_fts): 211 | config = await check_config( 212 | { 213 | "name_field": "name", 214 | "id_field": "id", 215 | "type_default": [ 216 | { 217 | "name": "Dog", 218 | "id": "dog", 219 | } 220 | ], 221 | }, 222 | ds_fts.get_database("test"), 223 | "dogs", 224 | ) 225 | assert config["fts_table"] == "dogs_fts" 226 | 227 | 228 | @pytest.mark.asyncio 229 | async def test_plugin_configuration_use_fts_table_ignore(ds_fts): 230 | config = await check_config( 231 | { 232 | "name_field": "name", 233 | "id_field": "id", 234 | "fts_table": None, 235 | "type_default": [ 236 | { 237 | "name": "Dog", 238 | "id": "dog", 239 | } 240 | ], 241 | }, 242 | ds_fts.get_database("test"), 243 | "dogs", 244 | ) 245 | assert config["fts_table"] is None 246 | 247 | 248 | @pytest.mark.asyncio 249 | async def test_view_url_set(ds): 250 | config = await check_config( 251 | { 252 | "name_field": "name", 253 | "id_field": "id", 254 | "view_url": "https://example.com/{{id}}", 255 | }, 256 | ds.get_database("test"), 257 | "dogs", 258 | ) 259 | assert config["view_url"] == "https://example.com/{{id}}" 260 | 

@pytest.mark.asyncio
async def test_view_url_no_id(ds):
    # A view_url without the {{id}} placeholder is rejected.
    with pytest.raises(ReconcileError, match="View URL must contain {{id}}"):
        _ = await check_config(
            {
                "name_field": "name",
                "id_field": "id",
                "view_url": "https://example.com/",
            },
            ds.get_database("test"),
            "dogs",
        )


@pytest.mark.asyncio
async def test_plugin_configuration_use_description_field(ds):
    # description_field is carried through to the normalised config.
    config = await check_config(
        {
            "name_field": "name",
            "description_field": "status",
        },
        ds.get_database("test"),
        "dogs",
    )
    assert config["description_field"] == "status"


@pytest.mark.asyncio
async def test_plugin_configuration_no_description_field(ds):
    # When not configured, description_field defaults to None.
    config = await check_config(
        {
            "name_field": "name",
        },
        ds.get_database("test"),
        "dogs",
    )
    assert config["description_field"] is None
--------------------------------------------------------------------------------
/tests/test_reconcile_schema.py:
--------------------------------------------------------------------------------
# Validates the plugin's HTTP responses against the official Reconciliation
# API JSON Schemas (one parametrized case per schema version).
import json
import logging

import httpx
import jsonschema
import pytest
from datasette.app import Datasette

from tests.conftest import do_method, get_schema, plugin_metadata, registry

logger = logging.getLogger(__name__)


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("manifest.json").items())
async def test_schema_manifest(schema_version, schema, db_path):
    # The service manifest (GET on the reconcile endpoint) must validate
    # against every supported version of the manifest schema.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("manifest.json").items())
async def test_schema_manifest_extend(schema_version, schema, db_path):
    # The manifest must stay schema-valid when data-extension "properties"
    # are configured for the table.
    app = Datasette(
        [db_path],
        metadata=plugin_metadata(
            {"name_field": "name", "properties": [{"name": "status", "label": "Status", "type": "text"}]}
        ),
    ).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile")
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("reconciliation-result-batch.json").items())
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_schema(schema_version, schema, db_path, method):
    # A query batch response (with a match) must validate against the
    # reconciliation-result-batch schema, via both GET and POST.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("reconciliation-result-batch.json").items())
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_schema_description_field(schema_version, schema, db_path, method):
    # Results carrying a "description" value must still be schema-valid.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name", "description_field": "status"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "fido"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("reconciliation-result-batch.json").items())
@pytest.mark.parametrize("method", ["post", "get"])
async def test_response_queries_no_results_schema(schema_version, schema, db_path, method):
    # An empty result set ("abcdef" matches nothing) must also be schema-valid.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data={"queries": json.dumps({"q0": {"query": "abcdef"}})},
        )
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("data-extension-response.json").items())
@pytest.mark.parametrize("method", ["post", "get"])
async def test_extend_schema(schema_version, schema, db_path, method):
    # A data-extension request ("extend" payload with ids + properties)
    # must produce a response valid against the data-extension schema.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        extend = {"extend": json.dumps({"ids": ["1", "2", "3", "4"], "properties": [{"id": "status"}, {"id": "age"}]})}
        response = await do_method(
            client,
            method,
            "http://localhost/test/dogs/-/reconcile",
            data=extend,
        )
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("suggest-properties-response.json").items())
async def test_suggest_property_schema(schema_version, schema, db_path):
    # The property-suggest endpoint response must match its schema.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/property?prefix=a")
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("suggest-entities-response.json").items())
async def test_suggest_entity_schema(schema_version, schema, db_path):
    # The entity-suggest endpoint response must match its schema.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/entity?prefix=a")
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("schema_version, schema", get_schema("suggest-types-response.json").items())
async def test_suggest_type_schema(schema_version, schema, db_path):
    # The type-suggest endpoint response must match its schema.
    app = Datasette([db_path], metadata=plugin_metadata({"name_field": "name"})).app()
    async with httpx.AsyncClient(app=app) as client:
        response = await client.get("http://localhost/test/dogs/-/reconcile/suggest/type?prefix=a")
        assert 200 == response.status_code
        data = response.json()
        logging.info(f"Schema version: {schema_version}")
        jsonschema.validate(
            instance=data,
            schema=schema,
            cls=jsonschema.Draft7Validator,
            registry=registry,
        )
--------------------------------------------------------------------------------
/tests/test_reconcile_utils.py:
--------------------------------------------------------------------------------
# Unit tests for get_select_fields(): the list of columns the reconcile
# query selects, derived from the normalised config.
from datasette_reconcile.utils import get_select_fields


def test_get_select_fields():
    # Base case: id, name and type columns are selected; type_default
    # does not contribute a column.
    config = {
        "id_field": "id",
        "name_field": "name",
        "type_field": "type",
        "type_default": [{"id": "default", "name": "Default"}],
    }
    assert get_select_fields(config) == ["id", "name", "type"]


def test_get_select_fields_description():
    # description_field, when present, is appended after the type column.
    config = {
        "id_field": "id",
        "name_field": "name",
        "type_field": "type",
        "description_field": "description",
        "type_default": [{"id": "default", "name": "Default"}],
    }
    assert get_select_fields(config) == ["id", "name", "type", "description"]
--------------------------------------------------------------------------------