├── tests ├── __init__.py ├── test_sdterms.csv ├── test_lindas.py ├── test_data.csv ├── test_sdterms.yml ├── test.yml ├── test_sdterms.py └── test_shared_dimension_generation.py ├── pylindas ├── getter │ ├── __init__.py │ └── get.py ├── lindas │ ├── __init__.py │ ├── validate.py │ ├── query.py │ ├── namespaces.py │ └── upload.py ├── pycube │ ├── __init__.py │ └── shared_dimension.py ├── pyshareddimension │ ├── __init__.py │ └── README.md ├── __init__.py ├── example.py ├── shared_dimension_queries │ ├── example_sd.py │ ├── README.md │ └── shared_dimensions_queries.py ├── description.schema.json ├── cli.py └── fetch.py ├── example ├── Cubes │ ├── kita │ │ ├── .gitignore │ │ ├── README.md │ │ ├── Makefile │ │ └── description.json │ ├── shared │ │ ├── bundeslander │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── Makefile │ │ │ ├── schema.json │ │ │ └── transform.py │ │ └── README.md │ ├── concept_table_airport │ │ ├── data.csv │ │ ├── airportdummyconcept.csv │ │ ├── airportconcept.csv │ │ ├── airport.py │ │ ├── description.yml │ │ ├── cube_with_concept.ttl │ │ └── README.md │ ├── wind │ │ ├── README.md │ │ ├── Makefile │ │ ├── data.csv │ │ ├── frictionless.json │ │ └── description.json │ ├── Population_Aargau │ │ ├── func.py │ │ ├── integration.py │ │ ├── age.csv │ │ ├── fetch.py │ │ ├── prepare.py │ │ └── description.yml │ ├── Biotope_Statistik │ │ ├── data.csv │ │ ├── biotope.py │ │ └── description.yml │ ├── greenhouse_limit │ │ ├── data.csv │ │ └── description.yml │ ├── mock │ │ ├── mock.py │ │ ├── mock-cube-cube.ttl │ │ ├── data.csv │ │ └── description.yml │ ├── co2-limits │ │ ├── data.csv │ │ └── description.yml │ └── corona │ │ └── description.json └── Shared_Dimensions │ └── shared_dimension_generation │ ├── sd_example_SHACL_result.ttl │ ├── sd_example.py │ ├── sd_terms.csv │ └── sd_description.yml ├── .prettierrc ├── scripts └── fuseki │ ├── start.sh │ └── config-mem.ttl ├── requirements.txt ├── docs ├── contributing.md ├── examples.md ├── uris.md ├── 
installation.md ├── concepts.md ├── roadmap.md ├── sd.md ├── cli.md ├── functionality.md └── yaml.md ├── README.md ├── LICENSE ├── pyproject.toml ├── .github └── workflows │ ├── ci.yaml │ └── publish-pypi.yml └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pylindas/getter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pylindas/lindas/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example/Cubes/kita/.gitignore: -------------------------------------------------------------------------------- 1 | cube.ttl 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2, 3 | "useTabs": false 4 | } 5 | -------------------------------------------------------------------------------- /pylindas/pycube/__init__.py: -------------------------------------------------------------------------------- 1 | from pylindas.pycube.cube import Cube -------------------------------------------------------------------------------- /tests/test_sdterms.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Zürich 3 | 2,Bern 4 | 3,Luzern -------------------------------------------------------------------------------- /example/Cubes/shared/bundeslander/.gitignore: -------------------------------------------------------------------------------- 1 | data.transformed.geojson 2 | data.pretty.geojson 3 | -------------------------------------------------------------------------------- 
/pylindas/pyshareddimension/__init__.py: -------------------------------------------------------------------------------- 1 | from pylindas.pyshareddimension.shared_dimension import SharedDimension -------------------------------------------------------------------------------- /example/Cubes/shared/README.md: -------------------------------------------------------------------------------- 1 | ## Shared dimensions 2 | 3 | This folder contains shared dimensions that can be used by other examples. 4 | -------------------------------------------------------------------------------- /example/Cubes/concept_table_airport/data.csv: -------------------------------------------------------------------------------- 1 | year,typeOfAirport,typeOfAirport2nd,measure 2 | 2001,A,a,12 3 | 2002,A,a,15 4 | 2001,B,b,19 5 | 2002,B,b,20 6 | 2001,A,dummy,15 -------------------------------------------------------------------------------- /scripts/fuseki/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run --rm -p 3030:3030 -v $(pwd):/usr/share/data atomgraph/fuseki --config=/usr/share/data/scripts/fuseki/config-mem.ttl 4 | -------------------------------------------------------------------------------- /example/Cubes/wind/README.md: -------------------------------------------------------------------------------- 1 | Original: https://data.europa.eu/data/datasets/fc49eebf-3750-4c9c-a29e-6696eb644362/quality?locale=en&validate=90598e7e-5e72-403a-abb3-797165e1b487 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==2.1.3 2 | pandas==2.2.3 3 | pyshacl==0.26.0 4 | pystardog==0.17.0 5 | PyYAML==6.0.2 6 | rdflib==7.0.0 7 | requests==2.32.3 8 | sparql-dataframe==0.4 9 | SPARQLWrapper==2.0.0 10 | -------------------------------------------------------------------------------- 
/tests/test_lindas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pylindas.pycube import Cube 3 | from rdflib import Graph 4 | 5 | class TestClass: 6 | def test_validate_cube(self): 7 | pass #test fora wrapper function? -------------------------------------------------------------------------------- /example/Cubes/shared/bundeslander/README.md: -------------------------------------------------------------------------------- 1 | # German states shapes 2 | 3 | ``` 4 | cd examples/shared/bundeslander 5 | make 6 | curl 'http://localhost:3030/dataset' -H 'Content-Type: text/turtle' -X POST -T data.ttl 7 | ``` 8 | -------------------------------------------------------------------------------- /example/Cubes/wind/Makefile: -------------------------------------------------------------------------------- 1 | cube.ttl: data.csv description.json 2 | echo "Navigating to the project root directory" 3 | cd ../.. && \ 4 | python cli.py serialize example/wind/ example/wind/cube.ttl --sep ";" --decimal "," -------------------------------------------------------------------------------- /example/Shared_Dimensions/shared_dimension_generation/sd_example_SHACL_result.ttl: -------------------------------------------------------------------------------- 1 | @prefix sh: . 2 | @prefix xsd: . 3 | 4 | [] a sh:ValidationReport ; 5 | sh:conforms true . 6 | 7 | -------------------------------------------------------------------------------- /example/Cubes/kita/README.md: -------------------------------------------------------------------------------- 1 | # Example Kita dataset 2 | 3 | This is an example working with the dataset "Children in day-to-day facilities by country" 4 | from the [European Open Data Portal](https://data.europa.eu/). 
def validate(data_graph: Graph, shacl_graph: Graph) -> tuple:
    """Validate a data graph against a SHACL shapes graph using pySHACL.

    Args:
        data_graph: Graph holding the data to be checked.
        shacl_graph: Graph holding the SHACL shapes to check against.

    Returns:
        A ``(conforms, text)`` tuple: the conformance flag and the textual
        report produced by pySHACL. The report graph is discarded.
    """
    # This wrapper has the same name as the module-level
    # ``from pyshacl import validate``, so the bare name ``validate`` refers to
    # this function itself. The original body therefore called itself and
    # failed with a TypeError on the unexpected ``abort_on_first`` keyword.
    # Import the library function under an alias to reach the real validator.
    from pyshacl import validate as shacl_validate

    conforms, _results_graph, text = shacl_validate(
        data_graph,
        shacl_graph=shacl_graph,
        abort_on_first=True,
        inference="none",
        advanced=True,
    )
    return conforms, text
2023;3173;8505,24 14 | 2024;3170;8592,34 15 | -------------------------------------------------------------------------------- /example/Cubes/concept_table_airport/airportdummyconcept.csv: -------------------------------------------------------------------------------- 1 | typeOfAirportID,typeOfAirportSecondID,typeOfAirport_en,typeOfAirport_de,typeOfAirport_fr,position,description_en,description_fr,other_property_example 2 | A,dummy,Dummy airport,Dummy airport (de),Dummy airport (fr),3,A dummy airport type to test the matchings,A dummy airport type to test the matchings (fr),another property example for dummy airport 3 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing and Suggestions 2 | 3 | If you wish to contribute to this project, feel free to clone this repository and open a pull request to be reviewed and merged. 4 | 5 | Alternatively feel free to open an [issue](https://github.com/Kronmar-Bafu/lindas-pylindas/issues) with a question or a suggestion on what could be implemented. There is also a [roadmap](roadmap.md) for the further development of `pylindas`. 6 | -------------------------------------------------------------------------------- /docs/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | There are multiple examples for cubes and shared dimensions in the [example](../example) folder. They usually consist of the following files: 4 | 5 | - .csv file for the tabular data 6 | - .yml file for the description of the cube 7 | - .py file for running `pylindas` 8 | - .ttl file as output after running `pylindas` 9 | 10 | For some examples, there are also files for different concepts. 
11 | -------------------------------------------------------------------------------- /example/Cubes/concept_table_airport/airportconcept.csv: -------------------------------------------------------------------------------- 1 | typeOfAirportID,typeOfAirportSecondID,typeOfAirport_en,typeOfAirport_de,typeOfAirport_fr,position,description_en,description_fr,other_property_example 2 | A,a,Domestic airport,Inlandflughafen,Aéroport national,1,Domestic airport description,Description de Aéroport national,another property example for domesctic airport 3 | B,b,International airport,Internationaler Flughafen,Aéroport international,2,International airport description,Description de Aéroport international,another property example for international airport -------------------------------------------------------------------------------- /example/Cubes/kita/Makefile: -------------------------------------------------------------------------------- 1 | all: cube.ttl 2 | 3 | data.csv: 4 | echo "1. Download CSV from https://data.europa.eu/data/datasets/https-www-datenportal-bmbf-de-portal-2-2-5?locale=en" 5 | # Add your download command here 6 | echo "2. Clean the file (removing merged cells, removing rows about the header, removing extra Anzahl grouping columns)" 7 | echo "3. Export as CSV" 8 | 9 | cube.ttl: data.csv description.json 10 | echo "Navigating to the project root directory" 11 | cd ../.. 
&& \ 12 | python cli.py serialize example/kita/ example/kita/cube.ttl --na_value "-" 13 | -------------------------------------------------------------------------------- /tests/test_data.csv: -------------------------------------------------------------------------------- 1 | Jahr,Station,Wert,LowerUnsicherheit,UpperUnsicherheit,Wert2,Standardfehler,Status 2 | 2000,Bern,23.0,1.0,10.0,11.5,5,final 3 | 2000,Zürich,23.555744036232408,1.0,10.0,11.6,5,final 4 | 2000,Schweiz,23.1,1.0,11,4,12.0,final 5 | 2001,Bern,21.536090723505524,1.0,10.0,10.75,5,final 6 | 2001,Zürich,21.659924330021255,1.0,10.0,10.3,5,final 7 | 2001,Schweiz,21.1,1.0,10.0,10.3,11.3,final 8 | 2002,Bern,22.575144684250287,2.0,10.0,11.25,5,provisionally 9 | 2002,Zürich,20.688211936144263,2.0,10.0,10.3,5,provisionally 10 | 2002,Schweiz,21.4,2.1,4.3,41.2,8,provisionally -------------------------------------------------------------------------------- /tests/test_sdterms.yml: -------------------------------------------------------------------------------- 1 | Identifier: test_canton 2 | Name: 3 | en: cantons 4 | fr: cantons 5 | de: cantons 6 | it: cantons 7 | # Description is optional 8 | Description: 9 | fr: cantons 10 | en: cantons 11 | # Valid-from is optional, it is a date/time value 12 | # Note: it is currently optional, but might need to become mandatory as validFrom, and later validThrough, are used to make a SD and its term 'deprecated' 13 | Valid-from: 2025-02-05T00:00:00Z 14 | # Contributor is optional, it is now added by the Cube Creator when creating a new SD 15 | Contributor: 16 | name: Joshua Hirt 17 | email: joshua.hirt@bafu.admin.ch 18 | Terms: 19 | identifier-field: id 20 | name-field: name 21 | multilingual: False 22 | -------------------------------------------------------------------------------- /docs/uris.md: -------------------------------------------------------------------------------- 1 | # URIs 2 | 3 | It is important to understand, how the settings in the `description.yaml` file 
determine the different URIs of the cube: 4 | 5 | For the following settings: 6 | 7 | ```yaml 8 | Base-URI: https://environment.ld.admin.ch/foen/ 9 | Identifier: wps 10 | Version: 1 11 | ``` 12 | 13 | the following URIs will result: 14 | 15 | - Cube: https://environment.ld.admin.ch/foen/cube/wps/1 16 | - Observation Set: https://environment.ld.admin.ch/foen/cube/wps/1/ObservationSet 17 | - Observation Constraints: https://environment.ld.admin.ch/foen/cube/wps/1/shape 18 | - Observations: https://environment.ld.admin.ch/foen/cube/wps/1/observation/{list_of_key_dimensions} 19 | - Properties: https://environment.ld.admin.ch/foen/{propertyName} -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | > [!NOTE] 4 | > Using `pylindas` does require basic to intermediate python skills. 5 | 6 | There are two ways to install this package, locally or through the [Python Package Index (PyPI)](https://pypi.org). 7 | 8 | ## Published Version 9 | 10 | You can install this package through pip without cloning the repository. 11 | 12 | ``` 13 | pip install pylindas 14 | ``` 15 | 16 | ## Locally 17 | 18 | Clone this repository and `cd` into the directory. You can now install this package locally on your machine - we advise to use a virtual environment to avoid conflicts with other projects. Additionally, install all dependencies as described in `requirements.txt` 19 | 20 | ``` 21 | pip install -e . 
22 | pip install -r requirements.txt 23 | ``` 24 | -------------------------------------------------------------------------------- /example/Cubes/wind/frictionless.json: -------------------------------------------------------------------------------- 1 | { 2 | "path": "https://opendata.schleswig-holstein.de/dataset/fc49eebf-3750-4c9c-a29e-6696eb644362/resource/b8a7b43c-3529-4b92-bb49-7bf4e9109dfb/download/opendata_wka_inbetrieb_sh_20230103.csv", 3 | "name": "wka-inbetrieb", 4 | "profile": "tabular-data-resource", 5 | "format": "csv", 6 | "encoding": "utf-8", 7 | "dialect": { 8 | "delimiter": ";" 9 | }, 10 | "schema": { 11 | "fields": [ 12 | { 13 | "type": "integer", 14 | "name": "Jahr" 15 | }, 16 | { 17 | "type": "integer", 18 | "name": "Anzahl_inBetrieb_WKA_SH", 19 | "title": "Anzahl" 20 | }, 21 | { 22 | "type": "number", 23 | "decimalChar": ",", 24 | "name": "Leistung_MW", 25 | "unit": "MW" 26 | } 27 | ] 28 | } 29 | } -------------------------------------------------------------------------------- /example/Cubes/Population_Aargau/integration.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import yaml 3 | import os 4 | 5 | from pylindas.pycube import Cube 6 | from pylindas.lindas.namespaces import SCHEMA 7 | 8 | ENVIRONMENT = os.getenv("CI_ENVIRONMENT_NAME") 9 | 10 | # Load data and yaml 11 | df = pd.read_csv("3_data_preparation/data.csv", encoding="utf-8", sep=",") 12 | with open("4_data_integration/integration.yaml", encoding="utf-8") as file: 13 | cube_yaml = yaml.safe_load(file) 14 | 15 | cube = Cube(dataframe=df, cube_yaml=cube_yaml, environment="TEST", local=True) 16 | 17 | cube.prepare_data() 18 | cube.write_cube() 19 | cube.write_observations() 20 | cube.write_shape() 21 | 22 | # Create concept 23 | age_group_concept = pd.read_csv("3_data_preparation/age.csv", encoding="utf-8", sep=",") 24 | cube.write_concept("age-group", age_group_concept) 25 | 26 | 
cube.serialize("4_data_integration/cube.ttl") -------------------------------------------------------------------------------- /example/Cubes/Biotope_Statistik/data.csv: -------------------------------------------------------------------------------- 1 | Typ,Überlappung,Anzahl,Anteil der CH-Biotope,Fläche,Anteil CH-Fläche,Anteil der CH-Biotope (Fläche) 2 | Hochmoore (Typen I + II),Mit Überlappung,551,7.8,1567.5,0.04,1.5 3 | Flachmoore,Mit Überlappung,1335,18.8,22501.4,0.54,22.1 4 | Auengebiete,Mit Überlappung,326,4.6,27844.5,0.67,27.3 5 | Amphibienlaichgebiete,Mit Überlappung,929,13.1,21670.9,0.52,21.3 6 | Trockenwiesen und -weiden,Mit Überlappung,3951,55.7,28280.6,0.68,27.8 7 | Biotope,Mit Überlappung,7092,100.0,101864.9,2.47,100.0 8 | Hochmoore (Typen I + II),Ohne Überlappung,551,7.8,1567.5,0.04,1.7 9 | Flachmoore,Ohne Überlappung,1335,18.8,22495.0,0.54,24.0 10 | Auengebiete,Ohne Überlappung,326,4.6,26416.9,0.64,28.2 11 | Amphibienlaichgebiete,Ohne Überlappung,929,13.1,14847.6,0.36,15.9 12 | Trockenwiesen und -weiden,Ohne Überlappung,3951,55.7,28280.6,0.68,30.2 13 | Biotope,Ohne Überlappung,7092,100.0,93607.6,2.27,100.0 -------------------------------------------------------------------------------- /example/Cubes/shared/bundeslander/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all download ../../../py_cube/cube/shared_dimensions.py 2 | 3 | all: data.pretty.geojson data.transformed.geojson data.ttl 4 | 5 | data.geojson: 6 | echo "Original data downloaded from https://opendatalab.de/projects/geojson-utilities/" 7 | echo " * Checked all checkboxes in Auswahlhilfe," 8 | echo " * then Optionen > \"Welche Flächen: Bundesland Flächen\"," 9 | echo " * then close Dialog, and click \"Export .geojson\" -> data.geojson" 10 | 11 | data.pretty.geojson: data.geojson 12 | jq . 
$< > $@ 13 | 14 | data.transformed.geojson: transform.py data.pretty.geojson 15 | python transform.py data.geojson $@ 16 | 17 | data.ttl: data.transformed.geojson ../../../py_cube/cube/shared_dimensions.py 18 | cd ../../.. && \ 19 | python cli.py shared convert_geojson example/shared/bundeslander/data.transformed.geojson example/shared/bundeslander/data.ttl 20 | -------------------------------------------------------------------------------- /docs/concepts.md: -------------------------------------------------------------------------------- 1 | # Concepts 2 | 3 | The term `concept` refers to a very specific data structure within the [cube.link](https://cube.link) universe. 4 | 5 | ## Multi-Lingual Concepts 6 | 7 | `pylindas` has a basic implementation to handle: 8 | 9 | - concept tables 10 | - multilingual concepts 11 | 12 | A concept table is the possibility to handle the values of a dimension as a URI to a new resource (a concept). This is similar to an object that is the URI of a Shared Dimension's term, but here the concepts are created for the cube and uploaded with the cube. 13 | 14 | Remark: if the resource/concept already exists, then the case is similar to handling of a Shared Dimensions mapping, and this is already handled by `pylindas` with the "mapping" mechanism. 15 | 16 | See the folder `example/Cubes/concept_table_airport` and its [README](../example/Cubes/concept_table_airport/README.md) for detailed explanations. 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pylindas 2 | 3 | ## About 4 | 5 | `pylindas` is a python package for building and publishing linked data cubes according to the [cube.link](https://cube.link) schema. This schema is used to transform tabular data into [RDF](https://www.w3.org/RDF/). `pylindas` is an alternative to the [Cube-Creator](https://cube-creator.lindas.admin.ch). 
Whereas the Cube-Creator is a [GUI](https://en.wikipedia.org/wiki/Graphical_user_interface) tool, `pylindas` is more suited to fit into a pipeline workflow to create RDF. Currently this project is heavily linked to [LINDAS](https://lindas.admin.ch), the Swiss Federal Linked Data Service. 6 | 7 | ## Documentation 8 | 9 | - [Installation](docs/installation.md) 10 | - [Contributing](docs/contributing.md) 11 | - [Basic functionality](docs/functionality.md) 12 | - [Command line usage](docs/cli.md) 13 | - [Examples](docs/examples.md) 14 | - [URIs](docs/uris.md) 15 | - [Concepts](docs/concepts.md) 16 | - [Shared Dimensions](docs/sd.md) 17 | -------------------------------------------------------------------------------- /example/Cubes/shared/bundeslander/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "Extended GeoJSON Schema", 4 | "type": "object", 5 | "allOf": [ 6 | { 7 | "$ref": "http://json.schemastore.org/geojson" 8 | } 9 | ], 10 | "properties": { 11 | "features": { 12 | "type": "array", 13 | "items": { 14 | "type": "object", 15 | "properties": { 16 | "geometry": { 17 | "type": "object", 18 | "properties": { 19 | "type": { 20 | "type": "string", 21 | "enum": ["Point", "MultiPolygon", "Polygon"] 22 | } 23 | }, 24 | "required": ["type", "coordinates"] 25 | }, 26 | "properties": { 27 | "type": "object", 28 | "required": ["name_de"], 29 | "properties": { 30 | "name_de": { 31 | "type": "string" 32 | } 33 | } 34 | } 35 | } 36 | } 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /docs/roadmap.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | Feel free to add yourselves :) 4 | The quarters are meant as rough guidelines, not something fixed. It's recommended that each feature gets a separate issue (please link them). 
5 | 6 | Quarter | Features | Lead | Working on it 7 | ----- | ----- | ---- | ---- 8 | Q4-24 | Renaming, migration to BAR, CI, publication on pypi | Marco | Thomas, Marco 9 | Q1-25 | Validation (pycube) | Marco | Marco 10 | Q1-25 | Disentanglement of linpy and pycube | Claudio | Marco 11 | Q1-25 | Downloads of cubes from lindas (see here https://github.com/zazuko/cube-creator/wiki/Manually-removing-published-cube) | Marco | Marco 12 | Q1-25 | Benchmarking tripleization + validation of pycube, comparing with tarql | Marco | Marco 13 | Q1-25 (?) | Shared Dimensions in python | | Lian 14 | Q1-25 | Concepts with geo location (point), especially multi-lang | Marco | Marco 15 | Q1-25 | Smart Shared Dimensions (some API to look up existing ones, reuse them, etc etc) | Claudio | Fabian (?) 16 | Q2-25 | hierarchies | | 17 | Q2-25 | yaml downloads | Marco | Marco 18 | Q3-25 | fastAPI | | 19 | Q3-25 | smart upload (diff) | | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Kronmar-Bafu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /example/Cubes/Biotope_Statistik/biotope.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | import yaml 5 | import os 6 | 7 | from pylindas.pycube import Cube 8 | from pylindas.lindas.upload import upload_ttl 9 | from pylindas.lindas.query import cube_exists 10 | 11 | BASEDIR = os.path.dirname(__file__) 12 | DATAFILE = os.path.join(BASEDIR, "data.csv") 13 | CONFIGFILE = os.path.join(BASEDIR, "description.yml") 14 | CUBEFILE = os.path.join(BASEDIR, "cube.ttl") 15 | 16 | data = pd.read_csv(DATAFILE, encoding="utf-8", sep=",") 17 | with open(CONFIGFILE, encoding="utf-8") as file: 18 | config = yaml.safe_load(file) 19 | 20 | cube = Cube(dataframe=data, cube_yaml=config, environment="TEST", local=True) 21 | cube.prepare_data() 22 | cube.write_cube() 23 | cube.write_observations() 24 | cube.write_shape() 25 | valid, text = cube.validate() 26 | if valid: 27 | print(text) 28 | cube.serialize(CUBEFILE) 29 | if os.path.isfile("lindas.ini"): 30 | upload_ttl(filename=CUBEFILE, db_file="lindas.ini", environment="TEST", graph_uri="") 31 | else: 32 | print(text) 33 | #check for exception in .validate 34 | raise ValueError("Cube not Valid") -------------------------------------------------------------------------------- /pylindas/example.py: 
# Root folder that is searched for examples; resolved relative to the current
# working directory.
examples_dir = "example"


def load_example(example_id, base_uri="http://localhost:3030/dataset"):
    """Upload the ``cube.ttl`` of one example to a dataset endpoint.

    Args:
        example_id: Path of the example folder relative to ``examples_dir``.
        base_uri: URL the Turtle file is POSTed to.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    file_path = os.path.join(examples_dir, example_id, "cube.ttl")
    # Binary mode: the file is streamed as the request body unchanged.
    with open(file_path, "rb") as f:
        response = requests.post(
            base_uri,
            headers={"Content-Type": "text/turtle"},
            data=f,
        )
        response.raise_for_status()


def list_examples(language="en"):
    """List every example folder that holds a serialized cube and a description.

    A folder below ``examples_dir`` counts as an example when it contains both
    ``cube.ttl`` and ``description.json``.

    Args:
        language: Language key used to pick the name and description from the
            ``Name`` and ``Description`` objects of ``description.json``.

    Returns:
        A list of dicts with the keys ``id`` (folder path relative to
        ``examples_dir``), ``name`` and ``description``. Missing entries for
        ``language`` yield empty strings.
    """
    result = []
    for root, _dirs, files in os.walk(examples_dir):
        if "cube.ttl" not in files or "description.json" not in files:
            continue
        description_path = os.path.join(root, "description.json")
        # JSON is exchanged as UTF-8; without an explicit encoding the platform
        # locale is used, which garbles non-ASCII names on some systems.
        with open(description_path, "r", encoding="utf-8") as desc_file:
            desc = json.load(desc_file)
        result.append({
            "id": os.path.relpath(root, examples_dir),
            "name": desc.get("Name", {}).get(language, ""),
            "description": desc.get("Description", {}).get(language, ""),
        })
    return result
5 | 6 | ## Shared Dimensions Queries 7 | 8 | To link a dimension to an existing Shared Dimension, the following steps are necessary: 9 | 10 | - find a suitable Shared Dimension 11 | - use the URIs of the terms of that Shared Dimension to configure dimension in the yaml file and its "mapping" field 12 | 13 | `pylindas` has a basic implementation of: 14 | 15 | - basic queries to request shared dimensions information from [LINDAS](https://lindas.admin.ch) (including terms and their URIs) 16 | - display the results, line by line 17 | 18 | See the folder `pylindas/shared_dimension_queries` and its [README](../pylindas/shared_dimension_queries/README.md) for detailed explanation 19 | 20 | ## Generation of Shared Dimensions 21 | 22 | `pylindas` has a basic implementation to generate a Shared Dimension by transforming a .csv file to a corresponding RDF. 23 | 24 | See the folder `pylindas/pyshareddimension` and its [README](../pylindas/pyshareddimension/README.md) for detailed explanations. 25 | -------------------------------------------------------------------------------- /example/Cubes/greenhouse_limit/data.csv: -------------------------------------------------------------------------------- 1 | Jahr,THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung,THG-Emissionen mit der Treibhausgasbilanz der Landnutzung 2 | 1990,55.24389386,52.5774385 3 | 1991,57.07963814,50.24361298 4 | 1992,56.81419162,52.11014189 5 | 1993,54.23476738,48.90182898 6 | 1994,53.14582365,51.22014611 7 | 1995,54.03628518,49.86596536 8 | 1996,54.61752292,47.14090851 9 | 1997,53.41576335,49.23482595 10 | 1998,54.94403332,51.34175802 11 | 1999,54.72593647,53.33937066 12 | 2000,54.06542928,57.99289598 13 | 2001,55.57776168,55.42619108 14 | 2002,54.00443567,51.31726012 15 | 2003,55.03748992,53.00206377 16 | 2004,55.62473696,50.80056551 17 | 2005,56.30254624,53.90548843 18 | 2006,55.92278244,55.35708306 19 | 2007,54.01779198,52.45681872 20 | 2008,55.38803169,53.52469138 21 | 
# SPARQL query endpoint for each supported LINDAS environment.
_LINDAS_QUERY_ENDPOINTS = {
    "TEST": "https://test.lindas.admin.ch/query",
    "INT": "https://int.lindas.admin.ch/query",
    "PROD": "https://lindas.admin.ch/query",
}


def query_lindas(query: str, environment: str):
    """Send a query to a LINDAS SPARQL endpoint and return its boolean answer.

    Args:
        query: SPARQL query text. Only the ``"boolean"`` member of the JSON
            result is returned, which is the member an ASK query produces.
        environment: One of ``"TEST"``, ``"INT"`` or ``"PROD"``.

    Returns:
        The value stored under ``"boolean"`` in the JSON result.

    Raises:
        ValueError: If ``environment`` is not a known environment name.
    """
    # Reject unknown environments up front. The original ``match`` had no
    # default case, so an unknown name left ``sparql`` unbound and surfaced as
    # an UnboundLocalError further down.
    try:
        endpoint = _LINDAS_QUERY_ENDPOINTS[environment]
    except KeyError:
        known = ", ".join(_LINDAS_QUERY_ENDPOINTS)
        raise ValueError(
            f"Unknown environment {environment!r}; expected one of: {known}"
        ) from None

    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query=query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results["boolean"]


def cube_exists(cube_uri: str, environment: str):
    """Check whether a cube URI occurs as a subject in the given environment.

    Runs an ASK query for any triple with ``cube_uri`` as subject through
    ``query_lindas``.

    Args:
        cube_uri: URI of the cube to look up.
        environment: One of ``"TEST"``, ``"INT"`` or ``"PROD"``.

    Returns:
        The boolean answer of the ASK query.

    Raises:
        ValueError: If ``environment`` is not a known environment name.
    """
    query = f"ASK {{ <{cube_uri}> ?p ?o}}"
    return query_lindas(query, environment=environment)
def transform_geojson(input_file, output_file):
    """Reduce a GeoJSON file to IRI-identified features.

    Every feature keeps its ``type`` and ``geometry``; its properties are
    replaced by an ``iri`` built from the percent-encoded ``GEN`` property and
    a ``name_de`` holding the unencoded ``GEN`` value. The result is written
    as indented JSON with a ``$schema`` reference to ``./schema.json``.

    Args:
        input_file: Path of the GeoJSON file to read (UTF-8).
        output_file: Path of the GeoJSON file to write (UTF-8).

    Raises:
        KeyError: If the document or one of its features lacks an expected
            key (``type``, ``features``, ``geometry``, ``properties``/``GEN``).
    """
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which breaks names with umlauts on non-UTF-8 locales.
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    transformed_features = []
    for feature in data['features']:
        name = feature['properties']['GEN']
        transformed_features.append({
            'type': feature['type'],
            'geometry': feature['geometry'],
            'properties': {
                'iri': f"https://example.org/land/{quote(name)}",
                'name_de': name,
            },
        })

    transformed_data = {
        'type': data['type'],
        '$schema': './schema.json',
        'features': transformed_features,
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(transformed_data, f, indent=2)


def main():
    """Parse the command line and transform the given GeoJSON file."""
    parser = argparse.ArgumentParser(description='Transform a GeoJSON file.')
    parser.add_argument('input_file', type=str, help='The input GeoJSON file')
    parser.add_argument('output_file', type=str, help='The output GeoJSON file')
    args = parser.parse_args()

    transform_geojson(args.input_file, args.output_file)


if __name__ == '__main__':
    main()
34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /example/Shared_Dimensions/shared_dimension_generation/sd_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | import yaml 5 | import os 6 | 7 | from pylindas.pyshareddimension import SharedDimension 8 | from pylindas.lindas.upload import upload_ttl 9 | from pylindas.lindas.query import cube_exists 10 | 11 | BASEDIR = os.path.dirname(__file__) 12 | DIMENSIONFILE = os.path.join(BASEDIR, "sd_terms.csv") 13 | CONFIGFILE = os.path.join(BASEDIR, "sd_description.yml") 14 | SDFILE = os.path.join(BASEDIR, "sd_example.ttl") 15 | SHACLFILE = os.path.join(BASEDIR, "sd_example_Shacl_result.ttl") 16 | SHAREDDIMENSIONSHAPE = "https://raw.githubusercontent.com/Kronmar-Bafu/lindas-pylindas/refs/heads/main/pylindas/pyshareddimension/shared_dimension_shape.ttl" 17 | 18 | terms_df = pd.read_csv(DIMENSIONFILE, encoding="utf8", sep=";") 19 | 20 | with open(CONFIGFILE) as file: 21 | sd_yaml = yaml.safe_load(file) 22 | 23 | sd = SharedDimension(dataframe=terms_df, sd_yaml=sd_yaml, environment="TEST", local=True) 24 | sd.prepare_data() 25 | sd.write_sd() 26 | sd.write_terms() 27 | sd.serialize(SDFILE) 28 | print(sd) 29 | 30 | # About the SHACL validation, please see the comment of the SharedDimension.validate() method 31 | # in order to understand the parameters 32 | # This is work in progress as the SHACL file has to be passed as parameter instead of being downloaded from the Web behind the scene 33 | resultBool, resultTxt = sd.validate(SHAREDDIMENSIONSHAPE, SHACLFILE) 34 | print(f"Shared dimension validation result: {resultBool}, with message '{resultTxt}'") 35 | -------------------------------------------------------------------------------- /pylindas/lindas/namespaces.py: -------------------------------------------------------------------------------- 1 | from 
rdflib import Graph, Namespace 2 | 3 | 4 | CUBE = Namespace("https://cube.link/") 5 | DCAT = Namespace("http://www.w3.org/ns/dcat#") 6 | DCT = Namespace("http://purl.org/dc/terms/") 7 | FOAF = Namespace("http://xmlns.com/foaf/0.1/") 8 | LDADMIN = Namespace("https.//ld.admin.ch/application/") 9 | META = Namespace("https://cube.link/meta/") 10 | QUDT = Namespace("http://qudt.org/schema/qudt/") 11 | RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#") 12 | RELATION = Namespace("https://cube.link/relation/") 13 | SCHEMA = Namespace("http://schema.org/") 14 | SH = Namespace("http://www.w3.org/ns/shacl#") 15 | TIME = Namespace("http://www.w3.org/2006/time#") 16 | UNIT = Namespace("http://qudt.org/vocab/unit/") 17 | VCARD = Namespace("http://www.w3.org/2006/vcard/ns#") 18 | VOID = Namespace("http://rdfs.org/ns/void#") 19 | GEO = Namespace("http://www.opengis.net/ont/geosparql#") 20 | SKOS = Namespace("http://www.w3.org/2004/02/skos/core#") 21 | SD_MD = Namespace("https://cube-creator.zazuko.com/shared-dimensions/vocab#") 22 | XSD = Namespace("http://www.w3.org/2001/XMLSchema#") 23 | 24 | Namespaces = { 25 | "cube": CUBE, 26 | "dcat": DCAT, 27 | "dct": DCT, 28 | "schema": SCHEMA, 29 | "sh": SH, 30 | "foaf": FOAF, 31 | "ldadmin": LDADMIN, 32 | "meta": META, 33 | "qudt": QUDT, 34 | "rdf": RDF, 35 | "relation": RELATION, 36 | "time": TIME, 37 | "unit": UNIT, 38 | "vcard": VCARD, 39 | "void": VOID, 40 | "geo": GEO, 41 | "skos": SKOS, 42 | "sd_md": SD_MD, 43 | "xsd": XSD, 44 | } 45 | -------------------------------------------------------------------------------- /example/Cubes/mock/mock.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | import yaml 5 | import os 6 | 7 | from pylindas.pycube import Cube 8 | from pylindas.lindas.upload import upload_ttl 9 | from pylindas.lindas.query import cube_exists 10 | 11 | BASEDIR = os.path.dirname(__file__) 12 | DATAFILE = 
os.path.join(BASEDIR, "data.csv") 13 | CONFIGFILE = os.path.join(BASEDIR, "description.yml") 14 | CUBEFILE = os.path.join(BASEDIR, "mock-cube.ttl") 15 | 16 | mock_df = pd.read_csv(DATAFILE) 17 | 18 | with open(CONFIGFILE) as file: 19 | config = yaml.safe_load(file) 20 | 21 | cube = Cube(dataframe=mock_df, cube_yaml=config, environment="TEST", local=True) 22 | cube.prepare_data() 23 | cube.write_cube() 24 | cube.write_observations() 25 | cube.write_shape() 26 | cube.serialize("example/Cubes/mock/cube.ttl") 27 | print(cube) 28 | 29 | if not cube_exists(cube_uri=cube.get_iri(), environment="TEST"): 30 | if os.path.isfile("lindas.ini"): 31 | upload_ttl(filename=CUBEFILE, db_file="lindas.ini", environment="TEST", graph_uri="") 32 | 33 | modk_df_two_sided = pd.read_csv("tests/test_data.csv") 34 | with open("tests/test.yml") as file: 35 | two_sided_yaml = yaml.safe_load(file) 36 | cube_two_sided = Cube(dataframe=modk_df_two_sided, cube_yaml=two_sided_yaml, environment="TEST", local=True) 37 | cube_two_sided.prepare_data() 38 | cube_two_sided.write_cube() 39 | cube_two_sided.write_observations() 40 | cube_two_sided.write_shape() 41 | 42 | cube_two_sided.serialize("./example/Cubes/mock-cube-two-sided.ttl") 43 | if os.path.isfile("lindas.ini"): 44 | upload_ttl(filename="mock/mock-cube-two-sided.ttl", db_file="lindas.ini", environment="TEST", graph_uri="") 45 | -------------------------------------------------------------------------------- /pylindas/lindas/upload.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import stardog 3 | from configparser import ConfigParser 4 | from typing import Union 5 | 6 | 7 | 8 | #URL = "https://stardog-test.cluster.ldbar.ch/lindas?graph=..." 
9 | #HEADERS = {'Content-Type': 'text/turtle', 'Authorization': } 10 | 11 | # def uplod_ttl(filename: str, named_graph: str, password: str): 12 | # with open(filename) as file: 13 | # graph = file.read() 14 | # response = requests.request("POST", ) 15 | 16 | 17 | def _load_config(db_file:str, environment: str) -> dict: 18 | parser = ConfigParser() 19 | parser.read(db_file) 20 | 21 | config = {} 22 | if parser.has_section(environment): 23 | params = parser.items(environment) 24 | for param in params: 25 | config[param[0]] = param[1] 26 | else: 27 | raise Exception(f"Environment '{environment}' not found in db_file") 28 | 29 | return config 30 | 31 | 32 | def upload_ttl(filename: Union[str,list], db_file: str, environment: str, graph_uri: str, clear_graph: bool = False): 33 | conn_details = _load_config(db_file, environment) 34 | 35 | # todo: could graph_uri be specified in lindas.ini? 36 | with stardog.Connection("lindas", **conn_details) as conn: 37 | conn.begin() 38 | if clear_graph and graph_uri: 39 | #if graph_URI is Null or not given as an arugment, conn.clear clears the whole database, we are not risking that. 40 | conn.clear(graph_uri=graph_uri) 41 | 42 | def _add_file(file: str, graph_uri: str): 43 | print(f"uploading: {file}") 44 | conn.add(stardog.content.File(file=file), graph_uri=graph_uri) 45 | 46 | if isinstance(filename, str): 47 | _add_file(file=filename, graph_uri=graph_uri) 48 | else: 49 | for f in filename: 50 | _add_file(file=f, graph_uri=graph_uri) 51 | conn.commit() -------------------------------------------------------------------------------- /docs/cli.md: -------------------------------------------------------------------------------- 1 | # Command line 2 | 3 | There is also a `pylindas` command line utility, that expects an opinionated way to store 4 | the data and the description in a directory. It then is able to perform common operations. 
5 | 6 | ## Necessary Directory Layout 7 | 8 | The directory must be structured as follows: 9 | 10 | - `data.csv`: This file contains the observations. 11 | - `description.json` or `description.yml`: This file contains the cube and dimension descriptions. 12 | 13 | ## Command Line Usage 14 | 15 | For example, to serialize the data, use: 16 | 17 | ``` 18 | python cli.py serialize 19 | ``` 20 | 21 | For additional help and options, you can use: 22 | 23 | ``` 24 | python cli.py --help 25 | ``` 26 | 27 | ## Fetching from data sources 28 | 29 | There is the possibility to download datasets from other data sources. Right now, the functionality is basic, but 30 | it could be possible in the future to extend it. 31 | 32 | - It supports only datasets coming from data.europa.eu 33 | - It supports only datasets with a Frictionless datapackage 34 | 35 | See [Frictionless](https://frictionlessdata.io/introduction/#why-frictionless) for more information on Frictionless. 36 | 37 | ``` 38 | python fetch.py 'https://data.europa.eu/data/datasets/fc49eebf-3750-4c9c-a29e-6696eb644362?locale=en' example/corona/ 39 | ``` 40 | 41 | ## CLI Examples 42 | 43 | Multiple cube examples are ready in the [example](../example) directory. 44 | 45 | ```bash 46 | $ python cli.py example list 47 | corona: Corona Numbers Timeline 48 | kita: Number of kids in day care facilities 49 | wind: Wind turbines — operated WKA per year in Schleswig-Holstein 50 | ``` 51 | 52 | To load an example in a Fuseki database, you can use the load subcommand of the example command. 53 | 54 | ```bash 55 | $ python cli.py example load kita 56 | ``` 57 | 58 | There is a `start-fuseki` command that can be used to start a Fuseki server containing data 59 | from the examples. 
60 | 61 | ```bash 62 | $ python cli.py example start-fuseki 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/functionality.md: -------------------------------------------------------------------------------- 1 | # Basic Functionality and Structure 2 | 3 | The `pylindas` package consists of multiple submodules: 4 | 5 | ## `pycube` 6 | 7 | To avoid the feeling of a black box, the philosophy of `pycube` is to make the construction of cubes modular. The process will take place in multiple steps, outlined below: 8 | 9 | 1. **Initialization** 10 | 11 | ```python 12 | from pylindas.pycube import Cube 13 | 14 | cube = pycube.Cube(dataframe: pd.DataFrame, cube_yaml: dict) 15 | ``` 16 | 17 | This step initializes the cube with the data (`dataframe`) and the configuration (`cube_yaml`). 18 | 19 | 2. **Mapping** 20 | 21 | ```python 22 | cube.prepare_data() 23 | ``` 24 | 25 | Creates the observation URIs and applies the mappings as described in the `cube_yaml`. 26 | 27 | 3. **Write `cube:Cube`** 28 | 29 | ```python 30 | cube.write_cube() 31 | ``` 32 | 33 | Writes the `cube:Cube`. 34 | 35 | 4. **Write `cube:Observation`** 36 | 37 | ```python 38 | cube.write_observations() 39 | ``` 40 | 41 | Writes the `cube:Observation`s and the `cube:ObservationSet`. 42 | 43 | 5. **Write `cube:ObservationConstraint`** 44 | 45 | ```python 46 | cube.write_shape() 47 | ``` 48 | 49 | Writes the `cube:ObservationConstraint`. 50 | 51 | ## The Complete Work-Flow 52 | 53 | ```python 54 | # Write the cube 55 | cube = pycube.Cube(dataframe: pd.DataFrame, cube_yaml: dict, shape_yaml: dict) 56 | cube.prepare_data() 57 | cube.write_cube() 58 | cube.write_observations() 59 | cube.write_shape() 60 | 61 | # Upload the cube 62 | cube.upload(endpoint: str, named_graph: str) 63 | ``` 64 | 65 | For an upload, use `cube.upload(endpoint: str, named_graph: str)` with the proper `endpoint` as well as `named_graph`. 
66 | 67 | A `lindas.ini` file is read for this step, containing this information as well as a password. It has the following structure: 68 | 69 | ``` 70 | [TEST] 71 | endpoint=https://stardog-test.cluster.ldbar.ch 72 | username=a-lindas-user-name 73 | password=something-you-don't-need-to-see;) 74 | ``` 75 | 76 | With additional information for the other environments. -------------------------------------------------------------------------------- /scripts/fuseki/config-mem.ttl: -------------------------------------------------------------------------------- 1 | ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 2 | 3 | PREFIX : <#> 4 | PREFIX fuseki: <http://jena.apache.org/fuseki#> 5 | PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 6 | PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 7 | PREFIX ja: <http://jena.hpl.hp.com/2005/11/Assembler#> 8 | 9 | [] rdf:type fuseki:Server ; 10 | fuseki:services ( 11 | :service 12 | ) . 13 | 14 | ## Service description for "/dataset" with all endpoints. 15 | ## e.g. 16 | ## GET /dataset/query?query=... 17 | ## GET /dataset/get?default (SPARQL Graph Store Protocol) 18 | 19 | :service rdf:type fuseki:Service ; 20 | fuseki:name "dataset" ; 21 | 22 | ## The GET /dataset?query= variants 23 | fuseki:endpoint [ fuseki:operation fuseki:query ; ] ; 24 | ## gsp-rw covers gsp-r and upload. 
25 | fuseki:endpoint [ fuseki:operation fuseki:update ; ] ; 26 | fuseki:endpoint [ fuseki:operation fuseki:gsp-rw ; ] ; 27 | ## RDF Patch 28 | # fuseki:endpoint [ fuseki:operation fuseki:patch ; ] ; 29 | 30 | fuseki:endpoint [ 31 | fuseki:operation fuseki:query ; 32 | fuseki:name "sparql" 33 | ]; 34 | fuseki:endpoint [ 35 | fuseki:operation fuseki:query ; 36 | fuseki:name "query" 37 | ] ; 38 | fuseki:endpoint [ 39 | fuseki:operation fuseki:update ; 40 | fuseki:name "update" 41 | ] ; 42 | fuseki:endpoint [ 43 | fuseki:operation fuseki:gsp-r ; 44 | fuseki:name "get" 45 | ] ; 46 | fuseki:endpoint [ 47 | fuseki:operation fuseki:gsp-rw ; 48 | fuseki:name "data" 49 | ] ; 50 | # fuseki:endpoint [ 51 | # ## RDF Patch 52 | # fuseki:operation fuseki:patch ; 53 | # fuseki:name "patch" 54 | # ] ; 55 | fuseki:dataset :dataset ; 56 | . 57 | 58 | # Transactional in-memory dataset. 59 | :dataset rdf:type ja:MemoryDataset ; 60 | ## Optional load with data on start-up 61 | ja:data "/usr/share/data/example/kita/cube.ttl"; 62 | ja:data "/usr/share/data/example/wind/cube.ttl"; 63 | ja:data "/usr/share/data/example/shared/bundeslander/data.ttl"; 64 | ## ja:data "data2.trig"; 65 | . 
66 | -------------------------------------------------------------------------------- /example/Cubes/Population_Aargau/age.csv: -------------------------------------------------------------------------------- 1 | ageID,ageName_en,ageDescription_en,ageName_de,ageDescription_de 2 | age_00_04,Age 00 to 04,People with age 00 to 04 years,Alter 00 bis 04,Personen mit Alter 00 bis 04 Jahre 3 | age_05_09,Age 05 to 09,People with age 05 to 09 years,Alter 05 bis 09,Personen mit Alter 05 bis 09 Jahre 4 | age_10_14,Age 10 to 14,People with age 10 to 14 years,Alter 10 bis 14,Personen mit Alter 10 bis 14 Jahre 5 | age_15_19,Age 15 to 19,People with age 15 to 19 years,Alter 15 bis 19,Personen mit Alter 15 bis 19 Jahre 6 | age_20_24,Age 20 to 24,People with age 20 to 24 years,Alter 20 bis 24,Personen mit Alter 20 bis 24 Jahre 7 | age_25_29,Age 25 to 29,People with age 25 to 29 years,Alter 25 bis 29,Personen mit Alter 25 bis 29 Jahre 8 | age_30_34,Age 30 to 34,People with age 30 to 34 years,Alter 30 bis 34,Personen mit Alter 30 bis 34 Jahre 9 | age_35_39,Age 35 to 39,People with age 35 to 39 years,Alter 35 bis 39,Personen mit Alter 35 bis 39 Jahre 10 | age_40_44,Age 40 to 44,People with age 40 to 44 years,Alter 40 bis 44,Personen mit Alter 40 bis 44 Jahre 11 | age_45_49,Age 45 to 49,People with age 45 to 49 years,Alter 45 bis 49,Personen mit Alter 45 bis 49 Jahre 12 | age_50_54,Age 50 to 54,People with age 50 to 54 years,Alter 50 bis 54,Personen mit Alter 50 bis 54 Jahre 13 | age_55_59,Age 55 to 59,People with age 55 to 59 years,Alter 55 bis 59,Personen mit Alter 55 bis 59 Jahre 14 | age_60_64,Age 60 to 64,People with age 60 to 64 years,Alter 60 bis 64,Personen mit Alter 60 bis 64 Jahre 15 | age_65_69,Age 65 to 69,People with age 65 to 69 years,Alter 65 bis 69,Personen mit Alter 65 bis 69 Jahre 16 | age_70_74,Age 70 to 74,People with age 70 to 74 years,Alter 70 bis 74,Personen mit Alter 70 bis 74 Jahre 17 | age_75_79,Age 75 to 79,People with age 75 to 79 years,Alter 75 bis 
79,Personen mit Alter 75 bis 79 Jahre 18 | age_80_84,Age 80 to 84,People with age 80 to 84 years,Alter 80 bis 84,Personen mit Alter 80 bis 84 Jahre 19 | age_85_89,Age 85 to 89,People with age 85 to 89 years,Alter 85 bis 89,Personen mit Alter 85 bis 89 Jahre 20 | age_90_,Age 90 and older,People with age 90 years and older,Alter 90 und älter,Personen mit Alter 90 Jahre und älter 21 | all,Total population,Total population of all ages,Gesamtbevölkerung,Gesamtbevölkerung aller Altersgruppen 22 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | bump-version-and-publish: 10 | name: Bump Version and Publish to PyPI 11 | runs-on: ubuntu-latest 12 | 13 | permissions: 14 | contents: write 15 | 16 | # don't run on this job on forks 17 | if: ${{ github.repository_owner == 'Kronmar-Bafu' }} 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up python 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: '3.13' 27 | 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install bumpver build twine 32 | 33 | - name: Determine version bump 34 | id: version_bump 35 | run: | 36 | VERSION_BUMP="patch" 37 | if git log -1 --pretty=%B | grep -iq "#major"; then 38 | VERSION_BUMP="major" 39 | elif git log -1 --pretty=%B | grep -iq "#minor"; then 40 | VERSION_BUMP="minor" 41 | fi 42 | echo "Version bump type: $VERSION_BUMP" 43 | echo "bump_type=$VERSION_BUMP" >> $GITHUB_ENV 44 | 45 | - name: Bump version 46 | run: | 47 | bumpver update --${{ env.bump_type}} 48 | 49 | - name: Commit version bump 50 | uses: stefanzweifel/git-auto-commit-action@v5 51 | with: 52 | commit_message: "Bump version for release" 53 | branch: main 54 | 55 | - name: Build package 56 | 
run: | 57 | python -m build 58 | 59 | - name: Check package 60 | run: | 61 | twine check dist/* 62 | 63 | - name: Publish to PyPI 64 | env: 65 | TWINE_USERNAME: __token__ 66 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 67 | run: | 68 | twine upload dist/* 69 | -------------------------------------------------------------------------------- /example/Cubes/mock/mock-cube-cube.ttl: -------------------------------------------------------------------------------- 1 | @prefix cube: . 2 | @prefix dcat: . 3 | @prefix dct: . 4 | @prefix schema1: . 5 | @prefix vcard: . 6 | @prefix void: . 7 | @prefix xsd: . 8 | 9 | a void:Dataset, 10 | schema1:Dataset, 11 | dcat:Dataset, 12 | cube:Cube ; 13 | dct:accrualPeriodicity ; 14 | dct:description "Ein Beispiel Cube, der simulierte Daten enthält"@de, 15 | "An example Cube containing some simulated data"@en ; 16 | dct:title "Mock Cube"@de, 17 | "Mock Cube"@en, 18 | "Mock Cube"@fr, 19 | "Mock Cube"@it ; 20 | schema1:contributor ; 21 | schema1:creativeWorkStatus ; 22 | schema1:creator ; 23 | schema1:dateCreated "2024-08-26"^^xsd:date ; 24 | schema1:dateModified "2024-09-24T14:43:29+00:00"^^xsd:dateTime, 25 | "2024-09-24T14:44:21+00:00"^^xsd:dateTime ; 26 | schema1:datePublished "2024-09-24"^^xsd:date ; 27 | schema1:description "Ein Beispiel Cube, der simulierte Daten enthält"@de, 28 | "An example Cube containing some simulated data"@en ; 29 | schema1:name "Mock Cube"@de, 30 | "Mock Cube"@en, 31 | "Mock Cube"@fr, 32 | "Mock Cube"@it ; 33 | schema1:publisher ; 34 | schema1:version 1 ; 35 | schema1:workExample ; 36 | dcat:contactPoint [ a vcard:Organization ; 37 | vcard:fn "Bundesamt für Mock Data"^^xsd:string ; 38 | vcard:hasEmail "contact@mock.ld.admin.ch"^^xsd:string ], 39 | [ a vcard:Organization ; 40 | vcard:fn "Bundesamt für Mock Data"^^xsd:string ; 41 | vcard:hasEmail "contact@mock.ld.admin.ch"^^xsd:string ] ; 42 | cube:observationConstraint ; 43 | cube:observationSet . 
# URL of the CSV file (export of the age-group columns for the listed
# municipality ids). Split into two adjacent literals for readability only;
# the resulting string is unchanged.
url = (
    "https://www.ag.ch/app/sajato-api/api/v2/export?columns=SIS_BEVS0001D.1.ALTER_00_04-ALTER_05_09-ALTER_10_14-ALTER_15_19-ALTER_20_24-ALTER_25_29-ALTER_30_34-ALTER_35_39-ALTER_40_44-ALTER_45_49-ALTER_50_54-ALTER_55_59-ALTER_60_64-ALTER_65_69-ALTER_70_74-ALTER_75_79-ALTER_80_84-ALTER_85_89-ALTER_90_-TOTAL&prefix=multiple&sep=%E2%82%AC&search=19%2C1901%2C1902%2C1903%2C1904%2C1905%2C1906%2C1907%2C1908%2C1909%2C1910%2C1911%2C4001%2C4271%2C4221%2C4191%2C4222%2C4061%2C4272%2C4091%2C4223%2C4323%2C4021%2C4301%2C4224%2C4131%2C4022%2C4225%2C4023%2C4062%2C4226%2C4227%2C4002%2C4024%2C4092%2C4093%2C4132%2C4192%2C4228%2C4273%2C4063%2C4274%2C4095%2C4193%2C4003%2C4133%2C4230%2C4302%2C4303%2C4124%2C4094%2C4185%2C4229%2C4064%2C4004%2C4231%2C4194%2C4065%2C4025%2C4304%2C4134%2C4096%2C4066%2C4195%2C4049%2C4161%2C4097%2C4305%2C4026%2C4005%2C4162%2C4196%2C4067%2C4306%2C4027%2C4028%2C4163%2C4307%2C4098%2C4164%2C4029%2C4232%2C4165%2C4135%2C4006%2C4099%2C4197%2C4100%2C4251%2C4198%2C4069%2C4166%2C4186%2C4070%2C4007%2C4199%2C4136%2C4167%2C4101%2C4200%2C4068%2C4084%2C4168%2C4071%2C4252%2C4308%2C4169%2C4233%2C4030%2C4275%2C4309%2C4310%2C4276%2C4031%2C4008%2C4170%2C4102%2C4311%2C4137%2C4312%2C4201%2C4313%2C4138%2C4103%2C4104%2C4253%2C4105%2C4202%2C4314%2C4033%2C4139%2C4234%2C4171%2C4184%2C4277%2C4009%2C4255%2C4279%2C4236%2C4032%2C4254%2C4106%2C4203%2C4235%2C4278%2C4107%2C4172%2C4034%2C4204%2C4035%2C4072%2C4108%2C4036%2C4010%2C4109%2C4173%2C4174%2C4140%2C4073%2C4256%2C4037%2C4237%2C4038%2C4074%2C4175%2C4280%2C4257%2C4205%2C4141%2C4281%2C4315%2C4039%2C4110%2C4258%2C4316%2C4111%2C4011%2C4282%2C4238%2C4075%2C4206%2C4112%2C4317%2C4283%2C4076%2C4207%2C4113%2C4125%2C4114%2C4115%2C4142%2C4143%2C4318%2C4259%2C4176%2C4144%2C4208%2C4209%2C"
    "4319%2C4239%2C4177%2C4040%2C4284%2C4210%2C4260%2C4041%2C4116%2C4285%2C4012%2C4178%2C4320%2C4145%2C4117%2C4042%2C4077%2C4179%2C4286%2C4078%2C4118%2C4119%2C4043%2C4321%2C4013%2C4146%2C4079%2C4044%2C4120%2C4121%2C4080%2C4122%2C4287%2C4261%2C4240%2C4262%2C4045%2C4081%2C4180%2C4288%2C4123%2C4322%2C4181%2C4082%2C4046%2C4182%2C4047%2C4048%2C4183%2C4263%2C4147%2C4289%2C4083%2C4324%2C4264&fileType=csv&dateFrom=&dateTo="
)

# Local filename to save the downloaded file (relative to the repository root)
filename = "./example/Cubes/Population_Aargau/data_raw.csv"

# Download the file. A timeout keeps the script from hanging forever when the
# server does not answer.
response = requests.get(url, timeout=60)
if response.status_code == 200:
    # filename already starts with "./", so it is used as is.
    with open(filename, 'wb') as f:
        # Re-encode the downloaded bytes from Latin-1 to UTF-8.
        f.write(response.content.decode('latin-1').encode('utf-8'))
    print(f"File downloaded and saved as {filename}")
else:
    print(f"Failed to download file. Status code: {response.status_code}")
2007,Bern,18.321158320002034,4.873759639672014,definitiv 17 | 2007,Zürich,16.70866751700804,7.127258811335302,definitiv 18 | 2008,Bern,17.89457243174931,0.9142998372297384,definitiv 19 | 2008,Zürich,17.518675046034367,4.510509430147336,definitiv 20 | 2009,Bern,15.644260115877954,3.6030273778709576,definitiv 21 | 2009,Zürich,16.57885415743719,8.255475397580657,definitiv 22 | 2010,Bern,16.67404534861387,6.403036956705559,definitiv 23 | 2010,Zürich,14.658857249625616,5.41838925075702,definitiv 24 | 2011,Bern,15.239527332053381,2.7685586054789892,definitiv 25 | 2011,Zürich,15.77006036400352,2.547974456080931,definitiv 26 | 2012,Bern,13.746463964437758,5.634558406905088,definitiv 27 | 2012,Zürich,13.898790381464226,6.195609420937518,definitiv 28 | 2013,Bern,14.79423943661537,8.450009649429939,definitiv 29 | 2013,Zürich,12.895269405129998,3.425280014313877,definitiv 30 | 2014,Bern,12.578954061301419,4.667912290722137,definitiv 31 | 2014,Zürich,13.739824320879597,0.780391898961843,definitiv 32 | 2015,Bern,12.08858766109956,7.234991659001383,definitiv 33 | 2015,Zürich,11.300664194959296,4.794358156286226,definitiv 34 | 2016,Bern,12.606317539212025,9.624032873046964,definitiv 35 | 2016,Zürich,11.306186956398,2.5019829145341275,definitiv 36 | 2017,Bern,10.081257039139402,5.322003468934927,definitiv 37 | 2017,Zürich,11.399522010793316,0.4085982816951822,definitiv 38 | 2018,Bern,10.525901301404176,7.317046263544459,definitiv 39 | 2018,Zürich,8.933378287580624,5.003684887934572,definitiv 40 | 2019,Bern,10.131078794353765,9.12751448116438,definitiv 41 | 2019,Zürich,9.725396222849866,3.27575514766154,definitiv 42 | 2020,Bern,7.863975722345669,4.286464451069697,definitiv 43 | 2020,Zürich,8.817539768108874,1.6851777638149157,definitiv 44 | 2021,Bern,8.883947915155796,5.824977064184451,definitiv 45 | 2021,Zürich,6.873745757485178,6.681380029364922,definitiv 46 | 2022,Bern,7.479049116629595,7.297772477326248,provisionally 47 | 
2022,Zürich,7.984094086903241,5.220921715260017,provisionally 48 | -------------------------------------------------------------------------------- /example/Cubes/mock/description.yml: -------------------------------------------------------------------------------- 1 | Name: 2 | de: Mock Cube 3 | fr: Mock Cube 4 | it: Mock Cube 5 | en: Mock Cube 6 | Description: 7 | de: Ein Beispiel Cube, der simulierte Daten enthält 8 | en: An example Cube containing some simulated data 9 | Publisher: 10 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 11 | Creator: 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 13 | Contributor: 14 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 15 | Name: Bundesamt für Mock Data 16 | Date Created: 17 | 2024-08-26 18 | Contact Point: 19 | E-Mail: contact@mock.ld.admin.ch 20 | Name: Bundesamt für Mock Data 21 | Base-URI: https://mock.ld.admin.ch/ 22 | Identifier: mock-example 23 | Version: 1 24 | Work Status: 25 | Draft 26 | Visualize: 27 | True 28 | # Optional but recommended 29 | Accrual Periodicity: yearly 30 | 31 | # Optional 32 | Namespace: mock 33 | 34 | dimensions: 35 | # required 36 | Jahr: 37 | name: 38 | de: Jahr 39 | fr: An 40 | it: Anno 41 | en: Year 42 | description: 43 | de: Jahr der Erhebung 44 | dimension-type: Key Dimension 45 | datatype: URI 46 | scale-type: ordinal 47 | path: year 48 | data-kind: 49 | type: temporal 50 | unit: year 51 | mapping: 52 | type: additive 53 | base: https://ld.admin.ch/time/year/ 54 | 55 | Station: 56 | name: 57 | de: Station 58 | fr: Station 59 | it: Stazione 60 | en: Station 61 | description: 62 | de: Station der Untersuchung 63 | dimension-type: Key Dimension 64 | scale-type: nominal 65 | datatype: URI 66 | path: station 67 | mapping: 68 | type: replace 69 | replacements: 70 | Bern: https://mock.ld.admin.ch/station/01 71 | Zürich: https://mock.ld.admin.ch/station/02 72 | 73 | Wert: 74 | name: 75 | de: Wert 76 | fr: 
Valeur 77 | it: Valore 78 | en: Value 79 | description: 80 | de: Gemessener Wert an der Station 81 | dimension-type: Measure Dimension 82 | datatype: float 83 | scale-type: interval 84 | path: value 85 | unit: KiloGM 86 | 87 | Standardfehler: 88 | name: 89 | de: Standardfehler 90 | fr: Erreur standard 91 | it: Errore standard 92 | en: Standard error 93 | description: 94 | de: Standardfehler des berechneten Werts 95 | dimension-type: Standard Error 96 | datatype: float 97 | relates-to: value 98 | scale-type: ratio 99 | path: standardError 100 | unit: PERCENT 101 | 102 | Status: 103 | name: 104 | de: Veröffentlichungsstatus 105 | fr: Statut de publication 106 | it: Stato di pubblicazione 107 | en: State of publication 108 | description: 109 | de: "Status der Veröffentlichung, provisorisch oder final" 110 | dimension-type: Annotation 111 | datatype: string 112 | scale-type: nominal 113 | path: status -------------------------------------------------------------------------------- /example/Cubes/co2-limits/data.csv: -------------------------------------------------------------------------------- 1 | Jahr,Energieträger,CO2-Emissionen (Mt),Nicht gerundeter Wert (Mt) 2 | 1990,Brennstoffe,23.409,23.4088409539035 3 | 1990,Treibstoffe,15.449,15.4491830781084 4 | 1991,Brennstoffe,23.251,23.2508647438026 5 | 1991,Treibstoffe,15.929,15.9291182873447 6 | 1992,Brennstoffe,23.849,23.8486416497727 7 | 1992,Treibstoffe,16.259,16.2585739839915 8 | 1993,Brennstoffe,22.656,22.6555635646011 9 | 1993,Treibstoffe,15.217,15.2168569310758 10 | 1994,Brennstoffe,22.931,22.9309962146956 11 | 1994,Treibstoffe,15.399,15.3994934644429 12 | 1995,Brennstoffe,22.702,22.7024598374676 13 | 1995,Treibstoffe,15.101,15.1005792556581 14 | 1996,Brennstoffe,21.872,21.8724337824029 15 | 1996,Treibstoffe,15.16,15.1603802991533 16 | 1997,Brennstoffe,22.739,22.7394923442782 17 | 1997,Treibstoffe,15.736,15.7364012222237 18 | 1998,Brennstoffe,23.212,23.2117416464586 19 | 1998,Treibstoffe,15.964,15.9635530644151 
20 | 1999,Brennstoffe,22.226,22.2262923026919 21 | 1999,Treibstoffe,16.569,16.5686443608428 22 | 2000,Brennstoffe,22.461,22.4614503218056 23 | 2000,Treibstoffe,16.836,16.8364619054554 24 | 2001,Brennstoffe,22.743,22.7430826369274 25 | 2001,Treibstoffe,16.559,16.5594894298454 26 | 2002,Brennstoffe,22.378,22.3778988775046 27 | 2002,Treibstoffe,16.514,16.5139078060435 28 | 2003,Brennstoffe,22.183,22.1825020578164 29 | 2003,Treibstoffe,16.683,16.6832641674477 30 | 2004,Brennstoffe,22.332,22.3323303647259 31 | 2004,Treibstoffe,16.809,16.8089067625261 32 | 2005,Brennstoffe,22.058,22.0581028668418 33 | 2005,Treibstoffe,16.893,16.8926949527344 34 | 2006,Brennstoffe,21.87,21.8704005096137 35 | 2006,Treibstoffe,17.03,17.029954451496 36 | 2007,Brennstoffe,21.473,21.4734775638869 37 | 2007,Treibstoffe,17.355,17.3545199250883 38 | 2008,Brennstoffe,20.874,20.8743790902275 39 | 2008,Treibstoffe,17.706,17.7057048965966 40 | 2009,Brennstoffe,20.364,20.3644771142418 41 | 2009,Treibstoffe,17.515,17.5146844187239 42 | 2010,Brennstoffe,20.081,20.0809192014044 43 | 2010,Treibstoffe,17.417,17.4171042931388 44 | 2011,Brennstoffe,19.542,19.5421072281977 45 | 2011,Treibstoffe,17.225,17.2246963681246 46 | 2012,Brennstoffe,19.203,19.203496216434 47 | 2012,Treibstoffe,17.347,17.3466069719127 48 | 2013,Brennstoffe,18.862,18.8615373863506 49 | 2013,Treibstoffe,17.258,17.2576046460133 50 | 2014,Brennstoffe,18.362,18.3616203250162 51 | 2014,Treibstoffe,17.154,17.1540834660966 52 | 2015,Brennstoffe,17.86,17.8604720539075 53 | 2015,Treibstoffe,16.415,16.4147799431993 54 | 2016,Brennstoffe,17.543,17.5432203420959 55 | 2016,Treibstoffe,16.247,16.246639568902 56 | 2017,Brennstoffe,17.207,17.2065691870665 57 | 2017,Treibstoffe,15.963,15.9628629448215 58 | 2018,Brennstoffe,16.796,16.7963008002405 59 | 2018,Treibstoffe,15.956,15.9555492234206 60 | 2019,Brennstoffe,16.436,16.4355294371483 61 | 2019,Treibstoffe,15.895,15.8945043183231 62 | 2020,Brennstoffe,16.117,16.1168503388347 63 | 
#!/usr/bin/env python
"""Generate the airport example cube together with its concept table.

Author: Fabian Cretton - HEVS

See the description in the README.

This example script only generates the .ttl file, the upload operations are not performed
"""

import os

import pandas as pd
import yaml

from pylindas.lindas.namespaces import SCHEMA
from pylindas.pycube import Cube
from pylindas.lindas.upload import upload_ttl
from pylindas.lindas.query import cube_exists

BASEDIR = os.path.dirname(__file__)
CONFIGFILE = os.path.join(BASEDIR, "description.yml")
CUBEFILE = os.path.join(BASEDIR, "cube.ttl")


def report_concept_check(cube, dimension="typeOfAirport"):
    """Check that every object of *dimension* has a matching concept and print the outcome.

    Args:
        cube: The cube whose dimension objects are checked.
        dimension (str): Name of the dimension (and concept) to check.

    Returns:
        The value returned by ``cube.check_dimension_object_property()``;
        a falsy value is reported as a warning.
    """
    all_concepts_found = cube.check_dimension_object_property(dimension, SCHEMA.name)
    if not all_concepts_found:
        print(
            f'\nCheck result - WARNING: It seems that some objects of the "{dimension}" dimension have no matching concept.\n'
            "See the log for details and check your data + cube dimension and concepts configuration"
        )
    else:
        print(
            f'\nCheck result - SUCCESS: It seems that all objects of the "{dimension}" dimension have a matching concept.'
        )
    return all_concepts_found


# data.csv contains an airport type identifier that doesn't exist in airportconcept.csv
# the goal is to demonstrate that the check_dimension_object_property() called below will detect that
DATADUMMY = os.path.join(BASEDIR, "data.csv")
data_df = pd.read_csv(DATADUMMY)

# Read the description explicitly as UTF-8 so the result does not depend on
# the platform's default encoding.
with open(CONFIGFILE, encoding="utf-8") as file:
    config = yaml.safe_load(file)

cube = Cube(dataframe=data_df, cube_yaml=config, environment="TEST", local=True)
cube.prepare_data()
cube.write_cube()
cube.write_observations()
cube.write_shape()

# Add the concept data
# The concept must be defined in the cube_yaml file, as a nested key under the "Concepts" key
# "typeOfAirport" is the name of that nested key
AIRPORTDATA = os.path.join(BASEDIR, "airportconcept.csv")
airport_concept_df = pd.read_csv(AIRPORTDATA)
cube.write_concept("typeOfAirport", airport_concept_df)

# Check that all the generated URLs for the typeOfAirport are resources (concept) with a SCHEMA.name triple
# This allows to check if all the entries in data.csv correspond to an entry in airportconcept.csv
# This check should identify the error of the 'dummy' airport type
allConceptsFound = report_concept_check(cube, "typeOfAirport")

cube.serialize(CUBEFILE)

# Just for testing the functionality: add the 'dummy' airport type
# and run the same check again on the completed concept table
AIRPORTDUMMYDATA = os.path.join(BASEDIR, "airportdummyconcept.csv")
airport_concept_dummy_df = pd.read_csv(AIRPORTDUMMYDATA)
cube.write_concept("typeOfAirport", airport_concept_dummy_df)
allConceptsFound = report_concept_check(cube, "typeOfAirport")

print(cube)
10 | }, 11 | "Publisher": [ 12 | { 13 | "IRI": "https://opendata.schleswig-holstein.de/organization/02619150-8e16-46a5-b2b9-de73ecfc617d" 14 | } 15 | ], 16 | "Creator": [ 17 | { 18 | "IRI": "https://opendata.schleswig-holstein.de/organization/02619150-8e16-46a5-b2b9-de73ecfc617d" 19 | } 20 | ], 21 | "Contributor": [], 22 | "Date Created": "2024-11-13T11:45:45.558776", 23 | "Contact Point": { 24 | "E-Mail": "opendata@example.ch", 25 | "Name": "Landesamt f\u00fcr Umwelt" 26 | }, 27 | "Base-URI": "https://opendata.schleswig-holstein.de/dataset/fc49eebf-3750-4c9c-a29e-6696eb644362/resource/b8a7b43c-3529-4b92-bb49-7bf4e9109dfb/download/opendata_wka_inbetrieb_sh_20230103.csv", 28 | "Identifier": "wka-inbetrieb", 29 | "Version": 0.1, 30 | "Work Status": "Draft", 31 | "Visualize": true, 32 | "Accrual Periodicity": "", 33 | "Namespace": "https://opendata.example.ch", 34 | "dimensions": { 35 | "Jahr": { 36 | "name": { 37 | "de": "Jahr", 38 | "en": "Jahr" 39 | }, 40 | "dimension-type": "Measure Dimension", 41 | "scale-type": "interval", 42 | "path": "Jahr", 43 | "description": { 44 | "de": "Beschreibung f\u00fcr Jahr", 45 | "en": "Description for Jahr" 46 | }, 47 | "data-kind": { 48 | "type": "temporal", 49 | "unit": "year" 50 | } 51 | }, 52 | "Anzahl_inBetrieb_WKA_SH": { 53 | "name": { 54 | "de": "Anzahl", 55 | "en": "Anzahl" 56 | }, 57 | "dimension-type": "Measure Dimension", 58 | "scale-type": "interval", 59 | "path": "Anzahl_inBetrieb_WKA_SH", 60 | "description": { 61 | "de": "Beschreibung f\u00fcr Anzahl_inBetrieb_WKA_SH", 62 | "en": "Description for Anzahl_inBetrieb_WKA_SH" 63 | } 64 | }, 65 | "Leistung_MW": { 66 | "name": { 67 | "de": "Leistung_MW", 68 | "en": "Leistung_MW" 69 | }, 70 | "dimension-type": "Measure Dimension", 71 | "scale-type": "ratio", 72 | "path": "Leistung_MW", 73 | "description": { 74 | "de": "Beschreibung f\u00fcr Leistung_MW", 75 | "en": "Description for Leistung_MW" 76 | }, 77 | "unit": "MW" 78 | } 79 | } 80 | } 
from rdflib import Graph, Literal, Namespace, RDF, URIRef
# The package is named `pylindas` (the former `py_cube` name no longer exists in this repository).
from pylindas.lindas.namespaces import *
from shapely.geometry import shape
import json
import argparse
from typing import Optional

# Vocabulary terms used when describing a geographic feature.
_SCHEMA_PLACE = URIRef("http://schema.org/Place")
_SCHEMA_NAME = URIRef("http://schema.org/name")
_GEO_HAS_GEOMETRY = URIRef("http://www.opengis.net/ont/geosparql#hasGeometry")
_GEO_AS_WKT = URIRef("http://www.opengis.net/ont/geosparql#asWKT")
_GEO_WKT_LITERAL = URIRef("http://www.opengis.net/ont/geosparql#wktLiteral")
# Languages looked up as `name_<lang>` keys in a feature's properties.
_NAME_LANGUAGES = ("fr", "en", "de", "it")


class GeoSharedDimension(object):
    """Build an RDF graph of places (names + WKT geometry) from GeoJSON features."""

    _base_uri: URIRef
    _graph: Graph
    _description: dict

    def __init__(self, base_uri: URIRef, description: dict, graph: Graph):
        """Store the base URI, the description dictionary and the target graph.

        Args:
            base_uri (URIRef): Base URI of the shared dimension.
            description (dict): Description of the shared dimension.
            graph (Graph): Graph that receives the generated triples.
        """
        self._base_uri = base_uri
        self._graph = graph
        self._description = description

    def _setup_graph(self) -> Graph:
        """Set up the graph by binding namespaces and returning the graph object.

        The optional "Namespace" entry of the description is bound to the
        base URI; when it is absent, "no Namespace" is printed instead.

        Returns:
            Graph: A new graph object with namespaces bound.
        """
        graph = Graph()
        for prefix, nmspc in Namespaces.items():
            graph.bind(prefix=prefix, namespace=nmspc)
        # The description dict is the only configuration this class holds;
        # `.get` never raises, so test for the value explicitly.
        custom_prefix = (self._description or {}).get("Namespace")
        if custom_prefix:
            graph.bind(prefix=custom_prefix, namespace=Namespace(self._base_uri))
        else:
            print("no Namespace")
        return graph

    def _geojson_to_wkt(self, geojson: Optional[dict]) -> Optional[str]:
        """Convert a GeoJSON geometry to WKT.

        Args:
            geojson (dict | None): The GeoJSON geometry object.

        Returns:
            str | None: The WKT string, or None when no geometry is given.
        """
        if not geojson:
            return None
        return shape(geojson).wkt

    def _add_geo_feature_to_graph(self, geojson_feature):
        """Add one GeoJSON feature to the graph as a schema:Place.

        The feature's `iri` property identifies the place, every available
        `name_<lang>` property becomes a language-tagged schema:name, and the
        geometry is attached as a GeoSPARQL WKT literal on `<iri>/geometry`.

        Args:
            geojson_feature (dict): A GeoJSON feature object.

        Raises:
            ValueError: If the feature has no properties or no `iri` property.
        """
        properties = geojson_feature.get("properties")
        if not properties:
            raise ValueError("Feature must have properties")
        iri = properties.get("iri")
        if not iri:
            raise ValueError("Feature must have an IRI")
        feature = URIRef(iri)
        self._graph.add((feature, RDF.type, _SCHEMA_PLACE))

        for lang in _NAME_LANGUAGES:
            name_key = f"name_{lang}"
            if name_key in properties:
                self._graph.add((feature, _SCHEMA_NAME, Literal(properties[name_key], lang=lang)))

        geometry = URIRef(f"{iri}/geometry")
        self._graph.add((feature, _GEO_HAS_GEOMETRY, geometry))
        # A missing "geometry" key is treated like a null geometry: no WKT triple.
        wkt = self._geojson_to_wkt(geojson_feature.get("geometry"))
        if wkt:
            self._graph.add((geometry, _GEO_AS_WKT, Literal(wkt, datatype=_GEO_WKT_LITERAL)))

    def serialize(self, filename: str) -> None:
        """Serialize the graph to a file in turtle format.

        Args:
            filename (str): The name of the file to write the graph to.

        Returns:
            None
        """
        self._graph.serialize(destination=filename, format="turtle", encoding="utf-8")


def convert_geojson_to_ttl(geojson_filename, ttl_filename):
    """Convert a GeoJSON feature collection into a turtle file.

    Args:
        geojson_filename (str): Path of the GeoJSON file to read.
        ttl_filename (str): Path of the turtle file to write.

    Raises:
        ValueError: If a feature has no properties or no `iri` property.
    """
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(geojson_filename, 'r', encoding="utf-8") as f:
        geojson_data = json.load(f)

    base_uri = URIRef("http://example.org/base")
    description = {}
    graph = Graph()

    shared_dimension = GeoSharedDimension(base_uri, description, graph)

    for feature in geojson_data.get("features", []):
        # Pick a label defensively so that a feature lacking `name_de` (or
        # properties altogether) reaches the validation in
        # _add_geo_feature_to_graph instead of failing here with a KeyError.
        properties = feature.get("properties") or {}
        label = properties.get("name_de") or properties.get("iri") or "<unnamed>"
        print(f"Adding feature {label}")
        shared_dimension._add_geo_feature_to_graph(feature)

    shared_dimension.serialize(ttl_filename)
1.1.2.2;1.1.2;Schnecken;Gastéropodes;Gasteropodi;Gasteropodi;Q2;x;13 15 | 1.1.3;1.1;Krebstiere;Crustacés;Crostacei;Crustaceans;Q2;x;14 16 | 1.1.3.1;1.1.3;Zehnfusskrebse;Écrevisses;Gamberi;Crayfishes;Q2;x;15 17 | 1.1.4;1.1;Insekten;Insectes;Insetti;Insects;Q2;x;16 18 | 1.1.4.1;1.1.4;Hautflügler;Hyménoptères;Imenotteri;Hymenopterae;Q2;x;17 19 | 1.1.4.1.1;1.1.4.1;Bienen;Abeilles;Api;Bees;Q2;x;18 20 | 1.1.4.1.2;1.1.4.1;Ameisen;Fourmis;Formiche;Ants;Q2;x;19 21 | 1.1.4.2;1.1.4;Schmetterlinge;Papillons;Farfalle;Butterflies;Q2;x;20 22 | 1.1.4.2.1;1.1.4.2;Tagfalter und Widderchen;Papillons diurnes et Zygènes;Farfalle diurne e Zigene;Diurnal Butterflies and Zigene;Q2;x;21 23 | 1.1.4.3;1.1.4;Köcherfliegen;Trichoptères;Tricotteri;Caddisflies;Q2;x;22 24 | 1.1.4.4;1.1.4;Schnaken;Tipules;Ditteri Tipulidi;Diptera Tipulids;Q2;x;23 25 | 1.1.4.5;1.1.4;Käfer;Coléoptères;Coleotteri;Coleopterae;Q2;x;24 26 | 1.1.4.5.1;1.1.4.5;Pracht-, Bock-, Rosenkäfer und Schröter;Coléoptères Buprestidés, Cérambycidés, Cétoniidés et Lucanidés;Coleotteri Buprestidi, Cerambicidi, Cetonidi e Lucanidi;Coleopterae Buprestides, Cerambicides, Cetonides, Lucanides;Q2;x;25 27 | 1.1.4.5.2;1.1.4.5;Laufkäfer und Sandlaufkäfer;Carabidés et Cicindèles;Carabidi e Cicindelidi;Carabidae and Cicindelinae;Q2;x;26 28 | 1.1.4.5.3;1.1.4.5;Wasserkäfer;Coléoptères hydradéphages;Coleotteri Adefagi acquatici;Aquatic beetles Adefagians;Q2;x;27 29 | 1.1.4.6;1.1.4;Netzflügler;Névroptères;Neurotteri;Neuropterans;Q2;x;28 30 | 1.1.4.7;1.1.4;Singzikaden;Cigales;Cicale;Cicadas;Q2;x;29 31 | 1.1.4.8;1.1.4;Heuschrecken;Orthoptères;Ortotteri;Ortopterans;Q2;x;30 32 | 1.1.4.9;1.1.4;Steinfliegen;Plécoptères;Plecotteri;Stoneflies;Q2;x;31 33 | 1.1.4.10;1.1.4;Libellen;Libellules, Odonates;Libellule;Dragonflies;Q2;x;32 34 | 1.1.4.11;1.1.4;Eintagsfliegen;Éphémères;Efemerotteri;Mayflies;Q2;x;33 35 | 1.2;1;Pflanzen;Plantes;Piante;Plants;Q2;x;34 36 | 1.2.1;1.2;Gefässpflanzen;Plantes vasculaires;Piante vascolari;Vascular plants;Q2;x;35 37 | 
import pandas as pd
import sparql_dataframe
from SPARQLWrapper import SPARQLWrapper, JSON

# Short environment names accepted in place of a full SPARQL endpoint URL.
_LINDAS_ENDPOINTS = {
    "TEST": "https://test.lindas.admin.ch/query",
    "INT": "https://int.lindas.admin.ch/query",
    "PROD": "https://lindas.admin.ch/query",
}


def _resolve_endpoint(endpoint: str) -> str:
    """Return the query URL for a LINDAS environment name, or *endpoint* unchanged."""
    return _LINDAS_ENDPOINTS.get(endpoint, endpoint)


def get_cube(endpoint: str, identifier: str, version: str):
    """Retrieve the cube URI based on the provided identifier and version using SPARQL query.

    Args:
        endpoint (str): The SPARQL endpoint URL, or one of "TEST", "INT", "PROD".
        identifier (str): The identifier of the cube.
        version (str): The version of the cube (inserted unquoted into the query).

    Returns:
        str: The URI of the cube.

    Raises:
        LookupError: If no cube matches the identifier and version.
        Exception: If an error occurs during the SPARQL query execution.
    """
    endpoint = _resolve_endpoint(endpoint)

    query = f"""
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX cube: <https://cube.link/>
    PREFIX schema: <http://schema.org/>

    SELECT ?cube
    {{
        ?cube a cube:Cube ;
            dcterms:identifier "{identifier}" ;
            schema:version {version} .
    }}
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)

    # Errors are raised, not returned: a returned exception object would be
    # silently interpolated into follow-up queries by callers.
    resp = sparql.queryAndConvert()
    bindings = resp["results"]["bindings"]
    if not bindings:
        raise LookupError(
            f"No cube found for identifier {identifier!r} and version {version!r} at {endpoint}"
        )
    return bindings[0]["cube"]["value"]


def get_observations(endpoint: str, identifier: str, version: str):
    """Retrieve observations from a given endpoint based on the provided identifier and version.

    Args:
        endpoint (str): The SPARQL endpoint URL, or one of "TEST", "INT", "PROD".
        identifier (str): The identifier for the observations.
        version (str): The version of the observations.

    Returns:
        pandas.DataFrame: One row per observation and one column per
            dimension, named after the dimension's German `schema:name`.

    Raises:
        LookupError: If no cube matches the identifier and version.
        Exception: If an error occurs during the SPARQL query execution.
    """
    endpoint = _resolve_endpoint(endpoint)
    cube_uri = get_cube(endpoint=endpoint, identifier=identifier, version=version)
    query = f"""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX sh: <http://www.w3.org/ns/shacl#>
    PREFIX schema: <http://schema.org/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX cube: <https://cube.link/>

    SELECT ?obs ?pred ?value
    {{
        <{cube_uri}> cube:observationSet/cube:observation ?obs .
        <{cube_uri}> cube:observationConstraint/sh:property ?dim .
        ?dim sh:path ?predURI ;
            schema:name ?pred .
        FILTER(LANG(?pred)='de')
        {{
            ?dim a cube:KeyDimension .
            ?obs ?predURI ?vl .
            ?vl schema:name ?value
        }} UNION {{
            ?dim a cube:KeyDimension .
            ?obs ?predURI ?value .
            FILTER (DATATYPE(?value) != xsd:anyURI)
        }} UNION {{
            ?dim a cube:MeasureDimension .
            ?obs ?predURI ?value
        }}
    }}
    """

    df = sparql_dataframe.get(endpoint, query)
    # Long (obs, pred, value) rows -> wide table; the observation URI index is dropped.
    observations = df.pivot(index="obs", columns="pred", values="value").reset_index(drop=True)
    return observations
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # Database lindas information 158 | *.ini 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
165 | .idea/ 166 | 167 | .DS_Store 168 | 169 | # Test cube for unit tests 170 | /tests/test_cube.ttl 171 | 172 | # VSCode 173 | .vscode 174 | -------------------------------------------------------------------------------- /example/Shared_Dimensions/shared_dimension_generation/sd_description.yml: -------------------------------------------------------------------------------- 1 | # The SD URL will be the Identifier concatenated to the fixed "https://ld.admin.ch/cube/dimension/" 2 | # If the SD URL starts with http, it will instead use that instead of the default 3 | # Note: a SD's URL is not relative to the LINDAS environment, and it will be dereferenceable only when published on PROD 4 | Identifier: pylindas_sd_generation_example 5 | # TODO Base: Optional, allows for separation of DefinedTermSet and the base for identifier-field, where identifier defines the TermSet 6 | Name: 7 | en: PyLindas Shared Dimension generation example 8 | fr: PyLindas example de génération d'une Shared Dimension 9 | de: PyLindas Shared Dimension generation example (de) 10 | it: PyLindas Shared Dimension generation example (it) 11 | # Description is optional 12 | Description: 13 | fr: Un example de génération d'une Shared Dimension par PyLindas 14 | en: An example of Shared Dimension generation by PyLindas 15 | # Valid-from is optional, it is a date/time value 16 | # Note: it is currently optional, but might need to become mandatory as validFrom, and later validThrough, are used to make a SD and its term 'deprecated' 17 | Valid-from: 2025-02-05T00:00:00Z 18 | # Contributor is optional, it is now added by the Cube Creator when creating a new SD 19 | Contributor: 20 | name: Fabian Cretton 21 | email: fabian.cretton@dummy.ch 22 | Terms: 23 | identifier-field: code 24 | name-field: name 25 | multilingual: True 26 | # links-to-other-terms is optional 27 | # It allows to create links between terms of a same dataset, as for instance the creation of a hierarchy based on child to parent relations 
with the skos:broader property 28 | # The identifier of the other term must be found on the same line, as for example: 29 | # code;parent_code;name 30 | # 1;;Alle Artengruppen 31 | # 1.1;1;Tiere 32 | links-to-other-terms: 33 | #parent_code: name of the column that contains the identifier of the other term 34 | parent_code: 35 | # property: the current proposal here does not handle relative URIs, but requires an existing property 36 | # to be adapted if needed 37 | property: http://www.w3.org/2004/02/skos/core#broader 38 | 39 | #mapping is optional, and is used to replace the entries in the SharedDimension with URIs to different SharedDimensions 40 | #there are currently 4 types: additive, replace, regex and concept, adapted from Cube.py 41 | #anytime something is added to mapping, it should also be added to other-fields, and given the appropriate datatype and URI 42 | #if made into a URI, make sure the entries are valid as URIs, so no empty spaces 43 | mapping: 44 | #this is the name of the column in which the entries are to be replaced 45 | location: 46 | #type additive, adds whatever is in the entry at the end of the "base", so: "base" + "entry" 47 | type: additive 48 | base: https://www.wikidata.org/wiki/ 49 | wikidata: 50 | #type replace, replaces the entry fully with the given replacement, all entries are changed to the same thing, non-dynamically 51 | type: replace 52 | replacement: https://www.wikidata.org/wiki/Q1 53 | concept: 54 | #type concept, the entry is changed to the URI given 55 | #Columns can be placed in {} and when done so, those places in the URI will dynamically be appropriated as the given entries inside the Column 56 | #if started with "/" it will use the cubes base_URL and add concept inside ex: /parent/{parent}, will give baseURI + /parent + /(entry in the column of parent) 57 | type: concept 58 | replacement-automated: http://the_cube_uri/concept/{concept}/{code} 59 | 60 | #TODO Regex example 61 | 62 | # other-fields are optional, URI could 
be relative (and concatenated to the SD's URI) or a full URI starting with 'http/https' 63 | other-fields: 64 | wikidata: 65 | URI: http://schema.org/isPartOf 66 | datatype: URI 67 | concept: 68 | URI: /partoftest 69 | datatype: URI 70 | parent_code: 71 | URI: /parent_identifier_example 72 | datatype: string 73 | location: 74 | URI: http://schema.org/isPartOf 75 | datatype: URI 76 | -------------------------------------------------------------------------------- /pylindas/shared_dimension_queries/README.md: -------------------------------------------------------------------------------- 1 | # Shared dimensions queries 2 | The goal of [shared_dimensions_queries.py](shared_dimensions_queries.py) is to become a tool for developers to find a useful shared dimension, 3 | then get the URLs of the terms in order to configure the mapping for a cube's dimension. 4 | 5 | This is a first implementation of: 6 | - Basic queries to request shared dimensions information from LINDAS 7 | - Display the results, line by line 8 | 9 | ## Example 10 | See an example usage in [example_sd.py](example_sd.py) 11 | 12 | List all the shared dimensions for a specific LINDAS environment and print them line by line: 13 | ``` 14 | result=list_shared_dimensions("INT") 15 | list_shared_dimensions_print(result) 16 | ``` 17 | 18 | The result is ordered alphabetically. 
19 | 20 | list_shared_dimensions() has a number of optional parameters, with default values except for the environment: 21 | - environment: LINDAS environment, one of `TEST`, `INT`, `PROD` 22 | - name_lng: the language of the label of the shared dimensions to retrieve (default "en") 23 | Note: a shared dimension with no label in that language will not be listed (no fall-back handled yet) 24 | - offset/limit: standard possibility to page through the result with offset/limit (default to 0) 25 | OFFSET: "skip this many rows from the total result set", 0 to skip no row and begin from start 26 | LIMIT: "only give me this many rows (starting after any OFFSET)" 27 | a limit of 0 = no limit, display all results starting from offset (LIMIT will not be added to the query) 28 | - search_word: to limit the results to labels containing a specific word (default "" -> ignored) 29 | 30 | List 10 Shared Dimensions that contain "Canton" in the French name 31 | ``` 32 | result = list_shared_dimensions("INT", "fr", 0, 10, "Canton") 33 | ``` 34 | 35 | As the goal is to observe the URLs of the terms in a shared dimension, URLs that will be used to define the mappings, a feature of list_shared_dimensions_print() is to print 2-3 terms for each listed shared dimension. 36 | To do this, pass a second 'environment' parameter to the function 37 | ``` 38 | list_shared_dimensions_print(result, "INT") 39 | ``` 40 | This environment should of course match the one used for `list_shared_dimensions()`. While displaying each shared dimension, LINDAS environment will be queried to get 2 terms. 41 | Example result: 42 | ``` 43 | Cantons - validFrom 2021-01-01T00:00:00Z 44 | { Terms sample: 45 | Aargau 46 | Appenzell Ausserrhoden 47 | } 48 | Cantons NFI 49 | { Terms sample: 50 | Aargau 51 | Appenzell Ausserrhoden 52 | } 53 | ``` 54 | Note: `list_shared_dimensions()` will also display validFrom and validTo values, when available, as some shared dimensions could be deprecated. 
55 | 56 | It is finally possible to list all the terms for a specific shared dimension. 57 | Here is an example to list the Cantons shared dimension's terms, in French: 58 | ``` 59 | result = list_shared_dimension_terms("INT", "https://ld.admin.ch/dimension/canton", "fr") 60 | print_sparql_result(result, ["name", "sdTerm"]) 61 | ``` 62 | `print_sparql_result()` prints line by line the JSON result of a SPARQL query, printing the specific expected fields of the query. 63 | 64 | ## Next steps 65 | All of this is a first proposal, and should be further improved according to the developers' needs. 66 | 67 | It is not yet a class with methods, and contains code that could be more generic. 68 | For instance, query_lindas could be a very generic function like the one found in /lindas/query.py 69 | Note that the existing query_lindas() is specific to ASK queries (returns a bool value), and is maybe "wrongly" named currently. 70 | 71 | A class could be created, passing for instance the environment in the constructor. This would avoid passing the environment parameter to the different queries. 72 | Furthermore, the environment could come from a configuration file (or environment variables), to avoid hard-coding it. 
73 | -------------------------------------------------------------------------------- /example/Cubes/greenhouse_limit/description.yml: -------------------------------------------------------------------------------- 1 | Name: 2 | de: Treibhausgasinventar 3 | en: Greenhouse Gas Inventory 4 | fr: Emissions de gaz a effet de serre 5 | it: Emissioni di gas di effetto serre 6 | Description: 7 | de: Ein Beispiel Cube mit einem Zielwert mit Anfangs- und Endpunkt 8 | en: An example cube with a target value with start and end 9 | fr: Un exemple de Cube avec une valeur cible avec une date de debut et une date de fin 10 | it: Un esempio di Cube con un valore obiettivo con una data di inizio e una data di fine 11 | Publisher: 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 13 | Creator: 14 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 15 | Contributor: 16 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 17 | Name: Bundesamt für Umwelt 18 | Date Created: 19 | 2025-03-27 20 | Contact Point: 21 | E-Mail: marco.kronenberg@bafu.admin.ch 22 | Name: Bundesamt für Umwelt 23 | Base-URI: https://environment.ld.admin.ch/foen/test_target/ 24 | Identifier: target_timespan 25 | Version: 1 26 | Work Status: 27 | Draft 28 | Visualize: 29 | True 30 | # Optional but recommended 31 | Accrual Periodicity: yearly 32 | 33 | Namespace: limit_timespan 34 | 35 | dimensions: 36 | Jahr: 37 | name: 38 | de: Jahr 39 | fr: An 40 | it: Anno 41 | en: Year 42 | description: 43 | de: Jahr der Erhebung 44 | dimension-type: Key Dimension 45 | datatype: URI 46 | scale-type: ordinal 47 | path: year 48 | data-kind: 49 | type: temporal 50 | unit: year 51 | mapping: 52 | type: additive 53 | base: https://ld.admin.ch/time/year/ 54 | 55 | THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung: 56 | name: 57 | de: THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung 58 | en: GHG emissions without the 
greenhouse gas balance of land use 59 | fr: Emissions de GES sans le bilan des émissions des gaz à effet de serre lié à l'utilisation des terres 60 | it: Emissioni di gas serra senza il bilancio dei gas serra da uso del territorio 61 | description: 62 | de: THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung 63 | en: GHG emissions without the greenhouse gas balance of land use 64 | fr: Emissions de GES sans le bilan des émissions des gaz à effet de serre lié à l'utilisation des terres 65 | it: Emissioni di gas serra senza il bilancio dei gas serra da uso del territorio 66 | dimension-type: Measure Dimension 67 | datatype: float 68 | scale-type: ratio 69 | path: ghgEmission 70 | unit: MegaTONNE 71 | 72 | THG-Emissionen mit der Treibhausgasbilanz der Landnutzung: 73 | name: 74 | de: THG-Emissionen mit der Treibhausgasbilanz der Landnutzung 75 | en: GHG emissions with the greenhouse gas balance of land use 76 | fr: Emissions de GES avec le bilan des gaz à effet de serre lié à l'utilisation des terres 77 | it: Emissioni di gas serra con il bilancio dei gas serra da uso del territorio 78 | description: 79 | de: THG-Emissionen mit der Treibhausgasbilanz der Landnutzung 80 | en: GHG emissions with the greenhouse gas balance of land use 81 | fr: Emissions de GES avec le bilan des gaz à effet de serre lié à l'utilisation des terres 82 | it: Emissioni di gas serra con il bilancio dei gas serra da uso del territorio 83 | dimension-type: Measure Dimension 84 | datatype: float 85 | scale-type: ratio 86 | path: ghgEmissionLanduse 87 | unit: MegaTONNE 88 | annotation: 89 | - type: limit 90 | value: 26.28871925 91 | name: 92 | de: Ziel 2030 gemäss Übereinkommen von Paris 93 | en: Target according to Paris Agreement for 2030 94 | fr: Objectif selon l'accord de Paris pour 2030 95 | it: Obiettivo secondo l'Accordo di Parigi per 2030 96 | context: 97 | Jahr: 2030 98 | - type: limit 99 | value: 34.17533502 100 | name: 101 | de: Durchschnittsziel gemäss Übereinkommen 
von Paris für 2021-2030 102 | en: Average target according to Paris Agreement for 2021-2030 103 | fr: Objectif moyen selon l'accord de Paris pour 2021-2030 104 | it: Obiettivo medio secondo l'Accordo di Parigi per 2021-2030 105 | context: 106 | Jahr: 107 | min: 2021 108 | max: 2030 109 | 110 | -------------------------------------------------------------------------------- /example/Cubes/Population_Aargau/prepare.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | # Input file 4 | csv_file = "./example/Cubes/Population_Aargau/data_raw.csv" 5 | 6 | # Read the CSV file 7 | df = pd.read_csv(csv_file, sep=",") 8 | 9 | # Opt in to pandas' future no-silent-downcasting behaviour to avoid the silent downcasting warnings 10 | pd.set_option('future.no_silent_downcasting', True) 11 | 12 | # Rename "ALTER_xy" columns to "age_xy" 13 | df.rename(columns=lambda x: x.replace("ALTER_", "age_") if x.startswith("ALTER_") else x, inplace=True) 14 | 15 | # Rename column "TOTAL" to "all" 16 | df.rename(columns={"TOTAL": "all"}, inplace=True) 17 | 18 | # Create a new list with all the columns that start with "age_" 19 | age_concept_ids = [col for col in df.columns if col.startswith("age_")] 20 | 21 | # Create names for the age concepts by replacing "age_00_04" with "Age 00 to 04" 22 | age_concept_names_en = ["Age " + col[4:6] + " to " + col[7:9] for col in age_concept_ids] 23 | # Special case for the last age group 24 | age_concept_names_en[-1] = "Age 90 and older" 25 | 26 | age_concept_names_de = ["Alter " + col[4:6] + " bis " + col[7:9] for col in age_concept_ids] 27 | # Special case for the last age group in German 28 | age_concept_names_de[-1] = "Alter 90 und älter" 29 | 30 | # Create descriptions for the age concepts 31 | age_concept_descriptions_en = ["People with age " + col[4:6] + " to " + col[7:9] + " years" for col in age_concept_ids] 32 | # Special case for the last age group 33 | age_concept_descriptions_en[-1] = "People with age 90 years and older" 34 
| 35 | age_concept_descriptions_de = ["Personen mit Alter " + col[4:6] + " bis " + col[7:9] + " Jahre" for col in 36 | age_concept_ids] 37 | # Special case for the last age group in German 38 | age_concept_descriptions_de[-1] = "Personen mit Alter 90 Jahre und älter" 39 | 40 | # Add "all" to the list of age concepts 41 | age_concept_ids.append("all") 42 | # Add the name "Total population" for the "all" column 43 | age_concept_names_en.append("Total population") 44 | # Add description for the "all" column 45 | age_concept_descriptions_en.append("Total population of all ages") 46 | age_concept_names_de.append("Gesamtbevölkerung") 47 | age_concept_descriptions_de.append("Gesamtbevölkerung aller Altersgruppen") 48 | 49 | # Create a DataFrame for age concepts 50 | age_concepts_df = pd.DataFrame({ 51 | "ageID": age_concept_ids, 52 | "ageName_en": age_concept_names_en, 53 | "ageDescription_en": age_concept_descriptions_en, 54 | "ageName_de": age_concept_names_de, 55 | "ageDescription_de": age_concept_descriptions_de 56 | }) 57 | 58 | # Save the age concepts DataFrame to a CSV file 59 | age_concepts_df.to_csv("./example/Cubes/Population_Aargau/age.csv", index=False) 60 | 61 | # Create a new date column from year, month, and day columns in format YYYY-MM-DD 62 | df.insert(1, "date", 63 | df["year"].astype(str) + "-" + df["month"].astype(str).str.zfill(2) + "-" + df["day"].astype(str).str.zfill( 64 | 2)) 65 | 66 | 67 | # Create a new column for the region 68 | def region(line): 69 | if line.locationType == "CANTON": 70 | return "C_" + str(line.bfsNr) 71 | elif line.locationType == "DISTRICT": 72 | return "D_" + str(line.bfsNr) 73 | elif line.locationType == "TOWNSHIP": 74 | return "M_" + str(line.bfsNr) 75 | 76 | 77 | df.insert(0, "region", df.apply(region, axis=1)) 78 | 79 | # Keep only values for "region" = C_19, D_1901 and M_4001 through M_4013 (district Aarau and total of canton Aargau) 80 | df = df[df["region"].isin( 81 | ["C_19", "D_1901", "M_4001", "M_4002", "M_4003", "M_4004", 
"M_4005", 82 | "M_4006", "M_4007", "M_4008", "M_4009", "M_4010", "M_4011", "M_4012", "M_4013" 83 | ] 84 | )] 85 | 86 | df = df[df["year"] > 2020] 87 | 88 | # Drop unnecessary columns 89 | df.drop(columns=["bfsNr", "year", "month", "day", "locationName", "locationType"], inplace=True) 90 | 91 | 92 | # Melt data to long format 93 | df = df.melt(id_vars=["region", "date"], var_name="group", value_name="number") 94 | 95 | # Column number as integer 96 | df["number"] = df["number"].astype(int) 97 | 98 | # Add percentage column 99 | all_df = df[df["group"] == "all"].rename(columns={"number": "all_number"}) 100 | df = df.merge(all_df[["region", "date", "all_number"]], on=["region", "date"], how="left") 101 | df["percentage"] = round(df["number"] / df["all_number"] * 100, 4) 102 | df.drop(columns=["all_number"], inplace=True) 103 | 104 | # Save to CSV 105 | df.to_csv("./example/Cubes/Population_Aargau/data.csv", index=False) 106 | print("Saved extracted data to data.csv") 107 | -------------------------------------------------------------------------------- /example/Cubes/corona/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "../../linpy/description.schema.json", 3 | "Name": { 4 | "de": "Corona-Zahlen Zeitverlauf", 5 | "en": "Corona Numbers Timeline" 6 | }, 7 | "Description": { 8 | "de": "Zeitreihe der Corona-Zahlen für Schleswig-Holstein\r\n\r\nEnthalten sind folgende Felder:\r\n\r\n- `Datum` - Datum im Format TT.MM.JJJJ\r\n- `Gemeldete Fälle`\r\n- `Hospitalisierungen`\r\n- `Verstorben`\r\n\r\nFeldtrenner ist Komma, Zeichenketten-Trenner ist doppeltes Anführungszeichen (\")\r\n\r\nBasis für die hier veröffentlichten Daten sind die Zahlen, die die Kreise und kreisfreien Städte auf dem offiziellen Meldeweg der Landesmeldestelle mitteilen. Da die Datenerfassung und Übermittlung Zeit benötigt, können Abweichungen von den vor Ort kommunizierten Fällen entstehen. 
Im Einzelfall kann es auch zu einer Reduzierung der gemeldeten Fälle kommen, zum Beispiel wenn sich eine Meldung nicht bestätigt hat oder der Wohnort der Person außerhalb des Kreises liegt.\r\n\r\n[Mehr Daten und Diagramme zu Corona in Schleswig-Holstein](https://www.schleswig-holstein.de/DE/Schwerpunkte/Coronavirus/Zahlen/zahlen_node.html)", 9 | "en": "Time series of Corona numbers for Schleswig-Holstein\n\nThe following fields are included: \n\n— ‘date’ — date in format dd.mm.yyyy\n— ‘reported cases’\n— ‘hospitalisations’\n— ‘deceased’\n\nField separator is comma, string separator is double quote (\")\n\nThe data published here are based on the figures provided by the districts and non-circular cities by the official means of reporting to the Land Registration Office. As data collection and transmission takes time, deviations from the cases communicated on the spot may arise. In individual cases, there may also be a reduction in the reported cases, for example if a report has not been confirmed or if the person’s place of residence is outside the circle. 
\n \n[More data and charts on Corona in Schleswig-Holstein](https://www.schleswig-holstein.de/DE/Schwerpunkte/Coronavirus/Zahlen/zahlen_node.html)" 10 | }, 11 | "Creator": [ 12 | { 13 | "IRI": "https://opendata.schleswig-holstein.de/organization/94498aaa-9bd0-4ac0-ae70-db319a3ca1d8" 14 | } 15 | ], 16 | "Publisher": [ 17 | { 18 | "IRI": "https://opendata.schleswig-holstein.de/organization/94498aaa-9bd0-4ac0-ae70-db319a3ca1d8" 19 | } 20 | ], 21 | "Contributor": [ 22 | { 23 | "IRI": "https://schleswig-holstein.de/opendata/", 24 | "Name": "Schleswig Holstein Open Data" 25 | } 26 | ], 27 | "Date Created": "2023-10-05T00:00:00Z", 28 | "Contact Point": { 29 | "E-Mail": "info@schleswig-holstein.de", 30 | "Name": "Landesmeldestelle Schleswig-Holstein" 31 | }, 32 | "Base-URI": "https://phpefi.schleswig-holstein.de/corona/data202011/cvd_sh_verlauf.csv", 33 | "Identifier": "cvd_sh_verlauf", 34 | "Version": 1.0, 35 | "Work Status": "Published", 36 | "Visualize": true, 37 | "Accrual Periodicity": "daily", 38 | "Namespace": "https://phpefi.schleswig-holstein.de/corona", 39 | "dimensions": { 40 | "Datum": { 41 | "name": { 42 | "de": "Datum", 43 | "en": "Date" 44 | }, 45 | "dimension-type": "Key Dimension", 46 | "scale-type": "ordinal", 47 | "path": "date", 48 | "description": { 49 | "en": "Date in format dd.mm.yyyy", 50 | "de": "Datum im Format TT.MM.JJJJ" 51 | }, 52 | "mapping": { 53 | "type": "regex", 54 | "pattern": "(?P[0-9]{2})\\.(?P[0-9]{2})\\.(?P[0-9]{4})", 55 | "replacement": "\\g/\\g/\\g", 56 | "value-type": "Literal" 57 | } 58 | }, 59 | "Gemeldete Fälle": { 60 | "name": { 61 | "de": "Gemeldete Fälle", 62 | "en": "Reported Cases" 63 | }, 64 | "dimension-type": "Measure Dimension", 65 | "scale-type": "ratio", 66 | "path": "reported_cases", 67 | "description": { 68 | "en": "Number of reported cases" 69 | } 70 | }, 71 | "Hospitalisierungen": { 72 | "name": { 73 | "de": "Hospitalisierungen", 74 | "en": "Hospitalisations" 75 | }, 76 | "dimension-type": "Measure Dimension", 77 
| "scale-type": "ratio", 78 | "path": "hospitalisations", 79 | "description": { 80 | "en": "Number of hospitalisations" 81 | } 82 | }, 83 | "Verstorben": { 84 | "name": { 85 | "de": "Verstorben", 86 | "en": "Deceased" 87 | }, 88 | "dimension-type": "Measure Dimension", 89 | "scale-type": "ratio", 90 | "path": "deceased", 91 | "description": { 92 | "en": "Number of deceased" 93 | } 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /example/Cubes/Population_Aargau/description.yml: -------------------------------------------------------------------------------- 1 | Name: 2 | de: Bevölkerung - Bevölkerungsbestand - Altersaufbau 3 | en: Population - Population Stock - Age Structure 4 | Description: 5 | de: Die kantonale Bevölkerungsstatistik erfasst die ständige Wohnbevölkerung an ihrem Hauptwohnsitz. Das heisst, dass beispielsweise Wochenaufenthalter nicht dort gezählt werden, wo sie sich während der Woche aufhalten, sondern dort wo sie angemeldet sind. Nicht enthalten sind Kurzaufenthalter mit einer Aufenthaltsbewilligung von unter 12 Monaten. Sofern für den 31.12. eines Jahres und für den 01.01. des Folgejahrs Daten vorliegen, sind diese grundsätzlich identisch. Wenn aber per 01.01. des Folgejahrs eine Fusion zweier Gemeinden stattgefunden hat, ist der Gebietsstand unterschiedlich. Entsprechend werden die Daten per 01.01. des Folgejahrs für die fusionierte Gemeinde ausgewiesen. 6 | en: The cantonal population statistics record the permanent resident population at their main residence. This means that, for example, weekly commuters are not counted where they stay during the week, but where they are registered. Short-term residents with a residence permit of less than 12 months are not included. If data is available for December 31 of one year and January 1 of the following year, these are generally identical. 
However, if a merger of two municipalities has taken place on January 1 of the following year, the territorial status is different. Accordingly, the data for January 1 of the following year is shown for the merged municipality. 7 | Publisher: 8 | - IRI: https://www.ag.ch/de/verwaltung/dfr/statistik 9 | Creator: 10 | - IRI: https://www.ag.ch/de/verwaltung/dfr/statistik 11 | Contributor: 12 | - IRI: https://www.ag.ch/de/verwaltung/dfr/statistik 13 | Date Created: 14 | 2025-01-01 15 | Contact Point: 16 | E-Mail: statistik@ag.ch 17 | Name: Statistik Aargau 18 | Base-URI: https://ld.admin.ch/bfh/ 19 | Identifier: poc_ag 20 | Version: 1 21 | Work Status: 22 | Draft 23 | Visualize: 24 | True 25 | # Optional but recommended 26 | Accrual Periodicity: irregular 27 | 28 | # Optional 29 | Namespace: pocag 30 | 31 | dimensions: 32 | # required 33 | region: 34 | name: 35 | de: Region 36 | en: Region 37 | description: 38 | de: Region 39 | en: Region 40 | dimension-type: Key Dimension 41 | datatype: URI 42 | scale-type: nominal 43 | path: region 44 | mapping: 45 | type: function 46 | filepath: example/Cubes/Population_Aargau/func.py 47 | function-name: replace_with_shared_dimension 48 | data-kind: 49 | type: spatial-shape 50 | hierarchy: 51 | - root: https://ld.admin.ch/canton/19 52 | name: AG - District - Municipality 53 | next-in-hierarchy: 54 | name: District 55 | path: http://schema.org/hasPart 56 | next-in-hierarchy: 57 | name: Municipality 58 | path: http://schema.org/hasPart 59 | 60 | date: 61 | name: 62 | de: Datum 63 | en: Date 64 | description: 65 | de: Datum der Erhebung 66 | en: Date of the survey 67 | dimension-type: Key Dimension 68 | scale-type: interval 69 | datatype: date 70 | path: date 71 | data-kind: 72 | type: temporal 73 | unit: day 74 | 75 | group: 76 | name: 77 | de: Altersgruppe 78 | en: Age Group 79 | description: 80 | de: Altersgruppe 81 | en: Age Group 82 | dimension-type: Key Dimension 83 | scale-type: nominal 84 | datatype: URI 85 | path: ageGroup 86 
| mapping: 87 | type: concept 88 | replacement-automated: /age_group/{group} 89 | 90 | number: 91 | name: 92 | de: Anzahl 93 | en: Number 94 | description: 95 | de: Anzahl 96 | en: Number 97 | dimension-type: Measure Dimension 98 | scale-type: ratio 99 | datatype: integer 100 | path: number 101 | unit: NUM 102 | 103 | percentage: 104 | name: 105 | de: Anteil 106 | en: Percentage 107 | description: 108 | de: Anteil der Altersgruppe an der Gesamtbevölkerung 109 | en: Percentage of the age group in the total population 110 | dimension-type: Measure Dimension 111 | scale-type: ratio 112 | datatype: float 113 | path: percentage 114 | unit: PERCENT 115 | 116 | Concepts: 117 | age-group: 118 | URI: /age_group/{ageID} 119 | name-field: ageName 120 | multilingual: true 121 | other-fields: 122 | ageDescription: 123 | URI: http://schema.org/description 124 | multilingual: true 125 | datatype: string 126 | -------------------------------------------------------------------------------- /tests/test.yml: -------------------------------------------------------------------------------- 1 | Name: 2 | de: Mock Cube - two sided error 3 | fr: Mock Cube - two sided error 4 | it: Mock Cube - two sided error 5 | en: Mock Cube - two sided error 6 | Description: 7 | de: Ein Beispiel Cube, der simulierte Daten enthält mit zweiseitiger Unsicherheit 8 | en: An example Cube containing some simulated data with two-sided uncertainty 9 | Publisher: 10 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 11 | Creator: 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 13 | Contributor: 14 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 15 | Name: Bundesamt für Mock Data 16 | Date Created: 17 | 2024-11-12 18 | Contact Point: 19 | E-Mail: contact@mock.ld.admin.ch 20 | Name: Bundesamt für Mock Data 21 | Base-URI: https://mock.ld.admin.ch/ 22 | Identifier: mock-two-sided 23 | Version: 1 24 | Work Status: 25 | Published 26 | 
Visualize: 27 | True 28 | Opendataswiss: 29 | True 30 | # Optional but recommended 31 | Accrual Periodicity: yearly 32 | 33 | # Optional 34 | Namespace: mock 35 | 36 | dimensions: 37 | # required 38 | Jahr: 39 | name: 40 | de: Jahr 41 | fr: An 42 | it: Anno 43 | en: Year 44 | description: 45 | de: Jahr der Erhebung 46 | dimension-type: Key Dimension 47 | datatype: URI 48 | scale-type: ordinal 49 | path: year 50 | data-kind: 51 | type: temporal 52 | unit: year 53 | mapping: 54 | type: additive 55 | base: https://ld.admin.ch/time/year/ 56 | 57 | Station: 58 | name: 59 | de: Station 60 | fr: Station 61 | it: Stazione 62 | en: Station 63 | description: 64 | de: Station der Untersuchung 65 | dimension-type: Key Dimension 66 | datatype: URI 67 | scale-type: nominal 68 | path: station 69 | hierarchy: 70 | - root: Schweiz 71 | name: Schweiz 72 | next-in-hierarchy: 73 | path: http://schema.org/hasPart 74 | name: Stationen 75 | mapping: 76 | type: replace 77 | replacements: 78 | Bern: https://mock.ld.admin.ch/station/01 79 | Zürich: https://mock.ld.admin.ch/station/02 80 | Schweiz: https://mock.ld.admin.ch/station/switzerland 81 | 82 | Wert: 83 | name: 84 | de: Wert 85 | fr: Valeur 86 | it: Valore 87 | en: Value 88 | description: 89 | de: Gemessener Wert an der Station 90 | dimension-type: Measure Dimension 91 | datatype: float 92 | scale-type: interval 93 | path: value 94 | unit: KiloGM 95 | 96 | UpperUnsicherheit: 97 | name: 98 | de: Upper Unsicherheit 99 | description: 100 | de: Upper Unsicherheit 101 | dimension-type: Upper uncertainty 102 | datatype: float 103 | relates-to: value 104 | scale-type: ratio 105 | path: upperUncertainty 106 | unit: PERCENT 107 | 108 | LowerUnsicherheit: 109 | name: 110 | de: Lower Unsicherheit 111 | description: 112 | de: Lower Unsicherheit 113 | dimension-type: Lower uncertainty 114 | datatype: float 115 | relates-to: value 116 | scale-type: ratio 117 | path: lowerUncertainty 118 | unit: PERCENT 119 | 120 | Wert2: 121 | name: 122 | de: 
Wert2 123 | fr: Valeur2 124 | it: Valore2 125 | en: Value2 126 | description: 127 | de: Gemessener Wert 2 an der Station 128 | dimension-type: Measure Dimension 129 | scale-type: ratio 130 | datatype: float 131 | path: value2 132 | unit: KiloGM 133 | annotation: 134 | - type: limit 135 | value: 11 136 | name: 137 | de: Richtwert 2020 in Zürich 138 | context: 139 | Jahr: 2020 140 | Station: Zürich 141 | - type: limit-range 142 | min-value: 9 143 | max-value: 13 144 | name: 145 | de: Zielwert für 2021 in Zürich 146 | context: 147 | Jahr: 2021 148 | Station: Zürich 149 | 150 | Standardfehler: 151 | name: 152 | de: Standardfehler für Wert2 153 | fr: erreur standard pour Value2 154 | it: errore 155 | en: standard error for Value2 156 | description: 157 | de: Standardfehler der Schätzung Wert2 158 | dimension-type: Standard Error 159 | datatype: float 160 | relates-to: value2 161 | scale-type: ratio 162 | path: standardError 163 | unit: PERCENT 164 | 165 | Status: 166 | name: 167 | de: Veröffentlichungsstatus 168 | fr: Statut de publication 169 | it: Stato di pubblicazione 170 | en: State of publication 171 | description: 172 | de: Status der Veröffentlichung, provisorisch oder final 173 | dimension-type: Annotation 174 | scale-type: nominal 175 | path: status 176 | -------------------------------------------------------------------------------- /example/Cubes/co2-limits/description.yml: -------------------------------------------------------------------------------- 1 | Name: 2 | de: CO2-Emissionen nach Energieträger 3 | en: CO2 Emissions per energy source 4 | fr: CO2-Emissions par source d'energie 5 | it: Emissioni di CO2 per fonte di energia 6 | Description: 7 | de: Ein Beispiel Cube mit einem Zielwert 8 | en: An example Cube with a target value 9 | fr: Un exemple de Cube avec une valeur cible 10 | it: Un esempio di Cube con un valore obiettivo 11 | Publisher: 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 13 | Creator: 14 | - 
IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 15 | Contributor: 16 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 17 | Name: Bundesamt für Umwelt 18 | Date Created: 19 | 2024-11-26 20 | Contact Point: 21 | E-Mail: marco.kronenberg@bafu.admin.ch 22 | Name: Bundesamt für Umwelt 23 | Base-URI: https://environment.ld.admin.ch/foen/test_target/ 24 | Identifier: target_1 25 | Version: 1 26 | Work Status: 27 | Draft 28 | Visualize: 29 | True 30 | # Optional but recommended 31 | Accrual Periodicity: yearly 32 | 33 | Namespace: limit_1 34 | 35 | dimensions: 36 | Jahr: 37 | name: 38 | de: Jahr 39 | fr: An 40 | it: Anno 41 | en: Year 42 | description: 43 | de: Jahr der Erhebung 44 | dimension-type: Key Dimension 45 | datatype: URI 46 | scale-type: ordinal 47 | path: year 48 | data-kind: temporal 49 | mapping: 50 | type: additive 51 | base: https://ld.admin.ch/time/year/ 52 | 53 | Energieträger: 54 | name: 55 | de: Energieträger 56 | fr: Source d'energie 57 | it: Fonte di energia 58 | en: Energy source 59 | description: 60 | de: Energieträger der Objekte 61 | fr: Source d'energie 62 | it: Fonte di energia 63 | en: Energy source 64 | dimension-type: Key Dimension 65 | datatype: URI 66 | scale-type: nominal 67 | path: energySource 68 | mapping: 69 | type: replace 70 | replacements: 71 | Brennstoffe: https://mock.ld.admin.ch/energySource/01 72 | Treibstoffe: https://mock.ld.admin.ch/energySource/02 73 | 74 | Nicht gerundeter Wert (Mt): 75 | name: 76 | de: CO2-Emissionen 77 | en: CO2 Emissions 78 | fr: CO2-Emissions 79 | it: Emissioni di CO2 80 | description: 81 | de: Emissionen von CO2 in Millionen Tonnen 82 | en: Emissions of CO2 in millions of tonnes 83 | fr: Emissions de CO2 en millions de tonnes 84 | it: Emissioni di CO2 in milioni di tonnellate 85 | dimension-type: Measure Dimension 86 | datatype: float 87 | scale-type: ratio 88 | path: co2Emissions 89 | unit: MegaTONNE 90 | annotation: 91 | - type: 
limit-range 92 | min-value: 1.708845e+01 93 | max-value: 1.779072e+01 94 | name: 95 | de: Richtwertbande 2016 für Brennstoffe 96 | en: target value range for thermal fuel 2016 97 | fr: bande de valeurs cibles pour le combustible thermique 2016 98 | it: bande di valori obiettivo per il combustibile termico 2016 99 | context: 100 | Jahr: 2016 101 | Energieträger: Brennstoffe 102 | - type: limit 103 | value: 1.568392e+01 104 | name: 105 | de: Richtwert 2021 für Brennstoffe 106 | en: target value for thermal fuel 2021 107 | fr: valeur cible pour le combustible thermique 2021 108 | it: valore obiettivo per il combustibile termico 2021 109 | context: 110 | Jahr: 2021 111 | Energieträger: Brennstoffe 112 | - type: limit 113 | value: 1.849298e+01 114 | name: 115 | de: Richtwert 2012 für Brennstoffe 116 | en: target value for thermal fuel 2012 117 | fr: valeur cible pour le combustible thermique 2012 118 | it: valore obiettivo per il combustibile termico 2012 119 | context: 120 | Jahr: 2012 121 | Energieträger: Brennstoffe 122 | - type: limit-range 123 | min-value: 1.779072e+01 124 | max-value: 1.82589e+01 125 | name: 126 | de: Richtwertbande 2014 für Brennstoffe 127 | en: target value range for thermal fuel 2014 128 | fr: bande de valeurs cibles pour le combustible thermique 2014 129 | it: bande di valori obiettivo per il combustibile termico 2014 130 | context: 131 | Jahr: 2014 132 | Energieträger: Brennstoffe 133 | 134 | CO2-Emissionen (Mt): 135 | name: 136 | de: CO2-Emissionen (gerundet) 137 | en: CO2 Emissions (rounded) 138 | fr: CO2-Emissions (rounde) 139 | it: Emissioni di CO2 (arrotondati) 140 | description: 141 | de: gerundete emissionen von CO2 in Millionen Tonnen 142 | en: rounded emissions of CO2 in millions of tonnes 143 | fr: arrondies emissions de CO2 en millions de tonnes 144 | it: arrondite emissioni di CO2 in milioni di tonnellate 145 | dimension-type: Measure Dimension 146 | datatype: float 147 | scale-type: ratio 148 | path: roundedCO2Emissions 149 | 
unit: MegaTONNE -------------------------------------------------------------------------------- /example/Cubes/Biotope_Statistik/description.yml: -------------------------------------------------------------------------------- 1 | Name: 2 | de: Biotope von nationaler Bedeutung 3 | fr: Biotopes d'importance nationale 4 | it: Biotope von nationaler Bedeutung 5 | en: Biotope von nationaler Bedeutung 6 | Description: 7 | de: Die fünf Biotopinventare Hoch- und Flachmoore, Auen, Amphibienlaichgebiete und Trockenwiesen/-weiden im Überblick 8 | fr: Vue d’ensemble des cinq inventaires de biotopes - hauts-marais, bas-marais, zones alluviales, sites de reproduction de batraciens ainsi que prairies et pâturages secs 9 | it: Vue d’ensemble des cinq inventaires de biotopes - hauts-marais, bas-marais, zones alluviales, sites de reproduction de batraciens ainsi que prairies et pâturages secs 10 | en: Die fünf Biotopinventare Hoch- und Flachmoore, Auen, Amphibienlaichgebiete und Trockenwiesen/-weiden im Überblick 11 | Publisher: 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 13 | Creator: 14 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 15 | Contributor: 16 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 17 | Name: Bundesamt für Umwelt 18 | Date Created: 19 | 2024-01-16 20 | Contact Point: 21 | E-Mail: info@bafu.admin.ch 22 | Name: Bundesamt für Umwelt 23 | Base-URI: https://environment.ld.admin.ch/foen/biotopes 24 | Identifier: UZ-2404 25 | Version: 1 26 | Work Status: 27 | Draft 28 | Visualize: 29 | True 30 | # Optional but recommended 31 | Accrual Periodicity: irregular 32 | 33 | Namespace: biotop 34 | 35 | dimensions: 36 | Typ: 37 | name: 38 | de: Art des Biotops 39 | fr: Art des Biotops 40 | it: Art des Biotops 41 | en: Art des Biotops 42 | description: 43 | de: Die Art des Biotopes nach denen in der Biotopstatistik unterschieden wird 44 | dimension-type: Key Dimension 
45 | datatype: URI 46 | scale-type: nominal 47 | path: type 48 | mapping: 49 | type: replace 50 | replacements: 51 | Hochmoore (Typen I + II): https://environment.ld.admin.ch/foen/biotopes/1 52 | Flachmoore: https://environment.ld.admin.ch/foen/biotopes/2 53 | Auengebiete: https://environment.ld.admin.ch/foen/biotopes/3 54 | Amphibienlaichgebiete: https://environment.ld.admin.ch/foen/biotopes/4 55 | Trockenwiesen und -weiden: https://environment.ld.admin.ch/foen/biotopes/5 56 | Biotope: https://environment.ld.admin.ch/foen/biotopes/tot 57 | hierarchy: 58 | - root: Biotope 59 | name: Biotope 60 | next-in-hierarchy: 61 | path: http://schema.org/hasPart 62 | name: Biotoparten 63 | 64 | Überlappung: 65 | name: 66 | de: Überlappung 67 | fr: Überlappung 68 | it: Überlappung 69 | en: Überlappung 70 | description: 71 | de: Die Fläche kann einzeln pro Objekt oder überlappend betrachtet werden. 72 | dimension-type: Key Dimension 73 | datatype: URI 74 | scale-type: nominal 75 | path: overlap 76 | mapping: 77 | type: replace 78 | replacements: 79 | Mit Überlappung: https://environment.ld.admin.ch/foen/overlapping/1 80 | Ohne Überlappung: https://environment.ld.admin.ch/foen/overlapping/2 81 | 82 | Anzahl: 83 | name: 84 | de: Anzahl Objekte 85 | fr: Anzahl Objekte 86 | it: Anzahl Objekte 87 | en: Anzahl Objekte 88 | description: 89 | de: Anzahl der Objekte 90 | datatype: integer 91 | dimension-type: Measure Dimension 92 | scale-type: ratio 93 | path: count 94 | unit: UNITLESS 95 | 96 | Anteil der CH-Biotope: 97 | name: 98 | de: Anteil der CH-Biotope 99 | fr: Anteil der CH-Biotope 100 | it: Anteil der CH-Biotope 101 | en: Anteil der CH-Biotope 102 | description: 103 | de: Anteil der Objekte an Gesamtmenge der Biotope 104 | dimension-type: Measure Dimension 105 | datatype: float 106 | scale-type: ratio 107 | path: ratio_of_objects 108 | unit: PERCENT 109 | 110 | Fläche: 111 | name: 112 | de: Fläche der Objekte 113 | fr: Fläche der Objekte 114 | it: Fläche der Objekte 115 | en: 
Fläche der Objekte 116 | description: 117 | de: Summe der Flächen der Objekte 118 | dimension-type: Measure Dimension 119 | datatype: float 120 | scale-type: ratio 121 | path: area 122 | unit: HA 123 | 124 | Anteil CH-Fläche: 125 | name: 126 | de: Anteil CH-Fläche 127 | fr: Anteil CH-Fläche 128 | it: Anteil CH-Fläche 129 | en: Anteil CH-Fläche 130 | description: 131 | de: Anteil der Objekte an der Gesamtfläche der Schweiz 132 | dimension-type: Measure Dimension 133 | datatype: float 134 | scale-type: ratio 135 | path: area_of_switzerland 136 | unit: PERCENT 137 | 138 | Anteil der CH-Biotope (Fläche): 139 | name: 140 | de: Anteil der CH-Biotope (Fläche) 141 | fr: Anteil der CH-Biotope (Fläche) 142 | it: Anteil der CH-Biotope (Fläche) 143 | en: Anteil der CH-Biotope (Fläche) 144 | description: 145 | de: Anteil der Objekte an der Gesamtfläche aller Objekte in der Biotopstatistik 146 | dimension-type: Measure Dimension 147 | datatype: float 148 | scale-type: ratio 149 | path: area_of_biotopes 150 | unit: PERCENT -------------------------------------------------------------------------------- /example/Cubes/concept_table_airport/description.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "$schema": "../../linpy/description.schema.json" 3 | Name: 4 | en: Example with a concept table 5 | fr: Exemple avec une table de concept 6 | de: Example with a concept table (DE) 7 | it: Example with a concept table (IT) 8 | Description: 9 | fr: Un jeu de données avec deux csv, un pour une table de concept 10 | en: A dataset containing two csv, one for a concept table 11 | Publisher: 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 13 | Creator: 14 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 15 | Contributor: 16 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 17 | Name: Bundesamt für Mock Data 18 | Date Created: '2024-08-26T00:00:00.000Z' 19 | Contact 
Point: 20 | E-Mail: contact@mock.ld.admin.ch 21 | Name: Bundesamt für Mock Data 22 | Base-URI: https://mock-concept.ld.admin.ch/ 23 | Identifier: mock-concept 24 | Version: 1 25 | Work Status: Draft 26 | Visualize: true 27 | Accrual Periodicity: yearly 28 | Namespace: mock 29 | dimensions: 30 | year: 31 | name: 32 | de: Jahr 33 | fr: Année 34 | it: Anno 35 | en: Year 36 | description: 37 | de: Jahr der Erhebung 38 | fr: Année du relevé 39 | it: Anno di rilevamento 40 | en: Year of survey 41 | dimension-type: Key Dimension 42 | scale-type: ordinal 43 | path: Jahr 44 | datatype: URI 45 | data-kind: 46 | type: temporal 47 | unit: year 48 | mapping: 49 | type: additive 50 | base: https://ld.admin.ch/time/year/ 51 | typeOfAirport: 52 | name: 53 | fr: Type d'aéroport 54 | de: Flughafentyp 55 | it: Tipo di aeroporto 56 | en: Type of Airport 57 | description: 58 | de: Flughafentyp - DESC 59 | fr: Type d'aéroport - DESC 60 | it: Tipo di aeroporto - DESC 61 | en: Type of Airport - DESC 62 | dimension-type: Key Dimension 63 | datatype: URI 64 | scale-type: nominal 65 | path: airport_type 66 | # This is a new mapping type, for concepts 67 | # Concepts are independent resources, a little like Shared Dimension Terms 68 | # See the 'Concepts' key below 69 | mapping: 70 | type: concept 71 | # Each value will be replaced by a URL that links to the concept 72 | # The URL starts with "/" and is relative to the URL of the cube 73 | # The URL must identify a concept and can be made of one or more fields 74 | # This is an example with two fields needed to identify an airport type: {typeOfAirport} and {typeOfAirport2nd} 75 | # It must match the URI defined for the concept (see below); the field names may differ because they come from different files 76 | replacement-automated: /airport_type/{typeOfAirport}/{typeOfAirport2nd} 77 | typeOfAirport2nd: 78 | name: 79 | fr: Type d'aéroport (second key for demo) 80 | de: Flughafentyp (second key for demo) 81 | it: Tipo di
aeroporto (second key for demo) 82 | en: Type of Airport (second key for demo) 83 | description: 84 | de: Flughafentyp - second key for demo 85 | fr: Type d'aéroport - second key for demo 86 | it: Tipo di aeroporto - second key for demo 87 | en: Type of Airport - second key for demo 88 | dimension-type: Key Dimension 89 | scale-type: nominal 90 | path: airport_type_2nd 91 | measure: 92 | name: 93 | fr: Valeur 94 | de: Wert 95 | it: Valore 96 | en: Value 97 | description: 98 | de: Wert - DESC 99 | fr: Valeur - DESC 100 | it: Valore - DESC 101 | en: Value - DESC 102 | dimension-type: Measure Dimension 103 | scale-type: interval 104 | path: value 105 | unit: kilogramm 106 | # See the README for further explanation about the Concepts metadata 107 | Concepts: 108 | typeOfAirport: 109 | # The URL starts with "/" and is relative to the URL of the cube 110 | # The URL must identify a concept and can be made of one or more fields 111 | # This is an example with two fields needed to identify an airport type: {typeOfAirportID} and {typeOfAirportSecondID} 112 | # It must match the URI defined for the dimension's replacement (see above); the field names may differ because they come from different files 113 | URI: /airport_type/{typeOfAirportID}/{typeOfAirportSecondID} 114 | # The name of the field/column that contains the name (label) 115 | name-field: typeOfAirport 116 | # Defines if the name is multilingual, meaning that a language suffix (_de, _fr, etc.)
will be concatenated to the name-field to find the different values 117 | multilingual: true 118 | # position-field is optional: the name of the csv column that contains a numeric position value for the concept 119 | # used by Visualize to order the concepts (instead of alphabetically) 120 | position-field: position 121 | # other-fields are optional, URI could be relative (and concatenated to the concept's URI) or a full URI starting with 'http/https' 122 | other-fields: 123 | description: 124 | URI: http://schema.org/description 125 | multilingual: true 126 | datatype: string 127 | other_property_example: 128 | URI: /airport_type/other_property_example 129 | datatype: string 130 | language: en -------------------------------------------------------------------------------- /tests/test_sdterms.py: -------------------------------------------------------------------------------- 1 | from pylindas.pyshareddimension import SharedDimension 2 | from rdflib import Graph 3 | import pandas as pd 4 | import pytest 5 | import yaml 6 | import os 7 | 8 | class TestClass: 9 | 10 | TEST_CASE_PATH = os.path.dirname(__file__) 11 | SHAREDDIMENSIONSHAPE = "https://raw.githubusercontent.com/Kronmar-Bafu/lindas-pylindas/refs/heads/main/pylindas/pyshareddimension/shared_dimension_shape.ttl" 12 | 13 | @classmethod 14 | def setup_test_shared_dimension(cls, dataframe_path: str, description_path: str) -> SharedDimension: 15 | with open(os.path.join(cls.TEST_CASE_PATH, description_path)) as file: 16 | description = yaml.safe_load(file) 17 | dataframe = pd.read_csv(os.path.join(cls.TEST_CASE_PATH, dataframe_path), sep=",") 18 | sd = SharedDimension(dataframe=dataframe, sd_yaml=description, environment="TEST", local=True) 19 | return sd.prepare_data().write_sd().write_terms() 20 | 21 | def setup_method(self): 22 | self.shared_dimension = self.setup_test_shared_dimension( 23 | "test_sdterms.csv", "test_sdterms.yml") 24 | 25 | # SHACL validation of the Shared Dimension 26 | # Please see the comment of
the SharedDimension.validate() method 27 | # in order to understand the parameters 28 | # This is work in progress as the SHACL file has to be passed as parameter instead of being downloaded from the Web behind the scene 29 | def test_perform_SHACL_validation(self): 30 | result_bool, result_message = self.shared_dimension.validate(self.SHAREDDIMENSIONSHAPE) 31 | assert result_bool == True 32 | 33 | # Test some basic triples of the shared dimension 34 | # Will better be done by the SHACL validation 35 | # However, this allows to test that triples that might be optional for SHACL are correctly generated in the example 36 | # as for instance schema:validFrom 37 | def test_shared_dimension_triples(self): 38 | sparql = ( 39 | "PREFIX meta: " 40 | "PREFIX schema: " 41 | "PREFIX dct: " 42 | "ASK" 43 | "{" 44 | " ?sd a meta:SharedDimension, schema:DefinedTermSet ;" 45 | " schema:name ?name ;" 46 | " schema:description ?desc ;" 47 | " schema:identifier ?identifier ;" 48 | " dct:contributor/schema:email ?contributorEmail ;" 49 | " dct:contributor/schema:name ?contributorName ;" 50 | " schema:validFrom ?validFrom" 51 | "}" 52 | ) 53 | 54 | result = self.shared_dimension._graph.query(sparql) 55 | assert bool(result) 56 | 57 | def test_shared_dimension_unwanted_triples(self): 58 | # A Shared dimension generated by code should not have the rdf:type: 59 | # hydra:Resource, md:SharedDimension 60 | # Those types are given to shared dimensions generated from the Cube Creator 61 | # When generated by code, the shared dimension should marked 'read-only' in the Cube Creator 62 | # and this is achieved by not having those 2 types 63 | sparql = ( 64 | "PREFIX meta: " 65 | "PREFIX md: " 66 | "PREFIX hydra: " 67 | "ASK" 68 | "{" 69 | " ?sd a meta:SharedDimension," 70 | " hydra:Resource, md:SharedDimension" 71 | "}" 72 | ) 73 | 74 | result = self.shared_dimension._graph.query(sparql) 75 | # assert that NO result is found 76 | assert not bool(result) 77 | 78 | 79 | def 
test_shared_dimension_terms(self): 80 | # Find terms that are missing some triples 81 | sparql = ( 82 | "PREFIX meta: " 83 | "PREFIX schema: " 84 | "PREFIX md: " 85 | "PREFIX hydra: " 86 | "PREFIX sd_md: " 87 | "ASK" 88 | "{" 89 | " ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ." 90 | " FILTER NOT EXISTS {?sd schema:identifier ?id;" 91 | " schema:inDefinedTermSet ;" 92 | " schema:name ?name ;" 93 | " schema:validFrom ?validFrom ;" 94 | " }" 95 | "}" 96 | ) 97 | 98 | result = self.shared_dimension._graph.query(sparql) 99 | # There should be no term missing those triples 100 | assert not bool(result) 101 | 102 | def test_shared_dimension_one_single_root(self): 103 | # In that example, there should be 3 individual terms 104 | sparql = ( 105 | "PREFIX schema: " 106 | "PREFIX sd_md: " 107 | "PREFIX skos: " 108 | "SELECT *" 109 | "{" 110 | " ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ." 111 | "}" 112 | ) 113 | 114 | result = self.shared_dimension._graph.query(sparql) 115 | # There should be exactly 3 results 116 | assert len(list(result)) == 3 -------------------------------------------------------------------------------- /tests/test_shared_dimension_generation.py: -------------------------------------------------------------------------------- 1 | from pylindas.pyshareddimension import SharedDimension 2 | from rdflib import Graph 3 | import pandas as pd 4 | import pytest 5 | import yaml 6 | 7 | class TestClass: 8 | 9 | TEST_CASE_PATH = "example/Shared_Dimensions/" 10 | 11 | @classmethod 12 | def setup_test_shared_dimension(cls, dataframe_path: str, description_path: str) -> SharedDimension: 13 | with open(cls.TEST_CASE_PATH + description_path) as file: 14 | description = yaml.safe_load(file) 15 | dataframe = pd.read_csv(cls.TEST_CASE_PATH + dataframe_path, sep=";") 16 | sd = SharedDimension(dataframe=dataframe, sd_yaml=description, environment="TEST", local=True) 17 | return sd.prepare_data().write_sd().write_terms() 18 | 19 | def setup_method(self): 20 
| self.shared_dimension = self.setup_test_shared_dimension( 21 | "shared_dimension_generation/sd_terms.csv", "shared_dimension_generation/sd_description.yml") 22 | 23 | # SHACL validation of the Shared Dimension 24 | # Please see the comment of the SharedDimension.validate() method 25 | # in order to understand the parameters 26 | # This is work in progress as the SHACL file has to be passed as parameter instead of being downloaded from the Web behind the scene 27 | def test_perform_SHACL_validation(self): 28 | result_bool, result_message = self.shared_dimension.validate("./pylindas/pyshareddimension/shared_dimension_shape.ttl") 29 | assert result_bool == True 30 | 31 | # Test some basic triples of the shared dimension 32 | # Will better be done by the SHACL validation 33 | # However, this allows to test that triples that might be optional for SHACL are correctly generated in the example 34 | # as for instance schema:validFrom 35 | def test_shared_dimension_triples(self): 36 | sparql = ( 37 | "PREFIX meta: " 38 | "PREFIX schema: " 39 | "PREFIX dct: " 40 | "ASK" 41 | "{" 42 | " ?sd a meta:SharedDimension, schema:DefinedTermSet ;" 43 | " schema:name ?name ;" 44 | " schema:description ?desc ;" 45 | " schema:identifier ?identifier ;" 46 | " dct:contributor/schema:email ?contributorEmail ;" 47 | " dct:contributor/schema:name ?contributorName ;" 48 | " schema:validFrom ?validFrom" 49 | "}" 50 | ) 51 | 52 | result = self.shared_dimension._graph.query(sparql) 53 | assert bool(result) 54 | 55 | def test_shared_dimension_unwanted_triples(self): 56 | # A Shared dimension generated by code should not have the rdf:type: 57 | # hydra:Resource, md:SharedDimension 58 | # Those types are given to shared dimensions generated from the Cube Creator 59 | # When generated by code, the shared dimension should marked 'read-only' in the Cube Creator 60 | # and this is achieved by not having those 2 types 61 | sparql = ( 62 | "PREFIX meta: " 63 | "PREFIX md: " 64 | "PREFIX hydra: " 65 | 
"ASK" 66 | "{" 67 | " ?sd a meta:SharedDimension," 68 | " hydra:Resource, md:SharedDimension" 69 | "}" 70 | ) 71 | 72 | result = self.shared_dimension._graph.query(sparql) 73 | # assert that NO result is found 74 | assert not bool(result) 75 | 76 | 77 | def test_shared_dimension_terms(self): 78 | # Find terms that are missing some triples 79 | sparql = ( 80 | "PREFIX meta: " 81 | "PREFIX schema: " 82 | "PREFIX md: " 83 | "PREFIX hydra: " 84 | "PREFIX sd_md: " 85 | "ASK" 86 | "{" 87 | " ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ." 88 | " FILTER NOT EXISTS {?sd schema:identifier ?id;" 89 | " schema:inDefinedTermSet ;" 90 | " schema:name ?name ;" 91 | " schema:validFrom ?validFrom ;" 92 | " }" 93 | "}" 94 | ) 95 | 96 | result = self.shared_dimension._graph.query(sparql) 97 | # There should be no term missing those triples 98 | assert not bool(result) 99 | 100 | def test_shared_dimension_one_single_root(self): 101 | # In that example, there should be only one terme with no parent (skos:broader) 102 | sparql = ( 103 | "PREFIX schema: " 104 | "PREFIX sd_md: " 105 | "PREFIX skos: " 106 | "SELECT *" 107 | "{" 108 | " ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ." 
109 | " FILTER NOT EXISTS {?sd skos:broader ?parent}" 110 | "}" 111 | ) 112 | 113 | result = self.shared_dimension._graph.query(sparql) 114 | # There should be only one result 115 | assert len(list(result)) == 1 -------------------------------------------------------------------------------- /example/Cubes/kita/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "../../linpy/description.schema.json", 3 | "Name": { 4 | "de": "Anzahl Kinder in Kindertagesstätten", 5 | "fr": "Nombre d'enfants dans les crèches", 6 | "it": "Numero di bambini negli asili nido", 7 | "en": "Number of kids in day care facilities" 8 | }, 9 | "Description": { 10 | "de": "Ein Datensatz, der die Anzahl der Kinder in Kindertagesstätten enthält", 11 | "en": "A dataset containing the number of kids in day care facilities" 12 | }, 13 | "Publisher": [ 14 | { 15 | "IRI": "https://schleswig-holstein.de/opendata/org/office_of_daycare" 16 | } 17 | ], 18 | "Creator": [ 19 | { 20 | "IRI": "https://schleswig-holstein.de/opendata/org/office_of_daycare" 21 | } 22 | ], 23 | "Contributor": [ 24 | { 25 | "IRI": "https://schleswig-holstein.de/opendata/org/office_of_daycare", 26 | "Name": "Bundesamt für Kindertagesstätten" 27 | } 28 | ], 29 | "Date Created": "2024-08-26T00:00:00.000Z", 30 | "Contact Point": { 31 | "E-Mail": "contact@daycare.ld.admin.ch", 32 | "Name": "Bundesamt für Kindertagesstätten" 33 | }, 34 | "Base-URI": "https://daycare.ld.admin.ch/", 35 | "Identifier": "kids-daycare-facilities-per-lander", 36 | "Version": 1, 37 | "Work Status": "Draft", 38 | "Visualize": true, 39 | "Accrual Periodicity": "yearly", 40 | "Namespace": "mock", 41 | "dimensions": { 42 | "Land": { 43 | "name": { 44 | "de": "Land", 45 | "fr": "Etat", 46 | "it": "Stato", 47 | "en": "State" 48 | }, 49 | "description": { 50 | "de": "Bundesland, in dem die Daten erhoben wurden" 51 | }, 52 | "dimension-type": "Key Dimension", 53 | "scale-type": "nominal", 54 | 
"data-kind": { 55 | "type": "spatial-shape" 56 | }, 57 | "path": "Land", 58 | "mapping": { 59 | "type": "additive", 60 | "base": "https://example.org/land/" 61 | } 62 | }, 63 | "Jahr": { 64 | "name": { 65 | "de": "Jahr", 66 | "fr": "Année", 67 | "it": "Anno", 68 | "en": "Year" 69 | }, 70 | "description": { 71 | "de": "Jahr der Erhebung", 72 | "fr": "Année du relevé", 73 | "it": "Anno di rilevamento", 74 | "en": "Year of survey" 75 | }, 76 | "dimension-type": "Key Dimension", 77 | "scale-type": "ordinal", 78 | "path": "Jahr", 79 | "data-kind": { 80 | "type": "temporal", 81 | "unit": "year" 82 | }, 83 | "mapping": { 84 | "type": "additive", 85 | "base": "https://ld.admin.ch/time/year/" 86 | } 87 | }, 88 | "Kinder bis unter 3 Jahren": { 89 | "name": { 90 | "fr": "Nombre d'enfants de moins de 3 ans", 91 | "de": "Kinder bis unter 3 Jahren", 92 | "it": "Numero di bambini di età inferiore a 3 anni", 93 | "en": "Children under 3 years old" 94 | }, 95 | "description": { 96 | "de": "Anzahl der Kinder bis unter 3 Jahren", 97 | "fr": "Nombre d'enfants de moins de 3 ans", 98 | "it": "Numero di bambini di età inferiore a 3 anni", 99 | "en": "Number of children under 3 years old" 100 | }, 101 | "dimension-type": "Measure Dimension", 102 | "scale-type": "interval", 103 | "path": "Kinder-bis-unter-3-Jahren" 104 | }, 105 | "Kinder von 3 bis unter 7 Jahren (ohne Schulkinder)": { 106 | "name": { 107 | "fr": "Nombre d'enfants de 3 à moins de 7 ans (sans enfants scolarisés)", 108 | "de": "Kinder von 3 bis unter 7 Jahren (ohne Schulkinder)", 109 | "it": "Numero di bambini da 3 a meno di 7 anni (senza bambini scolastici)", 110 | "en": "Children aged 3 to under 7 years (excluding school children)" 111 | }, 112 | "description": { 113 | "de": "Anzahl der Kinder von 3 bis unter 7", 114 | "fr": "Nombre d'enfants de 3 à moins de 7 ans", 115 | "it": "Numero di bambini da 3 a meno di 7 anni", 116 | "en": "Number of children aged 3 to under 7" 117 | }, 118 | "dimension-type": "Measure Dimension", 
119 | "scale-type": "interval", 120 | "path": "Kinder-von-3-bis-unter-7-Jahren" 121 | }, 122 | "Kinder von 5 bis unter 14 Jahren (nur Schulkinder)": { 123 | "name": { 124 | "fr": "Nombre d'enfants de 5 à moins de 14 ans (uniquement scolarisés)", 125 | "de": "Kinder von 5 bis unter 14 Jahren (nur Schulkinder)", 126 | "it": "Numero di bambini da 5 a meno di 14 anni (solo scolari)", 127 | "en": "Children aged 5 to under 14 years (school children only)" 128 | }, 129 | "description": { 130 | "de": "Anzahl der Kinder von 5 bis unter 15", 131 | "fr": "Nombre d'enfants de 5 à moins de 14 ans", 132 | "it": "Numero di bambini da 5 a meno di 14 anni", 133 | "en": "Number of children aged 5 to under 14" 134 | }, 135 | "dimension-type": "Measure Dimension", 136 | "scale-type": "interval", 137 | "path": "Kinder-von-5-bis-unter-14-Jahren" 138 | }, 139 | "Kinder von 7 Jahren und älter (Nicht-schulkinder)": { 140 | "name": { 141 | "fr": "Nombre d'enfants de 7 ans et plus (non scolarisés)", 142 | "de": "Kinder von 7 Jahren und älter (Nicht-schulkinder)", 143 | "it": "Numero di bambini di 7 anni e più (non scolari)", 144 | "en": "Children aged 7 and older (non-school children)" 145 | }, 146 | "description": { 147 | "de": "Anzahl der Kinder von 7 Jahren und älter (Nicht-schulkinder)", 148 | "fr": "Nombre d'enfants de 7 ans et plus (non scolarisés)", 149 | "it": "Numero di bambini di 7 anni e più (non scolari)", 150 | "en": "Number of children aged 7 and older (non-school children)" 151 | }, 152 | "dimension-type": "Measure Dimension", 153 | "scale-type": "interval", 154 | "path": "Kinder-von-7-Jahren-und-älter" 155 | }, 156 | "Insgesamt": { 157 | "name": { 158 | "fr": "Nombre total d'enfants", 159 | "de": "Gesamtzahl der Kinder", 160 | "it": "Numero totale di bambini", 161 | "en": "Total number of children" 162 | }, 163 | "description": { 164 | "de": "Gesamtzahl der Kinder in Kindertagesstätten", 165 | "fr": "Nombre total d'enfants dans les crèches", 166 | "it": "Numero totale di 
bambini negli asili nido", 167 | "en": "Total number of children in day care facilities" 168 | }, 169 | "dimension-type": "Measure Dimension", 170 | "scale-type": "interval", 171 | "path": "Insgesamt" 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /pylindas/shared_dimension_queries/shared_dimensions_queries.py: -------------------------------------------------------------------------------- 1 | from SPARQLWrapper import SPARQLWrapper, JSON 2 | from rdflib import URIRef 3 | from typing import List 4 | import json 5 | 6 | """ 7 | Author: Fabian Cretton - HEVS 8 | 9 | The goal of this file is to become a tool for developers to find a useful shared dimension, 10 | then get the URLs of the terms in order to configure the mapping for a cube's dimension. 11 | 12 | It is not yet a class with methods, and contains code that could be more generic. 13 | For instance, query_lindas could be a very generic function as the one found in /lindas/query.py 14 | But existing query_lindas() is specific for ASK queries (returns a bool value) 15 | 16 | See an example usage in example_sd.py 17 | 18 | This is a first implementation of: 19 | - Basic queries to request shared dimensions information from LINDAS 20 | - Display the results, line by line 21 | """ 22 | 23 | def query_lindas(query: str, environment: str): 24 | """ 25 | Send a SPARQL query to a LINDAS end-point and return the JSON result 26 | Note: the values of the different environments URL should come from a config file/environment variables 27 | """ 28 | match environment: 29 | case "PROD": 30 | endpoint = "https://lindas.admin.ch/query" 31 | case "INT": 32 | endpoint = "https://int.lindas.admin.ch/query" 33 | case _: 34 | endpoint = "https://test.lindas.admin.ch/query" 35 | 36 | sparql = SPARQLWrapper(endpoint) 37 | sparql.setQuery(query=query) 38 | sparql.setReturnFormat(JSON) 39 | return sparql.query().convert() 40 | 41 | def list_shared_dimensions(environment: 
str, name_lng: str="en", offset: int=0, limit: int=0, search_word: str=""): 42 | """ 43 | List existing Shared Dimensions in a specific environment 44 | Returns the JSON object of the SPARQL query result 45 | 46 | Args: 47 | limit: no limit if 0 48 | 49 | If a SD has a validThrough date, it could be deprecated (depending on the current date) 50 | """ 51 | query = f""" 52 | PREFIX meta: 53 | PREFIX schema: 54 | SELECT * WHERE {{ 55 | ?sd a meta:SharedDimension . 56 | OPTIONAL{{ ?sd schema:name ?name .}} 57 | FILTER(lang(?name) = \"{name_lng}\") 58 | OPTIONAL{{?sd schema:validFrom ?validFrom}} 59 | OPTIONAL{{?sd schema:validThrough ?validThrough}} 60 | """ 61 | 62 | if search_word != "": 63 | query += f"FILTER contains(?name,\"{search_word}\")" 64 | 65 | query += f""" 66 | }} 67 | ORDER BY ?name 68 | OFFSET {offset} 69 | """ 70 | if limit != 0: 71 | query += f"LIMIT {limit}" 72 | 73 | #print(query) 74 | return query_lindas(query, environment=environment) 75 | 76 | def list_shared_dimensions_print(result: json, environment_for_terms: str=""): 77 | """ 78 | Print the result of the list_shared_dimensions() query 79 | To the console, in a friendly manner, one sd per line with its URL, label, validFrom and validThrough values 80 | 81 | Args: 82 | environment_for_terms: if an environment is passed, for each shared dimension 2 terms will be queried and displayed 83 | This possibility to display 2 terms by querying LINDAS is just a POC, should be better refined 84 | """ 85 | # Pretty print the JSON - for debuging purpose 86 | #print(json.dumps(result, indent=4)) 87 | 88 | # Loop through the "bindings" and display dimensions name and URL (sd) 89 | if 'results' in result and 'bindings' in result['results'] and result['results']['bindings']: 90 | for item in result['results']['bindings']: 91 | # Extract the 'sd' and 'name' values 92 | sd = item['sd']['value'] 93 | 94 | if 'name' in item: 95 | name = item['name']['value'] 96 | else: 97 | name = "(no name in that language)" 98 | 
99 | if 'validFrom' in item: 100 | validFrom = "- validFrom " + item['validFrom']['value'] 101 | else: 102 | validFrom = "" 103 | 104 | if 'validThrough' in item: 105 | validThrough = "- validThrough " + item['validThrough']['value'] 106 | else: 107 | validThrough = "" 108 | 109 | print(f"{name} <{sd}> {validFrom} {validThrough}") 110 | 111 | # if -> list 2 terms for that sd 112 | if environment_for_terms != "": 113 | termsResult = list_shared_dimension_terms(environment_for_terms, sd, "en", 0, 2) 114 | print("{ Terms sample:") 115 | print_sparql_result(termsResult, ["name", "sdTerm"]) 116 | print("}") 117 | 118 | else: 119 | print("No result binding found in that JSON result") 120 | 121 | def list_shared_dimension_terms(environment: str, sd_URL: URIRef, name_lng: str="en", offset: int=0, limit: int=0): 122 | """ 123 | List the terms URL of a Shared Dimensions in a specific environment 124 | Returns the JSON object of the SPARQL query result 125 | 126 | Args: 127 | limit: no limit if 0 128 | """ 129 | query = f""" 130 | PREFIX schema: 131 | SELECT * WHERE {{ 132 | ?sdTerm schema:inDefinedTermSet <{sd_URL}> . 
133 | OPTIONAL{{?sdTerm schema:name ?name .}} 134 | FILTER(lang(?name) = \"{name_lng}\") 135 | }} 136 | ORDER BY ?name 137 | OFFSET {offset} 138 | """ 139 | 140 | if limit != 0: 141 | query += f"LIMIT {limit}" 142 | 143 | #print(query) 144 | return query_lindas(query, environment=environment) 145 | 146 | def print_sparql_result(result: json, fields: List[str]): 147 | """ 148 | Print line by line the result of a sparql query, according to the fields in the list parameter 149 | - Each field is tested for existance (this function do not know about the mandatory/OPTIONAL field in the original query) 150 | - If a value starts with "http" -> it is displayed inbetween <> 151 | """ 152 | 153 | if 'results' in result and 'bindings' in result['results'] and result['results']['bindings']: 154 | for item in result['results']['bindings']: 155 | line = "" 156 | for field in fields: 157 | if field in item: 158 | fieldValue = item[field]['value'] 159 | if fieldValue.lower().startswith("http"): 160 | fieldValue = "<" + fieldValue + ">" 161 | else: 162 | fieldValue = "" 163 | 164 | line += fieldValue + " " 165 | 166 | print(line) 167 | else: 168 | print("No result binding found in that JSON result") -------------------------------------------------------------------------------- /pylindas/description.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "properties": { 5 | "Name": { 6 | "type": "object", 7 | "properties": { 8 | "de": { "type": "string" }, 9 | "fr": { "type": "string" }, 10 | "it": { "type": "string" }, 11 | "en": { "type": "string" } 12 | }, 13 | "required": ["de", "en"] 14 | }, 15 | "Description": { 16 | "type": "object", 17 | "properties": { 18 | "de": { "type": "string" }, 19 | "en": { "type": "string" } 20 | }, 21 | "required": ["de", "en"] 22 | }, 23 | "Publisher": { 24 | "type": "array", 25 | "items": { 26 | "$ref": 
"#/definitions/Publisher" 27 | } 28 | }, 29 | "Creator": { 30 | "type": "array", 31 | "items": { 32 | "$ref": "#/definitions/Creator" 33 | } 34 | }, 35 | "Contributor": { 36 | "type": "array", 37 | "items": { 38 | "$ref": "#/definitions/Contributor" 39 | } 40 | }, 41 | "Themes": { 42 | "type": "array", 43 | "items": { 44 | "$ref": "#/definitions/Theme" 45 | } 46 | }, 47 | 48 | "Date Created": { 49 | "type": "string", 50 | "format": "date-time" 51 | }, 52 | "Contact Point": { 53 | "$ref": "#/definitions/ContactPoint" 54 | }, 55 | "Base-URI": { 56 | "type": "string" 57 | }, 58 | "Identifier": { 59 | "type": "string" 60 | }, 61 | "Version": { 62 | "type": "number" 63 | }, 64 | "Work Status": { 65 | "type": "string", 66 | "enum": ["Draft", "In Progress", "Published", "Obsolete"] 67 | }, 68 | "Visualize": { 69 | "type": "boolean" 70 | }, 71 | "Accrual Periodicity": { 72 | "type": "string", 73 | "enum": ["daily", "weekly", "monthly", "quarterly", "yearly"] 74 | }, 75 | "Namespace": { 76 | "type": "string" 77 | }, 78 | "dimensions": { 79 | "type": "object", 80 | "patternProperties": { 81 | ".*": { 82 | "$ref": "#/definitions/Dimension" 83 | } 84 | }, 85 | "additionalProperties": true 86 | } 87 | }, 88 | 89 | "required": [ 90 | "Name", 91 | "Description", 92 | "Publisher", 93 | "Creator", 94 | "Contributor", 95 | "Date Created", 96 | "Contact Point", 97 | "Base-URI", 98 | "Identifier", 99 | "Version", 100 | "Work Status", 101 | "Visualize", 102 | "Accrual Periodicity", 103 | "Namespace", 104 | "dimensions" 105 | ], 106 | "definitions": { 107 | "Publisher": { 108 | "type": "object", 109 | "properties": { 110 | "IRI": { 111 | "type": "string" 112 | } 113 | }, 114 | "required": ["IRI"] 115 | }, 116 | "Creator": { 117 | "type": "object", 118 | "properties": { 119 | "IRI": { 120 | "type": "string" 121 | }, 122 | "Name": { 123 | "type": "string" 124 | } 125 | }, 126 | "required": ["IRI"] 127 | }, 128 | "Contributor": { 129 | "type": "object", 130 | "properties": { 131 | "IRI": { 
132 | "type": "string" 133 | }, 134 | "Name": { 135 | "type": "string" 136 | } 137 | }, 138 | "required": ["IRI", "Name"] 139 | }, 140 | "Theme": { 141 | "type": "object", 142 | "properties": { 143 | "IRI": { 144 | "type": "string" 145 | }, 146 | "Name": { 147 | "type": "string" 148 | } 149 | }, 150 | "required": ["IRI"] 151 | }, 152 | "ContactPoint": { 153 | "type": "object", 154 | "properties": { 155 | "E-Mail": { 156 | "type": "string", 157 | "format": "email" 158 | }, 159 | "Name": { 160 | "type": "string" 161 | } 162 | }, 163 | "required": ["E-Mail", "Name"] 164 | }, 165 | "Dimension": { 166 | "type": "object", 167 | "properties": { 168 | "name": { 169 | "type": "object", 170 | "properties": { 171 | "de": { "type": "string" }, 172 | "fr": { "type": "string" }, 173 | "it": { "type": "string" }, 174 | "en": { "type": "string" } 175 | }, 176 | "required": ["de", "en"] 177 | }, 178 | "dimension-type": { 179 | "type": "string", 180 | "enum": ["Key Dimension", "Measure Dimension", "Standard Error"] 181 | }, 182 | "data-kind": { 183 | "type": "object", 184 | "description": "See https://cube.link/#meta-datakind-temporal-spatial", 185 | "properties": { 186 | "type": { 187 | "type": "string", 188 | "enum": ["temporal", "spatial-shape", "spatial-coordinates"] 189 | }, 190 | "unit": { 191 | "type": "string" 192 | } 193 | } 194 | }, 195 | "scale-type": { 196 | "description": "See https://cube.link/#qudt-scaletype", 197 | "type": "string", 198 | "enum": ["nominal", "ordinal", "interval", "ratio"] 199 | }, 200 | "mapping": { 201 | "type": "object", 202 | "properties": { 203 | "value-type": { 204 | "type": "string", 205 | "enum": ["Shared", "Literal"] 206 | }, 207 | "type": { 208 | "type": "string", 209 | "enum": ["regex", "lookup", "replace", "additive"] 210 | }, 211 | "pattern": { 212 | "type": "string" 213 | }, 214 | "replacement": { 215 | "type": "string" 216 | }, 217 | "replacements": { 218 | "type": "array", 219 | "items": { 220 | "type": "string" 221 | } 222 | }, 223 | 
"base": { 224 | "type": "string" 225 | } 226 | }, 227 | "allOf": [ 228 | { 229 | "if": { 230 | "properties": { "type": { "const": "regex" } } 231 | }, 232 | "then": { 233 | "required": ["pattern", "replacement"] 234 | } 235 | }, 236 | { 237 | "if": { 238 | "properties": { "type": { "const": "replace" } } 239 | }, 240 | "then": { 241 | "required": ["replacements"] 242 | } 243 | }, 244 | { 245 | "if": { 246 | "properties": { "type": { "const": "additive" } } 247 | }, 248 | "then": { 249 | "required": ["base"] 250 | } 251 | } 252 | ] 253 | }, 254 | "unit": { 255 | "type": "string" 256 | }, 257 | "path": { 258 | "type": "string" 259 | } 260 | }, 261 | "required": [ 262 | "name", 263 | "dimension-type", 264 | "scale-type", 265 | "path", 266 | "description" 267 | ] 268 | } 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /pylindas/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import yaml 5 | import py_cube 6 | import logging 7 | 8 | from py_cube.fetch import fetch 9 | from py_cube.example import list_examples, load_example 10 | from py_cube.cube.shared_dimension import convert_geojson_to_ttl 11 | 12 | 13 | logger = logging.getLogger('pycube') 14 | 15 | 16 | def serialize(input_directory: str, output_ttl: str, na_values: list[str], sep: str = ",", decimal: str = "."): 17 | csv_path = os.path.join(input_directory, "data.csv") 18 | yml_path = os.path.join(input_directory, "description.yml") 19 | json_path = os.path.join(input_directory, "description.json") 20 | 21 | if os.path.exists(yml_path): 22 | with open(yml_path) as file: 23 | cube_yaml = yaml.safe_load(file) 24 | elif os.path.exists(json_path): 25 | with open(json_path) as file: 26 | cube_yaml = yaml.safe_load(file) 27 | else: 28 | raise FileNotFoundError("Neither description.yml nor description.json found in the directory") 29 | 30 | df = pd.read_csv(csv_path, 
na_values=na_values, sep=sep, decimal=decimal) 31 | 32 | cube = py_cube.Cube(dataframe=df, cube_yaml=cube_yaml, environment="TEST", local=True) 33 | cube.prepare_data() 34 | cube.write_cube() 35 | cube.write_observations() 36 | cube.write_shape() 37 | cube.serialize(os.path.join(os.getcwd(), output_ttl)) 38 | print(cube) 39 | 40 | 41 | def configure_logging(log_level): 42 | class CustomFormatter(logging.Formatter): 43 | """Custom logging formatter to add colors based on log level.""" 44 | 45 | COLORS = { 46 | 'DEBUG': '\033[0m', # Normal 47 | 'INFO': '\033[94m', # Blue 48 | 'WARNING': '\033[93m', # Yellow 49 | 'ERROR': '\033[91m', # Red 50 | 'CRITICAL': '\033[91m', # Red 51 | } 52 | 53 | def format(self, record): 54 | log_fmt = self.COLORS.get(record.levelname, '\033[0m') + '%(levelname)s: %(message)s\033[0m' 55 | formatter = logging.Formatter(log_fmt) 56 | return formatter.format(record) 57 | 58 | console_handler = logging.StreamHandler() 59 | logger.setLevel(log_level) 60 | console_handler.setFormatter(CustomFormatter()) 61 | logger.addHandler(console_handler) 62 | 63 | 64 | def main(): 65 | parser = argparse.ArgumentParser(description="Cube data operations") 66 | subparsers = parser.add_subparsers(dest="operation", help="Operation to perform") 67 | 68 | serialize_parser = subparsers.add_parser("serialize", help="Serialize cube data") 69 | serialize_parser.add_argument("input_directory", help="Directory containing the data files") 70 | serialize_parser.add_argument("output_ttl", help="Output TTL file") 71 | serialize_parser.add_argument("--na_value", nargs="+", help="Values to treat as NA") 72 | serialize_parser.add_argument("--sep", default=",", nargs="?", help="Separator for CSV file") 73 | serialize_parser.add_argument("--decimal", default=".", nargs="?", help="Decimal separator") 74 | serialize_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 75 | 76 | fetch_parser = subparsers.add_parser("fetch", help="Fetches a 
dataset from a URL") 77 | fetch_parser.add_argument("input_url", type=str, help="The URL of the dataset to fetch") 78 | fetch_parser.add_argument("output", type=str, help="The directory to save the output files") 79 | fetch_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 80 | 81 | shared_parser = subparsers.add_parser("shared", help="Shared Dimension operations") 82 | shared_subparsers = shared_parser.add_subparsers(dest="suboperation", help="Shared sub-operations") 83 | 84 | convert_geojson_parser = shared_subparsers.add_parser("convert_geojson", help="Convert GeoJSON to TTL") 85 | convert_geojson_parser.add_argument("input_geojson", type=str, help="Input GeoJSON file") 86 | convert_geojson_parser.add_argument("output_ttl", type=str, help="Output TTL file") 87 | convert_geojson_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 88 | 89 | example_parser = subparsers.add_parser("example", help="Example operations") 90 | example_subparsers = example_parser.add_subparsers(dest="suboperation", help="Example sub-operations") 91 | 92 | list_parser = example_subparsers.add_parser("list", help="List all examples") 93 | list_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 94 | 95 | start_fuseki_parser = example_subparsers.add_parser("start-fuseki", help="Start a Fuseki database") 96 | start_fuseki_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 97 | 98 | load_parser = example_subparsers.add_parser("load", help="Load an example by name") 99 | load_parser.add_argument("example_name", type=str, help="The name of the example to load", choices=[example["id"] for example in list_examples()]) 100 | # add optional base_uri argument to load parser 101 | load_parser.add_argument("--base-uri", type=str, help="The base URI for a SPARQL database (Fuseki supported)", default="http://localhost:3030/dataset") 102 | 
load_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 103 | 104 | schema_parser = subparsers.add_parser("schema", help="Schema operations") 105 | schema_subparsers = schema_parser.add_subparsers(dest="suboperation", help="Schema sub-operations") 106 | schema_subparsers.add_parser("import", help="Import the description schema file") 107 | schema_parser.add_argument("output", type=str, help="Output file") 108 | schema_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity") 109 | 110 | 111 | args = parser.parse_args() 112 | log_level = logging.DEBUG if args.verbose == 1 else logging.INFO 113 | 114 | configure_logging(log_level) 115 | 116 | if args.operation == "serialize": 117 | serialize(args.input_directory, args.output_ttl, args.na_value, args.sep, args.decimal) 118 | elif args.operation == "fetch": 119 | fetch(args.input_url, args.output) 120 | elif args.operation == "example": 121 | if args.suboperation == "list": 122 | examples = list_examples() 123 | for example in examples: 124 | print(f'{example["id"]}: {example["name"]}') 125 | elif args.suboperation == "load": 126 | load_example(args.example_name, args.base_uri) 127 | elif args.suboperation == "start-fuseki": 128 | os.system("scripts/fuseki/start.sh") 129 | elif args.operation == "shared": 130 | if args.suboperation == "convert_geojson": 131 | convert_geojson_to_ttl(args.input_geojson, args.output_ttl) 132 | elif args.operation == 'schema': 133 | if args.suboperation == "import-description": 134 | description_schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'description.schema.json') 135 | with open(description_schema_path, 'r') as f: 136 | schema = f.read() 137 | with open(args.output, 'w') as f: 138 | f.write(schema) 139 | logger.debug(f"Imported description into current directory: {args.output}") 140 | 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 
-------------------------------------------------------------------------------- /docs/yaml.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | `pylindas` works with dictionaries to describe metadata for the various constructs that are supported (namely `cube:Cube`, `meta:SharedDimension`). One way to construct these nested dictionaries is through a `yaml` file. YAML files are flexible and easy to read and are currently the main way (as well as the only supported way) to provide the necessary metadata. 4 | 5 | This page describes the structure needed for a valid `yaml` file. 6 | 7 | ## Namespaces 8 | | **PREFIX** | **IRI** | 9 | | --- | --- | 10 | | `cube` | `<https://cube.link/>` | 11 | | `dcat` | `<http://www.w3.org/ns/dcat#>` | 12 | | `dcterms` | `<http://purl.org/dc/terms/>` | 13 | | `meta` | `<https://cube.link/meta/>` | 14 | | `schema` | `<http://schema.org/>` | 15 | | `sh` | `<http://www.w3.org/ns/shacl#>` | 16 | 17 | *** 18 | 19 | # `cube:Cube` 20 | 21 | Below the table you'll find a working example. For additional examples, please refer to [the example directory](https://github.com/Kronmar-Bafu/lindas-pylindas/tree/main/example/Cubes). 
22 | 23 | | Key | Status | Expected Entry | Description | Target Predicate | 24 | | --- | --- | --- | --- | --- | 25 | |**Name** | Required | Key-Value pairs, with key being an [ISO 639 language code](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) for the language in question and the corresponding value | Name of the dataset with corresponding language | `schema:name`, `dcterms:title` | 26 | |**Description** | Required | Key-Value pairs, with key being an [ISO 639 language code](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) for the language in question and the corresponding value | Description of the data set with corresponding language | `schema:description`, `dcterms:description` | 27 | |**Publisher** | Required | List of Key-Value pairs, with `key = IRI` and the correct IRI | Describes the publisher of the dataset with the correct IRI | `schema:publisher`, `dcterms:publisher` | 28 | |**Creator** | Required | List of Key-Value pairs, with `key = IRI` and the correct IRI | Describes the creator of the dataset with the correct IRI | `schema:creator`, `dcterms:creator` | 29 | |**Contributor** | Required | List of Key-Value pairs, with `key = IRI` and the correct IRI and `key = Name`| Describes the contributors of the dataset, with both the correct IRI and the name | `schema:contributor`, `dcterms:contributor` | 30 | |**Date Created** | Required | Date of Publication, given in ISO 8601 format, i.e. YYYY-MM-DD | Publication date of the dataset. | `schema:dateCreated` | 31 | |**Contact Point** | Required | Key-Value pairs, with keys `E-Mail` and `Name` for contact E-mail as well as name | Contact point of the data set | `schema:contactPoint`, `dcat:contactPoint` | 32 | |**Base-URI** | Required | a valid URI | The Base-URI will be used to construct a URI for the cube as well as other parts of the cube. 
Please make sure to give something meaningful and contact the Federal Archive | | 33 | |**Identifier** | Required | a *unique* identifier for the cube | The unique identifier under which a cube (or a family of cubes with differing versions) can be identified | `dcterms:identifier` | 34 | |**Version** | Required | a numerical value | the version of the cube | `schema:version` | 35 | |**Work Status** | Required | Either `Draft` or `Published` | the work status of the Cube. Either Published for final iterations of the given version or Draft for earlier versions. | `schema:creativeWorkStatus` | 36 | |**Visualize** | optional | True or False | boolean describing whether the Cube should be displayed on `visualize.admin.ch`. The key-value pair can be omitted, which is treated as `False` | `schema:workExample` | 37 | |**Accrual Periodicity** | optional | `daily`, `weekly`, `monthly`, `yearly` or `irregular` | The frequency with which the cube is expected to be updated | `dct:accrualPeriodicity` | 38 | |**Namespace** | optional | a string | does not have a technical impact but instead improves readability if one serializes a cube | | 39 | |**dimensions** | required | a key-value pair with key being the column name in the `pandas.DataFrame`. The value is a valid `dimension` as described in [dimension](#dimension) | Describes the metadata of a given dimension. 
| `cube:observationConstraint/sh:property` | 40 | 41 | ## `dimension` 42 | 43 | | Key | Status | Expected Entry | Description | Target Predicate | 44 | | --- | --- | --- | --- | --- | 45 | |**name** | Required | Key-Value pairs, with key being a language short hand and the corresponding value | Name of the dimension with corresponding language | `schema:name` | 46 | |**description** | Required | Key-Value pairs, with key being a language short hand and the corresponding value | Description of the dimension with corresponding language | `schema:description` | 47 | |**dimension-type** | Required | Either `Key Dimension`, `Measure Dimension` or `Standard Error` | Type of dimension, which either is a measure dimension, key dimension or a standard error. Can only be one | `rdf:type` | 48 | |**scale-type** | Required | Either `nominal`, `ordinal`, `interval`, or `ratio` | Scale type of the dimension. Please refer to [link to be added] for further details. | `qudt:scaleType`| 49 | |**path** | required | a per cube unique string `path`, describing the predicate used for the dimension. | `cube:Observation` are written with ` "Value"`. | `sh:path` | 50 | |**mapping** | required for dimensions using URI objects | key-value pairs, at least one key-value pair with key `type` and value being either `replace` or `additive` | a logic which should be employed when mapping values in the data frame to some URI | None | 51 | |**unit** | required for measure dimensions | a unit from the qudt:unit namespace. 
Refer to [these Units here](https://www.qudt.org/doc/DOC_VOCAB-UNITS.html) - namespace does not need to be provided, for example for kg, provide `KiloGM` | Unit in which the measure dimension is provided | `qudt:hasUnit` | 52 | |**datatype** | Required | a datatype defined in [section 3](https://www.w3.org/TR/xmlschema-2/#built-in-datatypes), without namespace | the datatype of the column in question | `sh:datatype` | 53 | 54 | ### Example 55 | ```yaml 56 | Name: 57 | de: Mock Cube 58 | fr: Mock Cube 59 | it: Mock Cube 60 | en: Mock Cube 61 | Description: 62 | de: Ein Beispiel Cube, der simulierte Daten enthält 63 | en: An example Cube containing some simulated data 64 | Publisher: 65 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 66 | Creator: 67 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock 68 | Contributor: 69 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu 70 | Name: Bundesamt für Mock Data 71 | Date Created: 72 | 2024-08-26 73 | Contact Point: 74 | E-Mail: contact@mock.ld.admin.ch 75 | Name: Bundesamt für Mock Data 76 | Base-URI: https://mock.ld.admin.ch/ 77 | Identifier: mock-example 78 | Version: 1 79 | Work Status: 80 | Draft 81 | Visualize: 82 | True 83 | # Optional but recommended 84 | Accrual Periodicity: yearly 85 | 86 | # Optional 87 | Namespace: mock 88 | 89 | dimensions: 90 | # required 91 | Jahr: 92 | name: 93 | de: Jahr 94 | fr: An 95 | it: Anno 96 | en: Year 97 | description: 98 | de: Jahr der Erhebung 99 | dimension-type: Key Dimension 100 | datatype: URI 101 | scale-type: ordinal 102 | path: year 103 | data-kind: 104 | type: temporal 105 | unit: year 106 | mapping: 107 | type: additive 108 | base: https://ld.admin.ch/time/year/ 109 | ``` 110 | 111 | *** 112 | 113 | ## New features: Shared dimension generation + Concept tables 114 | In early March 2025, these two new features were added. 
115 | As there might be some refactoring of the yaml configuration, you can currently find the explanations in the respective README and in the yaml examples as comments: 116 | - Shared dimensions: [README](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/pylindas/pyshareddimension/README.md) and [sd_description.yml](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/example/Shared_Dimensions/shared_dimension_generation/sd_description.yml) 117 | - Concept tables: [README](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/example/Cubes/concept_table_airport/README.md) and [description.yml](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/example/Cubes/concept_table_airport/description.yml) 118 | 119 | -------------------------------------------------------------------------------- /pylindas/pyshareddimension/README.md: -------------------------------------------------------------------------------- 1 | # Generation of shared dimension 2 | This is a first implementation to generate a shared dimension, following an approach similar to pyCube, but to transform a .csv file to the corresponding RDF. 3 | I will abbreviate Shared Dimension by SD, for convenience. 4 | 5 | The pyCube generates two things: the cube's resource (with its specific URL and properties such as the cube's name and meta-data), and a list of observations (each a resource with a specific URL and properties). 6 | Similarly, a SD is also composed of the SD's resource itself (with its specific URL and properties such as the SD's name), and a list of terms (each a resource with a specific URL and properties). 7 | 8 | The implementation is done in [shared_dimension.py](shared_dimension.py), which is a copy and adaptation of cube.py, to reproduce code that matches the pyCube "approach". 
9 | 10 | An example is given in the [example/Shared_Dimensions/shared_dimension_generation](/example/Shared_Dimensions/shared_dimension_generation/) folder, which contains: 11 | 12 | - [sd_description.yml](/example/Shared_Dimensions/shared_dimension_generation/sd_description.yml): the information about the SD itself (Identifier, Name in different languages, etc) and about the Terms generation 13 | - [sd_terms.csv](/example/Shared_Dimensions/shared_dimension_generation/sd_terms.csv): the data for the terms with an identifier and a name in different languages 14 | Note that I took the terms from the BAFU's Red List, a use case that I worked on 15 | - [sd_example.py](/example/Shared_Dimensions/shared_dimension_generation/sd_example.py): example code to run the transformation 16 | 17 | ## WARNING: Persistent URLs 18 | It is to be noted that when publishing a SD, the goal is that other datasets will make links to that SD. 19 | This link is the basic principle of Linked Data, and it consists in the re-use of the identifier of the SD and its terms (their URLs) in other datasets, as Cube's dimensions for instance. 20 | Therefore, the basic requirement of Persistent URLs should be carefully applied when publishing Shared Dimensions, because removing an existing SD or one of its terms could break another dataset (or hundreds, thousands of other datasets). 21 | When trying things out on LINDAS TEST, it might not really matter, but when publishing a SD on LINDAS INT it is already more important, and when publishing to LINDAS PROD it is of course vital. 22 | 23 | To handle this properly, a SD and each term have a `schema:validFrom` triple which indicates the starting date of validity. 24 | The value comes from the configuration .yml file and is a date/time value: 25 | ``` 26 | Valid-from: 2025-02-05T00:00:00Z 27 | ``` 28 | When a SD or one of its terms should no longer be used, it must still exist but become "deprecated". 
This is done by adding a `schema:validThrough` triple with an ending date/time. 29 | This mechanism avoids breaking existing datasets. 30 | 31 | The generation of the `schema:validThrough` triple is not currently handled in this code, further thoughts might be needed to handle this properly and allow to deprecate a whole SD, or only one/some of its terms. 32 | 33 | ## Links between terms: hierarchy example 34 | A first implementation is available, and the current example demonstrates how to build a hierarchy with `skos:broader` links from child to parent. 35 | 36 | The links (hierarchy) must be provided in the data itself: 37 | - [sd_terms.csv](sd_terms.csv): has an identifier for the term itself (the `code` field), and an identifier for its parent (the `parent_code` field) 38 | - [sd_description.yml](sd_description.yml): defines a link between terms with the `links-to-other-terms` key. The sub-key `parent_code` is the name of the column that contains the identifier of the other term. The value of `property` is the URL of the property to use to link the current term to its related term, the parent in this example. 39 | The `links-to-other-terms` key is optional, just omit it if there are no links between terms in the dataset 40 | 41 | Notes about the hierarchy example: 42 | - The root term does not have a parent, this is currently handled properly 43 | - The description of the hierarchy is not generated yet, this could be added in a coming version 44 | 45 | This current implementation allows to create links between two terms and can thus be configured to link the term to its parent with the `skos:broader` property. 46 | Multiple links can be defined under the `links-to-other-terms` key. 47 | One current "limitation" is that it links one term to another (not to multiple others). 
48 | 49 | ## About hierarchies "description" or "template" 50 | When a hierarchy exists in a Shared Dimension, the Cube Creator allows to describe that hierarchy under the "Hierarchy" tab. 51 | 52 | The goal is to describe the existing hierarchy by defining the root(s) node(s), the levels, and the property that links the terms to build that hierarchy (as `skos:broader` for instance). When linking a cube's dimension to an existing Shared Dimension, the hierarchy description must be defined in the metadata, and it is then possible to copy an existing hierarchy description as explained in the [Cube Creator's User guide](https://github.com/zazuko/cube-creator/wiki/3.-Cube-Designer#linking-to-shared-dimensions). 53 | 54 | In autumn 2024, it was not yet possible to add, by code, a hierarchy description in LINDAS. The cause was that the Cube Creator was expecting the hierarchy description to be in a specific Named Graph (only available to the Cube Creator itself). The possibility to add hierarchy descriptions was requested [in this issue](https://gitlab.ldbar.ch/zazuko/misc/-/issues/197), and was first tested when creating this feature of Shared Dimension generation. At the time of writing (early March 2025), that possibility was not yet working properly (see the [comment](https://gitlab.ldbar.ch/zazuko/misc/-/issues/197#note_18273) in that feature request). 55 | 56 | **Currently proposed solution**: this step to add a hierarchy description to LINDAS, and then copy it when defining a cube's dimension, is just an option. It is not working yet with pyLindas. But it is also possible to directly add the hierarchy description to the metadata of the dimension while generating a cube with pyCube. This is a feature under development. 57 | 58 | For information, here is the RDF of the hierarchy description that was used to perform that test: 59 | ``` 60 | @prefix sd_md: . 61 | @prefix meta: . 62 | @prefix hydra: . 63 | @prefix schema1: . 64 | @prefix shacl: . 
65 | 66 | a sd_md:Hierarchy, meta:Hierarchy, hydra:Resource ; 67 | schema1:name "PyLindas Hierarchy Description fo Shared Dimension generation example" ; 68 | sd_md:sharedDimension ; 69 | meta:hierarchyRoot ns1:1 ; 70 | meta:nextInHierarchy [ schema1:name "Level 1" ; 71 | shacl:path [shacl:inversePath skos:broader] ; 72 | meta:nextInHierarchy [ schema1:name "Level 2" ; 73 | shacl:path [shacl:inversePath skos:broader] ; 74 | meta:nextInHierarchy [ schema1:name "Level 3" ; 75 | shacl:path [shacl:inversePath skos:broader] ; 76 | meta:nextInHierarchy [ schema1:name "Level 4" ; 77 | shacl:path [shacl:inversePath skos:broader] ; 78 | meta:nextInHierarchy [ schema1:name "Level 5" ; 79 | shacl:path [shacl:inversePath skos:broader] 80 | ] 81 | ] 82 | ] 83 | ] 84 | ] . 85 | ``` 86 | Note: the links `nextInHierarchy` must be defined from parent to child. Therefore, if the link in the data is from child to parent, the `shacl:inversePath` must be used as in that example. If the link is already parent to child, it can be simply stated: 87 | ``` 88 | meta:nextInHierarchy [ schema1:name "Level 1" ; 89 | shacl:path skos:narrower 90 | ] 91 | ``` 92 | 93 | ## Generated Shared dimension's RDF validation with SHACL 94 | As the SHACL validation has now been implemented in PyCube, with the `validate()` method, a first temporary version is proposed here. 95 | 96 | **IMPORTANT Remark:** 97 | The code of the `validate()` method is copied from the cube.py validate() and adapted. 98 | However, no official SHACL file is available yet online to validate a Shared Dimension. 99 | During former talks with Zazuko, when writing the [page about Data Validation](https://gitlab.ldbar.ch/hevs/lindas-architecture-and-components/-/blob/main/DataValidation.md?ref_type=heads), they sent us an extract of their data validation process, specific to Shared Dimension. 
100 | This extract is temporarily added in this project, in the [shared_dimension_shape.ttl](shared_dimension_shape.ttl) file, and used for that SHACL validation. 101 | It is currently not hard-coded in the `validate()` method, but passed as parameter. See [sd_example.py](/example/Shared_Dimensions/shared_dimension_generation/sd_example.py) for an example. 102 | 103 | This code demonstrates the validation, but should be improved when that SHACL is finalized and saved online. 104 | 105 | -------------------------------------------------------------------------------- /example/Cubes/concept_table_airport/cube_with_concept.ttl: -------------------------------------------------------------------------------- 1 | @prefix cube: . 2 | @prefix dcat: . 3 | @prefix dct: . 4 | @prefix meta: . 5 | @prefix mock: . 6 | @prefix ns1: . 7 | @prefix qudt: . 8 | @prefix rdf: . 9 | @prefix schema: . 10 | @prefix sh: . 11 | @prefix time: . 12 | @prefix unit: . 13 | @prefix vcard: . 14 | @prefix void: . 15 | 16 | a void:Dataset, 17 | schema:Dataset, 18 | dcat:Dataset, 19 | cube:Cube ; 20 | dct:accrualPeriodicity ; 21 | dct:creator ; 22 | dct:identifier "mock-concept" ; 23 | schema:contactPoint [ a schema:ContactPoint ; 24 | schema:email "contact@mock.ld.admin.ch"^^ ; 25 | schema:name "Bundesamt für Mock Data"^^ ] ; 26 | schema:contributor ; 27 | schema:creativeWorkStatus ; 28 | schema:creator ; 29 | schema:dateCreated "2024-08-26"^^ ; 30 | schema:dateModified "2025-02-20T09:38:41+00:00"^^ ; 31 | schema:datePublished "2025-02-20"^^ ; 32 | schema:description "A dataset containing two csv, one for a concept table"@en, 33 | "Un jeu de données avec deux csv, un pour une table de concept"@fr ; 34 | schema:name "Example with a concept table (DE)"@de, 35 | "Example with a concept table"@en, 36 | "Exemple avec une table de concept"@fr, 37 | "Example with a concept table (IT)"@it ; 38 | schema:publisher ; 39 | schema:version 1 ; 40 | schema:workExample ; 41 | dcat:contactPoint [ a 
vcard:Organization ; 42 | vcard:fn "Bundesamt für Mock Data"^^ ; 43 | vcard:hasEmail "contact@mock.ld.admin.ch"^^ ] ; 44 | cube:observationConstraint ; 45 | cube:observationSet . 46 | 47 | a cube:ObservationSet ; 48 | cube:observation , 49 | , 50 | , 51 | , 52 | . 53 | 54 | a cube:Observation ; 55 | cube:observedBy ; 56 | mock:Jahr ; 57 | mock:airport_type ; 58 | mock:airport_type_2nd "a" ; 59 | mock:value 12 . 60 | 61 | a cube:Observation ; 62 | cube:observedBy ; 63 | mock:Jahr ; 64 | mock:airport_type ; 65 | mock:airport_type_2nd "dummy" ; 66 | mock:value 15 . 67 | 68 | a cube:Observation ; 69 | cube:observedBy ; 70 | mock:Jahr ; 71 | mock:airport_type ; 72 | mock:airport_type_2nd "b" ; 73 | mock:value 19 . 74 | 75 | a cube:Observation ; 76 | cube:observedBy ; 77 | mock:Jahr ; 78 | mock:airport_type ; 79 | mock:airport_type_2nd "a" ; 80 | mock:value 15 . 81 | 82 | a cube:Observation ; 83 | cube:observedBy ; 84 | mock:Jahr ; 85 | mock:airport_type ; 86 | mock:airport_type_2nd "b" ; 87 | mock:value 20 . 
88 | 89 | a sh:NodeShape, 90 | cube:Constraint ; 91 | sh:closed true ; 92 | sh:property [ a cube:KeyDimension ; 93 | qudt:scaleType qudt:OrdinalScale ; 94 | schema:description "Jahr der Erhebung"@de, 95 | "Year of survey"@en, 96 | "Année du relevé"@fr, 97 | "Anno di rilevamento"@it ; 98 | schema:name "Jahr"@de, 99 | "Year"@en, 100 | "Année"@fr, 101 | "Anno"@it ; 102 | sh:in ( ) ; 103 | sh:maxCount 1 ; 104 | sh:minCount 1 ; 105 | sh:nodeKind sh:IRI ; 106 | sh:path mock:Jahr ; 107 | meta:dataKind [ a time:GeneralDateTimeDescription ; 108 | time:unitType time:unitYear ] ], 109 | [ sh:in ( cube:Observation ) ; 110 | sh:nodeKind sh:IRI ; 111 | sh:path rdf:type ], 112 | [ a cube:KeyDimension ; 113 | qudt:scaleType qudt:NominalScale ; 114 | schema:description "Flughafentyp - DESC"@de, 115 | "Type of Airport - DESC"@en, 116 | "Type d'aéroport - DESC"@fr, 117 | "Tipo di aeroporto - DESC"@it ; 118 | schema:name "Flughafentyp"@de, 119 | "Type of Airport"@en, 120 | "Type d'aéroport"@fr, 121 | "Tipo di aeroporto"@it ; 122 | sh:in ( ) ; 123 | sh:maxCount 1 ; 124 | sh:minCount 1 ; 125 | sh:nodeKind sh:IRI ; 126 | sh:path mock:airport_type ], 127 | [ sh:in ( ) ; 128 | sh:nodeKind sh:IRI ; 129 | sh:path cube:observedBy ], 130 | [ a cube:MeasureDimension ; 131 | qudt:hasUnit unit:kilogramm ; 132 | qudt:scaleType qudt:IntervalScale ; 133 | schema:description "Wert - DESC"@de, 134 | "Value - DESC"@en, 135 | "Valeur - DESC"@fr, 136 | "Valore - DESC"@it ; 137 | schema:name "Wert"@de, 138 | "Value"@en, 139 | "Valeur"@fr, 140 | "Valore"@it ; 141 | sh:max "20" ; 142 | sh:maxCount 1 ; 143 | sh:min "12" ; 144 | sh:minCount 1 ; 145 | sh:nodeKind sh:Literal ; 146 | sh:path mock:value ], 147 | [ a cube:KeyDimension ; 148 | qudt:scaleType qudt:NominalScale ; 149 | schema:description "Flughafentyp - second key for demo"@de, 150 | "Type of Airport - second key for demo"@en, 151 | "Type d'aéroport - second key for demo"@fr, 152 | "Tipo di aeroporto - second key for demo"@it ; 153 | schema:name 
"Flughafentyp (second key for demo)"@de, 154 | "Type of Airport (second key for demo)"@en, 155 | "Type d'aéroport (second key for demo)"@fr, 156 | "Tipo di aeroporto (second key for demo)"@it ; 157 | sh:in ( ) ; 158 | sh:maxCount 1 ; 159 | sh:minCount 1 ; 160 | sh:nodeKind sh:IRI ; 161 | sh:path mock:airport_type_2nd ] . 162 | 163 | schema:description "Domestic airport description"@en, 164 | "Description de Aéroport national"@fr ; 165 | schema:name "Inlandflughafen"@de, 166 | "Domestic airport"@en, 167 | "Aéroport national"@fr ; 168 | schema:position 1 ; 169 | schema:sameAs ; 170 | ns1:other_property_example "another property example for domesctic airport"@en . 171 | 172 | schema:description "International airport description"@en, 173 | "Description de Aéroport international"@fr ; 174 | schema:name "Internationaler Flughafen"@de, 175 | "International airport"@en, 176 | "Aéroport international"@fr ; 177 | schema:position 2 ; 178 | schema:sameAs ; 179 | ns1:other_property_example "another property example for international airport"@en . 180 | 181 | -------------------------------------------------------------------------------- /example/Cubes/concept_table_airport/README.md: -------------------------------------------------------------------------------- 1 | # Implementation of concept tables and multilingual concepts 2 | This is a first implementation to handle: 3 | - concept tables 4 | - multilingual concepts 5 | 6 | ## Concept table 7 | A concept table is the possibility to handle the values of a dimension as a url to a new resource (a concept). 8 | This is similar to an object that is the URL of a Shared Dimension's term, but here the concepts are created for the cube and uploaded with the cube. 9 | Remark: if the resource/concept already exist, than the case is similar to the handling of Shared Dimensions mapping, and this is already handled by pyCube with the "mapping" mechanism. 
10 | 11 | ## This example's dataset 12 | This example is a little dataset with values/measures about some airport types, the dataset in [data.csv](data.csv) and the airport types in [airportconcept.csv](airportconcept.csv). 13 | Each airport type is identified by two fields (`typeOfAirportID` + `typeOfAirportSecondID`), to demonstrate how to handle this use case that can easily happen in reality. 14 | But one field would have been enough here, and the example is easily adapted by removing the typeOfAirportSecondID from the configurations. 15 | 16 | This example handles two tasks: 17 | - Generate the correct URL for the dimension's object 18 | - Generate the concepts with their properties 19 | 20 | ## Generate the correct URLs for the dimension's object 21 | In description.yml, the "typeOfAirport" dimension is defined with the standard settings, plus the new mapping type: 22 | ``` 23 | mapping: 24 | type: concept 25 | replacement-automated: /airport_type/{typeOfAirport}/{typeOfAirport2nd} 26 | ``` 27 | 28 | In the code, this is handled in the existing `_apply_mappings()` method, with this new `concept` mapping type. 29 | 30 | The replacement will generate a URL replacing for each line the values of the identifiers `{typeOfAirport}` and `{typeOfAirport2nd}`. 31 | - If the value starts with "/", as this example, it is considered a relative URL that will be concatenated to the cube's URL 32 | - If the value does not start with "/", it should be a full URL also containing {field} values that are replaced on the fly. 33 | 34 | **Proposal 1**: this "replacement" handling could also be implemented for shared dimensions. 35 | It could be added to the current "replacements" handling, where this replacements allows to give a one-to-one mapping 36 | that might be needed if no corresponding key exists to automatically build the URL (map "Zurich" to 0 for example). 
37 | 38 | **Proposal 2**: maybe this way of handling the URL, with the {field} configuration, could replace the current "additive" and "replace" mappings type, handling both with one single syntax. 39 | 40 | ## Generate the concepts with their properties 41 | This can be seen as another independent operation, to generate the triples for the concepts. 42 | 43 | The concept metadata are added to the "description.yml" as follows: 44 | ``` 45 | Concepts: 46 | typeOfAirport: 47 | URI: /airport_type/{typeOfAirportID}/{typeOfAirportSecondID} 48 | name-field: typeOfAirport 49 | position-field: position 50 | multilingual: true 51 | ``` 52 | 53 | A specific dataframe is created with the content of "airportconcept.csv", and added to the cube's graph with: 54 | ``` 55 | cube.write_concept("typeOfAirport", airport_concept_df) 56 | ``` 57 | The first parameter is the key found under the "Concepts" in the yaml file 58 | The second is the dataframe with the values. 59 | 60 | The new method `cube.write_concept()` will generate the triples based on: 61 | - URI: Used to generate a URL replacing for each line the values of the identifiers `{typeOfAirportID}` and `{typeOfAirportSecondID}` 62 | The handling is similar to the "replacement" value for the dimension objects 63 | Both patterns should generate the same URLs, with the flexibility to have different column names in different files 64 | - name-field: (mandatory) the name of the csv column that contains the name for that concept used for schema:name 65 | - multilingual: (optional) if true, then the code will look for columns named name-field + the language tags (_en, _de,_fr, etc) 66 | and generate the different schema:name language strings 67 | - position-field: (optional) the name of the csv column that contains a numeric position value for the concept 68 | this will generate a `schema:position` that is used by Visualize when the concepts should not be displayed in alphabetical order but according to that position value 69 | 70 | 
### Concept triples 71 | The concept triples were deduced by observing some concepts generated by the Cube Creator. 72 | The current code generates the following triples: 73 | - no rdf:type, but this could be added 74 | - the URL of the concept is based on the URL of the cube, with a version 75 | However, all those concept "versions" have a `schema:sameAs` to the URL of the cube without the version (the use of that information might need clarification) 76 | - schema:name is mandatory, and could be either a single value, or language strings to handle multilingual concepts (as in this example) 77 | - schema:position, optional, see the explanation above 78 | 79 | Example result (see [cube_with_concept.ttl](cube_with_concept.ttl)): 80 | ``` 81 | schema1:name "Inlandflughafen"@de, 82 | "Domestic airport"@en, 83 | "Aéroport national"@fr ; 84 | schema1:position 1 ; 85 | schema1:sameAs . 86 | ``` 87 | 88 | ### Checking the matchings 89 | As we can see, the concept triples are "separate" triples, not related to the cube itself, where the concept's URL should match the object URL of the dimension (explained above). 90 | 91 | A mismatch can happen if: 92 | - The configuration of the URL mapping is not well defined, either in the dimension (`mapping/replacement` field) or in the concept (`URI` field) 93 | - The configuration is correct, but the values in the two input files do not match 94 | 95 | As a reminder: in RDF there is no enforcement of a resource to be explicitly defined for the RDF to be valid. This means that the object of the dimension could be a URL that is not defined anywhere else. 96 | Of course, this will break applications such as Visualize, but it is still valid RDF (Open World Assumption). 97 | 98 | For this purpose, a `pycube.check_dimension_object_property()` method is added.
99 | It is called in [example_concept.py](example_concept.py) as follows: 100 | ``` 101 | allConceptsFound = cube.check_dimension_object_property("typeOfAirport", SCHEMA.name) 102 | ``` 103 | It means: check that all objects of the "typeOfAirport" dimension (defined in the .yaml file) point to a resource that does have a `schema:name` value. This works because concepts MUST HAVE a `schema:name`, as explained above. 104 | That method prints out the URLs that have no match, and returns False if this is the case. 105 | In this example, there is a deliberately missing match for the line in [data.csv](data.csv): 106 | ``` 107 | 2001,A,dummy,15 108 | ``` 109 | This causes the following log line: 110 | ``` 111 | Missing value for https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/dummy 112 | ``` 113 | 114 | IMPORTANT: `pycube.check_dimension_object_property()` will recreate the dimension's property URL based on the path `value`. 115 | The code comes from the existing `_add_observation()`, and if that code changes, it should be adapted here as well. 116 | ``` 117 | dimension = self._get_shape_column(dimension_name) # raises an exception if dimension not found 118 | path = URIRef(self._base_uri + dimension.get("path")) 119 | ``` 120 | ## Additional fields for concepts 121 | It is possible to add additional properties (fields) for a concept.
122 | 123 | In the example, airportType.csv contains two more fields: 124 | - description: a multilingual field to add a description for the airport type 125 | - other_property_example: another string field as an example 126 | 127 | Those fields are configured directly in the description.yml, for the concept itself: 128 | ``` 129 | Concepts: 130 | typeOfAirport: 131 | URI: /airport_type/{typeOfAirportID}/{typeOfAirportSecondID} 132 | name-field: typeOfAirport 133 | position-field: position 134 | multilingual: true 135 | other-fields: 136 | description: 137 | URI: http://schema.org/description 138 | multilingual: true 139 | other_property_example: 140 | URI: /airport_type/other_property_example 141 | ``` 142 | where: 143 | - other-fields is optional and can be omitted if the concept has no other fields 144 | - key: the key of the field (`description`, `other_property_example`) must match the name of the field in the data file 145 | - URI: the URI to use as the RDF property for that field. It is either a full URI such as `http://schema.org/description` that will be used as-is, 146 | or a relative path that starts with a "/" and that will be concatenated to the cube's URL, adding first a `/concept/prop` path. 147 | `URI` was intentionally used instead of the common `path` key, as the behavior is currently different (handling of relative or full path) 148 | But the behavior and the name of the field could be harmonized in all cases 149 | - multilingual: optional and similar to the multilingual handling for the concept's name. If true, the code will look for columns named `key` + the language tags (_en, _de, _fr, etc.). In the given example, for `description`, it will look for `description_en`, `description_fr`, etc. 150 | 151 | The data type is deduced by the current `pycube._sanitize_value()`, except when `multilingual` is true, in which case the value is expected to be a string.
"""
Utils to download a data.europa.eu dataset with frictionless metadata,
and generate a description.json.

TODO: Make it more agnostic from data.europa.eu
"""

import json
import logging
import os
from datetime import datetime
from typing import Any, Dict, List
from urllib.parse import urlparse

import requests
from jsonschema import Draft202012Validator, validate
from jsonschema.exceptions import ValidationError

logger = logging.getLogger('pycube')

# requests never times out by default; bound every remote call so that a
# stalled server cannot hang the command line tool forever.
REQUEST_TIMEOUT = 30


def download_json(url, timeout=REQUEST_TIMEOUT):
    """Download *url* and return its decoded JSON payload.

    Args:
        url: Address of the JSON document.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def read_schema(schema_path):
    """Load the JSON schema stored at *schema_path* and return it as a dict."""
    # JSON files are UTF-8 by specification; do not depend on the platform locale.
    with open(schema_path, 'r', encoding='utf-8') as f:
        return json.load(f)


class DataEuropaFetcher(object):
    """
    Download a data.europa.eu dataset and derive a cube description from it.

    In the future, the class should be split into frictionless parsing methods and
    data.europa.eu fetch methods.
    """

    # Frictionless field type -> scale type; anything else is "nominal".
    _SCALE_TYPES = {
        "string": "nominal",
        "integer": "interval",
        "number": "ratio",
    }

    def __init__(self):
        pass

    def _transform_url(self, input_url):
        """Turn a dataset page URL into the matching search API URL.

        The dataset id is the last non-empty segment of the URL path, so a
        query string, a fragment or a trailing slash does not end up in it.

        Raises:
            ValueError: If no dataset id can be found in *input_url*.
        """
        path = urlparse(input_url).path
        dataset_id = path.rstrip('/').rsplit('/', 1)[-1]
        if not dataset_id:
            raise ValueError(f"Could not extract a dataset id from {input_url}")
        return f"https://data.europa.eu/api/hub/search/datasets/{dataset_id}"

    def _extract_metadata(self, data):
        """Extract title, description (en/de) and publisher from the API answer.

        Missing entries become empty strings (or an empty dict for the
        publisher) so that later steps can rely on the structure.
        """
        result = data['result']
        title = result.get('title') or {}
        description = result.get('description') or {}
        return {
            "title": {
                "en": title.get('en', ''),
                "de": title.get('de', ''),
            },
            "description": {
                "en": description.get('en', ''),
                "de": description.get('de', ''),
            },
            # A dict (not '') so that "resource" / "name" lookups stay valid.
            "publisher": result.get('publisher') or {},
        }

    def _get_distributions(self, distributions):
        """Download the frictionless descriptor and the CSV of a dataset.

        Returns:
            A dict with the keys "frictionless" (parsed JSON) and "csv"
            (raw bytes). A value is None when no matching distribution
            exists. When several distributions match, the last one wins.

        Raises:
            requests.HTTPError: If a download answers with an error status.
        """
        csv_data = None
        frictionless_data = None
        for distribution in distributions:
            access_urls = distribution.get('access_url') or []
            if not access_urls:
                # Nothing to download for this distribution.
                continue
            title = (distribution.get('title') or {}).get('en')
            if title == "Frictionless Tabular Data Resource":
                frictionless_data = download_json(access_urls[0])
            if (distribution.get('format') or {}).get('id') == "CSV":
                response = requests.get(access_urls[0], timeout=REQUEST_TIMEOUT)
                # Never store an HTTP error page as if it were the data.
                response.raise_for_status()
                csv_data = response.content
        return {
            "frictionless": frictionless_data,
            "csv": csv_data,
        }

    def _infer_dimension_type(self, field: Dict[Any, Any], primary_keys: List[str]) -> str:
        """Infer the dimension type based on field properties."""
        if field['name'] in primary_keys:
            return "Key Dimension"
        return "Measure Dimension"

    def _infer_scale_type(self, field: Dict[Any, Any]) -> str:
        """Infer the scale type based on field properties."""
        return self._SCALE_TYPES.get(field.get("type"), "nominal")

    def _infer_temporal_dimension(self, field: Dict[Any, Any]) -> bool:
        """Infer if the field is a temporal dimension.

        A field is temporal when its type is "date" or "time", or, as a
        heuristic that is logged, when its name looks like a date or a year.
        """
        if field.get("type") in ("date", "time"):
            return True
        field_name = field['name']
        if field_name.lower() in ["jahr", "year", "date", "datum"]:
            logger.warning(f'Dimension {field_name}: Temporal dimension inferred from field name. Please verify.')
            return True
        return False

    def _generate_dimensions(self, data_metadata: Dict[Any, Any]) -> Dict[str, Dict[Any, Any]]:
        """Generate dimensions from data metadata schema.

        The key dimensions are the fields listed in the schema's
        ``primaryKey``. When the schema has none, the first field is used
        instead and a warning is logged.
        """
        schema = data_metadata["schema"]
        fields = schema.get("fields") or []

        primary_key = schema.get('primaryKey') or []
        if isinstance(primary_key, (list, tuple)):
            primary_keys = list(primary_key)
        else:
            primary_keys = [primary_key]
        if not primary_keys and fields:
            first_field = fields[0]["name"]
            logger.warning(f"Primary key not found in schema. Using first field {first_field} as primary key. You may need to adjust Key/Measure Dimension manually.")
            # The list (not a scratch variable) is what the type inference reads.
            primary_keys = [first_field]

        dimensions = {}
        for field in fields:
            field_name = field["name"]

            # Create dimension object
            dimension = {
                "name": {
                    "de": field.get("title", field_name),
                    "en": field.get("title", field_name),
                },
                "dimension-type": self._infer_dimension_type(field, primary_keys),
                "scale-type": self._infer_scale_type(field),
                "path": field_name,
                "description": {
                    "de": field.get("description", f"Beschreibung für {field_name}"),
                    "en": field.get("description", f"Description for {field_name}"),
                },
            }

            # Add unit if present
            if "unit" in field:
                dimension["unit"] = field["unit"]

            # Add data-kind if temporal
            if self._infer_temporal_dimension(field):
                dimension["data-kind"] = {
                    "type": "temporal",
                    "unit": "year",
                }

            dimensions[field_name] = dimension

        return dimensions

    def _transform_metadata(self, metadata: Dict[Any, Any], data_metadata: Dict[Any, Any]) -> Dict[Any, Any]:
        """Transform metadata to conform to the JSON schema.

        Args:
            metadata: Output of ``_extract_metadata``.
            data_metadata: Frictionless resource descriptor of the dataset.
        """
        publisher = metadata.get("publisher")
        if not isinstance(publisher, dict):
            # No publisher information: emit empty values and let the schema
            # validation report them instead of crashing here.
            publisher = {}
        publisher_iri = publisher.get("resource", "")

        output = {
            "Name": {
                "de": metadata["title"]["de"],
                "en": metadata["title"]["en"],
            },
            "Description": {
                "de": metadata["description"]["de"],
                "en": metadata["description"]["en"],
            },
            "Publisher": [
                {
                    "IRI": publisher_iri,
                }
            ],
            "Creator": [
                {
                    "IRI": publisher_iri,
                }
            ],

            "Contributor": [],

            "Date Created": datetime.now().isoformat(),
            "Contact Point": {
                "E-Mail": "opendata@example.ch",  # Example email
                "Name": publisher.get("name", ""),
            },
            "Base-URI": data_metadata["path"],
            "Identifier": data_metadata["name"],
            "Version": 0.1,
            "Work Status": "Draft",
            "Visualize": True,
            "Accrual Periodicity": "",
            "Namespace": "https://opendata.example.ch",
            "dimensions": self._generate_dimensions(data_metadata),
        }

        return output

    def fetch_dataset(self, input_url, output_dir):
        """Download a dataset and write data.csv, frictionless.json and description.json.

        The generated description is validated against description.schema.json;
        validation problems are logged as warnings and do not abort the run.

        Raises:
            ValueError: If the dataset has no CSV or no frictionless distribution.
            requests.HTTPError: If a download answers with an error status.
        """
        transformed_url = self._transform_url(input_url)
        data = download_json(transformed_url)

        metadata = self._extract_metadata(data)
        distributions = self._get_distributions(data['result'].get('distributions') or [])

        # Fail before touching the disk: half-written output is worse than none.
        missing = [kind for kind in ('csv', 'frictionless') if distributions[kind] is None]
        if missing:
            raise ValueError(
                f"Dataset {input_url} has no usable {' / '.join(missing)} distribution"
            )

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        data_csv_filename = os.path.join(output_dir, 'data.csv')
        description_json_filename = os.path.join(output_dir, 'description.json')
        frictionless_json_filename = os.path.join(output_dir, 'frictionless.json')

        logger.info(f"Writing {data_csv_filename}")
        with open(data_csv_filename, 'wb') as f:
            f.write(distributions['csv'])

        logger.info(f"Writing {frictionless_json_filename}")
        with open(frictionless_json_filename, 'w', encoding='utf-8') as f:
            json.dump(distributions['frictionless'], f, indent=2)

        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        description_schema_path = os.path.join(current_file_dir, 'description.schema.json')
        description_schema = read_schema(description_schema_path)
        # "$schema" is stored relative to the output directory so that editors
        # opening description.json can resolve it.
        schema_path = os.path.relpath(
            description_schema_path,
            start=os.path.join(os.getcwd(), output_dir)
        )
        description = {
            "$schema": f"{schema_path}",
            **self._transform_metadata(metadata, distributions['frictionless'])
        }
        logger.info(f"Writing {description_json_filename}")
        with open(description_json_filename, 'w', encoding='utf-8') as f:
            json.dump(description, f, indent=2)

        validator = Draft202012Validator(description_schema)
        errors = list(validator.iter_errors(description))
        for error in errors:
            logger.warning(f"Validation Error: {error.message}")

        serialize_command = (
            "# You may want to adjust the command with --sep and --decimal, depending on the data.csv\n"
            f"python cli.py serialize {output_dir} {os.path.join(output_dir, 'cube.ttl')}"
        )
        if errors:
            logger.warning(
                "The data and description have been downloaded. There were validation errors during description validation, you should fix them before running the following command\n"
                "\n"
                f"{serialize_command}\n"
            )
        else:
            logger.info(
                "Success ! The data and description have been downloaded, you may now verify it, adjust it, and then run serialize to create RDF triples\n"
                "\n"
                f"{serialize_command}\n"
            )


def fetch(input_url: str, output_dir: str):
    """Fetch the dataset behind *input_url* into *output_dir*.

    Raises:
        ValueError: If *input_url* does not point to data.europa.eu.
    """
    # Validate first so that an unsupported URL leaves no empty directory behind.
    if not input_url.startswith('https://data.europa.eu'):
        raise ValueError('Only supporting datasets from data.europa.eu at the moment, make sure your input URL starts with https://data.europa.eu')

    # create the output dir if it does not exist
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    DataEuropaFetcher().fetch_dataset(input_url, output_dir)