├── tests
    ├── __init__.py
    ├── test_sdterms.csv
    ├── test_lindas.py
    ├── test_data.csv
    ├── test_sdterms.yml
    ├── test.yml
    ├── test_sdterms.py
    └── test_shared_dimension_generation.py
├── pylindas
    ├── getter
    │   ├── __init__.py
    │   └── get.py
    ├── lindas
    │   ├── __init__.py
    │   ├── validate.py
    │   ├── query.py
    │   ├── namespaces.py
    │   └── upload.py
    ├── pycube
    │   ├── __init__.py
    │   └── shared_dimension.py
    ├── pyshareddimension
    │   ├── __init__.py
    │   └── README.md
    ├── __init__.py
    ├── example.py
    ├── shared_dimension_queries
    │   ├── example_sd.py
    │   ├── README.md
    │   └── shared_dimensions_queries.py
    ├── description.schema.json
    ├── cli.py
    └── fetch.py
├── example
    ├── Cubes
    │   ├── kita
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── Makefile
    │   │   └── description.json
    │   ├── shared
    │   │   ├── bundeslander
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── Makefile
    │   │   │   ├── schema.json
    │   │   │   └── transform.py
    │   │   └── README.md
    │   ├── concept_table_airport
    │   │   ├── data.csv
    │   │   ├── airportdummyconcept.csv
    │   │   ├── airportconcept.csv
    │   │   ├── airport.py
    │   │   ├── description.yml
    │   │   ├── cube_with_concept.ttl
    │   │   └── README.md
    │   ├── wind
    │   │   ├── README.md
    │   │   ├── Makefile
    │   │   ├── data.csv
    │   │   ├── frictionless.json
    │   │   └── description.json
    │   ├── Population_Aargau
    │   │   ├── func.py
    │   │   ├── integration.py
    │   │   ├── age.csv
    │   │   ├── fetch.py
    │   │   ├── prepare.py
    │   │   └── description.yml
    │   ├── Biotope_Statistik
    │   │   ├── data.csv
    │   │   ├── biotope.py
    │   │   └── description.yml
    │   ├── greenhouse_limit
    │   │   ├── data.csv
    │   │   └── description.yml
    │   ├── mock
    │   │   ├── mock.py
    │   │   ├── mock-cube-cube.ttl
    │   │   ├── data.csv
    │   │   └── description.yml
    │   ├── co2-limits
    │   │   ├── data.csv
    │   │   └── description.yml
    │   └── corona
    │   │   └── description.json
    └── Shared_Dimensions
    │   └── shared_dimension_generation
    │       ├── sd_example_SHACL_result.ttl
    │       ├── sd_example.py
    │       ├── sd_terms.csv
    │       └── sd_description.yml
├── .prettierrc
├── scripts
    └── fuseki
    │   ├── start.sh
    │   └── config-mem.ttl
├── requirements.txt
├── docs
    ├── contributing.md
    ├── examples.md
    ├── uris.md
    ├── installation.md
    ├── concepts.md
    ├── roadmap.md
    ├── sd.md
    ├── cli.md
    ├── functionality.md
    └── yaml.md
├── README.md
├── LICENSE
├── pyproject.toml
├── .github
    └── workflows
    │   ├── ci.yaml
    │   └── publish-pypi.yml
└── .gitignore


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pylindas/getter/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pylindas/lindas/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/example/Cubes/kita/.gitignore:
--------------------------------------------------------------------------------
1 | cube.ttl
2 | 


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "tabWidth": 2,
3 |   "useTabs": false
4 | }
5 | 


--------------------------------------------------------------------------------
/pylindas/pycube/__init__.py:
--------------------------------------------------------------------------------
1 | from pylindas.pycube.cube import Cube


--------------------------------------------------------------------------------
/tests/test_sdterms.csv:
--------------------------------------------------------------------------------
1 | id,name
2 | 1,Zürich
3 | 2,Bern
4 | 3,Luzern


--------------------------------------------------------------------------------
/example/Cubes/shared/bundeslander/.gitignore:
--------------------------------------------------------------------------------
1 | data.transformed.geojson
2 | data.pretty.geojson
3 | 


--------------------------------------------------------------------------------
/pylindas/pyshareddimension/__init__.py:
--------------------------------------------------------------------------------
1 | from pylindas.pyshareddimension.shared_dimension import SharedDimension


--------------------------------------------------------------------------------
/example/Cubes/shared/README.md:
--------------------------------------------------------------------------------
1 | ## Shared dimensions
2 | 
3 | This folder contains shared dimensions that can be used by other examples.
4 | 


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/data.csv:
--------------------------------------------------------------------------------
1 | year,typeOfAirport,typeOfAirport2nd,measure
2 | 2001,A,a,12
3 | 2002,A,a,15
4 | 2001,B,b,19
5 | 2002,B,b,20
6 | 2001,A,dummy,15


--------------------------------------------------------------------------------
/scripts/fuseki/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | docker run --rm -p 3030:3030 -v $(pwd):/usr/share/data atomgraph/fuseki --config=/usr/share/data/scripts/fuseki/config-mem.ttl
4 | 


--------------------------------------------------------------------------------
/example/Cubes/wind/README.md:
--------------------------------------------------------------------------------
1 | Original: https://data.europa.eu/data/datasets/fc49eebf-3750-4c9c-a29e-6696eb644362/quality?locale=en&validate=90598e7e-5e72-403a-abb3-797165e1b487
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy==2.1.3
 2 | pandas==2.2.3
 3 | pyshacl==0.26.0
 4 | pystardog==0.17.0
 5 | PyYAML==6.0.2
 6 | rdflib==7.0.0
 7 | requests==2.32.3
 8 | sparql-dataframe==0.4
 9 | SPARQLWrapper==2.0.0
10 | 


--------------------------------------------------------------------------------
/tests/test_lindas.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pylindas.pycube import Cube
3 | from rdflib import Graph
4 | 
class TestClass:
    """Placeholder suite for cube validation tests."""

    def test_validate_cube(self):
        """Not implemented yet; the test passes without checking anything."""
        # TODO: test for a wrapper function?
        return None


--------------------------------------------------------------------------------
/example/Cubes/shared/bundeslander/README.md:
--------------------------------------------------------------------------------
1 | # German states shapes
2 | 
3 | ```
4 | cd example/Cubes/shared/bundeslander
5 | make
6 | curl 'http://localhost:3030/dataset' -H 'Content-Type: text/turtle' -X POST -T data.ttl
7 | ```
8 | 


--------------------------------------------------------------------------------
/example/Cubes/wind/Makefile:
--------------------------------------------------------------------------------
1 | cube.ttl: data.csv description.json
2 | 	echo "Navigating to the project root directory"
3 | 	cd ../.. && \
4 | 		python cli.py serialize example/wind/ example/wind/cube.ttl --sep ";" --decimal ","


--------------------------------------------------------------------------------
/example/Shared_Dimensions/shared_dimension_generation/sd_example_SHACL_result.ttl:
--------------------------------------------------------------------------------
1 | @prefix sh: <http://www.w3.org/ns/shacl#> .
2 | @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
3 | 
4 | [] a sh:ValidationReport ;
5 |     sh:conforms true .
6 | 
7 | 


--------------------------------------------------------------------------------
/example/Cubes/kita/README.md:
--------------------------------------------------------------------------------
1 | # Example Kita dataset
2 | 
3 | This is an example working with the dataset "Children in day-to-day facilities by country"
4 | from the [European Open Data Portal](https://data.europa.eu/).
5 | 
6 | ```
7 | make cube.ttl
8 | ```
9 | 


--------------------------------------------------------------------------------
/pylindas/__init__.py:
--------------------------------------------------------------------------------
1 | from pylindas.pycube import Cube
2 | from pylindas.getter.get import get_cube, get_observations
3 | from pylindas.lindas.namespaces import Namespaces
4 | from pylindas.lindas.upload import upload_ttl
5 | from pylindas.lindas.query import query_lindas
6 | 
7 | __version__ = "0.6.5"
8 | 


--------------------------------------------------------------------------------
/pylindas/lindas/validate.py:
--------------------------------------------------------------------------------
1 | from pyshacl import validate
2 | from rdflib import Graph
3 | 
def validate(data_graph: Graph, shacl_graph: Graph) -> tuple:
    """Validate ``data_graph`` against ``shacl_graph`` using pySHACL.

    Args:
        data_graph: The RDF graph to check.
        shacl_graph: The graph holding the SHACL shapes to check against.

    Returns:
        A ``(conforms, text)`` tuple: the conformance flag and the textual
        report returned by pySHACL. The results graph is discarded.
    """
    # This wrapper has the same name as ``pyshacl.validate``. Defining it
    # rebinds the module-level name, so a plain ``validate(...)`` call in here
    # would hit this wrapper again and fail on the unexpected keyword
    # arguments. Import the library function under an unambiguous alias.
    from pyshacl import validate as shacl_validate

    conforms, _results_graph, text = shacl_validate(
        data_graph,
        shacl_graph=shacl_graph,
        abort_on_first=True,
        inference="none",
        advanced=True,
    )
    return conforms, text


--------------------------------------------------------------------------------
/example/Cubes/Population_Aargau/func.py:
--------------------------------------------------------------------------------
def replace_with_shared_dimension(value):
    """Map a prefixed code to a ``https://ld.admin.ch/`` URI.

    ``C_`` codes resolve to the canton namespace and ``D_`` codes to the
    district namespace; every other value falls back to the municipality
    namespace. In all cases the first two characters of ``value`` are dropped
    before the remainder is appended to the namespace.
    """
    namespaces_by_prefix = (
        ("C_", "https://ld.admin.ch/canton/"),
        ("D_", "https://ld.admin.ch/district/"),
    )
    for prefix, namespace in namespaces_by_prefix:
        if value.startswith(prefix):
            return namespace + value[2:]
    return "https://ld.admin.ch/municipality/" + value[2:]


--------------------------------------------------------------------------------
/example/Cubes/wind/data.csv:
--------------------------------------------------------------------------------
 1 | Jahr;Anzahl_inBetrieb_WKA_SH;Leistung_MW
 2 | 2012;2194;3270,125
 3 | 2013;2243;3634,157
 4 | 2014;2583;4812,401
 5 | 2015;2779;5638,156
 6 | 2016;2942;6198,915
 7 | 2017;2991;6598,15
 8 | 2018;3007;6698,53
 9 | 2019;3009;6718,22
10 | 2020;3025;6811,57
11 | 2021;2980;6939,67
12 | 2022;3040;7383,92
13 | 2023;3173;8505,24
14 | 2024;3170;8592,34
15 | 


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/airportdummyconcept.csv:
--------------------------------------------------------------------------------
1 | typeOfAirportID,typeOfAirportSecondID,typeOfAirport_en,typeOfAirport_de,typeOfAirport_fr,position,description_en,description_fr,other_property_example
2 | A,dummy,Dummy airport,Dummy airport (de),Dummy airport (fr),3,A dummy airport type to test the matchings,A dummy airport type to test the matchings (fr),another property example for dummy airport
3 | 


--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing and Suggestions
2 | 
3 | If you wish to contribute to this project, feel free to clone this repository and open a pull request to be reviewed and merged.
4 | 
5 | Alternatively feel free to open an [issue](https://github.com/Kronmar-Bafu/lindas-pylindas/issues) with a question or a suggestion on what could be implemented. There is also a [roadmap](roadmap.md) for the further development of `pylindas`.
6 | 


--------------------------------------------------------------------------------
/docs/examples.md:
--------------------------------------------------------------------------------
 1 | # Examples
 2 | 
 3 | There are multiple examples for cubes and shared dimensions in the [example](../example) folder. They usually consist of the following files:
 4 | 
 5 | - .csv file for the tabular data
 6 | - .yml file for the description of the cube
 7 | - .py file for running `pylindas`
 8 | - .ttl file as output after running `pylindas`
 9 | 
10 | For some examples, there are also files for different concepts.
11 | 


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/airportconcept.csv:
--------------------------------------------------------------------------------
1 | typeOfAirportID,typeOfAirportSecondID,typeOfAirport_en,typeOfAirport_de,typeOfAirport_fr,position,description_en,description_fr,other_property_example
2 | A,a,Domestic airport,Inlandflughafen,Aéroport national,1,Domestic airport description,Description de Aéroport national,another property example for domesctic airport
3 | B,b,International airport,Internationaler Flughafen,Aéroport international,2,International airport description,Description de Aéroport international,another property example for international airport


--------------------------------------------------------------------------------
/example/Cubes/kita/Makefile:
--------------------------------------------------------------------------------
 1 | all: cube.ttl
 2 | 
 3 | data.csv:
 4 | 	echo "1. Download CSV from https://data.europa.eu/data/datasets/https-www-datenportal-bmbf-de-portal-2-2-5?locale=en"
 5 | 	# Add your download command here
 6 | 	echo "2. Clean the file (removing merged cells, removing rows about the header, removing extra Anzahl grouping columns)"
 7 | 	echo "3. Export as CSV"
 8 | 
 9 | cube.ttl: data.csv description.json
10 | 	echo "Navigating to the project root directory"
11 | 	cd ../.. && \
12 | 		python cli.py serialize example/kita/ example/kita/cube.ttl --na_value "-"
13 | 


--------------------------------------------------------------------------------
/tests/test_data.csv:
--------------------------------------------------------------------------------
 1 | Jahr,Station,Wert,LowerUnsicherheit,UpperUnsicherheit,Wert2,Standardfehler,Status
 2 | 2000,Bern,23.0,1.0,10.0,11.5,5,final
 3 | 2000,Zürich,23.555744036232408,1.0,10.0,11.6,5,final
 4 | 2000,Schweiz,23.1,1.0,11,4,12.0,final
 5 | 2001,Bern,21.536090723505524,1.0,10.0,10.75,5,final
 6 | 2001,Zürich,21.659924330021255,1.0,10.0,10.3,5,final
 7 | 2001,Schweiz,21.1,1.0,10.0,10.3,11.3,final
 8 | 2002,Bern,22.575144684250287,2.0,10.0,11.25,5,provisionally
 9 | 2002,Zürich,20.688211936144263,2.0,10.0,10.3,5,provisionally
10 | 2002,Schweiz,21.4,2.1,4.3,41.2,8,provisionally


--------------------------------------------------------------------------------
/tests/test_sdterms.yml:
--------------------------------------------------------------------------------
 1 | Identifier: test_canton
 2 | Name:
 3 |   en: cantons
 4 |   fr: cantons
 5 |   de: cantons
 6 |   it: cantons
 7 | # Description is optional
 8 | Description:
 9 |   fr: cantons
10 |   en: cantons
11 | # Valid-from is optional, it is a date/time value  
12 | # Note: it is currently optional, but might need to become mandatory as validFrom, and later validThrough, are used to make a SD and its term 'deprecated'
13 | Valid-from: 2025-02-05T00:00:00Z
14 | # Contributor is optional, it is now added by the Cube Creator when creating a new SD
15 | Contributor:
16 |   name: Joshua Hirt
17 |   email: joshua.hirt@bafu.admin.ch
18 | Terms:
19 |   identifier-field: id
20 |   name-field: name
21 |   multilingual: False
22 | 


--------------------------------------------------------------------------------
/docs/uris.md:
--------------------------------------------------------------------------------
 1 | # URIs
 2 | 
 3 | It is important to understand how the settings in the `description.yaml` file determine the different URIs of the cube:
 4 | 
 5 | For the following settings:
 6 | 
 7 | ```yaml
 8 | Base-URI: https://environment.ld.admin.ch/foen/
 9 | Identifier: wps
10 | Version: 1
11 | ```
12 | 
13 | the following URIs will result:
14 | 
15 | - Cube: https://environment.ld.admin.ch/foen/cube/wps/1
16 | - Observation Set: https://environment.ld.admin.ch/foen/cube/wps/1/ObservationSet
17 | - Observation Constraints: https://environment.ld.admin.ch/foen/cube/wps/1/shape
18 | - Observations: https://environment.ld.admin.ch/foen/cube/wps/1/observation/{list_of_key_dimensions}
19 | - Properties: https://environment.ld.admin.ch/foen/{propertyName}


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | > [!NOTE]
 4 | > Using `pylindas` requires basic to intermediate Python skills.
 5 | 
 6 | There are two ways to install this package, locally or through the [Python Package Index (PyPI)](https://pypi.org).
 7 | 
 8 | ## Published Version
 9 | 
10 | You can install this package through pip without cloning the repository.
11 | 
12 | ```
13 | pip install pylindas
14 | ```
15 | 
16 | ## Locally
17 | 
18 | Clone this repository and `cd` into the directory. You can now install this package locally on your machine - we advise using a virtual environment to avoid conflicts with other projects. Additionally, install all dependencies listed in `requirements.txt`.
19 | 
20 | ```
21 | pip install -e .
22 | pip install -r requirements.txt
23 | ```
24 | 


--------------------------------------------------------------------------------
/example/Cubes/wind/frictionless.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "path": "https://opendata.schleswig-holstein.de/dataset/fc49eebf-3750-4c9c-a29e-6696eb644362/resource/b8a7b43c-3529-4b92-bb49-7bf4e9109dfb/download/opendata_wka_inbetrieb_sh_20230103.csv",
 3 |   "name": "wka-inbetrieb",
 4 |   "profile": "tabular-data-resource",
 5 |   "format": "csv",
 6 |   "encoding": "utf-8",
 7 |   "dialect": {
 8 |     "delimiter": ";"
 9 |   },
10 |   "schema": {
11 |     "fields": [
12 |       {
13 |         "type": "integer",
14 |         "name": "Jahr"
15 |       },
16 |       {
17 |         "type": "integer",
18 |         "name": "Anzahl_inBetrieb_WKA_SH",
19 |         "title": "Anzahl"
20 |       },
21 |       {
22 |         "type": "number",
23 |         "decimalChar": ",",
24 |         "name": "Leistung_MW",
25 |         "unit": "MW"
26 |       }
27 |     ]
28 |   }
29 | }


--------------------------------------------------------------------------------
/example/Cubes/Population_Aargau/integration.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import yaml
 3 | import os
 4 | 
 5 | from pylindas.pycube import Cube
 6 | from pylindas.lindas.namespaces import SCHEMA
 7 | 
 8 | ENVIRONMENT = os.getenv("CI_ENVIRONMENT_NAME")
 9 | 
10 | # Load data and yaml
11 | df = pd.read_csv("3_data_preparation/data.csv", encoding="utf-8", sep=",")
12 | with open("4_data_integration/integration.yaml", encoding="utf-8") as file:
13 |     cube_yaml = yaml.safe_load(file)
14 | 
15 | cube = Cube(dataframe=df, cube_yaml=cube_yaml, environment="TEST", local=True)
16 | 
17 | cube.prepare_data()
18 | cube.write_cube()
19 | cube.write_observations()
20 | cube.write_shape()
21 | 
22 | # Create concept
23 | age_group_concept = pd.read_csv("3_data_preparation/age.csv", encoding="utf-8", sep=",")
24 | cube.write_concept("age-group", age_group_concept)
25 | 
26 | cube.serialize("4_data_integration/cube.ttl")


--------------------------------------------------------------------------------
/example/Cubes/Biotope_Statistik/data.csv:
--------------------------------------------------------------------------------
 1 | Typ,Überlappung,Anzahl,Anteil der CH-Biotope,Fläche,Anteil CH-Fläche,Anteil der CH-Biotope (Fläche)
 2 | Hochmoore (Typen I + II),Mit Überlappung,551,7.8,1567.5,0.04,1.5
 3 | Flachmoore,Mit Überlappung,1335,18.8,22501.4,0.54,22.1
 4 | Auengebiete,Mit Überlappung,326,4.6,27844.5,0.67,27.3
 5 | Amphibienlaichgebiete,Mit Überlappung,929,13.1,21670.9,0.52,21.3
 6 | Trockenwiesen und -weiden,Mit Überlappung,3951,55.7,28280.6,0.68,27.8
 7 | Biotope,Mit Überlappung,7092,100.0,101864.9,2.47,100.0
 8 | Hochmoore (Typen I + II),Ohne Überlappung,551,7.8,1567.5,0.04,1.7
 9 | Flachmoore,Ohne Überlappung,1335,18.8,22495.0,0.54,24.0
10 | Auengebiete,Ohne Überlappung,326,4.6,26416.9,0.64,28.2
11 | Amphibienlaichgebiete,Ohne Überlappung,929,13.1,14847.6,0.36,15.9
12 | Trockenwiesen und -weiden,Ohne Überlappung,3951,55.7,28280.6,0.68,30.2
13 | Biotope,Ohne Überlappung,7092,100.0,93607.6,2.27,100.0


--------------------------------------------------------------------------------
/example/Cubes/shared/bundeslander/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all download ../../../py_cube/cube/shared_dimensions.py
 2 | 
 3 | all: data.pretty.geojson data.transformed.geojson data.ttl
 4 | 
 5 | data.geojson:
 6 | 	echo "Original data downloaded from https://opendatalab.de/projects/geojson-utilities/"
 7 | 	echo "    * Checked all checkboxes in Auswahlhilfe,"
 8 | 	echo "    * then Optionen > \"Welche Flächen: Bundesland Flächen\","
 9 | 	echo "    * then close Dialog, and click \"Export .geojson\" -> data.geojson"
10 | 
11 | data.pretty.geojson: data.geojson
12 | 	jq . $< > $@
13 | 
14 | data.transformed.geojson: transform.py data.pretty.geojson
15 | 	python transform.py data.geojson $@
16 | 
17 | data.ttl: data.transformed.geojson ../../../py_cube/cube/shared_dimensions.py
18 | 	cd ../../.. && \
19 | 	python cli.py shared convert_geojson example/shared/bundeslander/data.transformed.geojson example/shared/bundeslander/data.ttl  
20 | 


--------------------------------------------------------------------------------
/docs/concepts.md:
--------------------------------------------------------------------------------
 1 | # Concepts
 2 | 
 3 | The term `concept` refers to a very specific data structure within the [cube.link](https://cube.link) universe.
 4 | 
 5 | ## Multi-Lingual Concepts
 6 | 
 7 | `pylindas` has a basic implementation to handle:
 8 | 
 9 | - concept tables
10 | - multilingual concepts
11 | 
12 | A concept table makes it possible to treat the values of a dimension as URIs that point to new resources (concepts). This is similar to using the URI of a Shared Dimension's term as the object, but here the concepts are created for the cube and uploaded together with the cube.  
13 | 
14 | Remark: if the resource/concept already exists, the case is similar to a Shared Dimension mapping, which `pylindas` already handles with the "mapping" mechanism.
15 | 
16 | See the folder `example/Cubes/concept_table_airport` and its [README](../example/Cubes/concept_table_airport/README.md) for detailed explanations.
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pylindas
 2 | 
 3 | ## About
 4 | 
 5 | `pylindas` is a python package for building and publishing linked data cubes according to the [cube.link](https://cube.link) schema. This schema is used to transform tabular data into [RDF](https://www.w3.org/RDF/). `pylindas` is an alternative to the [Cube-Creator](https://cube-creator.lindas.admin.ch). Whereas the Cube-Creator is a [GUI](https://en.wikipedia.org/wiki/Graphical_user_interface) tool, `pylindas` is more suited to fit into a pipeline workflow to create RDF. Currently this project is heavily linked to [LINDAS](https://lindas.admin.ch) the Swiss Federal Linked Data Service.
 6 | 
 7 | ## Documentation
 8 | 
 9 | - [Installation](docs/installation.md)
10 | - [Contributing](docs/contributing.md)
11 | - [Basic functionality](docs/functionality.md)
12 | - [Command line usage](docs/cli.md)
13 | - [Examples](docs/examples.md)
14 | - [URIs](docs/uris.md)
15 | - [Concepts](docs/concepts.md)
16 | - [Shared Dimensions](docs/sd.md)
17 | 


--------------------------------------------------------------------------------
/example/Cubes/shared/bundeslander/schema.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "http://json-schema.org/draft-07/schema#",
 3 |   "title": "Extended GeoJSON Schema",
 4 |   "type": "object",
 5 |   "allOf": [
 6 |     {
 7 |       "$ref": "http://json.schemastore.org/geojson"
 8 |     }
 9 |   ],
10 |   "properties": {
11 |     "features": {
12 |       "type": "array",
13 |       "items": {
14 |         "type": "object",
15 |         "properties": {
16 |           "geometry": {
17 |             "type": "object",
18 |             "properties": {
19 |               "type": {
20 |                 "type": "string",
21 |                 "enum": ["Point", "MultiPolygon", "Polygon"]
22 |               }
23 |             },
24 |             "required": ["type", "coordinates"]
25 |           },
26 |           "properties": {
27 |             "type": "object",
28 |             "required": ["name_de"],
29 |             "properties": {
30 |               "name_de": {
31 |                 "type": "string"
32 |               }
33 |             }
34 |           }
35 |         }
36 |       }
37 |     }
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/docs/roadmap.md:
--------------------------------------------------------------------------------
 1 | # Roadmap
 2 | 
 3 | Feel free to add yourselves :) 
 4 | The quarters are meant as rough guidelines, not something fixed. It's recommended that each feature gets a separate issue (please link them).
 5 | 
 6 | Quarter | Features | Lead | Working on it
 7 | ----- | ----- | ---- | ----
 8 | Q4-24 | Renaming, migration to BAR, CI, publication on pypi | Marco | Thomas, Marco
 9 | Q1-25 | Validation (pycube) | Marco | Marco
10 | Q1-25 | Disentanglement of linpy and pycube | Claudio | Marco
11 | Q1-25 | Downloads of cubes from lindas (see here https://github.com/zazuko/cube-creator/wiki/Manually-removing-published-cube) | Marco | Marco
12 | Q1-25 | Benchmarking tripleization + validation of pycube, comparing with tarql | Marco | Marco
13 | Q1-25 (?) | Shared Dimensions in python | | Lian
14 | Q1-25 | Concepts with geo location (point), especially multi-lang | Marco | Marco
15 | Q1-25 | Smart Shared Dimensions (some API to look up existing ones, reuse them, etc etc) | Claudio | Fabian (?)
16 | Q2-25 | hierarchies | |
17 | Q2-25 | yaml downloads | Marco | Marco 
18 | Q3-25 | fastAPI | |
19 | Q3-25 | smart upload (diff) | |
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Kronmar-Bafu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/example/Cubes/Biotope_Statistik/biotope.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import pandas as pd
 4 | import yaml
 5 | import os
 6 | 
 7 | from pylindas.pycube import Cube
 8 | from pylindas.lindas.upload import upload_ttl
 9 | from pylindas.lindas.query import cube_exists
10 | 
# Resolve the input and output files relative to this script's directory.
BASEDIR = os.path.dirname(__file__)
DATAFILE, CONFIGFILE, CUBEFILE = (
    os.path.join(BASEDIR, filename)
    for filename in ("data.csv", "description.yml", "cube.ttl")
)

data = pd.read_csv(DATAFILE, sep=",", encoding="utf-8")
with open(CONFIGFILE, encoding="utf-8") as file:
    config = yaml.safe_load(file)

# Build the cube: data preparation, cube metadata, observations, shape.
cube = Cube(dataframe=data, cube_yaml=config, environment="TEST", local=True)
cube.prepare_data()
cube.write_cube()
cube.write_observations()
cube.write_shape()

valid, text = cube.validate()
# The validation report is printed whether or not the cube is valid.
print(text)
if not valid:
    # TODO: check for exception in .validate
    raise ValueError("Cube not Valid")

cube.serialize(CUBEFILE)
# Upload only when a connection file exists in the current working directory.
if os.path.isfile("lindas.ini"):
    upload_ttl(filename=CUBEFILE, db_file="lindas.ini", environment="TEST", graph_uri="")


--------------------------------------------------------------------------------
/pylindas/example.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import requests
 3 | import os
 4 | 
 5 | examples_dir = "example"
 6 | 
 7 | 
 8 | def load_example(example_id, base_uri="http://localhost:3030/dataset"):
 9 |     file_path = os.path.join(examples_dir, example_id, 'cube.ttl')
10 |     with open(file_path, 'rb') as f:
11 |         response = requests.post(base_uri, headers={"Content-Type": "text/turtle"}, data=f)
12 |         response.raise_for_status()
13 | 
14 | 
def list_examples(language="en"):
    """Return metadata for every example that ships a serialized cube.

    Walks ``examples_dir`` and keeps each directory that contains both a
    ``cube.ttl`` and a ``description.json`` file.

    Args:
        language: Language key looked up in the ``Name`` and ``Description``
            mappings of ``description.json``.

    Returns:
        A list of dicts with the keys ``id`` (directory path relative to
        ``examples_dir``), ``name`` and ``description``. A missing mapping or
        language entry yields an empty string.
    """
    result = []
    for root, _dirs, files in os.walk(examples_dir):
        if "cube.ttl" in files and "description.json" in files:
            description_path = os.path.join(root, "description.json")
            # Decode explicitly as UTF-8 so that non-ASCII names are read the
            # same way on every platform instead of depending on the locale's
            # default encoding.
            with open(description_path, encoding="utf-8") as desc_file:
                desc = json.load(desc_file)
            result.append({
                "id": os.path.relpath(root, examples_dir),
                "name": desc.get("Name", {}).get(language, ""),
                "description": desc.get("Description", {}).get(language, ""),
            })
    return result
30 | 


--------------------------------------------------------------------------------
/docs/sd.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Shared Dimensions
 3 | 
 4 | The term `Shared Dimension` refers to a very specific data structure within the [cube.link](https://cube.link) universe.
 5 | 
 6 | ## Shared Dimensions Queries
 7 | 
 8 | To link a dimension to an existing Shared Dimension, the following steps are necessary:
 9 | 
10 | - find a suitable Shared Dimension
11 | - use the URIs of the terms of that Shared Dimension to configure the dimension in the YAML file through its "mapping" field
12 | 
13 | `pylindas` has a basic implementation of:
14 | 
15 | - basic queries to request shared dimensions information from [LINDAS](https://lindas.admin.ch) (including terms and their URIs)
16 | - display the results, line by line
17 | 
18 | See the folder `pylindas/shared_dimension_queries` and its [README](../pylindas/shared_dimension_queries/README.md) for detailed explanations.
19 | 
20 | ## Generation of Shared Dimensions
21 | 
22 | `pylindas` has a basic implementation to generate a Shared Dimension by transforming a .csv file to a corresponding RDF.  
23 | 
24 | See the folder `pylindas/pyshareddimension` and its [README](../pylindas/pyshareddimension/README.md) for detailed explanations.
25 | 


--------------------------------------------------------------------------------
/example/Cubes/greenhouse_limit/data.csv:
--------------------------------------------------------------------------------
 1 | Jahr,THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung,THG-Emissionen mit der Treibhausgasbilanz der Landnutzung
 2 | 1990,55.24389386,52.5774385
 3 | 1991,57.07963814,50.24361298
 4 | 1992,56.81419162,52.11014189
 5 | 1993,54.23476738,48.90182898
 6 | 1994,53.14582365,51.22014611
 7 | 1995,54.03628518,49.86596536
 8 | 1996,54.61752292,47.14090851
 9 | 1997,53.41576335,49.23482595
10 | 1998,54.94403332,51.34175802
11 | 1999,54.72593647,53.33937066
12 | 2000,54.06542928,57.99289598
13 | 2001,55.57776168,55.42619108
14 | 2002,54.00443567,51.31726012
15 | 2003,55.03748992,53.00206377
16 | 2004,55.62473696,50.80056551
17 | 2005,56.30254624,53.90548843
18 | 2006,55.92278244,55.35708306
19 | 2007,54.01779198,52.45681872
20 | 2008,55.38803169,53.52469138
21 | 2009,53.90798922,51.64351554
22 | 2010,55.5070925,53.29617343
23 | 2011,51.35603093,47.99791545
24 | 2012,52.73698154,52.3108047
25 | 2013,53.56107825,53.32817871
26 | 2014,49.61468214,45.61805031
27 | 2015,49.10202318,50.27561402
28 | 2016,49.4573766,46.25278383
29 | 2017,48.56228908,46.7405917
30 | 2018,47.01032821,48.1301930
31 | 2019,46.7668571,43.62184325
32 | 2020,44.13997184,42.91719233
33 | 2021,45.45529541,45.06430734
34 | 2022,41.89470681,40.46522701
35 | 2023,40.84691968,42.14611779


--------------------------------------------------------------------------------
/pylindas/lindas/query.py:
--------------------------------------------------------------------------------
 1 | from SPARQLWrapper import SPARQLWrapper, JSON
 2 | 
 3 | def query_lindas(query: str, environment: str):
 4 |     match environment:
 5 |         case "TEST":
 6 |             sparql = SPARQLWrapper("https://test.lindas.admin.ch/query")
 7 |         case "INT":
 8 |             sparql = SPARQLWrapper("https://int.lindas.admin.ch/query")
 9 |         case "PROD":
10 |             sparql = SPARQLWrapper("https://lindas.admin.ch/query")
11 |     sparql.setQuery(query=query)
12 |     sparql.setReturnFormat(JSON)
13 |     results = sparql.query().convert()
14 |     return results["boolean"]
15 | 
16 | 
def cube_exists(cube_uri: str, environment: str):
    """Check whether any triple with the cube URI as subject exists.

    An ``ASK`` query with ``<cube_uri>`` in subject position is built and
    handed to ``query_lindas`` for the given environment.

    Args:
        cube_uri (str): The URI of the cube, inserted between ``<`` and ``>``.
        environment (str): The environment name forwarded to ``query_lindas``.

    Returns:
        Whatever ``query_lindas`` returns for the ASK query.
    """
    ask_query = f"ASK {{ <{cube_uri}> ?p ?o}}"
    answer = query_lindas(ask_query, environment=environment)
    return answer
29 |         


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "pylindas"
 7 | version = "0.6.5"
 8 | description = "Utilities for working with the linked data service LINDAS of the Swiss Federal Administration. Includes modules for working with cubes."
 9 | readme = "README.md"
10 | authors = [{ name = "Marco Kronenberg", email="marco.kronenberg@bafu.admin.ch" }]
11 | license = { file = "LICENSE" }
12 | classifiers = [
13 |     "Programming Language :: Python :: 3",
14 |     "License :: OSI Approved :: MIT License",
15 | ]
16 | keywords = ["linked data", "LINDAS", "cubes", "RDF"]
17 | requires-python = ">=3.10.11"
18 | dynamic = ["dependencies"]
19 | [tool.setuptools.dynamic]
20 | dependencies = {file = ["requirements.txt"]}
21 | 
22 | [project.urls]
23 | Homepage = "https://github.com/Kronmar-Bafu/py-cube"
24 | 
25 | [project.scripts]
26 | pycube = "pylindas.cli:main"
27 | 
28 | [tool.bumpver]
29 | current_version = "0.6.5"
30 | version_pattern = "MAJOR.MINOR.PATCH"
31 | commit_message = "bump version {old_version} -> {new_version}"
32 | pre_commit_hook = ""
33 | post_commit_hook = ""
34 | 
35 | [tool.bumpver.file_patterns]
36 | "pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"']
37 | "pylindas/__init__.py" = ["{version}"]
38 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: 🧊 CI tests
 2 | 
 3 | concurrency:
 4 |   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
 5 |   cancel-in-progress: true
 6 | 
 7 | on:
 8 |   push:
 9 |   pull_request:
10 |   schedule:
11 |     - cron: '0 0 * * 1'
12 | 
13 | permissions:
14 |   contents: read
15 | 
16 | jobs:
17 |   test:
18 |     name: 🧊 CI test
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         os: [ 'ubuntu-24.04', 'macos-15', 'windows-2022' ]
23 |         python-version: ['3.10', '3.11.2', '3.12', '3.13']
24 |     runs-on: ${{ matrix.os }}
25 | 
26 |     steps:
27 |     - uses: actions/checkout@v4
28 |     - name: Set up Python ${{ matrix.python-version }}
29 |       uses: actions/setup-python@v5
30 |       with:
31 |         python-version: ${{ matrix.python-version }}
32 |     - name: install requirements
33 |       run: |
34 |         python -m pip install --upgrade pip
35 |         pip install -r requirements.txt
36 |         pip install pytest
37 |     - name: test
38 |       env:
39 |         PYTHONWARNINGS: default
40 |         PYTHONUTF8: 1
41 |       run: |
42 |         pytest -s -vv tests example/Cubes/Biotope_Statistik/biotope.py example/Cubes/concept_table_airport/airport.py example/Cubes/mock/mock.py example/Shared_Dimensions/shared_dimension_generation/sd_example.py


--------------------------------------------------------------------------------
/example/Cubes/shared/bundeslander/transform.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import argparse
 3 | from urllib.parse import quote
 4 | 
 5 | def transform_geojson(input_file, output_file):
 6 |     with open(input_file, 'r') as f:
 7 |         data = json.load(f)
 8 |     
 9 |     transformed_features = []
10 |     for feature in data['features']:
11 |         name = feature['properties']['GEN']
12 |         iri = f"https://example.org/land/{quote(feature['properties']['GEN'])}"
13 |         transformed_feature = {
14 |             'type': feature['type'],
15 |             'geometry': feature['geometry'],
16 |             'properties': {
17 |                 'iri': iri,
18 |                 'name_de': name
19 |             }
20 |         }
21 |         transformed_features.append(transformed_feature)
22 |     
23 |     transformed_data = {
24 |         'type': data['type'],
25 |         '$schema': './schema.json',
26 |         'features': transformed_features
27 |     }
28 |     
29 |     with open(output_file, 'w') as f:
30 |         json.dump(transformed_data, f, indent=2)
31 | 
def main():
    """Command-line entry point: read two positional paths and run the transform."""
    cli = argparse.ArgumentParser(description='Transform a GeoJSON file.')
    positional_args = (
        ('input_file', 'The input GeoJSON file'),
        ('output_file', 'The output GeoJSON file'),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    transform_geojson(options.input_file, options.output_file)


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/pylindas/shared_dimension_queries/example_sd.py:
--------------------------------------------------------------------------------
 1 | from pylindas.shared_dimension_queries.shared_dimensions_queries import list_shared_dimensions, list_shared_dimension_terms, list_shared_dimensions_print, print_sparql_result
 2 | from rdflib import URIRef
 3 | 
 4 | """
 5 | Author: Fabian Cretton - HEVS
 6 | 
 7 | See README for an explanation
 8 | """
 9 | 
def main():
    """Run the shared-dimension query examples against the "INT" environment.

    Each section prints a heading and then the result of one query helper.
    """
    print(
        "Shared dimensions query examples",
        "================================",
        "List all Shared Dimensions:",
        "---------------------------",
        sep="\n",
    )
    all_dimensions = list_shared_dimensions("INT")
    list_shared_dimensions_print(all_dimensions)

    # print("List 10 Shared Dimensions:")
    # print("--------------------------")
    # result = list_shared_dimensions("INT", "fr", 0, 10)
    # list_shared_dimensions_print(result)

    print(
        "\nList Shared Dimensions that contains \"Canton\" in the english name",
        "---------------------------------------------------------------",
        sep="\n",
    )
    canton_dimensions = list_shared_dimensions("INT", "en", 0, 0, "Canton")
    list_shared_dimensions_print(canton_dimensions, "INT")

    print(
        "\nList the Cantons shared dimension's terms",
        "-----------------------------------------",
        sep="\n",
    )
    canton_terms = list_shared_dimension_terms("INT", "https://ld.admin.ch/dimension/canton", "fr")
    print_sparql_result(canton_terms, ["name", "sdTerm"])


if __name__ == "__main__":
    main()
36 | 


--------------------------------------------------------------------------------
/example/Shared_Dimensions/shared_dimension_generation/sd_example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import pandas as pd
 4 | import yaml
 5 | import os
 6 | 
 7 | from pylindas.pyshareddimension import SharedDimension
 8 | from pylindas.lindas.upload import upload_ttl
 9 | from pylindas.lindas.query import cube_exists
10 | 
# All input and output files live next to this script.
BASEDIR = os.path.dirname(__file__)
DIMENSIONFILE = os.path.join(BASEDIR, "sd_terms.csv")
CONFIGFILE = os.path.join(BASEDIR, "sd_description.yml")
SDFILE = os.path.join(BASEDIR, "sd_example.ttl")
SHACLFILE = os.path.join(BASEDIR, "sd_example_Shacl_result.ttl")
SHAREDDIMENSIONSHAPE = "https://raw.githubusercontent.com/Kronmar-Bafu/lindas-pylindas/refs/heads/main/pylindas/pyshareddimension/shared_dimension_shape.ttl"

terms_df = pd.read_csv(DIMENSIONFILE, encoding="utf8", sep=";")

# Read the YAML description as UTF-8, like the CSV above, so that the result
# does not depend on the platform's default text encoding.
with open(CONFIGFILE, encoding="utf-8") as file:
    sd_yaml = yaml.safe_load(file)

# Build the shared dimension step by step and serialize it to SDFILE.
sd = SharedDimension(dataframe=terms_df, sd_yaml=sd_yaml, environment="TEST", local=True)
sd.prepare_data()
sd.write_sd()
sd.write_terms()
sd.serialize(SDFILE)
print(sd)

# About the SHACL validation, please see the comment of the SharedDimension.validate() method
#   in order to understand the parameters
# This is work in progress as the SHACL file has to be passed as parameter instead of being downloaded from the Web behind the scene
resultBool, resultTxt = sd.validate(SHAREDDIMENSIONSHAPE, SHACLFILE)
print(f"Shared dimension validation result: {resultBool}, with message '{resultTxt}'")
35 | 


--------------------------------------------------------------------------------
/pylindas/lindas/namespaces.py:
--------------------------------------------------------------------------------
 1 | from rdflib import Graph, Namespace
 2 | 
 3 | 
 4 | CUBE = Namespace("https://cube.link/")
 5 | DCAT = Namespace("http://www.w3.org/ns/dcat#")
 6 | DCT = Namespace("http://purl.org/dc/terms/")
 7 | FOAF = Namespace("http://xmlns.com/foaf/0.1/")
 8 | LDADMIN = Namespace("https.//ld.admin.ch/application/")
 9 | META = Namespace("https://cube.link/meta/")
10 | QUDT = Namespace("http://qudt.org/schema/qudt/")
11 | RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
12 | RELATION = Namespace("https://cube.link/relation/")
13 | SCHEMA = Namespace("http://schema.org/")
14 | SH = Namespace("http://www.w3.org/ns/shacl#")
15 | TIME = Namespace("http://www.w3.org/2006/time#")
16 | UNIT = Namespace("http://qudt.org/vocab/unit/")
17 | VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")
18 | VOID = Namespace("http://rdfs.org/ns/void#")
19 | GEO = Namespace("http://www.opengis.net/ont/geosparql#")
20 | SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
21 | SD_MD = Namespace("https://cube-creator.zazuko.com/shared-dimensions/vocab#")
22 | XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
23 | 
24 | Namespaces = {
25 |     "cube": CUBE,
26 |     "dcat": DCAT,
27 |     "dct": DCT,
28 |     "schema": SCHEMA,
29 |     "sh": SH,
30 |     "foaf": FOAF,
31 |     "ldadmin": LDADMIN,
32 |     "meta": META,
33 |     "qudt": QUDT,
34 |     "rdf": RDF,
35 |     "relation": RELATION,
36 |     "time": TIME,
37 |     "unit": UNIT,
38 |     "vcard": VCARD,
39 |     "void": VOID,
40 |     "geo": GEO,
41 |     "skos": SKOS,
42 |     "sd_md": SD_MD,
43 |     "xsd": XSD,
44 | }
45 | 


--------------------------------------------------------------------------------
/example/Cubes/mock/mock.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import pandas as pd
 4 | import yaml
 5 | import os
 6 | 
 7 | from pylindas.pycube import Cube
 8 | from pylindas.lindas.upload import upload_ttl
 9 | from pylindas.lindas.query import cube_exists
10 | 
BASEDIR = os.path.dirname(__file__)
DATAFILE = os.path.join(BASEDIR, "data.csv")
CONFIGFILE = os.path.join(BASEDIR, "description.yml")
CUBEFILE = os.path.join(BASEDIR, "mock-cube.ttl")
# Paths this script writes to (relative to the working directory, as before).
# The same constants are used for the upload so that the file that is uploaded
# is always the file that was just serialized.
SERIALIZED_CUBEFILE = "example/Cubes/mock/cube.ttl"
TWO_SIDED_CUBEFILE = "./example/Cubes/mock-cube-two-sided.ttl"

mock_df = pd.read_csv(DATAFILE)

# Explicit UTF-8 so the YAML description is decoded identically on every platform.
with open(CONFIGFILE, encoding="utf-8") as file:
    config = yaml.safe_load(file)

# First cube: built from the data and description next to this script.
cube = Cube(dataframe=mock_df, cube_yaml=config, environment="TEST", local=True)
cube.prepare_data()
cube.write_cube()
cube.write_observations()
cube.write_shape()
cube.serialize(SERIALIZED_CUBEFILE)
print(cube)

# Upload only if the cube is not yet present and credentials are available.
if not cube_exists(cube_uri=cube.get_iri(), environment="TEST"):
    if os.path.isfile("lindas.ini"):
        upload_ttl(filename=SERIALIZED_CUBEFILE, db_file="lindas.ini", environment="TEST", graph_uri="")

# Second cube: built from the test fixtures (paths relative to the working directory).
modk_df_two_sided = pd.read_csv("tests/test_data.csv")
with open("tests/test.yml", encoding="utf-8") as file:
    two_sided_yaml = yaml.safe_load(file)
cube_two_sided = Cube(dataframe=modk_df_two_sided, cube_yaml=two_sided_yaml, environment="TEST", local=True)
cube_two_sided.prepare_data()
cube_two_sided.write_cube()
cube_two_sided.write_observations()
cube_two_sided.write_shape()

cube_two_sided.serialize(TWO_SIDED_CUBEFILE)
if os.path.isfile("lindas.ini"):
    upload_ttl(filename=TWO_SIDED_CUBEFILE, db_file="lindas.ini", environment="TEST", graph_uri="")
45 | 


--------------------------------------------------------------------------------
/pylindas/lindas/upload.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import stardog
 3 | from configparser import ConfigParser
 4 | from typing import Union
 5 | 
 6 | 
 7 | 
 8 | #URL = "https://stardog-test.cluster.ldbar.ch/lindas?graph=..."
 9 | #HEADERS = {'Content-Type': 'text/turtle', 'Authorization': }
10 | 
11 | # def uplod_ttl(filename: str, named_graph: str, password: str):
12 | #     with open(filename) as file:
13 | #         graph = file.read()
14 | #         response = requests.request("POST", )
15 | 
16 | 
17 | def _load_config(db_file:str, environment: str) -> dict:
18 |     parser = ConfigParser()
19 |     parser.read(db_file)
20 |     
21 |     config = {}
22 |     if parser.has_section(environment):
23 |         params = parser.items(environment)
24 |         for param in params:
25 |             config[param[0]] = param[1]
26 |     else:
27 |         raise Exception(f"Environment '{environment}' not found in db_file")
28 | 
29 |     return config
30 | 
31 | 
def upload_ttl(filename: Union[str,list], db_file: str, environment: str, graph_uri: str, clear_graph: bool = False):
    """Add one or several files to the "lindas" database within one transaction.

    Args:
        filename: A single file path, or an iterable of file paths.
        db_file: INI file from which the connection settings are loaded.
        environment: Section of *db_file* to use.
        graph_uri: Graph URI passed along with every added file.
        clear_graph: When true and *graph_uri* is non-empty, the graph is
            cleared before the files are added.
    """
    connection_settings = _load_config(db_file, environment)
    ttl_paths = [filename] if isinstance(filename, str) else filename

    # todo: could graph_uri be specified in lindas.ini?
    with stardog.Connection("lindas", **connection_settings) as conn:
        conn.begin()
        # Clearing is only done for an explicit graph URI: according to the
        # original author's note, clearing without one would wipe the whole
        # database, which is a risk not taken here.
        if clear_graph and graph_uri:
            conn.clear(graph_uri=graph_uri)

        for ttl_path in ttl_paths:
            print(f"uploading: {ttl_path}")
            conn.add(stardog.content.File(file=ttl_path), graph_uri=graph_uri)
        conn.commit()


--------------------------------------------------------------------------------
/docs/cli.md:
--------------------------------------------------------------------------------
 1 | # Command line
 2 | 
 3 | There is also a `pylindas` command-line utility that expects the data and the description
 4 | to be stored in a directory with a fixed layout. It can then perform common operations on that directory.
 5 | 
 6 | ## Necessary Directory Layout
 7 | 
 8 | The directory must be structured as follows:
 9 | 
10 | - `data.csv`: This file contains the observations.
11 | - `description.json` or `description.yml`: This file contains the cube and dimension descriptions.
12 | 
13 | ## Command Line Usage
14 | 
15 | For example, to serialize the data, use:
16 | 
17 | ```
18 | python cli.py serialize <input_directory> <output_ttl_file>
19 | ```
20 | 
21 | For additional help and options, you can use:
22 | 
23 | ```
24 | python cli.py --help
25 | ```
26 | 
27 | ## Fetching from data sources
28 | 
29 | There is the possibility to download datasets from other data sources. Right now, the functionality is basic, but
30 | it could be possible in the future to extend it.
31 | 
32 | - It supports only datasets coming from data.europa.eu
33 | - It supports only datasets with a Frictionless datapackage
34 | 
35 | See [Frictionless](https://frictionlessdata.io/introduction/#why-frictionless) for more information on Frictionless.
36 | 
37 | ```
38 | python fetch.py 'https://data.europa.eu/data/datasets/fc49eebf-3750-4c9c-a29e-6696eb644362?locale=en' example/corona/
39 | ```
40 | 
41 | ## CLI Examples
42 | 
43 | Multiple cube examples are ready in the [example](../example) directory.
44 | 
45 | ```bash
46 | $ python cli.py example list
47 | corona: Corona Numbers Timeline
48 | kita: Number of kids in day care facilities
49 | wind: Wind turbines — operated WKA per year in Schleswig-Holstein
50 | ```
51 | 
52 | To load an example in a Fuseki database, you can use the load subcommand of the example command.
53 | 
54 | ```bash
55 | $ python cli.py example load kita
56 | ```
57 | 
58 | There is a `start-fuseki` command that can be used to start a Fuseki server containing data
59 | from the examples.
60 | 
61 | ```bash
62 | $ python cli.py example start-fuseki
63 | ```
64 | 


--------------------------------------------------------------------------------
/docs/functionality.md:
--------------------------------------------------------------------------------
 1 | # Basic Functionality and Structure
 2 | 
 3 | The `pylindas` package consists of multiple sub modules:
 4 | 
 5 | ## `pycube`
 6 | 
 7 | To avoid the feeling of a black box, the philosophy of `pycube` is to make the construction of cubes modular. The process will take place in multiple steps, outlined below:
 8 | 
 9 | 1. **Initialization**
10 | 
11 | ```python
12 | from pylindas.pycube import Cube
13 | 
14 | cube = Cube(dataframe: pd.DataFrame, cube_yaml: dict)
15 | ```
16 | 
17 | This step initializes the cube with the data (`dataframe`) and the configuration (`cube_yaml`).
18 | 
19 | 2. **Mapping**
20 | 
21 | ```python
22 | cube.prepare_data()
23 | ```
24 | 
25 | Creates the observation URIs and applies the mappings as described in the `cube_yaml`.
26 | 
27 | 3. **Write `cube:Cube`**
28 | 
29 | ```python
30 | cube.write_cube()
31 | ```
32 | 
33 | Writes the `cube:Cube`.
34 | 
35 | 4. **Write `cube:Observation`**
36 | 
37 | ```python
38 | cube.write_observations()
39 | ```
40 | 
41 | Writes the `cube:Observation`s and the `cube:ObservationSet`.
42 | 
43 | 5. **Write `cube:ObservationConstraint`**
44 | 
45 | ```python
46 | cube.write_shape()
47 | ```
48 | 
49 | Writes the `cube:ObservationConstraint`.
50 | 
51 | ## The Complete Work-Flow
52 | 
53 | ```python
54 | # Write the cube
55 | cube = pycube.Cube(dataframe: pd.DataFrame, cube_yaml: dict, shape_yaml: dict)
56 | cube.prepare_data()
57 | cube.write_cube()
58 | cube.write_observations()
59 | cube.write_shape()
60 | 
61 | # Upload the cube
62 | cube.upload(endpoint: str, named_graph: str)
63 | ```
64 | 
65 | For an upload, use `cube.upload(endpoint: str, named_graph: str)` with the proper `endpoint` as well as `named_graph`.
66 | 
67 | A `lindas.ini` file is read for this step, containing this information as well as a password. It has the following structure:
68 | 
69 | ```
70 | [TEST]
71 | endpoint=https://stardog-test.cluster.ldbar.ch
72 | username=a-lindas-user-name
73 | password=something-you-don't-need-to-see;)
74 | ```
75 | 
76 | With additional information for the other environments.


--------------------------------------------------------------------------------
/scripts/fuseki/config-mem.ttl:
--------------------------------------------------------------------------------
 1 | ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 2 | 
 3 | PREFIX :        <#>
 4 | PREFIX fuseki:  <http://jena.apache.org/fuseki#>
 5 | PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 6 | PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
 7 | PREFIX ja:      <http://jena.hpl.hp.com/2005/11/Assembler#>
 8 | 
 9 | [] rdf:type fuseki:Server ;
10 |    fuseki:services (
11 |      :service
12 |    ) .
13 | 
14 | ## Service description for "/dataset" with all endpoints.
15 | ## e.g.
16 | ##   GET /dataset/query?query=...
17 | ##   GET /dataset/get?default (SPARQL Graph Store Protocol)
18 | 
19 | :service rdf:type fuseki:Service ;
20 |     fuseki:name "dataset" ;
21 | 
22 |     ## The  GET /dataset?query= variants
23 |     fuseki:endpoint [ fuseki:operation fuseki:query ; ] ;
24 |     ## gsp-rw covers gsp-r and upload.
25 |     fuseki:endpoint [ fuseki:operation fuseki:update ; ] ;
26 |     fuseki:endpoint [ fuseki:operation fuseki:gsp-rw ; ] ;
27 |     ## RDF Patch
28 |     # fuseki:endpoint [ fuseki:operation fuseki:patch ; ] ;
29 | 
30 |     fuseki:endpoint [ 
31 |         fuseki:operation fuseki:query ;
32 |         fuseki:name "sparql" 
33 |     ];
34 |     fuseki:endpoint [
35 |         fuseki:operation fuseki:query ;
36 |         fuseki:name "query" 
37 |     ] ;
38 |     fuseki:endpoint [
39 |         fuseki:operation fuseki:update ;
40 |         fuseki:name "update"
41 |     ] ;
42 |     fuseki:endpoint [
43 |         fuseki:operation fuseki:gsp-r ;
44 |         fuseki:name "get"
45 |     ] ;
46 |     fuseki:endpoint [ 
47 |         fuseki:operation fuseki:gsp-rw ; 
48 |         fuseki:name "data"
49 |     ] ; 
50 |     # fuseki:endpoint [
51 |     #     ## RDF Patch
52 |     #     fuseki:operation fuseki:patch ;
53 |     #     fuseki:name "patch"
54 |     # ] ;
55 |     fuseki:dataset :dataset ;
56 |     .
57 | 
58 | # Transactional in-memory dataset.
59 | :dataset rdf:type ja:MemoryDataset ;
60 |     ## Optional load with data on start-up
61 |     ja:data "/usr/share/data/example/kita/cube.ttl";
62 |     ja:data "/usr/share/data/example/wind/cube.ttl";
63 |     ja:data "/usr/share/data/example/shared/bundeslander/data.ttl";
64 |     ## ja:data "data2.trig";
65 |     .
66 | 


--------------------------------------------------------------------------------
/example/Cubes/Population_Aargau/age.csv:
--------------------------------------------------------------------------------
 1 | ageID,ageName_en,ageDescription_en,ageName_de,ageDescription_de
 2 | age_00_04,Age 00 to 04,People with age 00 to 04 years,Alter 00 bis 04,Personen mit Alter 00 bis 04 Jahre
 3 | age_05_09,Age 05 to 09,People with age 05 to 09 years,Alter 05 bis 09,Personen mit Alter 05 bis 09 Jahre
 4 | age_10_14,Age 10 to 14,People with age 10 to 14 years,Alter 10 bis 14,Personen mit Alter 10 bis 14 Jahre
 5 | age_15_19,Age 15 to 19,People with age 15 to 19 years,Alter 15 bis 19,Personen mit Alter 15 bis 19 Jahre
 6 | age_20_24,Age 20 to 24,People with age 20 to 24 years,Alter 20 bis 24,Personen mit Alter 20 bis 24 Jahre
 7 | age_25_29,Age 25 to 29,People with age 25 to 29 years,Alter 25 bis 29,Personen mit Alter 25 bis 29 Jahre
 8 | age_30_34,Age 30 to 34,People with age 30 to 34 years,Alter 30 bis 34,Personen mit Alter 30 bis 34 Jahre
 9 | age_35_39,Age 35 to 39,People with age 35 to 39 years,Alter 35 bis 39,Personen mit Alter 35 bis 39 Jahre
10 | age_40_44,Age 40 to 44,People with age 40 to 44 years,Alter 40 bis 44,Personen mit Alter 40 bis 44 Jahre
11 | age_45_49,Age 45 to 49,People with age 45 to 49 years,Alter 45 bis 49,Personen mit Alter 45 bis 49 Jahre
12 | age_50_54,Age 50 to 54,People with age 50 to 54 years,Alter 50 bis 54,Personen mit Alter 50 bis 54 Jahre
13 | age_55_59,Age 55 to 59,People with age 55 to 59 years,Alter 55 bis 59,Personen mit Alter 55 bis 59 Jahre
14 | age_60_64,Age 60 to 64,People with age 60 to 64 years,Alter 60 bis 64,Personen mit Alter 60 bis 64 Jahre
15 | age_65_69,Age 65 to 69,People with age 65 to 69 years,Alter 65 bis 69,Personen mit Alter 65 bis 69 Jahre
16 | age_70_74,Age 70 to 74,People with age 70 to 74 years,Alter 70 bis 74,Personen mit Alter 70 bis 74 Jahre
17 | age_75_79,Age 75 to 79,People with age 75 to 79 years,Alter 75 bis 79,Personen mit Alter 75 bis 79 Jahre
18 | age_80_84,Age 80 to 84,People with age 80 to 84 years,Alter 80 bis 84,Personen mit Alter 80 bis 84 Jahre
19 | age_85_89,Age 85 to 89,People with age 85 to 89 years,Alter 85 bis 89,Personen mit Alter 85 bis 89 Jahre
20 | age_90_,Age 90 and older,People with age 90 years and older,Alter 90 und älter,Personen mit Alter 90 Jahre und älter
21 | all,Total population,Total population of all ages,Gesamtbevölkerung,Gesamtbevölkerung aller Altersgruppen
22 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |     push:
 5 |         branches:
 6 |             - main
 7 | 
 8 | jobs:
 9 |     bump-version-and-publish:
10 |         name: Bump Version and Publish to PyPI
11 |         runs-on: ubuntu-latest
12 | 
13 |         permissions:
14 |             contents: write
15 | 
16 |         # don't run this job on forks
17 |         if: ${{ github.repository_owner == 'Kronmar-Bafu' }}
18 | 
19 |         steps:
20 |             - name: Checkout code
21 |               uses: actions/checkout@v4
22 | 
23 |             - name: Set up python
24 |               uses: actions/setup-python@v5
25 |               with:
26 |                 python-version: '3.13'
27 |             
28 |             - name: Install dependencies
29 |               run: |
30 |                 python -m pip install --upgrade pip
31 |                 pip install bumpver build twine
32 | 
33 |             - name: Determine version bump
34 |               id: version_bump
35 |               run: |
36 |                 VERSION_BUMP="patch"
37 |                 if git log -1 --pretty=%B | grep -iq "#major"; then 
38 |                   VERSION_BUMP="major"
39 |                 elif git log -1 --pretty=%B | grep -iq "#minor"; then
40 |                   VERSION_BUMP="minor"
41 |                 fi
42 |                 echo "Version bump type: $VERSION_BUMP"
43 |                 echo "bump_type=$VERSION_BUMP" >> $GITHUB_ENV
44 |             
45 |             - name: Bump version
46 |               run: |
47 |                 bumpver update --${{ env.bump_type}}
48 | 
49 |             - name: Commit version bump
50 |               uses: stefanzweifel/git-auto-commit-action@v5
51 |               with:
52 |                 commit_message: "Bump version for release"
53 |                 branch: main
54 |             
55 |             - name: Build package
56 |               run: |
57 |                 python -m build
58 |             
59 |             - name: Check package
60 |               run: |
61 |                 twine check dist/*
62 |             
63 |             - name: Publish to PyPI
64 |               env:
65 |                 TWINE_USERNAME: __token__
66 |                 TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
67 |               run: |
68 |                 twine upload dist/*
69 | 


--------------------------------------------------------------------------------
/example/Cubes/mock/mock-cube-cube.ttl:
--------------------------------------------------------------------------------
 1 | @prefix cube: <https://cube.link/> .
 2 | @prefix dcat: <http://www.w3.org/ns/dcat#> .
 3 | @prefix dct: <http://purl.org/dc/terms/> .
 4 | @prefix schema1: <http://schema.org/> .
 5 | @prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
 6 | @prefix void: <http://rdfs.org/ns/void#> .
 7 | @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
 8 | 
 9 | <https://mock.ld.admin.ch/cube/mock-example/1> a void:Dataset,
10 |         schema1:Dataset,
11 |         dcat:Dataset,
12 |         cube:Cube ;
13 |     dct:accrualPeriodicity <http://publications.europe.eu/resource/authority/frequency/ANNUAL> ;
14 |     dct:description "Ein Beispiel Cube, der simulierte Daten enthält"@de,
15 |         "An example Cube containing some simulated data"@en ;
16 |     dct:title "Mock Cube"@de,
17 |         "Mock Cube"@en,
18 |         "Mock Cube"@fr,
19 |         "Mock Cube"@it ;
20 |     schema1:contributor <https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu> ;
21 |     schema1:creativeWorkStatus <https://ld.admin.ch/vocabulary/CreativeWorkStatus/Draft> ;
22 |     schema1:creator <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
23 |     schema1:dateCreated "2024-08-26"^^xsd:date ;
24 |     schema1:dateModified "2024-09-24T14:43:29+00:00"^^xsd:dateTime,
25 |         "2024-09-24T14:44:21+00:00"^^xsd:dateTime ;
26 |     schema1:datePublished "2024-09-24"^^xsd:date ;
27 |     schema1:description "Ein Beispiel Cube, der simulierte Daten enthält"@de,
28 |         "An example Cube containing some simulated data"@en ;
29 |     schema1:name "Mock Cube"@de,
30 |         "Mock Cube"@en,
31 |         "Mock Cube"@fr,
32 |         "Mock Cube"@it ;
33 |     schema1:publisher <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
34 |     schema1:version 1 ;
35 |     schema1:workExample <https://ld.admin.ch/application/visualize> ;
36 |     dcat:contactPoint [ a vcard:Organization ;
37 |             vcard:fn "Bundesamt für Mock Data"^^xsd:string ;
38 |             vcard:hasEmail "contact@mock.ld.admin.ch"^^xsd:string ],
39 |         [ a vcard:Organization ;
40 |             vcard:fn "Bundesamt für Mock Data"^^xsd:string ;
41 |             vcard:hasEmail "contact@mock.ld.admin.ch"^^xsd:string ] ;
42 |     cube:observationConstraint <https://mock.ld.admin.ch/cube/mock-example/1/shape> ;
43 |     cube:observationSet <https://mock.ld.admin.ch/cube/mock-example/1/ObservationSet> .
44 | 
45 | 


--------------------------------------------------------------------------------
/example/Cubes/Population_Aargau/fetch.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | # URL of the CSV file
 4 | url = "https://www.ag.ch/app/sajato-api/api/v2/export?columns=SIS_BEVS0001D.1.ALTER_00_04-ALTER_05_09-ALTER_10_14-ALTER_15_19-ALTER_20_24-ALTER_25_29-ALTER_30_34-ALTER_35_39-ALTER_40_44-ALTER_45_49-ALTER_50_54-ALTER_55_59-ALTER_60_64-ALTER_65_69-ALTER_70_74-ALTER_75_79-ALTER_80_84-ALTER_85_89-ALTER_90_-TOTAL&prefix=multiple&sep=%E2%82%AC&search=19%2C1901%2C1902%2C1903%2C1904%2C1905%2C1906%2C1907%2C1908%2C1909%2C1910%2C1911%2C4001%2C4271%2C4221%2C4191%2C4222%2C4061%2C4272%2C4091%2C4223%2C4323%2C4021%2C4301%2C4224%2C4131%2C4022%2C4225%2C4023%2C4062%2C4226%2C4227%2C4002%2C4024%2C4092%2C4093%2C4132%2C4192%2C4228%2C4273%2C4063%2C4274%2C4095%2C4193%2C4003%2C4133%2C4230%2C4302%2C4303%2C4124%2C4094%2C4185%2C4229%2C4064%2C4004%2C4231%2C4194%2C4065%2C4025%2C4304%2C4134%2C4096%2C4066%2C4195%2C4049%2C4161%2C4097%2C4305%2C4026%2C4005%2C4162%2C4196%2C4067%2C4306%2C4027%2C4028%2C4163%2C4307%2C4098%2C4164%2C4029%2C4232%2C4165%2C4135%2C4006%2C4099%2C4197%2C4100%2C4251%2C4198%2C4069%2C4166%2C4186%2C4070%2C4007%2C4199%2C4136%2C4167%2C4101%2C4200%2C4068%2C4084%2C4168%2C4071%2C4252%2C4308%2C4169%2C4233%2C4030%2C4275%2C4309%2C4310%2C4276%2C4031%2C4008%2C4170%2C4102%2C4311%2C4137%2C4312%2C4201%2C4313%2C4138%2C4103%2C4104%2C4253%2C4105%2C4202%2C4314%2C4033%2C4139%2C4234%2C4171%2C4184%2C4277%2C4009%2C4255%2C4279%2C4236%2C4032%2C4254%2C4106%2C4203%2C4235%2C4278%2C4107%2C4172%2C4034%2C4204%2C4035%2C4072%2C4108%2C4036%2C4010%2C4109%2C4173%2C4174%2C4140%2C4073%2C4256%2C4037%2C4237%2C4038%2C4074%2C4175%2C4280%2C4257%2C4205%2C4141%2C4281%2C4315%2C4039%2C4110%2C4258%2C4316%2C4111%2C4011%2C4282%2C4238%2C4075%2C4206%2C4112%2C4317%2C4283%2C4076%2C4207%2C4113%2C4125%2C4114%2C4115%2C4142%2C4143%2C4318%2C4259%2C4176%2C4144%2C4208%2C4209%2C4319%2C4239%2C4177%2C4040%2C4284%2C4210%2C4260%2C4041%2C4116%2C4285%2C4012%2C4178%2C4320%2C4145%2C4117%2C4042%2C4077%2C4179%2C4286%2C4078%2C4118%2C4119%2C4043%2C4321%2C4013%2C4146%2C4079%2C4044%2C4120%2C4121%2C4080%2C4122%2C4287%2C4261%2C4240%2C4262%2C4045%2C4081%
2C4180%2C4288%2C4123%2C4322%2C4181%2C4082%2C4046%2C4182%2C4047%2C4048%2C4183%2C4263%2C4147%2C4289%2C4083%2C4324%2C4264&fileType=csv&dateFrom=&dateTo="
 5 | 
 6 | # Local filename to save the downloaded file
 7 | filename = "./example/Cubes/Population_Aargau/data_raw.csv"
 8 | 
 9 | # Download the file
10 | response = requests.get(url)
11 | if response.status_code == 200:
12 |     with open("./" + filename, 'wb') as f:
13 |         f.write(response.content.decode('latin-1').encode('utf-8'))
14 |     print(f"File downloaded and saved as {filename}")
15 | else:
16 |     print(f"Failed to download file. Status code: {response.status_code}")


--------------------------------------------------------------------------------
/example/Cubes/mock/data.csv:
--------------------------------------------------------------------------------
 1 | Jahr,Station,Wert,Standardfehler,Status
 2 | 2000,Bern,23.0,5.0,definitiv
 3 | 2000,Zürich,23.555744036232408,7.517863529610764,definitiv
 4 | 2001,Bern,21.536090723505524,6.466524763963783,definitiv
 5 | 2001,Zürich,21.659924330021255,6.030296213104391,definitiv
 6 | 2002,Bern,22.575144684250287,1.493504195537817,definitiv
 7 | 2002,Zürich,20.688211936144263,4.460656090931694,definitiv
 8 | 2003,Bern,20.34210673843992,3.1225318830708093,definitiv
 9 | 2003,Zürich,21.515733621541955,9.256702605801236,definitiv
10 | 2004,Bern,19.883669558588743,4.901117604444538,definitiv
11 | 2004,Zürich,19.06703223788886,7.3909398210112585,definitiv
12 | 2005,Bern,20.37741134479489,0.3648574303067509,definitiv
13 | 2005,Zürich,19.10206139418359,5.334716140807069,definitiv
14 | 2006,Bern,17.851780950874087,2.5205528663922094,definitiv
15 | 2006,Zürich,19.166364372767042,9.574551975171314,definitiv
16 | 2007,Bern,18.321158320002034,4.873759639672014,definitiv
17 | 2007,Zürich,16.70866751700804,7.127258811335302,definitiv
18 | 2008,Bern,17.89457243174931,0.9142998372297384,definitiv
19 | 2008,Zürich,17.518675046034367,4.510509430147336,definitiv
20 | 2009,Bern,15.644260115877954,3.6030273778709576,definitiv
21 | 2009,Zürich,16.57885415743719,8.255475397580657,definitiv
22 | 2010,Bern,16.67404534861387,6.403036956705559,definitiv
23 | 2010,Zürich,14.658857249625616,5.41838925075702,definitiv
24 | 2011,Bern,15.239527332053381,2.7685586054789892,definitiv
25 | 2011,Zürich,15.77006036400352,2.547974456080931,definitiv
26 | 2012,Bern,13.746463964437758,5.634558406905088,definitiv
27 | 2012,Zürich,13.898790381464226,6.195609420937518,definitiv
28 | 2013,Bern,14.79423943661537,8.450009649429939,definitiv
29 | 2013,Zürich,12.895269405129998,3.425280014313877,definitiv
30 | 2014,Bern,12.578954061301419,4.667912290722137,definitiv
31 | 2014,Zürich,13.739824320879597,0.780391898961843,definitiv
32 | 2015,Bern,12.08858766109956,7.234991659001383,definitiv
33 | 2015,Zürich,11.300664194959296,4.794358156286226,definitiv
34 | 2016,Bern,12.606317539212025,9.624032873046964,definitiv
35 | 2016,Zürich,11.306186956398,2.5019829145341275,definitiv
36 | 2017,Bern,10.081257039139402,5.322003468934927,definitiv
37 | 2017,Zürich,11.399522010793316,0.4085982816951822,definitiv
38 | 2018,Bern,10.525901301404176,7.317046263544459,definitiv
39 | 2018,Zürich,8.933378287580624,5.003684887934572,definitiv
40 | 2019,Bern,10.131078794353765,9.12751448116438,definitiv
41 | 2019,Zürich,9.725396222849866,3.27575514766154,definitiv
42 | 2020,Bern,7.863975722345669,4.286464451069697,definitiv
43 | 2020,Zürich,8.817539768108874,1.6851777638149157,definitiv
44 | 2021,Bern,8.883947915155796,5.824977064184451,definitiv
45 | 2021,Zürich,6.873745757485178,6.681380029364922,definitiv
46 | 2022,Bern,7.479049116629595,7.297772477326248,provisionally
47 | 2022,Zürich,7.984094086903241,5.220921715260017,provisionally
48 | 


--------------------------------------------------------------------------------
/example/Cubes/mock/description.yml:
--------------------------------------------------------------------------------
  1 | Name:
  2 |   de: Mock Cube
  3 |   fr: Mock Cube
  4 |   it: Mock Cube
  5 |   en: Mock Cube
  6 | Description:
  7 |   de: Ein Beispiel Cube, der simulierte Daten enthält
  8 |   en: An example Cube containing some simulated data
  9 | Publisher: 
 10 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 11 | Creator:
 12 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 13 | Contributor:
 14 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 15 |     Name: Bundesamt für Mock Data
 16 | Date Created:
 17 |   2024-08-26
 18 | Contact Point:
 19 |   E-Mail: contact@mock.ld.admin.ch
 20 |   Name: Bundesamt für Mock Data
 21 | Base-URI: https://mock.ld.admin.ch/
 22 | Identifier: mock-example
 23 | Version: 1
 24 | Work Status: 
 25 |   Draft
 26 | Visualize:
 27 |   True
 28 | # Optional but recommended
 29 | Accrual Periodicity: yearly
 30 | 
 31 | # Optional
 32 | Namespace: mock
 33 | 
 34 | dimensions:
 35 |   # required
 36 |   Jahr:
 37 |     name:
 38 |       de: Jahr
 39 |       fr: An
 40 |       it: Anno
 41 |       en: Year
 42 |     description:
 43 |       de: Jahr der Erhebung
 44 |     dimension-type: Key Dimension
 45 |     datatype: URI
 46 |     scale-type: ordinal
 47 |     path: year
 48 |     data-kind: 
 49 |       type: temporal
 50 |       unit: year
 51 |     mapping:
 52 |       type: additive
 53 |       base: https://ld.admin.ch/time/year/
 54 |   
 55 |   Station:
 56 |     name:
 57 |       de: Station
 58 |       fr: Station
 59 |       it: Stazione
 60 |       en: Station
 61 |     description:
 62 |       de: Station der Untersuchung
 63 |     dimension-type: Key Dimension
 64 |     scale-type: nominal
 65 |     datatype: URI
 66 |     path: station
 67 |     mapping:
 68 |       type: replace
 69 |       replacements:
 70 |         Bern: https://mock.ld.admin.ch/station/01
 71 |         Zürich: https://mock.ld.admin.ch/station/02
 72 | 
 73 |   Wert:
 74 |     name:
 75 |       de: Wert
 76 |       fr: Valeur
 77 |       it: Valore
 78 |       en: Value
 79 |     description:
 80 |       de: Gemessener Wert an der Station
 81 |     dimension-type: Measure Dimension
 82 |     datatype: float
 83 |     scale-type: interval
 84 |     path: value
 85 |     unit: KiloGM
 86 | 
 87 |   Standardfehler:
 88 |     name:
 89 |       de: Standardfehler
 90 |       fr: Erreur standard
 91 |       it: Errore standard
 92 |       en: Standard error
 93 |     description:
 94 |       de: Standardfehler des berechneten Werts
 95 |     dimension-type: Standard Error
 96 |     datatype: float
 97 |     relates-to: value
 98 |     scale-type: ratio
 99 |     path: standardError
100 |     unit: PERCENT
101 | 
102 |   Status:
103 |     name:
104 |       de: Veröffentlichungsstatus
105 |       fr: Statut de publication
106 |       it: Stato di pubblicazione
107 |       en: State of publication
108 |     description:
109 |       de: "Status der Veröffentlichung, provisorisch oder final"
110 |     dimension-type: Annotation
111 |     datatype: string
112 |     scale-type: nominal
113 |     path: status


--------------------------------------------------------------------------------
/example/Cubes/co2-limits/data.csv:
--------------------------------------------------------------------------------
 1 | Jahr,Energieträger,CO2-Emissionen (Mt),Nicht gerundeter Wert (Mt)
 2 | 1990,Brennstoffe,23.409,23.4088409539035
 3 | 1990,Treibstoffe,15.449,15.4491830781084
 4 | 1991,Brennstoffe,23.251,23.2508647438026
 5 | 1991,Treibstoffe,15.929,15.9291182873447
 6 | 1992,Brennstoffe,23.849,23.8486416497727
 7 | 1992,Treibstoffe,16.259,16.2585739839915
 8 | 1993,Brennstoffe,22.656,22.6555635646011
 9 | 1993,Treibstoffe,15.217,15.2168569310758
10 | 1994,Brennstoffe,22.931,22.9309962146956
11 | 1994,Treibstoffe,15.399,15.3994934644429
12 | 1995,Brennstoffe,22.702,22.7024598374676
13 | 1995,Treibstoffe,15.101,15.1005792556581
14 | 1996,Brennstoffe,21.872,21.8724337824029
15 | 1996,Treibstoffe,15.16,15.1603802991533
16 | 1997,Brennstoffe,22.739,22.7394923442782
17 | 1997,Treibstoffe,15.736,15.7364012222237
18 | 1998,Brennstoffe,23.212,23.2117416464586
19 | 1998,Treibstoffe,15.964,15.9635530644151
20 | 1999,Brennstoffe,22.226,22.2262923026919
21 | 1999,Treibstoffe,16.569,16.5686443608428
22 | 2000,Brennstoffe,22.461,22.4614503218056
23 | 2000,Treibstoffe,16.836,16.8364619054554
24 | 2001,Brennstoffe,22.743,22.7430826369274
25 | 2001,Treibstoffe,16.559,16.5594894298454
26 | 2002,Brennstoffe,22.378,22.3778988775046
27 | 2002,Treibstoffe,16.514,16.5139078060435
28 | 2003,Brennstoffe,22.183,22.1825020578164
29 | 2003,Treibstoffe,16.683,16.6832641674477
30 | 2004,Brennstoffe,22.332,22.3323303647259
31 | 2004,Treibstoffe,16.809,16.8089067625261
32 | 2005,Brennstoffe,22.058,22.0581028668418
33 | 2005,Treibstoffe,16.893,16.8926949527344
34 | 2006,Brennstoffe,21.87,21.8704005096137
35 | 2006,Treibstoffe,17.03,17.029954451496
36 | 2007,Brennstoffe,21.473,21.4734775638869
37 | 2007,Treibstoffe,17.355,17.3545199250883
38 | 2008,Brennstoffe,20.874,20.8743790902275
39 | 2008,Treibstoffe,17.706,17.7057048965966
40 | 2009,Brennstoffe,20.364,20.3644771142418
41 | 2009,Treibstoffe,17.515,17.5146844187239
42 | 2010,Brennstoffe,20.081,20.0809192014044
43 | 2010,Treibstoffe,17.417,17.4171042931388
44 | 2011,Brennstoffe,19.542,19.5421072281977
45 | 2011,Treibstoffe,17.225,17.2246963681246
46 | 2012,Brennstoffe,19.203,19.203496216434
47 | 2012,Treibstoffe,17.347,17.3466069719127
48 | 2013,Brennstoffe,18.862,18.8615373863506
49 | 2013,Treibstoffe,17.258,17.2576046460133
50 | 2014,Brennstoffe,18.362,18.3616203250162
51 | 2014,Treibstoffe,17.154,17.1540834660966
52 | 2015,Brennstoffe,17.86,17.8604720539075
53 | 2015,Treibstoffe,16.415,16.4147799431993
54 | 2016,Brennstoffe,17.543,17.5432203420959
55 | 2016,Treibstoffe,16.247,16.246639568902
56 | 2017,Brennstoffe,17.207,17.2065691870665
57 | 2017,Treibstoffe,15.963,15.9628629448215
58 | 2018,Brennstoffe,16.796,16.7963008002405
59 | 2018,Treibstoffe,15.956,15.9555492234206
60 | 2019,Brennstoffe,16.436,16.4355294371483
61 | 2019,Treibstoffe,15.895,15.8945043183231
62 | 2020,Brennstoffe,16.117,16.1168503388347
63 | 2020,Treibstoffe,14.598,14.5980752146887
64 | 2021,Brennstoffe,15.746,15.7463144422352
65 | 2021,Treibstoffe,14.777,14.777176066368
66 | 2022,Brennstoffe,14.981,14.9809740495392
67 | 2022,Treibstoffe,14.613,14.6129515197184
68 | 2023,Brennstoffe,13.656,13.6562990399726
69 | 2023,Treibstoffe,14.639,14.6385734439246


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/airport.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import pandas as pd
 4 | import yaml
 5 | import os
 6 | 
 7 | from pylindas.lindas.namespaces import SCHEMA
 8 | from pylindas.pycube import Cube
 9 | from pylindas.lindas.upload import upload_ttl
10 | from pylindas.lindas.query import cube_exists
11 | 
12 | """
13 | Author: Fabian Cretton - HEVS
14 | 
15 | See the description in the README.
16 | 
17 | This example script only generates the .ttl file, the upload operations are not performed
18 | """
19 | 
20 | BASEDIR = os.path.dirname(__file__)
21 | CONFIGFILE = os.path.join(BASEDIR, "description.yml")
22 | CUBEFILE = os.path.join(BASEDIR, "cube.ttl")
23 | 
24 | 
25 | # data.csv contains an airport type identifier that doesn't exist in airportconcept.csv;
26 | # the goal is to demonstrate that the check_dimension_object_property() call below detects it
27 | DATADUMMY = os.path.join(BASEDIR, "data.csv")
28 | data_df = pd.read_csv(DATADUMMY)
29 | 
30 | with open(CONFIGFILE) as file:
31 |     config = yaml.safe_load(file)
32 | 
33 | cube = Cube(dataframe=data_df, cube_yaml=config, environment="TEST", local=True)
34 | cube.prepare_data()
35 | cube.write_cube()
36 | cube.write_observations()
37 | cube.write_shape()
38 | 
39 | # Add the concept data.
40 | # The concept must be defined in the cube_yaml file, as a nested key under the "Concepts" key;
41 | #   "typeOfAirport" is the name of that nested key
42 | AIRPORTDATA = os.path.join(BASEDIR, "airportconcept.csv")
43 | airport_concept_df = pd.read_csv(AIRPORTDATA)
44 | cube.write_concept("typeOfAirport", airport_concept_df)
45 | 
46 | # Check that all the generated URLs for the typeOfAirport are resources (concepts) with a SCHEMA.name triple.
47 | # This makes it possible to check whether every entry in data.csv corresponds to an entry in airportconcept.csv.
48 | # At this point the check is expected to report the missing 'dummy' airport type
49 | allConceptsFound = cube.check_dimension_object_property("typeOfAirport", SCHEMA.name)
50 | 
51 | if not allConceptsFound:
52 |     print("""\nCheck result - WARNING: It seems that some objects of the \"typeOfAirport\" dimension have no matching concept.
53 |           See the log for details and check your data + cube dimension and concepts configuration""")
54 | else:
55 |     print("\nCheck result - SUCCESS: It seems that all objects of the \"typeOfAirport\" dimension have a matching concept.")
56 | 
57 | cube.serialize(CUBEFILE)  # serialized here, i.e. before the dummy concept is added below
58 | 
59 | # Just for testing the functionality: add the 'dummy' airport type and run the same check again
60 | AIRPORTDUMMYDATA = os.path.join(BASEDIR, "airportdummyconcept.csv")
61 | airport_concept_dummy_df = pd.read_csv(AIRPORTDUMMYDATA)
62 | cube.write_concept("typeOfAirport", airport_concept_dummy_df)
63 | allConceptsFound = cube.check_dimension_object_property("typeOfAirport", SCHEMA.name)
64 | 
65 | if not allConceptsFound:
66 |     print("""\nCheck result - WARNING: It seems that some objects of the \"typeOfAirport\" dimension have no matching concept.
67 |           See the log for details and check your data + cube dimension and concepts configuration""")
68 | else:
69 |     print("\nCheck result - SUCCESS: It seems that all objects of the \"typeOfAirport\" dimension have a matching concept.")
70 | 
71 | print(cube)
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/example/Cubes/wind/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "../../linpy/description.schema.json",
 3 |   "Name": {
 4 |     "de": "Windkraftanlagen - betriebene WKA pro Jahr in Schleswig-Holstein",
 5 |     "en": "Wind turbines \u2014 operated WKA per year in Schleswig-Holstein"
 6 |   },
 7 |   "Description": {
 8 |     "de": "Die Aufstellung zeigt die Anzahl und die genehmigte Leistung der betriebenen Windkraftanlagen (WKA) in Schleswig-Holstein. Es sind nur nach dem BImSchG genehmigungsbed\u00fcrftige WKA ber\u00fccksichtigt. \r\n\r\nBei den Zahlen f\u00fcr das laufende Jahr handelt es sich um vorl\u00e4ufige Werte, wie sie am angegebenen Endzeitpunkt bekannt waren.\r\n\r\nQuelle:  LfU, Fachdatenbank LIS-A\r\n\r\nAuf der [Themenseite Windenergie auf schleswig-holstein.de](https://www.schleswig-holstein.de/DE/landesregierung/themen/energie/windenergie/windenergie_node.html) sind weitere Informationen zum Thema ver\u00f6ffentlicht.",
 9 |     "en": "The list shows the number and approved power of the operated wind turbines (WKA) in Schleswig-Holstein. Only WKAs requiring authorisation under the BImSchG are taken into account. \n\nSource: LfU, specialist database LIS-A\n \nFurther information on the topic is published on the [Theme page Windenergie on schleswig-holstein.de](https://www.schleswig-holstein.de/DE/landesregierung/themen/energie/windenergie/windenergie_node.html)."
10 |   },
11 |   "Publisher": [
12 |     {
13 |       "IRI": "https://opendata.schleswig-holstein.de/organization/02619150-8e16-46a5-b2b9-de73ecfc617d"
14 |     }
15 |   ],
16 |   "Creator": [
17 |     {
18 |       "IRI": "https://opendata.schleswig-holstein.de/organization/02619150-8e16-46a5-b2b9-de73ecfc617d"
19 |     }
20 |   ],
21 |   "Contributor": [],
22 |   "Date Created": "2024-11-13T11:45:45.558776",
23 |   "Contact Point": {
24 |     "E-Mail": "opendata@example.ch",
25 |     "Name": "Landesamt f\u00fcr Umwelt"
26 |   },
27 |   "Base-URI": "https://opendata.schleswig-holstein.de/dataset/fc49eebf-3750-4c9c-a29e-6696eb644362/resource/b8a7b43c-3529-4b92-bb49-7bf4e9109dfb/download/opendata_wka_inbetrieb_sh_20230103.csv",
28 |   "Identifier": "wka-inbetrieb",
29 |   "Version": 0.1,
30 |   "Work Status": "Draft",
31 |   "Visualize": true,
32 |   "Accrual Periodicity": "",
33 |   "Namespace": "https://opendata.example.ch",
34 |   "dimensions": {
35 |     "Jahr": {
36 |       "name": {
37 |         "de": "Jahr",
38 |         "en": "Jahr"
39 |       },
40 |       "dimension-type": "Measure Dimension",
41 |       "scale-type": "interval",
42 |       "path": "Jahr",
43 |       "description": {
44 |         "de": "Beschreibung f\u00fcr Jahr",
45 |         "en": "Description for Jahr"
46 |       },
47 |       "data-kind": {
48 |         "type": "temporal",
49 |         "unit": "year"
50 |       }
51 |     },
52 |     "Anzahl_inBetrieb_WKA_SH": {
53 |       "name": {
54 |         "de": "Anzahl",
55 |         "en": "Anzahl"
56 |       },
57 |       "dimension-type": "Measure Dimension",
58 |       "scale-type": "interval",
59 |       "path": "Anzahl_inBetrieb_WKA_SH",
60 |       "description": {
61 |         "de": "Beschreibung f\u00fcr Anzahl_inBetrieb_WKA_SH",
62 |         "en": "Description for Anzahl_inBetrieb_WKA_SH"
63 |       }
64 |     },
65 |     "Leistung_MW": {
66 |       "name": {
67 |         "de": "Leistung_MW",
68 |         "en": "Leistung_MW"
69 |       },
70 |       "dimension-type": "Measure Dimension",
71 |       "scale-type": "ratio",
72 |       "path": "Leistung_MW",
73 |       "description": {
74 |         "de": "Beschreibung f\u00fcr Leistung_MW",
75 |         "en": "Description for Leistung_MW"
76 |       },
77 |       "unit": "MW"
78 |     }
79 |   }
80 | }


--------------------------------------------------------------------------------
/pylindas/pycube/shared_dimension.py:
--------------------------------------------------------------------------------
 1 | from rdflib import Graph, Literal, RDF, URIRef
 2 | from pylindas.lindas.namespaces import *
 3 | from shapely.geometry import shape
 4 | import json
 5 | import argparse
 6 | 
 7 | 
 8 | class GeoSharedDimension(object):
 9 |     _base_uri: URIRef
10 |     _graph: Graph
11 |     _description: dict
12 | 
13 |     def __init__(self, base_uri: URIRef, description: dict, graph: Graph):
14 |         self._base_uri = base_uri
15 |         self._graph = graph
16 |         self._description = description
17 |     
18 |     def _setup_graph(self) -> Graph:
19 |         """Set up the graph by binding namespaces and returning the graph object.
20 |         
21 |         Returns:
22 |             Graph: The graph object with namespaces bound.
23 |         """
24 |         graph = Graph()
25 |         for prefix, nmspc in Namespaces.items():
26 |             graph.bind(prefix=prefix, namespace=nmspc)
27 |         try:
28 |             graph.bind(prefix=self._cube_dict.get("Namespace"), namespace=Namespace(self._base_uri))
29 |         except KeyError:
30 |             print("no Namespace")
31 |             pass
32 |         return graph
33 | 
34 | 
35 |     def _geojson_to_wkt(self, geojson: dict) -> str:
36 |         """Convert GeoJSON to WKT.
37 |         
38 |         Returns:
39 |             str: The WKT string.
40 |         """
41 |         if not geojson:
42 |             return None
43 |         geom = shape(geojson)
44 |         return geom.wkt
45 | 
46 | 
47 |     def _add_geo_feature_to_graph(self, geojson_feature):
48 |         properties = geojson_feature.get("properties")
49 |         if not properties:
50 |             raise ValueError("Feature must have properties")
51 |         iri = properties.get("iri")
52 |         if not iri:
53 |             raise ValueError("Feature must have an IRI")
54 |         feature = URIRef(iri)
55 |         self._graph.add((feature, RDF.type, URIRef("http://schema.org/Place")))
56 | 
57 |         for lang in ["fr", "en", "de", "it"]:
58 |             name_key = f"name_{lang}"
59 |             if name_key in properties:
60 |                 self._graph.add((feature, URIRef("http://schema.org/name"), Literal(properties[name_key], lang=lang)))
61 | 
62 |         geometry = URIRef(f"{iri}/geometry")
63 |         self._graph.add((feature, URIRef("http://www.opengis.net/ont/geosparql#hasGeometry"), geometry))
64 |         wkt = self._geojson_to_wkt(geojson_feature['geometry'])
65 |         if wkt:
66 |             self._graph.add((geometry, URIRef("http://www.opengis.net/ont/geosparql#asWKT"), Literal(wkt, datatype=URIRef("http://www.opengis.net/ont/geosparql#wktLiteral"))))
67 | 
68 | 
69 |     def serialize(self, filename: str) -> None:
70 |         """Serialize the cube to a file.
71 | 
72 |         This function serializes the cube to the given file name in turtle format.
73 | 
74 |         Args:
75 |             filename (str): The name of the file to write the cube to.
76 | 
77 |         Returns:
78 |             None
79 |         """
80 |         self._graph.serialize(destination=filename, format="turtle", encoding="utf-8")
81 | 
82 | 
83 | def convert_geojson_to_ttl(geojson_filename, ttl_filename):
84 |     with open(geojson_filename, 'r') as f:
85 |         geojson_data = json.load(f)
86 | 
87 |     base_uri = URIRef("http://example.org/base")
88 |     description = {}
89 |     graph = Graph()
90 | 
91 |     shared_dimension = GeoSharedDimension(base_uri, description, graph)
92 | 
93 |     for feature in geojson_data.get("features", []):
94 |         print(f"Adding feature {feature['properties']['name_de']}")
95 |         shared_dimension._add_geo_feature_to_graph(feature)
96 | 
97 |     shared_dimension.serialize(ttl_filename)
98 | 


--------------------------------------------------------------------------------
/example/Shared_Dimensions/shared_dimension_generation/sd_terms.csv:
--------------------------------------------------------------------------------
 1 | code;parent_code;name_de;name_fr;name_it;name_EN;location;wikidata;concept
 2 | 1;;Alle Artengruppen;Tous les groupes d'espèces;Tutti i gruppi di specie;All species groups;Q2;x;1
 3 | 1.1;1;Tiere;Animaux;Animali;Animals;Q2;x;2
 4 | 1.1.1;1.1;Wirbeltiere;Vertébrés;Vertebrati;Vertebrates;Q2;x;3
 5 | 1.1.1.1;1.1.1;Säugetiere ;Mammifères;Mammiferi;Mammals;Q2;x;4
 6 | 1.1.1.1.1;1.1.1.1;Säugetiere (ohne Fledermäuse);Mammifères (sans Chauves-souris);Mammiferi (senza Pipistrelli);Mammals (without bats);Q2;x;5
 7 | 1.1.1.1.2;1.1.1.1;Fledermäuse;Chauves-souris;Pipistrelli;Bats;Q2;x;6
 8 | 1.1.1.2;1.1.1;Brutvögel;Oiseaux nicheurs;Uccelli nidificanti;Breeding birds;Q2;x;7
 9 | 1.1.1.3;1.1.1;Reptilien;Reptiles;Rettili;Reptiles;Q2;x;8
10 | 1.1.1.4;1.1.1;Amphibien;Amphibiens;Anfibi;Amphibians;Q2;x;9
11 | 1.1.1.5;1.1.1;Fische und Rundmäuler;Poissons et Cyclostomes;Pesci e Ciclostomi;Fishes and cyclostomes;Q2;x;10
12 | 1.1.2;1.1;Weichtiere;Mollusques;Molluschi;Molluscs;Q2;x;11
13 | 1.1.2.1;1.1.2;Muscheln;Bivalves;Bivalvi;Bivalves;Q2;x;12
14 | 1.1.2.2;1.1.2;Schnecken;Gastéropodes;Gasteropodi;Gastropods;Q2;x;13
15 | 1.1.3;1.1;Krebstiere;Crustacés;Crostacei;Crustaceans;Q2;x;14
16 | 1.1.3.1;1.1.3;Zehnfusskrebse;Écrevisses;Gamberi;Crayfishes;Q2;x;15
17 | 1.1.4;1.1;Insekten;Insectes;Insetti;Insects;Q2;x;16
18 | 1.1.4.1;1.1.4;Hautflügler;Hyménoptères;Imenotteri;Hymenopterae;Q2;x;17
19 | 1.1.4.1.1;1.1.4.1;Bienen;Abeilles;Api;Bees;Q2;x;18
20 | 1.1.4.1.2;1.1.4.1;Ameisen;Fourmis;Formiche;Ants;Q2;x;19
21 | 1.1.4.2;1.1.4;Schmetterlinge;Papillons;Farfalle;Butterflies;Q2;x;20
22 | 1.1.4.2.1;1.1.4.2;Tagfalter und Widderchen;Papillons diurnes et Zygènes;Farfalle diurne  e Zigene;Diurnal Butterflies and Zigene;Q2;x;21
23 | 1.1.4.3;1.1.4;Köcherfliegen;Trichoptères;Tricotteri;Caddisflies;Q2;x;22
24 | 1.1.4.4;1.1.4;Schnaken;Tipules;Ditteri Tipulidi;Diptera Tipulids;Q2;x;23
25 | 1.1.4.5;1.1.4;Käfer;Coléoptères;Coleotteri;Coleopterae;Q2;x;24
26 | 1.1.4.5.1;1.1.4.5;Pracht-, Bock-, Rosenkäfer und Schröter;Coléoptères Buprestidés, Cérambycidés, Cétoniidés et Lucanidés;Coleotteri Buprestidi, Cerambicidi, Cetonidi e Lucanidi;Coleopterae Buprestides, Cerambicides, Cetonides, Lucanides;Q2;x;25
27 | 1.1.4.5.2;1.1.4.5;Laufkäfer und Sandlaufkäfer;Carabidés et Cicindèles;Carabidi e Cicindelidi;Carabidae and Cicindelinae;Q2;x;26
28 | 1.1.4.5.3;1.1.4.5;Wasserkäfer;Coléoptères hydradéphages;Coleotteri Adefagi acquatici;Aquatic beetles Adefagians;Q2;x;27
29 | 1.1.4.6;1.1.4;Netzflügler;Névroptères;Neurotteri;Neuropterans;Q2;x;28
30 | 1.1.4.7;1.1.4;Singzikaden;Cigales;Cicale;Cicadas;Q2;x;29
31 | 1.1.4.8;1.1.4;Heuschrecken;Orthoptères;Ortotteri;Ortopterans;Q2;x;30
32 | 1.1.4.9;1.1.4;Steinfliegen;Plécoptères;Plecotteri;Stoneflies;Q2;x;31
33 | 1.1.4.10;1.1.4;Libellen;Libellules, Odonates;Libellule;Dragonflies;Q2;x;32
34 | 1.1.4.11;1.1.4;Eintagsfliegen;Éphémères;Efemerotteri;Mayflies;Q2;x;33
35 | 1.2;1;Pflanzen;Plantes;Piante;Plants;Q2;x;34
36 | 1.2.1;1.2;Gefässpflanzen;Plantes vasculaires;Piante vascolari;Vascular plants;Q2;x;35
37 | 1.2.1.1;1.2.1;Blütenpflanzen;Plantes à fleurs;Piante da fiori;Flowering plants;Q2;x;36
38 | 1.2.1.2;1.2.1;Farnartige Pflanzen;Fougères;Felci;Ferns;Q2;x;37
39 | 1.2.2;1.2;Moose;Bryophytes;Briofite;Bryophytes;Q2;x;38
40 | 1.2.2.1;1.2.2;Lebermoose;Hépatiques;Epatiche;Hepaticae;Q2;x;39
41 | 1.2.2.2;1.2.2;Laubmoose;Mousses;Muschi;Musci;Q2;x;40
42 | 1.2.2.3;1.2.2;Hornmoose;Anthocérotes;Antocerote;Anthocerotae;Q2;x;41
43 | 1.2.3;1.2;Makroalgen;Macroalgues;Macroalga;Macroalgae;Q2;x;42
44 | 1.2.3.1;1.2.3;Armleuchteralgen;Characées;Caracee;Stoneworts;Q2;x;43
45 | 1.3;1;Flechten und Pilze;Lichens et champignons;Licheni e funghi;Lichens and mycetes;Q2;x;44
46 | 1.3.1;1.3;Flechten;Lichens;Licheni;Lichens;Q2;x;45
47 | 1.3.1.1;1.3.1;Baumflechten;Lichens épiphytes;Licheni epifiti;Epiphytic lichens;Q2;x;46
48 | 1.3.1.2;1.3.1;Bodenflechten;Lichens terricoles;Licheni terricoli;Soil lichens;Q2;x;47
49 | 1.3.2;1.3;Grosspilze;Champignons supérieurs;Macromiceti;Macromycetes;Q2;x;48
50 | 


--------------------------------------------------------------------------------
/pylindas/getter/get.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import sparql_dataframe
  3 | from SPARQLWrapper import SPARQLWrapper, JSON
  4 | 
  5 | 
  6 | def get_cube(endpoint: str, identifier: str, version: str):
  7 |     """Retrieve the cube URI based on the provided identifier and version using SPARQL query.
  8 |     
  9 |         Args:
 10 |             endpoint (str): The SPARQL endpoint URL.
 11 |             identifier (str): The identifier of the cube.
 12 |             version (str): The version of the cube.
 13 |     
 14 |         Returns:
 15 |             str: The URI of the cube.
 16 |     
 17 |         Raises:
 18 |             Exception: If an error occurs during the SPARQL query execution.
 19 |     """
 20 |     match endpoint:
 21 |         case "TEST":
 22 |             endpoint = "https://test.lindas.admin.ch/query"
 23 |         case "INT":
 24 |             endpoint = "https://int.lindas.admin.ch/query"
 25 |         case "PROD":
 26 |             endpoint = "https://lindas.admin.ch/query"
 27 | 
 28 |     query = f"""
 29 |         PREFIX dcterms: <http://purl.org/dc/terms/>
 30 |         PREFIX cube: <https://cube.link/>
 31 |         PREFIX schema: <http://schema.org/>
 32 | 
 33 |         SELECT ?cube 
 34 |         {{
 35 |            ?cube a cube:Cube ;
 36 |                dcterms:identifier "{identifier}" ;
 37 |                schema:version {version} .
 38 |         }}
 39 |     """
 40 |     sparql = SPARQLWrapper(endpoint)
 41 |     sparql.setReturnFormat(JSON)
 42 |     sparql.setQuery(query)
 43 | 
 44 |     try:
 45 |         resp = sparql.queryAndConvert()
 46 | 
 47 |         return resp["results"]["bindings"][0]["cube"]["value"]
 48 |     except Exception as e:
 49 |         return e
 50 | 
 51 | 
 52 | def get_observations(endpoint: str, identifier: str, version: str):
 53 |     """Retrieve observations from a given endpoint based on the provided identifier and version.
 54 |     
 55 |         Args:
 56 |             endpoint (str): The SPARQL endpoint URL.
 57 |             identifier (str): The identifier for the observations.
 58 |             version (str): The version of the observations.
 59 |     
 60 |         Returns:
 61 |             pandas.DataFrame: A DataFrame containing the observations with columns for observation, predicate, and value.
 62 |     """
 63 |     match endpoint:
 64 |         case "TEST":
 65 |             endpoint = "https://test.lindas.admin.ch/query"
 66 |         case "INT":
 67 |             endpoint = "https://int.lindas.admin.ch/query"
 68 |         case "PROD":
 69 |             endpoint = "https://lindas.admin.ch/query"
 70 |     cube_uri = get_cube(endpoint=endpoint, identifier=identifier, version=version)
 71 |     query = f"""
 72 |         PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 73 |         PREFIX sh: <http://www.w3.org/ns/shacl#>
 74 |         PREFIX schema: <http://schema.org/>
 75 |         PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 76 |         PREFIX cube: <https://cube.link/>
 77 |         
 78 |         SELECT ?obs ?pred ?value
 79 |         {{
 80 |             <{cube_uri}> cube:observationSet/cube:observation ?obs .
 81 |             <{cube_uri}> cube:observationConstraint/sh:property ?dim .
 82 |             ?dim sh:path ?predURI ;
 83 |                schema:name ?pred .
 84 |             FILTER(LANG(?pred)='de')
 85 |             {{
 86 |                ?dim a cube:KeyDimension .
 87 |                ?obs ?predURI ?vl .
 88 |                ?vl schema:name ?value
 89 |             }} UNION {{
 90 |                ?dim a cube:KeyDimension .
 91 |                ?obs ?predURI ?value .
 92 |                FILTER (DATATYPE(?value) != xsd:anyURI)
 93 |             }} UNION {{
 94 |                ?dim a cube:MeasureDimension .
 95 |                ?obs ?predURI ?value
 96 |             }}
 97 |         }}
 98 |     """
 99 | 
100 |     df = sparql_dataframe.get(endpoint, query)
101 |     observations = df.pivot(index="obs", columns="pred", values="value").reset_index(drop=True)
102 |     return observations
103 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # Database lindas information
158 | *.ini
159 | 
160 | # PyCharm
161 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
164 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
165 | .idea/
166 | 
167 | .DS_Store
168 | 
169 | # Test cube for unit tests
170 | /tests/test_cube.ttl
171 | 
172 | # VSCode
173 | .vscode
174 | 


--------------------------------------------------------------------------------
/example/Shared_Dimensions/shared_dimension_generation/sd_description.yml:
--------------------------------------------------------------------------------
 1 | # The SD URL will be the Identifier concatenated to the fixed "https://ld.admin.ch/cube/dimension/"
 2 | # If the SD URL starts with http, that URL is used instead of the default
 3 | # Note: a SD's URL is not relative to the LINDAS environment, and it will be dereferenceable only when published on PROD
 4 | Identifier: pylindas_sd_generation_example
 5 | # TODO Base: Optional, allows for separation of DefinedTermSet and the base for identifier-field, where identifier defines the TermSet
 6 | Name:
 7 |   en: PyLindas Shared Dimension generation example
 8 |   fr: PyLindas example de génération d'une Shared Dimension 
 9 |   de: PyLindas Shared Dimension generation example (de)
10 |   it: PyLindas Shared Dimension generation example (it)
11 | # Description is optional
12 | Description:
13 |   fr: Un example de génération d'une Shared Dimension par PyLindas
14 |   en: An example of Shared Dimension generation by PyLindas
15 | # Valid-from is optional, it is a date/time value  
16 | # Note: it is currently optional, but might need to become mandatory as validFrom, and later validThrough, are used to make a SD and its term 'deprecated'
17 | Valid-from: 2025-02-05T00:00:00Z
18 | # Contributor is optional, it is now added by the Cube Creator when creating a new SD
19 | Contributor:
20 |   name: Fabian Cretton
21 |   email: fabian.cretton@dummy.ch
22 | Terms:
23 |   identifier-field: code
24 |   name-field: name
25 |   multilingual: True
26 |   # links-to-other-terms is optional
27 |   # It allows to create links between terms of a same dataset, as for instance the creation of a hierarchy based on child to parent relations with the skos:broader property
28 |   # The identifier of the other term must be found on the same line, as for example:
29 |   #   code;parent_code;name
30 |   #   1;;Alle Artengruppen
31 |   #   1.1;1;Tiere  
32 |   links-to-other-terms:
33 |     #parent_code: name of the column that contains the identifier of the other term
34 |     parent_code:
35 |       # property: the current proposal here does not handle relative URIs, but request an existing property
36 |       # to be adapted if needed
37 |       property: http://www.w3.org/2004/02/skos/core#broader
38 | 
39 |   #mapping is optional, and is used to replace the entries in the SharedDimension with URIs pointing to different SharedDimensions
40 |   #there are currently 4 types: additive, replace, regex and concept, adapted from Cube.py
41 |   #anytime something is added to mapping, it should also be added to other-fields, and given the appropriate datatype and URI
42 |   #if an entry is turned into a URI, make sure the entries are valid as URIs, i.e. contain no spaces
43 |   mapping:
44 |     #this is the name of the column in which the entries are to be replaced
45 |     location:
46 |       #type additive: appends the entry to the end of the "base", i.e. "base" + "entry"
47 |       type: additive
48 |       base: https://www.wikidata.org/wiki/
49 |     wikidata:
50 |       #type replace: replaces the entry fully with the given replacement; all entries are changed to the same static value
51 |       type: replace
52 |       replacement: https://www.wikidata.org/wiki/Q1
53 |     concept:
54 |       #type concept, the entry is changed to the URI given
55 |       #Columns can be placed in {} and when done so, those places in the URI will dynamically be appropriated as the given entries inside the Column
56 |       #if started with "/" it will use the cubes base_URL and add concept inside ex: /parent/{parent}, will give baseURI + /parent + /(entry in the column of parent)
57 |       type: concept
58 |       replacement-automated: http://the_cube_uri/concept/{concept}/{code}
59 | 
60 |     #TODO Regex example
61 |   
62 |   # other-fields are optional, URI could be relative (and concatenated to the SD's URI) or a full URI starting with 'http/https'
63 |   other-fields:
64 |     wikidata:
65 |       URI: http://schema.org/isPartOf
66 |       datatype: URI
67 |     concept:
68 |       URI: /partoftest
69 |       datatype: URI
70 |     parent_code:
71 |       URI: /parent_identifier_example
72 |       datatype: string
73 |     location:
74 |       URI: http://schema.org/isPartOf 
75 |       datatype: URI
76 | 


--------------------------------------------------------------------------------
/pylindas/shared_dimension_queries/README.md:
--------------------------------------------------------------------------------
 1 | # Shared dimensions queries
 2 | The goal of [shared_dimensions_queries.py](shared_dimensions_queries.py) is to become a tool for developers to find a useful shared dimension,
 3 | then get the URLs of the terms in order to configure the mapping for a cube's dimension.
 4 | 
 5 | This is a first implementation of:
 6 | - Basic queries to request shared dimensions information from LINDAS
 7 | - Display the results, line by line
 8 | 
 9 | ## Example
10 | See an example usage in [example_sd.py](example_sd.py)
11 | 
12 | List all the shared dimensions for a specific LINDAS environment and print them line by line: 
13 |  ```
14 |  result=list_shared_dimensions("INT")
15 |  list_shared_dimensions_print(result)
16 |  ```
17 | 
18 | The result is ordered alphabetically.
19 | 
20 | list_shared_dimensions() has a number of optional parameters, with default values except for the environment:
21 | - environment: LINDAS environment, one of `TEST`, `INT`, `PROD`
22 | - name_lng: the language of the label of the shared dimensions to retrieve (default "en")  
23 | Note: a shared dimension with no label in that language will not be listed (no fall-back handled yet)
24 | - offset/limit: standard possibility to page through the result with offset/limit (default to 0)  
25 | OFFSET: "skip this many rows from the total result set", 0 to skip no row and begin from start  
26 | LIMIT: "only give me this many rows (starting after any OFFSET)"  
27 | a limit of 0 = no limit, display all results starting from offset (LIMIT will not be added to the query)
28 | - search_word: to limit the results to labels containing a specific word (default "" -> ignored)
29 | 
30 | List 10 Shared Dimensions that contain "Canton" in the French name
31 |  ```
32 |     result = list_shared_dimensions("INT", "fr", 0, 10, "Canton")
33 |  ```
34 | 
35 | As the goal is to observe the URLs of the terms in a shared dimension, URL that will be used to define the mappings, a feature of list_shared_dimensions_print() is to print 2-3 terms for each listed shared dimension.  
36 | To do this, pass a second 'environment' parameter to the function
37 | ```
38 |  list_shared_dimensions_print(result, "INT")
39 | ```
40 | This environment should of course match the one used for `list_shared_dimensions()`. While displaying each shared dimension, LINDAS environment will be queried to get 2 terms.
41 | Example result:
42 | ```
43 | Cantons <https://ld.admin.ch/dimension/canton> - validFrom 2021-01-01T00:00:00Z 
44 | { Terms sample:
45 | Aargau <https://ld.admin.ch/canton/19> 
46 | Appenzell Ausserrhoden <https://ld.admin.ch/canton/15> 
47 | }
48 | Cantons NFI <https://ld.admin.ch/dimension/bgdi/biota/cantonregions>  
49 | { Terms sample:
50 | Aargau <https://ld.admin.ch/dimension/bgdi/biota/cantonregions/19> 
51 | Appenzell Ausserrhoden <https://ld.admin.ch/dimension/bgdi/biota/cantonregions/15> 
52 | }
53 | ```
54 | Note: `list_shared_dimensions()` will also display validFrom and validTo values, when available, as some shared dimensions could be deprecated.
55 | 
56 | It is finally possible to list all the terms for a specific shared dimension.
57 | Here is an example to list the Cantons shared dimension's terms, in french:  
58 | ```
59 | result = list_shared_dimension_terms("INT", "https://ld.admin.ch/dimension/canton", "fr")
60 | print_sparql_result(result, ["name", "sdTerm"])
61 | ```
62 | `print_sparql_result()` prints line by line the JSON result of a SPARQL query, printing the specific expected fields of the query.  
63 | 
64 | ## Next steps
65 | All of this is a first proposal, and should be further improved according to the developers needs.  
66 | 
67 | It is not yet a class with methods, and contains code that could be more generic.  
68 | For instance, query_lindas could be a very generic function as the one found in /lindas/query.py  
69 | To be noted that the existing query_lindas() is specific for ASK queries (returns a bool value), and is maybe "wrongly" named currently.
70 | 
71 | A class could be created, passing for instance the environment in the constructor. Thus avoiding to pass the environment parameter to the different queries.   
72 | Furthermore, the environment could be coming from a configuration file (or environment variables), to avoid hard-coding them.
73 | 


--------------------------------------------------------------------------------
/example/Cubes/greenhouse_limit/description.yml:
--------------------------------------------------------------------------------
  1 | Name:
  2 |   de: Treibhausgasinventar
  3 |   en: Greenhouse Gas Inventory
  4 |   fr: Emissions de gaz a effet de serre
  5 |   it: Emissioni di gas di effetto serre
  6 | Description:
  7 |   de: Ein Beispiel Cube mit einem Zielwert mit Anfangs- und Endpunkt
  8 |   en: An example cube with a target value with start and end
  9 |   fr: Un exemple de Cube avec une valeur cible avec une date de debut et une date de fin
 10 |   it: Un esempio di Cube con un valore obiettivo con una data di inizio e una data di fine
 11 | Publisher:
 12 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 13 | Creator:
 14 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 15 | Contributor:
 16 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 17 |     Name: Bundesamt für Umwelt
 18 | Date Created:
 19 |   2025-03-27
 20 | Contact Point:
 21 |   E-Mail: marco.kronenberg@bafu.admin.ch
 22 |   Name: Bundesamt für Umwelt
 23 | Base-URI: https://environment.ld.admin.ch/foen/test_target/
 24 | Identifier: target_timespan
 25 | Version: 1
 26 | Work Status:
 27 |   Draft
 28 | Visualize:
 29 |   True
 30 | # Optional but recommended
 31 | Accrual Periodicity: yearly
 32 | 
 33 | Namespace: limit_timespan
 34 | 
 35 | dimensions:
 36 |   Jahr:
 37 |     name:
 38 |       de: Jahr
 39 |       fr: An
 40 |       it: Anno
 41 |       en: Year
 42 |     description:
 43 |       de: Jahr der Erhebung
 44 |     dimension-type: Key Dimension
 45 |     datatype: URI
 46 |     scale-type: ordinal
 47 |     path: year
 48 |     data-kind:
 49 |       type: temporal
 50 |       unit: year
 51 |     mapping:
 52 |       type: additive
 53 |       base: https://ld.admin.ch/time/year/
 54 | 
 55 |   THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung:
 56 |     name:
 57 |       de: THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung
 58 |       en: GHG emissions without the greenhouse gas balance of land use
 59 |       fr: Emissions de GES sans le bilan des émissions des gaz à effet de serre lié à l'utilisation des terres
 60 |       it: Emissioni di gas serra senza il bilancio dei gas serra da uso del territorio
 61 |     description:
 62 |       de: THG-Emissionen ohne die Treibhausgasbilanz der Landnutzung
 63 |       en: GHG emissions without the greenhouse gas balance of land use
 64 |       fr: Emissions de GES sans le bilan des émissions des gaz à effet de serre lié à l'utilisation des terres
 65 |       it: Emissioni di gas serra senza il bilancio dei gas serra da uso del territorio
 66 |     dimension-type: Measure Dimension
 67 |     datatype: float
 68 |     scale-type: ratio
 69 |     path: ghgEmission
 70 |     unit: MegaTONNE
 71 | 
 72 |   THG-Emissionen mit der Treibhausgasbilanz der Landnutzung:
 73 |     name:
 74 |       de: THG-Emissionen mit der Treibhausgasbilanz der Landnutzung
 75 |       en: GHG emissions with the greenhouse gas balance of land use
 76 |       fr: Emissions de GES avec le bilan des gaz à effet de serre lié à l'utilisation des terres
 77 |       it: Emissioni di gas serra senza con il bilancio dei gas serra da uso del territorio
 78 |     description:
 79 |       de: THG-Emissionen mit der Treibhausgasbilanz der Landnutzung
 80 |       en: GHG emissions with the greenhouse gas balance of land use
 81 |       fr: Emissions de GES avec le bilan des gaz à effet de serre lié à l'utilisation des terres
 82 |       it: Emissioni di gas serra senza con il bilancio dei gas serra da uso del territorio
 83 |     dimension-type: Measure Dimension
 84 |     datatype: float
 85 |     scale-type: ratio
 86 |     path: ghgEmissionLanduse
 87 |     unit: MegaTONNE
 88 |     annotation:
 89 |       - type: limit
 90 |         value: 26.28871925
 91 |         name:
 92 |           de: Ziel 2030 gemäss Übereinkommen von Paris
 93 |           en: Target according to Paris Agreement for 2030
 94 |           fr: Objectif selon l'accord de Paris pour 2030
 95 |           it: Obiettivo secondo l'Accordo di Parigi per 2030
 96 |         context:
 97 |           Jahr: 2030
 98 |       - type: limit
 99 |         value: 34.17533502
100 |         name:
101 |           de: Durchschnittsziel gemäss Übereinkommen von Paris für 2021-2030
102 |           en: Average target according to Paris Agreement for 2021-2030
103 |           fr: Objectif moyen selon l'accord de Paris pour 2021-2030
104 |           it: Obiettivo medio secondo l'Accordo di Parigi per 2021-2030
105 |         context:
106 |           Jahr:
107 |             min: 2021
108 |             max: 2030
109 | 
110 | 


--------------------------------------------------------------------------------
/example/Cubes/Population_Aargau/prepare.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | 
  3 | # Input file
  4 | csv_file = "./example/Cubes/Population_Aargau/data_raw.csv"
  5 | 
  6 | # Read the CSV file
  7 | df = pd.read_csv(csv_file, sep=",")
  8 | 
  9 | # Set display options to avoid silent downcasting warnings
 10 | pd.set_option('future.no_silent_downcasting', True)
 11 | 
 12 | # Rename "ALTER_xy" columns to "age_xy"
 13 | df.rename(columns=lambda x: x.replace("ALTER_", "age_") if x.startswith("ALTER_") else x, inplace=True)
 14 | 
 15 | # Rename column "TOTAL" to "all"
 16 | df.rename(columns={"TOTAL": "all"}, inplace=True)
 17 | 
 18 | # Create a new list with all the columns that start with "age_"
 19 | age_concept_ids = [col for col in df.columns if col.startswith("age_")]
 20 | 
 21 | # Create names for the age concepts by replacing "age_00_04" with "Age 00 to 04"
 22 | age_concept_names_en = ["Age " + col[4:6] + " to " + col[7:9] for col in age_concept_ids]
 23 | # Special case for the last age group
 24 | age_concept_names_en[-1] = "Age 90 and older"
 25 | 
 26 | age_concept_names_de = ["Alter " + col[4:6] + " bis " + col[7:9] for col in age_concept_ids]
 27 | # Special case for the last age group in German
 28 | age_concept_names_de[-1] = "Alter 90 und älter"
 29 | 
 30 | # Create descriptions for the age concepts
 31 | age_concept_descriptions_en = ["People with age " + col[4:6] + " to " + col[7:9] + " years" for col in age_concept_ids]
 32 | # Special case for the last age group
 33 | age_concept_descriptions_en[-1] = "People with age 90 years and older"
 34 | 
 35 | age_concept_descriptions_de = ["Personen mit Alter " + col[4:6] + " bis " + col[7:9] + " Jahre" for col in
 36 |                                age_concept_ids]
 37 | # Special case for the last age group in German
 38 | age_concept_descriptions_de[-1] = "Personen mit Alter 90 Jahre und älter"
 39 | 
 40 | # Add "all" to the list of age concepts
 41 | age_concept_ids.append("all")
 42 | # Add "All ages" for the "all" column
 43 | age_concept_names_en.append("Total population")
 44 | # Add description for the "all" column
 45 | age_concept_descriptions_en.append("Total population of all ages")
 46 | age_concept_names_de.append("Gesamtbevölkerung")
 47 | age_concept_descriptions_de.append("Gesamtbevölkerung aller Altersgruppen")
 48 | 
 49 | # Create a DataFrame for age concepts
 50 | age_concepts_df = pd.DataFrame({
 51 |     "ageID": age_concept_ids,
 52 |     "ageName_en": age_concept_names_en,
 53 |     "ageDescription_en": age_concept_descriptions_en,
 54 |     "ageName_de": age_concept_names_de,
 55 |     "ageDescription_de": age_concept_descriptions_de
 56 | })
 57 | 
 58 | # Save the age concepts DataFrame to a CSV file
 59 | age_concepts_df.to_csv("./example/Cubes/Population_Aargau/age.csv", index=False)
 60 | 
 61 | # Create a new date column from year, month, and day columns in Format YYYY-MM-DD
 62 | df.insert(1, "date",
 63 |           df["year"].astype(str) + "-" + df["month"].astype(str).str.zfill(2) + "-" + df["day"].astype(str).str.zfill(
 64 |               2))
 65 | 
 66 | 
 67 | # Create a new column for the region
 68 | def region(line):
 69 |     if line.locationType == "CANTON":
 70 |         return "C_" + str(line.bfsNr)
 71 |     elif line.locationType == "DISTRICT":
 72 |         return "D_" + str(line.bfsNr)
 73 |     elif line.locationType == "TOWNSHIP":
 74 |         return "M_" + str(line.bfsNr)
 75 | 
 76 | 
 77 | df.insert(0, "region", df.apply(region, axis=1))
 78 | 
 79 | # Keep only values for "region" = C_19, D_1901 and M_4001 through M_4013 (district Aarau and total of canton Aargau)
 80 | df = df[df["region"].isin(
 81 |     ["C_19", "D_1901", "M_4001", "M_4002", "M_4003", "M_4004", "M_4005",
 82 |      "M_4006", "M_4007", "M_4008", "M_4009", "M_4010", "M_4011", "M_4012", "M_4013"
 83 |     ]
 84 | )]
 85 | 
 86 | df = df[df["year"] > 2020]
 87 | 
 88 | # Drop unnecessary columns
 89 | df.drop(columns=["bfsNr", "year", "month", "day", "locationName", "locationType"], inplace=True)
 90 | 
 91 | 
 92 | # Melt data to long format
 93 | df = df.melt(id_vars=["region", "date"], var_name="group", value_name="number")
 94 | 
 95 | # Column number as integer
 96 | df["number"] = df["number"].astype(int)
 97 | 
 98 | # Add percentage column
 99 | all_df = df[df["group"] == "all"].rename(columns={"number": "all_number"})
100 | df = df.merge(all_df[["region", "date", "all_number"]], on=["region", "date"], how="left")
101 | df["percentage"] = round(df["number"] / df["all_number"] * 100, 4)
102 | df.drop(columns=["all_number"], inplace=True)
103 | 
104 | # Save to CSV
105 | df.to_csv("./example/Cubes/Population_Aargau/data.csv", index=False)
106 | print("Saved extracted data to data.csv")
107 | 


--------------------------------------------------------------------------------
/example/Cubes/corona/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "../../linpy/description.schema.json",
 3 |   "Name": {
 4 |     "de": "Corona-Zahlen Zeitverlauf",
 5 |     "en": "Corona Numbers Timeline"
 6 |   },
 7 |   "Description": {
 8 |     "de": "Zeitreihe der Corona-Zahlen für Schleswig-Holstein\r\n\r\nEnthalten sind folgende Felder:\r\n\r\n- `Datum` - Datum im Format TT.MM.JJJJ\r\n- `Gemeldete Fälle`\r\n- `Hospitalisierungen`\r\n- `Verstorben`\r\n\r\nFeldtrenner ist Komma, Zeichenketten-Trenner ist doppeltes Anführungszeichen (\")\r\n\r\nBasis für die hier veröffentlichten Daten sind die Zahlen, die die Kreise und kreisfreien Städte auf dem offiziellen Meldeweg der Landesmeldestelle mitteilen. Da die Datenerfassung und Übermittlung Zeit benötigt, können Abweichungen von den vor Ort kommunizierten Fällen entstehen. Im Einzelfall kann es auch zu einer Reduzierung der gemeldeten Fälle kommen, zum Beispiel wenn sich eine Meldung nicht bestätigt hat oder der Wohnort der Person außerhalb des Kreises liegt.\r\n\r\n[Mehr Daten und Diagramme zu Corona in Schleswig-Holstein](https://www.schleswig-holstein.de/DE/Schwerpunkte/Coronavirus/Zahlen/zahlen_node.html)",
 9 |     "en": "Time series of Corona numbers for Schleswig-Holstein\n\nThe following fields are included: \n\n— ‘date’ — date in format dd.mm.yyyy\n— ‘reported cases’\n— ‘hospitalisations’\n— ‘deceased’\n\nField separator is comma, string separator is double quote (\")\n\nThe data published here are based on the figures provided by the districts and non-circular cities by the official means of reporting to the Land Registration Office. As data collection and transmission takes time, deviations from the cases communicated on the spot may arise. In individual cases, there may also be a reduction in the reported cases, for example if a report has not been confirmed or if the person’s place of residence is outside the circle. \n \n[More data and charts on Corona in Schleswig-Holstein](https://www.schleswig-holstein.de/DE/Schwerpunkte/Coronavirus/Zahlen/zahlen_node.html)"
10 |   },
11 |   "Creator": [
12 |     {
13 |       "IRI": "https://opendata.schleswig-holstein.de/organization/94498aaa-9bd0-4ac0-ae70-db319a3ca1d8"
14 |     }
15 |   ],
16 |   "Publisher": [
17 |     {
18 |       "IRI": "https://opendata.schleswig-holstein.de/organization/94498aaa-9bd0-4ac0-ae70-db319a3ca1d8"
19 |     }
20 |   ],
21 |   "Contributor": [
22 |     {
23 |       "IRI": "https://schleswig-holstein.de/opendata/",
24 |       "Name": "Schleswig Holstein Open Data"
25 |     }
26 |   ],
27 |   "Date Created": "2023-10-05T00:00:00Z",
28 |   "Contact Point": {
29 |     "E-Mail": "info@schleswig-holstein.de",
30 |     "Name": "Landesmeldestelle Schleswig-Holstein"
31 |   },
32 |   "Base-URI": "https://phpefi.schleswig-holstein.de/corona/data202011/cvd_sh_verlauf.csv",
33 |   "Identifier": "cvd_sh_verlauf",
34 |   "Version": 1.0,
35 |   "Work Status": "Published",
36 |   "Visualize": true,
37 |   "Accrual Periodicity": "daily",
38 |   "Namespace": "https://phpefi.schleswig-holstein.de/corona",
39 |   "dimensions": {
40 |     "Datum": {
41 |       "name": {
42 |         "de": "Datum",
43 |         "en": "Date"
44 |       },
45 |       "dimension-type": "Key Dimension",
46 |       "scale-type": "ordinal",
47 |       "path": "date",
48 |       "description": {
49 |         "en": "Date in format dd.mm.yyyy",
50 |         "de": "Datum im Format TT.MM.JJJJ"
51 |       },
52 |       "mapping": {
53 |         "type": "regex",
54 |         "pattern": "(?P<day>[0-9]{2})\\.(?P<month>[0-9]{2})\\.(?P<year>[0-9]{4})",
55 |         "replacement": "\\g<day>/\\g<month>/\\g<year>",
56 |         "value-type": "Literal"
57 |       }
58 |     },
59 |     "Gemeldete Fälle": {
60 |       "name": {
61 |         "de": "Gemeldete Fälle",
62 |         "en": "Reported Cases"
63 |       },
64 |       "dimension-type": "Measure Dimension",
65 |       "scale-type": "ratio",
66 |       "path": "reported_cases",
67 |       "description": {
68 |         "en": "Number of reported cases"
69 |       }
70 |     },
71 |     "Hospitalisierungen": {
72 |       "name": {
73 |         "de": "Hospitalisierungen",
74 |         "en": "Hospitalisations"
75 |       },
76 |       "dimension-type": "Measure Dimension",
77 |       "scale-type": "ratio",
78 |       "path": "hospitalisations",
79 |       "description": {
80 |         "en": "Number of hospitalisations"
81 |       }
82 |     },
83 |     "Verstorben": {
84 |       "name": {
85 |         "de": "Verstorben",
86 |         "en": "Deceased"
87 |       },
88 |       "dimension-type": "Measure Dimension",
89 |       "scale-type": "ratio",
90 |       "path": "deceased",
91 |       "description": {
92 |         "en": "Number of deceased"
93 |       }
94 |     }
95 |   }
96 | }
97 | 


--------------------------------------------------------------------------------
/example/Cubes/Population_Aargau/description.yml:
--------------------------------------------------------------------------------
  1 | Name:
  2 |   de: Bevölkerung - Bevölkerungsbestand - Altersaufbau
  3 |   en: Population - Population Stock - Age Structure
  4 | Description:
  5 |   de: Die kantonale Bevölkerungsstatistik erfasst die ständige Wohnbevölkerung an ihrem Hauptwohnsitz. Das heisst, dass beispielsweise Wochenaufenthalter nicht dort gezählt werden, wo sie sich während der Woche aufhalten, sondern dort wo sie angemeldet sind. Nicht enthalten sind Kurzaufenthalter mit einer Aufenthaltsbewilligung von unter 12 Monaten. Sofern für den 31.12. eines Jahres und für den 01.01. des Folgejahrs Daten vorliegen, sind diese grundsätzlich identisch. Wenn aber per 01.01. des Folgejahrs eine Fusion zweier Gemeinden stattgefunden hat, ist der Gebietsstand unterschiedlich. Entsprechend werden die Daten per 01.01. des Folgejahrs für die fusionierte Gemeinde ausgewiesen.
  6 |   en: The cantonal population statistics record the permanent resident population at their main residence. This means that, for example, weekly commuters are not counted where they stay during the week, but where they are registered. Short-term residents with a residence permit of less than 12 months are not included. If data is available for December 31 of one year and January 1 of the following year, these are generally identical. However, if a merger of two municipalities has taken place on January 1 of the following year, the territorial status is different. Accordingly, the data for January 1 of the following year is shown for the merged municipality.
  7 | Publisher: 
  8 |   - IRI: https://www.ag.ch/de/verwaltung/dfr/statistik
  9 | Creator:
 10 |   - IRI: https://www.ag.ch/de/verwaltung/dfr/statistik
 11 | Contributor:
 12 |   - IRI: https://www.ag.ch/de/verwaltung/dfr/statistik
 13 | Date Created:
 14 |   2025-01-01
 15 | Contact Point:
 16 |   E-Mail: statistik@ag.ch
 17 |   Name: Statistik Aargau
 18 | Base-URI: https://ld.admin.ch/bfh/
 19 | Identifier: poc_ag
 20 | Version: 1
 21 | Work Status: 
 22 |   Draft
 23 | Visualize:
 24 |   True
 25 | # Optional but recommended
 26 | Accrual Periodicity: irregular
 27 | 
 28 | # Optional
 29 | Namespace: pocag
 30 | 
 31 | dimensions:
 32 |   # required
 33 |   region:
 34 |     name:
 35 |       de: Region
 36 |       en: Region
 37 |     description:
 38 |       de: Region
 39 |       en: Region
 40 |     dimension-type: Key Dimension
 41 |     datatype: URI
 42 |     scale-type: nominal
 43 |     path: region
 44 |     mapping:
 45 |       type: function
 46 |       filepath: example/Cubes/Population_Aargau/func.py
 47 |       function-name: replace_with_shared_dimension
 48 |     data-kind:
 49 |       type: spatial-shape
 50 |     hierarchy:
 51 |       - root: https://ld.admin.ch/canton/19
 52 |         name: AG - District - Municipality
 53 |         next-in-hierarchy:
 54 |           name: District
 55 |           path: http://schema.org/hasPart
 56 |           next-in-hierarchy:
 57 |             name: Municipality
 58 |             path: http://schema.org/hasPart
 59 | 
 60 |   date:
 61 |     name:
 62 |       de: Datum
 63 |       en: Date
 64 |     description:
 65 |       de: Datum der Erhebung
 66 |       en: Date of the survey
 67 |     dimension-type: Key Dimension
 68 |     scale-type: interval
 69 |     datatype: date
 70 |     path: date
 71 |     data-kind: 
 72 |       type: temporal
 73 |       unit: day
 74 |   
 75 |   group:
 76 |     name:
 77 |       de: Altersgruppe
 78 |       en: Age Group
 79 |     description:
 80 |       de: Altersgruppe
 81 |       en: Age Group
 82 |     dimension-type: Key Dimension
 83 |     scale-type: nominal
 84 |     datatype: URI
 85 |     path: ageGroup
 86 |     mapping:
 87 |       type: concept
 88 |       replacement-automated: /age_group/{group}
 89 | 
 90 |   number:
 91 |     name:
 92 |       de: Anzahl
 93 |       en: Number
 94 |     description:
 95 |       de: Anzahl
 96 |       en: Number
 97 |     dimension-type: Measure Dimension
 98 |     scale-type: ratio
 99 |     datatype: integer
100 |     path: number
101 |     unit: NUM
102 | 
103 |   percentage:
104 |     name:
105 |       de: Anteil
106 |       en: Percentage
107 |     description:
108 |       de: Anteil der Altersgruppe an der Gesamtbevölkerung
109 |       en: Percentage of the age group in the total population
110 |     dimension-type: Measure Dimension
111 |     scale-type: ratio
112 |     datatype: float
113 |     path: percentage
114 |     unit: PERCENT
115 | 
116 | Concepts:
117 |   age-group:
118 |     URI: /age_group/{ageID}
119 |     name-field: ageName
120 |     multilingual: true
121 |     other-fields:
122 |       ageDescription:
123 |         URI: http://schema.org/description
124 |         multilingual: true
125 |         datatype: string
126 | 


--------------------------------------------------------------------------------
/tests/test.yml:
--------------------------------------------------------------------------------
  1 | Name:
  2 |   de: Mock Cube - two sided error
  3 |   fr: Mock Cube - two sided error
  4 |   it: Mock Cube - two sided error
  5 |   en: Mock Cube - two sided error
  6 | Description:
  7 |   de: Ein Beispiel Cube, der simulierte Daten enthält mit zweiseitiger Unsicherheit
  8 |   en: An example Cube containing some simulated data with two-sided uncertainty
  9 | Publisher:
 10 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 11 | Creator:
 12 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 13 | Contributor:
 14 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 15 |     Name: Bundesamt für Mock Data
 16 | Date Created:
 17 |   2024-11-12
 18 | Contact Point:
 19 |   E-Mail: contact@mock.ld.admin.ch
 20 |   Name: Bundesamt für Mock Data
 21 | Base-URI: https://mock.ld.admin.ch/
 22 | Identifier: mock-two-sided
 23 | Version: 1
 24 | Work Status:
 25 |   Published
 26 | Visualize:
 27 |   True
 28 | Opendataswiss:
 29 |   True
 30 | # Optional but recommended
 31 | Accrual Periodicity: yearly
 32 | 
 33 | # Optional
 34 | Namespace: mock
 35 | 
 36 | dimensions:
 37 |   # required
 38 |   Jahr:
 39 |     name:
 40 |       de: Jahr
 41 |       fr: An
 42 |       it: Anno
 43 |       en: Year
 44 |     description:
 45 |       de: Jahr der Erhebung
 46 |     dimension-type: Key Dimension
 47 |     datatype: URI
 48 |     scale-type: ordinal
 49 |     path: year
 50 |     data-kind:
 51 |       type: temporal
 52 |       unit: year
 53 |     mapping:
 54 |       type: additive
 55 |       base: https://ld.admin.ch/time/year/
 56 | 
 57 |   Station:
 58 |     name:
 59 |       de: Station
 60 |       fr: Station
 61 |       it: Stazione
 62 |       en: Station
 63 |     description:
 64 |       de: Station der Untersuchung
 65 |     dimension-type: Key Dimension
 66 |     datatype: URI
 67 |     scale-type: nominal
 68 |     path: station
 69 |     hierarchy:
 70 |       - root: Schweiz
 71 |         name: Schweiz
 72 |         next-in-hierarchy:
 73 |           path: http://schema.org/hasPart
 74 |           name: Stationen
 75 |     mapping:
 76 |       type: replace
 77 |       replacements:
 78 |         Bern: https://mock.ld.admin.ch/station/01
 79 |         Zürich: https://mock.ld.admin.ch/station/02
 80 |         Schweiz: https://mock.ld.admin.ch/station/switzerland
 81 | 
 82 |   Wert:
 83 |     name:
 84 |       de: Wert
 85 |       fr: Valeur
 86 |       it: Valore
 87 |       en: Value
 88 |     description:
 89 |       de: Gemessener Wert an der Station
 90 |     dimension-type: Measure Dimension
 91 |     datatype: float
 92 |     scale-type: interval
 93 |     path: value
 94 |     unit: KiloGM
 95 | 
 96 |   UpperUnsicherheit:
 97 |     name:
 98 |       de: Upper Unsicherheit
 99 |     description:
100 |       de: Upper Unsicherheit
101 |     dimension-type: Upper uncertainty
102 |     datatype: float
103 |     relates-to: value
104 |     scale-type: ratio
105 |     path: upperUncertainty
106 |     unit: PERCENT
107 | 
108 |   LowerUnsicherheit:
109 |     name:
110 |       de: Lower Unsicherheit
111 |     description:
112 |       de: Lower Unsicherheit
113 |     dimension-type: Lower uncertainty
114 |     datatype: float
115 |     relates-to: value
116 |     scale-type: ratio
117 |     path: lowerUncertainty
118 |     unit: PERCENT
119 | 
120 |   Wert2:
121 |     name:
122 |       de: Wert2
123 |       fr: Valeur2
124 |       it: Valore2
125 |       en: Value2
126 |     description:
127 |       de: Gemessener Wert 2 an der Station
128 |     dimension-type: Measure Dimension
129 |     scale-type: ratio
130 |     datatype: float
131 |     path: value2
132 |     unit: KiloGM
133 |     annotation: 
134 |       - type: limit
135 |         value: 11
136 |         name: 
137 |           de: Richtwert 2020 in Zürich
138 |         context:
139 |           Jahr: 2020
140 |           Station: Zürich
141 |       - type: limit-range
142 |         min-value: 9
143 |         max-value: 13
144 |         name:
145 |           de: Zielwert für 2021 in Zürich
146 |         context:
147 |           Jahr: 2021
148 |           Station: Zürich
149 | 
150 |   Standardfehler:
151 |     name:
152 |       de: Standardfehler für Wert2
153 |       fr: erreur standard pour Value2
154 |       it: errore 
155 |       en: standard error for Value2
156 |     description:
157 |       de: Standardfehler der Schätzung Wert2
158 |     dimension-type: Standard Error
159 |     datatype: float
160 |     relates-to: value2
161 |     scale-type: ratio
162 |     path: standardError
163 |     unit: PERCENT
164 | 
165 |   Status:
166 |     name:
167 |       de: Veröffentlichungsstatus
168 |       fr: Statut de publication
169 |       it: Stato di pubblicazione
170 |       en: State of publication
171 |     description:
172 |       de: Status der Veröffentlichung, provisorisch oder final
173 |     dimension-type: Annotation
174 |     scale-type: nominal
175 |     path: status
176 | 


--------------------------------------------------------------------------------
/example/Cubes/co2-limits/description.yml:
--------------------------------------------------------------------------------
  1 | Name:
  2 |   de: CO2-Emissionen nach Energieträger
  3 |   en: CO2 Emissions per energy source
  4 |   fr: CO2-Emissions par source d'energie
  5 |   it: Emissioni di CO2 per fonte di energia
  6 | Description:
  7 |   de: Ein Beispiel Cube mit einem Zielwert
  8 |   en: An example Cube with a target value
  9 |   fr: Un exemple de Cube avec une valeur cible
 10 |   it: Un esempio di Cube con un valore obiettivo
 11 | Publisher:
 12 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 13 | Creator:
 14 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 15 | Contributor:
 16 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 17 |     Name: Bundesamt für Umwelt
 18 | Date Created:
 19 |   2024-11-26
 20 | Contact Point:
 21 |   E-Mail: marco.kronenberg@bafu.admin.ch
 22 |   Name: Bundesamt für Umwelt
 23 | Base-URI: https://environment.ld.admin.ch/foen/test_target/
 24 | Identifier: target_1
 25 | Version: 1
 26 | Work Status:
 27 |   Draft
 28 | Visualize:
 29 |   True
 30 | # Optional but recommended
 31 | Accrual Periodicity: yearly
 32 | 
 33 | Namespace: limit_1
 34 | 
 35 | dimensions:
 36 |   Jahr:
 37 |     name:
 38 |       de: Jahr
 39 |       fr: An
 40 |       it: Anno
 41 |       en: Year
 42 |     description:
 43 |       de: Jahr der Erhebung
 44 |     dimension-type: Key Dimension
 45 |     datatype: URI
 46 |     scale-type: ordinal
 47 |     path: year
 48 |     data-kind: temporal
 49 |     mapping:
 50 |       type: additive
 51 |       base: https://ld.admin.ch/time/year/
 52 | 
 53 |   Energieträger:
 54 |     name:
 55 |       de: Energieträger
 56 |       fr: Source d'energie
 57 |       it: Fonte di energia
 58 |       en: Energy source
 59 |     description:
 60 |       de: Energieträger der Objekte
 61 |       fr: Source d'energie
 62 |       it: Fonte di energia
 63 |       en: Energy source
 64 |     dimension-type: Key Dimension
 65 |     datatype: URI
 66 |     scale-type: nominal
 67 |     path: energySource
 68 |     mapping:
 69 |       type: replace
 70 |       replacements:
 71 |         Brennstoffe: https://mock.ld.admin.ch/energySource/01
 72 |         Treibstoffe: https://mock.ld.admin.ch/energySource/02
 73 | 
 74 |   Nicht gerundeter Wert (Mt):
 75 |     name:
 76 |       de: CO2-Emissionen
 77 |       en: CO2 Emissions
 78 |       fr: CO2-Emissions
 79 |       it: Emissioni di CO2
 80 |     description:
 81 |       de: Emissionen von CO2 in Millionen Tonnen
 82 |       en: Emissions of CO2 in millions of tonnes
 83 |       fr: Emissions de CO2 en millions de tonnes
 84 |       it: Emissioni di CO2 in milioni di tonnellate
 85 |     dimension-type: Measure Dimension
 86 |     datatype: float
 87 |     scale-type: ratio
 88 |     path: co2Emissions
 89 |     unit: MegaTONNE
 90 |     annotation:
 91 |       - type: limit-range
 92 |         min-value: 1.708845e+01
 93 |         max-value: 1.779072e+01
 94 |         name:
 95 |           de: Richtwertbande 2016 für Brennstoffe
 96 |           en: target value range for thermal fuel 2016
 97 |           fr: bande de valeurs cibles pour le combustible thermique 2016
 98 |           it: bande di valori obiettivo per il combustibile termico 2016
 99 |         context:
100 |           Jahr: 2016
101 |           Energieträger: Brennstoffe
102 |       - type: limit
103 |         value: 1.568392e+01
104 |         name:
105 |           de: Richtwert 2021 für Brennstoffe
106 |           en: target value for thermal fuel 2021
107 |           fr: valeur cible pour le combustible thermique 2021
108 |           it: valore obiettivo per il combustibile termico 2021
109 |         context:
110 |           Jahr: 2021
111 |           Energieträger: Brennstoffe
112 |       - type: limit
113 |         value: 1.849298e+01
114 |         name:
115 |           de: Richtwert 2012 für Brennstoffe
116 |           en: target value for thermal fuel 2012
117 |           fr: valeur cible pour le combustible thermique 2012
118 |           it: valore obiettivo per il combustibile termico 2012
119 |         context:
120 |           Jahr: 2012
121 |           Energieträger: Brennstoffe
122 |       - type: limit-range
123 |         min-value: 1.779072e+01
124 |         max-value: 1.82589e+01
125 |         name:
126 |           de: Richtwertbande 2014 für Brennstoffe
127 |           en: target value range for thermal fuel 2014
128 |           fr: bande de valeurs cibles pour le combustible thermique 2014
129 |           it: bande di valori obiettivo per il combustibile termico 2014
130 |         context:
131 |           Jahr: 2014
132 |           Energieträger: Brennstoffe
133 | 
134 |   CO2-Emissionen (Mt):
135 |     name:
136 |       de: CO2-Emissionen (gerundet)
137 |       en: CO2 Emissions (rounded)
138 |       fr: CO2-Emissions (rounde)
139 |       it: Emissioni di CO2 (arrotondati)
140 |     description:
141 |       de: gerundete emissionen von CO2 in Millionen Tonnen
142 |       en: rounded emissions of CO2 in millions of tonnes
143 |       fr: arrondies emissions de CO2 en millions de tonnes
144 |       it: arrondite emissioni di CO2 in milioni di tonnellate
145 |     dimension-type: Measure Dimension
146 |     datatype: float
147 |     scale-type: ratio
148 |     path: roundedCO2Emissions
149 |     unit: MegaTONNE


--------------------------------------------------------------------------------
/example/Cubes/Biotope_Statistik/description.yml:
--------------------------------------------------------------------------------
  1 | Name:
  2 |   de: Biotope von nationaler Bedeutung
  3 |   fr: Biotopes d'importance nationale
  4 |   it: Biotope von nationaler Bedeutung
  5 |   en: Biotope von nationaler Bedeutung
  6 | Description:
  7 |   de: Die fünf Biotopinventare Hoch- und Flachmoore, Auen, Amphibienlaichgebiete und Trockenwiesen/-weiden im Überblick
  8 |   fr: Vue d’ensemble des cinq inventaires de biotopes - hauts-marais, bas-marais, zones alluviales, sites de reproduction de batraciens ainsi que prairies et pâturages secs
  9 |   it: Vue d’ensemble des cinq inventaires de biotopes - hauts-marais, bas-marais, zones alluviales, sites de reproduction de batraciens ainsi que prairies et pâturages secs
 10 |   en: Die fünf Biotopinventare Hoch- und Flachmoore, Auen, Amphibienlaichgebiete und Trockenwiesen/-weiden im Überblick
 11 | Publisher:
 12 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 13 | Creator:
 14 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 15 | Contributor:
 16 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 17 |     Name: Bundesamt für Umwelt
 18 | Date Created:
 19 |   2024-01-16
 20 | Contact Point:
 21 |   E-Mail: info@bafu.admin.ch
 22 |   Name: Bundesamt für Umwelt
 23 | Base-URI: https://environment.ld.admin.ch/foen/biotopes
 24 | Identifier: UZ-2404
 25 | Version: 1
 26 | Work Status:
 27 |   Draft
 28 | Visualize:
 29 |   True
 30 | # Optional but recommended
 31 | Accrual Periodicity: irregular
 32 | 
 33 | Namespace: biotop
 34 | 
 35 | dimensions:
 36 |   Typ:
 37 |     name:
 38 |       de: Art des Biotops
 39 |       fr: Art des Biotops
 40 |       it: Art des Biotops
 41 |       en: Art des Biotops
 42 |     description:
 43 |       de: Die Art des Biotopes nach denen in der Biotopstatistik unterschieden wird
 44 |     dimension-type: Key Dimension
 45 |     datatype: URI
 46 |     scale-type: nominal
 47 |     path: type
 48 |     mapping:
 49 |       type: replace
 50 |       replacements:
 51 |         Hochmoore (Typen I + II): https://environment.ld.admin.ch/foen/biotopes/1
 52 |         Flachmoore: https://environment.ld.admin.ch/foen/biotopes/2
 53 |         Auengebiete: https://environment.ld.admin.ch/foen/biotopes/3
 54 |         Amphibienlaichgebiete: https://environment.ld.admin.ch/foen/biotopes/4
 55 |         Trockenwiesen und -weiden: https://environment.ld.admin.ch/foen/biotopes/5
 56 |         Biotope: https://environment.ld.admin.ch/foen/biotopes/tot
 57 |     hierarchy:
 58 |       - root: Biotope
 59 |         name: Biotope
 60 |         next-in-hierarchy:
 61 |           path: http://schema.org/hasPart
 62 |           name: Biotoparten
 63 | 
 64 |   Überlappung:
 65 |     name:
 66 |       de: Überlappung
 67 |       fr: Überlappung
 68 |       it: Überlappung
 69 |       en: Überlappung
 70 |     description:
 71 |       de: Die Fläche kann einzeln pro Objekt oder überlappend betrachtet werden.
 72 |     dimension-type: Key Dimension
 73 |     datatype: URI
 74 |     scale-type: nominal
 75 |     path: overlap
 76 |     mapping:
 77 |       type: replace
 78 |       replacements:
 79 |         Mit Überlappung: https://environment.ld.admin.ch/foen/overlapping/1
 80 |         Ohne Überlappung: https://environment.ld.admin.ch/foen/overlapping/2
 81 | 
 82 |   Anzahl:
 83 |     name:
 84 |       de: Anzahl Objekte
 85 |       fr: Anzahl Objekte
 86 |       it: Anzahl Objekte
 87 |       en: Anzahl Objekte
 88 |     description:
 89 |       de: Anzahl der Objekte
 90 |     datatype: integer
 91 |     dimension-type: Measure Dimension
 92 |     scale-type: ratio
 93 |     path: count
 94 |     unit: UNITLESS
 95 | 
 96 |   Anteil der CH-Biotope:
 97 |     name:
 98 |       de: Anteil der CH-Biotope
 99 |       fr: Anteil der CH-Biotope
100 |       it: Anteil der CH-Biotope
101 |       en: Anteil der CH-Biotope
102 |     description:
103 |       de: Anteil der Objekte an Gesamtmenge der Biotope
104 |     dimension-type: Measure Dimension
105 |     datatype: float
106 |     scale-type: ratio
107 |     path: ratio_of_objects
108 |     unit: PERCENT
109 | 
110 |   Fläche:
111 |     name:
112 |       de: Fläche der Objekte
113 |       fr: Fläche der Objekte
114 |       it: Fläche der Objekte
115 |       en: Fläche der Objekte
116 |     description:
117 |       de: Summe der Flächen der Objekte
118 |     dimension-type: Measure Dimension
119 |     datatype: float
120 |     scale-type: ratio
121 |     path: area
122 |     unit: HA
123 | 
124 |   Anteil CH-Fläche:
125 |     name:
126 |       de: Anteil CH-Fläche
127 |       fr: Anteil CH-Fläche
128 |       it: Anteil CH-Fläche
129 |       en: Anteil CH-Fläche
130 |     description:
131 |       de: Anteil der Objekte an der Gesamtfläche der Schweiz
132 |     dimension-type: Measure Dimension
133 |     datatype: float
134 |     scale-type: ratio
135 |     path: area_of_switzerland
136 |     unit: PERCENT
137 | 
138 |   Anteil der CH-Biotope (Fläche):
139 |     name:
140 |       de: Anteil der CH-Biotope (Fläche)
141 |       fr: Anteil der CH-Biotope (Fläche)
142 |       it: Anteil der CH-Biotope (Fläche)
143 |       en: Anteil der CH-Biotope (Fläche)
144 |     description:
145 |       de: Anteil der Objekte an der Gesamtfläche aller Objekte in der Biotopstatistik
146 |     dimension-type: Measure Dimension
147 |     datatype: float
148 |     scale-type: ratio
149 |     path: area_of_biotopes
150 |     unit: PERCENT


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/description.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | "$schema": "../../linpy/description.schema.json"
  3 | Name:
  4 |   en: Example with a concept table
  5 |   fr: Exemple avec une table de concept
  6 |   de: Example with a concept table (DE)
  7 |   it: Example with a concept table (IT)
  8 | Description:
  9 |   fr: Un jeu de données avec deux csv, un pour une table de concept
 10 |   en: A dataset containing two csv, one for a concept table
 11 | Publisher:
 12 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 13 | Creator:
 14 | - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 15 | Contributor:
 16 | - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 17 |   Name: Bundesamt für Mock Data
 18 | Date Created: '2024-08-26T00:00:00.000Z'
 19 | Contact Point:
 20 |   E-Mail: contact@mock.ld.admin.ch
 21 |   Name: Bundesamt für Mock Data
 22 | Base-URI: https://mock-concept.ld.admin.ch/
 23 | Identifier: mock-concept
 24 | Version: 1
 25 | Work Status: Draft
 26 | Visualize: true
 27 | Accrual Periodicity: yearly
 28 | Namespace: mock
 29 | dimensions:
 30 |   year:
 31 |     name:
 32 |       de: Jahr
 33 |       fr: Année
 34 |       it: Anno
 35 |       en: Year
 36 |     description:
 37 |       de: Jahr der Erhebung
 38 |       fr: Année du relevé
 39 |       it: Anno di rilevamento
 40 |       en: Year of survey
 41 |     dimension-type: Key Dimension
 42 |     scale-type: ordinal
 43 |     path: Jahr
 44 |     datatype: URI
 45 |     data-kind:
 46 |       type: temporal
 47 |       unit: year
 48 |     mapping:
 49 |       type: additive
 50 |       base: https://ld.admin.ch/time/year/
 51 |   typeOfAirport:
 52 |     name:
 53 |       fr: Type d'aéroport
 54 |       de: Flughafentyp
 55 |       it: Tipo di aeroporto
 56 |       en: Type of Airport
 57 |     description:
 58 |       de: Flughafentyp - DESC
 59 |       fr: Type d'aéroport - DESC
 60 |       it: Tipo di aeroporto - DESC
 61 |       en: Type of Airport - DESC
 62 |     dimension-type: Key Dimension
 63 |     datatype: URI
 64 |     scale-type: nominal
 65 |     path: airport_type
 66 |     # This is a new mapping type, for concepts
 67 |     # Concepts are independent resources, a little like Shared Dimension Terms
 68 |     # See the 'Concepts' key below
 69 |     mapping:
 70 |       type: concept
 71 |       # Each value will be replaced by a URL that links to the concept
 72 |       # The URL starts with "/" and is relative to the URL of the cube
 73 |       # The URL must uniquely identify a concept and can be built from one or more fields
 74 |       # This is an example with two fields needed to identify an airport type: {typeOfAirport} and {typeOfAirport2nd}
 75 |       # It must match the URI defined for the concept (see below); the field names may differ because they come from different files
 76 |       replacement-automated: /airport_type/{typeOfAirport}/{typeOfAirport2nd}
 77 |   typeOfAirport2nd:
 78 |     name:
 79 |       fr: Type d'aéroport (second key for demo)
 80 |       de: Flughafentyp (second key for demo)
 81 |       it: Tipo di aeroporto (second key for demo)
 82 |       en: Type of Airport (second key for demo)
 83 |     description:
 84 |       de: Flughafentyp - second key for demo
 85 |       fr: Type d'aéroport - second key for demo
 86 |       it: Tipo di aeroporto - second key for demo
 87 |       en: Type of Airport - second key for demo
 88 |     dimension-type: Key Dimension
 89 |     scale-type: nominal      
 90 |     path: airport_type_2nd
 91 |   measure:
 92 |     name:
 93 |       fr: Valeur
 94 |       de: Wert
 95 |       it: Valore
 96 |       en: Value
 97 |     description:
 98 |       de: Wert - DESC
 99 |       fr: Valeur - DESC
100 |       it: Valore - DESC
101 |       en: Value - DESC
102 |     dimension-type: Measure Dimension
103 |     scale-type: interval
104 |     path: value
105 |     unit: kilogramm
106 | # See the README for further explanation about the Concepts metadata
107 | Concepts:
108 |   typeOfAirport:
109 |     # The URL starts with "/" and is relative to the URL of the cube
110 |     # The URL must uniquely identify a concept and can be built from one or more fields
111 |     # This is an example with two fields needed to identify an airport type: {typeOfAirportID} and {typeOfAirportSecondID}
112 |     # It must match the URI defined for the dimension's replacement (see above); the field names may differ because they come from different files
113 |     URI: /airport_type/{typeOfAirportID}/{typeOfAirportSecondID}
114 |     # The name of the field/column that contains the name (label)
115 |     name-field: typeOfAirport
116 |     # Defines whether the name is multilingual, meaning that a language suffix (_de, _fr, etc.) will be appended to the name-field to find the different values
117 |     multilingual: true
118 |     # position-field is optional: the name of the CSV column that contains a numeric position value for the concept
119 |     # used by Visualize to order the concepts (instead of alphabetically)
120 |     position-field: position
121 |     # other-fields are optional; each URI can be relative (and is then concatenated to the concept's URI) or a full URI starting with 'http' or 'https'
122 |     other-fields:
123 |       description:
124 |         URI: http://schema.org/description
125 |         multilingual: true
126 |         datatype: string
127 |       other_property_example:
128 |         URI: /airport_type/other_property_example   
129 |         datatype: string
130 |         language: en


--------------------------------------------------------------------------------
/tests/test_sdterms.py:
--------------------------------------------------------------------------------
  1 | from pylindas.pyshareddimension import SharedDimension
  2 | from rdflib import Graph
  3 | import pandas as pd
  4 | import pytest
  5 | import yaml
  6 | import os
  7 | 
  8 | class TestClass:
  9 | 
 10 |     TEST_CASE_PATH = os.path.dirname(__file__)
 11 |     SHAREDDIMENSIONSHAPE = "https://raw.githubusercontent.com/Kronmar-Bafu/lindas-pylindas/refs/heads/main/pylindas/pyshareddimension/shared_dimension_shape.ttl"
 12 | 
 13 |     @classmethod
 14 |     def setup_test_shared_dimension(cls, dataframe_path: str, description_path: str) -> SharedDimension:
 15 |         with open(os.path.join(cls.TEST_CASE_PATH, description_path)) as file:
 16 |             description = yaml.safe_load(file)
 17 |         dataframe = pd.read_csv(os.path.join(cls.TEST_CASE_PATH, dataframe_path), sep=",")
 18 |         sd = SharedDimension(dataframe=dataframe, sd_yaml=description, environment="TEST", local=True)
 19 |         return sd.prepare_data().write_sd().write_terms()
 20 | 
 21 |     def setup_method(self):
 22 |         self.shared_dimension = self.setup_test_shared_dimension(
 23 |             "test_sdterms.csv", "test_sdterms.yml")
 24 | 
 25 |     # SHACL validation of the Shared Dimension
 26 |     #   Please see the comment of the SharedDimension.validate() method
 27 |     #   in order to understand the parameters
 28 |     # This is work in progress as the SHACL file has to be passed as parameter instead of being downloaded from the Web behind the scene
 29 |     def test_perform_SHACL_validation(self):
 30 |         result_bool, result_message = self.shared_dimension.validate(self.SHAREDDIMENSIONSHAPE)
 31 |         assert result_bool == True        
 32 | 
 33 |     # Test some basic triples of the shared dimension
 34 |     # Will better be done by the SHACL validation
 35 |     # However, this allows to test that triples that might be optional for SHACL are correctly generated in the example
 36 |     #   as for instance schema:validFrom
 37 |     def test_shared_dimension_triples(self):
 38 |         sparql = (
 39 |             "PREFIX meta: <https://cube.link/meta/>"
 40 |             "PREFIX schema: <http://schema.org/>"
 41 |             "PREFIX dct: <http://purl.org/dc/terms/>"
 42 |             "ASK"
 43 |             "{"
 44 |             "  ?sd a meta:SharedDimension, schema:DefinedTermSet ;"
 45 |             "     schema:name ?name ;"
 46 |             "     schema:description ?desc ;"
 47 |             "     schema:identifier ?identifier ;"
 48 |             "     dct:contributor/schema:email ?contributorEmail ;"
 49 |             "     dct:contributor/schema:name ?contributorName ;"
 50 |             "     schema:validFrom ?validFrom"
 51 |             "}"
 52 |         )
 53 | 
 54 |         result = self.shared_dimension._graph.query(sparql)
 55 |         assert bool(result)
 56 | 
 57 |     def test_shared_dimension_unwanted_triples(self):
 58 |         # A Shared dimension generated by code should not have the rdf:type:
 59 |         #   hydra:Resource, md:SharedDimension
 60 |         # Those types are given to shared dimensions generated from the Cube Creator
 61 |         # When generated by code, the shared dimension should marked 'read-only' in the Cube Creator
 62 |         #   and this is achieved by not having those 2 types
 63 |         sparql = (
 64 |             "PREFIX meta: <https://cube.link/meta/>"
 65 |             "PREFIX md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
 66 |             "PREFIX hydra: <https://www.w3.org/ns/hydra/core#>"
 67 |             "ASK"
 68 |             "{"
 69 |             "  ?sd a meta:SharedDimension,"
 70 |             "      hydra:Resource, md:SharedDimension"
 71 |             "}"
 72 |         )
 73 | 
 74 |         result = self.shared_dimension._graph.query(sparql)
 75 |         # assert that NO result is found
 76 |         assert not bool(result)        
 77 |         
 78 | 
 79 |     def test_shared_dimension_terms(self):
 80 |         # Find terms that are missing some triples
 81 |         sparql = (
 82 |             "PREFIX meta: <https://cube.link/meta/>"
 83 |             "PREFIX schema: <http://schema.org/>"
 84 |             "PREFIX md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
 85 |             "PREFIX hydra: <https://www.w3.org/ns/hydra/core#>"
 86 |             "PREFIX sd_md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
 87 |             "ASK"
 88 |             "{"
 89 |             "  ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ."
 90 |             "   FILTER NOT EXISTS {?sd schema:identifier ?id;"
 91 |             "       schema:inDefinedTermSet <https://ld.admin.ch/cube/dimension/test_canton> ;"
 92 |             "       schema:name ?name ;"
 93 |             "       schema:validFrom ?validFrom ;"
 94 |             "   }"
 95 |             "}"
 96 |         )
 97 | 
 98 |         result = self.shared_dimension._graph.query(sparql)
 99 |         # There should be no term missing those triples
100 |         assert not bool(result)        
101 |         
102 |     def test_shared_dimension_one_single_root(self):
103 |         # In that example, there should be 3 individual terms
104 |         sparql = (
105 |             "PREFIX schema: <http://schema.org/>"
106 |             "PREFIX sd_md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
107 |             "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>"
108 |             "SELECT *"
109 |             "{"
110 |             "  ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ."
111 |             "}"
112 |         )
113 | 
114 |         result = self.shared_dimension._graph.query(sparql)
115 |         # There should be exactly 3 results
116 |         assert len(list(result)) == 3


--------------------------------------------------------------------------------
/tests/test_shared_dimension_generation.py:
--------------------------------------------------------------------------------
  1 | from pylindas.pyshareddimension import SharedDimension
  2 | from rdflib import Graph
  3 | import pandas as pd
  4 | import pytest
  5 | import yaml
  6 | 
  7 | class TestClass:
  8 | 
  9 |     TEST_CASE_PATH = "example/Shared_Dimensions/"
 10 | 
 11 |     @classmethod
 12 |     def setup_test_shared_dimension(cls, dataframe_path: str, description_path: str) -> SharedDimension:
 13 |         with open(cls.TEST_CASE_PATH + description_path) as file:
 14 |             description = yaml.safe_load(file)
 15 |         dataframe = pd.read_csv(cls.TEST_CASE_PATH + dataframe_path, sep=";")
 16 |         sd = SharedDimension(dataframe=dataframe, sd_yaml=description, environment="TEST", local=True)
 17 |         return sd.prepare_data().write_sd().write_terms()
 18 | 
 19 |     def setup_method(self):
 20 |         self.shared_dimension = self.setup_test_shared_dimension(
 21 |             "shared_dimension_generation/sd_terms.csv", "shared_dimension_generation/sd_description.yml")
 22 | 
 23 |     # SHACL validation of the Shared Dimension
 24 |     #   Please see the comment of the SharedDimension.validate() method
 25 |     #   in order to understand the parameters
 26 |     # This is work in progress as the SHACL file has to be passed as parameter instead of being downloaded from the Web behind the scene
 27 |     def test_perform_SHACL_validation(self):
 28 |         result_bool, result_message = self.shared_dimension.validate("./pylindas/pyshareddimension/shared_dimension_shape.ttl")
 29 |         assert result_bool == True        
 30 | 
 31 |     # Test some basic triples of the shared dimension
 32 |     # Will better be done by the SHACL validation
 33 |     # However, this allows to test that triples that might be optional for SHACL are correctly generated in the example
 34 |     #   as for instance schema:validFrom
 35 |     def test_shared_dimension_triples(self):
 36 |         sparql = (
 37 |             "PREFIX meta: <https://cube.link/meta/>"
 38 |             "PREFIX schema: <http://schema.org/>"
 39 |             "PREFIX dct: <http://purl.org/dc/terms/>"
 40 |             "ASK"
 41 |             "{"
 42 |             "  ?sd a meta:SharedDimension, schema:DefinedTermSet ;"
 43 |             "     schema:name ?name ;"
 44 |             "     schema:description ?desc ;"
 45 |             "     schema:identifier ?identifier ;"
 46 |             "     dct:contributor/schema:email ?contributorEmail ;"
 47 |             "     dct:contributor/schema:name ?contributorName ;"
 48 |             "     schema:validFrom ?validFrom"
 49 |             "}"
 50 |         )
 51 | 
 52 |         result = self.shared_dimension._graph.query(sparql)
 53 |         assert bool(result)
 54 | 
 55 |     def test_shared_dimension_unwanted_triples(self):
 56 |         # A Shared dimension generated by code should not have the rdf:type:
 57 |         #   hydra:Resource, md:SharedDimension
 58 |         # Those types are given to shared dimensions generated from the Cube Creator
 59 |         # When generated by code, the shared dimension should marked 'read-only' in the Cube Creator
 60 |         #   and this is achieved by not having those 2 types
 61 |         sparql = (
 62 |             "PREFIX meta: <https://cube.link/meta/>"
 63 |             "PREFIX md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
 64 |             "PREFIX hydra: <https://www.w3.org/ns/hydra/core#>"
 65 |             "ASK"
 66 |             "{"
 67 |             "  ?sd a meta:SharedDimension,"
 68 |             "      hydra:Resource, md:SharedDimension"
 69 |             "}"
 70 |         )
 71 | 
 72 |         result = self.shared_dimension._graph.query(sparql)
 73 |         # assert that NO result is found
 74 |         assert not bool(result)        
 75 |         
 76 | 
 77 |     def test_shared_dimension_terms(self):
 78 |         # Find terms that are missing some triples
 79 |         sparql = (
 80 |             "PREFIX meta: <https://cube.link/meta/>"
 81 |             "PREFIX schema: <http://schema.org/>"
 82 |             "PREFIX md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
 83 |             "PREFIX hydra: <https://www.w3.org/ns/hydra/core#>"
 84 |             "PREFIX sd_md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
 85 |             "ASK"
 86 |             "{"
 87 |             "  ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ."
 88 |             "   FILTER NOT EXISTS {?sd schema:identifier ?id;"
 89 |             "       schema:inDefinedTermSet <https://ld.admin.ch/cube/dimension/pylindas_sd_generation_example> ;"
 90 |             "       schema:name ?name ;"
 91 |             "       schema:validFrom ?validFrom ;"
 92 |             "   }"
 93 |             "}"
 94 |         )
 95 | 
 96 |         result = self.shared_dimension._graph.query(sparql)
 97 |         # There should be no term missing those triples
 98 |         assert not bool(result)        
 99 |         
100 |     def test_shared_dimension_one_single_root(self):
101 |         # In that example, there should be only one terme with no parent (skos:broader)
102 |         sparql = (
103 |             "PREFIX schema: <http://schema.org/>"
104 |             "PREFIX sd_md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#>"
105 |             "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>"
106 |             "SELECT *"
107 |             "{"
108 |             "  ?sd a schema:DefinedTerm, sd_md:SharedDimensionTerm ."
109 |             "   FILTER NOT EXISTS {?sd skos:broader ?parent}"
110 |             "}"
111 |         )
112 | 
113 |         result = self.shared_dimension._graph.query(sparql)
114 |         # There should be only one result
115 |         assert len(list(result)) == 1


--------------------------------------------------------------------------------
/example/Cubes/kita/description.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "$schema": "../../linpy/description.schema.json",
  3 |   "Name": {
  4 |     "de": "Anzahl Kinder in Kindertagesstätten",
  5 |     "fr": "Nombre d'enfants dans les crèches",
  6 |     "it": "Numero di bambini negli asili nido",
  7 |     "en": "Number of kids in day care facilities"
  8 |   },
  9 |   "Description": {
 10 |     "de": "Ein Datensatz, der die Anzahl der Kinder in Kindertagesstätten enthält",
 11 |     "en": "A dataset containing the number of kids in day care facilities"
 12 |   },
 13 |   "Publisher": [
 14 |     {
 15 |       "IRI": "https://schleswig-holstein.de/opendata/org/office_of_daycare"
 16 |     }
 17 |   ],
 18 |   "Creator": [
 19 |     {
 20 |       "IRI": "https://schleswig-holstein.de/opendata/org/office_of_daycare"
 21 |     }
 22 |   ],
 23 |   "Contributor": [
 24 |     {
 25 |       "IRI": "https://schleswig-holstein.de/opendata/org/office_of_daycare",
 26 |       "Name": "Bundesamt für Kindertagesstätten"
 27 |     }
 28 |   ],
 29 |   "Date Created": "2024-08-26T00:00:00.000Z",
 30 |   "Contact Point": {
 31 |     "E-Mail": "contact@daycare.ld.admin.ch",
 32 |     "Name": "Bundesamt für Kindertagesstätten"
 33 |   },
 34 |   "Base-URI": "https://daycare.ld.admin.ch/",
 35 |   "Identifier": "kids-daycare-facilities-per-lander",
 36 |   "Version": 1,
 37 |   "Work Status": "Draft",
 38 |   "Visualize": true,
 39 |   "Accrual Periodicity": "yearly",
 40 |   "Namespace": "mock",
 41 |   "dimensions": {
 42 |     "Land": {
 43 |       "name": {
 44 |         "de": "Land",
 45 |         "fr": "Etat",
 46 |         "it": "Stato",
 47 |         "en": "State"
 48 |       },
 49 |       "description": {
 50 |         "de": "Bundesland, in dem die Daten erhoben wurden"
 51 |       },
 52 |       "dimension-type": "Key Dimension",
 53 |       "scale-type": "nominal",
 54 |       "data-kind": {
 55 |         "type": "spatial-shape"
 56 |       },
 57 |       "path": "Land",
 58 |       "mapping": {
 59 |         "type": "additive",
 60 |         "base": "https://example.org/land/"
 61 |       }
 62 |     },
 63 |     "Jahr": {
 64 |       "name": {
 65 |         "de": "Jahr",
 66 |         "fr": "Année",
 67 |         "it": "Anno",
 68 |         "en": "Year"
 69 |       },
 70 |       "description": {
 71 |         "de": "Jahr der Erhebung",
 72 |         "fr": "Année du relevé",
 73 |         "it": "Anno di rilevamento",
 74 |         "en": "Year of survey"
 75 |       },
 76 |       "dimension-type": "Key Dimension",
 77 |       "scale-type": "ordinal",
 78 |       "path": "Jahr",
 79 |       "data-kind": {
 80 |         "type": "temporal",
 81 |         "unit": "year"
 82 |       },
 83 |       "mapping": {
 84 |         "type": "additive",
 85 |         "base": "https://ld.admin.ch/time/year/"
 86 |       }
 87 |     },
 88 |     "Kinder bis unter 3 Jahren": {
 89 |       "name": {
 90 |         "fr": "Nombre d'enfants de moins de 3 ans",
 91 |         "de": "Kinder bis unter 3 Jahren",
 92 |         "it": "Numero di bambini di età inferiore a 3 anni",
 93 |         "en": "Children under 3 years old"
 94 |       },
 95 |       "description": {
 96 |         "de": "Anzahl der Kinder bis unter 3 Jahren",
 97 |         "fr": "Nombre d'enfants de moins de 3 ans",
 98 |         "it": "Numero di bambini di età inferiore a 3 anni",
 99 |         "en": "Number of children under 3 years old"
100 |       },
101 |       "dimension-type": "Measure Dimension",
102 |       "scale-type": "interval",
103 |       "path": "Kinder-bis-unter-3-Jahren"
104 |     },
105 |     "Kinder von 3 bis unter 7 Jahren (ohne Schulkinder)": {
106 |       "name": {
107 |         "fr": "Nombre d'enfants de 3 à moins de 7 ans (sans enfants scolarisés)",
108 |         "de": "Kinder von 3 bis unter 7 Jahren (ohne Schulkinder)",
109 |         "it": "Numero di bambini da 3 a meno di 7 anni (senza bambini scolastici)",
110 |         "en": "Children aged 3 to under 7 years (excluding school children)"
111 |       },
112 |       "description": {
113 |         "de": "Anzahl der Kinder von 3 bis unter 7",
114 |         "fr": "Nombre d'enfants de 3 à moins de 7 ans",
115 |         "it": "Numero di bambini da  3 a meno di 7 anni",
116 |         "en": "Number of children aged 3 to under 7"
117 |       },
118 |       "dimension-type": "Measure Dimension",
119 |       "scale-type": "interval",
120 |       "path": "Kinder-von-3-bis-unter-7-Jahren"
121 |     },
122 |     "Kinder von 5 bis unter 14 Jahren (nur Schulkinder)": {
123 |       "name": {
124 |         "fr": "Nombre d'enfants de 5 à moins de 14 ans (uniquement scolarisés)",
125 |         "de": "Kinder von 5 bis unter 14 Jahren (nur Schulkinder)",
126 |         "it": "Numero di bambini da 5 a meno di 14 anni (solo scolari)",
127 |         "en": "Children aged 5 to under 14 years (school children only)"
128 |       },
129 |       "description": {
130 |         "de": "Anzahl der Kinder von 5 bis unter 14",
131 |         "fr": "Nombre d'enfants de 5 à moins de 14 ans",
132 |         "it": "Numero di bambini da  5 a meno di 14 anni",
133 |         "en": "Number of children aged 5 to under 14"
134 |       },
135 |       "dimension-type": "Measure Dimension",
136 |       "scale-type": "interval",
137 |       "path": "Kinder-von-5-bis-unter-14-Jahren"
138 |     },
139 |     "Kinder von 7 Jahren und älter (Nicht-schulkinder)": {
140 |       "name": {
141 |         "fr": "Nombre d'enfants de 7 ans et plus (non scolarisés)",
142 |         "de": "Kinder von 7 Jahren und älter (Nicht-schulkinder)",
143 |         "it": "Numero di bambini di 7 anni e più (non scolari)",
144 |         "en": "Children aged 7 and older (non-school children)"
145 |       },
146 |       "description": {
147 |         "de": "Anzahl der Kinder von 7 Jahren und älter (Nicht-schulkinder)",
148 |         "fr": "Nombre d'enfants de 7 ans et plus (non scolarisés)",
149 |         "it": "Numero di bambini di 7 anni e più (non scolari)",
150 |         "en": "Number of children aged 7 and older (non-school children)"
151 |       },
152 |       "dimension-type": "Measure Dimension",
153 |       "scale-type": "interval",
154 |       "path": "Kinder-von-7-Jahren-und-älter"
155 |     },
156 |     "Insgesamt": {
157 |       "name": {
158 |         "fr": "Nombre total d'enfants",
159 |         "de": "Gesamtzahl der Kinder",
160 |         "it": "Numero totale di bambini",
161 |         "en": "Total number of children"
162 |       },
163 |       "description": {
164 |         "de": "Gesamtzahl der Kinder in Kindertagesstätten",
165 |         "fr": "Nombre total d'enfants dans les crèches",
166 |         "it": "Numero totale di bambini negli asili nido",
167 |         "en": "Total number of children in day care facilities"
168 |       },
169 |       "dimension-type": "Measure Dimension",
170 |       "scale-type": "interval",
171 |       "path": "Insgesamt"
172 |     }
173 |   }
174 | }
175 | 


--------------------------------------------------------------------------------
/pylindas/shared_dimension_queries/shared_dimensions_queries.py:
--------------------------------------------------------------------------------
  1 | from SPARQLWrapper import SPARQLWrapper, JSON
  2 | from rdflib import URIRef
  3 | from typing import List
  4 | import json
  5 | 
  6 | """
  7 | Author: Fabian Cretton - HEVS
  8 | 
  9 | The goal of this file is to become a tool for developers to find a useful shared dimension,
 10 | then get the URLs of the terms in order to configure the mapping for a cube's dimension.
 11 | 
 12 | It is not yet a class with methods, and contains code that could be more generic.
 13 | For instance, query_lindas could be a very generic function as the one found in /lindas/query.py
 14 | But existing query_lindas() is specific for ASK queries (returns a bool value)
 15 | 
 16 | See an example usage in example_sd.py
 17 | 
 18 | This is a first implementation of:
 19 | - Basic queries to request shared dimensions information from LINDAS
 20 | - Display the results, line by line
 21 | """
 22 | 
 23 | def query_lindas(query: str, environment: str):
 24 |     """
 25 |     Send a SPARQL query to a LINDAS end-point and return the JSON result
 26 |     Note: the values of the different environments URL should come from a config file/environment variables 
 27 |     """
 28 |     match environment:
 29 |         case "PROD":
 30 |             endpoint = "https://lindas.admin.ch/query"
 31 |         case "INT":
 32 |             endpoint = "https://int.lindas.admin.ch/query"
 33 |         case _:
 34 |             endpoint = "https://test.lindas.admin.ch/query"
 35 | 
 36 |     sparql = SPARQLWrapper(endpoint)
 37 |     sparql.setQuery(query=query)
 38 |     sparql.setReturnFormat(JSON)
 39 |     return sparql.query().convert()
 40 | 
 41 | def list_shared_dimensions(environment: str, name_lng: str="en", offset: int=0, limit: int=0, search_word: str=""):
 42 |     """
 43 |     List existing Shared Dimensions in a specific environment 
 44 |         Returns the JSON object of the SPARQL query result
 45 | 
 46 |     Args:
 47 |         limit: no limit if 0
 48 | 
 49 |     If a SD has a validThrough date, it could be deprecated (depending on the current date)
 50 |     """
 51 |     query = f"""
 52 |         PREFIX meta: <https://cube.link/meta/>
 53 |         PREFIX schema: <http://schema.org/>
 54 |         SELECT * WHERE {{
 55 |             ?sd a meta:SharedDimension .
 56 |             OPTIONAL{{ ?sd schema:name ?name .}}
 57 |             FILTER(lang(?name) = \"{name_lng}\") 
 58 |             OPTIONAL{{?sd schema:validFrom ?validFrom}}
 59 |             OPTIONAL{{?sd schema:validThrough ?validThrough}}
 60 |         """
 61 | 
 62 |     if search_word != "":
 63 |         query += f"FILTER contains(?name,\"{search_word}\")"
 64 | 
 65 |     query += f"""
 66 |         }} 
 67 |         ORDER BY ?name 
 68 |         OFFSET {offset}
 69 |         """
 70 |     if limit != 0:
 71 |         query += f"LIMIT {limit}"
 72 |     
 73 |     #print(query)
 74 |     return query_lindas(query, environment=environment)
 75 | 
 76 | def list_shared_dimensions_print(result: json, environment_for_terms: str=""):
 77 |     """
 78 |     Print the result of the list_shared_dimensions() query
 79 |         To the console, in a friendly manner, one sd per line with its URL, label, validFrom and validThrough values
 80 | 
 81 |     Args:
 82 |         environment_for_terms: if an environment is passed, for each shared dimension 2 terms will be queried and displayed
 83 |             This possibility to display 2 terms by querying LINDAS is just a POC, should be better refined
 84 |     """
 85 |     # Pretty print the JSON - for debuging purpose
 86 |     #print(json.dumps(result, indent=4))
 87 |     
 88 |     # Loop through the "bindings" and display dimensions name and URL (sd)
 89 |     if 'results' in result and 'bindings' in result['results'] and result['results']['bindings']:
 90 |         for item in result['results']['bindings']:
 91 |             # Extract the 'sd' and 'name' values
 92 |             sd = item['sd']['value']
 93 |             
 94 |             if 'name' in item:
 95 |                 name = item['name']['value']
 96 |             else:
 97 |                 name = "(no name in that language)"
 98 |             
 99 |             if 'validFrom' in item:
100 |                 validFrom = "- validFrom " + item['validFrom']['value']
101 |             else:
102 |                 validFrom = ""
103 | 
104 |             if 'validThrough' in item:
105 |                 validThrough = "- validThrough " + item['validThrough']['value']
106 |             else:
107 |                 validThrough = ""
108 | 
109 |             print(f"{name} <{sd}> {validFrom} {validThrough}")
110 | 
111 |             # if <parameter to define> -> list 2 terms for that sd
112 |             if environment_for_terms != "":
113 |                 termsResult = list_shared_dimension_terms(environment_for_terms, sd, "en", 0, 2)
114 |                 print("{ Terms sample:")
115 |                 print_sparql_result(termsResult, ["name", "sdTerm"])
116 |                 print("}")
117 | 
118 |     else:
119 |         print("No result binding found in that JSON result") 
120 | 
def list_shared_dimension_terms(environment: str, sd_URL: URIRef, name_lng: str="en", offset: int=0, limit: int=0):
    """
    List the terms URL of a Shared Dimensions in a specific environment
        Returns the JSON object of the SPARQL query result

    Args:
        environment: forwarded to query_lindas() to select the end-point
        sd_URL: URL of the shared dimension, matched against schema:inDefinedTermSet
        name_lng: language tag of the schema:name to return
        offset: value of the OFFSET clause
        limit: no limit if 0

    A term that has no name in name_lng is still listed, just without a ?name binding.
    """
    # The language FILTER is inside the OPTIONAL: placed after it, the filter fails on an
    # unbound ?name and removes every term without a name in that language
    query = f"""
        PREFIX schema: <http://schema.org/>
        SELECT * WHERE {{
            ?sdTerm  schema:inDefinedTermSet <{sd_URL}> .
            OPTIONAL{{
                ?sdTerm schema:name ?name .
                FILTER(lang(?name) = "{name_lng}")
            }}
        }} 
        ORDER BY ?name 
        OFFSET {offset}
        """

    if limit != 0:
        query += f"LIMIT {limit}"

    return query_lindas(query, environment=environment)
145 | 
def print_sparql_result(result: json, fields: List[str]):
    """
    Write the result of a sparql query to the console, one line per binding
    - Only the fields of the list parameter are displayed, in that order, each followed by a space
    - A field that is missing from a binding is displayed as an empty value
      (this function does not know about the mandatory/OPTIONAL fields in the original query)
    - If a value starts with "http" (whatever the case) -> it is displayed in between <>
    """
    # Get the "bindings" list, if the JSON result has one
    bindings = None
    if 'results' in result and 'bindings' in result['results']:
        bindings = result['results']['bindings']

    if not bindings:
        print("No result binding found in that JSON result")
        return

    for binding in bindings:
        cells = []
        for field in fields:
            text = binding[field]['value'] if field in binding else ""
            if text.lower().startswith("http"):
                text = "<" + text + ">"
            cells.append(text + " ")

        print("".join(cells))


--------------------------------------------------------------------------------
/pylindas/description.schema.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "$schema": "http://json-schema.org/draft-07/schema#",
  3 |   "type": "object",
  4 |   "properties": {
  5 |     "Name": {
  6 |       "type": "object",
  7 |       "properties": {
  8 |         "de": { "type": "string" },
  9 |         "fr": { "type": "string" },
 10 |         "it": { "type": "string" },
 11 |         "en": { "type": "string" }
 12 |       },
 13 |       "required": ["de", "en"]
 14 |     },
 15 |     "Description": {
 16 |       "type": "object",
 17 |       "properties": {
 18 |         "de": { "type": "string" },
 19 |         "en": { "type": "string" }
 20 |       },
 21 |       "required": ["de", "en"]
 22 |     },
 23 |     "Publisher": {
 24 |       "type": "array",
 25 |       "items": {
 26 |         "$ref": "#/definitions/Publisher"
 27 |       }
 28 |     },
 29 |     "Creator": {
 30 |       "type": "array",
 31 |       "items": {
 32 |         "$ref": "#/definitions/Creator"
 33 |       }
 34 |     },
 35 |     "Contributor": {
 36 |       "type": "array",
 37 |       "items": {
 38 |         "$ref": "#/definitions/Contributor"
 39 |       }
 40 |     },
 41 |     "Themes": {
 42 |       "type": "array",
 43 |       "items": {
 44 |         "$ref": "#/definitions/Theme"
 45 |       }
 46 |     },
 47 | 
 48 |     "Date Created": {
 49 |       "type": "string",
 50 |       "format": "date-time"
 51 |     },
 52 |     "Contact Point": {
 53 |       "$ref": "#/definitions/ContactPoint"
 54 |     },
 55 |     "Base-URI": {
 56 |       "type": "string"
 57 |     },
 58 |     "Identifier": {
 59 |       "type": "string"
 60 |     },
 61 |     "Version": {
 62 |       "type": "number"
 63 |     },
 64 |     "Work Status": {
 65 |       "type": "string",
 66 |       "enum": ["Draft", "In Progress", "Published", "Obsolete"]
 67 |     },
 68 |     "Visualize": {
 69 |       "type": "boolean"
 70 |     },
 71 |     "Accrual Periodicity": {
 72 |       "type": "string",
 73 |       "enum": ["daily", "weekly", "monthly", "quarterly", "yearly"]
 74 |     },
 75 |     "Namespace": {
 76 |       "type": "string"
 77 |     },
 78 |     "dimensions": {
 79 |       "type": "object",
 80 |       "patternProperties": {
 81 |         ".*": {
 82 |           "$ref": "#/definitions/Dimension"
 83 |         }
 84 |       },
 85 |       "additionalProperties": true
 86 |     }
 87 |   },
 88 | 
 89 |   "required": [
 90 |     "Name",
 91 |     "Description",
 92 |     "Publisher",
 93 |     "Creator",
 94 |     "Contributor",
 95 |     "Date Created",
 96 |     "Contact Point",
 97 |     "Base-URI",
 98 |     "Identifier",
 99 |     "Version",
100 |     "Work Status",
101 |     "Visualize",
102 |     "Accrual Periodicity",
103 |     "Namespace",
104 |     "dimensions"
105 |   ],
106 |   "definitions": {
107 |     "Publisher": {
108 |       "type": "object",
109 |       "properties": {
110 |         "IRI": {
111 |           "type": "string"
112 |         }
113 |       },
114 |       "required": ["IRI"]
115 |     },
116 |     "Creator": {
117 |       "type": "object",
118 |       "properties": {
119 |         "IRI": {
120 |           "type": "string"
121 |         },
122 |         "Name": {
123 |           "type": "string"
124 |         }
125 |       },
126 |       "required": ["IRI"]
127 |     },
128 |     "Contributor": {
129 |       "type": "object",
130 |       "properties": {
131 |         "IRI": {
132 |           "type": "string"
133 |         },
134 |         "Name": {
135 |           "type": "string"
136 |         }
137 |       },
138 |       "required": ["IRI", "Name"]
139 |     },
140 |     "Theme": {
141 |       "type": "object",
142 |       "properties": {
143 |         "IRI": {
144 |           "type": "string"
145 |         },
146 |         "Name": {
147 |           "type": "string"
148 |         }
149 |       },
150 |       "required": ["IRI"]
151 |     },
152 |     "ContactPoint": {
153 |       "type": "object",
154 |       "properties": {
155 |         "E-Mail": {
156 |           "type": "string",
157 |           "format": "email"
158 |         },
159 |         "Name": {
160 |           "type": "string"
161 |         }
162 |       },
163 |       "required": ["E-Mail", "Name"]
164 |     },
165 |     "Dimension": {
166 |       "type": "object",
167 |       "properties": {
168 |         "name": {
169 |           "type": "object",
170 |           "properties": {
171 |             "de": { "type": "string" },
172 |             "fr": { "type": "string" },
173 |             "it": { "type": "string" },
174 |             "en": { "type": "string" }
175 |           },
176 |           "required": ["de", "en"]
177 |         },
178 |         "dimension-type": {
179 |           "type": "string",
180 |           "enum": ["Key Dimension", "Measure Dimension", "Standard Error"]
181 |         },
182 |         "data-kind": {
183 |           "type": "object",
184 |           "description": "See https://cube.link/#meta-datakind-temporal-spatial",
185 |           "properties": {
186 |             "type": {
187 |               "type": "string",
188 |               "enum": ["temporal", "spatial-shape", "spatial-coordinates"]
189 |             },
190 |             "unit": {
191 |               "type": "string"
192 |             }
193 |           }
194 |         },
195 |         "scale-type": {
196 |           "description": "See https://cube.link/#qudt-scaletype",
197 |           "type": "string",
198 |           "enum": ["nominal", "ordinal", "interval", "ratio"]
199 |         },
200 |         "mapping": {
201 |           "type": "object",
202 |           "properties": {
203 |             "value-type": {
204 |               "type": "string",
205 |               "enum": ["Shared", "Literal"]
206 |             },
207 |             "type": {
208 |               "type": "string",
209 |               "enum": ["regex", "lookup", "replace", "additive"]
210 |             },
211 |             "pattern": {
212 |               "type": "string"
213 |             },
214 |             "replacement": {
215 |               "type": "string"
216 |             },
217 |             "replacements": {
218 |               "type": "array",
219 |               "items": {
220 |                 "type": "string"
221 |               }
222 |             },
223 |             "base": {
224 |               "type": "string"
225 |             }
226 |           },
227 |           "allOf": [
228 |             {
229 |               "if": {
230 |                 "properties": { "type": { "const": "regex" } }
231 |               },
232 |               "then": {
233 |                 "required": ["pattern", "replacement"]
234 |               }
235 |             },
236 |             {
237 |               "if": {
238 |                 "properties": { "type": { "const": "replace" } }
239 |               },
240 |               "then": {
241 |                 "required": ["replacements"]
242 |               }
243 |             },
244 |             {
245 |               "if": {
246 |                 "properties": { "type": { "const": "additive" } }
247 |               },
248 |               "then": {
249 |                 "required": ["base"]
250 |               }
251 |             }
252 |           ]
253 |         },
254 |         "unit": {
255 |           "type": "string"
256 |         },
257 |         "path": {
258 |           "type": "string"
259 |         }
260 |       },
261 |       "required": [
262 |         "name",
263 |         "dimension-type",
264 |         "scale-type",
265 |         "path",
266 |         "description"
267 |       ]
268 |     }
269 |   }
270 | }
271 | 


--------------------------------------------------------------------------------
/pylindas/cli.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import pandas as pd
  4 | import yaml
  5 | import py_cube
  6 | import logging
  7 | 
  8 | from py_cube.fetch import fetch
  9 | from py_cube.example import list_examples, load_example
 10 | from py_cube.cube.shared_dimension import convert_geojson_to_ttl
 11 | 
 12 | 
 13 | logger = logging.getLogger('pycube')
 14 | 
 15 | 
 16 | def serialize(input_directory: str, output_ttl: str, na_values: list[str], sep: str = ",", decimal: str = "."):
 17 |     csv_path = os.path.join(input_directory, "data.csv")
 18 |     yml_path = os.path.join(input_directory, "description.yml")
 19 |     json_path = os.path.join(input_directory, "description.json")
 20 | 
 21 |     if os.path.exists(yml_path):
 22 |         with open(yml_path) as file:
 23 |             cube_yaml = yaml.safe_load(file)
 24 |     elif os.path.exists(json_path):
 25 |         with open(json_path) as file:
 26 |             cube_yaml = yaml.safe_load(file)
 27 |     else:
 28 |         raise FileNotFoundError("Neither description.yml nor description.json found in the directory")
 29 | 
 30 |     df = pd.read_csv(csv_path, na_values=na_values, sep=sep, decimal=decimal)
 31 | 
 32 |     cube = py_cube.Cube(dataframe=df, cube_yaml=cube_yaml, environment="TEST", local=True)
 33 |     cube.prepare_data()
 34 |     cube.write_cube()
 35 |     cube.write_observations()
 36 |     cube.write_shape()
 37 |     cube.serialize(os.path.join(os.getcwd(), output_ttl))
 38 |     print(cube)
 39 | 
 40 | 
 41 | def configure_logging(log_level):
 42 |     class CustomFormatter(logging.Formatter):
 43 |         """Custom logging formatter to add colors based on log level."""
 44 | 
 45 |         COLORS = {
 46 |             'DEBUG': '\033[0m',  # Normal
 47 |             'INFO': '\033[94m',  # Blue
 48 |             'WARNING': '\033[93m',  # Yellow
 49 |             'ERROR': '\033[91m',  # Red
 50 |             'CRITICAL': '\033[91m',  # Red
 51 |         }
 52 | 
 53 |         def format(self, record):
 54 |             log_fmt = self.COLORS.get(record.levelname, '\033[0m') + '%(levelname)s: %(message)s\033[0m'
 55 |             formatter = logging.Formatter(log_fmt)
 56 |             return formatter.format(record)
 57 | 
 58 |     console_handler = logging.StreamHandler()
 59 |     logger.setLevel(log_level)
 60 |     console_handler.setFormatter(CustomFormatter())
 61 |     logger.addHandler(console_handler)
 62 | 
 63 | 
 64 | def main():
 65 |     parser = argparse.ArgumentParser(description="Cube data operations")
 66 |     subparsers = parser.add_subparsers(dest="operation", help="Operation to perform")
 67 | 
 68 |     serialize_parser = subparsers.add_parser("serialize", help="Serialize cube data")
 69 |     serialize_parser.add_argument("input_directory", help="Directory containing the data files")
 70 |     serialize_parser.add_argument("output_ttl", help="Output TTL file")
 71 |     serialize_parser.add_argument("--na_value", nargs="+", help="Values to treat as NA")
 72 |     serialize_parser.add_argument("--sep", default=",", nargs="?", help="Separator for CSV file")
 73 |     serialize_parser.add_argument("--decimal", default=".", nargs="?", help="Decimal separator")
 74 |     serialize_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
 75 | 
 76 |     fetch_parser = subparsers.add_parser("fetch", help="Fetches a dataset from a URL")
 77 |     fetch_parser.add_argument("input_url", type=str, help="The URL of the dataset to fetch")
 78 |     fetch_parser.add_argument("output", type=str, help="The directory to save the output files")
 79 |     fetch_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
 80 | 
 81 |     shared_parser = subparsers.add_parser("shared", help="Shared Dimension operations")
 82 |     shared_subparsers = shared_parser.add_subparsers(dest="suboperation", help="Shared sub-operations")
 83 | 
 84 |     convert_geojson_parser = shared_subparsers.add_parser("convert_geojson", help="Convert GeoJSON to TTL")
 85 |     convert_geojson_parser.add_argument("input_geojson", type=str, help="Input GeoJSON file")
 86 |     convert_geojson_parser.add_argument("output_ttl", type=str, help="Output TTL file")
 87 |     convert_geojson_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
 88 | 
 89 |     example_parser = subparsers.add_parser("example", help="Example operations")
 90 |     example_subparsers = example_parser.add_subparsers(dest="suboperation", help="Example sub-operations")
 91 | 
 92 |     list_parser = example_subparsers.add_parser("list", help="List all examples")
 93 |     list_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
 94 | 
 95 |     start_fuseki_parser = example_subparsers.add_parser("start-fuseki", help="Start a Fuseki database")
 96 |     start_fuseki_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
 97 | 
 98 |     load_parser = example_subparsers.add_parser("load", help="Load an example by name")
 99 |     load_parser.add_argument("example_name", type=str, help="The name of the example to load", choices=[example["id"] for example in list_examples()])
100 |     # add optional base_uri argument to load parser
101 |     load_parser.add_argument("--base-uri", type=str, help="The base URI for a SPARQL database (Fuseki supported)", default="http://localhost:3030/dataset")
102 |     load_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
103 | 
104 |     schema_parser = subparsers.add_parser("schema", help="Schema operations")
105 |     schema_subparsers = schema_parser.add_subparsers(dest="suboperation", help="Schema sub-operations")
106 |     schema_subparsers.add_parser("import", help="Import the description schema file")
107 |     schema_parser.add_argument("output", type=str, help="Output file")
108 |     schema_parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
109 | 
110 | 
111 |     args = parser.parse_args()
112 |     log_level = logging.DEBUG if args.verbose == 1 else logging.INFO
113 | 
114 |     configure_logging(log_level)
115 | 
116 |     if args.operation == "serialize":
117 |         serialize(args.input_directory, args.output_ttl, args.na_value, args.sep, args.decimal)
118 |     elif args.operation == "fetch":
119 |         fetch(args.input_url, args.output)
120 |     elif args.operation == "example":
121 |         if args.suboperation == "list":
122 |             examples = list_examples()
123 |             for example in examples:
124 |                 print(f'{example["id"]}: {example["name"]}')
125 |         elif args.suboperation == "load":
126 |             load_example(args.example_name, args.base_uri)
127 |         elif args.suboperation == "start-fuseki":
128 |             os.system("scripts/fuseki/start.sh")
129 |     elif args.operation == "shared":
130 |         if args.suboperation == "convert_geojson":
131 |             convert_geojson_to_ttl(args.input_geojson, args.output_ttl)
132 |     elif args.operation == 'schema':
133 |         if args.suboperation == "import-description":
134 |             description_schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'description.schema.json')
135 |             with open(description_schema_path, 'r') as f:
136 |                 schema = f.read()
137 |             with open(args.output, 'w') as f:
138 |                 f.write(schema)
139 |                 logger.debug(f"Imported description into current directory: {args.output}")
140 | 
141 | 
142 | 
143 | if __name__ == "__main__":
144 |     main()


--------------------------------------------------------------------------------
/docs/yaml.md:
--------------------------------------------------------------------------------
  1 | # Introduction
  2 | 
  3 | `pylindas` works with dictionaries to describe meta data for the various constructs that are supported (namely `cube:Cube`, `meta:SharedDimension`). One way to construct these nested dictionaries is through a `yaml` file. They are flexible and easy to read and are currently the main way (as well as the only supported way) to provide the necessary metadata.
  4 | 
  5 | This page describes the structure needed for a valid `yaml` file.
  6 | 
  7 | ## Namespaces 
  8 | | **PREFIX** | **IRI** | 
  9 | | --- | --- |
 10 | | `cube` | `<https://cube.link/>` |
 11 | | `dcat` | `<http://www.w3.org/ns/dcat#>`|
 12 | | `dcterms` | `<http://purl.org/dc/terms/>` |
 13 | | `meta` | `<https://cube.link/meta/>` |
 14 | | `schema` | `<http://schema.org/>` |
 15 | | `sh` | `<http://www.w3.org/ns/shacl#>` |
 16 | 
 17 | ***
 18 | 
 19 | # `cube:Cube`
 20 | 
 21 | Below the table you'll find a working example. For additional examples, please refer to [the example directory](https://github.com/Kronmar-Bafu/lindas-pylindas/tree/main/example/Cubes).
 22 | 
 23 | | Key | Status | Expected Entry | Description | Target Predicate  |
 24 | | --- | --- | --- | --- | --- |
 25 | |**Name** | Required | Key-Value pairs, with key being an [ISO 639 language code](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) for the language in question and the corresponding value | Name of the dataset with corresponding language | `schema:name`, `dcterms:title` |
 26 | |**Description** | Required | Key-Value pairs, with key being an [ISO 639 language code](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) for the language in question and the corresponding value | Description of the dataset with corresponding language | `schema:description`, `dcterms:description` |
 27 | |**Publisher** | Required | List of Key-Value pairs, with `key = IRI` and the correct IRI | Describes the publisher of the dataset with the correct IRI | `schema:publisher`, `dcterms:publisher` |
 28 | |**Creator** | Required | List of Key-Value pairs, with `key = IRI` and the correct IRI | Describes the creator of the dataset with the correct IRI | `schema:creator`, `dcterms:creator` |
 29 | |**Contributor** | Required | List of Key-Value pairs, with `key = IRI` and the correct IRI and `key = Name`| Describes the contributors of the dataset, with both the correct IRI and the name | `schema:contributor`, `dcterms:contributor` |
 30 | |**Date Created** | Required | Date of Publication, given in ISO 8601 format, i.e. YYYY-MM-DD | Publication date of the dataset. | `schema:dateCreated` |
 31 | |**Contact Point** | Required | Key-Value pairs, with keys `E-Mail` and `Name` for contact E-mail as well as name | Contact point of the data set | `schema:contactPoint`, `dcat:contactPoint` |
 32 | |**Base-URI** | Required | a valid URI | The Base-URI will be used to construct a URI for the cube as well as other parts of the cube. Please make sure to give something meaningful and contact the Federal Archive | |
 33 | |**Identifier** | Required | a *unique* identifier for the cube | The unique identifier under which a cube (or a family of cubes with differing versions) can be identified | `dcterms:identifier` |
 34 | |**Version** | Required | a numerical value | the version of the cube | `schema:version` |
 35 | |**Work Status** | Required | Either `Draft` or `Published` | the work status of the Cube. Either Published for final iterations of the given version or Draft for earlier versions. | `schema:creativeWorkStatus` |
 36 | |**Visualize** | optional | True or False | boolean describing whether the Cube should be displayed on `visualize.admin.ch`. The key-value pair can be omitted, which will be considered `False` | `schema:workExample` |
 37 | |**Accrual Periodicity** | optional | `daily`, `weekly`, `monthly`, `yearly` or `irregular` | The frequency with which the cube is expected to be updated | `dcterms:accrualPeriodicity` |
 38 | |**Namespace** | optional | a string | does not have a technical impact but instead improves readability if one serializes a cube | |
 39 | |**dimensions** | required | a key-value pair with key being the column name in the `pandas.DataFrame`. The value is a valid `dimension` as described in [dimension](#dimension) | Describes the metadata of a given dimension. | `cube:observationConstraint/sh:property` |
 40 | 
 41 | ## `dimension`
 42 | 
 43 | | Key | Status | Expected Entry | Description | Target Predicate |
 44 | | --- | --- | --- | --- | --- |
 45 | |**name** | Required | Key-Value pairs, with key being a language short hand and the corresponding value | Name of the dimension with corresponding language | `schema:name` |
 46 | |**description** | Required | Key-Value pairs, with key being a language short hand and the corresponding value | Description of the dimension with corresponding language | `schema:description` |
 47 | |**dimension-type** | Required | Either `Key Dimension`, `Measure Dimension` or `Standard Error` | Type of dimension: a key dimension, a measure dimension or a standard error. Exactly one type can be given | `rdf:type` |
 48 | |**scale-type** | Required | Either `nominal`, `ordinal`, `interval`, or `ratio` | Scale type of the dimension. (TODO: add a link to further details.) | `qudt:scaleType`|
 49 | |**path** | required | a per cube unique string `path`, describing the predicate used for the dimension. | `cube:Observation` are written with `<cube_uri/observation/[unique_identifier]> <base_uri/path> "Value"`. | `sh:path` | 
 50 | |**mapping** | required for dimensions using URI objects | key-value pairs, at least one key-value pair with key `type` and value being either `replace` or `additive` | a logic which should be employed when mapping values in the data frame to some URI | None | 
 51 | |**unit** | required for measure dimensions | a unit from the qudt:unit namespace. Refer to [these Units here](https://www.qudt.org/doc/DOC_VOCAB-UNITS.html) - namespace does not need to be provided, for example for kg, provide `KiloGM` | Unit in which the measure dimension is provided | `qudt:hasUnit` |
 52 | |**datatype** | Required | a datatype defined in [section 3](https://www.w3.org/TR/xmlschema-2/#built-in-datatypes), without namespace | the datatype of the column in question | `sh:datatype` | 
 53 | 
 54 | ### Example
 55 | ```yaml
 56 | Name:
 57 |   de: Mock Cube
 58 |   fr: Mock Cube
 59 |   it: Mock Cube
 60 |   en: Mock Cube
 61 | Description:
 62 |   de: Ein Beispiel Cube, der simulierte Daten enthält
 63 |   en: An example Cube containing some simulated data
 64 | Publisher: 
 65 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 66 | Creator:
 67 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/office_of_Mock
 68 | Contributor:
 69 |   - IRI: https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu
 70 |     Name: Bundesamt für Mock Data
 71 | Date Created:
 72 |   2024-08-26
 73 | Contact Point:
 74 |   E-Mail: contact@mock.ld.admin.ch
 75 |   Name: Bundesamt für Mock Data
 76 | Base-URI: https://mock.ld.admin.ch/
 77 | Identifier: mock-example
 78 | Version: 1
 79 | Work Status: 
 80 |   Draft
 81 | Visualize:
 82 |   True
 83 | # Optional but recommended
 84 | Accrual Periodicity: yearly
 85 | 
 86 | # Optional
 87 | Namespace: mock
 88 | 
 89 | dimensions:
 90 |   # required
 91 |   Jahr:
 92 |     name:
 93 |       de: Jahr
 94 |       fr: An
 95 |       it: Anno
 96 |       en: Year
 97 |     description:
 98 |       de: Jahr der Erhebung
 99 |     dimension-type: Key Dimension
100 |     datatype: URI
101 |     scale-type: ordinal
102 |     path: year
103 |     data-kind: 
104 |       type: temporal
105 |       unit: year
106 |     mapping:
107 |       type: additive
108 |       base: https://ld.admin.ch/time/year/
109 | ```
110 | 
111 | ***
112 | 
113 | ## New features: Shared dimension generation + Concept tables
114 | In early March 2025, these two new features were added.  
115 | As the yaml configuration might still be refactored, the explanations can currently be found in the respective README and, as comments, in the yaml examples:
116 | - Shared dimensions: [README](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/pylindas/pyshareddimension/README.md) and [sd_description.yml](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/example/Shared_Dimensions/shared_dimension_generation/sd_description.yml)  
117 | - Concept tables: [README](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/example/Cubes/concept_table_airport/README.md) and [description.yml](https://github.com/Kronmar-Bafu/lindas-pylindas/blob/main/example/Cubes/concept_table_airport/description.yml)
118 | 
119 | 


--------------------------------------------------------------------------------
/pylindas/pyshareddimension/README.md:
--------------------------------------------------------------------------------
  1 | # Generation of shared dimension
  2 | This is a first implementation to generate a shared dimension, following an approach similar to pyCube, but to transform a .csv file to the corresponding RDF.  
  3 | I will abbreviate Shared Dimension by SD, for convenience.  
  4 | 
  5 | pyCube generates two things: the cube's resource (with its specific URL and properties such as the cube's name and metadata), and a list of observations (each a resource with a specific URL and properties).  
  6 | Similarly, a SD is also composed of the SD's resource itself (with its specific URL and properties such as the SD's name), and a list of terms (each a resource with a specific URL and properties).  
  7 | 
  8 | The implementation is done in [shared_dimension.py](shared_dimension.py), which is a copy and adaptation of cube.py, to reproduce code that matches the pyCube "approach".
  9 | 
 10 | An example is given in the [example/Shared_Dimensions/shared_dimension_generation](/example/Shared_Dimensions/shared_dimension_generation/) folder, which contains:
 11 | 
 12 | - [sd_description.yml](/example/Shared_Dimensions/shared_dimension_generation/sd_description.yml): the information about the SD itself (Identifier, Name in different languages, etc) and about the Terms generation
 13 | - [sd_terms.csv](/example/Shared_Dimensions/shared_dimension_generation/sd_terms.csv): the data for the terms with an identifier and a name in different languages
 14 | Note that the terms were taken from the BAFU's Red List, a use case I worked on
 15 | - [sd_example.py](/example/Shared_Dimensions/shared_dimension_generation/sd_example.py): example code to run the transformation
 16 | 
 17 | ## WARNING: Persistent URLs
 18 | It is to be noted that when publishing a SD, the goal is that other datasets will make links to that SD.  
 19 | This link is the basic principle of Linked Data, and it consists in the reuse of the identifiers of the SD and its terms (their URLs) in other datasets, as Cube's dimensions for instance.  
 20 | Therefore, the basic requirement of Persistent URLs should be carefully applied when publishing Shared Dimensions, because removing an existing SD or one of its terms could break another dataset (or hundreds, thousands of other datasets).  
 21 | When trying things out on LINDAS TEST, it might not really matter, but when publishing a SD on LINDAS INT it is already more important, and when publishing to LINDAS PROD it is of course vital.  
 22 | 
 23 | To handle this properly, a SD and each term have a `schema:validFrom` triple which indicates the starting date of validity.  
 24 | The value comes from the configuration .yml file and is a date/time value: 
 25 | ```
 26 | Valid-from: 2025-02-05T00:00:00Z
 27 | ```
 28 | When a SD or one of its terms should no longer be used, it must still exist but become "deprecated". This is done by adding a `schema:validThrough` triple with an ending date/time.  
 29 | This mechanism avoids breaking existing datasets.  
 30 | 
 31 | The generation of the `schema:validThrough` triple is not currently handled in this code, further thoughts might be needed to handle this properly and allow to deprecate a whole SD, or only one/some of its terms.
 32 | 
 33 | ## Links between terms: hierarchy example
 34 | A first implementation is available, and the current example demonstrates how to build a hierarchy with `skos:broader` links from child to parent.
 35 | 
 36 | The links (hierarchy) must be provided in the data itself:  
 37 | - [sd_terms.csv](sd_terms.csv): has an identifier for the term itself (the `code` field), and an identifier for its parent (the `parent_code` field)
 38 | - [sd_description.yml](sd_description.yml): defines a link between terms with the `links-to-other-terms` key. The sub-key `parent_code` is the name of the column that contains the identifier of the other term. The value of `property` is the URL of the property to use to link the current term to its related term, the parent in this example.
 39 | The `links-to-other-terms` key is optional: just omit it if there are no links between terms in the dataset
 40 | 
 41 | Notes about the hierarchy example: 
 42 | - The root term does not have a parent, this is currently handled properly  
 43 | - The description of the hierarchy is not generated yet, this could be added in a coming version
 44 | 
 45 | This current implementation allows creating links between two terms and can thus be configured to link the term to its parent with the `skos:broader` property.  
 46 | Multiple links can be defined under the `links-to-other-terms` key.  
 47 | One current "limitation" is that it links one term to another (not to multiple others).  
 48 | 
 49 | ## About hierarchies "description" or "template"
 50 | When a hierarchy exists in a Shared Dimension, the Cube Creator allows to describe that hierarchy under the "Hierarchy" tab.  
 51 | 
 52 | The goal is to describe the existing hierarchy by defining the root node(s), the levels, and the property that links the terms to build that hierarchy (such as `skos:broader`, for instance). When linking a cube's dimension to an existing Shared Dimension, the hierarchy description must be defined in the metadata, and it is then possible to copy an existing hierarchy description as explained in the [Cube Creator's User guide](https://github.com/zazuko/cube-creator/wiki/3.-Cube-Designer#linking-to-shared-dimensions).  
 53 | 
 54 | In autumn 2024, it was not yet possible to add, by code, a hierarchy description in LINDAS. The cause was that the Cube Creator was expecting the hierarchy description to be in a specific Named Graph (only available to the Cube Creator itself). The possibility to add hierarchy descriptions was requested [in this issue](https://gitlab.ldbar.ch/zazuko/misc/-/issues/197), and was first tested when creating this feature of Shared Dimension generation. At the time of writing (early March 2025), that possibility was not yet working properly (see the [comment](https://gitlab.ldbar.ch/zazuko/misc/-/issues/197#note_18273) in that feature request).
 55 | 
 56 | **Currently proposed solution**: this step to add a hierarchy description to LINDAS, and then copy it when defining a cube's dimension, is just an option. It is not working yet with pyLindas. But it is also possible to directly add the hierarchy description to the metadata of the dimension while generating a cube with pyCube. This is a feature under development.   
 57 | 
 58 | For information, here is the RDF of the hierarchy description that was used to perform that test:
 59 | ```
 60 | @prefix sd_md: <https://cube-creator.zazuko.com/shared-dimensions/vocab#> .
 61 | @prefix meta: <https://cube.link/meta/> .
 62 | @prefix hydra: <http://www.w3.org/ns/hydra/core#> .
 63 | @prefix schema1: <http://schema.org/> .
 64 | @prefix shacl: <http://www.w3.org/ns/shacl#> .
 65 | 
 66 | <https://ld.admin.ch/cube/dimension/hierarchy/pylindas_hierarchy_generation_example> a sd_md:Hierarchy, meta:Hierarchy, hydra:Resource ;
 67 |     schema1:name "PyLindas Hierarchy Description fo Shared Dimension generation example" ;
 68 |     sd_md:sharedDimension <https://ld.admin.ch/cube/dimension/pylindas_sd_generation_example> ;
 69 |     meta:hierarchyRoot ns1:1 ;
 70 |     meta:nextInHierarchy [ schema1:name "Level 1" ;
 71 |         shacl:path [shacl:inversePath skos:broader] ;
 72 |         meta:nextInHierarchy [ schema1:name "Level 2" ;
 73 |             shacl:path [shacl:inversePath skos:broader] ;
 74 |             meta:nextInHierarchy [ schema1:name "Level 3" ;
 75 |                 shacl:path [shacl:inversePath skos:broader] ;
 76 |                 meta:nextInHierarchy [ schema1:name "Level 4" ;
 77 |                     shacl:path [shacl:inversePath skos:broader] ;
 78 |                     meta:nextInHierarchy [ schema1:name "Level 5" ;
 79 |                         shacl:path [shacl:inversePath skos:broader]
 80 |                         ]
 81 |                     ]
 82 |                 ]
 83 |             ]
 84 |         ] .
 85 | ```
 86 | Note: the links `nextInHierarchy` must be defined from parent to child. Therefore, if the link in the data is from child to parent, the `shacl:inversePath` must be used as in that example. If the link is already parent to child, it can be simply stated:
 87 | ```
 88 |  meta:nextInHierarchy [ schema1:name "Level 1" ;
 89 |     shacl:path skos:narrower 	
 90 |     ]
 91 | ```
 92 | 
 93 | ## Generated Shared dimension's RDF validation with SHACL
 94 | As the SHACL validation has now been implemented in PyCube, with the `validate()` method, a first temporary version is proposed here.
 95 | 
 96 | **IMPORTANT Remark:**  
 97 | The code of the `validate()` method is copied from the cube.py validate() and adapted.  
 98 | However, no official SHACL file is available yet online to validate a Shared Dimension.  
 99 | During former talks with Zazuko, when writing the [page about Data Validation](https://gitlab.ldbar.ch/hevs/lindas-architecture-and-components/-/blob/main/DataValidation.md?ref_type=heads), they sent us an extract of their data validation process, specific to Shared Dimension.  
100 | This extract is temporarily added in this project, in the [shared_dimension_shape.ttl](shared_dimension_shape.ttl) file, and used for that SHACL validation.  
101 | It is currently not hard-coded in the `validate()` method, but passed as parameter. See [sd_example.py](/example/Shared_Dimensions/shared_dimension_generation/sd_example.py) for an example.
102 | 
103 | This code demonstrates the validation, but should be improved when that SHACL is finalized and saved online.
104 | 
105 | 


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/cube_with_concept.ttl:
--------------------------------------------------------------------------------
  1 | @prefix cube: <https://cube.link/> .
  2 | @prefix dcat: <http://www.w3.org/ns/dcat#> .
  3 | @prefix dct: <http://purl.org/dc/terms/> .
  4 | @prefix meta: <https://cube.link/meta/> .
  5 | @prefix mock: <https://mock-concept.ld.admin.ch/> .
  6 | @prefix ns1: <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/prop/airport_type/> .
  7 | @prefix qudt: <http://qudt.org/schema/qudt/> .
  8 | @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
  9 | @prefix schema: <http://schema.org/> .
 10 | @prefix sh: <http://www.w3.org/ns/shacl#> .
 11 | @prefix time: <http://www.w3.org/2006/time#> .
 12 | @prefix unit: <http://qudt.org/vocab/unit/> .
 13 | @prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
 14 | @prefix void: <http://rdfs.org/ns/void#> .
 15 | 
 16 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1> a void:Dataset,
 17 |         schema:Dataset,
 18 |         dcat:Dataset,
 19 |         cube:Cube ;
 20 |     dct:accrualPeriodicity <http://publications.europe.eu/resource/authority/frequency/ANNUAL> ;
 21 |     dct:creator <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 22 |     dct:identifier "mock-concept" ;
 23 |     schema:contactPoint [ a schema:ContactPoint ;
 24 |             schema:email "contact@mock.ld.admin.ch"^^<http://www.w3.org/2001/XMLSchema#string> ;
 25 |             schema:name "Bundesamt für Mock Data"^^<http://www.w3.org/2001/XMLSchema#string> ] ;
 26 |     schema:contributor <https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-umwelt-bafu> ;
 27 |     schema:creativeWorkStatus <https://ld.admin.ch/vocabulary/CreativeWorkStatus/Draft> ;
 28 |     schema:creator <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 29 |     schema:dateCreated "2024-08-26"^^<http://www.w3.org/2001/XMLSchema#date> ;
 30 |     schema:dateModified "2025-02-20T09:38:41+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime> ;
 31 |     schema:datePublished "2025-02-20"^^<http://www.w3.org/2001/XMLSchema#date> ;
 32 |     schema:description "A dataset containing two csv, one for a concept table"@en,
 33 |         "Un jeu de données avec deux csv, un pour une table de concept"@fr ;
 34 |     schema:name "Example with a concept table (DE)"@de,
 35 |         "Example with a concept table"@en,
 36 |         "Exemple avec une table de concept"@fr,
 37 |         "Example with a concept table (IT)"@it ;
 38 |     schema:publisher <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 39 |     schema:version 1 ;
 40 |     schema:workExample <https://ld.admin.ch/application/visualize> ;
 41 |     dcat:contactPoint [ a vcard:Organization ;
 42 |             vcard:fn "Bundesamt für Mock Data"^^<http://www.w3.org/2001/XMLSchema#string> ;
 43 |             vcard:hasEmail "contact@mock.ld.admin.ch"^^<http://www.w3.org/2001/XMLSchema#string> ] ;
 44 |     cube:observationConstraint <https://mock-concept.ld.admin.ch/cube/mock-concept/1/shape> ;
 45 |     cube:observationSet <https://mock-concept.ld.admin.ch/cube/mock-concept/1/ObservationSet> .
 46 | 
 47 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/ObservationSet> a cube:ObservationSet ;
 48 |     cube:observation <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2001_A_a>,
 49 |         <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2001_A_dummy>,
 50 |         <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2001_B_b>,
 51 |         <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2002_A_a>,
 52 |         <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2002_B_b> .
 53 | 
 54 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2001_A_a> a cube:Observation ;
 55 |     cube:observedBy <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 56 |     mock:Jahr <https://ld.admin.ch/time/year/2001> ;
 57 |     mock:airport_type <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/a> ;
 58 |     mock:airport_type_2nd "a" ;
 59 |     mock:value 12 .
 60 | 
 61 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2001_A_dummy> a cube:Observation ;
 62 |     cube:observedBy <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 63 |     mock:Jahr <https://ld.admin.ch/time/year/2001> ;
 64 |     mock:airport_type <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/dummy> ;
 65 |     mock:airport_type_2nd "dummy" ;
 66 |     mock:value 15 .
 67 | 
 68 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2001_B_b> a cube:Observation ;
 69 |     cube:observedBy <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 70 |     mock:Jahr <https://ld.admin.ch/time/year/2001> ;
 71 |     mock:airport_type <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/B/b> ;
 72 |     mock:airport_type_2nd "b" ;
 73 |     mock:value 19 .
 74 | 
 75 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2002_A_a> a cube:Observation ;
 76 |     cube:observedBy <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 77 |     mock:Jahr <https://ld.admin.ch/time/year/2002> ;
 78 |     mock:airport_type <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/a> ;
 79 |     mock:airport_type_2nd "a" ;
 80 |     mock:value 15 .
 81 | 
 82 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/observation/2002_B_b> a cube:Observation ;
 83 |     cube:observedBy <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ;
 84 |     mock:Jahr <https://ld.admin.ch/time/year/2002> ;
 85 |     mock:airport_type <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/B/b> ;
 86 |     mock:airport_type_2nd "b" ;
 87 |     mock:value 20 .
 88 | 
 89 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/shape> a sh:NodeShape,
 90 |         cube:Constraint ;
 91 |     sh:closed true ;
 92 |     sh:property [ a cube:KeyDimension ;
 93 |             qudt:scaleType qudt:OrdinalScale ;
 94 |             schema:description "Jahr der Erhebung"@de,
 95 |                 "Year of survey"@en,
 96 |                 "Année du relevé"@fr,
 97 |                 "Anno di rilevamento"@it ;
 98 |             schema:name "Jahr"@de,
 99 |                 "Year"@en,
100 |                 "Année"@fr,
101 |                 "Anno"@it ;
102 |             sh:in ( <https://ld.admin.ch/time/year/2001> <https://ld.admin.ch/time/year/2002> ) ;
103 |             sh:maxCount 1 ;
104 |             sh:minCount 1 ;
105 |             sh:nodeKind sh:IRI ;
106 |             sh:path mock:Jahr ;
107 |             meta:dataKind [ a time:GeneralDateTimeDescription ;
108 |                     time:unitType time:unitYear ] ],
109 |         [ sh:in ( cube:Observation ) ;
110 |             sh:nodeKind sh:IRI ;
111 |             sh:path rdf:type ],
112 |         [ a cube:KeyDimension ;
113 |             qudt:scaleType qudt:NominalScale ;
114 |             schema:description "Flughafentyp - DESC"@de,
115 |                 "Type of Airport - DESC"@en,
116 |                 "Type d'aéroport - DESC"@fr,
117 |                 "Tipo di aeroporto - DESC"@it ;
118 |             schema:name "Flughafentyp"@de,
119 |                 "Type of Airport"@en,
120 |                 "Type d'aéroport"@fr,
121 |                 "Tipo di aeroporto"@it ;
122 |             sh:in ( <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/a> <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/B/b> <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/dummy> ) ;
123 |             sh:maxCount 1 ;
124 |             sh:minCount 1 ;
125 |             sh:nodeKind sh:IRI ;
126 |             sh:path mock:airport_type ],
127 |         [ sh:in ( <https://register.ld.admin.ch/opendataswiss/org/office_of_Mock> ) ;
128 |             sh:nodeKind sh:IRI ;
129 |             sh:path cube:observedBy ],
130 |         [ a cube:MeasureDimension ;
131 |             qudt:hasUnit unit:kilogramm ;
132 |             qudt:scaleType qudt:IntervalScale ;
133 |             schema:description "Wert - DESC"@de,
134 |                 "Value - DESC"@en,
135 |                 "Valeur - DESC"@fr,
136 |                 "Valore - DESC"@it ;
137 |             schema:name "Wert"@de,
138 |                 "Value"@en,
139 |                 "Valeur"@fr,
140 |                 "Valore"@it ;
141 |             sh:max "20" ;
142 |             sh:maxCount 1 ;
143 |             sh:min "12" ;
144 |             sh:minCount 1 ;
145 |             sh:nodeKind sh:Literal ;
146 |             sh:path mock:value ],
147 |         [ a cube:KeyDimension ;
148 |             qudt:scaleType qudt:NominalScale ;
149 |             schema:description "Flughafentyp - second key for demo"@de,
150 |                 "Type of Airport - second key for demo"@en,
151 |                 "Type d'aéroport - second key for demo"@fr,
152 |                 "Tipo di aeroporto - second key for demo"@it ;
153 |             schema:name "Flughafentyp (second key for demo)"@de,
154 |                 "Type of Airport (second key for demo)"@en,
155 |                 "Type d'aéroport (second key for demo)"@fr,
156 |                 "Tipo di aeroporto (second key for demo)"@it ;
157 |             sh:in ( <a> <b> <dummy> ) ;
158 |             sh:maxCount 1 ;
159 |             sh:minCount 1 ;
160 |             sh:nodeKind sh:IRI ;
161 |             sh:path mock:airport_type_2nd ] .
162 | 
163 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/a> schema:description "Domestic airport description"@en,
164 |         "Description de Aéroport national"@fr ;
165 |     schema:name "Inlandflughafen"@de,
166 |         "Domestic airport"@en,
167 |         "Aéroport national"@fr ;
168 |     schema:position 1 ;
169 |     schema:sameAs <https://mock-concept.ld.admin.ch/cube/mock-concept/concept/airport_type/A/a> ;
170 |     ns1:other_property_example "another property example for domesctic airport"@en .
171 | 
172 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/B/b> schema:description "International airport description"@en,
173 |         "Description de Aéroport international"@fr ;
174 |     schema:name "Internationaler Flughafen"@de,
175 |         "International airport"@en,
176 |         "Aéroport international"@fr ;
177 |     schema:position 2 ;
178 |     schema:sameAs <https://mock-concept.ld.admin.ch/cube/mock-concept/concept/airport_type/B/b> ;
179 |     ns1:other_property_example "another property example for international airport"@en .
180 | 
181 | 


--------------------------------------------------------------------------------
/example/Cubes/concept_table_airport/README.md:
--------------------------------------------------------------------------------
  1 | # Implementation of concept tables and multilingual concepts
  2 | This is a first implementation to handle:
  3 | - concept tables
  4 | - multilingual concepts
  5 | 
  6 | ## Concept table 
  7 | A concept table is the possibility to handle the values of a dimension as a url to a new resource (a concept).  
  8 | This is similar to an object that is the URL of a Shared Dimension's term, but here the concepts are created for the cube and uploaded with the cube.  
  9 | Remark: if the resource/concept already exists, then the case is similar to the handling of Shared Dimensions mapping, and this is already handled by pyCube with the "mapping" mechanism.  
 10 | 
 11 | ## This example's dataset
 12 | This example is a little dataset with values/measures about some airport types, the dataset in [data.csv](data.csv) and the airport types in [airportType.csv](airportType.csv).  
 13 | Each airport type is identified by two fields (`typeOfAirportID` + `typeOfAirportSecondID`), to demonstrate how to handle this use case that can easily happen in reality.  
 14 | But one field would have been enough here, and the example is easily adapted by removing the typeOfAirportSecondID from the configurations.
 15 | 
 16 | This example handles two tasks:
 17 | - Generate the correct URL for the dimension's object
 18 | - Generate the concepts with their properties
 19 | 
 20 | ## Generate the correct URLs for the dimension's object
 21 | In description.yml, the "typeOfAirport" dimension is defined with the standard settings, plus the new mapping type:
 22 | ```
 23 | 	mapping:
 24 | 		type: concept
 25 | 		replacement-automated: /airport_type/{typeOfAirport}/{typeOfAirport2nd}    
 26 | ```
 27 | 
 28 | In the code, this is handled in the existing `_apply_mappings()` method, with this new `concept` mapping type.  
 29 | 
 30 | The replacement will generate a URL replacing for each line the values of the identifiers `{typeOfAirport}` and `{typeOfAirport2nd}`.  
 31 | - If the value starts with "/", as this example, it is considered a relative URL that will be concatenated to the cube's URL
 32 | - If the value does not start with "/", it should be a full URL also containing {field} values that are replaced on the fly.  
 33 | 
 34 | **Proposal 1**: this "replacement" handling could also be implemented for shared dimensions.  
 35 | It could be added to the current "replacements" handling, where the replacements provide a one-to-one mapping
 36 | that might be needed if no corresponding key exists to automatically build the URL (map "Zurich" to 0 for example).  
 37 | 
 38 | **Proposal 2**: maybe this way of handling the URL, with the {field} configuration, could replace the current "additive" and "replace" mapping types, handling both with one single syntax.
 39 | 
 40 | ## Generate the concepts with their properties
 41 | This can be seen as another independent operation, to generate the triples for the concepts.  
 42 | 
 43 | The concept metadata are added to the "description.yml" as follows:
 44 | ```
 45 | 	Concepts:
 46 | 		typeOfAirport:
 47 | 			URI: /airport_type/{typeOfAirportID}/{typeOfAirportSecondID}
 48 | 			name-field: typeOfAirport
 49 | 			position-field: position
 50 | 			multilingual: true 
 51 | ```
 52 | 
 53 | A specific dataframe is created with the content of "airportTypes.csv", and added to the cube's graph with:
 54 | ```
 55 | 	cube.write_concept("typeOfAirport", airport_concept_df)
 56 | ```
 57 | The first parameter is the key found under the "Concepts" in the yaml file  
 58 | The second is the dataframe with the values.  
 59 | 
 60 | The new method `cube.write_concept()` will generate the triples based on:  
 61 | - URI: Used to generate a URL replacing for each line the values of the identifiers `{typeOfAirportID}` and `{typeOfAirportSecondID}`      
 62 | The handling is similar to the "replacement" value for the dimension objects  
 63 | Both patterns should generate the same URLs, with the flexibility to have different column names in different files  
 64 | - name-field: (mandatory) the name of the csv column that contains the name for that concept used for schema:name  
 65 | - multilingual: (optional) if true, then the code will look for columns named name-field + the language tags (_en, _de,_fr, etc)  
 66 | and generate the different schema:name language strings
 67 | - position-field: (optional) the name of the csv column that contains a numeric position value for the concept  
 68 | this will generate a `schema:position` that is used by Visualize when the concepts should not be displayed in alphabetical order but according to that position value
 69 | 
 70 | ### Concept triples
 71 | The concept triples were deduced by observing some concepts generated by the Cube Creator.  
 72 | The current code generates the following triples
 73 | - no rdf:type, but this could be added
 74 | - the URL of the concept is based on the URL of the cube, with a version  
 75 | However, all those concepts "versions" have a `schema:sameAs` to the URL of the cube without the version (the use of that information might need clarification)
 76 | - schema:name is mandatory, and could be either a single value, or language strings to handle multilingual concepts (as in this example)
 77 | - schema:position, optional, see the explanation here above
 78 | 
 79 | Example result (see [cube_with_concept.ttl](cube_with_concept.ttl)):
 80 | ```
 81 | 	<https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/a> schema1:name "Inlandflughafen"@de,
 82 | 			"Domestic airport"@en,
 83 | 			"Aéroport national"@fr ;
 84 | 		schema1:position 1 ;
 85 | 		schema1:sameAs <https://mock-concept.ld.admin.ch/cube/mock-concept/concept/airport_type/A/a> .  
 86 | ```
 87 | 
 88 | ### Checking the matchings
 89 | As we can see, the concepts triples are "separated" triples, not related to the cube itself, where the concept's URL should match the object URL of the dimension (explained here above).  
 90 | 
 91 | Mismatches can happen if:
 92 | - The configuration of the URL mapping is not well defined, either in the dimension (`mapping/replacement` field) or in the concept (`URI` field)
 93 | - The configuration is correct, but the values in the two input files do not match
 94 | 
 95 | As a reminder: in RDF there is no enforcement of a resource to be explicitly defined for the RDF to be valid. This means that the object of the dimension could be a URL that is not defined anywhere else.  
 96 | Of course, this will break applications such as Visualize, but it is still valid RDF (Open World Assumption).  
 97 | 
 98 | For this purpose, a `pycube.check_dimension_object_property()` method is added.
 99 | It is called in [example_concept.py](example_concept.py) as follows:  
100 | ```
101 | allConceptsFound = cube.check_dimension_object_property("typeOfAirport", SCHEMA.name)
102 | ```
103 | It means: check that all objects of the "typeOfAirport" dimension (defined in the .yaml file) point to a resource that does have a `schema:name` value. Knowing that concepts MUST HAVE a `schema:name` as explained here above.
104 | That method will print out the URLs that have no match, and returns False if this is the case.  
105 | In this example, there is a deliberately missing match for the line in [data.csv](data.csv):  
106 | ```
107 | 2001,A,dummy,15
108 | ```
109 | Which will be the cause of the following log line:
110 | ```
111 | Missing value for  https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/dummy
112 | ```
113 | 
114 | IMPORTANT: `pycube.check_dimension_object_property()` will recreate the dimension's property URL based on the path `value`.  
115 | The code comes from the existing `_add_observation()`, and if that code changes, it should be adapted here as well.
116 | ```
117 |     dimension = self._get_shape_column(dimension_name) # raises an exception if dimension not found
118 |     path = URIRef(self._base_uri + dimension.get("path"))
119 | ```	
120 | ## Additional fields for concepts
121 | It is possible to add additional properties (fields) for a concept.
122 | 
123 | In the example, airportType.csv contains two more fields:
124 | - description: a multilingual field to add a description for the airport type
125 | - other_property_example: another string field as an example
126 | 
127 | Those fields are configured directly in the description.yml, for the concept itself:
128 | ```
129 | 	Concepts:
130 | 		typeOfAirport:
131 | 			URI: /airport_type/{typeOfAirportID}/{typeOfAirportSecondID}
132 | 			name-field: typeOfAirport
133 | 			position-field: position
134 | 			multilingual: true 
135 | 			other-fields:
136 | 				description:
137 | 					URI: http://schema.org/description
138 | 					multilingual: true
139 | 				other_property_example:
140 | 					URI: /airport_type/other_property_example   
141 | ```		
142 | where:
143 | - other-fields is optional and will be omitted if the concept has no other field
144 | - key: the key of the field (`description`, `other_property_example`) must match the name of the field in the data file
145 | - URI: the URI to use as the RDF property for that field. It is either a full URI as `http://schema.org/description` that will be used as-is,
146 | or a relative path that starts with a "/" and that will be concatenated to the cube's URL, adding first a `/concept/prop` path.
147 | `URI` was intentionally used instead of the common `path` key, as the behavior is currently different (handling of relative or full path)
148 | But the behavior and the name of the field could be harmonized in all cases
149 | - multilingual: optional and similar to the multilingual handling for the concept's name. If true, the code will look for columns named `key` + the language tags (_en, _de,_fr, etc). In the given example, for `description`, it will look for `description_en`, `description_fr`, etc.
150 | 
151 | The data type is deduced by the current `pycube._sanitize_value()`, except when `multilingual` is true and the expected value is a string.
152 | 
153 | The RDF result is:
154 | ```
155 | <https://mock-concept.ld.admin.ch/cube/mock-concept/1/concept/airport_type/A/a> schema1:name "Inlandflughafen"@de,
156 |         "Domestic airport"@en,
157 |         "Aéroport national"@fr ;
158 | 	schema1:description "Domestic airport description"@en,
159 |         "Description de Aéroport national"@fr ;
160 |     schema1:position 1 ;
161 |     schema1:sameAs <https://mock-concept.ld.admin.ch/cube/mock-concept/concept/airport_type/A/a> ;
162 |     ns1:other_property_example "another property example for domesctic airport" .
163 | ```	
164 | 
165 | ##  Run the example
166 | Run [example_concept.py](example_concept.py) that will generate the [cube_with_concept.ttl](cube_with_concept.ttl)
167 | 
168 | 


--------------------------------------------------------------------------------
/pylindas/fetch.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Utils to download a data.europa.eu dataset with frictionless metadata,
  3 | and generate a description.json.
  4 | 
  5 | TODO: Make it  more agnostic from data.europa.eu
  6 | """
  7 | 
  8 | import requests
  9 | import json
 10 | import os
 11 | from datetime import datetime
 12 | from typing import Dict, Any, List
 13 | from jsonschema import Draft202012Validator, validate
 14 | from jsonschema.exceptions import ValidationError
 15 | 
 16 | import logging
 17 | 
 18 | logger = logging.getLogger('pycube')
 19 | 
 20 | def download_json(url):
 21 |     response = requests.get(url)
 22 |     response.raise_for_status()
 23 |     return response.json()
 24 | 
 25 | 
 26 | def read_schema(schema_path):
 27 |     with open(schema_path, 'r') as f:
 28 |         return json.load(f)
 29 | 
 30 | class DataEuropaFetcher(object):
 31 |     """
 32 |     In the future, the class should be splitted into frictionless parsing methods and
 33 |     data.europa.eu fetch methods.
 34 |     """
 35 |     def __init__(self):
 36 |         pass
 37 | 
 38 |     def _transform_url(self, input_url):
 39 |         dataset_id = input_url.split('/')[-1].split('?')[0]
 40 |         return f"https://data.europa.eu/api/hub/search/datasets/{dataset_id}"
 41 | 
 42 | 
 43 |     def _extract_metadata(self, data):
 44 |         metadata = {
 45 |             "title": {
 46 |                 "en": data['result']['title'].get('en', ''),
 47 |                 "de": data['result']['title'].get('de', '')
 48 |             },
 49 |             "description": {
 50 |                 "en": data['result']['description'].get('en', ''),
 51 |                 "de": data['result']['description'].get('de', '')
 52 |             },
 53 |             "publisher": data['result'].get('publisher', '')
 54 |         }
 55 |         return metadata
 56 | 
 57 | 
 58 |     def _get_distributions(self, distributions):
 59 |         csv_data = None
 60 |         frictionless_data = None
 61 |         for distribution in distributions:
 62 |             if distribution['title'].get('en') == "Frictionless Tabular Data Resource":
 63 |                 frictionless_url = distribution['access_url'][0]
 64 |                 frictionless_data = requests.get(frictionless_url).json()
 65 |             if distribution['format'].get('id') == "CSV":
 66 |                 csv_url = distribution['access_url'][0]
 67 |                 csv_data = requests.get(csv_url).content
 68 |         return {
 69 |             "frictionless": frictionless_data,
 70 |             "csv": csv_data
 71 |         }
 72 | 
 73 | 
 74 |     def _infer_dimension_type(self, field: Dict[Any, Any], primary_keys: List[str]) -> str:
 75 |         """Infer the dimension type based on field properties."""
 76 |         if field['name'] in primary_keys:
 77 |             return "Key Dimension"
 78 |         return "Measure Dimension"
 79 | 
 80 | 
 81 |     def _infer_scale_type(self, field: Dict[Any, Any]) -> str:
 82 |         """Infer the scale type based on field properties."""
 83 |         field_type = field.get("type")
 84 |         if field_type == "string":
 85 |             return "nominal"
 86 |         elif field_type == "integer":
 87 |             return "interval"
 88 |         elif field_type == "number":
 89 |             return "ratio"
 90 |         return "nominal"  # default
 91 | 
 92 | 
 93 |     def _infer_temporal_dimension(self, field: Dict[Any, Any]) -> bool:
 94 |         """Infer if the field is a temporal dimension."""
 95 |         field_type = field.get("type")
 96 |         if field_type == "date":
 97 |             return True
 98 |         if field_type == "time":
 99 |             return True
100 |         field_name = field['name']
101 |         if field_name.lower() in ["jahr", "year", "date", "datum"]:
102 |             logger.warning(f'Dimension {field_name}: Temporal dimension inferred from field name. Please verify.')
103 |             return True
104 | 
105 | 
106 | 
107 |     def _generate_dimensions(self, data_metadata: Dict[Any, Any]) -> Dict[str, Dict[Any, Any]]:
108 |         """Generate dimensions from data metadata schema."""
109 |         dimensions = {}
110 |         
111 |         primary_key = data_metadata["schema"].get('primaryKey', [])
112 |         primary_keys = primary_key if isinstance(primary_key, list) else [primary_key]
113 |         if not primary_keys:
114 |             first_field = data_metadata["schema"]["fields"][0]["name"]
115 |             logger.warning(f"Primary key not found in schema. Using first field {first_field} as primary key. You may need to adjust Key/Measure Dimension manually.")
116 |             primary_key = first_field
117 | 
118 |         for field in data_metadata["schema"]["fields"]:
119 |             field_name = field["name"]
120 |             
121 |             # Create dimension object
122 |             dimension = {
123 |                 "name": {
124 |                     "de": field.get("title", field_name),
125 |                     "en": field.get("title", field_name)
126 |                 },
127 |                 "dimension-type": self._infer_dimension_type(field, primary_keys),
128 |                 "scale-type": self._infer_scale_type(field),
129 |                 "path": field_name,
130 |                 "description": {
131 |                     "de": field.get("description", f"Beschreibung für {field_name}"),
132 |                     "en": field.get("description", f"Description for {field_name}")
133 |                 }
134 |             }
135 |             
136 |             # Add unit if present
137 |             if "unit" in field:
138 |                 dimension["unit"] = field["unit"]
139 |             
140 |             # Add data-kind if temporal
141 |             if self._infer_temporal_dimension(field):
142 |                 dimension["data-kind"] = {
143 |                     "type": "temporal",
144 |                     "unit": "year"
145 |                 }
146 |                 
147 |             dimensions[field_name] = dimension
148 |         
149 |         return dimensions
150 | 
151 | 
152 |     def _transform_metadata(self, metadata: Dict[Any, Any], data_metadata: Dict[Any, Any]) -> Dict[Any, Any]:
153 |         """Transform metadata to conform to the JSON schema."""
154 |         
155 |         output = {
156 |             "Name": {
157 |                 "de": metadata["title"]["de"],
158 |                 "en": metadata["title"]["en"]
159 |             },
160 |             "Description": {
161 |                 "de": metadata["description"]["de"],
162 |                 "en": metadata["description"]["en"]
163 |             },
164 |             "Publisher": [
165 |                 {
166 |                     "IRI": metadata["publisher"]["resource"]
167 |                 }
168 |             ],
169 |             "Creator": [
170 |                 {
171 |                     "IRI": metadata["publisher"]["resource"]
172 |                 }
173 |             ],
174 | 
175 |             "Contributor": [],
176 | 
177 |             "Date Created": datetime.now().isoformat(),
178 |             "Contact Point": {
179 |                 "E-Mail": "opendata@example.ch",  # Example email
180 |                 "Name": metadata["publisher"]["name"]
181 |             },
182 |             "Base-URI": data_metadata["path"],
183 |             "Identifier": data_metadata["name"],
184 |             "Version": 0.1,
185 |             "Work Status": "Draft",
186 |             "Visualize": True,
187 |             "Accrual Periodicity": "",
188 |             "Namespace": "https://opendata.example.ch",
189 |             "dimensions": self._generate_dimensions(data_metadata)
190 |         }
191 |         
192 |         return output
193 | 
194 | 
195 |     def fetch_dataset(self, input_url, output_dir):
196 |         transformed_url = self._transform_url(input_url)
197 |         data = download_json(transformed_url)
198 | 
199 |         metadata = self._extract_metadata(data)
200 | 
201 |         data_csv_filename = os.path.join(output_dir, 'data.csv')
202 |         description_json_filename = os.path.join(output_dir, 'description.json')
203 |         frictionless_json_filename = os.path.join(output_dir, 'frictionless.json')
204 | 
205 |         distributions = self._get_distributions(data['result']['distributions'])
206 | 
207 |         logger.info(f"Writing {data_csv_filename}")
208 |         with open(data_csv_filename, 'wb') as f:
209 |             f.write(distributions['csv'])
210 | 
211 |         logger.info(f"Writing {frictionless_json_filename}")
212 |         with open(frictionless_json_filename, 'w') as f:
213 |             f.write(json.dumps(distributions['frictionless'], indent=2))
214 | 
215 |         current_file_dir = os.path.dirname(os.path.realpath(__file__))
216 |         description_schema_path = os.path.join(current_file_dir, 'description.schema.json')
217 |         description_schema = read_schema(description_schema_path)
218 |         description = self._transform_metadata(metadata, distributions['frictionless'])
219 |         schema_path = os.path.relpath(
220 |             description_schema_path,
221 |             start=os.path.join(os.getcwd(), output_dir)
222 |         )
223 |         description = {
224 |             "$schema": f"{schema_path}",
225 |             **description
226 |         }
227 |         logger.info(f"Writing {description_json_filename}")
228 |         with open(description_json_filename, 'w') as f:
229 |             f.write(json.dumps(description, indent=2))
230 |         
231 |         validator = Draft202012Validator(description_schema)
232 |         errors = list(validator.iter_errors(description))
233 |         for error in errors:
234 |             logger.warning(f"Validation Error: {error.message}")
235 | 
236 |         serialize_command = f"""# You may want to adjust the command with --sep and --decimal, depending on the data.csv
237 | python cli.py serialize {output_dir} {os.path.join(output_dir, 'cube.ttl')}"""
238 |         if len(errors):
239 |             logger.warning(f"""The data and description have been downloaded. There were validation errors during description validation, you should fix them before running the following command
240 | 
241 | {serialize_command}
242 | """)
243 |         else:
244 |             logger.info(f"""Success ! The data and description have been downloaded, you may now verify it, adjust it, and then run serialize to create RDF triples
245 | 
246 | {serialize_command}
247 | """)
248 | 
249 | 
def fetch(input_url: str, output_dir: str):
    """Download the dataset at ``input_url`` into ``output_dir``.

    Args:
        input_url: URL of the dataset; only data.europa.eu is supported.
        output_dir: Directory receiving the files; created if it does not
            exist. An empty value is passed on unchanged and no directory
            is created.

    Raises:
        ValueError: If ``input_url`` does not point to data.europa.eu.
    """
    # Validate the source first so that an unsupported URL does not leave
    # an empty output directory behind.
    if not input_url.startswith('https://data.europa.eu'):
        raise ValueError('Only supporting datasets from data.europa.eu at the moment, make sure your input URL starts with https://data.europa.eu')

    # create the output dir if it does not exist
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fetcher = DataEuropaFetcher()
    fetcher.fetch_dataset(input_url, output_dir)


--------------------------------------------------------------------------------