├── .cargo ├── audit.toml └── config.toml ├── .github └── workflows │ ├── audit.yaml │ ├── docs.yaml │ ├── python_release.yml │ ├── python_tests.yml │ └── rust_tests.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── TODOS.txt ├── examples ├── .gitignore └── mapping │ ├── iris.csv │ └── mapping.ipynb ├── flake.lock ├── flake.nix ├── lib ├── cimxml │ ├── Cargo.toml │ └── src │ │ ├── export.rs │ │ └── lib.rs ├── datalog │ ├── Cargo.toml │ └── src │ │ ├── ast.rs │ │ ├── inference.rs │ │ ├── lib.rs │ │ └── parser.rs ├── file_io │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── fts │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── maplib │ ├── Cargo.toml │ └── src │ │ ├── errors.rs │ │ ├── lib.rs │ │ ├── mapping.rs │ │ └── mapping │ │ ├── constant_terms.rs │ │ ├── default.rs │ │ ├── errors.rs │ │ ├── expansion.rs │ │ └── expansion │ │ └── validation.rs ├── parquet_io │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── pydf_io │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── licensing │ │ └── POLARS_LICENSE │ └── src │ │ ├── lib.rs │ │ ├── to_python.rs │ │ └── to_rust.rs ├── query_processing │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ └── src │ │ ├── aggregates.rs │ │ ├── cats.rs │ │ ├── constants.rs │ │ ├── errors.rs │ │ ├── exists_helper.rs │ │ ├── expressions.rs │ │ ├── expressions │ │ ├── comparisons.rs │ │ ├── functions.rs │ │ └── operations.rs │ │ ├── find_query_variables.rs │ │ ├── graph_patterns.rs │ │ ├── graph_patterns │ │ ├── cats.rs │ │ ├── group.rs │ │ ├── join.rs │ │ ├── order.rs │ │ ├── union.rs │ │ └── values.rs │ │ ├── lib.rs │ │ ├── pushdowns.rs │ │ └── type_constraints.rs ├── report_mapping │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── representation │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ └── src │ │ ├── base_rdf_type.rs │ │ ├── cats.rs │ │ ├── cats │ │ ├── decode.rs │ │ ├── encode.rs │ │ ├── globalize.rs │ │ ├── image.rs │ │ ├── re_encode.rs │ │ └── split.rs │ │ ├── dataset.rs │ │ ├── errors.rs │ │ ├── formatting.rs │ │ ├── lib.rs │ │ ├── literals.rs │ │ ├── multitype.rs │ │ ├── polars_to_rdf.rs │ │ ├── python.rs │ │ ├── query_context.rs │ │ ├── rdf_state.rs │ │ ├── rdf_to_polars.rs │ │ ├── rdf_type.rs │ │ ├── solution_mapping.rs │ │ └── subtypes.rs ├── shacl │ ├── Cargo.toml │ └── src │ │ ├── errors.rs │ │ ├── lib.rs │ │ └── storage.rs ├── spargebra │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ └── src │ │ ├── algebra.rs │ │ ├── lib.rs │ │ ├── parser.rs │ │ ├── query.rs │ │ ├── query_context.rs │ │ ├── remove_sugar.rs │ │ ├── term.rs │ │ ├── treehouse.rs │ │ └── update.rs ├── templates │ ├── Cargo.toml │ └── src │ │ ├── ast.rs │ │ ├── compatible.rs │ │ ├── constants.rs │ │ ├── dataset.rs │ │ ├── dataset │ │ └── errors.rs │ │ ├── document.rs │ │ ├── lib.rs │ │ ├── parsing.rs │ │ ├── parsing │ │ ├── errors.rs │ │ ├── nom_parsing.rs │ │ ├── parser_test.rs │ │ └── parsing_ast.rs │ │ ├── python.rs │ │ ├── resolver.rs │ │ └── subtypes_ext.rs ├── triplestore │ ├── Cargo.toml │ └── src │ │ ├── cats.rs │ │ ├── dblf.rs │ │ ├── errors.rs │ │ ├── io_funcs.rs │ │ ├── lib.rs │ │ ├── native_parquet_write.rs │ │ ├── query_solutions.rs │ │ ├── rdfs_inferencing.rs │ │ ├── sparql.rs │ │ ├── sparql │ │ ├── delete.rs │ │ ├── errors.rs │ │ ├── insert.rs │ │ ├── lazy_aggregate.rs │ │ ├── lazy_expressions.rs │ │ ├── lazy_graph_patterns.rs │ │ ├── lazy_graph_patterns │ │ │ ├── distinct.rs │ │ │ ├── extend.rs │ │ │ ├── filter.rs │ │ │ ├── group.rs │ │ │ ├── join.rs │ │ │ ├── left_join.rs │ │ │ ├── minus.rs │ │ │ ├── order_by.rs │ │ │ ├── 
path.rs │ │ │ ├── project.rs │ │ │ ├── pvalues.rs │ │ │ ├── triple.rs │ │ │ ├── triples_ordering.rs │ │ │ ├── union.rs │ │ │ └── values.rs │ │ ├── lazy_order.rs │ │ ├── rewrite.rs │ │ └── rewrite │ │ │ ├── rewrite_cse.rs │ │ │ └── rewrite_pushdown.rs │ │ ├── storage.rs │ │ ├── triples_read.rs │ │ ├── triples_write.rs │ │ └── triples_write │ │ ├── fast_ntriples.rs │ │ └── serializers.rs └── utils │ ├── Cargo.toml │ └── src │ ├── lib.rs │ └── polars.rs ├── licensing └── POLARS_LICENSE ├── nix └── py_maplib │ ├── default.nix │ └── pytest.nix └── py_maplib ├── Cargo.toml ├── LICENSE ├── README.md ├── maplib ├── .gitignore ├── __init__.py ├── __init__.pyi ├── adding_triples.py └── py.typed ├── pyproject.toml ├── src ├── error.rs ├── lib.rs └── shacl.rs └── tests ├── .gitignore ├── __init__.py ├── out.ttl ├── requirements.txt ├── test_basics.py ├── test_blank_nodes_multi.py ├── test_data_validation.py ├── test_exceptions.py ├── test_integration.py ├── test_multi_expressions.py ├── test_pizza_example.py ├── test_programmatic_pizza_example.py ├── test_rdf_parser.py ├── test_read_write.py ├── test_stottr.py ├── test_validate_iris.py └── testdata ├── expected_easy_case.ttl ├── iterated_property_path_constant_object_query.csv ├── iterated_property_path_constant_subject_query.csv ├── iterated_property_path_query.csv ├── iterated_property_path_query_with_bug.csv ├── larger_ordered_query.csv ├── larger_query.csv ├── multi_concat.csv ├── multi_datatype_join_query.csv ├── multi_datatype_join_query_two_vars.csv ├── multi_datatype_leftjoin_query.csv ├── multi_datatype_query.csv ├── multi_datatype_query_sorting.csv ├── multi_datatype_union_query.csv ├── multi_datatype_union_query_native_df.parquet ├── multi_datatype_union_sort_desc1_query.csv ├── multi_datatype_union_sort_query.csv ├── multi_many_comp.csv ├── property_path_query.csv ├── rdf_parser └── date_panic.nt ├── read_ntriples.csv ├── read_ntriples.nt ├── read_ntriples2.csv ├── simple_construct_query_nothing.csv ├── simple_construct_query_something.csv ├── simple_insert_query_nothing.csv ├── simple_insert_query_something.csv ├── simple_property_path_query.csv ├── simple_query.csv └── stringfuncs.csv /.cargo/audit.toml: -------------------------------------------------------------------------------- 1 | [advisories] 2 | ignore = ["RUSTSEC-2024-0436"] # Unmaintained package "paste" used by Polars -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | jobs = 8 -------------------------------------------------------------------------------- /.github/workflows/audit.yaml: -------------------------------------------------------------------------------- 1 | name: Security audit 2 | on: 3 | push: 4 | paths: 5 | - '**/Cargo.toml' 6 | - '**/Cargo.lock' 7 | jobs: 8 | security_audit: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - uses: actions-rs/audit-check@v1.2.0 13 | with: 14 | token: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | #Based on https://github.com/mitmproxy/pdoc/blob/main/.github/workflows/docs.yml 2 | name: website 3 | 4 | # build the documentation whenever there are new changes on main 5 | on: 6 | push: 7 | branches: 8 | - main 9 | 10 | # security: restrict permissions for CI jobs. 
11 | permissions:
12 |   contents: read
13 |
14 | env:
15 |   CARGO_TERM_COLOR: always
16 |   RUST_LOG: debug
17 |   MATURIN_VERSION: '1.9.4'
18 |   RUST_TOOLCHAIN: nightly-2025-08-29
19 |
20 | jobs:
21 |   # Build the documentation and upload the static HTML files as an artifact.
22 |   build:
23 |     runs-on: ubuntu-latest
24 |
25 |     steps:
26 |       - uses: actions/checkout@v4
27 |
28 |       - name: Install dependencies
29 |         run: |
30 |           python -m venv .venv
31 |           source .venv/bin/activate
32 |           pip install --upgrade pip
33 |           pip install "maturin[patchelf]"==${{ env.MATURIN_VERSION }}
34 |           pip install -r tests/requirements.txt
35 |           pip install pdoc
36 |         working-directory: ./py_maplib
37 |
38 |       - name: Set up nightly rust
39 |         run: |
40 |           rustup toolchain install ${{ env.RUST_TOOLCHAIN }}
41 |           rustup default ${{ env.RUST_TOOLCHAIN }}
42 |
43 |       - name: Build and install package, then build docs
44 |         run: |
45 |           source .venv/bin/activate
46 |           maturin develop
47 |           pdoc -o docs/ maplib
48 |         working-directory: ./py_maplib
49 |
50 |       - uses: actions/upload-pages-artifact@v3
51 |         with:
52 |           path: py_maplib/docs/
53 |
54 |   # Deploy the artifact to GitHub pages.
55 |   # This is a separate job so that only actions/deploy-pages has the necessary permissions.
56 |   deploy:
57 |     needs: build
58 |     runs-on: ubuntu-latest
59 |     permissions:
60 |       pages: write
61 |       id-token: write
62 |     environment:
63 |       name: github-pages
64 |       url: ${{ steps.deployment.outputs.page_url }}
65 |     steps:
66 |       - id: deployment
67 |         uses: actions/deploy-pages@v4
--------------------------------------------------------------------------------
/.github/workflows/python_release.yml:
--------------------------------------------------------------------------------
1 | # Adapted from https://github.com/pola-rs/polars/blob/main/.github/workflows/release-python.yml
2 | # With license found in licensing/POLARS_LICENSE
3 |
4 | name: Release Python
5 |
6 | on:
7 |   push:
8 |     tags:
9 |       - 'v[0-9]+.[0-9]+.[0-9]+'
10 |
11 | permissions:
12 |   contents: write
13 |
14 | env:
15 |   CARGO_TERM_COLOR: always
16 |   RUST_TOOLCHAIN: nightly-2025-08-29
17 |   MATURIN_VERSION: '1.9.4'
18 |   MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
19 |
20 | jobs:
21 |   security_audit:
22 |     runs-on: ubuntu-latest
23 |     steps:
24 |       - uses: actions/checkout@v4
25 |       - uses: actions-rs/audit-check@v1.2.0
26 |         with:
27 |           token: ${{ secrets.GITHUB_TOKEN }}
28 |
29 |   manylinux-x86_64:
30 |     needs: security_audit
31 |     runs-on: ubuntu-latest
32 |     environment: release
33 |     steps:
34 |       - uses: actions/checkout@v4
35 |       - uses: actions/setup-python@v4
36 |         with:
37 |           python-version: '3.12'
38 |
39 |       - name: Fix README symlink
40 |         run: |
41 |           rm py_maplib/README.md
42 |           cp README.md py_maplib/README.md
43 |
44 |       - name: Publish wheel
45 |         uses: PyO3/maturin-action@v1
46 |         env:
47 |           RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
48 |         with:
49 |           command: publish
50 |           args: -m py_maplib/Cargo.toml --skip-existing -o wheels -u magbak --features abi3
51 |           maturin-version: ${{ env.MATURIN_VERSION }}
52 |           rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
53 |           manylinux: 2_28
54 |
55 |   win-macos:
56 |     needs: security_audit
57 |     runs-on: ${{ matrix.os }}
58 |     strategy:
59 |       fail-fast: false
60 |       matrix:
61 |         os: [ macos-latest, windows-latest ]
62 |     environment: release
63 |     steps:
64 |       - uses: actions/checkout@v4
65 |       - uses: actions/setup-python@v4
66 |         with:
67 |           python-version: '3.12'
68 |
69 |       - name: Fix README symlink
70 |         run: |
71 |           rm
py_maplib/README.md 72 | cp README.md py_maplib/README.md 73 | 74 | - name: Publish wheel 75 | uses: PyO3/maturin-action@v1 76 | env: 77 | RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2 78 | with: 79 | command: publish 80 | args: -m py_maplib/Cargo.toml --no-sdist --skip-existing -o wheels -u magbak --features abi3 81 | maturin-version: ${{ env.MATURIN_VERSION }} 82 | rust-toolchain: ${{ env.RUST_TOOLCHAIN }} 83 | 84 | # macos-aarch64: 85 | # needs: security_audit 86 | # runs-on: macos-latest 87 | # environment: release 88 | # strategy: 89 | # fail-fast: false 90 | # matrix: 91 | # python-version: [ '3.9', '3.10', '3.11' ] 92 | # steps: 93 | # - uses: actions/checkout@v4 94 | # - uses: actions/setup-python@v4 95 | # with: 96 | # python-version: ${{ matrix.python-version }} 97 | # 98 | # - name: Fix README symlink 99 | # run: | 100 | # rm py_maplib/README.md 101 | # cp README.md py_maplib/README.md 102 | # 103 | # - name: Set up Rust targets 104 | # run: rustup target add aarch64-apple-darwin 105 | # 106 | # - name: Publish wheel 107 | # uses: PyO3/maturin-action@v1 108 | # with: 109 | # command: publish 110 | # args: -m py_maplib/Cargo.toml --target aarch64-apple-darwin --no-sdist -o wheels -i python${{ matrix.python-version }} -u magbak 111 | # maturin-version: ${{ env.MATURIN_VERSION }} 112 | # rust-toolchain: ${{ env.RUST_TOOLCHAIN }} 113 | -------------------------------------------------------------------------------- /.github/workflows/python_tests.yml: -------------------------------------------------------------------------------- 1 | name: Python tests 2 | 3 | on: 4 | push: 5 | branches: [ main, "feature/*" ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | RUST_LOG: debug 12 | MATURIN_VERSION: '1.9.4' 13 | RUST_TOOLCHAIN: nightly-2025-08-29 14 | PYTHON_VERSION: '3.12' 15 | 16 | jobs: 17 | build_and_test: 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ env.PYTHON_VERSION }} 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m venv .venv 30 | source .venv/bin/activate 31 | pip install --upgrade pip 32 | pip install "maturin[patchelf]"==${{ env.MATURIN_VERSION }} 33 | pip install -r tests/requirements.txt 34 | working-directory: ./py_maplib 35 | 36 | - name: Set up nightly rust 37 | run: | 38 | rustup toolchain install ${{ env.RUST_TOOLCHAIN }} 39 | rustup default ${{ env.RUST_TOOLCHAIN }} 40 | 41 | - name: Build install package 42 | run: | 43 | source .venv/bin/activate 44 | maturin develop 45 | working-directory: ./py_maplib 46 | 47 | - name: pytest 48 | run: | 49 | source ../.venv/bin/activate 50 | pytest 51 | working-directory: ./py_maplib/tests 52 | -------------------------------------------------------------------------------- /.github/workflows/rust_tests.yml: -------------------------------------------------------------------------------- 1 | name: Rust tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | RUST_LOG: debug 12 | RUST_TOOLCHAIN: nightly-2025-08-29 13 | 14 | jobs: 15 | build_and_test: 16 | runs-on: 17 | group: ubuntu_runners 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Upgrade rust with nightly 23 | run: | 24 | rustup update 25 | rustup toolchain install ${{ env.RUST_TOOLCHAIN }} 26 | rustup default ${{ env.RUST_TOOLCHAIN }} 27 | 28 | - name: Build 29 | run: cargo build 
--verbose --all 30 | - name: Run rust tests 31 | run: cargo test --verbose --all 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | /old_fun 3 | __pycache__ 4 | .idea 5 | 6 | 7 | # Generated by Cargo 8 | # will have compiled files and executables 9 | /target/ 10 | 11 | # These are backup files generated by rustfmt 12 | **/*.rs.bk 13 | 14 | # Nix 15 | **/result 16 | **/result-* 17 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "lib/utils", 5 | "lib/representation", 6 | "lib/maplib", 7 | "lib/triplestore", 8 | "lib/file_io", 9 | "lib/pydf_io", 10 | "lib/query_processing", 11 | "lib/spargebra", 12 | "lib/shacl", 13 | "lib/fts", 14 | "lib/datalog", 15 | "lib/cimxml", 16 | "py_maplib", 17 | ] 18 | 19 | [workspace.package] 20 | rust-version = "1.88.0" 21 | 22 | [workspace.dependencies] 23 | pyo3 = {version = "0.25", features = ["py-clone"] } 24 | pyo3-log = {version = "0.12.3" } 25 | polars = { version = "0.51.0", features = ["nightly", "new_streaming", "performant", "cse", "fmt", "parquet"], default-features = false } 26 | polars-core = { version = "0.51.0", default-features = false } 27 | rayon = "1.10.0" 28 | regex = "1.11.1" 29 | oxrdf = { version="0.2.4" } 30 | oxrdfio = { version = "0.1.7" } 31 | oxttl = { version="0.1.7" } 32 | oxiri = "0.2.11" 33 | sparesults = "0.2.4" 34 | oxsdatatypes = "0.2.2" 35 | env_logger = "0.11.8" 36 | log = "0.4.27" 37 | chrono = "0.4.41" 38 | chrono-tz = "0.10.3" 39 | uuid = { version = "1.16.0", features = [ 40 | "v4", # Lets you generate random UUIDs 41 | "fast-rng", # Use a faster (but still sufficiently random) RNG 42 | ] } 43 | thiserror = "2.0.12" 44 | nom = { version = "7.1.3" } 45 | peg = "0.8" 46 | rand = "0.9.1" 47 | oxilangtag = "0.1.5" 48 | fundu = "2.0.1" 49 | memmap2 = "0.9.5" 50 | sprs = "0.11.3" 51 | walkdir = "2.5.0" 52 | 53 | tracing = { version = "0.1", features = [ "log" ] } 54 | tracing-subscriber = { version = "0.3.19", features = [ "env-filter" ] } 55 | tracing-log = "0.2" 56 | 57 | itoa = "1.0.15" 58 | ryu = "1.0.20" 59 | #dev-dependencies 60 | rstest = "0.25.0" 61 | serial_test = "3.2.0" 62 | nohash-hasher = "0.2.0" 63 | 64 | [patch.crates-io] 65 | #polars = { git = 'https://github.com/pola-rs/polars', rev="665722ac3f3664c589c4827208d173cc16f0ec68" } 66 | #polars-core = { git = 'https://github.com/pola-rs/polars', rev="665722ac3f3664c589c4827208d173cc16f0ec68" } 67 | -------------------------------------------------------------------------------- /TODOS.txt: -------------------------------------------------------------------------------- 1 | TODO list: 2 | - Detect situation with incompatible datatypes and recover 3 | - What happens in unions when one side has categorical string..? 
Should cast 4 | - Get rid of "columns" in solution mappings, as datatypes sorts this out 5 | - Improve quality of datatype inference in expressions 6 | - Create tests for folder cleanup with caching folder 7 | - Create write native parquet test 8 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | fts -------------------------------------------------------------------------------- /examples/mapping/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal_length","sepal_width","petal_length","petal_width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 4.7,3.2,1.3,.2,"Setosa" 5 | 4.6,3.1,1.5,.2,"Setosa" 6 | 5,3.6,1.4,.2,"Setosa" 7 | 5.4,3.9,1.7,.4,"Setosa" 8 | 4.6,3.4,1.4,.3,"Setosa" 9 | 5,3.4,1.5,.2,"Setosa" 10 | 4.4,2.9,1.4,.2,"Setosa" 11 | 4.9,3.1,1.5,.1,"Setosa" 12 | 5.4,3.7,1.5,.2,"Setosa" 13 | 4.8,3.4,1.6,.2,"Setosa" 14 | 4.8,3,1.4,.1,"Setosa" 15 | 4.3,3,1.1,.1,"Setosa" 16 | 5.8,4,1.2,.2,"Setosa" 17 | 5.7,4.4,1.5,.4,"Setosa" 18 | 5.4,3.9,1.3,.4,"Setosa" 19 | 5.1,3.5,1.4,.3,"Setosa" 20 | 5.7,3.8,1.7,.3,"Setosa" 21 | 5.1,3.8,1.5,.3,"Setosa" 22 | 5.4,3.4,1.7,.2,"Setosa" 23 | 5.1,3.7,1.5,.4,"Setosa" 24 | 4.6,3.6,1,.2,"Setosa" 25 | 5.1,3.3,1.7,.5,"Setosa" 26 | 4.8,3.4,1.9,.2,"Setosa" 27 | 5,3,1.6,.2,"Setosa" 28 | 5,3.4,1.6,.4,"Setosa" 29 | 5.2,3.5,1.5,.2,"Setosa" 30 | 5.2,3.4,1.4,.2,"Setosa" 31 | 4.7,3.2,1.6,.2,"Setosa" 32 | 4.8,3.1,1.6,.2,"Setosa" 33 | 5.4,3.4,1.5,.4,"Setosa" 34 | 5.2,4.1,1.5,.1,"Setosa" 35 | 5.5,4.2,1.4,.2,"Setosa" 36 | 4.9,3.1,1.5,.2,"Setosa" 37 | 5,3.2,1.2,.2,"Setosa" 38 | 5.5,3.5,1.3,.2,"Setosa" 39 | 4.9,3.6,1.4,.1,"Setosa" 40 | 4.4,3,1.3,.2,"Setosa" 41 | 5.1,3.4,1.5,.2,"Setosa" 42 | 5,3.5,1.3,.3,"Setosa" 43 | 4.5,2.3,1.3,.3,"Setosa" 44 | 4.4,3.2,1.3,.2,"Setosa" 45 | 5,3.5,1.6,.6,"Setosa" 46 | 5.1,3.8,1.9,.4,"Setosa" 47 | 4.8,3,1.4,.3,"Setosa" 48 | 5.1,3.8,1.6,.2,"Setosa" 49 | 4.6,3.2,1.4,.2,"Setosa" 50 | 5.3,3.7,1.5,.2,"Setosa" 51 | 5,3.3,1.4,.2,"Setosa" 52 | 7,3.2,4.7,1.4,"Versicolor" 53 | 6.4,3.2,4.5,1.5,"Versicolor" 54 | 6.9,3.1,4.9,1.5,"Versicolor" 55 | 5.5,2.3,4,1.3,"Versicolor" 56 | 6.5,2.8,4.6,1.5,"Versicolor" 57 | 5.7,2.8,4.5,1.3,"Versicolor" 58 | 6.3,3.3,4.7,1.6,"Versicolor" 59 | 4.9,2.4,3.3,1,"Versicolor" 60 | 6.6,2.9,4.6,1.3,"Versicolor" 61 | 5.2,2.7,3.9,1.4,"Versicolor" 62 | 5,2,3.5,1,"Versicolor" 63 | 5.9,3,4.2,1.5,"Versicolor" 64 | 6,2.2,4,1,"Versicolor" 65 | 6.1,2.9,4.7,1.4,"Versicolor" 66 | 5.6,2.9,3.6,1.3,"Versicolor" 67 | 6.7,3.1,4.4,1.4,"Versicolor" 68 | 5.6,3,4.5,1.5,"Versicolor" 69 | 5.8,2.7,4.1,1,"Versicolor" 70 | 6.2,2.2,4.5,1.5,"Versicolor" 71 | 5.6,2.5,3.9,1.1,"Versicolor" 72 | 5.9,3.2,4.8,1.8,"Versicolor" 73 | 6.1,2.8,4,1.3,"Versicolor" 74 | 6.3,2.5,4.9,1.5,"Versicolor" 75 | 6.1,2.8,4.7,1.2,"Versicolor" 76 | 6.4,2.9,4.3,1.3,"Versicolor" 77 | 6.6,3,4.4,1.4,"Versicolor" 78 | 6.8,2.8,4.8,1.4,"Versicolor" 79 | 6.7,3,5,1.7,"Versicolor" 80 | 6,2.9,4.5,1.5,"Versicolor" 81 | 5.7,2.6,3.5,1,"Versicolor" 82 | 5.5,2.4,3.8,1.1,"Versicolor" 83 | 5.5,2.4,3.7,1,"Versicolor" 84 | 5.8,2.7,3.9,1.2,"Versicolor" 85 | 6,2.7,5.1,1.6,"Versicolor" 86 | 5.4,3,4.5,1.5,"Versicolor" 87 | 6,3.4,4.5,1.6,"Versicolor" 88 | 6.7,3.1,4.7,1.5,"Versicolor" 89 | 6.3,2.3,4.4,1.3,"Versicolor" 90 | 5.6,3,4.1,1.3,"Versicolor" 91 | 5.5,2.5,4,1.3,"Versicolor" 92 | 5.5,2.6,4.4,1.2,"Versicolor" 93 | 6.1,3,4.6,1.4,"Versicolor" 94 | 5.8,2.6,4,1.2,"Versicolor" 95 | 5,2.3,3.3,1,"Versicolor" 
96 | 5.6,2.7,4.2,1.3,"Versicolor" 97 | 5.7,3,4.2,1.2,"Versicolor" 98 | 5.7,2.9,4.2,1.3,"Versicolor" 99 | 6.2,2.9,4.3,1.3,"Versicolor" 100 | 5.1,2.5,3,1.1,"Versicolor" 101 | 5.7,2.8,4.1,1.3,"Versicolor" 102 | 6.3,3.3,6,2.5,"Virginica" 103 | 5.8,2.7,5.1,1.9,"Virginica" 104 | 7.1,3,5.9,2.1,"Virginica" 105 | 6.3,2.9,5.6,1.8,"Virginica" 106 | 6.5,3,5.8,2.2,"Virginica" 107 | 7.6,3,6.6,2.1,"Virginica" 108 | 4.9,2.5,4.5,1.7,"Virginica" 109 | 7.3,2.9,6.3,1.8,"Virginica" 110 | 6.7,2.5,5.8,1.8,"Virginica" 111 | 7.2,3.6,6.1,2.5,"Virginica" 112 | 6.5,3.2,5.1,2,"Virginica" 113 | 6.4,2.7,5.3,1.9,"Virginica" 114 | 6.8,3,5.5,2.1,"Virginica" 115 | 5.7,2.5,5,2,"Virginica" 116 | 5.8,2.8,5.1,2.4,"Virginica" 117 | 6.4,3.2,5.3,2.3,"Virginica" 118 | 6.5,3,5.5,1.8,"Virginica" 119 | 7.7,3.8,6.7,2.2,"Virginica" 120 | 7.7,2.6,6.9,2.3,"Virginica" 121 | 6,2.2,5,1.5,"Virginica" 122 | 6.9,3.2,5.7,2.3,"Virginica" 123 | 5.6,2.8,4.9,2,"Virginica" 124 | 7.7,2.8,6.7,2,"Virginica" 125 | 6.3,2.7,4.9,1.8,"Virginica" 126 | 6.7,3.3,5.7,2.1,"Virginica" 127 | 7.2,3.2,6,1.8,"Virginica" 128 | 6.2,2.8,4.8,1.8,"Virginica" 129 | 6.1,3,4.9,1.8,"Virginica" 130 | 6.4,2.8,5.6,2.1,"Virginica" 131 | 7.2,3,5.8,1.6,"Virginica" 132 | 7.4,2.8,6.1,1.9,"Virginica" 133 | 7.9,3.8,6.4,2,"Virginica" 134 | 6.4,2.8,5.6,2.2,"Virginica" 135 | 6.3,2.8,5.1,1.5,"Virginica" 136 | 6.1,2.6,5.6,1.4,"Virginica" 137 | 7.7,3,6.1,2.3,"Virginica" 138 | 6.3,3.4,5.6,2.4,"Virginica" 139 | 6.4,3.1,5.5,1.8,"Virginica" 140 | 6,3,4.8,1.8,"Virginica" 141 | 6.9,3.1,5.4,2.1,"Virginica" 142 | 6.7,3.1,5.6,2.4,"Virginica" 143 | 6.9,3.1,5.1,2.3,"Virginica" 144 | 5.8,2.7,5.1,1.9,"Virginica" 145 | 6.8,3.2,5.9,2.3,"Virginica" 146 | 6.7,3.3,5.7,2.5,"Virginica" 147 | 6.7,3,5.2,2.3,"Virginica" 148 | 6.3,2.5,5,1.9,"Virginica" 149 | 6.5,3,5.2,2,"Virginica" 150 | 6.2,3.4,5.4,2.3,"Virginica" 151 | 5.9,3,5.1,1.8,"Virginica" -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "crane": { 4 | "locked": { 5 | "lastModified": 1759893430, 6 | "narHash": "sha256-yAy4otLYm9iZ+NtQwTMEbqHwswSFUbhn7x826RR6djw=", 7 | "owner": "ipetkov", 8 | "repo": "crane", 9 | "rev": "1979a2524cb8c801520bd94c38bb3d5692419d93", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "ipetkov", 14 | "repo": "crane", 15 | "type": "github" 16 | } 17 | }, 18 | "fenix": { 19 | "inputs": { 20 | "nixpkgs": [ 21 | "nixpkgs" 22 | ], 23 | "rust-analyzer-src": "rust-analyzer-src" 24 | }, 25 | "locked": { 26 | "lastModified": 1759301100, 27 | "narHash": "sha256-hmiTEoVAqLnn80UkreCNunnRKPucKvcg5T4/CELEtbw=", 28 | "owner": "nix-community", 29 | "repo": "fenix", 30 | "rev": "0956bc5d1df2ea800010172c6bc4470d9a22cb81", 31 | "type": "github" 32 | }, 33 | "original": { 34 | "owner": "nix-community", 35 | "ref": "monthly", 36 | "repo": "fenix", 37 | "type": "github" 38 | } 39 | }, 40 | "flake-utils": { 41 | "inputs": { 42 | "systems": "systems" 43 | }, 44 | "locked": { 45 | "lastModified": 1731533236, 46 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", 47 | "owner": "numtide", 48 | "repo": "flake-utils", 49 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", 50 | "type": "github" 51 | }, 52 | "original": { 53 | "owner": "numtide", 54 | "repo": "flake-utils", 55 | "type": "github" 56 | } 57 | }, 58 | "nixpkgs": { 59 | "locked": { 60 | "lastModified": 1760038930, 61 | "narHash": "sha256-Oncbh0UmHjSlxO7ErQDM3KM0A5/Znfofj2BSzlHLeVw=", 62 | "owner": "NixOS", 63 | "repo": 
"nixpkgs", 64 | "rev": "0b4defa2584313f3b781240b29d61f6f9f7e0df3", 65 | "type": "github" 66 | }, 67 | "original": { 68 | "id": "nixpkgs", 69 | "ref": "nixos-unstable", 70 | "type": "indirect" 71 | } 72 | }, 73 | "root": { 74 | "inputs": { 75 | "crane": "crane", 76 | "fenix": "fenix", 77 | "flake-utils": "flake-utils", 78 | "nixpkgs": "nixpkgs" 79 | } 80 | }, 81 | "rust-analyzer-src": { 82 | "flake": false, 83 | "locked": { 84 | "lastModified": 1759245522, 85 | "narHash": "sha256-H4Hx/EuMJ9qi1WzPV4UG2bbZiDCdREtrtDvYcHr0kmk=", 86 | "owner": "rust-lang", 87 | "repo": "rust-analyzer", 88 | "rev": "a6bc4a4bbe6a65b71cbf76a0cf528c47a8d9f97f", 89 | "type": "github" 90 | }, 91 | "original": { 92 | "owner": "rust-lang", 93 | "ref": "nightly", 94 | "repo": "rust-analyzer", 95 | "type": "github" 96 | } 97 | }, 98 | "systems": { 99 | "locked": { 100 | "lastModified": 1681028828, 101 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 102 | "owner": "nix-systems", 103 | "repo": "default", 104 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 105 | "type": "github" 106 | }, 107 | "original": { 108 | "owner": "nix-systems", 109 | "repo": "default", 110 | "type": "github" 111 | } 112 | } 113 | }, 114 | "root": "root", 115 | "version": 7 116 | } 117 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | inputs = { 3 | nixpkgs.url = "nixpkgs/nixos-unstable"; 4 | flake-utils.url = "github:numtide/flake-utils"; 5 | 6 | crane.url = "github:ipetkov/crane"; 7 | 8 | fenix.url = "github:nix-community/fenix/monthly"; 9 | fenix.inputs.nixpkgs.follows = "nixpkgs"; 10 | }; 11 | 12 | outputs = { self, flake-utils, nixpkgs, crane, fenix, ... 
}@inputs:
13 |     flake-utils.lib.eachDefaultSystem (system: let
14 |       pkgs = nixpkgs.legacyPackages.${system};
15 |       inherit (pkgs) lib;
16 |
17 |       fenixSet = fenix.packages.${system}.complete;
18 |       inherit (fenixSet) toolchain;
19 |
20 |       craneLib = (crane.mkLib pkgs).overrideToolchain toolchain;
21 |
22 |       rustPlatform = pkgs.makeRustPlatform {
23 |         cargo = toolchain;
24 |         rustc = toolchain;
25 |       };
26 |
27 |       root = ./.;
28 |       src = lib.fileset.toSource {
29 |         inherit root;
30 |         fileset = lib.fileset.unions [
31 |           (craneLib.fileset.commonCargoSources root)
32 |           (lib.fileset.fileFilter (file: file.hasExt "md") root)
33 |           ./py_maplib/LICENSE
34 |           ./py_maplib
35 |         ];
36 |       };
37 |
38 |       cargoVendorDir = craneLib.vendorCargoDeps { inherit src; };
39 |       cargoArtifacts = craneLib.buildDepsOnly { inherit src; };
40 |
41 |       python = let
42 |         packageOverrides = self: super: {
43 |           maplib = self.callPackage ./nix/py_maplib {
44 |             inherit src
45 |               craneLib cargoVendorDir
46 |               rustPlatform;
47 |           };
48 |         };
49 |       in pkgs.python3.override {inherit packageOverrides; self = python;};
50 |     in {
51 |       packages = rec {
52 |         default = py_maplib;
53 |         py_maplib = python.pkgs.maplib;
54 |         python-env = python.withPackages (ps: [ ps.maplib ps.polars ps.rdflib ps.jupyter ps.lxml ]);
55 |       };
56 |       legacyPackages.python = python;
57 |       devShells.default = craneLib.devShell {
58 |         inputsFrom = [ self.packages.${system}.py_maplib ];
59 |
60 |         # https://github.com/tikv/jemallocator/pull/116
61 |         env.CFLAGS = "-Wno-error=int-conversion";
62 |
63 |         packages = [
64 |           pkgs.cargo-audit
65 |           pkgs.cargo-deny
66 |           pkgs.cargo-vet
67 |           pkgs.cargo-all-features
68 |
69 |           fenixSet.rust-analyzer
70 |
71 |           pkgs.just
72 |           pkgs.gum
73 |         ];
74 |       };
75 |       checks = {
76 |         py_pytest = self.legacyPackages.${system}.python.pkgs.callPackage ./nix/py_maplib/pytest.nix {
77 |           src = lib.fileset.toSource {
78 |             root = ./.;
79 |             fileset = lib.fileset.unions [
80 |               ./py_maplib/tests
81 |             ];
82 |           };
83 |         };
84 |         cargo_test = craneLib.cargoTest {
85 |           inherit src cargoArtifacts;
86 |           inherit (craneLib.crateNameFromCargoToml { cargoToml = ./py_maplib/Cargo.toml; }) pname version;
87 |         };
88 |       };
89 |       apps = {
90 |         python-env = {
91 |           type = "app";
92 |           program = "${self.packages.${system}.python-env}/bin/python";
93 |         };
94 |         jupyter = {
95 |           type = "app";
96 |           program = "${self.packages.${system}.python-env}/bin/jupyter";
97 |         };
98 |       };
99 |     }
100 |   );
101 | }
--------------------------------------------------------------------------------
/lib/cimxml/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cimxml"
3 | version = "0.1.0"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [features]
8 | pyo3 = [ "dep:pyo3", "triplestore/pyo3" ]
9 |
10 | [dependencies]
11 | representation = {path ="../representation"}
12 | triplestore = {path="../triplestore"}
13 | thiserror.workspace = true
14 | oxrdf.workspace = true
15 |
16 | pyo3 = { workspace = true, optional = true }
--------------------------------------------------------------------------------
/lib/cimxml/src/export.rs:
--------------------------------------------------------------------------------
1 | use oxrdf::{Literal, NamedNode};
2 | use representation::dataset::NamedGraph;
3 | use std::collections::HashMap;
4 | use std::io::Write;
5 | use thiserror::Error;
6 | use triplestore::Triplestore;
7 |
8 | #[derive(Error, Debug)]
9 | pub enum CIMXMLError {}
10 |
11 | pub fn cim_xml_write<W: Write>(
12 |     _buf: &mut W,
13 |     _triplestore:
&mut Triplestore,
14 |     _graph: &NamedGraph,
15 |     _profile_graph: &NamedGraph,
16 |     _prefixes: HashMap<String, NamedNode>,
17 |     _fullmodel_details: FullModelDetails,
18 | ) -> Result<(), CIMXMLError> {
19 |     unimplemented!("Contact Data Treehouse to try")
20 | }
21 |
22 | #[allow(dead_code)]
23 | pub struct FullModelDetails {
24 |     id: NamedNode,
25 |     description: Option<String>,
26 |     version: Option<String>,
27 |     created: Literal,
28 |     scenario_time: Literal,
29 |     modeling_authority_set: Option<String>,
30 | }
31 |
32 | impl FullModelDetails {
33 |     pub fn new(
34 |         _id: NamedNode,
35 |         _description: Option<String>,
36 |         _version: Option<String>,
37 |         _created: Literal,
38 |         _scenario_time: Literal,
39 |         _modeling_authority_set: Option<String>,
40 |     ) -> Result<FullModelDetails, CIMXMLError> {
41 |         unimplemented!("Contact Data Treehouse to try")
42 |     }
43 | }
--------------------------------------------------------------------------------
/lib/cimxml/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod export;
--------------------------------------------------------------------------------
/lib/datalog/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "datalog"
3 | version = "0.1.0"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [features]
8 | pyo3 = [ "dep:pyo3", "triplestore/pyo3" ]
9 |
10 | [dependencies]
11 | triplestore = {path = "../triplestore"}
12 | representation = {path = "../representation"}
13 |
14 | thiserror.workspace= true
15 | oxrdf.workspace = true
16 |
17 | pyo3 = { workspace = true, optional = true }
--------------------------------------------------------------------------------
/lib/datalog/src/ast.rs:
--------------------------------------------------------------------------------
1 | #[derive(Debug)]
2 | pub struct DatalogRuleset {}
3 |
4 | impl DatalogRuleset {
5 |     pub fn extend(&mut self, _other: DatalogRuleset) {
6 |         unimplemented!("Contact Data Treehouse to try")
7 |     }
8 | }
--------------------------------------------------------------------------------
/lib/datalog/src/inference.rs:
--------------------------------------------------------------------------------
1 | use crate::ast::DatalogRuleset;
2 | use oxrdf::NamedNode;
3 | use representation::dataset::NamedGraph;
4 | use representation::solution_mapping::EagerSolutionMappings;
5 | use std::collections::HashMap;
6 | use thiserror::*;
7 | use triplestore::sparql::errors::SparqlError;
8 | use triplestore::Triplestore;
9 |
10 | #[derive(Debug, Error)]
11 | pub enum DatalogError {
12 |     #[error(transparent)]
13 |     SparqlError(SparqlError),
14 | }
15 |
16 | pub fn infer(
17 |     _triplestore: &mut Triplestore,
18 |     _graph: Option<&NamedGraph>,
19 |     _ruleset: &DatalogRuleset,
20 |     _max_iterations: Option<usize>,
21 |     _max_results: Option<usize>,
22 | ) -> Result<Option<HashMap<NamedNode, EagerSolutionMappings>>, DatalogError> {
23 |     unimplemented!("Contact Data Treehouse to try")
24 | }
--------------------------------------------------------------------------------
/lib/datalog/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod ast;
2 | pub mod inference;
3 | pub mod parser;
--------------------------------------------------------------------------------
/lib/datalog/src/parser.rs:
--------------------------------------------------------------------------------
1 | use crate::ast::DatalogRuleset;
2 | use thiserror::*;
3 |
4 | pub fn parse_datalog_ruleset(
5 |     _datalog_ruleset: &str,
6 |     _base_iri: Option<&str>,
7 | ) -> Result<DatalogRuleset, DatalogSyntaxError> {
8 |     unimplemented!("Contact Data
Treehouse to try")
9 | }
10 |
11 | #[derive(Debug, Error)]
12 | #[error(transparent)]
13 | pub struct DatalogSyntaxError(#[from] DatalogParseErrorKind);
14 |
15 | #[derive(Debug, Error)]
16 | pub(crate) enum DatalogParseErrorKind {}
--------------------------------------------------------------------------------
/lib/file_io/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "file_io"
3 | version = "0.5.0"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [dependencies]
8 | polars = {workspace=true, features=["parquet", "lazy"], default-features = false }
9 | thiserror={ workspace= true }
--------------------------------------------------------------------------------
/lib/file_io/src/lib.rs:
--------------------------------------------------------------------------------
1 | use polars::prelude::{
2 |     DataFrame, LazyFrame, ParallelStrategy, ParquetCompression, ParquetWriter, PlPathRef,
3 |     PolarsError, ScanArgsParquet,
4 | };
5 | use std::fs::{create_dir, File};
6 | use std::path::Path;
7 |
8 | use std::fmt::{Display, Formatter};
9 | use std::io;
10 | use thiserror::Error;
11 |
12 | pub fn create_folder_if_not_exists(path: &Path) -> Result<(), FileIOError> {
13 |     if !path.exists() {
14 |         create_dir(path).map_err(FileIOError::FileCreateIOError)?;
15 |     }
16 |     Ok(())
17 | }
18 |
19 | #[derive(Error, Debug)]
20 | pub enum FileIOError {
21 |     FileCreateIOError(io::Error),
22 |     WriteParquetError(PolarsError),
23 |     ReadParquetError(PolarsError),
24 | }
25 |
26 | impl Display for FileIOError {
27 |     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
28 |         match self {
29 |             FileIOError::FileCreateIOError(e) => {
30 |                 write!(f, "Creating file for writing resulted in an error: {e}")
31 |             }
32 |             FileIOError::WriteParquetError(e) => {
33 |                 write!(f, "Writing to parquet file produced an error {e:?}")
34 |             }
35 |             FileIOError::ReadParquetError(p) => {
36 |                 write!(f, "Reading parquet file resulted in an error: {p:?}")
37 |             }
38 |         }
39 |     }
40 | }
41 |
42 | pub fn property_to_filename(property_name: &str) -> String {
43 |     property_name
44 |         .chars()
45 |         .filter(|x| x.is_alphanumeric())
46 |         .collect()
47 | }
48 |
49 | pub fn write_parquet(
50 |     df: &mut DataFrame,
51 |     file_path: &Path,
52 |     compression: ParquetCompression,
53 | ) -> Result<(), FileIOError> {
54 |     let file = File::create(file_path).map_err(FileIOError::FileCreateIOError)?;
55 |     let mut writer = ParquetWriter::new(file);
56 |     writer = writer.with_row_group_size(Some(1_000));
57 |     writer = writer.with_compression(compression);
58 |     writer.finish(df).map_err(FileIOError::WriteParquetError)?;
59 |     Ok(())
60 | }
61 |
62 | pub fn scan_parquet(file_path: &Path) -> Result<LazyFrame, FileIOError> {
63 |     LazyFrame::scan_parquet(
64 |         PlPathRef::Local(file_path).into_owned(),
65 |         ScanArgsParquet {
66 |             n_rows: None,
67 |             cache: false,
68 |             parallel: ParallelStrategy::Auto,
69 |             rechunk: false,
70 |             low_memory: false,
71 |             ..Default::default()
72 |         },
73 |     )
74 |     .map_err(FileIOError::ReadParquetError)
75 | }
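A minimal usage sketch of the file_io API above (illustrative, not part of the repository: the directory, file name and column contents are invented):

use polars::df;
use polars::prelude::ParquetCompression;
use std::path::Path;

fn parquet_roundtrip() -> Result<(), file_io::FileIOError> {
    let dir = Path::new("/tmp/file_io_example");
    file_io::create_folder_if_not_exists(dir)?;
    // write_parquet uses 1_000-row row groups and the given compression.
    let mut frame = df!("subject" => ["s1", "s2"], "value" => [1i64, 2]).unwrap();
    file_io::write_parquet(&mut frame, &dir.join("values.parquet"), ParquetCompression::Snappy)?;
    // scan_parquet is lazy: no data is read from disk until collect() is called.
    let read_back = file_io::scan_parquet(&dir.join("values.parquet"))?
        .collect()
        .map_err(file_io::FileIOError::ReadParquetError)?;
    assert_eq!(read_back.height(), 2);
    Ok(())
}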
--------------------------------------------------------------------------------
/lib/fts/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "fts"
3 | version = "0.1.0"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [dependencies]
8 | representation = {path="../representation"}
9 | spargebra = { path="../spargebra" }
10 | query_processing = {path="../query_processing"}
11 |
12 | oxrdf.workspace = true
13 | thiserror.workspace = true
14 | polars.workspace = true
--------------------------------------------------------------------------------
/lib/fts/src/lib.rs:
--------------------------------------------------------------------------------
1 | use oxrdf::NamedNode;
2 | use polars::frame::DataFrame;
3 | use representation::cats::LockedCats;
4 | use representation::solution_mapping::{BaseCatState, SolutionMappings};
5 | use representation::BaseRDFNodeType;
6 | use spargebra::term::TriplePattern;
7 | use std::path::Path;
8 | use thiserror::Error;
9 |
10 | #[derive(Debug, Error)]
11 | pub enum FtsError {}
12 |
13 | #[derive(Debug, Clone)]
14 | pub struct FtsIndex {}
15 |
16 | impl FtsIndex {
17 |     pub fn new(_path: &Path) -> Result<FtsIndex, FtsError> {
18 |         unimplemented!("Contact Data Treehouse to enable full text search")
19 |     }
20 |
21 |     pub fn add_literal_string(
22 |         &mut self,
23 |         _df: &DataFrame,
24 |         _predicate: &NamedNode,
25 |         _subject_type: &BaseRDFNodeType,
26 |         _subject_state: &BaseCatState,
27 |         _object_type: &BaseRDFNodeType,
28 |         _object_state: &BaseCatState,
29 |         _global_cats: LockedCats,
30 |     ) -> Result<(), FtsError> {
31 |         unimplemented!("Contact Data Treehouse to enable full text search")
32 |     }
33 |     pub fn lookup_from_triple_patterns(
34 |         &self,
35 |         _patterns: &Vec<TriplePattern>,
36 |         _global_cats: LockedCats,
37 |     ) -> Result<(Vec<TriplePattern>, Option<SolutionMappings>), FtsError> {
38 |         unimplemented!("Contact Data Treehouse to enable full text search")
39 |     }
40 | }
--------------------------------------------------------------------------------
/lib/maplib/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "maplib"
3 | version = "0.6.0"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [features]
8 | pyo3 = [ "dep:pyo3", "triplestore/pyo3", "shacl/pyo3", "datalog/pyo3", "cimxml/pyo3" ]
9 |
10 | [dependencies]
11 | templates = {path = "../templates"}
12 | triplestore = { path = "../triplestore" }
13 | shacl = { path = "../shacl" }
14 | representation = { path = "../representation" }
15 | datalog = {path = "../datalog"}
16 | cimxml = {path = "../cimxml"}
17 |
18 | rayon = { workspace = true }
19 | oxrdf = { workspace = true }
20 | oxiri = { workspace = true }
21 | polars = { workspace = true, default-features = false, features = [
22 |     "semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "rows",
23 |     "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant",
24 | ] }
25 | chrono = { workspace = true }
26 | chrono-tz = { workspace = true }
27 | uuid = { workspace = true }
28 | thiserror = { workspace = true }
29 | oxrdfio = { workspace = true }
30 | tracing = { workspace = true }
31 |
32 | pyo3 = { workspace = true, optional = true }
33 |
34 |
35 | [dev-dependencies]
36 | rstest.workspace=true
37 | serial_test.workspace = true
--------------------------------------------------------------------------------
/lib/maplib/src/errors.rs:
--------------------------------------------------------------------------------
1 | use crate::mapping::errors::MappingError;
2 | use cimxml::export::CIMXMLError;
3 | use datalog::inference::DatalogError;
4 | use oxiri::IriParseError;
5 | use polars::error::PolarsError;
6 | use shacl::errors::ShaclError;
7 | use std::io;
8 | use templates::dataset::errors::TemplateError;
9 | use thiserror::Error;
10 | use triplestore::errors::TriplestoreError;
11 | use
triplestore::sparql::errors::SparqlError; 12 | 13 | #[derive(Error, Debug)] 14 | pub enum MaplibError { 15 | #[error(transparent)] 16 | TemplateError(#[from] TemplateError), 17 | #[error(transparent)] 18 | MappingError(#[from] MappingError), 19 | #[error("Datalog syntax error: `{0}`")] 20 | DatalogSyntaxError(String), 21 | #[error(transparent)] 22 | DatalogError(DatalogError), 23 | #[error(transparent)] 24 | CIMXMLError(CIMXMLError), 25 | #[error("Error creating file: `{0}`")] 26 | FileCreateIOError(io::Error), 27 | #[error("Error writing parquet: `{0}`")] 28 | WriteParquetError(PolarsError), 29 | #[error("Error reading parquet: `{0}`")] 30 | ReadParquetError(PolarsError), 31 | #[error("Path does not exist `{0}`")] 32 | PathDoesNotExist(String), 33 | #[error("Error writing NTriples: `{0}`")] 34 | WriteNTriplesError(io::Error), 35 | #[error("Error removing parquet file: `{0}`")] 36 | RemoveParquetFileError(io::Error), 37 | #[error(transparent)] 38 | TriplestoreError(#[from] TriplestoreError), 39 | #[error(transparent)] 40 | SparqlError(#[from] SparqlError), 41 | #[error(transparent)] 42 | ShaclError(#[from] ShaclError), 43 | #[error(transparent)] 44 | IRIParseError(#[from] IriParseError), 45 | #[error("Datalog ruleset missing")] 46 | MissingDatalogRuleset, 47 | } 48 | -------------------------------------------------------------------------------- /lib/maplib/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate chrono; 2 | extern crate chrono_tz; 3 | 4 | pub mod errors; 5 | pub mod mapping; 6 | -------------------------------------------------------------------------------- /lib/parquet_io/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "parquet_io" 3 | version = "0.5.0" 4 | edition = "2021" 5 | rust-version.workspace = true 6 | 7 | [dependencies] 8 | polars = {workspace=true, features=["parquet"], default-features = false } 9 | thiserror={ workspace= true } 10 | uuid = {workspace=true} -------------------------------------------------------------------------------- /lib/parquet_io/src/lib.rs: -------------------------------------------------------------------------------- 1 | use polars::prelude::{ 2 | DataFrame, LazyFrame, ParallelStrategy, ParquetCompression, ParquetWriter, PolarsError, 3 | ScanArgsParquet, 4 | }; 5 | use std::fs::File; 6 | use std::path::Path; 7 | 8 | use std::fmt::{Display, Formatter}; 9 | use std::io; 10 | use thiserror::Error; 11 | 12 | #[derive(Error, Debug)] 13 | pub enum ParquetIOError { 14 | FileCreateIOError(io::Error), 15 | WriteParquetError(PolarsError), 16 | ReadParquetError(PolarsError), 17 | } 18 | 19 | impl Display for ParquetIOError { 20 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 21 | match self { 22 | ParquetIOError::FileCreateIOError(e) => { 23 | write!(f, "Creating file for writing resulted in an error: {}", e) 24 | } 25 | ParquetIOError::WriteParquetError(e) => { 26 | write!(f, "Writing to parquet file produced an error {:?}", e) 27 | } 28 | ParquetIOError::ReadParquetError(p) => { 29 | write!(f, "Reading parquet file resulted in an error: {:?}", p) 30 | } 31 | } 32 | } 33 | } 34 | 35 | pub fn property_to_filename(property_name: &str) -> String { 36 | property_name 37 | .chars() 38 | .filter(|x| x.is_alphanumeric()) 39 | .collect() 40 | } 41 | 42 | pub fn write_parquet( 43 | df: &mut DataFrame, 44 | file_path: &Path, 45 | compression: ParquetCompression, 46 | ) -> Result<(), ParquetIOError> { 47 | let file = 
File::create(file_path).map_err(ParquetIOError::FileCreateIOError)?;
48 |     let mut writer = ParquetWriter::new(file);
49 |     writer = writer.with_row_group_size(Some(1_000));
50 |     writer = writer.with_compression(compression);
51 |     writer
52 |         .finish(df)
53 |         .map_err(ParquetIOError::WriteParquetError)?;
54 |     Ok(())
55 | }
56 |
57 | pub fn scan_parquet(file_path: &str) -> Result<LazyFrame, ParquetIOError> {
58 |     LazyFrame::scan_parquet(
59 |         Path::new(file_path),
60 |         ScanArgsParquet {
61 |             n_rows: None,
62 |             cache: false,
63 |             parallel: ParallelStrategy::Auto,
64 |             rechunk: false,
65 |             low_memory: false,
66 |             ..Default::default()
67 |         },
68 |     )
69 |     .map_err(ParquetIOError::ReadParquetError)
70 | }
--------------------------------------------------------------------------------
/lib/pydf_io/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "pydf_io"
3 | version = "0.7.6"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [dependencies]
8 | representation = {path = "../representation"}
9 |
10 | pyo3 = {workspace=true, features = ["extension-module"]}
11 | polars = {workspace=true, default-features = false }
12 | polars-core = {workspace=true, features=["dtype-array", "dtype-date", "dtype-datetime",
13 |     "dtype-decimal", "dtype-duration", "dtype-i8", "dtype-i16", "dtype-struct", "dtype-time", "dtype-u8", "dtype-u16"], default-features = false}
14 | thiserror.workspace = true
15 |
16 | [lints.rust]
17 | unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("gil-refs", "rdf-star"))'] }
--------------------------------------------------------------------------------
/lib/pydf_io/README.md:
--------------------------------------------------------------------------------
1 | # python_dataframe_api
--------------------------------------------------------------------------------
/lib/pydf_io/licensing/POLARS_LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2020 Ritchie Vink
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
--------------------------------------------------------------------------------
/lib/pydf_io/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod to_python;
2 | pub mod to_rust;
--------------------------------------------------------------------------------
/lib/pydf_io/src/to_python.rs:
--------------------------------------------------------------------------------
1 | // From: https://github.com/pola-rs/polars/blob/master/py-polars/src/arrow_interop/to_py.rs
2 | // Edited to remove dependencies on py-polars, and added specific functionality for RDF.
3 | // Original licence in ../licensing/POLARS_LICENSE
4 |
5 | use polars::prelude::{col, IntoLazy};
6 | use polars_core::frame::DataFrame;
7 | use polars_core::prelude::{ArrayRef, ArrowField, CompatLevel};
8 | use polars_core::utils::arrow::ffi;
9 | use polars_core::utils::arrow::record_batch::RecordBatch;
10 | use pyo3::ffi::Py_uintptr_t;
11 | use pyo3::prelude::*;
12 | use pyo3::types::PyList;
13 | use pyo3::IntoPyObjectExt;
14 | use representation::cats::LockedCats;
15 | use representation::formatting::{format_columns, format_native_columns};
16 | use representation::multitype::compress_actual_multitypes;
17 | use representation::python::PySolutionMappings;
18 | use representation::query_context::Context;
19 | use representation::RDFNodeState;
20 | use std::collections::HashMap;
21 |
22 | /// Arrow array to Python.
23 | pub(crate) fn to_py_array(
24 |     array: ArrayRef,
25 |     py: Python,
26 |     pyarrow: &Bound<'_, PyModule>,
27 | ) -> PyResult<PyObject> {
28 |     let schema = Box::new(ffi::export_field_to_c(&ArrowField::new(
29 |         "".into(),
30 |         array.dtype().clone(),
31 |         true,
32 |     )));
33 |     let array = Box::new(ffi::export_array_to_c(array));
34 |
35 |     let schema_ptr: *const ffi::ArrowSchema = &*schema;
36 |     let array_ptr: *const ffi::ArrowArray = &*array;
37 |
38 |     let array = pyarrow.getattr("Array")?.call_method1(
39 |         "_import_from_c",
40 |         (array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t),
41 |     )?;
42 |
43 |     array.into_py_any(py)
44 | }
45 |
46 | /// RecordBatch to Python.
47 | pub(crate) fn to_py_rb(
48 |     rb: &RecordBatch,
49 |     names: &[&str],
50 |     py: Python,
51 |     pyarrow: &Bound<'_, PyModule>,
52 | ) -> PyResult<PyObject> {
53 |     let mut arrays = Vec::with_capacity(rb.len());
54 |
55 |     for array in rb.columns() {
56 |         let array_object = to_py_array(array.clone(), py, pyarrow)?;
57 |         arrays.push(array_object);
58 |     }
59 |
60 |     let record = pyarrow
61 |         .getattr("RecordBatch")?
62 |         .call_method1("from_arrays", (arrays, names.to_vec()))?;
63 |
64 |     record.into_py_any(py)
65 | }
66 | pub fn to_py_df(
67 |     rb: &RecordBatch,
68 |     names: &[&str],
69 |     py: Python,
70 |     pyarrow: &Bound<'_, PyModule>,
71 |     polars: &Bound<'_, PyModule>,
72 | ) -> PyResult<PyObject> {
73 |     let py_rb = to_py_rb(rb, names, py, pyarrow)?;
74 |     let py_rb_list = PyList::empty(py);
75 |     py_rb_list.append(py_rb)?;
76 |     let py_table = pyarrow
77 |         .getattr("Table")?
78 |         .call_method1("from_batches", (py_rb_list,))?;
79 |     let py_table = py_table.into_py_any(py)?;
80 |     let df = polars.call_method1("from_arrow", (py_table,))?;
81 |     df.into_py_any(py)
82 | }
83 |
84 | pub fn df_to_py_df(
85 |     mut df: DataFrame,
86 |     rdf_node_states: HashMap<String, RDFNodeState>,
87 |     pushdown_paths: Option<Vec<Context>>,
88 |     include_datatypes: bool,
89 |     py: Python,
90 | ) -> PyResult<PyObject> {
91 |     let names_vec: Vec<String> = df
92 |         .get_column_names()
93 |         .into_iter()
94 |         .map(|x| x.to_string())
95 |         .collect();
96 |     let names: Vec<&str> = names_vec.iter().map(|x| x.as_str()).collect();
97 |     let chunk = df
98 |         .as_single_chunk()
99 |         .iter_chunks(CompatLevel::oldest(), true)
100 |         .next()
101 |         .unwrap();
102 |     let pyarrow = PyModule::import(py, "pyarrow")?;
103 |     let polars = PyModule::import(py, "polars")?;
104 |     let py_df = to_py_df(&chunk, names.as_slice(), py, &pyarrow, &polars)?;
105 |     if include_datatypes {
106 |         Py::new(
107 |             py,
108 |             PySolutionMappings {
109 |                 mappings: py_df.into_any(),
110 |                 rdf_node_states,
111 |                 pushdown_paths,
112 |             },
113 |         )?
114 |         .into_py_any(py)
115 |     } else {
116 |         Ok(py_df)
117 |     }
118 | }
119 |
120 | pub fn fix_cats_and_multicolumns(
121 |     mut df: DataFrame,
122 |     mut dts: HashMap<String, RDFNodeState>,
123 |     native_dataframe: bool,
124 |     global_cats: LockedCats,
125 | ) -> (DataFrame, HashMap<String, RDFNodeState>) {
126 |     let column_ordering: Vec<_> = df
127 |         .get_column_names()
128 |         .iter()
129 |         .map(|x| col(x.as_str()))
130 |         .collect();
131 |     //Important that column compression happens before decisions are made based on column type.
132 |     (df, dts) = compress_actual_multitypes(df, dts);
133 |     let mut lf = df.lazy();
134 |     if !native_dataframe {
135 |         lf = format_columns(lf, &dts, global_cats)
136 |     } else {
137 |         lf = format_native_columns(lf, &mut dts, global_cats)
138 |     }
139 |     df = lf.select(column_ordering).collect().unwrap();
140 |     (df, dts)
141 | }
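A usage illustration for df_to_py_df above (a hypothetical sketch, not part of the repository: the #[pyfunction] name is invented, and an empty state map is passed because the frame carries no RDF datatype metadata):

use polars::df;
use pydf_io::to_python::df_to_py_df;
use pyo3::prelude::*;
use std::collections::HashMap;

// Hypothetical example: build a small frame in Rust and hand it to Python as a polars DataFrame.
#[pyfunction]
fn tiny_frame(py: Python) -> PyResult<PyObject> {
    let frame = df!("x" => [1i64, 2, 3]).unwrap();
    // include_datatypes = false returns the bare DataFrame instead of wrapping it
    // in PySolutionMappings.
    df_to_py_df(frame, HashMap::new(), None, false, py)
}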
--------------------------------------------------------------------------------
/lib/pydf_io/src/to_rust.rs:
--------------------------------------------------------------------------------
1 | // From: https://github.com/pola-rs/polars/blob/master/py-polars/src/arrow_interop/to_rust.rs
2 | // Edited to remove dependencies on py-polars, remove unused functionality
3 | // Original licence in ../licensing/POLARS_LICENSE
4 |
5 | use polars::prelude::PlSmallStr;
6 | use polars_core::error::PolarsError;
7 | use polars_core::prelude::{ArrayRef, ArrowDataType, DataFrame, IntoColumn, Series};
8 | use polars_core::utils::accumulate_dataframes_vertical;
9 | use polars_core::utils::arrow::ffi;
10 | use polars_core::utils::rayon::iter::{
11 |     IndexedParallelIterator, IntoParallelIterator, ParallelIterator,
12 | };
13 | use polars_core::POOL;
14 | use pyo3::create_exception;
15 | use pyo3::exceptions::PyException;
16 | use pyo3::exceptions::PyRuntimeError;
17 | use pyo3::ffi::Py_uintptr_t;
18 | use pyo3::prelude::*;
19 | use thiserror::Error;
20 |
21 | #[derive(Error, Debug)]
22 | pub enum ToRustError {
23 |     #[error(transparent)]
24 |     PolarsError(#[from] PolarsError),
25 | }
26 |
27 | pub fn array_to_rust(obj: &Bound<'_, PyAny>) -> PyResult<ArrayRef> {
28 |     // prepare a pointer to receive the Array struct
29 |     let array = Box::new(ffi::ArrowArray::empty());
30 |     let schema = Box::new(ffi::ArrowSchema::empty());
31 |
32 |     let array_ptr = &*array as *const ffi::ArrowArray;
33 |     let schema_ptr = &*schema as *const ffi::ArrowSchema;
34 |
35 |     // make the conversion through PyArrow's private API
36 |     // this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds
37 |     obj.call_method1(
38 |         "_export_to_c",
39 |         (array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t),
40 |     )?;
41 |
42 |     unsafe {
43 |         let field = ffi::import_field_from_c(schema.as_ref()).map_err(ToRustError::from)?;
44 |         let array = ffi::import_array_from_c(*array, field.dtype).map_err(ToRustError::from)?;
45 |         Ok(array)
46 |     }
47 | }
48 |
49 | pub fn polars_df_to_rust_df(df: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
50 |     let arr = df.call_method0("to_arrow")?;
51 |     let batches = arr.call_method1("to_batches", (u32::MAX,))?;
52 |     let batches_len = batches.call_method0("__len__")?;
53 |     let l: u32 = batches_len.extract()?;
54 |     let mut batches_vec = vec![];
55 |     for i in 0..l {
56 |         let batch = batches.call_method1("__getitem__", (i,))?;
57 |         batches_vec.push(batch);
58 |     }
59 |     array_to_rust_df(batches_vec.as_slice())
60 | }
61 |
62 | pub fn array_to_rust_df(rb: &[Bound<'_, PyAny>]) -> PyResult<DataFrame> {
63 |     if rb.is_empty() {
64 |         return Ok(DataFrame::empty());
65 |     }
66 |     let schema = rb.first().unwrap().getattr("schema")?;
67 |     let names = schema.getattr("names")?.extract::<Vec<String>>()?;
68 |
69 |     let dfs = rb
70 |         .iter()
71 |         .map(|rb| {
72 |             let mut run_parallel = false;
73 |
74 |             let columns = (0..names.len())
75 |                 .map(|i| {
76 |                     let array = rb.call_method1("column", (i,))?;
77 |                     let arr = array_to_rust(&array)?;
78 |                     run_parallel |= matches!(
79 |                         arr.dtype(),
80 |                         ArrowDataType::Utf8 | ArrowDataType::Dictionary(_, _, _)
81 |                     );
82 |                     Ok(arr)
83 |                 })
84 |                 .collect::<PyResult<Vec<ArrayRef>>>()?;
85 |
86 |             // we parallelize this part because we can have dtypes that are not zero copy
87 |             // for instance String -> large-String
88 |             // dict encoded to categorical
89 |             let columns = if run_parallel {
90 |                 POOL.install(|| {
91 |                     columns
92 |                         .into_par_iter()
93 |                         .enumerate()
94 |                         .map(|(i, arr)| {
95 |                             let s =
96 |                                 Series::try_from((PlSmallStr::from_str(names[i].as_str()), arr))
97 |                                     .map_err(ToRustError::from)?;
98 |                             Ok(s.into_column())
99 |                         })
100 |                         .collect::<PyResult<Vec<_>>>()
101 |                 })
102 |             } else {
103 |                 columns
104 |                     .into_iter()
105 |                     .enumerate()
106 |                     .map(|(i, arr)| {
107 |                         let s = Series::try_from((PlSmallStr::from_str(names[i].as_str()), arr))
108 |                             .map_err(ToRustError::from)?;
109 |                         Ok(s.into_column())
110 |                     })
111 |                     .collect::<PyResult<Vec<_>>>()
112 |             }?;
113 |
114 |             Ok(DataFrame::new(columns).map_err(ToRustError::from)?)
115 |         })
116 |         .collect::<PyResult<Vec<DataFrame>>>()?;
117 |
118 |     Ok(accumulate_dataframes_vertical(dfs).map_err(ToRustError::from)?)
119 | }
120 |
121 | impl std::convert::From<ToRustError> for PyErr {
122 |     fn from(err: ToRustError) -> PyErr {
123 |         let default = || PyRuntimeError::new_err(format!("{:?}", &err));
124 |
125 |         default()
126 |     }
127 | }
128 |
129 | create_exception!(exceptions, ArrowErrorException, PyException);
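And the opposite direction through to_rust.rs above (again a hypothetical sketch; row_count is invented and not part of the crate):

use pydf_io::to_rust::polars_df_to_rust_df;
use pyo3::prelude::*;

// Hypothetical example: accept a polars DataFrame from Python and inspect it in Rust.
#[pyfunction]
fn row_count(df: &Bound<'_, PyAny>) -> PyResult<usize> {
    // Moves the frame across the Arrow C data interface batch by batch.
    let df = polars_df_to_rust_df(df)?;
    Ok(df.height())
}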
--------------------------------------------------------------------------------
/lib/query_processing/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "query_processing"
3 | version = "0.3.12"
4 | edition = "2021"
5 | rust-version.workspace = true
6 |
7 | [dependencies]
8 | representation = { path = "../representation"}
9 | spargebra = { path = "../spargebra"}
10 |
11 | oxrdf.workspace = true
12 | polars = {workspace=true, default-features = false, features=[
13 |     "performant",
14 |     "nightly",
15 |     "cse",
16 |     "regex",
17 |     "zip_with",
18 |     "semi_anti_join",
19 |     "abs",
20 |     "round_series",
21 |     "lazy",
22 |     "concat_str",
23 |     "is_in",
24 |     "dtype-full",
25 |     "strings",
26 |     "rows",
27 |     "timezones",
28 |     "polars-time",
29 |     "temporal",
30 |     "list_eval",
31 |     "partition_by",
32 |     "parquet",
33 |     "diagonal_concat",
34 |     "cross_join",
35 |     "cum_agg",
36 |     "coalesce"] }
37 | tracing.workspace = true
38 | uuid.workspace = true
39 | thiserror.workspace = true
40 | rayon.workspace = true
41 |
42 | [dev-dependencies]
43 | rstest.workspace = true
--------------------------------------------------------------------------------
/lib/query_processing/README.md:
--------------------------------------------------------------------------------
1 | # query_processing
2 | Query processing common to maplib and chrontext
--------------------------------------------------------------------------------
/lib/query_processing/src/constants.rs:
--------------------------------------------------------------------------------
1 | pub const DATETIME_AS_MICROS: &str = "https://github.com/DataTreehouse/chrontext#DateTimeAsMicros";
2 | pub const MICROS_AS_DATETIME: &str = "https://github.com/DataTreehouse/chrontext#MicrosAsDateTime";
3 | pub const DATETIME_AS_SECONDS: &str =
4 |     "https://github.com/DataTreehouse/chrontext#DateTimeAsSeconds";
5 | pub const SECONDS_AS_DATETIME: &str =
6 |     "https://github.com/DataTreehouse/chrontext#SecondsAsDateTime";
7 |
8 | pub const FLOOR_DATETIME_TO_SECONDS_INTERVAL: &str =
9 |     "https://github.com/DataTreehouse/chrontext#FloorDateTimeToSecondsInterval";
10 | pub const MODULUS: &str = "https://github.com/DataTreehouse/chrontext#modulus";
11 |
12 | pub const TIME_BUCKET: &str = "https://github.com/DataTreehouse/chrontext#time_bucket";
13 |
14 | pub const LIST_AGGREGATION: &str = "https://github.com/DataTreehouse/maplib#ListAggregation";
15 |
16 | pub const DECODE: &str = "https://github.com/DataTreehouse/maplib#decodeString";
--------------------------------------------------------------------------------
/lib/query_processing/src/errors.rs:
--------------------------------------------------------------------------------
1 | use representation::{BaseRDFNodeType, RDFNodeState};
2 | use spargebra::algebra::Function;
3 | use thiserror::Error;
4 |
5 | #[derive(Error, Debug)]
6 | pub enum QueryProcessingError {
7 |     #[error("Inconsistent datatypes for {}, {:?}, {:?} in context {}", .0, .1, .2, .3)]
8 |     InconsistentDatatypes(String, RDFNodeState, RDFNodeState, String),
9 |     #[error("Variable ?{} not found in context {}",.0, .1)]
10 |     VariableNotFound(String, String),
11 |     #[error("Inconsistent datatypes when casting {} to
4 | pub fn rewrite_exists_graph_pattern( 5 | graph_pattern: &GraphPattern, 6 | helper_column_name: &str, 7 | ) -> GraphPattern { 8 | match graph_pattern { 9 | GraphPattern::Join { left, right } => GraphPattern::Join { 10 | left: Box::new(rewrite_exists_graph_pattern(left, helper_column_name)), 11 | right: Box::new(rewrite_exists_graph_pattern(right, helper_column_name)), 12 | }, 13 | GraphPattern::LeftJoin { 14 | left, 15 | right, 16 | expression, 17 | } => GraphPattern::LeftJoin { 18 | left: Box::new(rewrite_exists_graph_pattern(left, helper_column_name)), 19 | right: Box::new(rewrite_exists_graph_pattern(right, helper_column_name)), 20 | expression: expression.clone(), 21 | }, 22 | GraphPattern::Filter { expr, inner } => GraphPattern::Filter { 23 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 24 | expr: expr.clone(), 25 | }, 26 | GraphPattern::Union { left, right } => GraphPattern::Union { 27 | left: Box::new(rewrite_exists_graph_pattern(left, helper_column_name)), 28 | right: Box::new(rewrite_exists_graph_pattern(right, helper_column_name)), 29 | }, 30 | GraphPattern::Graph { name, inner } => GraphPattern::Graph { 31 | name: name.clone(), 32 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 33 | }, 34 | GraphPattern::Extend { 35 | inner, 36 | variable, 37 | expression, 38 | } => GraphPattern::Extend { 39 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 40 | variable: variable.clone(), 41 | expression: expression.clone(), 42 | }, 43 | GraphPattern::Minus { left, right } => GraphPattern::Minus { 44 | left: Box::new(rewrite_exists_graph_pattern(left, helper_column_name)), 45 | right: right.clone(), 46 | }, 47 | GraphPattern::OrderBy { inner, expression } => GraphPattern::OrderBy { 48 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 49 | expression: expression.clone(), 50 | }, 51 | GraphPattern::Project { inner, variables } => { 52 | let mut new_variables = variables.clone(); 53 | new_variables.push(Variable::new_unchecked(helper_column_name)); 54 | let new_inner = rewrite_exists_graph_pattern(inner, helper_column_name); 55 | GraphPattern::Project { 56 | inner: Box::new(new_inner), 57 | variables: new_variables, 58 | } 59 | } 60 | GraphPattern::Distinct { inner } => GraphPattern::Distinct { 61 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 62 | }, 63 | GraphPattern::Reduced { inner } => GraphPattern::Reduced { 64 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 65 | }, 66 | GraphPattern::Slice { 67 | inner, 68 | start, 69 | length, 70 | } => GraphPattern::Slice { 71 | inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), 72 | start: *start, 73 | length: *length, 74 | }, 75 | GraphPattern::Group { 76 | inner, 77 | variables, 78 | aggregates, 79 | } => { 80 | let mut new_aggregates = aggregates.clone(); 81 | let
new_var = Variable::new_unchecked(helper_column_name); 82 | let new_inner = rewrite_exists_graph_pattern(inner, helper_column_name); 83 | new_aggregates.push(( 84 | new_var.clone(), 85 | AggregateExpression::FunctionCall { 86 | name: AggregateFunction::Max, 87 | expr: Expression::Variable(new_var), 88 | distinct: false, 89 | }, 90 | )); 91 | GraphPattern::Group { 92 | inner: Box::new(new_inner), 93 | variables: variables.clone(), 94 | aggregates: new_aggregates, 95 | } 96 | } 97 | _ => graph_pattern.clone(), 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /lib/query_processing/src/graph_patterns/cats.rs: -------------------------------------------------------------------------------- 1 | use polars::prelude::{by_name, col, LazyFrame}; 2 | use representation::cats::{ 3 | maybe_decode_expr, optional_maybe_decode_expr, CatReEnc, Cats, LockedCats, 4 | }; 5 | use representation::solution_mapping::BaseCatState; 6 | use representation::{BaseRDFNodeType, RDFNodeState}; 7 | 8 | pub enum CatOperation { 9 | Decode, 10 | ReEnc(CatReEnc), 11 | } 12 | 13 | impl CatOperation { 14 | pub fn apply( 15 | self, 16 | mut mappings: LazyFrame, 17 | c: &str, 18 | t: &RDFNodeState, 19 | base_t: &BaseRDFNodeType, 20 | global_cats: LockedCats, 21 | ) -> LazyFrame { 22 | match self { 23 | CatOperation::Decode => { 24 | if t.is_multi() || t.is_lang_string() { 25 | let mut fields = vec![]; 26 | for s in base_t.multi_columns() { 27 | if let Some(e) = optional_maybe_decode_expr( 28 | col(c).struct_().field_by_name(&s), 29 | base_t, 30 | t.map.get(base_t).unwrap(), 31 | global_cats.clone(), 32 | ) { 33 | fields.push(e); 34 | } 35 | } 36 | if !fields.is_empty() { 37 | mappings = mappings.with_column(col(c).struct_().with_fields(fields)); 38 | } 39 | } else { 40 | mappings = mappings.with_column(maybe_decode_expr( 41 | col(c), 42 | base_t, 43 | t.map.get(base_t).unwrap(), 44 | global_cats.clone(), 45 | )); 46 | } 47 | } 48 | CatOperation::ReEnc(cat_re_enc) => { 49 | if !t.is_multi() { 50 | mappings = cat_re_enc.re_encode(mappings, c, false); 51 | } else { 52 | let tmp = uuid::Uuid::new_v4().to_string(); 53 | let n = base_t.field_col_name(); 54 | mappings = mappings.with_column(col(c).struct_().field_by_name(&n).alias(&tmp)); 55 | mappings = cat_re_enc.re_encode(mappings, &tmp, false); 56 | mappings = mappings 57 | .with_column(col(c).struct_().with_fields(vec![col(&tmp).alias(&n)])); 58 | mappings = mappings.drop(by_name([tmp], true)); 59 | } 60 | } 61 | } 62 | mappings 63 | } 64 | } 65 |
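// create_compatible_cats (below) reconciles the cat-encoding states of two columns
// before they are combined. A rough usage sketch, assuming `left` and `right` are the
// states of the two sides (names illustrative, not part of this module):
//
// let (joint_state, left_op, right_op) = create_compatible_cats(&left, &right);
// // Apply left_op to the left LazyFrame and right_op to the right one; afterwards
// // both sides share joint_state and can be joined or unioned directly.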
66 | pub fn create_compatible_cats( 67 | left: &BaseCatState, 68 | right: &BaseCatState, 69 | ) -> (BaseCatState, Option<CatOperation>, Option<CatOperation>) { 70 | match left { 71 | BaseCatState::CategoricalNative(_left_sorted, None) => match right { 72 | BaseCatState::CategoricalNative(_right_sorted, None) => { 73 | //TODO! FIX SO THAT WE CAN SET TO && BECAUSE DEPENDS ON GLOBAL SORT STATE 74 | (BaseCatState::CategoricalNative(false, None), None, None) 75 | } 76 | BaseCatState::CategoricalNative(_, right_local_cats) => ( 77 | BaseCatState::CategoricalNative(false, right_local_cats.as_ref().cloned()), 78 | None, 79 | None, 80 | ), 81 | BaseCatState::String => (BaseCatState::String, Some(CatOperation::Decode), None), 82 | BaseCatState::NonString => { 83 | unreachable!("Should never happen") 84 | } 85 | }, 86 | BaseCatState::CategoricalNative(_, Some(left_local_cats)) => match right { 87 | BaseCatState::CategoricalNative(_, None) => ( 88 | BaseCatState::CategoricalNative(false, Some(left_local_cats.clone())), 89 | None, 90 | None, 91 | ), 92 | BaseCatState::CategoricalNative(_, Some(right_local_cats)) => { 93 | let re_enc = Cats::join(left_local_cats.clone(), right_local_cats.clone()); 94 | ( 95 | BaseCatState::CategoricalNative(false, Some(left_local_cats.clone())), 96 | None, 97 | Some(CatOperation::ReEnc(re_enc)), 98 | ) 99 | } 100 | BaseCatState::String | BaseCatState::NonString => { 101 | unreachable!("Should never happen") 102 | } 103 | }, 104 | BaseCatState::String => match right { 105 | BaseCatState::CategoricalNative(..) => { 106 | (BaseCatState::String, None, Some(CatOperation::Decode)) 107 | } 108 | BaseCatState::NonString => { 109 | unreachable!("Should never happen") 110 | } 111 | BaseCatState::String => (BaseCatState::String, None, None), 112 | }, 113 | BaseCatState::NonString => match right { 114 | BaseCatState::CategoricalNative(..) | BaseCatState::String => { 115 | unreachable!("Should never happen") 116 | } 117 | BaseCatState::NonString => (BaseCatState::NonString, None, None), 118 | }, 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /lib/query_processing/src/graph_patterns/group.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::QueryProcessingError; 2 | use polars::prelude::{by_name, Expr}; 3 | use representation::solution_mapping::SolutionMappings; 4 | 5 | pub fn group_by( 6 | solution_mappings: SolutionMappings, 7 | aggregate_expressions: Vec<Expr>, 8 | by: Vec<Expr>, 9 | dummy_varname: Option<String>, 10 | ) -> Result<SolutionMappings, QueryProcessingError> { 11 | let SolutionMappings { 12 | mut mappings, 13 | rdf_node_types, 14 | height_estimate, 15 | } = solution_mappings; 16 | let grouped_mappings = mappings.group_by(by.as_slice()); 17 | 18 | mappings = grouped_mappings.agg(aggregate_expressions.as_slice()); 19 | if let Some(dummy_varname) = dummy_varname { 20 | mappings = mappings.drop(by_name([&dummy_varname], false)); 21 | } 22 | Ok(SolutionMappings::new( 23 | mappings, 24 | rdf_node_types, 25 | height_estimate, 26 | )) 27 | } 28 | -------------------------------------------------------------------------------- /lib/query_processing/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod aggregates; 2 | pub mod cats; 3 | pub mod constants; 4 | pub mod errors; 5 | pub mod exists_helper; 6 | pub mod expressions; 7 | pub mod find_query_variables; 8 | pub mod graph_patterns; 9 | pub mod pushdowns; 10 | pub mod type_constraints; 11 | -------------------------------------------------------------------------------- /lib/report_mapping/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "report_mapping" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | maplib = {path="../maplib"} 8 | shacl = {path =
"../shacl"} 9 | triplestore = {path = "../triplestore"} -------------------------------------------------------------------------------- /lib/report_mapping/src/lib.rs: -------------------------------------------------------------------------------- 1 | use maplib::mapping::Model; 2 | use shacl::errors::ShaclError; 3 | use shacl::ValidationReport; 4 | use triplestore::Triplestore; 5 | 6 | pub fn report_to_model( 7 | _report: &ValidationReport, 8 | _shape_graph: &Option, 9 | ) -> Result { 10 | unimplemented!("Contact Data Treehouse to try") 11 | } 12 | -------------------------------------------------------------------------------- /lib/representation/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "representation" 3 | version = "0.6.10" 4 | edition = "2021" 5 | rust-version.workspace = true 6 | 7 | [features] 8 | pyo3 = [ "utils/pyo3" ] 9 | 10 | [dependencies] 11 | utils = { path = "../utils" } 12 | spargebra = {path = "../spargebra"} 13 | 14 | tracing.workspace = true 15 | oxrdf = {workspace = true, features = ["oxsdatatypes"]} 16 | polars = {workspace = true, features=["cum_agg","diagonal_concat", "merge_sorted", "performant", "new_streaming", "lazy", "dtype-full", "timezones", "polars-time", "partition_by", "strings"], default-features = false } 17 | chrono.workspace = true 18 | chrono-tz.workspace = true 19 | thiserror.workspace = true 20 | pyo3 = {workspace = true, features = ["chrono-tz", "chrono"]} 21 | oxsdatatypes.workspace = true 22 | rayon.workspace = true 23 | nohash-hasher.workspace = true 24 | uuid.workspace = true 25 | 26 | [lints.rust] 27 | unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("gil-refs", "rdf-star"))'] } 28 | -------------------------------------------------------------------------------- /lib/representation/README.md: -------------------------------------------------------------------------------- 1 | # representation 2 | Internal representation of solution mappings for chrontext and maplib 3 | -------------------------------------------------------------------------------- /lib/representation/src/cats/globalize.rs: -------------------------------------------------------------------------------- 1 | use super::{encode_triples, rdf_split_iri_str, re_encode, CatEncs, CatTriples, CatType, Cats}; 2 | use crate::solution_mapping::BaseCatState; 3 | use crate::BaseRDFNodeType; 4 | use oxrdf::NamedNode; 5 | use polars::frame::DataFrame; 6 | use tracing::instrument; 7 | 8 | impl Cats { 9 | pub fn globalize(&mut self, mut cat_triples: Vec) -> Vec { 10 | let local_cats: Vec<_> = cat_triples 11 | .iter_mut() 12 | .map(|x| x.local_cats.drain(..)) 13 | .flatten() 14 | .collect(); 15 | let re_enc_map = self.merge(local_cats); 16 | let global_cats = re_encode(cat_triples, re_enc_map); 17 | global_cats 18 | } 19 | 20 | pub fn encode_predicates(&mut self, cat_triples: &Vec) { 21 | for ct in cat_triples { 22 | let (pre, suf) = rdf_split_iri_str(ct.predicate.as_str()); 23 | let ct = CatType::Prefix(NamedNode::new_unchecked(pre)); 24 | if !self.cat_map.contains_key(&ct) { 25 | self.cat_map.insert(ct.clone(), CatEncs::new_empty()); 26 | } 27 | 28 | let enc = self.cat_map.get_mut(&ct).unwrap(); 29 | if !enc.contains_key(suf) { 30 | enc.encode_new_str(&suf, self.iri_height); 31 | self.iri_height += 1; 32 | } 33 | } 34 | } 35 | } 36 | 37 | #[instrument(skip_all)] 38 | pub fn cat_encode_triples( 39 | df: DataFrame, 40 | subject_type: BaseRDFNodeType, 41 | object_type: BaseRDFNodeType, 42 | 
predicate: NamedNode, 43 | subject_cat_state: BaseCatState, 44 | object_cat_state: BaseCatState, 45 | global_cats: &Cats, 46 | ) -> CatTriples { 47 | let (local_cats, encoded_triples) = encode_triples( 48 | df, 49 | &subject_type, 50 | &object_type, 51 | subject_cat_state, 52 | object_cat_state, 53 | global_cats, 54 | ); 55 | 56 | CatTriples { 57 | encoded_triples, 58 | predicate, 59 | subject_type, 60 | object_type, 61 | local_cats, 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /lib/representation/src/cats/split.rs: -------------------------------------------------------------------------------- 1 | use oxrdf::NamedNode; 2 | use polars::prelude::Series; 3 | use rayon::iter::ParallelIterator; 4 | use std::collections::HashMap; 5 | 6 | pub fn named_node_split_prefix(nn: &NamedNode) -> NamedNode { 7 | NamedNode::new_unchecked(rdf_split_iri_str(nn.as_str()).0) 8 | } 9 | 10 | pub fn named_node_split_suffix(nn: &NamedNode) -> NamedNode { 11 | NamedNode::new_unchecked(rdf_split_iri_str(nn.as_str()).1) 12 | } 13 | 14 | pub fn split_iri_series<'a>( 15 | series: &'a Series, 16 | ) -> (Vec<Option<u32>>, Vec<Option<&'a str>>, HashMap<String, u32>) { 17 | let series_str = series.str().unwrap(); 18 | let mut prefix_map = HashMap::new(); 19 | let mut prefixes = Vec::with_capacity(series.len()); 20 | 21 | let (new_prefixes, suffixes): (Vec<_>, Vec<_>) = series_str 22 | .par_iter() 23 | .map(|x| { 24 | if let Some(iri) = x { 25 | let (pre, suf) = rdf_split_iri_str(iri); 26 | (Some(pre), Some(suf)) 27 | } else { 28 | (None, None) 29 | } 30 | }) 31 | .unzip(); 32 | for p in new_prefixes { 33 | if let Some(p) = p { 34 | if let Some(v) = prefix_map.get(p) { 35 | prefixes.push(Some(*v)); 36 | } else { 37 | let new_v = prefix_map.len() as u32; 38 | prefix_map.insert(p.to_string(), new_v); 39 | prefixes.push(Some(new_v)); 40 | } 41 | } else { 42 | prefixes.push(None) 43 | } 44 | } 45 | 46 | (prefixes, suffixes, prefix_map) 47 | } 48 |
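// Splits an IRI at the last '/', '#' or ':' (see DELIMITERS below), keeping the
// delimiter on the prefix side. For example:
//
// rdf_split_iri_str("http://example.com/ns#Thing") == ("http://example.com/ns#", "Thing")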
49 | pub fn rdf_split_iri_str(iri: &str) -> (&str, &str) { 50 | // Apache 2 / MIT The Rust Project Contributors 51 | #[inline] 52 | fn rsplit_once_inclusive_l<P: std::str::pattern::Pattern>( 53 | this: &str, 54 | delimiter: P, 55 | ) -> Option<(&'_ str, &'_ str)> 56 | where 57 | for<'a> P::Searcher<'a>: std::str::pattern::ReverseSearcher<'a>, 58 | { 59 | let (_, end) = std::str::pattern::ReverseSearcher::next_match_back( 60 | &mut delimiter.into_searcher(this), 61 | )?; 62 | // SAFETY: `Searcher` is known to return valid indices. 63 | unsafe { Some((this.get_unchecked(..end), this.get_unchecked(end..))) } 64 | } 65 | 66 | const DELIMITERS: &[char] = &['/', '#', ':']; 67 | 68 | let (prefix, suffix) = match rsplit_once_inclusive_l(iri, DELIMITERS) { 69 | Some(pair) => pair, 70 | None => ("", iri), 71 | }; 72 | (prefix, suffix) 73 | } 74 | -------------------------------------------------------------------------------- /lib/representation/src/dataset.rs: -------------------------------------------------------------------------------- 1 | use oxrdf::NamedNode; 2 | use spargebra::algebra::QueryDataset; 3 | use std::fmt::{Display, Formatter}; 4 | 5 | #[derive(Debug, Clone, Hash, Eq, PartialEq)] 6 | pub enum NamedGraph { 7 | DefaultGraph, 8 | NamedGraph(NamedNode), 9 | } 10 | 11 | impl NamedGraph { 12 | pub fn from_maybe_named_node(nn: Option<&NamedNode>) -> NamedGraph { 13 | if let Some(nn) = nn { 14 | NamedGraph::NamedGraph(nn.clone()) 15 | } else { 16 | NamedGraph::DefaultGraph 17 | } 18 | } 19 | } 20 | 21 | impl Default for NamedGraph { 22 | fn default() -> Self { 23 | NamedGraph::DefaultGraph 24 | } 25 | } 26 | 27 | impl Display for NamedGraph { 28 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 29 | match self { 30 | NamedGraph::DefaultGraph => { 31 | write!(f, "default graph") 32 | } 33 | NamedGraph::NamedGraph(nn) => { 34 | write!(f, "{}", nn) 35 | } 36 | } 37 | } 38 | } 39 | 40 | #[derive(Debug, Clone)] 41 | pub enum QueryGraph { 42 | NamedGraph(NamedGraph), 43 | QueryDataset(QueryDataset), 44 | } 45 | 46 | impl QueryGraph { 47 | pub fn from_named_graph(ng: &NamedGraph) -> QueryGraph { 48 | QueryGraph::NamedGraph(ng.clone()) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /lib/representation/src/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::*; 2 | 3 | #[derive(Debug, Error)] 4 | pub enum RepresentationError { 5 | #[error("Datatype error `{0}`")] 6 | DatatypeError(String), 7 | #[error("Invalid literal `{0}`")] 8 | InvalidLiteralError(String), 9 | } 10 | -------------------------------------------------------------------------------- /lib/representation/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(pattern)] 2 | 3 | pub mod multitype; 4 | pub mod polars_to_rdf; 5 | pub mod query_context; 6 | pub mod rdf_to_polars; 7 | pub mod solution_mapping; 8 | 9 | mod base_rdf_type; 10 | pub mod cats; 11 | pub mod dataset; 12 | pub mod errors; 13 | pub mod formatting; 14 | pub mod literals; 15 | pub mod python; 16 | mod rdf_state; 17 | mod rdf_type; 18 | pub mod subtypes; 19 | 20 | pub use base_rdf_type::*; 21 | pub use rdf_state::*; 22 | pub use rdf_type::*; 23 | 24 | use crate::subtypes::{is_literal_subtype, OWL_REAL}; 25 | use oxrdf::vocab::xsd; 26 | use oxrdf::{BlankNode, NamedNode, NamedNodeRef, NamedOrBlankNode, Term}; 27 | 28 | pub const PREDICATE_COL_NAME: &str = "predicate"; 29 | pub const OBJECT_COL_NAME: &str = "object"; 30 | pub const SUBJECT_COL_NAME: &str = "subject"; 31 | pub const LANG_STRING_VALUE_FIELD: &str = ""; 32 | pub const LANG_STRING_LANG_FIELD: &str = "l"; 33 | 34 | pub const RDF_NODE_TYPE_IRI: &str = "IRI"; 35 | pub const RDF_NODE_TYPE_BLANK_NODE: &str = "Blank"; 36 | pub const RDF_NODE_TYPE_NONE: &str = "None"; 37 | 38 | pub fn literal_iri_to_namednode(s: &str) -> NamedNode { 39 | NamedNode::new_unchecked(s) 40 | } 41 | 42 | pub fn literal_blanknode_to_blanknode(b: &str) -> BlankNode { 43 |
BlankNode::new_unchecked(b) 44 | } 45 | 46 | pub fn owned_term_to_named_or_blank_node(t: Term) -> Option<NamedOrBlankNode> { 47 | match t { 48 | Term::NamedNode(nn) => Some(NamedOrBlankNode::NamedNode(nn)), 49 | Term::BlankNode(bl) => Some(NamedOrBlankNode::BlankNode(bl)), 50 | _ => None, 51 | } 52 | } 53 | 54 | pub fn owned_term_to_named_node(t: Term) -> Option<NamedNode> { 55 | match t { 56 | Term::NamedNode(nn) => Some(nn), 57 | _ => None, 58 | } 59 | } 60 | 61 | pub fn literal_is_numeric(l: NamedNodeRef) -> bool { 62 | matches!(l, xsd::FLOAT | xsd::DOUBLE) 63 | || is_literal_subtype(l, NamedNode::new_unchecked(OWL_REAL).as_ref()) 64 | } 65 | 66 | pub fn literal_is_boolean(l: NamedNodeRef) -> bool { 67 | matches!(l, xsd::BOOLEAN) 68 | } 69 | 70 | pub fn literal_is_datetime(l: NamedNodeRef) -> bool { 71 | matches!(l, xsd::DATE_TIME) || matches!(l, xsd::DATE_TIME_STAMP) 72 | } 73 | 74 | pub fn literal_is_date(l: NamedNodeRef) -> bool { 75 | matches!(l, xsd::DATE) 76 | } 77 | 78 | pub fn literal_is_string(l: NamedNodeRef) -> bool { 79 | matches!(l, xsd::STRING) 80 | } 81 | -------------------------------------------------------------------------------- /lib/representation/src/literals.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::RepresentationError; 2 | use oxrdf::{Literal, Term}; 3 | 4 | pub fn parse_literal_as_primitive<T: std::str::FromStr>( 5 | l: Literal, 6 | ) -> Result<T, RepresentationError> { 7 | let parsed = l.value().parse().map_err(|_x| { 8 | RepresentationError::InvalidLiteralError(format!("Could not parse as literal {l}")) 9 | })?; 10 | Ok(parsed) 11 | } 12 | 13 | pub fn parse_term_as_primitive<T: std::str::FromStr>(term: Term) -> Result<T, RepresentationError> { 14 | match term { 15 | Term::Literal(l) => parse_literal_as_primitive(l), 16 | _ => Err(RepresentationError::InvalidLiteralError(format!( 17 | "Wrong term type when trying to parse literal {term}" 18 | ))), 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /lib/representation/src/rdf_type.rs: -------------------------------------------------------------------------------- 1 | use crate::base_rdf_type::BaseRDFNodeTypeRef; 2 | use oxrdf::NamedNodeRef; 3 | 4 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 5 | pub enum RDFNodeTypeRef<'a> { 6 | IRI, 7 | BlankNode, 8 | Literal(NamedNodeRef<'a>), 9 | None, 10 | MultiType(Vec<BaseRDFNodeTypeRef<'a>>), 11 | } 12 | -------------------------------------------------------------------------------- /lib/representation/src/solution_mapping.rs: -------------------------------------------------------------------------------- 1 | use crate::cats::LockedCats; 2 | use crate::RDFNodeState; 3 | use oxrdf::vocab::xsd; 4 | use polars::prelude::{DataFrame, IntoLazy, LazyFrame}; 5 | use std::collections::HashMap; 6 | use utils::polars::InterruptableCollectError; 7 | 8 | use utils::polars::pl_interruptable_collect; 9 | 10 | #[derive(Clone, Debug)] 11 | pub enum BaseCatState { 12 | CategoricalNative(bool, Option<LockedCats>), 13 | String, 14 | NonString, 15 | } 16 | 17 | impl BaseCatState { 18 | pub fn get_local_cats(&self) -> Option<LockedCats> { 19 | match self { 20 | BaseCatState::CategoricalNative(_, local_cats) => { 21 | if let Some(local_cats) = local_cats { 22 | Some(local_cats.clone()) 23 | } else { 24 | None 25 | } 26 | } 27 | BaseCatState::String | BaseCatState::NonString => None, 28 | } 29 | } 30 | } 31 | 32 | #[derive(Clone)] 33 | pub struct SolutionMappings { 34 | pub mappings: LazyFrame, 35 | pub rdf_node_types: HashMap<String, RDFNodeState>, 36 | pub height_estimate: usize, 37 | } 38 |
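// A minimal sketch of the eager/lazy round trip, assuming `df` is a DataFrame with a
// matching `types: HashMap<String, RDFNodeState>` (names illustrative):
//
// let eager = EagerSolutionMappings::new(df, types);
// let lazy = eager.as_lazy();              // height estimate taken from df.height()
// let eager_again = lazy.as_eager(false);  // collect without streaming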
39 | #[derive(Clone, Debug)] 40 | pub struct EagerSolutionMappings { 41 | pub mappings: DataFrame, 42 | pub rdf_node_types: HashMap<String, RDFNodeState>, 43 | } 44 | 45 | impl EagerSolutionMappings { 46 | pub fn new( 47 | mappings: DataFrame, 48 | rdf_node_types: HashMap<String, RDFNodeState>, 49 | ) -> EagerSolutionMappings { 50 | EagerSolutionMappings { 51 | mappings, 52 | rdf_node_types, 53 | } 54 | } 55 | pub fn as_lazy(self) -> SolutionMappings { 56 | let EagerSolutionMappings { 57 | mappings, 58 | rdf_node_types, 59 | } = self; 60 | let height = mappings.height(); 61 | SolutionMappings::new(mappings.lazy(), rdf_node_types, height) 62 | } 63 | } 64 | 65 | impl SolutionMappings { 66 | pub fn new( 67 | mappings: LazyFrame, 68 | rdf_node_types: HashMap<String, RDFNodeState>, 69 | height_upper_bound: usize, 70 | ) -> SolutionMappings { 71 | SolutionMappings { 72 | mappings, 73 | rdf_node_types, 74 | height_estimate: height_upper_bound, 75 | } 76 | } 77 | 78 | pub fn as_eager(self, streaming: bool) -> EagerSolutionMappings { 79 | EagerSolutionMappings { 80 | mappings: self 81 | .mappings 82 | .with_new_streaming(streaming) 83 | .collect() 84 | .unwrap(), 85 | rdf_node_types: self.rdf_node_types, 86 | } 87 | } 88 | 89 | pub fn as_eager_interruptable( 90 | self, 91 | streaming: bool, 92 | ) -> Result<EagerSolutionMappings, InterruptableCollectError> { 93 | { 94 | let df = pl_interruptable_collect(self.mappings.with_new_streaming(streaming))?; 95 | Ok(EagerSolutionMappings { 96 | mappings: df, 97 | rdf_node_types: self.rdf_node_types, 98 | }) 99 | } 100 | } 101 | } 102 | 103 | pub fn is_literal_string_col(rdf_node_type: &RDFNodeState) -> bool { 104 | rdf_node_type.is_lit_type(xsd::STRING) 105 | } 106 | -------------------------------------------------------------------------------- /lib/representation/src/subtypes.rs: -------------------------------------------------------------------------------- 1 | use oxrdf::vocab::{rdfs, xsd}; 2 | use oxrdf::NamedNodeRef; 3 | 4 | pub const OWL_REAL: &str = "http://www.w3.org/2002/07/owl#real"; 5 | pub const OWL_RATIONAL: &str = "http://www.w3.org/2002/07/owl#rational"; 6 | 7 | // s literal subtype of t 8 | pub fn is_literal_subtype(s: NamedNodeRef, t: NamedNodeRef) -> bool { 9 | if s == t || t == rdfs::LITERAL { 10 | true 11 | } else if t.as_str() == OWL_REAL { 12 | owl_real_subtype(s) 13 | } else if t.as_str() == OWL_RATIONAL { 14 | owl_rational_subtype(s) 15 | } else { 16 | match t { 17 | xsd::DECIMAL => xsd_decimal_subtype(s), 18 | xsd::DOUBLE => xsd_double_subtype(s), 19 | xsd::INTEGER => xsd_integer_subtype(s), 20 | xsd::LONG => xsd_long_subtype(s), 21 | xsd::INT => xsd_int_subtype(s), 22 | xsd::SHORT => xsd_short_subtype(s), 23 | xsd::NON_NEGATIVE_INTEGER => xsd_non_negative_integer_subtype(s), 24 | xsd::POSITIVE_INTEGER => xsd_positive_integer_subtype(s), 25 | xsd::UNSIGNED_LONG => xsd_unsigned_long_subtype(s), 26 | xsd::UNSIGNED_INT => xsd_unsigned_int_subtype(s), 27 | xsd::UNSIGNED_SHORT => xsd_unsigned_short_subtype(s), 28 | xsd::NON_POSITIVE_INTEGER => xsd_non_positive_integer_subtype(s), 29 | xsd::DURATION => { 30 | matches!(s, xsd::YEAR_MONTH_DURATION | xsd::DAY_TIME_DURATION) 31 | } 32 | xsd::DATE_TIME => { 33 | matches!(s, xsd::DATE_TIME_STAMP) 34 | } 35 | _ => false, 36 | } 37 | } 38 | } 39 | 40 | fn owl_real_subtype(s: NamedNodeRef) -> bool { 41 | matches!(s.as_str(), OWL_RATIONAL) || owl_rational_subtype(s) 42 | } 43 | 44 | fn owl_rational_subtype(s: NamedNodeRef) -> bool { 45 | matches!(s, xsd::DECIMAL) || xsd_decimal_subtype(s) 46 | } 47 | 48 | fn xsd_decimal_subtype(s: NamedNodeRef) -> bool { 49 | matches!(s, xsd::INTEGER) || xsd_integer_subtype(s) 50 | } 51 | 52 | fn xsd_double_subtype(s: NamedNodeRef) -> bool { 53
| matches!(s, xsd::FLOAT) 54 | } 55 | 56 | fn xsd_integer_subtype(s: NamedNodeRef) -> bool { 57 | matches!( 58 | s, 59 | xsd::LONG | xsd::NON_NEGATIVE_INTEGER | xsd::NON_POSITIVE_INTEGER 60 | ) || xsd_long_subtype(s) 61 | || xsd_non_negative_integer_subtype(s) 62 | || xsd_non_positive_integer_subtype(s) 63 | } 64 | 65 | fn xsd_non_positive_integer_subtype(s: NamedNodeRef) -> bool { 66 | matches!(s, xsd::NEGATIVE_INTEGER) 67 | } 68 | 69 | fn xsd_non_negative_integer_subtype(s: NamedNodeRef) -> bool { 70 | matches!(s, xsd::POSITIVE_INTEGER) || xsd_positive_integer_subtype(s) 71 | } 72 | 73 | fn xsd_positive_integer_subtype(s: NamedNodeRef) -> bool { 74 | matches!(s, xsd::UNSIGNED_LONG) || xsd_unsigned_long_subtype(s) 75 | } 76 | 77 | fn xsd_unsigned_long_subtype(s: NamedNodeRef) -> bool { 78 | matches!(s, xsd::UNSIGNED_INT) || xsd_unsigned_int_subtype(s) 79 | } 80 | 81 | fn xsd_unsigned_int_subtype(s: NamedNodeRef) -> bool { 82 | matches!(s, xsd::UNSIGNED_SHORT) || xsd_unsigned_short_subtype(s) 83 | } 84 | 85 | fn xsd_unsigned_short_subtype(s: NamedNodeRef) -> bool { 86 | matches!(s, xsd::UNSIGNED_BYTE) 87 | } 88 | 89 | fn xsd_long_subtype(s: NamedNodeRef) -> bool { 90 | matches!(s, xsd::INT) || xsd_int_subtype(s) 91 | } 92 | 93 | fn xsd_int_subtype(s: NamedNodeRef) -> bool { 94 | matches!(s, xsd::SHORT) || xsd_short_subtype(s) 95 | } 96 | 97 | fn xsd_short_subtype(s: NamedNodeRef) -> bool { 98 | matches!(s, xsd::BYTE) 99 | } 100 | -------------------------------------------------------------------------------- /lib/shacl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shacl" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [features] 7 | pyo3 = [ "dep:pyo3", "triplestore/pyo3" ] 8 | 9 | [dependencies] 10 | representation = { path = "../representation" } 11 | triplestore = {path="../triplestore"} 12 | thiserror.workspace = true 13 | oxrdf.workspace = true 14 | polars.workspace = true 15 | 16 | pyo3 = { workspace = true, optional = true } 17 | -------------------------------------------------------------------------------- /lib/shacl/src/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | use triplestore::errors::TriplestoreError; 3 | 4 | #[derive(Error, Debug)] 5 | pub enum ShaclError { 6 | #[error("Contact DataTreehouse for SHACL support! 
Triplestore: {0}")] 7 | TriplestoreError(#[from] TriplestoreError), 8 | } 9 | -------------------------------------------------------------------------------- /lib/shacl/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod errors; 2 | pub mod storage; 3 | 4 | use crate::storage::StoredResults; 5 | use errors::ShaclError; 6 | use oxrdf::{NamedNode, NamedOrBlankNode}; 7 | use polars::prelude::DataFrame; 8 | use representation::solution_mapping::SolutionMappings; 9 | use std::time::Duration; 10 | use triplestore::Triplestore; 11 | 12 | use representation::cats::LockedCats; 13 | use representation::dataset::NamedGraph; 14 | 15 | #[derive(Debug, Clone)] 16 | pub struct ShapeTargets { 17 | pub shape_node: NamedOrBlankNode, 18 | pub context: String, 19 | pub count: usize, 20 | } 21 | 22 | #[derive(Debug, Clone)] 23 | pub struct Performance { 24 | pub shape_node: NamedOrBlankNode, 25 | pub context: String, 26 | pub duration: Duration, 27 | } 28 | 29 | #[derive(Debug, Clone)] 30 | pub struct ValidationReport { 31 | pub conforms: Option<bool>, 32 | pub results: Option<StoredResults>, 33 | pub validation_performance: Vec<Performance>, 34 | pub targets_performance: Vec<Performance>, 35 | pub shape_targets: Vec<ShapeTargets>, 36 | pub cats: Option<LockedCats>, 37 | } 38 | 39 | impl ValidationReport { 40 | pub fn concatenated_results(&self) -> Result<Option<SolutionMappings>, ShaclError> { 41 | unimplemented!("Contact Data Treehouse to try") 42 | } 43 | 44 | pub fn concatenated_details(&self) -> Result<Option<SolutionMappings>, ShaclError> { 45 | unimplemented!("Contact Data Treehouse to try") 46 | } 47 | pub fn performance_df(&self) -> DataFrame { 48 | unimplemented!("Contact Data Treehouse to try") 49 | } 50 | 51 | pub fn shape_targets_df(&self) -> DataFrame { 52 | unimplemented!("Contact Data Treehouse to try") 53 | } 54 | } 55 | 56 | pub fn validate( 57 | _data_triplestore: &mut Triplestore, 58 | _data_graph: &NamedGraph, 59 | _shapes_graph: &NamedGraph, 60 | _include_details: bool, 61 | _include_conforms: bool, 62 | _streaming: bool, 63 | _max_shape_constraint_results: Option<usize>, 64 | _include_transient: bool, 65 | _only_shapes: Option<Vec<NamedNode>>, 66 | _deactivate_shapes: Vec<NamedNode>, 67 | _dry_run: bool, 68 | ) -> Result<ValidationReport, ShaclError> { 69 | unimplemented!("Contact Data Treehouse to try") 70 | } 71 | -------------------------------------------------------------------------------- /lib/shacl/src/storage.rs: -------------------------------------------------------------------------------- 1 | use representation::solution_mapping::EagerSolutionMappings; 2 | use representation::RDFNodeState; 3 | use std::collections::HashMap; 4 | use std::path::PathBuf; 5 | 6 | #[allow(dead_code)] 7 | #[derive(Debug, Clone)] 8 | pub struct StoredResults { 9 | folder_path: Option<PathBuf>, 10 | stored_results: Vec<StoredSolutionMappings>, 11 | stored_details: Vec<StoredSolutionMappings>, 12 | } 13 | 14 | #[derive(Debug, Clone)] 15 | pub enum StoredSolutionMappings { 16 | EagerSolutionMappings(EagerSolutionMappings), 17 | SolutionMappingsOnDisk(SolutionMappingsOnDisk), 18 | } 19 | 20 | #[allow(dead_code)] 21 | #[derive(Debug, Clone)] 22 | pub struct SolutionMappingsOnDisk { 23 | height: usize, 24 | rdf_node_types: HashMap<String, RDFNodeState>, 25 | file: PathBuf, 26 | } 27 | -------------------------------------------------------------------------------- /lib/spargebra/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "spargebra" 3 | version = "0.3.0-alpha.5-parparse" 4 | authors = ["Tpt <thomas@pellissier-tanon.fr>"] 5 | license = "MIT OR Apache-2.0" 6 | readme = "README.md" 7 | keywords = ["SPARQL"] 8 | repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/spargebra" 9 | homepage = "https://oxigraph.org/" 10 | documentation = "https://docs.rs/spargebra" 11 | description = """ 12 | A SPARQL parser 13 | """ 14 | edition = "2021" 15 | rust-version.workspace = true 16 | 17 | [features] 18 | default = [] 19 | rdf-star = ["oxrdf/rdf-star"] 20 | 21 | [dependencies] 22 | rand.workspace = true 23 | peg.workspace = true 24 | oxrdf.workspace = true 25 | oxiri.workspace = true 26 | oxilangtag.workspace = true 27 | chrono.workspace = true 28 | fundu = { workspace=true, features = ["custom", "chrono"] } 29 | thiserror.workspace = true 30 | 31 | [package.metadata.docs.rs] 32 | all-features = true 33 | -------------------------------------------------------------------------------- /lib/spargebra/README.md: -------------------------------------------------------------------------------- 1 | This repository is forked from https://github.com/oxigraph/oxigraph in order to support some custom constructions. 2 | We recommend that you use the original library, and to support the original creator Thomas Tanon. 3 | 4 | Spargebra 5 | ========= 6 | 7 | [![Latest Version](https://img.shields.io/crates/v/spargebra.svg)](https://crates.io/crates/spargebra) 8 | [![Released API docs](https://docs.rs/spargebra/badge.svg)](https://docs.rs/spargebra) 9 | [![Crates.io downloads](https://img.shields.io/crates/d/spargebra)](https://crates.io/crates/spargebra) 10 | [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) 11 | [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 12 | 13 | Spargebra is a [SPARQL](https://www.w3.org/TR/sparql11-overview/) parser. 14 | 15 | It supports both [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) and [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/). 16 | 17 | The emitted tree is based on [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) objects. 18 | 19 | The API entry point for SPARQL queries is the [`Query`] struct and the API entry point for SPARQL updates is the [`Update`] struct. 20 | 21 | Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature. 22 | 23 | This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org). 24 | 25 | Usage example: 26 | 27 | ```rust 28 | use spargebra::Query; 29 | 30 | let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }"; 31 | let query = Query::parse(query_str, None).unwrap(); 32 | assert_eq!(query.to_string(), query_str); 33 | ``` 34 | 35 | ## License 36 | 37 | This project is licensed under either of 38 | 39 | * Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or 40 | `<http://www.apache.org/licenses/LICENSE-2.0>`) 41 | * MIT license ([LICENSE-MIT](../LICENSE-MIT) or 42 | `<http://opensource.org/licenses/MIT>`) 43 | 44 | at your option. 45 | 46 | 47 | ### Contribution 48 | 49 | Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
50 | -------------------------------------------------------------------------------- /lib/spargebra/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | #![doc(test(attr(deny(warnings))))] 3 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 4 | #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] 5 | #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] 6 | 7 | pub mod algebra; 8 | mod parser; 9 | mod query; 10 | pub mod query_context; 11 | pub mod remove_sugar; 12 | pub mod term; 13 | pub mod treehouse; 14 | mod update; 15 | 16 | pub use parser::SparqlSyntaxError; 17 | pub use query::*; 18 | pub use update::*; 19 | -------------------------------------------------------------------------------- /lib/templates/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "templates" 3 | version = "0.1.0" 4 | edition = "2021" 5 | rust-version.workspace = true 6 | 7 | [dependencies] 8 | representation = { path = "../representation" } 9 | 10 | oxrdf = { workspace = true } 11 | unic-char-range = "0.9.0" 12 | tracing = { workspace = true } 13 | thiserror = { workspace = true } 14 | nom = { workspace = true, features = ["alloc"] } 15 | pyo3.workspace = true 16 | walkdir.workspace = true 17 | -------------------------------------------------------------------------------- /lib/templates/src/compatible.rs: -------------------------------------------------------------------------------- 1 | // pub fn validate_compatible_ptypes(argument_ptype:&PType, signature_ptype:&PType) -> bool { 2 | // match argument_ptype { 3 | // PType::None => { 4 | // true 5 | // } 6 | // PType::Basic(argument_basic) => { 7 | // match signature_ptype { 8 | // PType::None => true, 9 | // PType::Basic(signature_basic) => { 10 | // if argument_basic.as_str() == OTTR_IRI { 11 | // 12 | // } 13 | // } 14 | // PType::Lub(_) => {} 15 | // PType::List(_) => {} 16 | // PType::NEList(_) => {} 17 | // } 18 | // } 19 | // PType::Lub(argument_inner) | PType::List(argument_inner) | PType::NEList(argument_inner) => { 20 | // 21 | // } 22 | // } 23 | // } 24 | -------------------------------------------------------------------------------- /lib/templates/src/constants.rs: -------------------------------------------------------------------------------- 1 | pub const OTTR_TRIPLE: &str = "http://ns.ottr.xyz/0.4/Triple"; 2 | pub const OTTR_PREFIX: &str = "ottr"; 3 | pub const OTTR_PREFIX_IRI: &str = "http://ns.ottr.xyz/0.4/"; 4 | pub const RDF_PREFIX: &str = "rdf"; 5 | pub const RDF_PREFIX_IRI: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; 6 | pub const RDFS_PREFIX: &str = "rdfs"; 7 | pub const RDFS_PREFIX_IRI: &str = "http://www.w3.org/2000/01/rdf-schema#"; 8 | pub const XSD_PREFIX: &str = "xsd"; 9 | pub const XSD_PREFIX_IRI: &str = "http://www.w3.org/2001/XMLSchema#"; 10 | pub const DEFAULT_PREFIX: &str = "urn:maplib_default:"; 11 | pub const OTTR_IRI: &str = "http://ns.ottr.xyz/0.4/IRI"; 12 | pub const OTTR_BLANK_NODE: &str = "http://ns.ottr.xyz/0.4/BlankNode"; 13 | 14 | pub const OWL_PREFIX: &str = "owl"; 15 | pub const OWL_PREFIX_IRI: &str = "http://www.w3.org/2002/07/owl#"; 16 | 17 | pub const SHACL_PREFIX: &str = "sh"; 18 | 19 | pub const SHACL_PREFIX_IRI: &str = "http://www.w3.org/ns/shacl#"; 20 | 21 | pub const XSD_ANY_URI: &str = "http://www.w3.org/2001/XMLSchema#anyURI"; 22 | 
-------------------------------------------------------------------------------- /lib/templates/src/dataset/errors.rs: -------------------------------------------------------------------------------- 1 | use crate::resolver::ResolutionError; 2 | use oxrdf::Variable; 3 | use std::fmt::{Display, Formatter}; 4 | use std::io; 5 | use thiserror::Error; 6 | 7 | #[derive(Error, Debug)] 8 | pub enum TemplateError { 9 | InconsistentNumberOfArguments(String, String, usize, usize), 10 | IncompatibleTypes(String, Variable, String, String), 11 | ReadTemplateFileError(io::Error), 12 | ResolveDirectoryEntryError(walkdir::Error), 13 | ParsingError(crate::parsing::errors::ParsingError), 14 | ResolutionError(ResolutionError), 15 | TemplateNotFound(String, String), 16 | } 17 | 18 | impl Display for TemplateError { 19 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 20 | match &self { 21 | TemplateError::InconsistentNumberOfArguments(calling, template, given, expected) => { 22 | write!( 23 | f, 24 | "Template {calling} called {template} with {given} arguments, but expected {expected}" 25 | ) 26 | } 27 | TemplateError::IncompatibleTypes(nn, var, given, expected) => { 28 | write!( 29 | f, 30 | "Template {nn} variable {var} was given argument of type {given:?} but expected {expected:?}" 31 | ) 32 | } 33 | TemplateError::ReadTemplateFileError(e) => { 34 | write!(f, "Error reading template file {e}") 35 | } 36 | TemplateError::ResolveDirectoryEntryError(e) => { 37 | write!(f, "Resolve template directory entry error {e}") 38 | } 39 | TemplateError::ParsingError(p) => { 40 | write!(f, "Template parsing error: {p}") 41 | } 42 | TemplateError::ResolutionError(r) => { 43 | write!(f, "Template resolution error {r}") 44 | } 45 | TemplateError::TemplateNotFound(container, inner) => { 46 | write!( 47 | f, 48 | "Could not find template {inner} referenced from template {container}" 49 | ) 50 | } 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /lib/templates/src/document.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::StottrDocument; 2 | use crate::dataset::errors::TemplateError; 3 | use crate::parsing::whole_stottr_doc; 4 | use crate::resolver::resolve_document; 5 | use std::fs::read_to_string; 6 | use std::path::Path; 7 | 8 | pub fn document_from_str(s: &str) -> Result<StottrDocument, TemplateError> { 9 | let unresolved = whole_stottr_doc(s).map_err(TemplateError::ParsingError)?; 10 | resolve_document(unresolved).map_err(TemplateError::ResolutionError) 11 | } 12 | 13 | pub fn document_from_file<P: AsRef<Path>>(p: P) -> Result<StottrDocument, TemplateError> { 14 | let s = read_to_string(p).map_err(TemplateError::ReadTemplateFileError)?; 15 | document_from_str(&s) 16 | } 17 |
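// A minimal usage sketch of document_from_file above (the path is illustrative):
//
// let doc = document_from_file("templates/pizza.stottr")?;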
-------------------------------------------------------------------------------- /lib/templates/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | #[macro_use] 3 | extern crate unic_char_range; 4 | use crate::ast::PType; 5 | use representation::RDFNodeState; 6 | 7 | pub mod ast; 8 | mod compatible; 9 | pub mod constants; 10 | pub mod dataset; 11 | pub mod document; 12 | mod parsing; 13 | pub mod python; 14 | mod resolver; 15 | pub mod subtypes_ext; 16 | 17 | #[derive(Clone, Debug)] 18 | pub enum MappingColumnType { 19 | Flat(RDFNodeState), 20 | Nested(Box<MappingColumnType>), 21 | } 22 | 23 | impl MappingColumnType { 24 | pub fn as_ptype(&self) -> PType { 25 | match self { 26 | MappingColumnType::Flat(f) => PType::from(f), 27 | MappingColumnType::Nested(n) => PType::List(Box::new(n.as_ptype())), 28 | } 29 | } 30 | } 31 | 32 | impl PartialEq for MappingColumnType { 33 | fn eq(&self, other: &Self) -> bool { 34 | match self { 35 | MappingColumnType::Flat(state_self) => { 36 | if let MappingColumnType::Flat(state_other) = other { 37 | state_self.types_equal(state_other) 38 | } else { 39 | false 40 | } 41 | } 42 | MappingColumnType::Nested(t) => { 43 | if let MappingColumnType::Nested(t_other) = other { 44 | t.eq(t_other) 45 | } else { 46 | false 47 | } 48 | } 49 | } 50 | } 51 | 52 | fn ne(&self, other: &Self) -> bool { 53 | !self.eq(other) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /lib/templates/src/parsing.rs: -------------------------------------------------------------------------------- 1 | use crate::parsing::errors::{ParsingError, ParsingErrorKind}; 2 | use crate::parsing::nom_parsing::stottr_doc; 3 | use crate::parsing::parsing_ast::UnresolvedStottrDocument; 4 | use nom::Finish; 5 | 6 | pub mod errors; 7 | mod nom_parsing; 8 | mod parser_test; 9 | pub mod parsing_ast; 10 | 11 | pub fn whole_stottr_doc(s: &str) -> Result<UnresolvedStottrDocument, ParsingError> { 12 | let result = stottr_doc(s).finish(); 13 | match result { 14 | Ok((rest, doc)) => { 15 | if !rest.is_empty() { 16 | Err(ParsingError { 17 | kind: ParsingErrorKind::CouldNotParseEverything(rest.to_string()), 18 | }) 19 | } else { 20 | Ok(doc) 21 | } 22 | } 23 | Err(e) => Err(ParsingError { 24 | kind: ParsingErrorKind::NomParserError(format!("{:?}", e.code)), 25 | }), 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /lib/templates/src/parsing/errors.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fmt::{Display, Formatter}; 3 | 4 | #[derive(Debug)] 5 | pub enum ParsingErrorKind { 6 | CouldNotParseEverything(String), 7 | NomParserError(String), 8 | } 9 | 10 | #[derive(Debug)] 11 | pub struct ParsingError { 12 | pub(crate) kind: ParsingErrorKind, 13 | } 14 | 15 | impl Display for ParsingError { 16 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 17 | match &self.kind { 18 | ParsingErrorKind::CouldNotParseEverything(s) => { 19 | write!( 20 | f, 21 | "Could not parse entire string as stottr document, rest: {s}" 22 | ) 23 | } 24 | ParsingErrorKind::NomParserError(s) => { 25 | write!(f, "Nom parser error with code {s}") 26 | } 27 | } 28 | } 29 | } 30 | 31 | impl Error for ParsingError {} 32 | -------------------------------------------------------------------------------- /lib/templates/src/parsing/parsing_ast.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::{Directive, ListExpanderType}; 2 | use oxrdf::{BlankNode, NamedNode, Variable}; 3 | 4 | #[derive(PartialEq, Debug)] 5 | pub enum UnresolvedStatement { 6 | Signature(UnresolvedSignature), 7 | Template(UnresolvedTemplate), 8 | BaseTemplate(UnresolvedBaseTemplate), 9 | Instance(UnresolvedInstance), 10 | } 11 | 12 | #[derive(PartialEq, Debug)] 13 | pub struct UnresolvedTemplate { 14 | pub signature: UnresolvedSignature, 15 | pub pattern_list: Vec<UnresolvedInstance>, 16 | } 17 | 18 | #[derive(PartialEq, Debug)] 19 | pub struct UnresolvedBaseTemplate { 20 | pub signature: UnresolvedSignature, 21 | } 22 | 23 | #[derive(PartialEq, Debug)] 24 | pub struct UnresolvedSignature { 25 | pub template_name: ResolvesToNamedNode, 26 | pub parameter_list: Vec<UnresolvedParameter>, 27 | pub annotation_list: Option<Vec<UnresolvedAnnotation>>, 28 | } 29 | 30 | #[derive(PartialEq, Debug)] 31 | pub struct
UnresolvedParameter { 32 | pub optional: bool, 33 | pub non_blank: bool, 34 | pub ptype: Option<UnresolvedPType>, 35 | pub variable: Variable, 36 | pub default_value: Option<UnresolvedDefaultValue>, 37 | } 38 | 39 | #[derive(PartialEq, Debug)] 40 | pub enum UnresolvedPType { 41 | Basic(ResolvesToNamedNode), 42 | Lub(Box<UnresolvedPType>), 43 | List(Box<UnresolvedPType>), 44 | NEList(Box<UnresolvedPType>), 45 | } 46 | 47 | #[derive(PartialEq, Debug)] 48 | pub struct UnresolvedDefaultValue { 49 | pub constant_term: UnresolvedConstantTerm, 50 | } 51 | 52 | #[derive(PartialEq, Debug)] 53 | pub enum UnresolvedConstantTerm { 54 | Constant(UnresolvedConstantLiteral), 55 | ConstantList(Vec<UnresolvedConstantTerm>), 56 | } 57 | 58 | #[derive(PartialEq, Debug)] 59 | pub enum UnresolvedConstantLiteral { 60 | Iri(ResolvesToNamedNode), 61 | BlankNode(BlankNode), 62 | Literal(UnresolvedStottrLiteral), 63 | None, 64 | } 65 | 66 | #[derive(PartialEq, Debug)] 67 | pub struct UnresolvedStottrLiteral { 68 | pub value: String, 69 | pub language: Option<String>, 70 | pub data_type_iri: Option<ResolvesToNamedNode>, 71 | } 72 | 73 | #[derive(PartialEq, Debug)] 74 | pub struct PrefixedName { 75 | pub prefix: String, 76 | pub name: String, 77 | } 78 | 79 | #[derive(PartialEq, Debug)] 80 | pub enum ResolvesToNamedNode { 81 | PrefixedName(PrefixedName), 82 | NamedNode(NamedNode), 83 | } 84 | 85 | #[derive(PartialEq, Debug)] 86 | pub struct UnresolvedInstance { 87 | pub list_expander: Option<ListExpanderType>, 88 | pub template_name: ResolvesToNamedNode, 89 | pub argument_list: Vec<UnresolvedArgument>, 90 | } 91 | 92 | #[derive(PartialEq, Debug)] 93 | pub struct UnresolvedAnnotation { 94 | pub instance: UnresolvedInstance, 95 | } 96 | 97 | #[derive(PartialEq, Debug)] 98 | pub struct UnresolvedArgument { 99 | pub list_expand: bool, 100 | pub term: UnresolvedStottrTerm, 101 | } 102 | 103 | #[derive(PartialEq, Debug)] 104 | pub enum UnresolvedStottrTerm { 105 | Variable(Variable), 106 | ConstantTerm(UnresolvedConstantTerm), 107 | List(Vec<UnresolvedStottrTerm>), 108 | } 109 | 110 | #[derive(PartialEq, Debug)] 111 | pub struct UnresolvedStottrDocument { 112 | pub directives: Vec<Directive>, 113 | pub statements: Vec<UnresolvedStatement>, 114 | } 115 | -------------------------------------------------------------------------------- /lib/templates/src/subtypes_ext.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::ptype_is_possibly_literal; 2 | use oxrdf::NamedNodeRef; 3 | use representation::subtypes::is_literal_subtype; 4 | 5 | pub fn is_literal_subtype_ext(s: NamedNodeRef, t: NamedNodeRef) -> bool { 6 | if !ptype_is_possibly_literal(s) || !ptype_is_possibly_literal(t) { 7 | false 8 | } else { 9 | is_literal_subtype(s, t) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /lib/triplestore/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "triplestore" 3 | version = "0.5.0" 4 | edition = "2021" 5 | rust-version.workspace = true 6 | 7 | [features] 8 | pyo3 = [ "dep:pyo3", "utils/pyo3" ] 9 | 10 | [dependencies] 11 | utils = { path = "../utils" } 12 | query_processing = { path = "../query_processing" } 13 | representation = { path = "../representation" } 14 | file_io = { path = "../file_io" } 15 | spargebra = { path = "../spargebra" } 16 | fts = {path ="../fts"} 17 | 18 | rayon.workspace = true 19 | sprs = { workspace = true, features = ["multi_thread"] } 20 | oxrdf.workspace = true 21 | polars = { workspace = true, features = [ 22 | "ipc", "range", "rank", 23 | "is_unique","merge_sorted", "new_streaming", "zip_with", "nightly", "performant", "cse", "semi_anti_join", "abs", "round_series",
"lazy", "concat_str", "is_in", 24 | "dtype-full", "strings", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", 25 | "diagonal_concat", "cum_agg"], default-features = false } 26 | polars-core = {workspace = true, default-features = false} 27 | uuid.workspace = true 28 | thiserror.workspace = true 29 | oxrdfio.workspace = true 30 | oxttl.workspace = true 31 | memmap2.workspace = true 32 | sparesults.workspace = true 33 | tracing.workspace = true 34 | 35 | pyo3 = { workspace = true, optional = true } 36 | 37 | itoa.workspace = true 38 | ryu.workspace = true 39 | -------------------------------------------------------------------------------- /lib/triplestore/src/cats.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use representation::cats::CatTriples; 3 | 4 | impl Triplestore { 5 | pub fn globalize(&mut self, cat_triples: Vec) -> Vec { 6 | let mut mutcat = self.global_cats.write().unwrap(); 7 | let cat_triples = { 8 | let cat_triples = mutcat.globalize(cat_triples); 9 | mutcat.encode_predicates(&cat_triples); 10 | cat_triples 11 | }; 12 | cat_triples 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /lib/triplestore/src/errors.rs: -------------------------------------------------------------------------------- 1 | use file_io::FileIOError; 2 | use fts::FtsError; 3 | use oxrdfio::RdfSyntaxError; 4 | use std::io; 5 | use std::sync::PoisonError; 6 | use thiserror::Error; 7 | 8 | #[derive(Error, Debug)] 9 | pub enum TriplestoreError { 10 | #[error("Error writing NTriples {0}")] 11 | WriteNTriplesError(String), 12 | #[error("Path {0} does not exist")] 13 | PathDoesNotExist(String), 14 | #[error("Error removing file {0}")] 15 | RemoveFileError(io::Error), 16 | #[error("Creating folder resulted in an error: {0}")] 17 | FolderCreateIOError(io::Error), 18 | #[error("Read caching directory error {0}")] 19 | ReadCachingDirectoryError(io::Error), 20 | #[error("Read caching directory entry error {0}")] 21 | ReadCachingDirectoryEntryError(io::Error), 22 | #[error("{0}")] 23 | RDFSyntaxError(RdfSyntaxError), 24 | #[error("Read triples file error {0}")] 25 | ReadTriplesFileError(io::Error), 26 | #[error("Invalid base iri {0}")] 27 | InvalidBaseIri(String), 28 | #[error("Error subtracting from transient triples {0}")] 29 | SubtractTransientTriplesError(String), 30 | #[error("RDFS Class inheritance error {0}")] 31 | RDFSClassInheritanceError(String), 32 | #[error("Indexing error {0}")] 33 | IndexingError(String), 34 | #[error("IPC IO error: {0}")] 35 | IPCIOError(String), 36 | #[error("Parquet IO error: {0}")] 37 | FileIOError(FileIOError), 38 | #[error("Full text search error {0}")] 39 | FtsError(FtsError), 40 | #[error("Graph does not exist: {0}")] 41 | GraphDoesNotExist(String), 42 | #[error("A lock was open when a thread crashed, cannot guarantee data consistency")] 43 | PoisonedLockError, 44 | } 45 | 46 | impl From> for TriplestoreError { 47 | fn from(_: PoisonError) -> Self { 48 | Self::PoisonedLockError 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /lib/triplestore/src/io_funcs.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::TriplestoreError; 2 | use std::fs::create_dir; 3 | use std::path::Path; 4 | 5 | pub(crate) fn create_folder_if_not_exists(path: &Path) -> Result<(), TriplestoreError> { 6 | if !path.exists() { 7 | 
create_dir(path).map_err(TriplestoreError::FolderCreateIOError)?; 8 | } 9 | Ok(()) 10 | } 11 | -------------------------------------------------------------------------------- /lib/triplestore/src/native_parquet_write.rs: -------------------------------------------------------------------------------- 1 | use super::{StoredBaseRDFNodeType, Triplestore}; 2 | use crate::errors::TriplestoreError; 3 | use file_io::{property_to_filename, write_parquet}; 4 | use polars::prelude::ParquetCompression; 5 | use representation::dataset::NamedGraph; 6 | use std::path::Path; 7 | use std::time::Instant; 8 | use tracing::{debug, instrument}; 9 | 10 | impl Triplestore { 11 | #[instrument(skip_all)] 12 | pub fn write_native_parquet( 13 | &mut self, 14 | path: &Path, 15 | graph: &NamedGraph, 16 | ) -> Result<(), TriplestoreError> { 17 | self.check_graph_exists(graph)?; 18 | let now = Instant::now(); 19 | if !path.exists() { 20 | return Err(TriplestoreError::PathDoesNotExist( 21 | path.to_str().unwrap().to_string(), 22 | )); 23 | } 24 | let path_buf = path.to_path_buf(); 25 | 26 | for (property, tts) in self.graph_triples_map.get_mut(graph).unwrap() { 27 | for ((_rdf_node_type_s, rdf_node_type_o), tt) in tts { 28 | let filename; 29 | if let StoredBaseRDFNodeType::Literal(literal_type) = rdf_node_type_o { 30 | filename = format!( 31 | "{}_{}", 32 | property_to_filename(property.as_str()), 33 | property_to_filename(literal_type.as_str()) 34 | ); 35 | } else { 36 | filename = format!( 37 | "{}_object_property", 38 | property_to_filename(property.as_str()), 39 | ) 40 | } 41 | let file_path = path_buf.clone(); 42 | 43 | for (i, (lf, _)) in tt.get_lazy_frames(&None, &None)?.into_iter().enumerate() { 44 | let filename = format!("{filename}_part_{i}.parquet"); 45 | let mut file_path = file_path.clone(); 46 | file_path.push(filename); 47 | write_parquet( 48 | &mut lf.collect().unwrap(), 49 | file_path.as_path(), 50 | ParquetCompression::default(), 51 | ) 52 | .map_err(TriplestoreError::FileIOError)? 53 | } 54 | } 55 | } 56 | debug!( 57 | elapsed = now.elapsed().as_secs_f64(), 58 | "Writing native parquet" 59 | ); 60 | Ok(()) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lib/triplestore/src/rdfs_inferencing.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::errors::TriplestoreError; 3 | use oxrdf::vocab::rdf; 4 | use representation::dataset::NamedGraph; 5 | 6 | const SUBCLASS_INFERENCING: &str = r#" 7 | PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 8 | CONSTRUCT { ?a a ?b } 9 | WHERE { 10 | ?a a ?c . 11 | ?c rdfs:subClassOf+ ?b . 12 | } 13 | "#; 14 |
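// For example, given the triples ex:x a ex:A and ex:A rdfs:subClassOf ex:B,
// the CONSTRUCT query above infers the additional triple ex:x a ex:B.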
15 | impl Triplestore { 16 | pub fn rdfs_class_inheritance(&mut self, graph: &NamedGraph) -> Result<(), TriplestoreError> { 17 | self.insert(SUBCLASS_INFERENCING, &None, true, false, false, graph) 18 | .map_err(|x| TriplestoreError::RDFSClassInheritanceError(x.to_string()))?; 19 | Ok(()) 20 | } 21 | 22 | pub fn drop_rdfs_class_inheritance( 23 | &mut self, 24 | graph: &NamedGraph, 25 | ) -> Result<(), TriplestoreError> { 26 | if let Some(t) = self.graph_transient_triples_map.get_mut(graph) { 27 | t.remove(&rdf::TYPE.into_owned()); 28 | } 29 | Ok(()) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/errors.rs: -------------------------------------------------------------------------------- 1 | use std::sync::PoisonError; 2 | 3 | use crate::errors::TriplestoreError; 4 | use fts::FtsError; 5 | use query_processing::errors::QueryProcessingError; 6 | use representation::RDFNodeState; 7 | use spargebra::SparqlSyntaxError; 8 | use thiserror::Error; 9 | 10 | #[derive(Error, Debug)] 11 | pub enum SparqlError { 12 | #[error("SPARQL parsing error: {0}")] 13 | ParseError(SparqlSyntaxError), 14 | #[error("Query type not supported")] 15 | QueryTypeNotSupported, 16 | #[error("Inconsistent datatypes for {}, {:?}, {:?} in context {}", .0, .1, .2, .3)] 17 | InconsistentDatatypes(String, RDFNodeState, RDFNodeState, String), 18 | #[error(transparent)] 19 | QueryProcessingError(#[from] QueryProcessingError), 20 | #[error(transparent)] 21 | TriplestoreError(#[from] TriplestoreError), 22 | #[error("Construct query with undefined variable {}", .0)] 23 | ConstructWithUndefinedVariable(String), 24 | #[error("Full text search lookup error: {}", .0)] 25 | FtsLookupError(#[from] FtsError), 26 | #[error("Query interrupted via signal")] 27 | InterruptSignal, 28 | #[error("A lock was open when a thread crashed, cannot guarantee data consistency")] 29 | PoisonedLockError, 30 | } 31 | 32 | impl<T> From<PoisonError<T>> for SparqlError { 33 | fn from(_: PoisonError<T>) -> Self { 34 | Self::PoisonedLockError 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_aggregate.rs: -------------------------------------------------------------------------------- 1 | use super::{QuerySettings, Triplestore}; 2 | use crate::sparql::errors::SparqlError; 3 | use oxrdf::Variable; 4 | 5 | use query_processing::aggregates::{ 6 | avg, count_with_expression, count_without_expression, group_concat, list_aggregation, max, min, 7 | sample, sum, AggregateReturn, 8 | }; 9 | use query_processing::constants::LIST_AGGREGATION; 10 | use representation::dataset::QueryGraph; 11 | use representation::query_context::{Context, PathEntry}; 12 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 13 | use spargebra::algebra::{AggregateExpression, AggregateFunction}; 14 | use std::collections::HashMap; 15 | 16 | impl Triplestore { 17 | pub fn sparql_aggregate_expression_as_lazy_column_and_expression( 18 | &self, 19 | variable: &Variable, 20 | aggregate_expression: &AggregateExpression, 21 | solution_mappings: SolutionMappings, 22 | context: &Context, 23 | parameters: &Option<HashMap<String, EagerSolutionMappings>>, 24 | query_settings: &QuerySettings, 25 | dataset: &QueryGraph, 26 | ) -> Result<AggregateReturn, SparqlError> { 27 | let output_solution_mappings; 28 | let mut out_expr; 29 | let column_context; 30 | let out_rdf_node_type;
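// Each aggregate is lowered to a Polars expression plus the RDF type of its result.
// For FunctionCall aggregates the argument expression is evaluated first in its own
// column context; the match below then wraps it in the corresponding aggregation.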
output_solution_mappings = solution_mappings; 34 | column_context = None; 35 | (out_expr, out_rdf_node_type) = 36 | count_without_expression(*distinct, &output_solution_mappings.rdf_node_types); 37 | } 38 | AggregateExpression::FunctionCall { 39 | name, 40 | expr, 41 | distinct, 42 | } => { 43 | column_context = Some(context.extension_with(PathEntry::AggregationOperation)); 44 | output_solution_mappings = self.lazy_expression( 45 | expr, 46 | solution_mappings, 47 | column_context.as_ref().unwrap(), 48 | parameters, 49 | None, 50 | query_settings, 51 | dataset, 52 | )?; 53 | match name { 54 | AggregateFunction::Count => { 55 | (out_expr, out_rdf_node_type) = 56 | count_with_expression(column_context.as_ref().unwrap(), *distinct); 57 | } 58 | AggregateFunction::Sum => { 59 | (out_expr, out_rdf_node_type) = sum( 60 | &output_solution_mappings, 61 | column_context.as_ref().unwrap(), 62 | *distinct, 63 | ); 64 | } 65 | AggregateFunction::Avg => { 66 | (out_expr, out_rdf_node_type) = avg( 67 | &output_solution_mappings, 68 | column_context.as_ref().unwrap(), 69 | *distinct, 70 | ); 71 | } 72 | AggregateFunction::Min => { 73 | (out_expr, out_rdf_node_type) = 74 | min(&output_solution_mappings, column_context.as_ref().unwrap()); 75 | } 76 | AggregateFunction::Max => { 77 | (out_expr, out_rdf_node_type) = 78 | max(&output_solution_mappings, column_context.as_ref().unwrap()); 79 | } 80 | AggregateFunction::GroupConcat { separator } => { 81 | (out_expr, out_rdf_node_type) = 82 | group_concat(column_context.as_ref().unwrap(), separator, *distinct); 83 | } 84 | AggregateFunction::Sample => { 85 | (out_expr, out_rdf_node_type) = 86 | sample(&output_solution_mappings, column_context.as_ref().unwrap()); 87 | } 88 | AggregateFunction::Custom(name) => { 89 | if name.as_str() == LIST_AGGREGATION { 90 | (out_expr, out_rdf_node_type) = list_aggregation( 91 | &output_solution_mappings, 92 | column_context.as_ref().unwrap(), 93 | ); 94 | } else { 95 | unimplemented!("Custom aggregation {}", name); 96 | } 97 | } 98 | } 99 | } 100 | } 101 | out_expr = out_expr.alias(variable.as_str()); 102 | Ok(AggregateReturn { 103 | solution_mappings: output_solution_mappings, 104 | expr: out_expr, 105 | context: column_context, 106 | rdf_node_type: out_rdf_node_type, 107 | }) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/distinct.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use crate::sparql::QuerySettings; 4 | use query_processing::graph_patterns::distinct; 5 | use query_processing::pushdowns::Pushdowns; 6 | use representation::dataset::QueryGraph; 7 | use representation::query_context::{Context, PathEntry}; 8 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 9 | use spargebra::algebra::GraphPattern; 10 | use std::collections::HashMap; 11 | use tracing::{instrument, trace}; 12 | 13 | impl Triplestore { 14 | #[instrument(skip_all)] 15 | pub(crate) fn lazy_distinct( 16 | &self, 17 | inner: &GraphPattern, 18 | solution_mappings: Option, 19 | context: &Context, 20 | parameters: &Option>, 21 | pushdowns: Pushdowns, 22 | query_settings: &QuerySettings, 23 | dataset: &QueryGraph, 24 | ) -> Result { 25 | trace!("Processing distinct graph pattern"); 26 | let solution_mappings = self.lazy_graph_pattern( 27 | inner, 28 | solution_mappings, 29 | 
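// The context is extended with DistinctInner so that helper columns created while
// evaluating the inner pattern get names unique to this operator and cannot collide.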
&context.extension_with(PathEntry::DistinctInner), 30 | parameters, 31 | pushdowns, 32 | query_settings, 33 | dataset, 34 | )?; 35 | let sm = distinct(solution_mappings)?; 36 | Ok(sm) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/extend.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use oxrdf::Variable; 4 | use tracing::{instrument, trace}; 5 | 6 | use crate::sparql::QuerySettings; 7 | use query_processing::expressions::contains_graph_pattern; 8 | use query_processing::graph_patterns::extend; 9 | use query_processing::pushdowns::Pushdowns; 10 | use representation::dataset::QueryGraph; 11 | use representation::query_context::{Context, PathEntry}; 12 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 13 | use spargebra::algebra::{Expression, GraphPattern}; 14 | use std::collections::HashMap; 15 | 16 | impl Triplestore { 17 | #[allow(clippy::too_many_arguments)] 18 | #[instrument(skip_all)] 19 | pub(crate) fn lazy_extend( 20 | &self, 21 | inner: &GraphPattern, 22 | variable: &Variable, 23 | expression: &Expression, 24 | input_solution_mappings: Option, 25 | context: &Context, 26 | parameters: &Option>, 27 | mut pushdowns: Pushdowns, 28 | query_settings: &QuerySettings, 29 | dataset: &QueryGraph, 30 | ) -> Result { 31 | trace!("Processing extend graph pattern"); 32 | let inner_context = context.extension_with(PathEntry::ExtendInner); 33 | let expression_context = context.extension_with(PathEntry::ExtendExpression); 34 | pushdowns.remove_variable(variable); 35 | let expression_pushdowns = if contains_graph_pattern(expression) { 36 | Some(pushdowns.clone()) 37 | } else { 38 | None 39 | }; 40 | let mut output_solution_mappings = self.lazy_graph_pattern( 41 | inner, 42 | input_solution_mappings, 43 | &inner_context, 44 | parameters, 45 | pushdowns, 46 | query_settings, 47 | dataset, 48 | )?; 49 | output_solution_mappings = self.lazy_expression( 50 | expression, 51 | output_solution_mappings, 52 | &expression_context, 53 | parameters, 54 | expression_pushdowns.as_ref(), 55 | query_settings, 56 | dataset, 57 | )?; 58 | Ok(extend( 59 | output_solution_mappings, 60 | &expression_context, 61 | variable, 62 | self.global_cats.clone(), 63 | )?) 
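// The expression was evaluated above into a column named after expression_context;
// extend then exposes it as the target variable, which is how SPARQL BIND(expr AS ?var)
// is realized.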
64 | } 65 | } 66 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/filter.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use tracing::{instrument, trace}; 4 | 5 | use crate::sparql::QuerySettings; 6 | use query_processing::expressions::contains_graph_pattern; 7 | use query_processing::graph_patterns::filter; 8 | use query_processing::pushdowns::Pushdowns; 9 | use representation::dataset::QueryGraph; 10 | use representation::query_context::{Context, PathEntry}; 11 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 12 | use spargebra::algebra::{Expression, GraphPattern}; 13 | use std::collections::HashMap; 14 | 15 | impl Triplestore { 16 | #[instrument(skip_all)] 17 | pub(crate) fn lazy_filter( 18 | &self, 19 | inner: &GraphPattern, 20 | expression: &Expression, 21 | input_solution_mappings: Option, 22 | context: &Context, 23 | parameters: &Option>, 24 | pushdowns: Pushdowns, 25 | query_settings: &QuerySettings, 26 | dataset: &QueryGraph, 27 | ) -> Result { 28 | trace!("Processing filter graph pattern"); 29 | let inner_context = context.extension_with(PathEntry::FilterInner); 30 | let expression_context = context.extension_with(PathEntry::FilterExpression); 31 | let expression_pushdowns = if contains_graph_pattern(expression) { 32 | Some(pushdowns.clone()) 33 | } else { 34 | None 35 | }; 36 | let mut output_solution_mappings = self.lazy_graph_pattern( 37 | inner, 38 | input_solution_mappings, 39 | &inner_context, 40 | parameters, 41 | pushdowns, 42 | query_settings, 43 | dataset, 44 | )?; 45 | output_solution_mappings = self.lazy_expression( 46 | expression, 47 | output_solution_mappings, 48 | &expression_context, 49 | parameters, 50 | expression_pushdowns.as_ref(), 51 | query_settings, 52 | dataset, 53 | )?; 54 | 55 | output_solution_mappings = filter(output_solution_mappings, &expression_context)?; 56 | 57 | Ok(output_solution_mappings) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/group.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use oxrdf::Variable; 4 | use tracing::{instrument, trace}; 5 | 6 | use crate::sparql::QuerySettings; 7 | use polars::prelude::JoinType; 8 | use query_processing::aggregates::AggregateReturn; 9 | use query_processing::graph_patterns::{group_by, join, prepare_group_by}; 10 | use query_processing::pushdowns::Pushdowns; 11 | use representation::dataset::QueryGraph; 12 | use representation::query_context::{Context, PathEntry}; 13 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 14 | use spargebra::algebra::{AggregateExpression, GraphPattern}; 15 | use std::collections::HashMap; 16 | 17 | impl Triplestore { 18 | #[allow(clippy::too_many_arguments)] 19 | #[instrument(skip_all)] 20 | pub(crate) fn lazy_group( 21 | &self, 22 | inner: &GraphPattern, 23 | variables: &[Variable], 24 | aggregates: &[(Variable, AggregateExpression)], 25 | solution_mappings: Option, 26 | context: &Context, 27 | parameters: &Option>, 28 | mut pushdowns: Pushdowns, 29 | query_settings: &QuerySettings, 30 | dataset: &QueryGraph, 31 | ) -> Result { 32 | trace!("Processing group graph pattern"); 33 | let inner_context = 
context.extension_with(PathEntry::GroupInner); 34 | pushdowns.limit_to_variables(variables); 35 | pushdowns.add_graph_pattern_pushdowns(inner); 36 | let output_solution_mappings = self.lazy_graph_pattern( 37 | inner, 38 | None, 39 | &inner_context, 40 | parameters, 41 | pushdowns, 42 | query_settings, 43 | dataset, 44 | )?; 45 | let (mut output_solution_mappings, by, dummy_varname) = 46 | prepare_group_by(output_solution_mappings, variables); 47 | let mut aggregate_expressions = vec![]; 48 | let mut new_rdf_node_types = HashMap::new(); 49 | for v in variables { 50 | new_rdf_node_types.insert( 51 | v.as_str().to_string(), 52 | output_solution_mappings 53 | .rdf_node_types 54 | .get(v.as_str()) 55 | .unwrap() 56 | .clone(), 57 | ); 58 | } 59 | let mut aggregate_contexts = vec![]; 60 | for i in 0..aggregates.len() { 61 | let aggregate_context = context.extension_with(PathEntry::GroupAggregation(i as u16)); 62 | let (v, a) = aggregates.get(i).unwrap(); 63 | let AggregateReturn { 64 | solution_mappings: aggregate_solution_mappings, 65 | expr, 66 | context: c, 67 | rdf_node_type, 68 | } = self.sparql_aggregate_expression_as_lazy_column_and_expression( 69 | v, 70 | a, 71 | output_solution_mappings, 72 | &aggregate_context, 73 | parameters, 74 | query_settings, 75 | dataset, 76 | )?; 77 | output_solution_mappings = aggregate_solution_mappings; 78 | new_rdf_node_types.insert(v.as_str().to_string(), rdf_node_type); 79 | aggregate_expressions.push(expr); 80 | if let Some(c) = c { 81 | aggregate_contexts.push(c); 82 | } 83 | } 84 | output_solution_mappings.rdf_node_types = new_rdf_node_types; 85 | let mut grouped = group_by( 86 | output_solution_mappings, 87 | aggregate_expressions, 88 | by, 89 | dummy_varname, 90 | )?; 91 | for a in aggregate_contexts { 92 | grouped.rdf_node_types.remove(a.as_str()); 93 | } 94 | let solution_mappings = if let Some(solution_mappings) = solution_mappings { 95 | join( 96 | solution_mappings, 97 | grouped, 98 | JoinType::Inner, 99 | self.global_cats.clone(), 100 | )? 
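// Grouping re-evaluates the inner pattern from scratch, so any solution mappings
// passed in from the surrounding pattern are re-attached with an inner join here.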
101 | } else { 102 | grouped 103 | }; 104 | Ok(solution_mappings) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/left_join.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use tracing::{instrument, trace}; 4 | 5 | use crate::sparql::QuerySettings; 6 | use polars::prelude::{col, JoinType}; 7 | use query_processing::expressions::contains_graph_pattern; 8 | use query_processing::graph_patterns::{filter, join}; 9 | use query_processing::pushdowns::Pushdowns; 10 | use representation::dataset::QueryGraph; 11 | use representation::query_context::{Context, PathEntry}; 12 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 13 | use spargebra::algebra::{Expression, GraphPattern}; 14 | use std::collections::HashMap; 15 | 16 | impl Triplestore { 17 | #[allow(clippy::too_many_arguments)] 18 | #[instrument(skip_all)] 19 | pub fn lazy_left_join( 20 | &self, 21 | left: &GraphPattern, 22 | right: &GraphPattern, 23 | expression: &Option, 24 | solution_mappings: Option, 25 | context: &Context, 26 | parameters: &Option>, 27 | mut pushdowns: Pushdowns, 28 | query_settings: &QuerySettings, 29 | dataset: &QueryGraph, 30 | ) -> Result { 31 | trace!("Processing left join graph pattern"); 32 | let left_context = context.extension_with(PathEntry::LeftJoinLeftSide); 33 | let right_context = context.extension_with(PathEntry::LeftJoinRightSide); 34 | let expression_context = context.extension_with(PathEntry::LeftJoinExpression); 35 | let left_solution_mappings = self.lazy_graph_pattern( 36 | left, 37 | solution_mappings, 38 | &left_context, 39 | parameters, 40 | pushdowns.clone(), 41 | query_settings, 42 | dataset, 43 | )?; 44 | 45 | pushdowns.add_graph_pattern_pushdowns(right); 46 | 47 | let expression_pushdowns = if let Some(expr) = expression { 48 | pushdowns.add_filter_variable_pushdowns(expr, None); 49 | if contains_graph_pattern(expr) { 50 | Some(pushdowns.clone()) 51 | } else { 52 | None 53 | } 54 | } else { 55 | None 56 | }; 57 | 58 | let mut right_solution_mappings = self.lazy_graph_pattern( 59 | right, 60 | Some(left_solution_mappings.clone()), 61 | &right_context, 62 | parameters, 63 | pushdowns, 64 | query_settings, 65 | dataset, 66 | )?; 67 | 68 | if let Some(expr) = expression { 69 | right_solution_mappings = self.lazy_expression( 70 | expr, 71 | right_solution_mappings, 72 | &expression_context, 73 | parameters, 74 | expression_pushdowns.as_ref(), 75 | query_settings, 76 | dataset, 77 | )?; 78 | right_solution_mappings = filter(right_solution_mappings, &expression_context)?; 79 | //The following is a workaround: 80 | let keep_cols: Vec<_> = right_solution_mappings 81 | .rdf_node_types 82 | .keys() 83 | .filter(|x| x.as_str() != expression_context.as_str()) 84 | .map(|x| col(x)) 85 | .collect(); 86 | right_solution_mappings.mappings = right_solution_mappings.mappings.select(keep_cols); 87 | right_solution_mappings 88 | .rdf_node_types 89 | .remove(expression_context.as_str()); 90 | //right_solution_mappings = 91 | // drop_inner_contexts(right_solution_mappings, &vec![&expression_context]); 92 | } 93 | let left_solution_mappings = join( 94 | left_solution_mappings, 95 | right_solution_mappings, 96 | JoinType::Left, 97 | self.global_cats.clone(), 98 | )?; 99 | Ok(left_solution_mappings) 100 | } 101 | } 102 | 
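The left join above implements SPARQL OPTIONAL, optionally with a FILTER scoped to the optional part: the right-hand pattern is evaluated with the left bindings pushed in, the filter is applied to the right side only, and the left join then restores left-side solutions that found no match. A minimal query that would exercise this path (hypothetical IRIs, for illustration only):

PREFIX ex: <http://example.com/>
SELECT ?s ?price WHERE {
  ?s a ex:Product .
  OPTIONAL { ?s ex:price ?price . FILTER(?price < 100) }
}

Products without a qualifying price still appear in the result, with ?price unbound.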
-------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/minus.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use tracing::{instrument, trace}; 4 | 5 | use crate::sparql::QuerySettings; 6 | use query_processing::graph_patterns::minus; 7 | use query_processing::pushdowns::Pushdowns; 8 | use representation::dataset::QueryGraph; 9 | use representation::query_context::{Context, PathEntry}; 10 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 11 | use spargebra::algebra::GraphPattern; 12 | use std::collections::HashMap; 13 | 14 | impl Triplestore { 15 | #[instrument(skip_all)] 16 | pub(crate) fn lazy_minus( 17 | &self, 18 | left: &GraphPattern, 19 | right: &GraphPattern, 20 | solution_mappings: Option, 21 | context: &Context, 22 | parameters: &Option>, 23 | pushdowns: Pushdowns, 24 | query_settings: &QuerySettings, 25 | dataset: &QueryGraph, 26 | ) -> Result { 27 | trace!("Processing minus graph pattern"); 28 | let left_context = context.extension_with(PathEntry::MinusLeftSide); 29 | let right_context = context.extension_with(PathEntry::MinusRightSide); 30 | let left_solution_mappings = self.lazy_graph_pattern( 31 | left, 32 | solution_mappings.clone(), 33 | &left_context, 34 | parameters, 35 | pushdowns, 36 | query_settings, 37 | dataset, 38 | )?; 39 | 40 | let right_solution_mappings = self.lazy_graph_pattern( 41 | right, 42 | solution_mappings, 43 | &right_context, 44 | parameters, 45 | Pushdowns::new(), 46 | query_settings, 47 | dataset, 48 | )?; 49 | 50 | Ok(minus(left_solution_mappings, right_solution_mappings)?) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/order_by.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use tracing::{instrument, trace}; 4 | 5 | use crate::sparql::QuerySettings; 6 | use polars::prelude::by_name; 7 | use query_processing::graph_patterns::order_by; 8 | use query_processing::pushdowns::Pushdowns; 9 | use representation::dataset::QueryGraph; 10 | use representation::query_context::{Context, PathEntry}; 11 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 12 | use spargebra::algebra::{GraphPattern, OrderExpression}; 13 | use std::collections::HashMap; 14 | 15 | impl Triplestore { 16 | #[instrument(skip_all)] 17 | pub(crate) fn lazy_order_by( 18 | &self, 19 | inner: &GraphPattern, 20 | expression: &[OrderExpression], 21 | solution_mappings: Option, 22 | context: &Context, 23 | parameters: &Option>, 24 | pushdowns: Pushdowns, 25 | query_settings: &QuerySettings, 26 | dataset: &QueryGraph, 27 | ) -> Result { 28 | trace!("Processing order by graph pattern"); 29 | let mut output_solution_mappings = self.lazy_graph_pattern( 30 | inner, 31 | solution_mappings, 32 | &context.extension_with(PathEntry::OrderByInner), 33 | parameters, 34 | pushdowns, 35 | query_settings, 36 | dataset, 37 | )?; 38 | 39 | let order_expression_contexts: Vec = (0..expression.len()) 40 | .map(|i| context.extension_with(PathEntry::OrderByExpression(i as u16))) 41 | .collect(); 42 | let mut asc_ordering = vec![]; 43 | let mut inner_contexts = vec![]; 44 | for i in 0..expression.len() { 45 | let (ordering_solution_mappings, reverse, 
inner_context) = self.lazy_order_expression( 46 | expression.get(i).unwrap(), 47 | output_solution_mappings, 48 | order_expression_contexts.get(i).unwrap(), 49 | parameters, 50 | query_settings, 51 | dataset, 52 | )?; 53 | output_solution_mappings = ordering_solution_mappings; 54 | inner_contexts.push(inner_context); 55 | asc_ordering.push(reverse); 56 | } 57 | let sort_columns: Vec<_> = inner_contexts 58 | .iter() 59 | .map(|x| x.as_str().to_string()) 60 | .collect(); 61 | output_solution_mappings = order_by( 62 | output_solution_mappings, 63 | &sort_columns, 64 | asc_ordering, 65 | self.global_cats.clone(), 66 | )?; 67 | output_solution_mappings.mappings = output_solution_mappings 68 | .mappings 69 | .drop(by_name(sort_columns, true)); 70 | Ok(output_solution_mappings) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/project.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use oxrdf::Variable; 4 | use tracing::{instrument, trace}; 5 | 6 | use crate::sparql::QuerySettings; 7 | use polars::prelude::JoinType; 8 | use query_processing::graph_patterns::{join, project}; 9 | use query_processing::pushdowns::Pushdowns; 10 | use representation::dataset::QueryGraph; 11 | use representation::query_context::{Context, PathEntry}; 12 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 13 | use spargebra::algebra::GraphPattern; 14 | use std::collections::HashMap; 15 | 16 | impl Triplestore { 17 | #[instrument(skip_all)] 18 | pub(crate) fn lazy_project( 19 | &self, 20 | inner: &GraphPattern, 21 | variables: &Vec, 22 | solution_mappings: Option, 23 | context: &Context, 24 | parameters: &Option>, 25 | mut pushdowns: Pushdowns, 26 | query_settings: &QuerySettings, 27 | dataset: &QueryGraph, 28 | ) -> Result { 29 | trace!("Processing project graph pattern"); 30 | let inner_context = context.extension_with(PathEntry::ProjectInner); 31 | pushdowns.limit_to_variables(variables); 32 | pushdowns.add_graph_pattern_pushdowns(inner); 33 | let mut project_solution_mappings = self.lazy_graph_pattern( 34 | inner, 35 | None, 36 | &inner_context, 37 | parameters, 38 | pushdowns, 39 | query_settings, 40 | dataset, 41 | )?; 42 | project_solution_mappings = project(project_solution_mappings, variables)?; 43 | let solution_mappings = if let Some(solution_mappings) = solution_mappings { 44 | join( 45 | solution_mappings, 46 | project_solution_mappings, 47 | JoinType::Inner, 48 | self.global_cats.clone(), 49 | )? 
50 | } else { 51 | project_solution_mappings 52 | }; 53 | 54 | Ok(solution_mappings) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/pvalues.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use oxrdf::Variable; 4 | use polars::prelude::JoinType; 5 | 6 | use query_processing::graph_patterns::join; 7 | use query_processing::pushdowns::Pushdowns; 8 | use representation::query_context::Context; 9 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 10 | use std::collections::{HashMap, HashSet}; 11 | 12 | impl Triplestore { 13 | pub(crate) fn lazy_pvalues( 14 | &self, 15 | solution_mappings: Option, 16 | variables: &[Variable], 17 | bindings_name: &String, 18 | _context: &Context, 19 | parameters: &Option>, 20 | _pushdowns: Pushdowns, 21 | ) -> Result { 22 | //Todo: apply pushdowns. 23 | let sm = if let Some(parameters) = parameters { 24 | if let Some(EagerSolutionMappings { 25 | mappings, 26 | rdf_node_types, 27 | }) = parameters.get(bindings_name) 28 | { 29 | let mapping_vars: HashSet<_> = mappings 30 | .get_column_names() 31 | .into_iter() 32 | .map(|x| x.as_str()) 33 | .collect(); 34 | let expected_vars: HashSet<_> = variables.iter().map(|x| x.as_str()).collect(); 35 | if mapping_vars != expected_vars { 36 | todo!("Handle mismatching variables in PValues") 37 | } 38 | EagerSolutionMappings::new(mappings.clone(), rdf_node_types.clone()) 39 | } else { 40 | todo!("Handle this error.. ") 41 | } 42 | } else { 43 | todo!("Handle this error") 44 | }; 45 | let cats = self.global_cats.read().unwrap(); 46 | let (sm, _) = cats.encode_solution_mappings(sm, None); 47 | if let Some(mut mappings) = solution_mappings { 48 | //TODO: Remove this workaround 49 | mappings = mappings.as_eager(false).as_lazy(); 50 | mappings = join( 51 | mappings, 52 | sm.as_lazy(), 53 | JoinType::Inner, 54 | self.global_cats.clone(), 55 | )?; 56 | Ok(mappings) 57 | } else { 58 | Ok(sm.as_lazy()) 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/triples_ordering.rs: -------------------------------------------------------------------------------- 1 | use oxrdf::{BlankNode, Term, Variable}; 2 | use query_processing::pushdowns::Pushdowns; 3 | use representation::solution_mapping::SolutionMappings; 4 | use spargebra::term::{NamedNodePattern, TermPattern, TriplePattern}; 5 | use std::cmp::Ordering; 6 | use std::collections::{HashMap, HashSet}; 7 | 8 | pub fn order_triple_patterns( 9 | tps: &[TriplePattern], 10 | sm: &Option, 11 | pushdowns: &Pushdowns, 12 | ) -> Vec { 13 | let mut candidates = tps.to_owned(); 14 | let mut ordering = vec![]; 15 | let mut visited: HashSet<_> = if let Some(sm) = sm { 16 | sm.rdf_node_types 17 | .keys() 18 | .map(|x| x.as_str().to_string()) 19 | .collect() 20 | } else { 21 | HashSet::new() 22 | }; 23 | while !candidates.is_empty() { 24 | let tp = candidates 25 | .iter() 26 | .min_by(|t1, t2| strictly_before(t1, t2, &visited, &pushdowns.variables_values)) 27 | .unwrap(); 28 | let pos = candidates.iter().position(|x| x == tp).unwrap(); 29 | let tp = candidates.remove(pos); 30 | for v in variables(&tp) { 31 | visited.insert(v.as_str().to_string()); 32 | } 33 | for b in blank_nodes(&tp) { 34 | visited.insert(b.to_string()); 35 | } 36 | ordering.push(tp); 37 | } 38 | 
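// Greedy selection sketch (hypothetical predicates): for
// { ?a :p ?b . ?x :q ?y . ?b :r ?c }, the first pick marks ?a and ?b visited,
// which makes ?b :r ?c sort before the disconnected ?x :q ?y, so join-connected
// patterns are evaluated first.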
ordering 39 | } 40 | 41 | // Metaphor here is that quantity is cost to include, so less is better. 42 | fn strictly_before( 43 | t1: &TriplePattern, 44 | t2: &TriplePattern, 45 | visited: &HashSet, 46 | variable_values: &HashMap>, 47 | ) -> Ordering { 48 | let t1_connected = is_connected(t1, visited); 49 | let t2_connected = is_connected(t2, visited); 50 | if t1_connected && !t2_connected { 51 | return Ordering::Less; 52 | } 53 | if !t1_connected && t2_connected { 54 | return Ordering::Greater; 55 | } 56 | if let NamedNodePattern::Variable(v1) = &t1.predicate { 57 | if let NamedNodePattern::Variable(v2) = &t2.predicate { 58 | if variable_values.contains_key(v1.as_str()) 59 | && !variable_values.contains_key(v2.as_str()) 60 | { 61 | return Ordering::Less; 62 | } else if !variable_values.contains_key(v1.as_str()) 63 | && variable_values.contains_key(v2.as_str()) 64 | { 65 | return Ordering::Greater; 66 | } 67 | if visited.contains(v1.as_str()) && !visited.contains(v2.as_str()) { 68 | return Ordering::Less; 69 | } 70 | if !visited.contains(v1.as_str()) && visited.contains(v2.as_str()) { 71 | return Ordering::Greater; 72 | } 73 | //Todo find the least costly among the two 74 | } else { 75 | return Ordering::Greater; 76 | } 77 | } else if let NamedNodePattern::Variable(_) = &t2.predicate { 78 | return Ordering::Less; 79 | } 80 | 81 | // we rely on Polars to do the rest in the query optimizer. 82 | Ordering::Equal 83 | } 84 | 85 | fn is_connected(tp: &TriplePattern, visited: &HashSet) -> bool { 86 | let tp_vars = variables(tp); 87 | for v in &tp_vars { 88 | if visited.contains(v.as_str()) { 89 | return true; 90 | } 91 | } 92 | let tp_blanks = blank_nodes(tp); 93 | for b in tp_blanks { 94 | if visited.contains(&b.to_string()) { 95 | return true; 96 | } 97 | } 98 | false 99 | } 100 | 101 | fn variables(tp: &TriplePattern) -> Vec<&Variable> { 102 | let mut vs = vec![]; 103 | if let TermPattern::Variable(v) = &tp.subject { 104 | vs.push(v); 105 | } 106 | if let NamedNodePattern::Variable(v) = &tp.predicate { 107 | vs.push(v); 108 | } 109 | if let TermPattern::Variable(v) = &tp.object { 110 | vs.push(v); 111 | } 112 | vs 113 | } 114 | 115 | fn blank_nodes(tp: &TriplePattern) -> Vec<&BlankNode> { 116 | let mut bs = vec![]; 117 | if let TermPattern::BlankNode(b) = &tp.subject { 118 | bs.push(b); 119 | } 120 | if let TermPattern::BlankNode(b) = &tp.object { 121 | bs.push(b); 122 | } 123 | bs 124 | } 125 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/union.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use tracing::{instrument, trace}; 4 | 5 | use crate::sparql::QuerySettings; 6 | use query_processing::graph_patterns::union; 7 | use query_processing::pushdowns::Pushdowns; 8 | use representation::dataset::QueryGraph; 9 | use representation::query_context::{Context, PathEntry}; 10 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 11 | use spargebra::algebra::GraphPattern; 12 | use std::collections::HashMap; 13 | 14 | impl Triplestore { 15 | #[instrument(skip_all)] 16 | pub(crate) fn lazy_union( 17 | &self, 18 | left: &GraphPattern, 19 | right: &GraphPattern, 20 | solution_mappings: Option, 21 | context: &Context, 22 | parameters: &Option>, 23 | mut pushdowns: Pushdowns, 24 | query_settings: &QuerySettings, 25 | dataset: &QueryGraph, 26 | ) -> Result { 27 | trace!("Processing 
union graph pattern"); 28 | let left_context = context.extension_with(PathEntry::UnionLeftSide); 29 | let right_context = context.extension_with(PathEntry::UnionRightSide); 30 | let mut left_pushdowns = pushdowns.clone(); 31 | left_pushdowns.add_graph_pattern_pushdowns(left); 32 | let left_solution_mappings = self.lazy_graph_pattern( 33 | left, 34 | solution_mappings.clone(), 35 | &left_context, 36 | parameters, 37 | left_pushdowns, 38 | query_settings, 39 | dataset, 40 | )?; 41 | pushdowns.add_graph_pattern_pushdowns(right); 42 | let right_solution_mappings = self.lazy_graph_pattern( 43 | right, 44 | solution_mappings, 45 | &right_context, 46 | parameters, 47 | pushdowns, 48 | query_settings, 49 | dataset, 50 | )?; 51 | 52 | let u = union( 53 | vec![left_solution_mappings, right_solution_mappings], 54 | true, 55 | self.global_cats.clone(), 56 | )?; 57 | Ok(u) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_graph_patterns/values.rs: -------------------------------------------------------------------------------- 1 | use super::Triplestore; 2 | use crate::sparql::errors::SparqlError; 3 | use oxrdf::Variable; 4 | use polars::prelude::JoinType; 5 | use query_processing::graph_patterns::{join, values_pattern}; 6 | use query_processing::pushdowns::Pushdowns; 7 | use representation::query_context::Context; 8 | use representation::solution_mapping::SolutionMappings; 9 | use spargebra::term::GroundTerm; 10 | 11 | impl Triplestore { 12 | pub(crate) fn lazy_values( 13 | &self, 14 | solution_mappings: Option, 15 | variables: &[Variable], 16 | bindings: &[Vec>], 17 | _context: &Context, 18 | _pushdowns: Pushdowns, 19 | ) -> Result { 20 | let sm = values_pattern(variables, bindings); 21 | let (sm, _) = { 22 | let cats = self.global_cats.read()?; 23 | cats.encode_solution_mappings(sm, None) 24 | }; 25 | if let Some(mut mappings) = solution_mappings { 26 | mappings = join( 27 | mappings, 28 | sm.as_lazy(), 29 | JoinType::Inner, 30 | self.global_cats.clone(), 31 | )?; 32 | Ok(mappings) 33 | } else { 34 | Ok(sm.as_lazy()) 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/lazy_order.rs: -------------------------------------------------------------------------------- 1 | use super::{QuerySettings, Triplestore}; 2 | use crate::sparql::errors::SparqlError; 3 | 4 | use representation::dataset::QueryGraph; 5 | use representation::query_context::{Context, PathEntry}; 6 | use representation::solution_mapping::{EagerSolutionMappings, SolutionMappings}; 7 | use spargebra::algebra::OrderExpression; 8 | use std::collections::HashMap; 9 | 10 | impl Triplestore { 11 | pub fn lazy_order_expression( 12 | &self, 13 | oexpr: &OrderExpression, 14 | solution_mappings: SolutionMappings, 15 | context: &Context, 16 | parameters: &Option>, 17 | query_settings: &QuerySettings, 18 | dataset: &QueryGraph, 19 | ) -> Result<(SolutionMappings, bool, Context), SparqlError> { 20 | match oexpr { 21 | OrderExpression::Asc(expr) => { 22 | let inner_context = context.extension_with(PathEntry::OrderingOperation); 23 | Ok(( 24 | self.lazy_expression( 25 | expr, 26 | solution_mappings, 27 | &inner_context, 28 | parameters, 29 | None, 30 | query_settings, 31 | dataset, 32 | )?, 33 | true, 34 | inner_context, 35 | )) 36 | } 37 | OrderExpression::Desc(expr) => { 38 | let inner_context = context.extension_with(PathEntry::OrderingOperation); 39 | Ok(( 40 | self.lazy_expression( 41 | 
expr, 42 | solution_mappings, 43 | &inner_context, 44 | parameters, 45 | None, 46 | query_settings, 47 | dataset, 48 | )?, 49 | false, 50 | inner_context, 51 | )) 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /lib/triplestore/src/sparql/rewrite.rs: -------------------------------------------------------------------------------- 1 | use crate::sparql::rewrite::rewrite_cse::rewrite_gp_cse; 2 | use crate::sparql::rewrite::rewrite_pushdown::rewrite_gp_pushdown; 3 | use spargebra::algebra::GraphPattern; 4 | use spargebra::Query; 5 | use std::collections::HashSet; 6 | 7 | mod rewrite_cse; 8 | mod rewrite_pushdown; 9 | 10 | pub fn rewrite(q: Query) -> Query { 11 | let q = match q { 12 | Query::Select { 13 | dataset, 14 | pattern, 15 | base_iri, 16 | } => Query::Select { 17 | dataset, 18 | pattern: rewrite_gp(pattern), 19 | base_iri, 20 | }, 21 | Query::Construct { 22 | template, 23 | dataset, 24 | pattern, 25 | base_iri, 26 | } => Query::Construct { 27 | template, 28 | dataset, 29 | pattern: rewrite_gp(pattern), 30 | base_iri, 31 | }, 32 | Query::Describe { 33 | dataset, 34 | pattern, 35 | base_iri, 36 | } => Query::Describe { 37 | dataset, 38 | pattern: rewrite_gp(pattern), 39 | base_iri, 40 | }, 41 | Query::Ask { 42 | dataset, 43 | pattern, 44 | base_iri, 45 | } => Query::Ask { 46 | dataset, 47 | pattern: rewrite_gp(pattern), 48 | base_iri, 49 | }, 50 | }; 51 | q 52 | } 53 | 54 | pub fn rewrite_gp(pattern: GraphPattern) -> GraphPattern { 55 | let mut gp = rewrite_gp_pushdown(pattern, vec![], HashSet::new()); 56 | gp = rewrite_gp_cse(gp); 57 | gp 58 | } 59 | -------------------------------------------------------------------------------- /lib/utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "utils" 3 | version = "0.1.0" 4 | edition = "2021" 5 | rust-version.workspace = true 6 | 7 | [features] 8 | pyo3 = [ "dep:pyo3" ] 9 | 10 | [dependencies] 11 | rayon = {workspace = true} 12 | polars = { workspace = true, features = ["lazy"], default-features = false } 13 | thiserror.workspace = true 14 | 15 | pyo3 = { workspace = true, optional = true } 16 | -------------------------------------------------------------------------------- /lib/utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod polars; 2 | -------------------------------------------------------------------------------- /lib/utils/src/polars.rs: -------------------------------------------------------------------------------- 1 | use polars::{error::PolarsError, frame::DataFrame, prelude::LazyFrame}; 2 | #[cfg(not(feature = "pyo3"))] 3 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; 4 | use std::collections::HashMap; 5 | use thiserror::Error; 6 | 7 | #[cfg(feature = "pyo3")] 8 | use pyo3::Python; 9 | #[cfg(feature = "pyo3")] 10 | use std::{thread::sleep, time::Duration}; 11 | 12 | #[derive(Error, Debug)] 13 | pub enum InterruptableCollectError { 14 | #[error("interrupted via signal")] 15 | Interrupted, 16 | #[error(transparent)] 17 | Polars(#[from] PolarsError), 18 | } 19 | 20 | /// Collect a Polars LazyFrame in a way that allows a KeyboardInterrupt to cancel the computation 21 | pub fn pl_interruptable_collect(lf: LazyFrame) -> Result<DataFrame, InterruptableCollectError> { 22 | let future = lf.collect_concurrently()?; 23 | 24 | #[cfg(feature = "pyo3")] 25 | { 26 | Python::with_gil(move |py| { 27 | // TODO: https://github.com/pola-rs/polars/issues/22513 28 | // min med avg 95%
99% 99.9% 29 | // check_signals takes 20 - 30 - 85 - 261 - 691 - 908 30 | // It's better to give it some time rather than waste it on checking signals 31 | let mut sleeptime = Duration::from_nanos(15); 32 | loop { 33 | sleep(sleeptime); 34 | match future.fetch() { 35 | None => { 36 | if py.check_signals().is_err() { 37 | // Polars has some kind of race condition and panics as it tries to tx on our dropped rx 38 | // We've already decided to bail out, so waiting a few ms for rayon to clean up or whatever should be fine 39 | // TODO: https://github.com/pola-rs/polars/issues/22515 40 | future.cancel(); 41 | sleep(Duration::from_millis(50)); 42 | break Err(InterruptableCollectError::Interrupted); 43 | } 44 | // Max delay is twice the time the query actually took 45 | sleeptime = std::cmp::min(sleeptime * 2, Duration::from_millis(50)); 46 | } 47 | Some(dfr) => break Ok(dfr?), 48 | } 49 | } 50 | }) 51 | } 52 | 53 | #[cfg(not(feature = "pyo3"))] 54 | Ok(future.fetch_blocking()?) 55 | } 56 | 57 | pub fn pl_vec_interruptable_collect( 58 | lfs: Vec<LazyFrame>, 59 | #[cfg(feature = "pyo3")] py: Python<'_>, 60 | ) -> Result<Vec<DataFrame>, InterruptableCollectError> { 61 | // println!("Entering an interruptable collect"); 62 | 63 | let mut futures = HashMap::new(); 64 | for (i, lf) in lfs.into_iter().enumerate() { 65 | let future = lf.collect_concurrently()?; 66 | futures.insert(i, future); 67 | } 68 | 69 | #[cfg(feature = "pyo3")] 70 | { 71 | // TODO: https://github.com/pola-rs/polars/issues/22513 72 | // min med avg 95% 99% 99.9% 73 | // check_signals takes 20 - 30 - 85 - 261 - 691 - 908 74 | // It's better to give it some time rather than waste it on checking signals 75 | let mut sleeptime = Duration::from_nanos(15); 76 | let mut dfs = HashMap::new(); 77 | let mut ok_count = 0; 78 | loop { 79 | sleep(sleeptime); 80 | for (i, future) in &futures { 81 | if let std::collections::hash_map::Entry::Vacant(e) = dfs.entry(i) { 82 | match future.fetch() { 83 | None => { 84 | if py.check_signals().is_err() { 85 | // Polars has some kind of race condition and panics as it tries to tx on our dropped rx 86 | // We've already decided to bail out, so waiting a few ms for rayon to clean up or whatever should be fine 87 | // TODO: https://github.com/pola-rs/polars/issues/22515 88 | future.cancel(); 89 | sleep(Duration::from_millis(50)); 90 | return Err(InterruptableCollectError::Interrupted); 91 | } 92 | // Max delay is twice the time the query actually took 93 | } 94 | Some(dfr) => { 95 | e.insert(dfr?); 96 | ok_count += 1; 97 | } 98 | } 99 | } 100 | } 101 | if ok_count == futures.len() { 102 | break; 103 | } 104 | sleeptime = std::cmp::min(sleeptime * 2, Duration::from_millis(50)); 105 | } 106 | let results: Vec<_> = (0..dfs.len()).map(|i| dfs.remove(&i).unwrap()).collect(); 107 | Ok(results) 108 | } 109 | 110 | #[cfg(not(feature = "pyo3"))] 111 | { 112 | let dfs: Result<Vec<_>, _> = futures 113 | .into_par_iter() 114 | .map(|(_, x)| x.fetch_blocking()) 115 | .collect(); 116 | Ok(dfs?)
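// Sketch of a call site without the pyo3 feature (lf_a and lf_b are hypothetical LazyFrames):
//     let dfs = pl_vec_interruptable_collect(vec![lf_a, lf_b])?;
// With pyo3 enabled, the caller must also pass the Python token so signals can be polled.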
117 | } 118 | } 119 | -------------------------------------------------------------------------------- /licensing/POLARS_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Ritchie Vink 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /nix/py_maplib/default.nix: -------------------------------------------------------------------------------- 1 | { 2 | src, 3 | craneLib, 4 | cargoVendorDir, 5 | lib, 6 | callPackage, 7 | buildPythonPackage, 8 | pythonImportsCheckHook, 9 | pytestCheckHook, 10 | rustPlatform, 11 | polars, 12 | pyarrow, 13 | }: 14 | let 15 | cargoName = craneLib.crateNameFromCargoToml { 16 | cargoToml = src + /py_maplib/Cargo.toml; 17 | }; 18 | in 19 | buildPythonPackage rec { 20 | pname = "maplib"; 21 | inherit (cargoName) version; 22 | pyproject = true; 23 | 24 | inherit src cargoVendorDir; 25 | 26 | nativeBuildInputs = [ 27 | # This is wrong, but there is an upstream bug with 28 | # makeRustPlatform and its buildHooks. 
29 | # This probably breaks cross-compilation 30 | rustPlatform.rust.rustc 31 | rustPlatform.rust.cargo 32 | 33 | craneLib.configureCargoCommonVarsHook 34 | craneLib.configureCargoVendoredDepsHook 35 | rustPlatform.maturinBuildHook 36 | 37 | pythonImportsCheckHook 38 | # pytestCheckHook 39 | ]; 40 | 41 | propagatedBuildInputs = [ 42 | polars 43 | pyarrow 44 | ]; 45 | 46 | buildAndTestSubdir = "py_maplib"; 47 | } 48 | -------------------------------------------------------------------------------- /nix/py_maplib/pytest.nix: -------------------------------------------------------------------------------- 1 | { 2 | src, 3 | buildPythonPackage, 4 | pytestCheckHook, 5 | maplib, 6 | rdflib 7 | }: 8 | 9 | buildPythonPackage { 10 | pname = "maplib-tests"; 11 | inherit (maplib) version; 12 | format = "other"; 13 | 14 | inherit src; 15 | 16 | # dontUnpack = true; 17 | dontBuild = true; 18 | dontInstall = true; 19 | 20 | propagatedBuildInputs = [ 21 | maplib 22 | rdflib 23 | ]; 24 | 25 | nativeCheckInputs = [ 26 | pytestCheckHook 27 | ]; 28 | 29 | checkPhase = '' 30 | ls 31 | cd py_maplib/tests 32 | pytest 33 | ''; 34 | 35 | } 36 | -------------------------------------------------------------------------------- /py_maplib/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "py_maplib" 3 | version = "0.18.3" 4 | edition = "2021" 5 | 6 | [features] 7 | abi3 = [ "pyo3/abi3-py39" ] 8 | 9 | [dependencies] 10 | maplib = { path = "../lib/maplib", features = [ "pyo3" ] } 11 | triplestore = { path = "../lib/triplestore", features = [ "pyo3" ] } 12 | representation = { path = "../lib/representation" } 13 | shacl = { path = "../lib/shacl", features = [ "pyo3" ] } 14 | pydf_io = { path = "../lib/pydf_io" } 15 | report_mapping = {path = "../lib/report_mapping"} 16 | templates = {path= "../lib/templates"} 17 | cimxml = {path = "../lib/cimxml", features = [ "pyo3" ]} 18 | 19 | pyo3.workspace = true 20 | oxrdf.workspace = true 21 | oxrdfio.workspace = true 22 | thiserror.workspace = true 23 | polars = { workspace = true, features = [ 24 | "nightly", "performant", "cse", 25 | "dtype-array", "dtype-date", 26 | "dtype-datetime", "dtype-decimal", "dtype-duration", 27 | "dtype-i8", "dtype-i16", "dtype-struct", 28 | "dtype-time", "dtype-u8", "dtype-u16"], default-features = false } 29 | uuid.workspace = true 30 | chrono.workspace = true 31 | 32 | tracing.workspace = true 33 | tracing-subscriber.workspace = true 34 | 35 | [lib] 36 | name = "maplib" 37 | crate-type = ["cdylib"] 38 | 39 | # Conditional dependency specification of mimalloc and jemallocator copied from: https://github.com/pola-rs/polars/blob/main/py-polars/Cargo.toml 40 | # Licence in /licensing/POLARS_LICENSE 41 | [target.'cfg(not(target_os = "linux"))'.dependencies] 42 | mimalloc = { version = "0.1", default-features = false } 43 | 44 | [target.'cfg(target_os = "linux")'.dependencies] 45 | jemallocator = { version = "0.5.4", features = ["disable_initial_exec_tls"] } 46 | 47 | [lints.rust] 48 | unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("gil-refs", "rdf-star"))'] } 49 | -------------------------------------------------------------------------------- /py_maplib/README.md: -------------------------------------------------------------------------------- 1 | This is a placeholder README.md, which is overwritten on release by ../README.md. 
-------------------------------------------------------------------------------- /py_maplib/maplib/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | -------------------------------------------------------------------------------- /py_maplib/maplib/__init__.py: -------------------------------------------------------------------------------- 1 | # r''' 2 | # # Overview 3 | # 4 | # ''' 5 | 6 | __all__ = [ 7 | "Model", 8 | "a", 9 | "Triple", 10 | "SolutionMappings", 11 | "IndexingOptions", 12 | "ValidationReport", 13 | "Instance", 14 | "Template", 15 | "Argument", 16 | "Parameter", 17 | "Variable", 18 | "RDFType", 19 | "XSD", 20 | "IRI", 21 | "Literal", 22 | "Prefix", 23 | "BlankNode", 24 | "explore", 25 | "add_triples", 26 | "MaplibException", 27 | ] 28 | 29 | import pathlib 30 | from .maplib import * 31 | from .adding_triples import add_triples 32 | 33 | if (pathlib.Path(__file__).parent.resolve() / "graph_explorer").exists(): 34 | from .graph_explorer import explore 35 | else: 36 | 37 | def explore( 38 | m: "Model", 39 | host: str = "localhost", 40 | port: int = 8000, 41 | bind: str = "localhost", 42 | popup=True, 43 | fts=True, 44 | ): 45 | """Starts a graph explorer session. 46 | To run from Jupyter Notebook use: 47 | >>> from maplib import explore 48 | >>> 49 | >>> server = explore(m) 50 | You can later stop the server with 51 | >>> server.stop() 52 | 53 | :param m: The Model to explore 54 | :param host: The hostname that we will point the browser to. 55 | :param port: The port where the graph explorer webserver listens on. 56 | :param bind: Bind to the following host / ip. 57 | :param popup: Pop up the browser window. 58 | :param fts: Enable full text search indexing 59 | """ 60 | print("Contact Data Treehouse to try!") 61 | -------------------------------------------------------------------------------- /py_maplib/maplib/adding_triples.py: -------------------------------------------------------------------------------- 1 | from maplib.maplib import Model, Template, IRI, Triple, Variable 2 | 3 | 4 | def add_triples( 5 | source: Model, target: Model, source_graph: str = None, target_graph: str = None 6 | ): 7 | """(Zero) copy the triples from one Model into another. 8 | 9 | :param source: The source mapping 10 | :param target: The target mapping 11 | :param source_graph: The named graph in the source mapping to copy from. None means default graph. 12 | :param target_graph: The named graph in the target mapping to copy into. None means default graph. 
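    Example (hypothetical models and graph IRI):

    >>> add_triples(source=m1, target=m2, target_graph="urn:example:copy")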
13 | """ 14 | for p in source.get_predicate_iris(source_graph): 15 | subject = Variable("subject") 16 | object = Variable("object") 17 | template = Template( 18 | iri=IRI("urn:maplib:tmp"), 19 | parameters=[subject, object], 20 | instances=[Triple(subject, p, object)], 21 | ) 22 | sms = source.get_predicate(p, source_graph) 23 | for sm in sms: 24 | target.map( 25 | template, 26 | sm.mappings, 27 | types=sm.rdf_types, 28 | graph=target_graph, 29 | ) 30 | -------------------------------------------------------------------------------- /py_maplib/maplib/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTreehouse/maplib/19ad513c9c1ebd60c6a0019a5a7c255a0bfc5789/py_maplib/maplib/py.typed -------------------------------------------------------------------------------- /py_maplib/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "maplib" 3 | description = "Dataframe-based interactive knowledge graph construction" 4 | dependencies = ["polars>=0.20.13", "pyarrow>=7.0.0", "fastapi[standard]>=0.115"] 5 | readme = "README.md" 6 | authors = [{name = "Magnus Bakken", email = "magnus@data-treehouse.com" }] 7 | license = {file = "LICENSE"} 8 | requires-python = ">=3.9" 9 | keywords = ["rdf", "graph", "dataframe", "sparql", "ottr"] 10 | classifiers = [ 11 | "Development Status :: 4 - Beta", 12 | "License :: OSI Approved :: Apache Software License", 13 | "Programming Language :: Python :: 3 :: Only", 14 | "Programming Language :: Python :: 3.9", 15 | "Programming Language :: Python :: 3.10", 16 | "Programming Language :: Python :: 3.11", 17 | "Programming Language :: Rust", 18 | "Topic :: Database :: Database Engines/Servers", 19 | "Topic :: Scientific/Engineering", 20 | ] 21 | dynamic = ["version"] 22 | 23 | [project.urls] 24 | Homepage = "https://github.com/DataTreehouse/maplib" 25 | Documentation = "https://datatreehouse.github.io/maplib/maplib/maplib.html" 26 | Repository = "https://github.com/DataTreehouse/maplib" 27 | Changelog = "https://github.com/DataTreehouse/maplib/releases" 28 | 29 | [build-system] 30 | requires = ["maturin==1.9.4"] 31 | build-backend = "maturin" -------------------------------------------------------------------------------- /py_maplib/src/error.rs: -------------------------------------------------------------------------------- 1 | // Adapted from: https://raw.githubusercontent.com/pola-rs/polars/master/py-polars/src/error.rs 2 | // Original licence: 3 | // 4 | // Copyright (c) 2020 Ritchie Vink 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in all 14 | // copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | // SOFTWARE. 23 | 24 | use maplib::errors::MaplibError; 25 | use pyo3::{create_exception, exceptions::PyException, prelude::*}; 26 | use std::fmt::Debug; 27 | use thiserror::Error; 28 | 29 | #[derive(Error, Debug)] 30 | #[allow(clippy::enum_variant_names)] 31 | pub enum PyMaplibError { 32 | #[error(transparent)] 33 | MaplibError(#[from] MaplibError), 34 | #[error("Function argument error: `{0}`")] 35 | FunctionArgumentError(String), 36 | #[error("Runtime error `{0}`")] 37 | RuntimeError(String), 38 | } 39 | 40 | impl std::convert::From<PyMaplibError> for PyErr { 41 | fn from(err: PyMaplibError) -> PyErr { 42 | match &err { 43 | PyMaplibError::MaplibError(err) => MaplibException::new_err(format!("{err}")), 44 | PyMaplibError::FunctionArgumentError(s) => { 45 | FunctionArgumentException::new_err(s.clone()) 46 | } 47 | PyMaplibError::RuntimeError(s) => MaplibRuntimeError::new_err(s.clone()), 48 | } 49 | } 50 | } 51 | 52 | create_exception!(exceptions, MaplibException, PyException); 53 | create_exception!(exceptions, MaplibRuntimeError, PyException); 54 | create_exception!(exceptions, FunctionArgumentException, PyException); 55 | -------------------------------------------------------------------------------- /py_maplib/src/shacl.rs: -------------------------------------------------------------------------------- 1 | use crate::error::PyMaplibError; 2 | use crate::{fix_cats_and_multicolumns, PyModel}; 3 | use maplib::errors::MaplibError; 4 | use pydf_io::to_python::df_to_py_df; 5 | use pyo3::{pyclass, pymethods, PyObject, PyResult, Python}; 6 | use report_mapping::report_to_model; 7 | use representation::solution_mapping::EagerSolutionMappings; 8 | use shacl::ValidationReport as RustValidationReport; 9 | use std::collections::HashMap; 10 | use triplestore::Triplestore; 11 | 12 | #[derive(Clone)] 13 | #[pyclass(name = "ValidationReport")] 14 | pub struct PyValidationReport { 15 | shape_graph: Option<Triplestore>, 16 | inner: RustValidationReport, 17 | } 18 | 19 | impl PyValidationReport { 20 | pub fn new( 21 | inner: RustValidationReport, 22 | shape_graph: Option<Triplestore>, 23 | ) -> PyValidationReport { 24 | PyValidationReport { shape_graph, inner } 25 | } 26 | } 27 | 28 | #[pymethods] 29 | impl PyValidationReport { 30 | #[getter] 31 | pub fn conforms(&self) -> Option<bool> { 32 | self.inner.conforms 33 | } 34 | 35 | #[getter] 36 | pub fn shape_targets(&self, py: Python<'_>) -> PyResult<PyObject> { 37 | let df = py.allow_threads(|| self.inner.shape_targets_df()); 38 | df_to_py_df(df, HashMap::new(), None, false, py) 39 | } 40 | 41 | #[getter] 42 | pub fn performance(&self, py: Python<'_>) -> PyResult<PyObject> { 43 | let df = py.allow_threads(|| self.inner.performance_df()); 44 | df_to_py_df(df, HashMap::new(), None, false, py) 45 | } 46 | 47 | #[pyo3(signature = (native_dataframe=None, include_datatypes=None, streaming=None))] 48 | pub fn results( 49 | &self, 50 | native_dataframe: Option<bool>, 51 | include_datatypes: Option<bool>, 52 | streaming: Option<bool>, 53 | py: Python<'_>, 54 | ) -> PyResult<Option<PyObject>> { 55 | let streaming = streaming.unwrap_or(false); 56 | let report = py.allow_threads(|| -> Result<_, PyMaplibError> { 57 | let sm = self 58 | .inner 59 | .concatenated_results() 60 | .map_err(|x| PyMaplibError::from(MaplibError::from(x)))?; 61 | let cats = self.inner.cats.as_ref().unwrap().clone(); 62 |
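// "cats" is the shared dictionary behind the categorical encoding of IRIs and
// literals; fix_cats_and_multicolumns below uses it to decode the report columns
// into readable terms before they are handed to Python.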
match sm { 63 | Some(sm) => { 64 | let EagerSolutionMappings { 65 | mut mappings, 66 | mut rdf_node_types, 67 | } = sm.as_eager(streaming); 68 | (mappings, rdf_node_types) = fix_cats_and_multicolumns( 69 | mappings, 70 | rdf_node_types, 71 | native_dataframe.unwrap_or(false), 72 | cats, 73 | ); 74 | Ok(Some((mappings, rdf_node_types))) 75 | } 76 | None => Ok(None), 77 | } 78 | })?; 79 | match report { 80 | Some((mappings, rdf_node_types)) => Ok(Some(df_to_py_df( 81 | mappings, 82 | rdf_node_types, 83 | None, 84 | include_datatypes.unwrap_or(false), 85 | py, 86 | )?)), 87 | None => Ok(None), 88 | } 89 | } 90 | 91 | #[pyo3(signature = (native_dataframe=None, include_datatypes=None, streaming=None))] 92 | pub fn details( 93 | &self, 94 | native_dataframe: Option<bool>, 95 | include_datatypes: Option<bool>, 96 | streaming: Option<bool>, 97 | py: Python<'_>, 98 | ) -> PyResult<Option<PyObject>> { 99 | let streaming = streaming.unwrap_or(false); 100 | let native_dataframe = native_dataframe.unwrap_or(false); 101 | let include_datatypes = include_datatypes.unwrap_or(false); 102 | let details = py.allow_threads(|| -> Result<_, PyMaplibError> { 103 | let sm = self 104 | .inner 105 | .concatenated_details() 106 | .map_err(|x| PyMaplibError::from(MaplibError::from(x)))?; 107 | let cats = self.inner.cats.as_ref().unwrap().clone(); 108 | match sm { 109 | Some(sm) => { 110 | let EagerSolutionMappings { 111 | mut mappings, 112 | mut rdf_node_types, 113 | } = sm.as_eager(streaming); 114 | (mappings, rdf_node_types) = 115 | fix_cats_and_multicolumns(mappings, rdf_node_types, native_dataframe, cats); 116 | Ok(Some((mappings, rdf_node_types))) 117 | } 118 | None => Ok(None), 119 | } 120 | })?; 121 | match details { 122 | Some((mappings, rdf_node_types)) => Ok(Some(df_to_py_df( 123 | mappings, 124 | rdf_node_types, 125 | None, 126 | include_datatypes, 127 | py, 128 | )?)), 129 | None => Ok(None), 130 | } 131 | } 132 | 133 | pub fn graph(&self, py: Python<'_>) -> PyResult<PyModel> { 134 | let m = py.allow_threads(|| { 135 | report_to_model(&self.inner, &self.shape_graph) 136 | .map_err(|x| PyMaplibError::from(MaplibError::from(x))) 137 | })?; 138 | Ok(PyModel::from_inner_mapping(m)) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /py_maplib/tests/.gitignore: -------------------------------------------------------------------------------- 1 | imdb 2 | test_imdb.py 3 | documents 4 | test_benchmark.py 5 | testdata/gtfs 6 | test_opcua_model.py 7 | testdata/*.parquet 8 | bench200_000.nt 9 | tmp 10 | out.nt 11 | create*.nt -------------------------------------------------------------------------------- /py_maplib/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTreehouse/maplib/19ad513c9c1ebd60c6a0019a5a7c255a0bfc5789/py_maplib/tests/__init__.py -------------------------------------------------------------------------------- /py_maplib/tests/out.ttl: -------------------------------------------------------------------------------- 1 | "A" .
2 | -------------------------------------------------------------------------------- /py_maplib/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | rdflib==6.2.0 -------------------------------------------------------------------------------- /py_maplib/tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from maplib import Model 3 | from maplib import MaplibException 4 | 5 | 6 | def test_model_exception(): 7 | m = Model() 8 | 9 | with pytest.raises(MaplibException) as e: 10 | m.reads("abc", format="turtle", graph="http://example.com/data") 11 | -------------------------------------------------------------------------------- /py_maplib/tests/test_programmatic_pizza_example.py: -------------------------------------------------------------------------------- 1 | import polars as pl 2 | import pytest 3 | from polars.testing import assert_frame_equal 4 | 5 | from maplib import BlankNode 6 | 7 | from maplib import ( 8 | Model, 9 | Prefix, 10 | Template, 11 | Argument, 12 | Parameter, 13 | Variable, 14 | RDFType, 15 | Triple, 16 | a, 17 | ) 18 | 19 | pl.Config.set_fmt_str_lengths(200) 20 | 21 | 22 | @pytest.fixture(scope="function") 23 | def template() -> Template: 24 | pi = "https://github.com/DataTreehouse/maplib/pizza#" 25 | pi = Prefix(pi) 26 | 27 | p_var = Variable("p") 28 | c_var = Variable("c") 29 | ings_var = Variable("ings") 30 | 31 | template = Template( 32 | iri=pi.suf("PizzaTemplate"), 33 | parameters=[ 34 | Parameter(variable=p_var, rdf_type=RDFType.IRI()), 35 | Parameter(variable=c_var, rdf_type=RDFType.IRI()), 36 | Parameter(variable=ings_var, rdf_type=RDFType.Nested(RDFType.IRI())), 37 | ], 38 | instances=[ 39 | Triple(p_var, a(), pi.suf("Pizza")), 40 | Triple(p_var, pi.suf("fromCountry"), c_var), 41 | Triple(p_var, pi.suf("hasBlank"), BlankNode("MyBlank")), 42 | Triple( 43 | p_var, 44 | pi.suf("hasIngredient"), 45 | Argument(term=ings_var, list_expand=True), 46 | list_expander="cross", 47 | ), 48 | ], 49 | ) 50 | return template 51 | 52 | 53 | @pytest.fixture(scope="function") 54 | def pizzas_model(template: Template): 55 | 56 | pi = "https://github.com/DataTreehouse/maplib/pizza#" 57 | df = pl.DataFrame( 58 | { 59 | "p": [pi + "Hawaiian", pi + "Grandiosa"], 60 | "c": [pi + "CAN", pi + "NOR"], 61 | "ings": [[pi + "Pineapple", pi + "Ham"], [pi + "Pepper", pi + "Meat"]], 62 | } 63 | ) 64 | # print(df) 65 | 66 | m = Model() 67 | m.map(template, df) 68 | hpizzas = """ 69 | PREFIX pi: <https://github.com/DataTreehouse/maplib/pizza#> 70 | CONSTRUCT { ?p a pi:HeterodoxPizza } 71 | WHERE { 72 | ?p a pi:Pizza . 73 | ?p pi:hasIngredient pi:Pineapple . 74 | }""" 75 | m.insert(hpizzas) 76 | return m 77 | 78 | 79 | def test_simple_query_no_error(pizzas_model): 80 | res = pizzas_model.query( 81 | """ 82 | PREFIX pi: <https://github.com/DataTreehouse/maplib/pizza#> 83 | 84 | SELECT ?p WHERE { 85 | ?p a pi:HeterodoxPizza 86 | } 87 | """ 88 | ) 89 | 90 | expected_df = pl.DataFrame( 91 | {"p": ["<https://github.com/DataTreehouse/maplib/pizza#Hawaiian>"]} 92 | ) 93 | assert_frame_equal(res, expected_df) 94 | 95 | 96 | def test_insert_new_thing(pizzas_model): 97 | hpizzas = """ 98 | PREFIX pi: <https://github.com/DataTreehouse/maplib/pizza#> 99 | CONSTRUCT { ?p a pi:HeterodoxPizza2 } 100 | WHERE { 101 | ?p a pi:Pizza . 102 | ?p pi:hasIngredient pi:Pineapple .
103 | }""" 104 | res1 = pizzas_model.insert(hpizzas) 105 | assert isinstance(res1, dict) 106 | assert len(res1) == 1 107 | assert res1["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"].shape == (1, 2) 108 | res2 = pizzas_model.insert(hpizzas) 109 | assert len(res2) == 0 110 | 111 | 112 | def test_insert_new_things(pizzas_model): 113 | hpizzas = """ 114 | PREFIX pi: 115 | CONSTRUCT { 116 | ?p a pi:HeterodoxPizza2 . 117 | ?p pi:abc pi:123 . 118 | } 119 | WHERE { 120 | ?p a pi:Pizza . 121 | }""" 122 | res1 = pizzas_model.insert(hpizzas) 123 | assert isinstance(res1, dict) 124 | assert len(res1) == 2 125 | assert res1["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"].shape == (2, 2) 126 | assert res1["https://github.com/DataTreehouse/maplib/pizza#abc"].shape == (2, 2) 127 | res2 = pizzas_model.insert(hpizzas) 128 | assert len(res2) == 0 129 | 130 | 131 | def test_print_template(template: Template): 132 | s = str(template) 133 | assert ( 134 | s 135 | == """ [ 136 | ?p, 137 | ?c, 138 | List<> ?ings ] :: { 139 | (?p, , ) , 140 | (?p, , ?c) , 141 | (?p, , _:MyBlank) , 142 | cross | (?p, , ++ ?ings) 143 | } . 144 | """ 145 | ) 146 | -------------------------------------------------------------------------------- /py_maplib/tests/test_rdf_parser.py: -------------------------------------------------------------------------------- 1 | import polars as pl 2 | import pytest 3 | import rdflib 4 | from polars.testing import assert_frame_equal 5 | import pathlib 6 | from maplib import Model 7 | 8 | pl.Config.set_fmt_str_lengths(300) 9 | 10 | PATH_HERE = pathlib.Path(__file__).parent 11 | TESTDATA_PATH = PATH_HERE / "testdata" / "rdf_parser" 12 | 13 | 14 | def test_issue_8(): 15 | m = Model() 16 | m.read(TESTDATA_PATH / "date_panic.nt", format="ntriples") 17 | df = m.query("""SELECT ?c WHERE {?a ?b ?c}""", native_dataframe=True) 18 | expected = pl.from_repr( 19 | """ 20 | ┌────────────┐ 21 | │ c │ 22 | │ --- │ 23 | │ date │ 24 | ╞════════════╡ 25 | │ 2035-01-23 │ 26 | └────────────┘ 27 | """ 28 | ) 29 | assert_frame_equal(df, expected) 30 | -------------------------------------------------------------------------------- /py_maplib/tests/test_read_write.py: -------------------------------------------------------------------------------- 1 | import polars as pl 2 | import pytest 3 | import rdflib 4 | from polars.testing import assert_frame_equal 5 | import pathlib 6 | from maplib import Model 7 | 8 | pl.Config.set_fmt_str_lengths(300) 9 | 10 | 11 | PATH_HERE = pathlib.Path(__file__).parent 12 | TESTDATA_PATH = PATH_HERE / "testdata" 13 | 14 | 15 | def test_read_ntriples(): 16 | m = Model() 17 | m.read(str(TESTDATA_PATH / "read_ntriples.nt")) 18 | res = m.query( 19 | """ 20 | PREFIX foaf: 21 | 22 | SELECT ?s ?v ?o WHERE { 23 | ?s ?v ?o . 24 | } ORDER BY ?s ?v ?o 25 | """ 26 | ).sort(["s", "v", "o"]) 27 | # TODO: Fix multitype sorting 28 | filename = TESTDATA_PATH / "read_ntriples.csv" 29 | # res.write_csv(str(filename)) 30 | expected_df = pl.scan_csv(filename).collect() 31 | pl.testing.assert_frame_equal(res, expected_df) 32 | 33 | 34 | def test_read_ntriples_twice_with_replace(): 35 | m = Model() 36 | m.read(str(TESTDATA_PATH / "read_ntriples.nt")) 37 | m.read(str(TESTDATA_PATH / "read_ntriples.nt"), replace_graph=True) 38 | res = m.query( 39 | """ 40 | PREFIX foaf: 41 | 42 | SELECT ?s ?v ?o WHERE { 43 | ?s ?v ?o . 
44 | } ORDER BY ?s ?v ?o 45 | """ 46 | ).sort(["s", "v", "o"]) 47 | # TODO: Fix multitype sorting 48 | filename = TESTDATA_PATH / "read_ntriples.csv" 49 | # res.write_csv(str(filename)) 50 | assert res.shape == (8, 3) 51 | 52 | 53 | def test_read_write_ntriples_string(): 54 | m = Model() 55 | with open(TESTDATA_PATH / "read_ntriples.nt") as f: 56 | ntstring = f.read() 57 | m.reads(ntstring, format="ntriples") 58 | out_str = m.writes(format="ntriples") 59 | m2 = Model() 60 | m2.reads(out_str, format="ntriples") 61 | res = m2.query( 62 | """ 63 | PREFIX foaf: <http://xmlns.com/foaf/0.1/> 64 | 65 | SELECT ?v ?o WHERE { 66 | ?s ?v ?o . 67 | } ORDER BY ?v ?o 68 | """ 69 | ).sort(["v", "o"]) 70 | # TODO: Fix multitype sorting 71 | filename = TESTDATA_PATH / "read_ntriples2.csv" 72 | # res.write_csv(str(filename)) 73 | expected_df = pl.scan_csv(filename).select(["v", "o"]).sort(["v", "o"]).collect() 74 | pl.testing.assert_frame_equal(res, expected_df) 75 | 76 | 77 | def test_read_write_turtle_string(): 78 | m = Model() 79 | with open(TESTDATA_PATH / "read_ntriples.nt") as f: 80 | ntstring = f.read() 81 | m.reads(ntstring, format="ntriples") 82 | out_str = m.writes(format="turtle") 83 | m2 = Model() 84 | m2.reads(out_str, format="turtle") 85 | res = m2.query( 86 | """ 87 | PREFIX foaf: <http://xmlns.com/foaf/0.1/> 88 | 89 | SELECT ?v ?o WHERE { 90 | ?s ?v ?o . 91 | } ORDER BY ?v ?o 92 | """ 93 | ).sort(["v", "o"]) 94 | # TODO: Fix multitype sorting 95 | filename = TESTDATA_PATH / "read_ntriples2.csv" 96 | # res.write_csv(str(filename)) 97 | expected_df = pl.scan_csv(filename).select(["v", "o"]).sort(["v", "o"]).collect() 98 | pl.testing.assert_frame_equal(res, expected_df) 99 | 100 | 101 | def test_read_write_xml_string(): 102 | m = Model() 103 | with open(TESTDATA_PATH / "read_ntriples.nt") as f: 104 | ntstring = f.read() 105 | m.reads(ntstring, format="ntriples") 106 | out_str = m.writes(format="rdf/xml") 107 | m2 = Model() 108 | m2.reads(out_str, format="rdf/xml") 109 | res = m2.query( 110 | """ 111 | PREFIX foaf: <http://xmlns.com/foaf/0.1/> 112 | 113 | SELECT ?v ?o WHERE { 114 | ?s ?v ?o . 
115 | } ORDER BY ?v ?o 116 | """ 117 | ).sort(["v", "o"]) 118 | # TODO: Fix multitype sorting 119 | filename = TESTDATA_PATH / "read_ntriples2.csv" 120 | # res.write_csv(str(filename)) 121 | expected_df = pl.scan_csv(filename).select(["v", "o"]).sort(["v", "o"]).collect() 122 | pl.testing.assert_frame_equal(res, expected_df) 123 | -------------------------------------------------------------------------------- /py_maplib/tests/test_validate_iris.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from maplib import Model, Triple, Variable, Template, IRI, Parameter, RDFType 3 | import polars as pl 4 | 5 | 6 | def test_many_validation_errors_subject(): 7 | n = 10_000 8 | df = pl.DataFrame( 9 | { 10 | "subject": [f"MyBadIRI//{i}" for i in range(n)], 11 | "predicate": ["https://example.net/hasObject"] * n, 12 | "object": [i for i in range(n)], 13 | } 14 | ) 15 | m = Model() 16 | subj = Variable("subject") 17 | pred = Variable("predicate") 18 | obj = Variable("object") 19 | t = Template( 20 | IRI("https://github.com/DataTreehouse/maplib/my_template"), 21 | [subj, pred, obj], 22 | [Triple(subj, pred, obj)], 23 | ) 24 | with pytest.raises(Exception) as e: 25 | m.map(t, df, validate_iris=True) 26 | assert "invalid IRI" in str(e) 27 | 28 | 29 | def test_many_validation_errors_predicate(): 30 | n = 10_000 31 | df = pl.DataFrame( 32 | { 33 | "subject": [f"urn:dth:goodiri_{i}" for i in range(n)], 34 | "predicate": ["!https://example.net/hasObject"] * n, 35 | "object": [i for i in range(n)], 36 | } 37 | ) 38 | m = Model() 39 | subj = Variable("subject") 40 | pred = Variable("predicate") 41 | obj = Variable("object") 42 | t = Template( 43 | IRI("https://github.com/DataTreehouse/maplib/my_template"), 44 | [subj, pred, obj], 45 | [Triple(subj, pred, obj)], 46 | ) 47 | with pytest.raises(Exception) as e: 48 | m.map(t, df, validate_iris=True) 49 | assert "invalid IRI" in str(e) 50 | 51 | 52 | def test_many_validation_errors_nested_object(): 53 | n = 1_000 54 | df = pl.DataFrame( 55 | { 56 | "subject": [f"urn:dth:goodiri_{i}" for i in range(n)], 57 | "predicate": ["https://example.net/hasObject"] * n, 58 | "object": [ 59 | [f"aTerribleIRI!!{i}", f"anotherTerribleIRI!!{i}"] for i in range(n) 60 | ], 61 | } 62 | ) 63 | m = Model() 64 | subj = Variable("subject") 65 | pred = Variable("predicate") 66 | obj = Variable("object") 67 | t = Template( 68 | IRI("https://github.com/DataTreehouse/maplib/my_template"), 69 | [subj, pred, Parameter(obj, rdf_type=RDFType.Nested(RDFType.IRI()))], 70 | [Triple(subj, pred, obj)], 71 | ) 72 | with pytest.raises(Exception) as e: 73 | m.map(t, df, validate_iris=True) 74 | assert "invalid IRI" in str(e) 75 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/expected_easy_case.ttl: -------------------------------------------------------------------------------- 1 | "1"^^ . 2 | "2"^^ . 3 | "3"^^ . 4 | "4"^^ . 
5 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/iterated_property_path_constant_object_query.csv: -------------------------------------------------------------------------------- 1 | site_label 2 | Breezy Field 3 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/iterated_property_path_constant_subject_query.csv: -------------------------------------------------------------------------------- 1 | node 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/larger_ordered_query.csv: -------------------------------------------------------------------------------- 1 | site_label,wtur_label,ts,ts_label 2 | Breezy Field,A1,,Production 3 | Breezy Field,A10,,Production 4 | Breezy Field,A2,,Production 5 | Breezy Field,A3,,Production 6 | Breezy Field,A4,,Production 7 | Breezy Field,A5,,Production 8 | Breezy Field,A6,,Production 9 | Breezy Field,A7,,Production 10 | Breezy Field,A8,,Production 11 | Breezy Field,A9,,Production 12 | Gale Valley,A1,,Production 13 | Gale Valley,A10,,Production 14 | Gale Valley,A2,,Production 15 | Gale Valley,A3,,Production 16 | Gale Valley,A4,,Production 17 | Gale Valley,A5,,Production 18 | Gale Valley,A6,,Production 19 | Gale Valley,A7,,Production 20 | Gale Valley,A8,,Production 21 | Gale Valley,A9,,Production 22 | Gusty Plains,A1,,Production 23 | Gusty Plains,A10,,Production 24 | Gusty Plains,A2,,Production 25 | Gusty Plains,A3,,Production 26 | Gusty Plains,A4,,Production 27 | Gusty Plains,A5,,Production 28 | Gusty Plains,A6,,Production 29 | Gusty Plains,A7,,Production 30 | Gusty Plains,A8,,Production 31 | Gusty Plains,A9,,Production 32 | Wind Mountain,A1,,Production 33 | Wind Mountain,A10,,Production 34 | Wind Mountain,A2,,Production 35 | Wind Mountain,A3,,Production 36 | Wind Mountain,A4,,Production 37 | Wind Mountain,A5,,Production 38 | Wind Mountain,A6,,Production 39 | Wind Mountain,A7,,Production 40 | Wind Mountain,A8,,Production 41 | Wind Mountain,A9,,Production 42 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/larger_query.csv: -------------------------------------------------------------------------------- 1 | site_label,wtur_label,ts,ts_label 2 | Wind Mountain,A1,,Production 3 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_concat.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | """2021-01-01T08:00:00+00:00""^^","""2021-01-01T08:00:01+00:00""^^",2021-01-01 08:00:00.000000+00:00_2021-01-01 08:00:01.000000+00:00 3 | """2021-01-01T08:00:00+00:00""^^","""2.0""^^",2021-01-01 08:00:00.000000+00:00_2.0 4 | """2021-01-01T08:00:00+00:00""^^","""2""^^",2021-01-01 08:00:00.000000+00:00_2 5 | """2021-01-01T08:00:00+00:00""^^","""90""^^",2021-01-01 08:00:00.000000+00:00_90 6 | """2021-01-01T08:00:00+00:00""^^","""AString""",2021-01-01 08:00:00.000000+00:00_AString 7 | """3.0""^^","""2021-01-01T08:00:01+00:00""^^",3.0_2021-01-01 08:00:01.000000+00:00 8 | """3.0""^^","""2.0""^^",3.0_2.0 9 | """3.0""^^","""2""^^",3.0_2 10 | """3.0""^^","""90""^^",3.0_90 11 | """3.0""^^","""AString""",3.0_AString 12 | 
"""1""^^","""2021-01-01T08:00:01+00:00""^^",1_2021-01-01 08:00:01.000000+00:00 13 | """1""^^","""2.0""^^",1_2.0 14 | """1""^^","""2""^^",1_2 15 | """1""^^","""90""^^",1_90 16 | """1""^^","""AString""",1_AString 17 | """89""^^","""2021-01-01T08:00:01+00:00""^^",89_2021-01-01 08:00:01.000000+00:00 18 | """89""^^","""2.0""^^",89_2.0 19 | """89""^^","""2""^^",89_2 20 | """89""^^","""90""^^",89_90 21 | """89""^^","""AString""",89_AString 22 | """String""","""2021-01-01T08:00:01+00:00""^^",String_2021-01-01 08:00:01.000000+00:00 23 | """String""","""2.0""^^",String_2.0 24 | """String""","""2""^^",String_2 25 | """String""","""90""^^",String_90 26 | """String""","""AString""",String_AString 27 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_join_query.csv: -------------------------------------------------------------------------------- 1 | s1,s2,o 2 | _:person_l0_p0_r0,_:person_l0_p0_r0,Ann 3 | _:person_l0_p0_r1,_:person_l0_p0_r1,Bob 4 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_join_query_two_vars.csv: -------------------------------------------------------------------------------- 1 | s,o 2 | _:person_l0_p0_r0,Ann 3 | _:person_l0_p0_r1,Bob 4 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_leftjoin_query.csv: -------------------------------------------------------------------------------- 1 | s,o 2 | _:person_l0_p0_r0,Ann 3 | _:person_l0_p0_r1,Bob 4 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_query.csv: -------------------------------------------------------------------------------- 1 | s,v,o 2 | _:person_l0_p0_r0,, 3 | _:person_l0_p0_r0,,"""Ann""" 4 | _:person_l0_p0_r0,,"""Strong""" 5 | _:person_l0_p0_r0,, 6 | _:person_l0_p0_r1,, 7 | _:person_l0_p0_r1,,"""Bob""" 8 | _:person_l0_p0_r1,,"""Brite""" 9 | _:person_l0_p0_r1,, 10 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_query_sorting.csv: -------------------------------------------------------------------------------- 1 | s,v,o 2 | _:person_l0_p0_r0,, 3 | _:person_l0_p0_r0,,"""Ann""" 4 | _:person_l0_p0_r0,,"""Strong""" 5 | _:person_l0_p0_r0,, 6 | _:person_l0_p0_r1,, 7 | _:person_l0_p0_r1,,"""Bob""" 8 | _:person_l0_p0_r1,,"""Brite""" 9 | _:person_l0_p0_r1,, 10 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_union_query.csv: -------------------------------------------------------------------------------- 1 | s,o 2 | _:person_l0_p0_r0,"""Ann""" 3 | _:person_l0_p0_r0, 4 | _:person_l0_p0_r1,"""Bob""" 5 | _:person_l0_p0_r1, 6 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_union_query_native_df.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTreehouse/maplib/19ad513c9c1ebd60c6a0019a5a7c255a0bfc5789/py_maplib/tests/testdata/multi_datatype_union_query_native_df.parquet -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_union_sort_desc1_query.csv: -------------------------------------------------------------------------------- 1 | s,o 2 | _:person_l0_p0_r1,"""Bob""" 3 | 
_:person_l0_p0_r0,"""Ann""" 4 | _:person_l0_p0_r0, 5 | _:person_l0_p0_r1, 6 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_datatype_union_sort_query.csv: -------------------------------------------------------------------------------- 1 | s,o 2 | _:person_l0_p0_r0, 3 | _:person_l0_p0_r1, 4 | _:person_l0_p0_r0,"""Ann""" 5 | _:person_l0_p0_r1,"""Bob""" 6 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/multi_many_comp.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | """2021-01-01T08:00:00+00:00""^^","""2021-01-01T08:00:01+00:00""^^" 3 | """3.0""^^","""90""^^" 4 | """1""^^","""2.0""^^" 5 | """1""^^","""2""^^" 6 | """1""^^","""90""^^" 7 | """89""^^","""90""^^" 8 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/property_path_query.csv: -------------------------------------------------------------------------------- 1 | site_label,node 2 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine31 3 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine32 4 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine33 5 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine34 6 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine35 7 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine36 8 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine37 9 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine38 10 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine39 11 | Breezy Field,https://github.com/magbak/chrontext/windpower_example#WindTurbine40 12 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine11 13 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine12 14 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine13 15 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine14 16 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine15 17 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine16 18 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine17 19 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine18 20 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine19 21 | Gale Valley,https://github.com/magbak/chrontext/windpower_example#WindTurbine20 22 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine21 23 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine22 24 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine23 25 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine24 26 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine25 27 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine26 28 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine27 29 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine28 30 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine29 
31 | Gusty Plains,https://github.com/magbak/chrontext/windpower_example#WindTurbine30 32 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine1 33 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine10 34 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine2 35 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine3 36 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine4 37 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine5 38 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine6 39 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine7 40 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine8 41 | Wind Mountain,https://github.com/magbak/chrontext/windpower_example#WindTurbine9 42 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/rdf_parser/date_panic.nt: -------------------------------------------------------------------------------- 1 | "2035-01-23"^^. 2 | ""^^. 3 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/read_ntriples.csv: -------------------------------------------------------------------------------- 1 | s,v,o 2 | _:person_l0_p0_r0_0,, 3 | _:person_l0_p0_r0_0,,"""Ann""" 4 | _:person_l0_p0_r0_0,,"""Strong""" 5 | _:person_l0_p0_r0_0,, 6 | _:person_l0_p0_r1_0,, 7 | _:person_l0_p0_r1_0,,"""Bob""" 8 | _:person_l0_p0_r1_0,,"""Brite""" 9 | _:person_l0_p0_r1_0,, 10 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/read_ntriples.nt: -------------------------------------------------------------------------------- 1 | _:person_l0_p0_r0 "Strong" . 2 | _:person_l0_p0_r1 "Brite" . 3 | _:person_l0_p0_r1 . 4 | _:person_l0_p0_r0 . 5 | _:person_l0_p0_r0 "Ann" . 6 | _:person_l0_p0_r1 "Bob" . 7 | _:person_l0_p0_r0 . 8 | _:person_l0_p0_r1 . 
9 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/read_ntriples2.csv: -------------------------------------------------------------------------------- 1 | v,o 2 | , 3 | , 4 | ,"""Ann""" 5 | ,"""Bob""" 6 | ,"""Brite""" 7 | ,"""Strong""" 8 | , 9 | , 10 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/simple_construct_query_nothing.csv: -------------------------------------------------------------------------------- 1 | subject,predicate,object 2 | ,, 3 | ,, 4 | ,, 5 | ,, 6 | ,, 7 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/simple_insert_query_nothing.csv: -------------------------------------------------------------------------------- 1 | a 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/simple_property_path_query.csv: -------------------------------------------------------------------------------- 1 | site_label,node 2 | Breezy Field, 3 | Breezy Field, 4 | Breezy Field, 5 | Breezy Field, 6 | Breezy Field, 7 | Breezy Field, 8 | Breezy Field, 9 | Breezy Field, 10 | Breezy Field, 11 | Breezy Field, 12 | Gale Valley, 13 | Gale Valley, 14 | Gale Valley, 15 | Gale Valley, 16 | Gale Valley, 17 | Gale Valley, 18 | Gale Valley, 19 | Gale Valley, 20 | Gale Valley, 21 | Gale Valley, 22 | Gusty Plains, 23 | Gusty Plains, 24 | Gusty Plains, 25 | Gusty Plains, 26 | Gusty Plains, 27 | Gusty Plains, 28 | Gusty Plains, 29 | Gusty Plains, 30 | Gusty Plains, 31 | Gusty Plains, 32 | Wind Mountain, 33 | Wind Mountain, 34 | Wind Mountain, 35 | Wind Mountain, 36 | Wind Mountain, 37 | Wind Mountain, 38 | Wind Mountain, 39 | Wind Mountain, 40 | Wind Mountain, 41 | Wind Mountain, 42 | -------------------------------------------------------------------------------- /py_maplib/tests/testdata/stringfuncs.csv: -------------------------------------------------------------------------------- 1 | label,strStarts,strEnds,contains,strBefore,strAfter 2 | Wind turbine 11,false,true,false,Wind turbine 11,urbine 11 3 | Wind turbine 24,false,false,true,Wind t,urbine 24 4 | Wind turbine 3,true,false,false,Wind turbine 3,urbine 3 5 | Wind turbine 30,true,false,false,Wind turbine 30,urbine 30 6 | Wind turbine 31,true,false,false,Wind turbine 31,urbine 31 7 | Wind turbine 32,true,false,false,Wind turbine 32,urbine 32 8 | Wind turbine 33,true,false,false,Wind turbine 33,urbine 33 9 | Wind turbine 34,true,false,false,Wind turbine 34,urbine 34 10 | Wind turbine 35,true,false,false,Wind turbine 35,urbine 35 11 | Wind turbine 36,true,false,false,Wind turbine 36,urbine 36 12 | Wind turbine 37,true,false,false,Wind turbine 37,urbine 37 13 | Wind turbine 38,true,false,false,Wind turbine 38,urbine 38 14 | Wind turbine 39,true,false,false,Wind turbine 39,urbine 39 15 | --------------------------------------------------------------------------------
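
For orientation, a minimal sketch (an editor's illustration, not a file from the repository) of the read/serialize/re-read round trip that the tests in py_maplib/tests/test_read_write.py exercise above. Only the Model methods shown in those tests (reads, writes, query) are assumed; the example triple and IRIs below are hypothetical.

    from maplib import Model

    # Hypothetical example data; any valid N-Triples content works here.
    doc = '<http://example.com/s> <http://example.com/p> "A" .'

    m = Model()
    m.reads(doc, format="ntriples")        # parse an N-Triples string into the model
    out_str = m.writes(format="turtle")    # serialize the graph to another format

    m2 = Model()
    m2.reads(out_str, format="turtle")     # re-read the serialized output
    df = m2.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }")  # returns a Polars DataFrame
    assert df.shape == (1, 3)              # one triple, three columns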