├── .gitignore ├── LICENSE.txt ├── README.md ├── bin └── rdfhash ├── docs ├── apt_install.md └── cli_reference.md ├── examples ├── context │ └── blank-nodes.ttl ├── dir-0.ttl ├── experiment-0.ttl ├── hashed │ ├── dir-0__sha256.ttl │ ├── experiment-0__sha256.ttl │ ├── product-0__sha256.ttl │ └── recursive-64__sha256.ttl ├── product-0.ttl └── recursive-64.ttl ├── pyproject.toml ├── rdfhash.ttl ├── rdfhash ├── __init__.py ├── cli.py ├── logger.py ├── main.py └── utils │ ├── __init__.py │ ├── graph.py │ └── hash.py ├── setup.py └── test ├── conftest.py ├── test_examples.py └── utils └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .vscode 3 | *.ipynb 4 | *.egg-info 5 | build 6 | dist 7 | .pytest_cache 8 | 9 | .temp* 10 | examples/planned 11 | .DS_Store -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Neil Graham 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rdfhash: RDF Graph Hashing/Compression Tool 2 | 3 | `rdfhash` is a utility for RDF graph compression that works by hashing RDF subjects based on a checksum of their triples, effectively minimizing the size of RDF graphs by consolidating subjects that have identical definitions. 4 | 5 | ## Installation 6 | 7 | You can install `rdfhash` using `pip`, a package manager for Python. Ensure [`python`](https://www.python.org/downloads/) and [`pip`](https://pip.pypa.io/en/stable/installation/#installation) are properly installed on your system, then run the following command: 8 | 9 | ```bash 10 | pip install rdfhash 11 | ``` 12 | 13 | ## Usage 14 | 15 | ### Command Line Interface (CLI) 16 | 17 | #### **Basic Usage** 18 | 19 | By default, all blank nodes in a `text/turtle` file or string are replaced by their hashed definition: 20 | 21 | ```bash 22 | rdfhash ' 23 | @prefix hash: . 24 | 25 | [ ] a hash:Attribute ; 26 | hash:unit hash:unit:Centimeters ; 27 | hash:value 5.38 .' 28 | ``` 29 | 30 | Output: 31 | 32 | ```yaml 33 | @prefix hash: . 34 | 35 | 36 | a hash:Attribute ; 37 | hash:unit ; 38 | hash:value 5.38 . 39 | ``` 40 | 41 | #### **Advanced Usage** 42 | 43 | The `rdfhash` tool is highly customizable and can be tailored to fit the requirements of any organization: 44 | 45 | ```bash 46 | rdfhash ' 47 | @prefix hash: . 48 | @prefix md5: . 
49 | 50 | [ ] a hash:Contact ; 51 | hash:phone "487-538-2824" ; 52 | hash:email "johnsmith@example.com" ; 53 | hash:name [ 54 | a hash:LegalName ; 55 | hash:firstName "John" ; 56 | hash:lastName "Smith" ; 57 | ] ; 58 | hash:address [ 59 | a hash:Address ; 60 | hash:street "4567 Mountain Peak Way" ; 61 | hash:city "Denver" ; 62 | hash:state "CO" ; 63 | hash:zip "80202" ; 64 | hash:country "USA" ; 65 | ] ; 66 | .' \ 67 | --method md5 \ 68 | --template 'http://rdfhash.com/instances/{method}/{value}' \ 69 | --sparql ' 70 | prefix hash: 71 | select ?s where { 72 | ?s a ?type . 73 | VALUES ?type { 74 | hash:Contact 75 | hash:LegalName 76 | hash:Address 77 | } 78 | }' 79 | ``` 80 | - `--method` specifies the hashing algorithm to use. The default is `sha256`. 81 | - `--template` specifies the URI template to use for hashed subjects. The default is `{method}:{value}`. 82 | - `--sparql` specifies the SPARQL query to use for selecting subjects to hash. The default is `SELECT ?s WHERE { ?s ?p ?o . FILTER(isBlank(?s))}` (Selecting all Blank Node subjects). 83 | - Run `rdfhash --help` for more information on available parameters. 84 | 85 | Output: 86 | 87 | ```yaml 88 | @prefix hash: . 89 | @prefix md5: . 90 | 91 | md5:8fc18e400ff531e5cbe02fef751662ba 92 | a hash:Contact ; 93 | hash:phone "487-538-2824" ; 94 | hash:email "johnsmith@example.com" ; 95 | hash:name md5:5fd42f2c072c80e3db760c3fc69b91b8 ; 96 | hash:address md5:9a3e3ce644e2c5271015d9665675a8e5 . 97 | 98 | md5:5fd42f2c072c80e3db760c3fc69b91b8 99 | a hash:LegalName ; 100 | hash:firstName "John" ; 101 | hash:lastName "Smith" . 102 | 103 | md5:9a3e3ce644e2c5271015d9665675a8e5 104 | a hash:Address ; 105 | hash:street "4567 Mountain Peak Way" ; 106 | hash:city "Denver" ; 107 | hash:state "CO" ; 108 | hash:zip "80202" ; 109 | hash:country "USA" . 110 | ``` 111 | 112 | ### Import as a Python Module 113 | 114 | ```python 115 | from rdfhash import hash_subjects 116 | 117 | data = ''' 118 | @prefix hash: . 119 | @prefix sha1: . 
120 | 121 | 122 | a hash:Attribute ; 123 | hash:value 42 . 124 | ''' 125 | 126 | graph, subjects_replaced = hash_subjects( 127 | data, 128 | method='sha1', 129 | template='http://rdfhash.com/instances/{method}/{value}', 130 | sparql_select_subjects=''' 131 | prefix hash: 132 | SELECT ?s WHERE { ?s a hash:Attribute. } 133 | ''' 134 | ) 135 | 136 | print(graph.serialize(format='turtle')) 137 | ``` 138 | 139 | Output: 140 | 141 | ```yaml 142 | @prefix hash: . 143 | @prefix sha1: . 144 | 145 | sha1:4afe716d630b17d5a5d06f0901800e16f3e8c9a4 146 | a hash:Attribute ; 147 | hash:value 42 . 148 | ``` 149 | 150 | ## Limitations 151 | 152 | It's important to note where `rdfhash` is limited in its functionality. These limitations are expected to be addressed in future versions. 153 | 154 | - The `rdfhash` tool does not yet fully support Named Graphs (e.g. `text/trig` or `application/n-quads`) 155 | - Users can still attempt to pass RDF data containing Named Graphs, although the expected output has not yet been tested. 156 | - Circular dependencies between selected subjects are currently not allowed. (e.g. Inverse properties). A [Directed Acyclic Graph (DAG)](https://en.wikipedia.org/wiki/Directed_acyclic_graph) is required at the moment. 157 | - Best practice to follow is prioritizing broader-to-narrower relationships. (e.g. A person `Contact` points to `LegalName` and `Address` and not inversely. Multiple contacts can point to the same `LegalName` or `Address`.) 158 | - Future `rdfhash` versions will support ignoring specific properties used in a subject's hash, allowing the use of inverse properties. 159 | - Currently, selected subjects are expected to be fully defined in the input graph. 160 | - Future `rdfhash` versions will support connections to a SPARQL endpoint to fetch full context for hashing. 
161 | -------------------------------------------------------------------------------- /bin/rdfhash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import os 5 | # Get scripts directory 6 | script_dir = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | # Add root directory to path 9 | sys.path.append(os.path.join(script_dir, "..")) 10 | 11 | from rdfhash.cli import cli 12 | 13 | cli() -------------------------------------------------------------------------------- /docs/apt_install.md: -------------------------------------------------------------------------------- 1 | # Debian/Ubuntu (`apt`) Installation 2 | 3 | ## Install `python3.10` or `python3.11` 4 | 5 | Execute the lines one at a time. 6 | 7 | ```bash 8 | # Update packages 9 | sudo apt update && sudo apt upgrade -y 10 | 11 | # For adding custom PPAs 12 | sudo apt install software-properties-common -y 13 | 14 | # Add PPA, then press [ENTER] 15 | sudo add-apt-repository ppa:deadsnakes/ppa 16 | 17 | # Install 'python3.10' 18 | sudo apt install python3.10 19 | 20 | # Install 'python3.10 -m pip' 21 | curl -sS https://bootstrap.pypa.io/get-pip.py | sudo python3.10 22 | ``` 23 | 24 | ## Install `pip` package `rdfhash` 25 | 26 | ```bash 27 | python3.10 -m pip install rdfhash 28 | ``` 29 | 30 | ## Test Script 31 | 32 | ```bash 33 | rdfhash -d '[ a ] .' -m 'blake2b' 34 | ``` 35 | 36 | ```bash 37 | a . 38 | ``` 39 | -------------------------------------------------------------------------------- /docs/cli_reference.md: -------------------------------------------------------------------------------- 1 | # CLI Reference 2 | 3 | ## Main command: `rdfhash` 4 | 5 | Replaces selected subjects with hash of their set of triples. 
6 | 7 | ``` 8 | usage: rdfhash [-h] [-d DATA] [-f {turtle,n-triples,trig,n-quads,n3,rdf}] 9 | [-a ACCEPT [ACCEPT ...]] [-v] [--debug] 10 | [-m {md5,sha1,sha224,sha256,sha384,sha512,sha3_224,sha3_256,sha3_384,sha3_512,blake2b,blake2s}] 11 | [--sparql SPARQL] 12 | {reverse} ... 13 | 14 | Replace selected subjects with hash of their triples (`{predicate} {object}.\n` sorted + joined). 15 | 16 | positional arguments: 17 | {reverse} 18 | reverse Reverse hashed URIs to blank nodes. 19 | 20 | options: 21 | -h, --help show this help message and exit 22 | -d DATA, --data DATA Input RDF string or file. (.ttl, .nt, .n3) 23 | -f {turtle,n-triples,trig,n-quads,n3,rdf}, --format {turtle,n-triples,trig,n-quads,n3,rdf} 24 | Input format. 25 | -a ACCEPT [ACCEPT ...], --accept ACCEPT [ACCEPT ...] 26 | Output accept format. 27 | -v, --verbose Show 'info' level logs. 28 | --debug Show 'debug' level logs. 29 | -m {md5,sha1,sha224,sha256,sha384,sha512,sha3_224,sha3_256,sha3_384,sha3_512,blake2b,blake2s}, --method {md5,sha1,sha224,sha256,sha384,sha512,sha3_224,sha3_256,sha3_384,sha3_512,blake2b,blake2s} 30 | Hash method. 31 | --sparql SPARQL, --sparql-select-subjects SPARQL 32 | SPARQL SELECT query returning subject URIs to replace 33 | with hash of their triples. Defaults to all blank node 34 | subjects. 35 | ``` 36 | 37 | ### Example 38 | 39 | ```bash 40 | rdfhash --data='[ a ] .' --method=md5 41 | ``` 42 | 43 | ``` 44 | a . 45 | ``` 46 | 47 | --- 48 | 49 | ## Subcommand: `reverse` 50 | 51 | Reverses hashed URIs to blank nodes. 52 | 53 | ``` 54 | usage: rdfhash reverse [-h] [-d DATA] [-f {turtle,n-triples,trig,n-quads,n3,rdf}] 55 | [-a ACCEPT [ACCEPT ...]] [-v] [--debug] 56 | 57 | options: 58 | -h, --help show this help message and exit 59 | -d DATA, --data DATA Input RDF string or file. (.ttl, .nt, .n3) 60 | -f {turtle,n-triples,trig,n-quads,n3,rdf}, --format {turtle,n-triples,trig,n-quads,n3,rdf} 61 | Input format. 62 | -a ACCEPT [ACCEPT ...], --accept ACCEPT [ACCEPT ...] 
63 | Output accept format. 64 | -v, --verbose Show 'info' level logs. 65 | --debug Show 'debug' level logs. 66 | ``` 67 | 68 | ### Example 69 | 70 | ```bash 71 | rdfhash reverse --data=' a .' 72 | ``` 73 | 74 | ``` 75 | [] a . 76 | ``` 77 | -------------------------------------------------------------------------------- /examples/context/blank-nodes.ttl: -------------------------------------------------------------------------------- 1 | @prefix xsd: . 2 | 3 | @prefix hash: . 4 | @prefix hashid: . 5 | 6 | # Example Instances 7 | 8 | hashid:BlankNodes_Rule a hash:Rule ; 9 | hash:priority 1 ; 10 | hash:method hash:sha256 ; 11 | hash:sparql "SELECT ?s WHERE {?s ?p ?o . FILTER (isBlank(?s))}" ; 12 | hash:template "http://rdfhash.com/id/{method}:{value}" ; 13 | . -------------------------------------------------------------------------------- /examples/dir-0.ttl: -------------------------------------------------------------------------------- 1 | @prefix dir: . 2 | @prefix file: . 3 | @prefix sha256sum: . 4 | 5 | _:dir__game 6 | dir:PS3_GAME _:dir__PS3_GAME ; 7 | file:PS3_GAME.SFB sha256sum:aee34fae6fe2b2996f567899ba6dafb76ab5023b751db7a9f8566b916e182ac8 ; 8 | . 9 | 10 | _:dir__PS3_GAME 11 | dir:USRDIR _:dir__USRDIR ; 12 | file:ICON_0.PNG sha256sum:7e785be38b38a300842f6f530eccc6d85b29ce8b4e7881b627569a295187707e ; 13 | file:PARAM.SFO sha256sum:3f407ab8cf4756f7c990906e22f1578cb055ffe97aec6939d49b459346881c8c ; 14 | file:PIC1.PNG sha256sum:cae05f61c8012e8717cf9391c5eadb144942cef17e4661a714b399f51327fc74 ; 15 | file:PS3LOGO.DAT sha256sum:e08430957ac3f4ee719dc2ed04d3443c395dd3b997d8f34f68a1b5c37d92f425 ; 16 | . 
17 | 18 | _:dir__USRDIR 19 | dir:patcher _:dir__patcher ; 20 | file:ac130.ff sha256sum:0d445d4b51118c8bdf2ce6703e83d006176f57fc9097fd603de0eb08f217eb2b ; 21 | file:ac130_fade.bik sha256sum:78282c38e33e06ccfd1b1021f8aed63cb4d6046ba6133ba8a5377faf219a2e32 ; 22 | file:ac130_load.bik sha256sum:cdccbf3c477cd113bbe8e373970a9913986cafec40f04fba97d8de0ca24d2623 ; 23 | file:aftermath.ff sha256sum:79ac8b5f7c21abc97563438a9b50969aabf36e5427a54297e18e75fb675a45c0 ; 24 | file:aftermath_fade.bik sha256sum:27f175c0f15249ed16863cdca486d881f42cb0d6bfbea5829dbb8fc3caea0463 ; 25 | file:aftermath_load.bik sha256sum:a11a0752df2d6cd75f41a18e9bb49f39e0c7bb2e72d99fde64a3ca6e3d84329f ; 26 | file:airlift.ff sha256sum:84dfac62bcc0d71a4d4238f3108913d30ed589004927af73558a095c3d9d335c ; 27 | file:airlift_fade.bik sha256sum:3166c296cc31992a8fe1b14ee5284487cf75354da8133b42f39bbcd7dbf7b8c0 ; 28 | file:airlift_load.bik sha256sum:4598e2c095e7aeba1a81b9683dc588af0732fb76f7a40d7238f6da31080f0ffa ; 29 | file:airplane.ff sha256sum:4a0b75d76648115396dd7836d208e29225595931a9c002219d29146d608ef526 ; 30 | file:airplane_load.bik sha256sum:f7ed05ac206e632ca3ef864f2cadc45bffe3d5c8c86cceb9b74149aa46bb306b ; 31 | file:ambush.ff sha256sum:fc662a6039e3a4fd5775079beb46b39e7dbe5df66f14434391a792f63f193b4c ; 32 | file:ambush_fade.bik sha256sum:47dc9da5626473ad175a7c0a3c4db2e9fadaece7e6533cefe1831aa0eb19454b ; 33 | file:ambush_load.bik sha256sum:1b9193af9c1c27faf57e789e702373e12214770d40b06b2c533d14da5843811f ; 34 | file:armada.ff sha256sum:26d06f0952b9adf98c0b53f0a69e60394fa6d82bfd81a73dac4787bc9640e3d3 ; 35 | file:armada_fade.bik sha256sum:287e3d4c456e3d6c5a77cf4d390c92eb2fe351eecc6aa546fc1fdaa85dd3e072 ; 36 | file:armada_load.bik sha256sum:f860b5cceb757573f1b273771e0357b0f677e7d4504d0b78aa496f386ce897aa ; 37 | file:atvi.bik sha256sum:e42f0704790588a8c8e56873604445a54d590f0fa0599953ca044a678418ef12 ; 38 | file:black.bik sha256sum:dea4aa34d6930d34448a292a7a80279932e0e10a7e3840a8cff62322ad3083e7 ; 39 | 
file:blackout.ff sha256sum:0f5bf0bc219698e8a2ff92fa947be9498c1b6396341e43f160f8764f99512df8 ; 40 | file:blackout_fade.bik sha256sum:090e0ed1718dfabbb85ee7c82e7f259a2eae04a826a0b7bfb8e9536fe7f9d175 ; 41 | file:blackout_load.bik sha256sum:a8cc48c2ded73254c2c02e6395da0e3ba4cfb4e93ec9dda9f8e61834b6d33bcc ; 42 | file:bog_a.ff sha256sum:7450147b17b6844c03aa565cdee5a423aff4d5d79ce7af02b2a1d9046636e263 ; 43 | file:bog_a_fade.bik sha256sum:cd7b556b136dfed69af4eb1a487d4e853ea58931934a8e6593a8436f92cb9aca ; 44 | file:bog_a_load.bik sha256sum:502b5791ae2e6b2a93670f6d6c89264a867c1389c6b00e75a27af1b6713f56fb ; 45 | file:bog_b.ff sha256sum:a17621d43b86eca537a7146c2a62a21de85c9f236910ed98a5431562aafc663f ; 46 | file:bog_b_fade.bik sha256sum:12cfd50a945ddd7c29d9d4640136cc81d8eaf6da0d1eb1ddfc016f76642ff5e0 ; 47 | file:bog_b_load.bik sha256sum:d3ea59b8a139976fe0727acf4c9b08ac5fce4cc46eac1f0dd989595e23edbca4 ; 48 | file:cargoship.ff sha256sum:2533ef3c91e08aad4f5e60b94985998e7317784996c4c1e29b3ed9fff2367e9a ; 49 | file:cargoship_fade.bik sha256sum:3d68fa64d1f8a0365805814a97aa4231f1accc3132b5fb556c0661edeb6eb84b ; 50 | file:cargoship_load.bik sha256sum:f2804f76e2b20a893c70bd78459a4b2420721d9a8a306c3c7233c0d9312796b3 ; 51 | file:code_post_gfx.ff sha256sum:4462526e8e62ad894d1d8eccb3c1d123820e405ef799b1bc459dff4754e6f33a ; 52 | file:code_post_gfx_mp.ff sha256sum:d7947d5b5ccfa3e1b1815d919feb979e66fddb81acecd5ae2fb034130220aa6e ; 53 | file:cod_intro.bik sha256sum:f3b1a5b7fbf4430173041a800ca9cdc37e2e00e0ea8f7f8ad908cf385fea1e1a ; 54 | file:common.ff sha256sum:e2315527f99e00783627051d3b54cd1113aefbb30e2632e8920d344fbadd8fa2 ; 55 | file:common_mp.ff sha256sum:6b6e5f7c9cf47acffa6a070b06109a422cbfe7b0f19b1c9fb1524fa1ebff8322 ; 56 | file:coup.ff sha256sum:e747ee5c96bda66809fcd65c0489458697a64d91e71064335415a0d476c4aea5 ; 57 | file:coup_fade.bik sha256sum:fa400809d3888cf9d180e6e94452e97cc293e9a646d4465325bbc8053eea58bb ; 58 | file:coup_load.bik 
sha256sum:c3304cb3c2e5fd3b0468a22444349b03e7f2bb27113dfc7df10b9da48f57fc4d ; 59 | file:credits_load.bik sha256sum:e6a8b0c29cd2327bbd1e30a59ea31ce310b7eae44a3d5712d533156fb894a431 ; 60 | file:default.bik sha256sum:f3f1f00e1f04cf7da997d656e4ebd454de0cbb05e944e1e7b68af33a64d5f26a ; 61 | file:EBOOT.BIN sha256sum:89d08238243f68f5b2d146cdd3ce622a71b26e9b7b071581ba48944e3fa20c35 ; 62 | file:EBOOT_MP.BIN sha256sum:ebaf2b7387f15b0f5743948c7c29d944d295bedc508812fab15f5a1a648bb568 ; 63 | file:EBOOT_SP.BIN sha256sum:6b1a3ec2c0fad4f585c01a3ae55bd1e4109ed9801d64f76f1fe111504271dd71 ; 64 | file:hunted.ff sha256sum:10f75527e3d1c4df9d36a24f044b3e973e1b126aa45ad89e41bfcbf16f68681b ; 65 | file:hunted_fade.bik sha256sum:336d607d07b5d215f8297ffe4db844c32ae1b68e360450a46d7032d6cea899be ; 66 | file:hunted_load.bik sha256sum:decc313b9ebb9cf170049ac2c447fdcb40300d7b2f738bac9cb627c768b163ac ; 67 | file:icbm.ff sha256sum:13f116b6e0fd3c9037a6affcc7143103d9ef19e164e57d26f408a2d2da66b82b ; 68 | file:icbm_fade.bik sha256sum:4c720bda2a55068511cae632eb17e9804b3b0e2f6f9cdf442e105d52b2141b72 ; 69 | file:icbm_load.bik sha256sum:135a6be6dd56599d561f3b02fb17735731a384214053046c32cd67fc684b4ead ; 70 | file:ICON0.PNG sha256sum:7e785be38b38a300842f6f530eccc6d85b29ce8b4e7881b627569a295187707e ; 71 | file:IW_logo.bik sha256sum:125033bc6e7e779ac1b462e7c01dc666c1e5e469339f5f4ee45cbdd85b1321f7 ; 72 | file:jeepride.ff sha256sum:610798f39a396cb63121119c83e229f625d4daeb611f48f670d174d58b4e1ef2 ; 73 | file:jeepride_fade.bik sha256sum:482f1a8e2ed5a3fc778cd57cee84868135518d8caedb88b7e41e9533626e83e7 ; 74 | file:jeepride_load.bik sha256sum:63a9922c7207fc6329aadad6a4531e54ec3114dfa3726cb91dd73202f075e5b6 ; 75 | file:killhouse.ff sha256sum:6ec77c7aaca3f6c3c6278786be8d75e298bc63986ffe02165b72de733bda0248 ; 76 | file:killhouse_fade.bik sha256sum:08164b11a146680dfccec4c7b9d78d2c19c551ebfd9faf49f177fbe9d8a84314 ; 77 | file:killhouse_load.bik sha256sum:4aaae58d499450c61c3ec2b001da0eb14b67c7c7ae7b4aadd2443202f9593547 ; 78 | 
file:launchfacility_a.ff sha256sum:9065d9f5ae42bad07b6e0a27d6d4b55d747d29edcc6c540523b95c4f9c9c22a5 ; 79 | file:launchfacility_a_fade.bik sha256sum:742bf346b5875aeb797edce67b07919b64ad2bdc003933554ebd6d1afb595a2b ; 80 | file:launchfacility_a_load.bik sha256sum:34288b82f3c53d4df232112e43c2b9e2a6ae83b5add6815b9147f584fcfb5ad5 ; 81 | file:launchfacility_b.ff sha256sum:170e5b9f10dcd85046439abd40ead234b691ce22bfbafe268003f4b92759fc10 ; 82 | file:launchfacility_b_fade.bik sha256sum:ea07d764ec9c3055aa732908c04de099162c3834c141bb85a2e0dfcec5b3881c ; 83 | file:launchfacility_b_load.bik sha256sum:bc2464022aa808a518259e310da82c457d2bfce5d258a2957ce1450eaaa0ebbf ; 84 | file:legal.bik sha256sum:ae8f384f164065fa6c67ae4ed88b058589d3fcadec13f0b4a21290610e7582e8 ; 85 | file:mp_backlot.ff sha256sum:d5fcd9a3a539168d0365e0e128742b660ace4008233215eca2b5732b9de995e7 ; 86 | file:mp_backlot_load.ff sha256sum:2fade9d9c29d9f723e97ce965f19adee7c4bd739e51df6bee8797403379800c6 ; 87 | file:mp_bloc.ff sha256sum:7797643df54897847b2f54ade0b3c4d423e97284cb49d266abd3ffc5667a0889 ; 88 | file:mp_bloc_load.ff sha256sum:050187558c8d5d1dc528c3e0904b98261aca00fd6dd8fedca84f3ae893b6e285 ; 89 | file:mp_bog.ff sha256sum:db9abc263197efb984e0bdb6b7b49ea4bb6d43ecee96d02a96a71b4ad1c9e90b ; 90 | file:mp_bog_load.ff sha256sum:8fb4973e3932c14340c92f0499ecb5f21e9dc0aa7aa7e946b862e70bdd012c16 ; 91 | file:mp_cargoship.ff sha256sum:e676dadc9d0ea8eda0babbf3b3ba8e5e260dbe070b426561a5c547a8724387ec ; 92 | file:mp_cargoship_load.ff sha256sum:dd338d13c45e0b451d00b15f581e7991b2aad3cb7a2ba43c0ec8416c994e1174 ; 93 | file:mp_citystreets.ff sha256sum:ad32ddfe3efe18597b05cecff127825886e93aea0155d7058904a6f602043704 ; 94 | file:mp_citystreets_load.ff sha256sum:7890afc8b8a00388d3ada3764a44adf4b81f4e521194f56561a7767fd33b6411 ; 95 | file:mp_convoy.ff sha256sum:1a25e7783c5dbd463dc82025ce242eb11dff1ead3eb1a98fb9320c3c5422e52f ; 96 | file:mp_convoy_load.ff sha256sum:16e5ed7b083c0b1a391a1a128a94db6a11aa6741035f28c32936562ef965f19e ; 97 
| file:mp_countdown.ff sha256sum:73c86fa69817cbee6e7e5cb4bc211a5c74141514f0941c353928ef0de92d7299 ; 98 | file:mp_countdown_load.ff sha256sum:b5466da474bf1be366188180a86b44cbf17c4635ef7f0f440ce17826b1b380a0 ; 99 | file:mp_crash.ff sha256sum:8debe8079cfbe490523d10b7cf28fe4407d417d9e36166506461ab750d0ffb4d ; 100 | file:mp_crash_load.ff sha256sum:241ad7cecf1ffcbd4e84a6f3a73b0135b95a1ba0d201e913cff222847dcf3c63 ; 101 | file:mp_crossfire.ff sha256sum:7ba25a24d730864c1e26fbd0eca1c839ac73b99649f00edd1f951ee229057e87 ; 102 | file:mp_crossfire_load.ff sha256sum:4efcce296a13d189fcabffbe407b4e06285fb010727d26fbd1cf1902ff985ecb ; 103 | file:mp_farm.ff sha256sum:5a3b7f3f958af033a74976e5209532a6aa8e16e3a683bca65b4d20120696a948 ; 104 | file:mp_farm_load.ff sha256sum:88328f823c649ebb2a65a22aca63ee99ef196d8482f0cb2377db7780872fed7f ; 105 | file:mp_overgrown.ff sha256sum:7e9a6ee0a1d33d2803dc908cf8a170f48bd027c3028af372c4c73041c227a7f5 ; 106 | file:mp_overgrown_load.ff sha256sum:b405b27a4d5a6d4e304aabdbaeee09b49401376a07d3c15192bc771d6c505021 ; 107 | file:mp_pipeline.ff sha256sum:43c96756b60fb1ea637ae3964218191627bcfe382b30237ab12acbff3da503f8 ; 108 | file:mp_pipeline_load.ff sha256sum:9740e36ea867dded24bf19a12a2de161018318a049e7cecf304a747c519f8351 ; 109 | file:mp_shipment.ff sha256sum:dcafb32480482ff81a9e2cbe8c37f7baa4baa1f331d063c1cfaad5d92612df98 ; 110 | file:mp_shipment_load.ff sha256sum:54d2abb742bfd29a88bfb9460c4841c7c57111b5f2113467c330a432341989c4 ; 111 | file:mp_showdown.ff sha256sum:d215c0c45a4a17d4f451b646b8d758c08c04cfb3e9c11b2a4fe102f203bfb02b ; 112 | file:mp_showdown_load.ff sha256sum:576dd4d29611303f0473e18f007ab2f7cec1d58f6cbeff4e7b89a76b7a1c24f7 ; 113 | file:mp_strike.ff sha256sum:a2a9368012f9fe7da7b031d6a07cc941a065806813f3c80a15a10a42cdb12577 ; 114 | file:mp_strike_load.ff sha256sum:d51eedc52a64a87231e23d643a37f1a0da6239bf0699eb9dd55d34a054c478fc ; 115 | file:mp_vacant.ff sha256sum:bd06d200a0a7891241d4608223902d88d908768095a3e36b2f7a43867712c431 ; 116 | 
file:mp_vacant_load.ff sha256sum:669d6e3a06bd45819753d4a99955f48bf3b3ee5b180f9b206976ac1e8a4908de ; 117 | file:packfile1.pak sha256sum:36cf1a37fbafc120d7ac78308114fe8fa304037d66f8aca15dda7d554309677e ; 118 | file:scoutsniper.ff sha256sum:e94eb9d3eeeed705c2ffb8e1b3e814b21711c635958714279810ee02c62fee8e ; 119 | file:scoutsniper_fade.bik sha256sum:9e029aa5e14ee700fa68ca131ff924025421705e1a043092015dc05da6eccc83 ; 120 | file:scoutsniper_load.bik sha256sum:e138cbdec7cd907d6f82b9b1cc5df8c331775d652e2188b51e573481e23a86fb ; 121 | file:sha256sum.txt sha256sum:3d7d5b1bfa3bac5f5f593b24dfb318042e54c3d0be6b7e445d2789fd995e830f ; 122 | file:simplecredits.ff sha256sum:e77583a42507ef58626c03509fd70beeec2d8ccbf150d92820ebe1669653034e ; 123 | file:simplecredits_load.bik sha256sum:e6a8b0c29cd2327bbd1e30a59ea31ce310b7eae44a3d5712d533156fb894a431 ; 124 | file:sniperescape.ff sha256sum:d552b0b49eaf436c73f675d793adbbabd1c5df1e1176d886dfa7081e9bbc85ae ; 125 | file:sniperescape_load.bik sha256sum:15426ac821c124cc894e907d19fff076afd47abc7a2e0925899156938af0384c ; 126 | file:ui.ff sha256sum:1e9697a85441d03bb25d1c3bfc1bc979fe41f5ee1fd070d2e98223ae3691dda8 ; 127 | file:ui_mp.ff sha256sum:eb306a97c72de25629b35409f3c1e009975b296f4e413a6ed080f9439489c610 ; 128 | file:village_assault.ff sha256sum:830a34ac7115480e23aafcc356c0b40b6509673a7f93f408bba7306090b0e018 ; 129 | file:village_assault_fade.bik sha256sum:fe9bbba0f3d937552bf82221e703ff248b63845135c7272c1d449e0ae722d80e ; 130 | file:village_assault_load.bik sha256sum:2b0964d6383bce39cc3ae151861fdbd1fd657798656e0c8e04670124dce386a0 ; 131 | file:village_defend.ff sha256sum:71d69d9f741cf32ddc25638518a31e3d593b0f1aa0915719dcf9bb29d9a3a2e4 ; 132 | file:village_defend_fade.bik sha256sum:f08bda6a3fba03b0be348ba04e5043ac0b4e603be18f4101402fe450e60b62d3 ; 133 | file:village_defend_load.bik sha256sum:45ff37eb4b07a83d5fadf5eb6a8d1c9d80db225f28f20dcc8a062b909959fd6a ; 134 | file:zakhaev_escape.bik 
sha256sum:73f3dba648ac2bbc3212b5a9aa877e4e9aadebaf64f80b0d319b54bc16b49c0b ; 135 | . 136 | 137 | _:dir__patcher 138 | file:i_button_icons_ps2.fnt sha256sum:d9c5ee00b87e668f3296a07d2fbb8f166168a7cabce401429e1ef56d67a3de66 ; 139 | file:i_button_icons_ps2.tex sha256sum:9f7fcc7d4c6b2d61a29de91d039baa8e144b3dd72e6f9999436f61e99b19995a ; 140 | file:i_helvetica_bold.fnt sha256sum:357c4f471956d147c0c451f65e35ce668322fbec9acfa2e29513a16298b85d42 ; 141 | file:i_helvetica_bold.tex sha256sum:a6cbb38b0d6964c80e395141bed8492845fe5334a6822c74fe92ef9e4e446a90 ; 142 | . -------------------------------------------------------------------------------- /examples/experiment-0.ttl: -------------------------------------------------------------------------------- 1 | @prefix : . 2 | @prefix qudt: . 3 | @prefix rdf: . 4 | @prefix skos: . 5 | @prefix xsd: . 6 | @prefix chebi: . 7 | 8 | :Experiment1 9 | a :Experiment ; 10 | :hasHypothesis _:Hypothesis1 ; 11 | :hasObservation _:Observation1, _:Observation2, _:Observation3 . 12 | 13 | :Sample1 14 | a :Sample ; 15 | :composedOf [ 16 | a :CompositionPart ; 17 | :substance chebi:CHEBI_16240 ; # Hydrogen Peroxide (H2O2) 18 | :percent "100"^^xsd:integer ; # 100% 19 | ] . 20 | 21 | _:Hypothesis1 22 | a :Hypothesis ; 23 | skos:definition "If the temperature is increased while the pressure is held constant, the rate of decomposition of 100% Hydrogen Peroxide (H2O2) will increase."@en . 24 | 25 | _:Observation1 26 | a :Observation ; 27 | :onSample :Sample1 ; 28 | :hasAttribute [ 29 | a :Attribute ; 30 | :for :Temperature ; 31 | :value "40"^^xsd:integer ; 32 | qudt:unit qudt:DegreeCelsius ; 33 | ], 34 | [ 35 | a :Attribute ; 36 | :for :Pressure ; 37 | :value "100000"^^xsd:integer ; 38 | qudt:unit qudt:Pascal ; 39 | ], 40 | [ 41 | a :Attribute ; 42 | :for :DecompositionRate ; 43 | :value "0.01"^^xsd:decimal ; 44 | qudt:unit qudt:SecondTimeInverse ; 45 | ] . 
46 | 47 | _:Observation2 48 | a :Observation ; 49 | :onSample :Sample1 ; 50 | :hasAttribute [ 51 | a :Attribute ; 52 | :for :Temperature ; 53 | :value "60"^^xsd:integer ; 54 | qudt:unit qudt:DegreeCelsius ; 55 | ], 56 | [ 57 | a :Attribute ; 58 | :for :Pressure ; 59 | :value "100000"^^xsd:integer ; 60 | qudt:unit qudt:Pascal ; 61 | ], 62 | [ 63 | a :Attribute ; 64 | :for :DecompositionRate ; 65 | :value "0.02"^^xsd:decimal ; 66 | qudt:unit qudt:SecondTimeInverse ; 67 | ] . 68 | 69 | _:Observation3 70 | a :Observation ; 71 | :onSample :Sample1 ; 72 | :hasAttribute [ 73 | a :Attribute ; 74 | :for :Temperature ; 75 | :value "100"^^xsd:integer ; 76 | qudt:unit qudt:DegreeCelsius ; 77 | ], 78 | [ 79 | a :Attribute ; 80 | :for :Pressure ; 81 | :value "100000"^^xsd:integer ; 82 | qudt:unit qudt:Pascal ; 83 | ], 84 | [ 85 | a :Attribute ; 86 | :for :DecompositionRate ; 87 | :value "0.03"^^xsd:decimal ; 88 | qudt:unit qudt:SecondTimeInverse ; 89 | ] . 90 | -------------------------------------------------------------------------------- /examples/hashed/dir-0__sha256.ttl: -------------------------------------------------------------------------------- 1 | @prefix dir: . 2 | @prefix file: . 3 | @prefix sha256sum: . 4 | 5 | dir:PS3_GAME ; 6 | file:PS3_GAME.SFB sha256sum:aee34fae6fe2b2996f567899ba6dafb76ab5023b751db7a9f8566b916e182ac8 . 
7 | 8 | dir:patcher ; 9 | file:EBOOT.BIN sha256sum:89d08238243f68f5b2d146cdd3ce622a71b26e9b7b071581ba48944e3fa20c35 ; 10 | file:EBOOT_MP.BIN sha256sum:ebaf2b7387f15b0f5743948c7c29d944d295bedc508812fab15f5a1a648bb568 ; 11 | file:EBOOT_SP.BIN sha256sum:6b1a3ec2c0fad4f585c01a3ae55bd1e4109ed9801d64f76f1fe111504271dd71 ; 12 | file:ICON0.PNG sha256sum:7e785be38b38a300842f6f530eccc6d85b29ce8b4e7881b627569a295187707e ; 13 | file:IW_logo.bik sha256sum:125033bc6e7e779ac1b462e7c01dc666c1e5e469339f5f4ee45cbdd85b1321f7 ; 14 | file:ac130.ff sha256sum:0d445d4b51118c8bdf2ce6703e83d006176f57fc9097fd603de0eb08f217eb2b ; 15 | file:ac130_fade.bik sha256sum:78282c38e33e06ccfd1b1021f8aed63cb4d6046ba6133ba8a5377faf219a2e32 ; 16 | file:ac130_load.bik sha256sum:cdccbf3c477cd113bbe8e373970a9913986cafec40f04fba97d8de0ca24d2623 ; 17 | file:aftermath.ff sha256sum:79ac8b5f7c21abc97563438a9b50969aabf36e5427a54297e18e75fb675a45c0 ; 18 | file:aftermath_fade.bik sha256sum:27f175c0f15249ed16863cdca486d881f42cb0d6bfbea5829dbb8fc3caea0463 ; 19 | file:aftermath_load.bik sha256sum:a11a0752df2d6cd75f41a18e9bb49f39e0c7bb2e72d99fde64a3ca6e3d84329f ; 20 | file:airlift.ff sha256sum:84dfac62bcc0d71a4d4238f3108913d30ed589004927af73558a095c3d9d335c ; 21 | file:airlift_fade.bik sha256sum:3166c296cc31992a8fe1b14ee5284487cf75354da8133b42f39bbcd7dbf7b8c0 ; 22 | file:airlift_load.bik sha256sum:4598e2c095e7aeba1a81b9683dc588af0732fb76f7a40d7238f6da31080f0ffa ; 23 | file:airplane.ff sha256sum:4a0b75d76648115396dd7836d208e29225595931a9c002219d29146d608ef526 ; 24 | file:airplane_load.bik sha256sum:f7ed05ac206e632ca3ef864f2cadc45bffe3d5c8c86cceb9b74149aa46bb306b ; 25 | file:ambush.ff sha256sum:fc662a6039e3a4fd5775079beb46b39e7dbe5df66f14434391a792f63f193b4c ; 26 | file:ambush_fade.bik sha256sum:47dc9da5626473ad175a7c0a3c4db2e9fadaece7e6533cefe1831aa0eb19454b ; 27 | file:ambush_load.bik sha256sum:1b9193af9c1c27faf57e789e702373e12214770d40b06b2c533d14da5843811f ; 28 | file:armada.ff 
sha256sum:26d06f0952b9adf98c0b53f0a69e60394fa6d82bfd81a73dac4787bc9640e3d3 ; 29 | file:armada_fade.bik sha256sum:287e3d4c456e3d6c5a77cf4d390c92eb2fe351eecc6aa546fc1fdaa85dd3e072 ; 30 | file:armada_load.bik sha256sum:f860b5cceb757573f1b273771e0357b0f677e7d4504d0b78aa496f386ce897aa ; 31 | file:atvi.bik sha256sum:e42f0704790588a8c8e56873604445a54d590f0fa0599953ca044a678418ef12 ; 32 | file:black.bik sha256sum:dea4aa34d6930d34448a292a7a80279932e0e10a7e3840a8cff62322ad3083e7 ; 33 | file:blackout.ff sha256sum:0f5bf0bc219698e8a2ff92fa947be9498c1b6396341e43f160f8764f99512df8 ; 34 | file:blackout_fade.bik sha256sum:090e0ed1718dfabbb85ee7c82e7f259a2eae04a826a0b7bfb8e9536fe7f9d175 ; 35 | file:blackout_load.bik sha256sum:a8cc48c2ded73254c2c02e6395da0e3ba4cfb4e93ec9dda9f8e61834b6d33bcc ; 36 | file:bog_a.ff sha256sum:7450147b17b6844c03aa565cdee5a423aff4d5d79ce7af02b2a1d9046636e263 ; 37 | file:bog_a_fade.bik sha256sum:cd7b556b136dfed69af4eb1a487d4e853ea58931934a8e6593a8436f92cb9aca ; 38 | file:bog_a_load.bik sha256sum:502b5791ae2e6b2a93670f6d6c89264a867c1389c6b00e75a27af1b6713f56fb ; 39 | file:bog_b.ff sha256sum:a17621d43b86eca537a7146c2a62a21de85c9f236910ed98a5431562aafc663f ; 40 | file:bog_b_fade.bik sha256sum:12cfd50a945ddd7c29d9d4640136cc81d8eaf6da0d1eb1ddfc016f76642ff5e0 ; 41 | file:bog_b_load.bik sha256sum:d3ea59b8a139976fe0727acf4c9b08ac5fce4cc46eac1f0dd989595e23edbca4 ; 42 | file:cargoship.ff sha256sum:2533ef3c91e08aad4f5e60b94985998e7317784996c4c1e29b3ed9fff2367e9a ; 43 | file:cargoship_fade.bik sha256sum:3d68fa64d1f8a0365805814a97aa4231f1accc3132b5fb556c0661edeb6eb84b ; 44 | file:cargoship_load.bik sha256sum:f2804f76e2b20a893c70bd78459a4b2420721d9a8a306c3c7233c0d9312796b3 ; 45 | file:cod_intro.bik sha256sum:f3b1a5b7fbf4430173041a800ca9cdc37e2e00e0ea8f7f8ad908cf385fea1e1a ; 46 | file:code_post_gfx.ff sha256sum:4462526e8e62ad894d1d8eccb3c1d123820e405ef799b1bc459dff4754e6f33a ; 47 | file:code_post_gfx_mp.ff 
sha256sum:d7947d5b5ccfa3e1b1815d919feb979e66fddb81acecd5ae2fb034130220aa6e ; 48 | file:common.ff sha256sum:e2315527f99e00783627051d3b54cd1113aefbb30e2632e8920d344fbadd8fa2 ; 49 | file:common_mp.ff sha256sum:6b6e5f7c9cf47acffa6a070b06109a422cbfe7b0f19b1c9fb1524fa1ebff8322 ; 50 | file:coup.ff sha256sum:e747ee5c96bda66809fcd65c0489458697a64d91e71064335415a0d476c4aea5 ; 51 | file:coup_fade.bik sha256sum:fa400809d3888cf9d180e6e94452e97cc293e9a646d4465325bbc8053eea58bb ; 52 | file:coup_load.bik sha256sum:c3304cb3c2e5fd3b0468a22444349b03e7f2bb27113dfc7df10b9da48f57fc4d ; 53 | file:credits_load.bik sha256sum:e6a8b0c29cd2327bbd1e30a59ea31ce310b7eae44a3d5712d533156fb894a431 ; 54 | file:default.bik sha256sum:f3f1f00e1f04cf7da997d656e4ebd454de0cbb05e944e1e7b68af33a64d5f26a ; 55 | file:hunted.ff sha256sum:10f75527e3d1c4df9d36a24f044b3e973e1b126aa45ad89e41bfcbf16f68681b ; 56 | file:hunted_fade.bik sha256sum:336d607d07b5d215f8297ffe4db844c32ae1b68e360450a46d7032d6cea899be ; 57 | file:hunted_load.bik sha256sum:decc313b9ebb9cf170049ac2c447fdcb40300d7b2f738bac9cb627c768b163ac ; 58 | file:icbm.ff sha256sum:13f116b6e0fd3c9037a6affcc7143103d9ef19e164e57d26f408a2d2da66b82b ; 59 | file:icbm_fade.bik sha256sum:4c720bda2a55068511cae632eb17e9804b3b0e2f6f9cdf442e105d52b2141b72 ; 60 | file:icbm_load.bik sha256sum:135a6be6dd56599d561f3b02fb17735731a384214053046c32cd67fc684b4ead ; 61 | file:jeepride.ff sha256sum:610798f39a396cb63121119c83e229f625d4daeb611f48f670d174d58b4e1ef2 ; 62 | file:jeepride_fade.bik sha256sum:482f1a8e2ed5a3fc778cd57cee84868135518d8caedb88b7e41e9533626e83e7 ; 63 | file:jeepride_load.bik sha256sum:63a9922c7207fc6329aadad6a4531e54ec3114dfa3726cb91dd73202f075e5b6 ; 64 | file:killhouse.ff sha256sum:6ec77c7aaca3f6c3c6278786be8d75e298bc63986ffe02165b72de733bda0248 ; 65 | file:killhouse_fade.bik sha256sum:08164b11a146680dfccec4c7b9d78d2c19c551ebfd9faf49f177fbe9d8a84314 ; 66 | file:killhouse_load.bik sha256sum:4aaae58d499450c61c3ec2b001da0eb14b67c7c7ae7b4aadd2443202f9593547 ; 67 | 
file:launchfacility_a.ff sha256sum:9065d9f5ae42bad07b6e0a27d6d4b55d747d29edcc6c540523b95c4f9c9c22a5 ; 68 | file:launchfacility_a_fade.bik sha256sum:742bf346b5875aeb797edce67b07919b64ad2bdc003933554ebd6d1afb595a2b ; 69 | file:launchfacility_a_load.bik sha256sum:34288b82f3c53d4df232112e43c2b9e2a6ae83b5add6815b9147f584fcfb5ad5 ; 70 | file:launchfacility_b.ff sha256sum:170e5b9f10dcd85046439abd40ead234b691ce22bfbafe268003f4b92759fc10 ; 71 | file:launchfacility_b_fade.bik sha256sum:ea07d764ec9c3055aa732908c04de099162c3834c141bb85a2e0dfcec5b3881c ; 72 | file:launchfacility_b_load.bik sha256sum:bc2464022aa808a518259e310da82c457d2bfce5d258a2957ce1450eaaa0ebbf ; 73 | file:legal.bik sha256sum:ae8f384f164065fa6c67ae4ed88b058589d3fcadec13f0b4a21290610e7582e8 ; 74 | file:mp_backlot.ff sha256sum:d5fcd9a3a539168d0365e0e128742b660ace4008233215eca2b5732b9de995e7 ; 75 | file:mp_backlot_load.ff sha256sum:2fade9d9c29d9f723e97ce965f19adee7c4bd739e51df6bee8797403379800c6 ; 76 | file:mp_bloc.ff sha256sum:7797643df54897847b2f54ade0b3c4d423e97284cb49d266abd3ffc5667a0889 ; 77 | file:mp_bloc_load.ff sha256sum:050187558c8d5d1dc528c3e0904b98261aca00fd6dd8fedca84f3ae893b6e285 ; 78 | file:mp_bog.ff sha256sum:db9abc263197efb984e0bdb6b7b49ea4bb6d43ecee96d02a96a71b4ad1c9e90b ; 79 | file:mp_bog_load.ff sha256sum:8fb4973e3932c14340c92f0499ecb5f21e9dc0aa7aa7e946b862e70bdd012c16 ; 80 | file:mp_cargoship.ff sha256sum:e676dadc9d0ea8eda0babbf3b3ba8e5e260dbe070b426561a5c547a8724387ec ; 81 | file:mp_cargoship_load.ff sha256sum:dd338d13c45e0b451d00b15f581e7991b2aad3cb7a2ba43c0ec8416c994e1174 ; 82 | file:mp_citystreets.ff sha256sum:ad32ddfe3efe18597b05cecff127825886e93aea0155d7058904a6f602043704 ; 83 | file:mp_citystreets_load.ff sha256sum:7890afc8b8a00388d3ada3764a44adf4b81f4e521194f56561a7767fd33b6411 ; 84 | file:mp_convoy.ff sha256sum:1a25e7783c5dbd463dc82025ce242eb11dff1ead3eb1a98fb9320c3c5422e52f ; 85 | file:mp_convoy_load.ff sha256sum:16e5ed7b083c0b1a391a1a128a94db6a11aa6741035f28c32936562ef965f19e ; 86 
| file:mp_countdown.ff sha256sum:73c86fa69817cbee6e7e5cb4bc211a5c74141514f0941c353928ef0de92d7299 ; 87 | file:mp_countdown_load.ff sha256sum:b5466da474bf1be366188180a86b44cbf17c4635ef7f0f440ce17826b1b380a0 ; 88 | file:mp_crash.ff sha256sum:8debe8079cfbe490523d10b7cf28fe4407d417d9e36166506461ab750d0ffb4d ; 89 | file:mp_crash_load.ff sha256sum:241ad7cecf1ffcbd4e84a6f3a73b0135b95a1ba0d201e913cff222847dcf3c63 ; 90 | file:mp_crossfire.ff sha256sum:7ba25a24d730864c1e26fbd0eca1c839ac73b99649f00edd1f951ee229057e87 ; 91 | file:mp_crossfire_load.ff sha256sum:4efcce296a13d189fcabffbe407b4e06285fb010727d26fbd1cf1902ff985ecb ; 92 | file:mp_farm.ff sha256sum:5a3b7f3f958af033a74976e5209532a6aa8e16e3a683bca65b4d20120696a948 ; 93 | file:mp_farm_load.ff sha256sum:88328f823c649ebb2a65a22aca63ee99ef196d8482f0cb2377db7780872fed7f ; 94 | file:mp_overgrown.ff sha256sum:7e9a6ee0a1d33d2803dc908cf8a170f48bd027c3028af372c4c73041c227a7f5 ; 95 | file:mp_overgrown_load.ff sha256sum:b405b27a4d5a6d4e304aabdbaeee09b49401376a07d3c15192bc771d6c505021 ; 96 | file:mp_pipeline.ff sha256sum:43c96756b60fb1ea637ae3964218191627bcfe382b30237ab12acbff3da503f8 ; 97 | file:mp_pipeline_load.ff sha256sum:9740e36ea867dded24bf19a12a2de161018318a049e7cecf304a747c519f8351 ; 98 | file:mp_shipment.ff sha256sum:dcafb32480482ff81a9e2cbe8c37f7baa4baa1f331d063c1cfaad5d92612df98 ; 99 | file:mp_shipment_load.ff sha256sum:54d2abb742bfd29a88bfb9460c4841c7c57111b5f2113467c330a432341989c4 ; 100 | file:mp_showdown.ff sha256sum:d215c0c45a4a17d4f451b646b8d758c08c04cfb3e9c11b2a4fe102f203bfb02b ; 101 | file:mp_showdown_load.ff sha256sum:576dd4d29611303f0473e18f007ab2f7cec1d58f6cbeff4e7b89a76b7a1c24f7 ; 102 | file:mp_strike.ff sha256sum:a2a9368012f9fe7da7b031d6a07cc941a065806813f3c80a15a10a42cdb12577 ; 103 | file:mp_strike_load.ff sha256sum:d51eedc52a64a87231e23d643a37f1a0da6239bf0699eb9dd55d34a054c478fc ; 104 | file:mp_vacant.ff sha256sum:bd06d200a0a7891241d4608223902d88d908768095a3e36b2f7a43867712c431 ; 105 | file:mp_vacant_load.ff 
sha256sum:669d6e3a06bd45819753d4a99955f48bf3b3ee5b180f9b206976ac1e8a4908de ; 106 | file:packfile1.pak sha256sum:36cf1a37fbafc120d7ac78308114fe8fa304037d66f8aca15dda7d554309677e ; 107 | file:scoutsniper.ff sha256sum:e94eb9d3eeeed705c2ffb8e1b3e814b21711c635958714279810ee02c62fee8e ; 108 | file:scoutsniper_fade.bik sha256sum:9e029aa5e14ee700fa68ca131ff924025421705e1a043092015dc05da6eccc83 ; 109 | file:scoutsniper_load.bik sha256sum:e138cbdec7cd907d6f82b9b1cc5df8c331775d652e2188b51e573481e23a86fb ; 110 | file:sha256sum.txt sha256sum:3d7d5b1bfa3bac5f5f593b24dfb318042e54c3d0be6b7e445d2789fd995e830f ; 111 | file:simplecredits.ff sha256sum:e77583a42507ef58626c03509fd70beeec2d8ccbf150d92820ebe1669653034e ; 112 | file:simplecredits_load.bik sha256sum:e6a8b0c29cd2327bbd1e30a59ea31ce310b7eae44a3d5712d533156fb894a431 ; 113 | file:sniperescape.ff sha256sum:d552b0b49eaf436c73f675d793adbbabd1c5df1e1176d886dfa7081e9bbc85ae ; 114 | file:sniperescape_load.bik sha256sum:15426ac821c124cc894e907d19fff076afd47abc7a2e0925899156938af0384c ; 115 | file:ui.ff sha256sum:1e9697a85441d03bb25d1c3bfc1bc979fe41f5ee1fd070d2e98223ae3691dda8 ; 116 | file:ui_mp.ff sha256sum:eb306a97c72de25629b35409f3c1e009975b296f4e413a6ed080f9439489c610 ; 117 | file:village_assault.ff sha256sum:830a34ac7115480e23aafcc356c0b40b6509673a7f93f408bba7306090b0e018 ; 118 | file:village_assault_fade.bik sha256sum:fe9bbba0f3d937552bf82221e703ff248b63845135c7272c1d449e0ae722d80e ; 119 | file:village_assault_load.bik sha256sum:2b0964d6383bce39cc3ae151861fdbd1fd657798656e0c8e04670124dce386a0 ; 120 | file:village_defend.ff sha256sum:71d69d9f741cf32ddc25638518a31e3d593b0f1aa0915719dcf9bb29d9a3a2e4 ; 121 | file:village_defend_fade.bik sha256sum:f08bda6a3fba03b0be348ba04e5043ac0b4e603be18f4101402fe450e60b62d3 ; 122 | file:village_defend_load.bik sha256sum:45ff37eb4b07a83d5fadf5eb6a8d1c9d80db225f28f20dcc8a062b909959fd6a ; 123 | file:zakhaev_escape.bik sha256sum:73f3dba648ac2bbc3212b5a9aa877e4e9aadebaf64f80b0d319b54bc16b49c0b . 
124 | 125 | dir:USRDIR ; 126 | file:ICON_0.PNG sha256sum:7e785be38b38a300842f6f530eccc6d85b29ce8b4e7881b627569a295187707e ; 127 | file:PARAM.SFO sha256sum:3f407ab8cf4756f7c990906e22f1578cb055ffe97aec6939d49b459346881c8c ; 128 | file:PIC1.PNG sha256sum:cae05f61c8012e8717cf9391c5eadb144942cef17e4661a714b399f51327fc74 ; 129 | file:PS3LOGO.DAT sha256sum:e08430957ac3f4ee719dc2ed04d3443c395dd3b997d8f34f68a1b5c37d92f425 . 130 | 131 | file:i_button_icons_ps2.fnt sha256sum:d9c5ee00b87e668f3296a07d2fbb8f166168a7cabce401429e1ef56d67a3de66 ; 132 | file:i_button_icons_ps2.tex sha256sum:9f7fcc7d4c6b2d61a29de91d039baa8e144b3dd72e6f9999436f61e99b19995a ; 133 | file:i_helvetica_bold.fnt sha256sum:357c4f471956d147c0c451f65e35ce668322fbec9acfa2e29513a16298b85d42 ; 134 | file:i_helvetica_bold.tex sha256sum:a6cbb38b0d6964c80e395141bed8492845fe5334a6822c74fe92ef9e4e446a90 . 135 | 136 | -------------------------------------------------------------------------------- /examples/hashed/experiment-0__sha256.ttl: -------------------------------------------------------------------------------- 1 | @prefix : . 2 | @prefix chebi: . 3 | @prefix qudt: . 4 | @prefix skos: . 5 | @prefix xsd: . 6 | 7 | :Experiment1 a :Experiment ; 8 | :hasHypothesis ; 9 | :hasObservation , 10 | , 11 | . 12 | 13 | a :Observation ; 14 | :hasAttribute , 15 | , 16 | ; 17 | :onSample :Sample1 . 18 | 19 | a :Attribute ; 20 | qudt:unit qudt:DegreeCelsius ; 21 | :for :Temperature ; 22 | :value 60 . 23 | 24 | a :Observation ; 25 | :hasAttribute , 26 | , 27 | ; 28 | :onSample :Sample1 . 29 | 30 | a :Hypothesis ; 31 | skos:definition "If the temperature is increased while the pressure is held constant, the rate of decomposition of 100% Hydrogen Peroxide (H2O2) will increase."@en . 32 | 33 | a :Attribute ; 34 | qudt:unit qudt:DegreeCelsius ; 35 | :for :Temperature ; 36 | :value 100 . 37 | 38 | a :Attribute ; 39 | qudt:unit qudt:DegreeCelsius ; 40 | :for :Temperature ; 41 | :value 40 . 
42 | 43 | a :Attribute ; 44 | qudt:unit qudt:SecondTimeInverse ; 45 | :for :DecompositionRate ; 46 | :value 0.01 . 47 | 48 | a :Attribute ; 49 | qudt:unit qudt:SecondTimeInverse ; 50 | :for :DecompositionRate ; 51 | :value 0.02 . 52 | 53 | a :Attribute ; 54 | qudt:unit qudt:SecondTimeInverse ; 55 | :for :DecompositionRate ; 56 | :value 0.03 . 57 | 58 | a :Observation ; 59 | :hasAttribute , 60 | , 61 | ; 62 | :onSample :Sample1 . 63 | 64 | a :CompositionPart ; 65 | :percent 100 ; 66 | :substance chebi:CHEBI_16240 . 67 | 68 | :Sample1 a :Sample ; 69 | :composedOf . 70 | 71 | a :Attribute ; 72 | qudt:unit qudt:Pascal ; 73 | :for :Pressure ; 74 | :value 100000 . 75 | 76 | -------------------------------------------------------------------------------- /examples/hashed/product-0__sha256.ttl: -------------------------------------------------------------------------------- 1 | @prefix c: . 2 | @prefix currency: . 3 | @prefix d: . 4 | @prefix p: . 5 | @prefix xsd: . 6 | 7 | d:TimeEntry__ps5__2020_11_12 a c:TimeEntry ; 8 | p:date "2020-11-12"^^xsd:date ; 9 | p:value . 10 | 11 | d:TimeEntry__ps5__2022_06_01 a c:TimeEntry ; 12 | p:date "2022-06-01"^^xsd:date ; 13 | p:value . 14 | 15 | d:TimeEntry__xbox_series_x__2020_10_12 a c:TimeEntry ; 16 | p:date "2020-10-12"^^xsd:date ; 17 | p:value . 18 | 19 | a c:Product ; 20 | p:available false ; 21 | p:name "Microsoft - Xbox Series X 1TB Console - Black"^^xsd:string ; 22 | p:price ; 23 | p:url . 24 | 25 | a currency:USDollar ; 26 | p:amount 499.99 . 27 | 28 | a c:Product ; 29 | p:available false ; 30 | p:name "Sony - PlayStation 5 Console"^^xsd:string ; 31 | p:price ; 32 | p:url . 33 | 34 | -------------------------------------------------------------------------------- /examples/hashed/recursive-64__sha256.ttl: -------------------------------------------------------------------------------- 1 | @prefix : . 2 | 3 | : . 4 | 5 | : . 6 | 7 | : . 8 | 9 | : . 10 | 11 | : . 12 | 13 | : . 14 | 15 | : . 16 | 17 | : . 18 | 19 | : . 
20 | 21 | : . 22 | 23 | : . 24 | 25 | : . 26 | 27 | : . 28 | 29 | : . 30 | 31 | : . 32 | 33 | : . 34 | 35 | : . 36 | 37 | : . 38 | 39 | : . 40 | 41 | : . 42 | 43 | : . 44 | 45 | : . 46 | 47 | : . 48 | 49 | : . 50 | 51 | : . 52 | 53 | : . 54 | 55 | : . 56 | 57 | : . 58 | 59 | : . 60 | 61 | : . 62 | 63 | : . 64 | 65 | : . 66 | 67 | : . 68 | 69 | : . 70 | 71 | : . 72 | 73 | : . 74 | 75 | : . 76 | 77 | : . 78 | 79 | : . 80 | 81 | : . 82 | 83 | : . 84 | 85 | : . 86 | 87 | : . 88 | 89 | : . 90 | 91 | : . 92 | 93 | : . 94 | 95 | : . 96 | 97 | : . 98 | 99 | : . 100 | 101 | : . 102 | 103 | : . 104 | 105 | : . 106 | 107 | : : . 108 | 109 | : . 110 | 111 | : . 112 | 113 | : . 114 | 115 | : . 116 | 117 | : . 118 | 119 | : . 120 | 121 | : . 122 | 123 | : . 124 | 125 | : . 126 | 127 | : . 128 | 129 | : . 130 | 131 | -------------------------------------------------------------------------------- /examples/product-0.ttl: -------------------------------------------------------------------------------- 1 | @prefix rdf: . 2 | @prefix xsd: . 3 | 4 | @prefix c: . 5 | @prefix currency: . 6 | @prefix p: . 7 | 8 | @prefix d: . 9 | 10 | _:xbox_series_x 11 | rdf:type c:Product ; 12 | p:name "Microsoft - Xbox Series X 1TB Console - Black" ; 13 | p:url ; 14 | p:available false ; 15 | p:price [ 16 | rdf:type currency:USDollar ; 17 | p:amount "499.99"^^xsd:decimal ; 18 | ] . 19 | 20 | _:ps5 21 | rdf:type c:Product ; 22 | p:name "Sony - PlayStation 5 Console" ; 23 | p:url ; 24 | p:available false ; 25 | p:price [ 26 | rdf:type currency:USDollar ; 27 | p:amount "499.99"^^xsd:decimal ; 28 | ] . 29 | 30 | d:TimeEntry__ps5__2020_11_12 31 | a c:TimeEntry ; 32 | p:date "2020-11-12"^^xsd:date ; 33 | p:value _:ps5 . 34 | 35 | d:TimeEntry__xbox_series_x__2020_10_12 36 | a c:TimeEntry ; 37 | p:date "2020-10-12"^^xsd:date ; 38 | p:value _:xbox_series_x . 39 | 40 | d:TimeEntry__ps5__2022_06_01 41 | a c:TimeEntry ; 42 | p:date "2022-06-01"^^xsd:date ; 43 | p:value _:ps5 . 
44 | -------------------------------------------------------------------------------- /examples/recursive-64.ttl: -------------------------------------------------------------------------------- 1 | @prefix : . 2 | [ : [ : [ : [ : [ : [ : [ : [ : 3 | [ : [ : [ : [ : [ : [ : [ : [ : 4 | [ : [ : [ : [ : [ : [ : [ : [ : 5 | [ : [ : [ : [ : [ : [ : [ : [ : 6 | [ : [ : [ : [ : [ : [ : [ : [ : 7 | [ : [ : [ : [ : [ : [ : [ : [ : 8 | [ : [ : [ : [ : [ : [ : [ : [ : 9 | [ : [ : [ : [ : [ : [ : [ : [ : 10 | : 11 | ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] 12 | ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] 13 | ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] 14 | ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] . -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.pytest.ini_options] 6 | log_cli = true 7 | log_cli_level = "INFO" 8 | log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" 9 | log_cli_date_format = "%Y-%m-%d %H:%M:%S" -------------------------------------------------------------------------------- /rdfhash.ttl: -------------------------------------------------------------------------------- 1 | @prefix rdf: . 2 | @prefix rdfs: . 3 | @prefix xsd: . 4 | @prefix owl: . 5 | @prefix sh: . 6 | @prefix skos: . 7 | @prefix dcterms: . 8 | 9 | @prefix gist: . 10 | 11 | @prefix hash: . 12 | @prefix hashid: . 13 | 14 | hash: a owl:Ontology ; 15 | rdfs:label "`rdfhash` Ontology" ; 16 | rdfs:comment "Ontology for `rdfhash` tool." ; 17 | owl:versionInfo "0.0.1" ; 18 | rdfs:seeAlso , ; 19 | dcterms:creator ; 20 | dcterms:created "2023-06-22"^^xsd:date ; 21 | . 22 | 23 | hash:id a owl:DatatypeProperty ; 24 | rdfs:label "id" ; 25 | rdfs:comment "Main identifier for subject. Can be Literal or URI. A subject can only have 1 instance of this property." 
; 26 | rdfs:subPropertyOf sh:property ; 27 | rdfs:domain owl:Thing ; 28 | rdfs:range xsd:string ; 29 | dcterms:creator ; 30 | dcterms:created "2023-06-22"^^xsd:date ; 31 | . 32 | 33 | hash:id_shape a sh:PropertyShape ; 34 | sh:path hash:id ; 35 | sh:datatype xsd:string ; 36 | sh:maxCount 1 ; 37 | dcterms:creator ; 38 | dcterms:created "2023-06-22"^^xsd:date ; 39 | . 40 | 41 | # _____________________________________________________________________________ # 42 | 43 | 44 | hash:Rule a owl:Class, sh:NodeShape ; 45 | rdfs:label "Rule" ; 46 | rdfs:comment "A rule for selecting subject URI's and hashing a checksum of their source/subset graph definition." ; 47 | dcterms:creator ; 48 | dcterms:created "2023-06-22"^^xsd:date ; 49 | sh:property hash:id_shape ; 50 | sh:property [ 51 | a sh:PropertyShape ; 52 | sh:path hash:sparql ; 53 | sh:datatype xsd:string ; 54 | sh:maxCount 1 ; 55 | ], [ 56 | a sh:PropertyShape ; 57 | sh:path hash:insert ; 58 | sh:class hash:Definition ; 59 | 60 | ], [ 61 | a sh:PropertyShape ; 62 | sh:path hash:method ; 63 | sh:class hash:Method ; 64 | sh:maxCount 1 ; 65 | sh:defaultValue hash:sha256 ; 66 | ], [ 67 | a sh:PropertyShape ; 68 | sh:path hash:template ; 69 | sh:datatype xsd:string ; 70 | sh:maxCount 1 ; 71 | sh:defaultValue "{method}:{value}" ; 72 | ], [ 73 | a sh:PropertyShape ; 74 | sh:path hash:include ; 75 | ], [ 76 | a sh:PropertyShape ; 77 | sh:path hash:exclude ; 78 | sh:class hash:Triples ; 79 | ], [ 80 | a sh:PropertyShape ; 81 | sh:path hash:inputTemplate ; 82 | sh:datatype xsd:string ; 83 | sh:maxCount 1 ; 84 | sh:defaultValue "{p} {o}.\\n" ; 85 | ], [ 86 | a sh:PropertyShape ; 87 | sh:path hash:bitSize ; 88 | sh:datatype xsd:integer ; 89 | sh:maxCount 1 ; 90 | ] . 91 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 92 | 93 | hash:sparql a owl:DatatypeProperty ; 94 | rdfs:label "sparql" ; 95 | rdfs:comment "SPARQL query used to select instances to be hashed. 
SELECT returns Subject URIs to replace with a checksum of their source graph definition. CONSTRUCT allows you to return a subset of the graph to hash; All Subject URIs are replaced in the source graph with a checksum of the CONSTRUCTed graph definition." ; 96 | rdfs:subPropertyOf sh:property ; 97 | rdfs:domain hash:Rule ; 98 | rdfs:range xsd:string ; 99 | dcterms:creator ; 100 | dcterms:created "2023-06-22"^^xsd:date ; 101 | . 102 | 103 | hash:insert a owl:ObjectProperty ; 104 | rdfs:label "insert" ; 105 | rdfs:comment "A nested set of triples that are inserted on selected instances. Look at `hash:Triples` for more information on handling dynamic values." ; 106 | rdfs:domain hash:Rule ; 107 | rdfs:range hash:Definition ; 108 | dcterms:creator ; 109 | dcterms:created "2023-06-22"^^xsd:date ; 110 | . 111 | 112 | hash:inherit a owl:ObjectProperty ; 113 | rdfs:label "inherit" ; 114 | rdfs:comment "Inherit all properties from another `hash:Rule` instance. " ; 115 | rdfs:domain hash:Rule ; 116 | rdfs:range hash:Rule ; 117 | dcterms:creator ; 118 | dcterms:created "2023-06-22"^^xsd:date ; 119 | . 120 | 121 | hash:method a owl:ObjectProperty ; 122 | rdfs:label "method" ; 123 | rdfs:comment "The hash method used to generate the hash value. (e.g. `hash:sha256`)" ; 124 | rdfs:domain hash:Rule ; 125 | rdfs:range hash:Method ; 126 | dcterms:creator ; 127 | dcterms:created "2023-06-22"^^xsd:date ; 128 | . 129 | 130 | hash:template a owl:DatatypeProperty ; 131 | rdfs:label "template" ; 132 | rdfs:comment "URI template generated from hash value. The template can contain the following variables: {method} (e.g. `md5`), {value} (e.g. `a6be11c879133def33fdb767be80056f`)" ; 133 | rdfs:domain hash:Rule ; 134 | rdfs:range xsd:string ; 135 | dcterms:creator ; 136 | dcterms:created "2023-06-22"^^xsd:date ; 137 | . 138 | 139 | hash:include a owl:ObjectProperty ; 140 | rdfs:label "include" ; 141 | rdfs:comment "The list of properties to include in the hash input. 
If none specified, all properties are included." ; 142 | rdfs:domain hash:Rule ; 143 | rdfs:range owl:Thing ; 144 | dcterms:creator ; 145 | dcterms:created "2023-06-22"^^xsd:date ; 146 | . 147 | 148 | hash:exclude a owl:ObjectProperty ; 149 | rdfs:label "exclude" ; 150 | rdfs:comment "The list of properties to exclude from the hash input." ; 151 | rdfs:domain hash:Rule ; 152 | rdfs:range owl:Thing ; 153 | dcterms:creator ; 154 | dcterms:created "2023-06-22"^^xsd:date ; 155 | . 156 | 157 | hash:inputTemplate a owl:DatatypeProperty ; 158 | rdfs:label "input template" ; 159 | rdfs:comment "Input triple statement used to generate hash value. The generated template is sorted among a list of all other triples on the subject, then joined together with an empty string. The template can contain the following variables: {p} (predicate), {o} (object)." ; 160 | rdfs:domain hash:Rule ; 161 | rdfs:range xsd:string ; 162 | dcterms:creator ; 163 | dcterms:created "2023-06-22"^^xsd:date ; 164 | . 165 | 166 | hash:bitSize a owl:DatatypeProperty ; 167 | rdfs:label "Bit Size" ; 168 | rdfs:comment "Size of the hash value in bits." ; 169 | rdfs:subPropertyOf sh:property ; 170 | rdfs:domain hash:Rule ; 171 | rdfs:range xsd:integer ; 172 | dcterms:creator ; 173 | dcterms:created "2023-06-22"^^xsd:date ; 174 | . 175 | 176 | # _____________________________________________________________________________ # 177 | # hash:Option & hash:SaltForm 178 | 179 | hash:Option a owl:Class, sh:NodeShape ; 180 | rdfs:subClassOf gist:Category ; 181 | dcterms:creator ; 182 | dcterms:created "2023-06-22"^^xsd:date ; 183 | . 184 | 185 | hash:SaltForm a owl:Class, sh:NodeShape ; 186 | rdfs:subClassOf gist:Option ; 187 | dcterms:creator ; 188 | dcterms:created "2023-06-22"^^xsd:date ; 189 | . 
190 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 191 | 192 | hash:option a owl:ObjectProperty ; 193 | rdfs:subPropertyOf gist:isCategorizedBy ; 194 | rdfs:label "option" ; 195 | rdfs:comment "An option for a hash method." ; 196 | rdfs:domain hash:Rule ; 197 | rdfs:range hash:Option ; 198 | dcterms:creator ; 199 | dcterms:created "2023-06-22"^^xsd:date ; 200 | . 201 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 202 | 203 | hash:saltPrefixTriples a hash:SaltForm, owl:NamedIndividual ; 204 | rdfs:label "prefix triples" ; 205 | rdfs:comment "Insert salt before each triple in the input hash string." ; 206 | dcterms:creator ; 207 | dcterms:created "2023-06-22"^^xsd:date ; 208 | . 209 | 210 | hash:saltSuffixTriples a hash:SaltForm, owl:NamedIndividual ; 211 | rdfs:label "suffix triples" ; 212 | rdfs:comment "Insert salt after each triple in the input hash string." ; 213 | dcterms:creator ; 214 | dcterms:created "2023-06-22"^^xsd:date ; 215 | . 216 | # _____________________________________________________________________________ # 217 | 218 | hash:Method a owl:Class, sh:NodeShape ; 219 | rdfs:subClassOf gist:Category ; 220 | dcterms:creator ; 221 | dcterms:created "2023-06-22"^^xsd:date ; 222 | sh:property [ 223 | a sh:PropertyShape ; 224 | sh:path hash:requiresBitSize ; 225 | sh:datatype xsd:boolean ; 226 | sh:defaultValue false ; 227 | ] ; 228 | sh:property [ 229 | a sh:PropertyShape ; 230 | sh:path hash:isDeterministic ; 231 | sh:datatype xsd:boolean ; 232 | sh:defaultValue false ; 233 | ] ; 234 | . 235 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 236 | 237 | hash:isDeterministic a gist:Category, owl:NamedIndividual ; 238 | rdfs:label "is deterministic" ; 239 | rdfs:comment "Whether the hash method is deterministic." ; 240 | dcterms:creator ; 241 | dcterms:created "2023-06-22"^^xsd:date ; 242 | . 
243 | 244 | hash:requiresBitSize a gist:Category, owl:NamedIndividual ; 245 | rdfs:label "requires bit size" ; 246 | rdfs:comment "Whether the hash method requires a bit size." ; 247 | dcterms:creator ; 248 | dcterms:created "2023-06-22"^^xsd:date ; 249 | . 250 | 251 | hash:md5 a hash:Method, owl:NamedIndividual ; 252 | hash:id "md5" ; 253 | gist:isCategorizedBy hash:isDeterministic ; 254 | dcterms:creator ; 255 | dcterms:created "2023-06-22"^^xsd:date ; 256 | . 257 | 258 | hash:sha1 a hash:Method, owl:NamedIndividual ; 259 | rdfs:label "sha1" ; 260 | gist:isCategorizedBy hash:isDeterministic ; 261 | dcterms:creator ; 262 | dcterms:created "2023-06-22"^^xsd:date ; 263 | . 264 | 265 | hash:sha224 a hash:Method, owl:NamedIndividual ; 266 | rdfs:label "sha224" ; 267 | gist:isCategorizedBy hash:isDeterministic ; 268 | dcterms:creator ; 269 | dcterms:created "2023-06-22"^^xsd:date ; 270 | . 271 | 272 | hash:sha256 a hash:Method, owl:NamedIndividual ; 273 | rdfs:label "sha256" ; 274 | gist:isCategorizedBy hash:isDeterministic ; 275 | dcterms:creator ; 276 | dcterms:created "2023-06-22"^^xsd:date ; 277 | . 278 | 279 | hash:sha384 a hash:Method, owl:NamedIndividual ; 280 | rdfs:label "sha384" ; 281 | gist:isCategorizedBy hash:isDeterministic ; 282 | dcterms:creator ; 283 | dcterms:created "2023-06-22"^^xsd:date ; 284 | . 285 | 286 | hash:sha512 a hash:Method, owl:NamedIndividual ; 287 | rdfs:label "sha512" ; 288 | gist:isCategorizedBy hash:isDeterministic ; 289 | dcterms:creator ; 290 | dcterms:created "2023-06-22"^^xsd:date ; 291 | . 292 | 293 | hash:sha3-224 a hash:Method, owl:NamedIndividual ; 294 | rdfs:label "sha3-224" ; 295 | gist:isCategorizedBy hash:isDeterministic ; 296 | dcterms:creator ; 297 | dcterms:created "2023-06-22"^^xsd:date ; 298 | . 299 | 300 | hash:sha3-256 a hash:Method, owl:NamedIndividual ; 301 | rdfs:label "sha3-256" ; 302 | gist:isCategorizedBy hash:isDeterministic ; 303 | dcterms:creator ; 304 | dcterms:created "2023-06-22"^^xsd:date ; 305 | . 
306 | 307 | hash:sha3-384 a hash:Method, owl:NamedIndividual ; 308 | rdfs:label "sha3-384" ; 309 | gist:isCategorizedBy hash:isDeterministic ; 310 | dcterms:creator ; 311 | dcterms:created "2023-06-22"^^xsd:date ; 312 | . 313 | 314 | hash:sha3-512 a hash:Method, owl:NamedIndividual ; 315 | rdfs:label "sha3-512" ; 316 | gist:isCategorizedBy hash:isDeterministic ; 317 | dcterms:creator ; 318 | dcterms:created "2023-06-22"^^xsd:date ; 319 | . 320 | 321 | hash:blake2b a hash:Method, owl:NamedIndividual ; 322 | rdfs:label "blake2b" ; 323 | gist:isCategorizedBy hash:isDeterministic ; 324 | dcterms:creator ; 325 | dcterms:created "2023-06-22"^^xsd:date ; 326 | . 327 | 328 | hash:blake2s a hash:Method, owl:NamedIndividual ; 329 | rdfs:label "blake2s" ; 330 | gist:isCategorizedBy hash:isDeterministic ; 331 | dcterms:creator ; 332 | dcterms:created "2023-06-22"^^xsd:date ; 333 | . 334 | 335 | hash:uuid1 a hash:Method, owl:NamedIndividual ; 336 | rdfs:label "uuid1" ; 337 | dcterms:creator ; 338 | dcterms:created "2023-06-22"^^xsd:date ; 339 | . 340 | 341 | hash:uuid3 a hash:Method, owl:NamedIndividual ; 342 | rdfs:label "uuid3" ; 343 | dcterms:creator ; 344 | dcterms:created "2023-06-22"^^xsd:date ; 345 | . 346 | 347 | hash:uuid4 a hash:Method, owl:NamedIndividual ; 348 | rdfs:label "uuid4" ; 349 | dcterms:creator ; 350 | dcterms:created "2023-06-22"^^xsd:date ; 351 | . 352 | 353 | hash:uuid5 a hash:Method, owl:NamedIndividual ; 354 | rdfs:label "uuid5" ; 355 | gist:isCategorizedBy hash:isDeterministic ; 356 | dcterms:creator ; 357 | dcterms:created "2023-06-22"^^xsd:date ; 358 | . 359 | 360 | hash:shake-128 a hash:Method, owl:NamedIndividual ; 361 | rdfs:label "shake-128" ; 362 | gist:isCategorizedBy hash:requiresBitSize ; 363 | gist:isCategorizedBy hash:isDeterministic ; 364 | dcterms:creator ; 365 | dcterms:created "2023-06-22"^^xsd:date ; 366 | . 
367 | 368 | hash:shake-256 a hash:Method, owl:NamedIndividual ; 369 | rdfs:label "shake-256" ; 370 | gist:isCategorizedBy hash:requiresBitSize ; 371 | gist:isCategorizedBy hash:isDeterministic ; 372 | dcterms:creator ; 373 | dcterms:created "2023-06-22"^^xsd:date ; 374 | . 375 | # _____________________________________________________________________________ # 376 | -------------------------------------------------------------------------------- /rdfhash/__init__.py: -------------------------------------------------------------------------------- 1 | from rdfhash.main import reverse_hash_subjects, hash_subjects 2 | 3 | # Default function 'rdfhash' uses function 'hash_subjects'. 4 | rdfhash = hash_subjects 5 | -------------------------------------------------------------------------------- /rdfhash/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import logging 4 | 5 | from rdfhash.main import hash_subjects, reverse_hash_subjects 6 | from rdfhash.logger import logger 7 | from rdfhash.utils.hash import hash_types 8 | from rdfhash.utils.graph import mime, file_ext, graph_types 9 | 10 | 11 | def get_parser(): 12 | """Return argument parser for command 'hash_subjects'. 13 | Returns: argparse.ArgumentParser: _description_ 14 | """ 15 | parser = argparse.ArgumentParser( 16 | description=( 17 | "Replace selected subjects with hash of their triples " 18 | "(`{predicate} {object}.\\n` sorted & joined)." 19 | ), 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 21 | ) 22 | 23 | parser.add_argument( 24 | "data", 25 | nargs="+", 26 | help="Input RDF string or file path.\nSupported file formats: ['." 
def cli(args_list=None):
    """Parse command-line arguments and run `hash_subjects`.

    Serializes the resulting graph to stdout in the requested `--accept`
    format. With `--reverse`, hashed URIs are converted back to blank
    nodes before serializing.

    Args:
        args_list (list[str], optional): Argument list to parse.
            Defaults to `sys.argv[1:]` when None.
    """
    # Default to the process arguments when no explicit list is given.
    if args_list is None:
        args_list = sys.argv[1:]
    parser = get_parser()
    # BUG FIX: previously parsed `sys.argv[1:]` here, silently ignoring an
    # explicitly supplied `args_list`. Parse `args_list` instead so the
    # function is usable programmatically (and testable).
    args = parser.parse_args(["--help"] if len(args_list) == 0 else args_list)

    # Normalize `--format`: accept a MIME-type key, a file-extension key,
    # or an already-normalized MIME value; anything else is an error.
    if args.format in mime:
        args.format = mime[args.format]
    elif args.format in file_ext:
        args.format = file_ext[args.format]
    elif args.format not in mime.values():
        parser.print_usage()
        print(f"\nERROR: Unsupported format: {args.format}")
        sys.exit(1)

    # Normalize `--accept` the same way as `--format`.
    if args.accept in mime:
        args.accept = mime[args.accept]
    elif args.accept in file_ext:
        args.accept = file_ext[args.accept]
    elif args.accept not in mime.values():
        parser.print_usage()
        print(f"\nERROR: Unsupported accept format: {args.accept}")
        sys.exit(1)

    if args.data is None:
        parser.print_usage()
        print("\nERROR: The following arguments are required: -d/--data")
        sys.exit(1)

    # Raise log verbosity if requested (`--debug` takes precedence).
    if args.debug:
        logger.setLevel(logging.DEBUG)
    elif args.verbose:
        logger.setLevel(logging.INFO)

    graph, hashed_values = hash_subjects(
        args.data, args.format, args.method, args.template, args.sparql, args.graph
    )

    # Optionally reverse hashed URIs back to blank nodes; `--template`
    # identifies which URIs were produced by hashing.
    if args.reverse:
        reverse_hash_subjects(graph, args.format, args.template, args.graph)

    print(graph.serialize(format=args.accept))
def hash_subjects(
    data,
    format=None,
    method="sha256",
    template="{method}:{value}",
    sparql_select_subjects=("SELECT DISTINCT ?s { ?s ?p ?o . FILTER (isBlank(?s)) }"),
    graph_type="oxrdflib",
    length=None,
):
    """Hash selected subjects by a checksum of their triples.

    Each selected subject's triples are serialized as
    `{predicate} {object}.` statements which are sorted, joined, and
    hashed; the subject is then replaced throughout the graph by a URI
    built from `template` (e.g. `<sha256:64a8...>`).

    Example:

        Data: `[ :name "John"; :age 24; :country "US" ] .`

        Hash input (sorted `{predicate} {object}.` lines joined together):
            `<.../age> "24"^^<...integer>.`
            `<.../country> "US"^^<...string>.`
            `<.../name> "John"^^<...string>.`

        Hash output: `<sha256:64a8...>`

    Args:
        data (str|rdflib.Graph): Data representing RDF triples.
        format (str, optional): Format of data. Defaults to None.
        method (str, optional): Hashing method to use. May carry an inline
            length spec after a colon (e.g. "shake_128:128").
            Defaults to "sha256".
        template (str, optional): Template string for hash URI.
            Defaults to "{method}:{value}".
        sparql_select_subjects (str, optional): SPARQL SELECT query returning
            the subjects whose triples will be hashed. Defaults to all
            blank-node subjects.
        graph_type (str, optional): Graph type to use. Defaults to "oxrdflib".
        length (int, optional): Length of hash result. Required for some hash
            methods, optional for all. Takes precedence over an inline
            length spec in `method`.

    Returns:
        tuple: `(graph, hashed_values)` — the updated graph and a dict
        mapping each original subject to its resolved hash URI.
    """

    # Convert data provided to a graph object of the requested implementation.
    graph = get_graph(data, format, graph_type)
    len_before = len(graph)

    # Collect the set of subjects to hash via the SPARQL SELECT query.
    select_subjects = set()
    for row in graph.query(sparql_select_subjects):
        for item in row:
            select_subjects.add(item)

    logger.info(
        f"\n({len(select_subjects)}) Hashing subject triples:\n-- "
        + "\n-- ".join([graph.term_to_string(s) for s in select_subjects])
    )

    hashed_values = {}  # Maps original subjects to their resolved hash URIs.

    # Allow an inline length spec on the method (e.g. "shake_128:128").
    # Split on the first ':' only so a malformed spec fails in int()
    # rather than as a tuple-unpacking error.
    spec_length = None
    if ":" in method:
        method, spec_length = method.split(":", 1)
        spec_length = int(spec_length)

    for s in select_subjects:
        # Skip subjects already resolved by a recursive hash_subject call.
        if s in hashed_values:
            continue

        # Skip subjects no longer present (already replaced or N/A).
        if (s, None, None) not in graph:
            logger.warning(
                "Selected subject not found in graph: " + graph.term_to_string(s)
            )
            continue

        hashed_values.update(
            hash_subject(
                graph,
                s,
                method,
                template,
                select_subjects,
                length=length or spec_length,
            )
        )

    logger.info(
        f"\n({len(hashed_values)}) Hashed subjects:\n-- "
        + "\n-- ".join(
            f"{graph.term_to_string(k)} -> {graph.term_to_string(v)}"
            for k, v in hashed_values.items()
        )
    )

    # Report the graph-size change: hashing consolidates subjects with
    # identical definitions, so the graph usually shrinks.
    len_after = len(graph)
    if len_before == len_after:
        logger.info(f"(=) Graph size did not change: {len_before}")
    else:
        logger.info(
            f"(-{len_before-len_after}) Graph size reduced from {len_before} to {len_after}."
        )

    return graph, hashed_values
139 | 140 | Raises: 141 | ValueError: If blank node in predicate position of any triples. 142 | ValueError: If subject does not have triples associated with it. 143 | ValueError: If circular dependency is detected. Unable to resolve 144 | current hash. 145 | """ 146 | hashed_values = {} # Return dictionary. 147 | 148 | # if type(template) != Template: 149 | # template = Template(template) 150 | 151 | if also_subjects == None: 152 | also_subjects = set() 153 | 154 | if circ_deps == None: 155 | circ_deps = set() 156 | 157 | # Add current subject to circular dependencies. 158 | circ_deps.add(subject) 159 | 160 | hash_input_list = [] # List of values to hash. (`${predicate} ${object}.`) 161 | triples_add = [] # List of triples to replace with hashed subject. 162 | 163 | # Get all triples containing subject. 164 | triples = [*graph.triples((subject, None, None))] 165 | 166 | # Return if no triples found on subject specified. 167 | if len(triples) == 0: 168 | logger.warning( 169 | "Could not find any triples for subject: " + graph.term_to_string(subject) 170 | ) 171 | return hashed_values 172 | 173 | # Generate list of `${predicate} ${object}.` for each triple on subject. 174 | # ---------------------------------------------------------------------- 175 | 176 | for triple in triples: 177 | graph.remove(triple) # Remove triple from graph. 178 | 179 | triple_new = [] 180 | 181 | # Iterate over 'predicate' (1) and 'object' (2). 182 | for i in range(1, 3): 183 | term = triple[i] 184 | if term in also_subjects: 185 | # If object in circular dependencies, throw error. 186 | if term in circ_deps: 187 | raise ValueError( 188 | "Unable to resolve hash. Circular dependency " 189 | f"detected: {graph.term_to_string(subject)} <--> {graph.term_to_string(term)}" 190 | ) 191 | 192 | # Recursive Call. 193 | # --------------- 194 | # Resolve hash value of nested triples first. 
195 | hashed_values.update( 196 | hash_subject( 197 | graph, term, method, template, also_subjects, circ_deps, length 198 | ) 199 | ) 200 | triple_new.append(hashed_values[term]) 201 | else: 202 | triple_new.append(term) 203 | 204 | # Append predicate and object to list to be added later with hashed subject. 205 | triples_add.append(triple_new) 206 | 207 | # Append `{predicate} {object}.\n` to list of values to hash. 208 | hash_input_list.append( 209 | f"{graph.term_to_string(triple_new[0], True)} {graph.term_to_string(triple_new[1], True)}.\n" 210 | ) 211 | 212 | # Sort and concatenate list, hash value, then add to graph. 213 | # --------------------------------------------------------- 214 | 215 | # Sort list of strings: `{predicate} {object}.\n` 216 | hash_input_list.sort() 217 | 218 | # Join list of strings to be hashed. 219 | hash_input = "".join(hash_input_list) 220 | 221 | logger.debug(f'({len(hash_input_list)}) Hashing triple set: """{hash_input}"""') 222 | 223 | # Concatenate sorted list, hash with method, then add to a URIRef. 224 | hash_dict = {"method": method, "value": hash_string(hash_input, method, length)} 225 | hash_subj = graph.NamedNode(template.format(**hash_dict)) 226 | 227 | logger.debug(f"Result of hashed triples: {graph.term_to_string(hash_subj)}") 228 | 229 | # Add triples to graph with hashed subject. 230 | for pred_obj in triples_add: 231 | graph.add((hash_subj, *pred_obj)) 232 | 233 | # Replace instances of current subject in the object position. 234 | for triple in graph.triples((None, None, subject)): 235 | graph.remove(triple) 236 | graph.add((triple[0], triple[1], hash_subj)) 237 | 238 | # Add hashed subject to 'hashed_values' and return. 239 | hashed_values[subject] = hash_subj 240 | return hashed_values 241 | 242 | 243 | def reverse_hash_subjects( 244 | data, format=None, template="{method}:{value}", graph_type="oxrdflib" 245 | ): 246 | """Convert hashed URIs to blank nodes. 
247 | 248 | Args: 249 | data (_type_): Data representing RDF triples. 250 | format (str, optional): Format of data. Defaults to None. 251 | 252 | Returns: 253 | rdflib.Graph: Updated 'data' graph. 254 | """ 255 | bnode_int = 0 256 | bnode_dict = {} 257 | 258 | graph = get_graph(data, format, graph_type) 259 | 260 | # Check every term in graph. 261 | for triple in graph.triples(): 262 | new_triple = [] # Replaces 'triple'. 263 | updated = False # Tracks whether 'new_triple' is different to 'triple'. 264 | 265 | for term in triple: 266 | if term in bnode_dict: 267 | updated = True 268 | new_triple.append(bnode_dict[term]) 269 | # If term matches 'template' regex, replace with blank node. 270 | elif graph.is_uri(term) and validate_uri( 271 | graph.term_to_string(term)[1:-1], template 272 | ): 273 | updated = True 274 | bnode = graph.BlankNode(bnode_int) 275 | bnode_int += 1 276 | bnode_dict[term] = bnode 277 | new_triple.append(bnode) 278 | # If does not start with hash method, do not update. 279 | else: 280 | new_triple.append(term) 281 | 282 | # If 'new_triple' is different to 'triple', replace with new. 283 | if updated: 284 | graph.remove(triple) 285 | graph.add(new_triple) 286 | 287 | return graph 288 | -------------------------------------------------------------------------------- /rdfhash/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from .hash import hash_types 4 | 5 | 6 | def validate_uri( 7 | uri, 8 | template="{method}:{value}", 9 | values={"method": set(list(hash_types)), "value": r"[a-f0-9]+"}, 10 | ): 11 | def value_re(value): 12 | if type(value) == set: 13 | return "(" + "|".join(value) + ")" 14 | elif type(value) == str: 15 | return value 16 | 17 | # Convert 'template' to a regular expression. 18 | template_re = template 19 | for key in values.keys(): 20 | template_re = template_re.replace("{" + key + "}", value_re(values[key])) 21 | 22 | # Validate URI. 
23 | return re.match(template_re, uri) != None 24 | -------------------------------------------------------------------------------- /rdfhash/utils/graph.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import io 3 | 4 | import oxrdflib 5 | import rdflib 6 | import pyoxigraph 7 | 8 | from rdfhash.utils.hash import hash_string 9 | 10 | mime = { 11 | "trig": "application/trig", 12 | "nq": "application/n-quads", 13 | "nquads": "application/n-quads", 14 | "ntriples": "application/n-triples", 15 | "nt": "application/n-triples", 16 | "turtle": "text/turtle", 17 | "ttl": "text/turtle", 18 | "rdf": "application/rdf+xml", 19 | "xml": "application/rdf+xml", 20 | "n3": "text/n3", 21 | } 22 | 23 | file_ext = { 24 | "nt": "application/n-triples", 25 | "nq": "application/n-quads", 26 | "ttl": "text/turtle", 27 | "trig": "application/trig", 28 | "n3": "text/n3", 29 | "xml": "application/rdf+xml", 30 | "rdf": "application/rdf+xml", 31 | "n3": "text/n3", 32 | } 33 | 34 | # _____________________________________________________________________________ # 35 | 36 | 37 | class __Graph__: 38 | """Interoperable graph class, based on rdflib.ConjunctiveGraph. 
39 | 40 | Raises: 41 | TypeError: _description_ 42 | NotImplementedError: _description_ 43 | NotImplementedError: _description_ 44 | 45 | Returns: 46 | _type_: _description_ 47 | """ 48 | 49 | graph = None 50 | 51 | library_class = rdflib 52 | graph_class = rdflib.ConjunctiveGraph 53 | 54 | NamedNode = rdflib.URIRef 55 | BlankNode = rdflib.BNode 56 | Literal = rdflib.Literal 57 | Variable = rdflib.Variable 58 | 59 | xsd_langstring = rdflib.URIRef( 60 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" 61 | ) 62 | 63 | xsd_string = rdflib.URIRef("http://www.w3.org/2001/XMLSchema#string") 64 | 65 | xsd_boolean = rdflib.URIRef("http://www.w3.org/2001/XMLSchema#boolean") 66 | 67 | default_format = mime["trig"] 68 | 69 | supports_named_graphs = True 70 | 71 | def __init__(self, data=None, format=None, max_path=2048): 72 | """Initialize graph object 73 | 74 | Args: 75 | data (_type_, optional): _description_. Defaults to None. 76 | format (_type_, optional): _description_. Defaults to None. 77 | max_path (int, optional): Check if 'data' is a file path if length 78 | is less than 'max_path'. Specify -1 to always check. Defaults to 2048. 79 | """ 80 | if self.graph == None: 81 | self.graph = self.graph_class() 82 | 83 | if data: 84 | type_data = type(data) 85 | 86 | if type_data == self.graph_class: 87 | self.graph = data 88 | return 89 | 90 | elif type_data == str: 91 | data = [data] 92 | 93 | elif type_data != list: 94 | raise ValueError( 95 | "Argument 'data' must be string or list of strings. Got " 96 | + str(type_data) 97 | ) 98 | 99 | # Parse all files into graph. 
100 | for item in data: 101 | if (max_path == -1 or len(item) < max_path) and os.path.isfile(item): 102 | self.parse_file(item, format) 103 | else: 104 | self.parse(item, format) 105 | 106 | def __len__(self): 107 | return len(self.graph) 108 | 109 | def __contains__(self, item): 110 | return item in self.graph 111 | 112 | def _parse(self, data, format): 113 | self.graph.parse(data=data, format=format) 114 | 115 | def _parse_file(self, file_path, format=None): 116 | self.graph.parse(file_path, format=format) 117 | 118 | def parse(self, data, format=None): 119 | self._parse(data=data, format=format or self.default_format) 120 | return self 121 | 122 | def parse_file(self, file_path, format=None): 123 | if format == None: 124 | ext = os.path.splitext(file_path)[1][1:] 125 | if ext not in file_ext: 126 | raise ValueError("File specified not recognized as a valid RDF file. ") 127 | self._parse_file(file_path, format=file_ext[ext]) 128 | else: 129 | self._parse_file(file_path, format=format) 130 | return self 131 | 132 | def serialize(self, path=None, format=None): 133 | if path: 134 | self.graph.serialize(destination=path, format=format or self.default_format) 135 | return True 136 | else: 137 | return self.graph.serialize(format=format or self.default_format) 138 | 139 | def query(self, query): 140 | res = self.graph.query(query) 141 | return res 142 | 143 | def subjects(self, predicate=None, object=None): 144 | return self.graph.subjects(predicate, object) 145 | 146 | def predicates(self, subject=None, object=None): 147 | return self.graph.predicates(subject, object) 148 | 149 | def objects(self, subject=None, predicate=None): 150 | return self.graph.objects(subject, predicate) 151 | 152 | def triples(self, triple=None): 153 | if triple == None: 154 | return self.graph.triples((None, None, None)) 155 | return self.graph.triples(triple) 156 | 157 | def quads(self, quad=None): 158 | if quad == None: 159 | return self.graph.quads((None, None, None, None)) 160 | return 
self.graph.quads(quad) 161 | 162 | def is_bnode(self, term): 163 | return type(term) == self.BlankNode 164 | 165 | def is_uri(self, term): 166 | return type(term) == self.NamedNode 167 | 168 | def is_literal(self, term): 169 | return type(term) == self.Literal 170 | 171 | def is_variable(self, term): 172 | return type(term) == self.Variable 173 | 174 | def is_term(self, term, allow_variable=False): 175 | return any( 176 | [ 177 | self.is_uri(term), 178 | self.is_literal(term), 179 | self.is_bnode(term), 180 | self.is_variable(term) if allow_variable else False, 181 | ] 182 | ) 183 | 184 | def term_to_string(self, term, expand_literals=False): 185 | if expand_literals and type(term) == self.Literal: 186 | value, datatype, language = term.value, term.datatype, term.language 187 | if datatype == None: 188 | if language: 189 | datatype = self.xsd_langstring 190 | else: 191 | datatype = self.xsd_string 192 | if term.language: 193 | return f'"{value}"^^{datatype.n3()}@{language}' 194 | 195 | return f'"{value}"^^{datatype.n3()}' 196 | return term.n3() 197 | 198 | def hash_triples(self, triples, method="sha256", triple_format="{p} {o}\n"): 199 | sorted_triples = sorted( 200 | triple_format.format(s=s, p=p, o=o) for s, p, o in triples 201 | ) 202 | joined_triples = "".join(sorted_triples) 203 | return hash_string(joined_triples, method=method) 204 | 205 | def add(self, triples): 206 | self.graph.add(triples) 207 | return self 208 | 209 | def remove(self, triples): 210 | self.graph.remove(triples) 211 | return self 212 | 213 | """Available methods: 214 | __init__ 215 | __len__ 216 | __contains__ 217 | _parse 218 | _parse_file 219 | parse 220 | parse_file 221 | serialize 222 | query 223 | subjects 224 | predicates 225 | objects 226 | quads 227 | triples 228 | is_bnode 229 | is_uri 230 | is_literal 231 | term_to_string 232 | hash_triples 233 | add 234 | remove 235 | """ 236 | 237 | 238 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 239 | 240 | 241 | class 
RdfLibGraph(__Graph__): 242 | """ 243 | __Graph__ defines interoperable graph class and is based on rdflib.Graph. 244 | No need to define methods here. 245 | """ 246 | 247 | 248 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 249 | 250 | 251 | class OxRdfLibGraph(RdfLibGraph): 252 | library_class = oxrdflib 253 | graph_class = oxrdflib.Graph 254 | 255 | NamedNode = rdflib.URIRef 256 | BlankNode = rdflib.BNode 257 | Literal = rdflib.Literal 258 | Variable = rdflib.Variable 259 | 260 | default_format = mime["trig"] 261 | 262 | supports_named_graphs = True 263 | """Inheriting methods from RdfLibGraph""" 264 | 265 | def __init__(self, data=None, format=None, max_path=2048): 266 | self.graph = rdflib.ConjunctiveGraph(store="Oxigraph") 267 | super().__init__(data, format, max_path) 268 | 269 | 270 | # __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ # 271 | 272 | 273 | class OxiGraph(__Graph__): 274 | graph_class = pyoxigraph.Store 275 | 276 | default_format = mime["trig"] 277 | 278 | BlankNode = pyoxigraph.BlankNode 279 | NamedNode = pyoxigraph.NamedNode 280 | Literal = pyoxigraph.Literal 281 | Variable = pyoxigraph.Variable 282 | 283 | Quad = pyoxigraph.Quad 284 | 285 | xsd_string = pyoxigraph.NamedNode("http://www.w3.org/2001/XMLSchema#string") 286 | xsd_boolean = pyoxigraph.NamedNode("http://www.w3.org/2001/XMLSchema#boolean") 287 | 288 | supports_named_graphs = True 289 | 290 | def __contains__(self, item): 291 | iter = self.quads(item) 292 | try: 293 | next(iter) 294 | return True 295 | except StopIteration: 296 | return False 297 | 298 | def _parse(self, data, format): 299 | input = io.StringIO(data) 300 | self.graph.load(input, format) 301 | return self 302 | 303 | def _parse_file(self, path, format): 304 | self.graph.load(path, format) 305 | return self 306 | 307 | def serialize(self, path=None, format=None): 308 | if format == None: 309 | format = self.default_format 310 | if path: 311 | self.graph.dump(path, mime_type=format) 312 | return True 313 | else: 
314 | with io.BytesIO() as buffer: 315 | self.graph.dump(buffer, mime_type=format) 316 | buffer.seek(0) 317 | res = buffer.read() 318 | return res.decode("utf-8") 319 | 320 | def subjects(self, predicate=None, object=None, graph=None): 321 | return self.graph.quads_for_pattern(None, predicate, object, graph) 322 | 323 | def predicates(self, subject=None, object=None, graph=None): 324 | return self.graph.quads_for_pattern(subject, None, object, graph) 325 | 326 | def objects(self, subject=None, predicate=None, graph=None): 327 | return self.graph.quads_for_pattern(subject, predicate, None, graph) 328 | 329 | def quads(self, quad): 330 | return self.graph.quads_for_pattern(*quad) 331 | 332 | def triples(self, triple=None): 333 | if triple == None: 334 | triple = (None, None, None) 335 | return self.quads(triple) 336 | 337 | def term_to_string(self, term, expand_literals=False): 338 | if expand_literals and type(term) == self.Literal: 339 | value, datatype, language = term.value, term.datatype, term.language 340 | if datatype == self.xsd_boolean: 341 | value = str(value.capitalize()) 342 | if language: 343 | return f'"{value}"^^{datatype}@{language}' 344 | return f'"{value}"^^{datatype}' 345 | return str(term) 346 | 347 | def add(self, quad): 348 | return self.graph.add(self.Quad(*quad)) 349 | 350 | def remove(self, quad): 351 | return self.graph.remove(self.Quad(*quad)) 352 | 353 | 354 | # _____________________________________________________________________________ # 355 | 356 | graph_types = { 357 | "rdflib": RdfLibGraph, 358 | "oxrdflib": OxRdfLibGraph, 359 | "oxigraph": OxiGraph, 360 | } 361 | 362 | graph_classes = { 363 | rdflib.Graph: RdfLibGraph, 364 | oxrdflib.Graph: OxRdfLibGraph, 365 | pyoxigraph.Store: OxiGraph, 366 | } 367 | 368 | 369 | def get_graph(data=None, format="trig", graph_type="oxrdflib", max_path=2048): 370 | type_data = type(data) 371 | 372 | if issubclass(type_data, __Graph__): 373 | return data 374 | elif type_data in graph_classes: 375 | 
return graph_classes[type_data](data, format) 376 | elif graph_type in graph_types: 377 | return graph_types[graph_type](data, format, max_path) 378 | else: 379 | raise ValueError( 380 | "Argument 'graph_type' must be one of: " 381 | + ", ".join(graph_types.keys()) 382 | + ". Got: " 383 | + str(graph_type) 384 | ) 385 | -------------------------------------------------------------------------------- /rdfhash/utils/hash.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import uuid 3 | 4 | 5 | hashlib_methods = { 6 | "md5": hashlib.md5, 7 | "sha1": hashlib.sha1, 8 | "sha224": hashlib.sha224, 9 | "sha256": hashlib.sha256, 10 | "sha384": hashlib.sha384, 11 | "sha512": hashlib.sha512, 12 | "sha3-224": hashlib.sha3_224, 13 | "sha3-256": hashlib.sha3_256, 14 | "sha3-384": hashlib.sha3_384, 15 | "sha3-512": hashlib.sha3_512, 16 | "blake2b": hashlib.blake2b, 17 | "blake2s": hashlib.blake2s, 18 | } 19 | 20 | uuid_methods = { 21 | "uuid1": uuid.uuid1, 22 | "uuid3": uuid.uuid3, 23 | "uuid4": uuid.uuid4, 24 | } 25 | 26 | manual_methods = { 27 | "uuid5": lambda v: uuid.uuid5(uuid.NAMESPACE_OID, v).hex, 28 | "shake-128": lambda v, length: hashlib.shake_128(v).hexdigest(length), 29 | "shake-256": lambda v, length: hashlib.shake_256(v).hexdigest(length), 30 | } 31 | 32 | # ----------------------------------------------------------------------------- # 33 | 34 | 35 | hash_types_requiring_length = { 36 | "shake-128", 37 | "shake-256", 38 | } 39 | 40 | hash_types_requiring_string = { 41 | "uuid5", 42 | } 43 | 44 | hash_types_resolvable = { 45 | *list(hashlib_methods.keys()), 46 | "uuid5", 47 | "shake-128", 48 | "shake-256", 49 | } 50 | 51 | hash_type_functions = { 52 | "hashlib": lambda method, val_list: hashlib_methods[method](*val_list).hexdigest(), 53 | "uuid": lambda method, val_list: uuid_methods[method](*val_list).hex, 54 | "manual": lambda method, val_list: manual_methods[method](*val_list), 55 | } 56 | 57 | hash_types = 
{ 58 | **{k: hash_type_functions["hashlib"] for k in hashlib_methods.keys()}, 59 | **{k: hash_type_functions["uuid"] for k in uuid_methods.keys()}, 60 | **{k: hash_type_functions["manual"] for k in manual_methods.keys()}, 61 | } 62 | 63 | 64 | # ----------------------------------------------------------------------------- # 65 | 66 | 67 | def hash_string(s, method="sha256", length=None): 68 | """Hash a Python string with a given 69 | 70 | Args: 71 | s (str): String to hash. 72 | method (str, optional): Hash method to use. Defaults to "sha256". 73 | length (int, optional): Length of hash result. 74 | 75 | Raises: 76 | ValueError: String is not able to be encoded to 'UTF-8' 77 | ValueError: Hash method specified is not in 'hash_types' 78 | 79 | Returns: 80 | str: Hexadecimal string representation of hash. 81 | """ 82 | # Validate that s is encoded as UTF-8. 83 | try: 84 | if method not in hash_types_requiring_string: 85 | s = s.encode("utf-8") 86 | except UnicodeEncodeError: 87 | raise ValueError("String must be encoded as UTF-8.") 88 | 89 | # Throw error if method not found in 'hash_types' 90 | if method not in hash_types: 91 | raise ValueError(f"Invalid hash method: {method}") 92 | 93 | # Set up method values 94 | method_vals = [s] 95 | if method in hash_types_requiring_length: 96 | method_vals.append(length) 97 | 98 | # Calculate hash 99 | result = hash_types[method](method, method_vals) 100 | 101 | # If length specified, truncate result (unless already truncated in hash method) 102 | if length: 103 | if method in hash_types_requiring_length: 104 | return result 105 | result = result[:length] 106 | 107 | return result 108 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from os import path, getcwd 3 | 4 | setup( 5 | name="rdfhash", 6 | version="0.4.6", 7 | author="Neil Graham", 8 | 
author_email="grahamneiln@gmail.com", 9 | url="https://github.com/NeilGraham/rdfhash", 10 | license_files="LICENSE.txt", 11 | description="De-duplicate RDF triples w/ a SPARQL query. Subjects taken from SELECT are replaced by the hash of their triples '{predicate} {object}.\n' pairs sorted.", 12 | long_description=open(path.join(getcwd(), "README.md")).read() 13 | # Replace relative links with absolute links to GitHub for PyPi 14 | .replace( 15 | "](docs/", 16 | "](https://github.com/NeilGraham/rdfhash/blob/master/docs/", 17 | ), 18 | long_description_content_type="text/markdown", 19 | packages=find_packages(), 20 | entry_points={"console_scripts": ["rdfhash = rdfhash.cli:cli"]}, 21 | python_requires=">=3.7", 22 | install_requires=[ 23 | "pytest >= 7.1.2", 24 | "rdflib >= 6.1.1", 25 | "oxrdflib >= 0.3.4", 26 | "pyoxigraph >= 0.3.16", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from os.path import join, dirname, normpath 4 | 5 | sys.path.append(join(dirname(normpath(__file__)), "..")) 6 | 7 | from rdfhash.logger import logger 8 | 9 | logger.setLevel(logging.DEBUG) 10 | -------------------------------------------------------------------------------- /test/test_examples.py: -------------------------------------------------------------------------------- 1 | from os import getcwd, path, listdir 2 | from pathlib import Path 3 | from glob import glob 4 | 5 | from rdflib import Graph 6 | import pytest 7 | import oxrdflib 8 | 9 | from rdfhash import rdfhash, reverse_hash_subjects 10 | from rdfhash.logger import logger 11 | from rdfhash.utils.hash import ( 12 | hash_types, 13 | hash_types_requiring_length, 14 | hash_types_resolvable, 15 | ) 16 | from rdfhash.utils.graph import graph_types 17 | from utils import compare_graphs, graph_diff 18 | 19 | repo_dir = 
path.dirname(Path(__file__).parent.absolute()) 20 | ttl_files = ( 21 | path.relpath(file) for file in glob(path.join(repo_dir, "examples", "*.ttl")) 22 | ) 23 | 24 | resolvable_hash_methods = list(hash_types_resolvable.copy()) 25 | for i in range(0, len(resolvable_hash_methods)): 26 | if resolvable_hash_methods[i] in hash_types_requiring_length: 27 | resolvable_hash_methods[i] = resolvable_hash_methods[i] + ":64" 28 | 29 | 30 | @pytest.mark.parametrize("file_path", ttl_files) 31 | # @pytest.mark.parametrize("hash_method", resolvable_hash_methods) 32 | @pytest.mark.parametrize("hash_method", ["sha256"]) 33 | @pytest.mark.parametrize("graph_type", list(graph_types.keys())) 34 | # @pytest.mark.parametrize("graph_type", ["oxrdflib"]) 35 | def test__hash_examples(file_path, hash_method, graph_type, force_write=False): 36 | """Hash file and compare against hash file. 37 | 38 | Args: 39 | graph_type (str): Graph type to use. 40 | force_write (bool, optional): If True, forces writing hash 41 | result to file './examples/hashed'. Defaults to True. 42 | """ 43 | 44 | success = False 45 | hash_file_path = path.join( 46 | path.dirname(file_path), 47 | "hashed", 48 | f"{path.splitext(path.basename(file_path))[0]}__{hash_method}.ttl", 49 | ) 50 | 51 | # Generate hash of blank nodes in example file. 52 | graph, replaced_subjects = rdfhash( 53 | file_path, method=hash_method, graph_type=graph_type 54 | ) 55 | 56 | graph_actual = ( 57 | None 58 | if not path.isfile(hash_file_path) 59 | else Graph(store="Oxigraph").parse(hash_file_path) 60 | ) 61 | graph_generated = Graph(store="Oxigraph").parse( 62 | data=graph.serialize(format="text/turtle"), format="text/turtle" 63 | ) 64 | 65 | # If hash file does not exist, continue. 
66 | if graph_actual == None: 67 | logger.warning( 68 | f"Cannot find hash file at path, writing to path: {hash_file_path}" 69 | ) 70 | graph_generated.serialize(hash_file_path, format="text/turtle") 71 | return 72 | 73 | # Check to see that both graphs are the exact same. 74 | elif compare_graphs(graph_generated, graph_actual): 75 | logger.info( 76 | "Successfully verified hash against file: " 77 | f"'{file_path}' <-> '{hash_file_path}' ({hash_method})" 78 | ) 79 | success = True 80 | 81 | # If the hash is not correct, append to 'failed' and continue. 82 | else: 83 | logger.error( 84 | "Mismatch between calculated hash and file: " 85 | f"'{file_path}' -> '{hash_file_path}' ({hash_method}) " 86 | ) 87 | 88 | # Write output of function to file path if 'force_write' is True. 89 | if force_write: 90 | logger.warning(f"Forcing write to file path: {hash_file_path}") 91 | graph.serialize(hash_file_path, format="text/turtle") 92 | 93 | if not success: 94 | differences = graph_diff(graph_generated, graph_actual) 95 | diff_s = "" 96 | 97 | if len(differences["in_g1_not_g2"]) > 0: 98 | diff_s += "Test File Only:\n" 99 | for triple in differences["in_g1_not_g2"]: 100 | diff_s += differences["in_g1_not_g2"].serialize(format="turtle") 101 | diff_s += "\n\n" 102 | 103 | if len(differences["in_g2_not_g1"]) > 0: 104 | diff_s += "Reference File Only:\n" 105 | diff_s += differences["in_g2_not_g1"].serialize(format="turtle") 106 | diff_s += "\n\n" 107 | 108 | logger.error(diff_s) 109 | 110 | pytest.fail(f"Hash mismatch for file: {file_path} ({hash_method})\n\n{diff_s}") 111 | 112 | 113 | # def test__reverse_example(file_path, template="{method}:{value}"): 114 | # graph_generated = reverse_hash_subjects(file_path, ) 115 | -------------------------------------------------------------------------------- /test/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | 3 | from rdflib import Graph, Literal, XSD, BNode 4 | 
import oxrdflib
from termcolor import colored

from rdfhash.utils.graph import get_graph, __Graph__
from rdfhash.main import hash_subjects


def normalize_graph(g, create_new=False):
    """Normalize a graph by giving plain string literals the explicit
    xsd:string datatype.

    Args:
        g (Graph): Graph to normalize.
        create_new (bool, optional): If True, write all triples into a fresh
            Oxigraph-backed graph; otherwise add the normalized triples back
            into `g` itself. Defaults to False.

    Returns:
        Graph: The normalized graph (`g` unless `create_new` is True).
    """
    new_g = Graph(store="Oxigraph") if create_new else g
    # NOTE(review): when create_new is False this adds triples to `g` while
    # iterating it — rdflib tolerates re-adding existing triples, but the
    # in-place mutation is worth confirming against the store backend.
    for s, p, o in g:
        if isinstance(o, Literal) and o.datatype is None and isinstance(o.value, str):
            o = Literal(o.value, datatype=XSD.string)
        new_g.add((s, p, o))
    return new_g


def compare_graphs(g1, g2):
    """Compare two graphs for isomorphism, normalizing plain string
    literals to xsd:string before the comparison.
    """
    return normalize_graph(g1).isomorphic(normalize_graph(g2))


def diff_strings(a, b):
    """Print a colored line-by-line ndiff of two sequences of strings:
    removals in red, additions in green, unchanged lines skipped.
    """
    for i, s in enumerate(difflib.ndiff(a, b)):
        if s[0] == " ":
            continue
        elif s[0] == "-":
            print(colored(f"Line {i+1}, char {s[2:].find(s[-1])+1}: {s}", "red"))
        elif s[0] == "+":
            print(colored(f"Line {i+1}, char {s[2:].find(s[-1])+1}: {s}", "green"))


def graph_diff(f1, f2):
    """Parse two RDF files, hash their blank-node subjects, and print a
    colored per-subject diff to stdout.

    Args:
        f1: Path to the first RDF file.
        f2: Path to the second RDF file.

    NOTE(review): test_examples.py calls this with already-parsed graphs,
    not file paths — confirm which contract is intended.
    """
    g1: __Graph__ = get_graph(graph_type="oxigraph")
    g1.parse_file(f1)

    g2: __Graph__ = get_graph(graph_type="oxigraph")
    g2.parse_file(f2)

    subjects1 = set(g1.subjects())
    subjects2 = set(g2.subjects())

    def hash_bnodes(g):
        """Copy the blank-node-related triples of `g` into a new graph and
        replace their subjects with hashed identifiers."""
        g_hashed = get_graph(graph_type="oxigraph")

        # fix: derive the blank-node subjects from `g` itself — the original
        # always read the closure `subjects1`, even when hashing g2.
        bnodes = {node for node in g.subjects() if g.is_bnode(node)}
        # Also include subjects that reference a blank-node object.
        # fix: union the subject iterators element-wise — the original added
        # the generator objects themselves to a set, and used `+=`, which
        # sets do not support (TypeError); use `|=` on flattened elements.
        for node in g.objects():
            if g.is_bnode(node):
                bnodes |= set(g.subjects(None, node))

        for node in bnodes:
            g_hashed.add(g.triples((node, None, None)))
        hash_subjects(g_hashed)

        return g_hashed

    g1_bnodes = hash_bnodes(g1)
    for triple in g1_bnodes.triples():
        g1.add(triple)
    g2_bnodes = hash_bnodes(g2)
    for triple in g2_bnodes.triples():
        g2.add(triple)

    # Compare subjects
    for subject in subjects1.union(subjects2):
        if subject in subjects1 and subject in subjects2:
            # For subjects present in both graphs, diff their sorted triples.
            triples1 = sorted(g1.triples((subject, None, None)))
            triples2 = sorted(g2.triples((subject, None, None)))
            print(f"Subject: {subject}")
            # fix: difflib.ndiff requires sequences of strings; the original
            # passed triple tuples, which raises a TypeError inside ndiff.
            diff_strings([str(t) for t in triples1], [str(t) for t in triples2])
        elif subject in subjects1:
            # Subjects present only in one graph are reported whole.
            triples1 = sorted(g1.triples((subject, None, None)))
            print(
                colored(
                    f"Subject: {subject} only in g1 with triples:\n{triples1}",
                    "red",
                )
            )
        else:
            triples2 = sorted(g2.triples((subject, None, None)))
            print(
                colored(
                    f"Subject: {subject} only in g2 with triples:\n{triples2}",
                    "green",
                )
            )


def graph_differences(g1, g2, compare_bnodes=False):
    """Find and return the differences between two graphs.

    Blank-node triples are skipped (comparison of blank nodes is not
    implemented; passing compare_bnodes=True raises NotImplementedError
    if any are encountered).

    Returns:
        dict: Keys 'in_g1_not_g2' and 'in_g2_not_g1', each an
        Oxigraph-backed Graph containing the triples present in one graph
        but not the other.
    """
    normalized_g1 = normalize_graph(g1)
    normalized_g2 = normalize_graph(g2)

    def triples_without_bnodes(graph, compare_bnodes=False):
        """Yield triples from the graph, skipping those with blank nodes."""
        for triple in graph:
            if any(isinstance(node, BNode) for node in triple):
                if compare_bnodes:
                    raise NotImplementedError(
                        "Comparison of blank nodes is not implemented"
                    )
                continue
            # fix: yield the whole triple — the original yielded only
            # `triple[0]` (the subject), so the "differences" computed below
            # were subject sets, not triple sets, contradicting the docs.
            yield triple

    # fix: forward the caller's `compare_bnodes` flag (it was ignored).
    set_g1 = set(triples_without_bnodes(normalized_g1, compare_bnodes))
    set_g2 = set(triples_without_bnodes(normalized_g2, compare_bnodes))

    # fix: collect results into graphs — the original used `{}` dict
    # literals and called `.add()` on them (AttributeError); the consumer in
    # test_examples.py calls `.serialize(format="turtle")` on these values,
    # and the commented-out lines in the original show graphs were intended.
    g1_diff = Graph(store="Oxigraph")
    g2_diff = Graph(store="Oxigraph")
    for triple in set_g1 - set_g2:
        g1_diff.add(triple)
    for triple in set_g2 - set_g1:
        g2_diff.add(triple)

    return {"in_g1_not_g2": g1_diff, "in_g2_not_g1": g2_diff}

# ---- end of test/utils/__init__.py ----