├── .gitignore ├── .gitmodules ├── LICENSE ├── Mandolin-Evaluation-and-Discussion.pdf ├── README.md ├── install.sh ├── mandolin-400px.png ├── mandolin-500x500px.png ├── mandolin.properties ├── pgsql-create.sh ├── pgsql-init.sh ├── pgsql-start.sh ├── pgsql-stop.sh ├── pgsql ├── LICENSE ├── drop.sh ├── initdb.sh ├── sql │ ├── create.sql │ ├── debug.sql │ ├── drop.sql │ ├── ground.sql │ ├── load-body.sql │ ├── load-head.sql │ ├── load-tail.sql │ ├── load.sql │ ├── qc.sql │ └── run.sql ├── start.sh └── stop.sh ├── pom.xml ├── rockit.properties └── src ├── main ├── java │ └── org │ │ └── aksw │ │ └── mandolin │ │ ├── MainDolin.java │ │ ├── Mandolin.java │ │ ├── common │ │ ├── MandolinCommon.java │ │ └── NameMapperCommon.java │ │ ├── controller │ │ ├── Classes.java │ │ ├── Evidence.java │ │ ├── NameMapper.java │ │ ├── OntoImporter.java │ │ ├── ProbKBData.java │ │ ├── SimilarityJoin.java │ │ └── Validator.java │ │ ├── eval │ │ ├── CrossValidation.java │ │ ├── Dataset.java │ │ ├── FMeasureEvaluation.java │ │ ├── LinkPredictionEvaluation.java │ │ └── MeanRankCalc.java │ │ ├── grounding │ │ └── Grounding.java │ │ ├── inference │ │ ├── Factors.java │ │ ├── PostgreDB.java │ │ ├── ProbKBToRockitGibbsSampling.java │ │ ├── RockitGibbsSampling.java │ │ └── RockitGroundingAndGibbsSampling.java │ │ ├── model │ │ ├── Cache.java │ │ ├── ComparableLiteral.java │ │ ├── PredictionLiteral.java │ │ └── PredictionSet.java │ │ ├── reasoner │ │ └── PelletReasoner.java │ │ ├── rulemining │ │ ├── AmieHandler.java │ │ ├── RDFToTSV.java │ │ ├── RuleDriver.java │ │ └── RuleMiner.java │ │ ├── semantifier │ │ ├── Commons.java │ │ ├── DatasetBuildFixer.java │ │ ├── DatasetBuildSatellites.java │ │ ├── DatasetBuildSemantifier.java │ │ ├── DatasetBuildStarter.java │ │ ├── DatasetBuilderAlgorithm.java │ │ └── SemantifierPipeline.java │ │ └── util │ │ ├── Bundle.java │ │ ├── CustomQuoteMode.java │ │ ├── DataIO.java │ │ ├── PostgreNotStartedException.java │ │ ├── PrettyRandom.java │ │ ├── SetUtils.java │ │ ├── Shell.java │ │ ├── StringClean.java │ │ ├── Timer.java │ │ ├── URIHandler.java │ │ └── URLs.java └── resources │ ├── log4j.properties │ ├── log4j2.xml │ └── publications.properties └── test ├── java └── org │ └── aksw │ └── mandolin │ └── MandolinTest.java └── resources ├── AKSW-one-out.nt ├── log4j.properties └── log4j2.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | bin/ 3 | target/ 4 | 5 | .classpath 6 | .project 7 | .settings/ 8 | 9 | data/ 10 | data.zip 11 | logs/ 12 | eval/ 13 | pgsql/db/ 14 | 15 | .DS_Store 16 | *.log 17 | *.lp 18 | *.mps -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "horn-concerto"] 2 | path = horn-concerto 3 | url = https://github.com/mommi84/horn-concerto.git 4 | -------------------------------------------------------------------------------- /Mandolin-Evaluation-and-Discussion.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKSW/Mandolin/9b8266d600b83c6368625af669d3fb355296564b/Mandolin-Evaluation-and-Discussion.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](https://github.com/mommi84/Mandolin/raw/master/mandolin-400px.png "Mandolin logo") 2 | 3 | MANDOLIN 4 | ======== 5 | 6 | *The 
best modules for Markov Logic Networks - rule mining, grounding, inference - condensed in one framework.* 7 | 8 | ## Requirements 9 | 10 | * Java 1.8+ 11 | * PostgreSQL 9.4.x 12 | * Gurobi solver 13 | * Maven 14 | * Wget, Unzip 15 | 16 | ## Quick start 17 | 18 | * Download and decompress [Mandolin v0.4.0-alpha](https://github.com/AKSW/Mandolin/releases/download/v0.4.0-alpha/mandolin-binaries-v0.4.0-alpha.zip) 19 | * Run `bash install.sh` 20 | 21 | ## Experiments 22 | 23 | The following command discovers new links of any predicate (`--aim`) on the WordNet dataset (`--input`) with a rule-mining threshold of 0.8 (`--mining`) and 1 million Gibbs sampling iterations (`--sampling`). 24 | 25 | ```bash 26 | java -Xmx1g -jar target/Mandolin-0.4.0-jar-with-dependencies.jar plain --input data/benchmark/wn18/wordnet-mlj12-train.nt,data/benchmark/wn18/wordnet-mlj12-valid.nt --output eval/wn18 --mining 0.8 --sampling 1000000 --aim "*" 27 | ``` 28 | 29 | Discovered links can be found in the `--output` folder at `./eval/wn18/discovered_X.nt`, where `X` is the output threshold: each file contains all links whose confidence is greater than or equal to `X`. 30 | 31 | An excerpt of the discovered **rules and weights**: 32 | 33 | ```text 34 | 0.990517419 wn18:_part_of(b, a) => wn18:_has_part(a, b) 35 | 0.862068966 wn18:_instance_hypernym(a, c) AND wn18:_synset_domain_topic_of(f, b) => wn18:_synset_domain_topic_of(a, b) 36 | ``` 37 | 38 | An excerpt of the discovered **links** with confidence > 0.9: 39 | 40 | ```text 41 | wn18:08131530 wn18:_has_part wn18:08132046 . 42 | wn18:09189411 wn18:_has_part wn18:08707917 . 43 | wn18:10484858 wn18:_synset_domain_topic_of wn18:08441203 . 44 | wn18:01941987 wn18:_synset_domain_topic_of wn18:00300441 . 45 | ``` 46 | 47 | ### Basic documentation 48 | 49 | Mandolin can be launched as follows. 50 | 51 | ```bash 52 | java -Xmx1g -jar target/Mandolin-0.4.0-jar-with-dependencies.jar 53 | ``` 54 | 55 | #### Goals 56 | 57 | **Goal**|**Description** 58 | :-----|:----- 59 | `plain`|Launch a plain Mandolin execution. 60 | `eval`|Evaluate MRR and hits@k. 61 | 62 | #### Plain execution 63 | 64 | Parameters for the `plain` goal: 65 | 66 | **Parameter**|**Description**|**Example value** 67 | :-----|:-----|:----- 68 | `--input`|Comma-separated N-Triples files.|`data1.nt,data2.nt` 69 | `--output`|Workspace and output folder.|`eval/experiment1` 70 | `--aim`|Aim predicate. Use the wildcard `*` for all predicates.|`http://www.w3.org/2002/07/owl#sameAs` 71 | `--mining`|Rule mining threshold.|`0.9` (default: `0.0` support) 72 | `--sampling`|Gibbs sampling iterations.|`1000000` (default: 100 x evidence size) 73 | `--rules`|Maximum number of rules.|`1500` (default: none) 74 | `--sim`|Enable similarity among literals as `min,step,max`.|`0.8,0.1,0.9` (default: none) 75 | `--onto`|Enable ontology import.|`true` (default: `false`) 76 | `--fwc`|Enable forward chaining.|`true` (default: `false`) 77 | 78 | #### Evaluation 79 | 80 | The `eval` goal takes two parameters: the N-Triples file of the test set and Mandolin's output directory. 
81 | 82 | Example run: 83 | 84 | ```bash 85 | java -Xmx1g -jar target/Mandolin-0.4.0-jar-with-dependencies.jar eval data/benchmark/wn18/wordnet-mlj12-test.nt eval/wn18 86 | ``` 87 | 88 | ## Manual install 89 | 90 | * Clone the project: 91 | 92 | ```bash 93 | git clone https://github.com/mommi84/Mandolin.git 94 | cd Mandolin 95 | ``` 96 | 97 | * Get PostgreSQL 9.4.x - [Ubuntu/Debian binaries](http://oscg-downloads.s3.amazonaws.com/packages/postgresql-9.4.8-1-x64-bigsql.deb) 98 | 99 | ### Alternative 1 100 | 101 | * Launch `bash install.sh -c` 102 | 103 | ### Alternative 2 104 | 105 | * Insert the PostgreSQL settings into a `./mandolin.properties` file. Example: 106 | 107 | ```properties 108 | # GENERAL CONFIGURATION FOR MANDOLIN 109 | pgsql_home=/usr/local/Cellar/postgresql/9.4.1 110 | pgsql_username=tom 111 | pgsql_password= 112 | pgsql_url=localhost 113 | ``` 114 | 115 | * Download [data](https://s3-eu-west-1.amazonaws.com/anonymous-folder/data.zip) 116 | 117 | * Compile the project: 118 | 119 | ```bash 120 | export MAVEN_OPTS=-Xss4m 121 | mvn clean compile assembly:single 122 | ``` 123 | 124 | ## Database handler 125 | 126 | After using Mandolin, stop the DB instance with: 127 | 128 | ```bash 129 | sh pgsql-stop.sh 130 | ``` 131 | 132 | The instance can be restarted with: 133 | 134 | ```bash 135 | sh pgsql-start.sh 136 | ``` 137 | 138 | ## Citing 139 | 140 | ``` 141 | @article{soru2017mandolin, 142 | title={Mandolin: A Knowledge Discovery Framework for the Web of Data}, 143 | author={Soru, Tommaso and Esteves, Diego and Marx, Edgard and Ngomo, Axel-Cyrille Ngonga}, 144 | journal={arXiv preprint arXiv:1711.01283}, 145 | year={2017} 146 | } 147 | ``` 148 | 149 | ## License(s) 150 | 151 | **Mandolin** is licensed under the GNU General Public License v2.0. 152 | **AMIE** is licensed under the Creative Commons Attribution-NonCommercial license v3.0. 153 | **ProbKB** is licensed under the BSD license. 154 | **RockIt** is licensed under the MIT License. 155 | **Gurobi** can be activated using a [free academic license](http://www.gurobi.com/academia/academia-center). 156 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "" 3 | echo "=== Mandolin Installer ===" 4 | echo "" 5 | 6 | if [[ $1 == "-c" ]] 7 | then 8 | echo "Compiling Mandolin..." 9 | export MAVEN_OPTS=-Xss4m 10 | mvn -q clean compile assembly:single 11 | fi 12 | 13 | read -p "Download datasets into ./data/? " -n 1 -r 14 | echo # (optional) move to a new line 15 | if [[ $REPLY =~ ^[Yy]$ ]] 16 | then 17 | # do stuff 18 | echo "Downloading datasets..." 19 | wget -q https://s3-eu-west-1.amazonaws.com/anonymous-folder/data.zip 20 | unzip -qq data.zip && rm -rf data.zip 21 | fi 22 | 23 | read -p "Download and install PostgreSQL? [Ubuntu systems only] " -n 1 -r 24 | echo # (optional) move to a new line 25 | if [[ $REPLY =~ ^[Yy]$ ]] 26 | then 27 | # do stuff 28 | echo "Downloading PostgreSQL..." 
29 | wget -q http://oscg-downloads.s3.amazonaws.com/packages/postgresql-9.4.8-1-x64-bigsql.deb 30 | pgdr=`pwd`"/postgres/" 31 | echo "Installing PostgreSQL in "$pgdr 32 | dpkg-deb -x postgresql-9.4.8-1-x64-bigsql.deb $pgdr && rm -rf postgresql-9.4.8-1-x64-bigsql.deb 33 | pgdir=$pgdr"opt/postgresql/pg94" # changing to home 34 | echo "# GENERAL CONFIGURATION FOR MANDOLIN" > mandolin.properties 35 | echo "pgsql_home="$pgdir >> mandolin.properties 36 | echo "pgsql_username="`whoami` >> mandolin.properties 37 | echo "pgsql_password=" >> mandolin.properties 38 | echo "pgsql_url=localhost" >> mandolin.properties 39 | else 40 | read -p "PostgreSQL home? " pgdir 41 | echo "# GENERAL CONFIGURATION FOR MANDOLIN" > mandolin.properties 42 | echo "pgsql_home="$pgdir >> mandolin.properties 43 | read -p "PostgreSQL username? " puname 44 | echo "pgsql_username="$puname >> mandolin.properties 45 | read -sp "PostgreSQL password? " ppwd 46 | echo "pgsql_password="$ppwd >> mandolin.properties 47 | read -p "PostgreSQL host? " phost 48 | echo "pgsql_url="$phost >> mandolin.properties 49 | fi 50 | 51 | echo "Initializing database..." 52 | cd pgsql && $pgdir/bin/initdb db -E utf8 53 | echo "Starting server and creating DB..." 54 | $pgdir/bin/pg_ctl start -D db/ && sleep 5s && $pgdir/bin/createdb probkb && cd .. 55 | 56 | echo "Done." 57 | -------------------------------------------------------------------------------- /mandolin-400px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKSW/Mandolin/9b8266d600b83c6368625af669d3fb355296564b/mandolin-400px.png -------------------------------------------------------------------------------- /mandolin-500x500px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKSW/Mandolin/9b8266d600b83c6368625af669d3fb355296564b/mandolin-500x500px.png -------------------------------------------------------------------------------- /mandolin.properties: -------------------------------------------------------------------------------- 1 | # GENERAL CONFIGURATION FOR MANDOLIN 2 | pgsql_home=/usr/local/Cellar/postgresql/9.4.1 3 | pgsql_username=tom 4 | pgsql_password= 5 | pgsql_url=localhost 6 | -------------------------------------------------------------------------------- /pgsql-create.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | createdb probkb -------------------------------------------------------------------------------- /pgsql-init.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd pgsql && sh initdb.sh && cd .. -------------------------------------------------------------------------------- /pgsql-start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "\n\tStarting PostgreSQL database... Run 'sh pgsql-stop.sh' to terminate it.\n" && cd pgsql && sh start.sh && cd .. 3 | -------------------------------------------------------------------------------- /pgsql-stop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd pgsql && sh stop.sh && cd .. 
3 | -------------------------------------------------------------------------------- /pgsql/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, University of Florida 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /pgsql/drop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | psql probkb -f sql/drop.sql 3 | -------------------------------------------------------------------------------- /pgsql/initdb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | initdb db -E utf8 3 | -------------------------------------------------------------------------------- /pgsql/sql/create.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA probkb; 2 | 3 | -- data tables 4 | CREATE TABLE probkb.classes(id INT PRIMARY KEY, name TEXT); 5 | CREATE TABLE probkb.entities(id INT PRIMARY KEY, name TEXT); 6 | CREATE TABLE probkb.relations(id INT PRIMARY KEY, name TEXT); 7 | 8 | CREATE TABLE probkb.entClasses( 9 | ent INT, class INT, 10 | PRIMARY KEY(ent, class) 11 | ); 12 | 13 | CREATE TABLE probkb.relClasses( 14 | rel INT, class1 INT, class2 INT, 15 | PRIMARY KEY(rel, class1, class2) 16 | ); 17 | 18 | CREATE TABLE probkb.extractions( 19 | rel INT, ent1 INT, ent2 INT, weight DOUBLE PRECISION, url TEXT, 20 | PRIMARY KEY(rel, ent1, ent2) 21 | ); 22 | 23 | CREATE TABLE probkb.functionals( 24 | rel INT, arg INT, deg INT, 25 | PRIMARY KEY (rel, arg) 26 | ); 27 | 28 | CREATE TABLE probkb.ambiguities( 29 | ent INT, class INT, 30 | PRIMARY KEY(ent, class) 31 | ); 32 | 33 | CREATE TABLE probkb.trash( 34 | id INT PRIMARY KEY 35 | ); 36 | 37 | CREATE SEQUENCE probkb.relids; 38 | 39 | -- mln tables 40 | CREATE TABLE probkb.mln1(head INT, body INT, class1 INT, class2 INT, weight DOUBLE PRECISION); 41 | CREATE TABLE probkb.mln2(head INT, body INT, class1 INT, class2 INT, weight DOUBLE PRECISION); 42 | CREATE TABLE probkb.mln3(head INT, body1 INT, body2 INT, 43 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 44 | 
CREATE TABLE probkb.mln4(head INT, body1 INT, body2 INT, 45 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 46 | CREATE TABLE probkb.mln5(head INT, body1 INT, body2 INT, 47 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 48 | CREATE TABLE probkb.mln6(head INT, body1 INT, body2 INT, 49 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 50 | -------------------------------------------------------------------------------- /pgsql/sql/debug.sql: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------- 2 | -- DEBUGGING utilities 3 | ------------------------------------------------------- 4 | CREATE OR REPLACE FUNCTION probkb.trace(rsid INT) RETURNS VOID AS $$ 5 | DECLARE 6 | cnt INT := 1; 7 | target INT; 8 | rule RECORD; 9 | BEGIN 10 | DROP TABLE IF EXISTS probkb.tr, probkb.queue; 11 | CREATE TABLE probkb.tr(head TEXT, body1 TEXT, body2 TEXT); 12 | 13 | CREATE TABLE probkb.queue(id INT PRIMARY KEY); 14 | INSERT INTO probkb.queue VALUES (rsid); 15 | 16 | WHILE cnt > 0 LOOP 17 | SELECT MAX(id) INTO target FROM probkb.queue; 18 | SELECT (probkb.traceStep(target)).* INTO rule; 19 | RAISE INFO '(%) %:-%,%', rule.id1, rule.name1, rule.name2, rule.name3; 20 | 21 | INSERT INTO probkb.tr(head, body1, body2) VALUES (rule.name1, rule.name2, rule.name3); 22 | INSERT INTO probkb.queue 23 | SELECT rule.id2 WHERE rule.id2 IS NOT NULL 24 | UNION 25 | SELECT rule.id3 WHERE rule.id3 IS NOT NULL 26 | EXCEPT 27 | SELECT id FROM probkb.queue; 28 | 29 | DELETE FROM probkb.queue WHERE id = target; 30 | SELECT COUNT(*) INTO cnt FROM probkb.queue; 31 | END LOOP; 32 | END; 33 | $$ LANGUAGE plpgsql; 34 | 35 | CREATE OR REPLACE FUNCTION probkb.traceStep(rsid INT, OUT id1 INT, OUT name1 TEXT, 36 | OUT id2 INT, OUT name2 TEXT, 37 | OUT id3 INT, OUT name3 TEXT) AS $$ 38 | DECLARE 39 | factor RECORD; 40 | BEGIN 41 | SELECT factors.id1, factors.id2, factors.id3 INTO factor FROM probkb.factors 42 | WHERE factors.id1 = rsid AND factors.id2 < rsid AND factors.id3 < rsid 43 | ORDER BY (factors.id2+factors.id3) LIMIT 1; 44 | 45 | SELECT INTO id1, name1 46 | r.id, relations.name || '(' || e1.name || ',' || e2.name || ')' 47 | FROM probkb.relationships r JOIN probkb.relations ON r.rel = relations.id 48 | JOIN probkb.entities e1 ON r.ent1 = e1.id 49 | JOIN probkb.entities e2 ON r.ent2 = e2.id 50 | WHERE r.id = rsid; 51 | 52 | SELECT INTO id2, name2 53 | r.id, relations.name || '(' || e1.name || ',' || e2.name || ')' 54 | FROM probkb.relationships r JOIN probkb.relations ON r.rel = relations.id 55 | JOIN probkb.entities e1 ON r.ent1 = e1.id 56 | JOIN probkb.entities e2 ON r.ent2 = e2.id 57 | WHERE r.id = factor.id2; 58 | 59 | SELECT INTO id3, name3 60 | r.id, relations.name || '(' || e1.name || ',' || e2.name || ')' 61 | FROM probkb.relationships r JOIN probkb.relations ON r.rel = relations.id 62 | JOIN probkb.entities e1 ON r.ent1 = e1.id 63 | JOIN probkb.entities e2 ON r.ent2 = e2.id 64 | WHERE r.id = factor.id3; 65 | 66 | IF id2 IS NULL THEN -- look for urls 67 | SELECT extractions.url INTO name2 68 | FROM probkb.relationships JOIN probkb.extractions ON relationships.rel = extractions.rel 69 | AND relationships.ent1 = extractions.ent1 AND relationships.ent2 = extractions.ent2 70 | WHERE relationships.id = rsid LIMIT 1; 71 | END IF; 72 | END; 73 | $$ LANGUAGE plpgsql; 74 | -------------------------------------------------------------------------------- /pgsql/sql/drop.sql: 
-------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS probkb CASCADE; 2 | -------------------------------------------------------------------------------- /pgsql/sql/load-body.sql: -------------------------------------------------------------------------------- 1 | 2 | -- build relationships table with type information 3 | CREATE TABLE probkb.relationships AS 4 | SELECT nextval('probkb.relids') AS id, r.rel AS rel, 5 | r.ent1 AS ent1, rc.class1 AS class1, 6 | r.ent2 AS ent2, rc.class2 AS class2, AVG(weight) AS weight 7 | FROM probkb.extractions r, probkb.relClasses rc, probkb.entClasses ec1, probkb.entClasses ec2 8 | WHERE r.rel = rc.rel 9 | AND r.ent1 = ec1.ent AND ec1.class = rc.class1 10 | AND r.ent2 = ec2.ent AND ec2.class = rc.class2 11 | GROUP BY r.rel, r.ent1, rc.class1, r.ent2, rc.class2; 12 | CREATE INDEX relationships_rel_idx ON probkb.relationships(rel); 13 | CLUSTER probkb.relationships USING relationships_rel_idx; 14 | 15 | DELETE FROM probkb.relationships WHERE ent1 = ent2; 16 | 17 | SELECT probkb.qc(); 18 | 19 | -------------------------------------------------------------------------------- /pgsql/sql/load-head.sql: -------------------------------------------------------------------------------- 1 | SET work_mem='4GB'; 2 | SET enable_mergejoin=OFF; 3 | 4 | -- generate random types for 0 typed entities 5 | --INSERT INTO entClasses 6 | --SELECT tt.ent, trunc(random()*156) AS class 7 | --FROM (SELECT id AS ent FROM entities 8 | -- EXCEPT 9 | -- SELECT ent FROM entClasses) tt; 10 | 11 | -- import csv 12 | -------------------------------------------------------------------------------- /pgsql/sql/load-tail.sql: -------------------------------------------------------------------------------- 1 | 2 | ANALYZE probkb.relationships; -- gather statistics for better query plan 3 | ANALYZE probkb.mln1; 4 | ANALYZE probkb.mln2; 5 | ANALYZE probkb.mln3; 6 | ANALYZE probkb.mln4; 7 | ANALYZE probkb.mln5; 8 | ANALYZE probkb.mln6; 9 | -------------------------------------------------------------------------------- /pgsql/sql/load.sql: -------------------------------------------------------------------------------- 1 | -- WARNING: This file is not used. Check `load-*.sql` files instead. 
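-- A sketch of how the split load files appear to fit together (inferred from their contents,
-- not from the Java code): load-head.sql sets the session parameters and ends where the COPY
-- statements for the CSV files generated in the workspace (cf. ProbKBData.buildCSV) are
-- presumably appended at runtime; load-body.sql then builds probkb.relationships and runs
-- probkb.qc(); load-tail.sql issues the final ANALYZE statements. The hard-coded paths below
-- show one concrete instance of that sequence.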
2 | 3 | SET work_mem='4GB'; 4 | SET enable_mergejoin=OFF; 5 | 6 | -- generate random types for 0 typed entities 7 | --INSERT INTO entClasses 8 | --SELECT tt.ent, trunc(random()*156) AS class 9 | --FROM (SELECT id AS ent FROM entities 10 | -- EXCEPT 11 | -- SELECT ent FROM entClasses) tt; 12 | 13 | -- import csv 14 | COPY probkb.classes FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/classes.csv' DELIMITERS ',' CSV; 15 | COPY probkb.entities FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/entities.csv' DELIMITERS ',' CSV; 16 | COPY probkb.relations FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/relations.csv' DELIMITERS ',' CSV; 17 | COPY probkb.entClasses FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/entClasses.csv' DELIMITERS ',' CSV; 18 | COPY probkb.relClasses FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/relClasses.csv' DELIMITERS ',' CSV; 19 | COPY probkb.functionals FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/functionals.csv' DELIMITERS ',' CSV; 20 | COPY probkb.extractions FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/relationships.csv' DELIMITERS ',' CSV; 21 | 22 | -- build relationships table with type information 23 | CREATE TABLE probkb.relationships AS 24 | SELECT nextval('probkb.relids') AS id, r.rel AS rel, 25 | r.ent1 AS ent1, rc.class1 AS class1, 26 | r.ent2 AS ent2, rc.class2 AS class2, AVG(weight) AS weight 27 | FROM probkb.extractions r, probkb.relClasses rc, probkb.entClasses ec1, probkb.entClasses ec2 28 | WHERE r.rel = rc.rel 29 | AND r.ent1 = ec1.ent AND ec1.class = rc.class1 30 | AND r.ent2 = ec2.ent AND ec2.class = rc.class2 31 | GROUP BY r.rel, r.ent1, rc.class1, r.ent2, rc.class2; 32 | CREATE INDEX relationships_rel_idx ON probkb.relationships(rel); 33 | CLUSTER probkb.relationships USING relationships_rel_idx; 34 | 35 | DELETE FROM probkb.relationships WHERE ent1 = ent2; 36 | 37 | SELECT probkb.qc(); 38 | 39 | COPY probkb.mln1 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln1.csv' DELIMITERS ',' CSV; 40 | COPY probkb.mln2 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln2.csv' DELIMITERS ',' CSV; 41 | COPY probkb.mln3 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln3.csv' DELIMITERS ',' CSV; 42 | COPY probkb.mln4 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln4.csv' DELIMITERS ',' CSV; 43 | COPY probkb.mln5 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln5.csv' DELIMITERS ',' CSV; 44 | COPY probkb.mln6 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln6.csv' DELIMITERS ',' CSV; 45 | 46 | ANALYZE probkb.relationships; -- gather statistics for better query plan 47 | ANALYZE probkb.mln1; 48 | ANALYZE probkb.mln2; 49 | ANALYZE probkb.mln3; 50 | ANALYZE probkb.mln4; 51 | ANALYZE probkb.mln5; 52 | ANALYZE probkb.mln6; 53 | -------------------------------------------------------------------------------- /pgsql/sql/qc.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION probkb.qc() RETURNS BIGINT AS $$ 2 | DECLARE 3 | deleted1 BIGINT := 0; 4 | deleted2 BIGINT := 0; 5 | BEGIN 6 | -- Detecting ambiguity. 7 | INSERT INTO probkb.ambiguities 8 | SELECT DISTINCT r.ent1, r.class1 9 | FROM probkb.relationships r JOIN probkb.functionals f ON r.rel = f.rel 10 | WHERE f.arg = 1 11 | GROUP BY r.rel, ent1, class1, class2 12 | HAVING COUNT(*) > MIN(f.deg) 13 | EXCEPT 14 | SELECT ent, class FROM probkb.ambiguities; 15 | 16 | -- Remove ambiguous entities. 
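-- An (entity, class) pair is flagged as ambiguous above when it participates in a functional
-- relation more often than its declared degree allows (HAVING COUNT(*) > MIN(f.deg));
-- the two DELETE statements below drop every relationship involving such a pair, and the
-- function returns the total number of deleted rows.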
17 | DELETE FROM probkb.relationships 18 | WHERE (ent1, class1) IN ( 19 | SELECT ent, class FROM probkb.ambiguities 20 | ); 21 | GET DIAGNOSTICS deleted1 = ROW_COUNT; 22 | 23 | DELETE FROM probkb.relationships 24 | WHERE (ent2, class2) IN ( 25 | SELECT ent, class FROM probkb.ambiguities 26 | ); 27 | GET DIAGNOSTICS deleted2 = ROW_COUNT; 28 | 29 | RETURN deleted1 + deleted2; 30 | END; 31 | $$ LANGUAGE plpgsql; 32 | -------------------------------------------------------------------------------- /pgsql/sql/run.sql: -------------------------------------------------------------------------------- 1 | -- Run grounding phase... 2 | 3 | SELECT probkb.ground(); 4 | SELECT probkb.groundFactors(); 5 | -------------------------------------------------------------------------------- /pgsql/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | postgres -D db/ & 3 | -------------------------------------------------------------------------------- /pgsql/stop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | pg_ctl stop -D db/ 3 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | org.aksw.mandolin 5 | Mandolin 6 | 0.4.0 7 | Mandolin 8 | 9 | UTF-8 10 | 11 | 12 | 13 | 14 | maven-compiler-plugin 15 | 3.1 16 | 17 | 1.8 18 | 1.8 19 | 20 | 21 | 22 | maven-assembly-plugin 23 | 24 | 25 | 26 | org.aksw.mandolin.MainDolin 27 | 28 | 29 | 30 | jar-with-dependencies 31 | 32 | 33 | 34 | 35 | package 36 | 37 | single 38 | 39 | 40 | 41 | 42 | 43 | org.apache.maven.plugins 44 | maven-surefire-plugin 45 | 2.14.1 46 | 47 | 48 | **/*Test.java 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | maven.aksw.internal 57 | University Leipzig, AKSW Maven2 Repository 58 | http://maven.aksw.org/repository/internal 59 | 60 | 61 | 62 | maven.aksw.snapshots 63 | University Leipzig, AKSW Maven2 Repository 64 | http://maven.aksw.org/repository/snapshots 65 | 66 | 67 | 68 | 69 | 70 | org.apache.jena 71 | jena-core 72 | 2.13.0 73 | 74 | 75 | org.apache.jena 76 | jena-arq 77 | 2.13.0 78 | 79 | 80 | com.opencsv 81 | opencsv 82 | 3.3 83 | 84 | 85 | org.apache.logging.log4j 86 | log4j-core 87 | 2.4.1 88 | 89 | 90 | net.sf.supercsv 91 | super-csv 92 | 2.3.1 93 | 94 | 95 | com.github.mpkorstanje 96 | simmetrics-core 97 | 3.2.0 98 | 99 | 100 | it.unimi.dsi 101 | fastutil 102 | 6.3 103 | 104 | 105 | com.github.ansell.aterms 106 | aterm-java 107 | 1.8.2 108 | 109 | 110 | com.github.ansell.aterms 111 | shared-objects 112 | 1.4.9-p1 113 | 114 | 115 | com.clarkparsia.pellet 116 | pellet-core 117 | 2.4.0-SNAPSHOT 118 | 119 | 120 | com.clarkparsia.pellet 121 | pellet-jena 122 | 2.4.0-SNAPSHOT 123 | 124 | 125 | org.postgresql 126 | postgresql 127 | 9.4-1205-jdbc42 128 | 129 | 130 | it.tsoru.ppjoinhandler 131 | ppjoin-handler 132 | 0.1.1 133 | 134 | 135 | com.googlecode.rockit 136 | tsoru-rockit 137 | 0.5.277 138 | 139 | 140 | org.jgrapht 141 | jgrapht-core 142 | 0.9.1 143 | 144 | 145 | de.mpg.mpi-inf.amie 146 | amie-plus 147 | 2015-08-26 148 | 149 | 150 | commons-cli 151 | commons-cli 152 | 1.2 153 | 154 | 155 | Markov Logic Networks for the Discovery of Links. 
156 | http://mandolin.aksw.org/ 157 | 2014 158 | 159 | 160 | -------------------------------------------------------------------------------- /rockit.properties: -------------------------------------------------------------------------------- 1 | # CONFIGURATION FILE FOR ROCKIT 2 | # 3 | # Pay attention that you do not use spaces at the end of the line (do not write "root ", but only "root") 4 | # 5 | # 6 | # Specify your data how to access your MySQL installation 7 | #sql_username= 8 | #sql_password= 9 | #sql_url= 10 | #?useCursorFetch=true&defaultFetchSize=1000 11 | # 12 | # 13 | # Specify a temp folder where files are created and deleted during runtime. 14 | temp_path=tmp/ 15 | # 16 | # Here you can set the name of the database that will be used. 17 | # All data in this database will be deleted 18 | #sql_database= 19 | # 20 | # ================================================================== 21 | # PARAMETERS 22 | # ================================================================== 23 | # The standard setting is usually optimal. If your problem is too complex, you might want to increase the gurobi_tollerance parameter. 24 | # 25 | # ------------------------------------------------------------------ 26 | # Maximum a-posteriori inference with ILP 27 | # ------------------------------------------------------------------ 28 | # 29 | # Sets the gap of the Gurobi solver. This gives exacter (value around 0.000001) or 30 | # more approximative (value 0.01) solutions. 31 | # If the gap is set to -1, the standard Gurobi gap is used. 32 | # HINT: If your problem does not terminate, then increase this value (take for instance 0.01) 33 | gap=-1 34 | # gap=0.01 35 | # gap=0.001 36 | #... 37 | # gap=0.000001 38 | # 39 | # Enables cutting-plane inference (do not add all constraints at once but add them step by step) 40 | # Reference: http://arxiv.org/abs/1206.3282 41 | # Advantage: Problems are much smaller, thus much faster to solve 42 | # Disadvantage: More than one problem has to be solved (but usually the overall time is still smaller) 43 | # use_cutting_plane_inference=true 44 | use_cutting_plane_inference=true 45 | # 46 | # If activated, variables will be aggregated. 47 | # Reference: https://rockit.googlecode.com/files/rockit.pdf 48 | # Advantage: Less formulas in ILP, ILP runs faster. 49 | # Disadvantage: Maybe a slightly slower Java Code (but usually the overall time is faster when activated) 50 | # use_cutting_plane_aggregation=true 51 | use_cutting_plane_aggregation=true 52 | # 53 | # Number of threads (integer). Determines the number of threads created for SQL processing. 54 | # If value is -1 then the number of threads is your number of CPUs + 1. 55 | number_of_threads=-1 56 | # 57 | # 58 | #Time limit in seconds. Limits the total time expended for the gurobi solver (in seconds). 59 | #Note that this only limits the time of the gurobi solver. Since there can be multiple calls of the gurobi solver, the overall runtime 60 | #is usually larger. 61 | #If -1 is set, time limit is infinitive. 62 | time_limit=-1 63 | # 64 | # 65 | # ------------------------------------------------------------------ 66 | # Marginal inference using Gibbs sampling in combination with symmetry detection 67 | # ------------------------------------------------------------------ 68 | # 69 | # Leverage Symmetries: 70 | # Reference: http://arxiv.org/abs/1304.2694 71 | # Advantage: The resulting marginal probabilities are of a higher quality since all their symmetries 72 | # are leveraged. 
This usually leads to a significantly better result in fewer samples (proven). 73 | # Disadvantage: Minimally longer runtime and needs to run saucy in the background (not possible on Apple iOS machines) 74 | use_symmetries_in_marginal_inference=false 75 | # 76 | # ------------------------------------------------------------------ 77 | # General Parameters 78 | # ------------------------------------------------------------------ 79 | # 80 | # 81 | # Enables larger (debug) output. Additionally, it prints the ILP into the file "model.lp" before solving. 82 | # Important: Set this to false for runtime evaluations. 83 | # debug_output=false 84 | debug_output=true 85 | # 86 | # Simplifies formulas with negative weights, as is done in Tuffy. 87 | # Example: 88 | # -2 student(a1) v !advisedBy(a2,a1) v !advisedBy(a1,a2) 89 | # --> 90 | # 2 student(a1) n advisedBy(a2,a1) n advisedBy(a1,a2) 91 | # --> 92 | # 1 student(a1) v advisedBy(a1,a2) 93 | # 1 student(a1) v advisedBy(a2,a1) 94 | # 95 | # The formulas with negative weights are changed so that: 96 | # - Their weight is multiplied by -1 (made positive). 97 | # - Each non-negated hidden predicate (in the restriction part) is set to its negation. 98 | # - Each negated hidden predicate (in the restriction part) is set to be non-negated. 99 | # - The formula becomes a conjunction 100 | # 101 | # The formulas with conjunctions are changed so that: 102 | # - The weight is divided by the number of hidden predicates. 103 | # - For each predicate a new formula is created. 104 | # 105 | # If deactivated, negative weights and conjunctions are left as they are (no changes are made) 106 | simplify_negative_weight_and_conjunction=false 107 | #simplify_negative_weight_and_conjunction=false 108 | # 109 | # Changes the handling of String variables: 110 | # converts string values to extra observed predicates if they occur in positive predicates. 111 | # Advice: leave this value as it is 112 | convert_string_values=true 113 | # 114 | # Select ILP solver: 115 | # GUROBI, CPLEX, SCIP 116 | # default: GUROBI 117 | ilp_solver=GUROBI 118 | # 119 | scip= 120 | # it was C:\\scip-3.1.0.exe -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/MainDolin.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin; 2 | 3 | import java.util.Arrays; 4 | 5 | import org.aksw.mandolin.eval.CrossValidation; 6 | import org.aksw.mandolin.eval.MeanRankCalc; 7 | 8 | /** 9 | * Main controller for Mandolin. 
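* Dispatches on the first command-line argument: "plain" runs the Mandolin pipeline, "eval" runs MeanRankCalc, and "cv" runs CrossValidation; the remaining arguments are forwarded to the selected goal.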
10 | * 11 | * @author Tommaso Soru 12 | * 13 | */ 14 | public class MainDolin { 15 | 16 | public static void main(String[] args) throws NumberFormatException, Exception { 17 | 18 | String[] argsw = Arrays.copyOfRange(args, 1, args.length); 19 | 20 | switch(args[0]) { 21 | case "plain": 22 | Mandolin.main(argsw); 23 | break; 24 | case "eval": 25 | MeanRankCalc.main(argsw); 26 | break; 27 | case "cv": 28 | CrossValidation.main(argsw); 29 | break; 30 | default: 31 | } 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/Mandolin.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin; 2 | 3 | import java.io.File; 4 | import java.util.Arrays; 5 | import java.util.Scanner; 6 | 7 | import org.aksw.mandolin.controller.Classes; 8 | import org.aksw.mandolin.controller.Evidence; 9 | import org.aksw.mandolin.controller.NameMapper; 10 | import org.aksw.mandolin.controller.OntoImporter; 11 | import org.aksw.mandolin.controller.ProbKBData; 12 | import org.aksw.mandolin.controller.Validator; 13 | import org.aksw.mandolin.grounding.Grounding; 14 | import org.aksw.mandolin.inference.ProbKBToRockitGibbsSampling; 15 | import org.aksw.mandolin.model.PredictionSet; 16 | import org.aksw.mandolin.reasoner.PelletReasoner; 17 | import org.aksw.mandolin.rulemining.RDFToTSV; 18 | import org.aksw.mandolin.rulemining.RuleMiner; 19 | import org.aksw.mandolin.util.PostgreNotStartedException; 20 | import org.aksw.mandolin.util.SetUtils; 21 | import org.apache.logging.log4j.LogManager; 22 | import org.apache.logging.log4j.Logger; 23 | 24 | /** 25 | * The final pipeline for MANDOLIN, a scalable join of several 26 | * statistical-relational-learning algorithms to predict RDF links of any type 27 | * (i.e., triples) in one or more RDF datasets using rule mining of Horn 28 | * clauses, Markov Logic Networks, and Gibbs Sampling. 29 | * 30 | * @author Tommaso Soru 31 | * 32 | */ 33 | public class Mandolin { 34 | 35 | private final static Logger logger = LogManager.getLogger(Mandolin.class); 36 | 37 | private static final int THETA_MIN = 0; 38 | private static final int THETA_MAX = 10; 39 | // input datasets 40 | private String[] inputPaths; 41 | private String workspace; 42 | private String aimRelation; 43 | 44 | // thresholds for similarity joins among datatype values 45 | private int thrMin; 46 | private int thrMax; 47 | private int thrStep; 48 | 49 | /** 50 | * Enable ontology import. 51 | */ 52 | private boolean enableOnt; 53 | 54 | /** 55 | * Enable forward chain. 56 | */ 57 | private boolean enableFwc; 58 | 59 | /** 60 | * Enable similarity graph enrichment. 61 | */ 62 | private boolean enableSim; 63 | 64 | /* 65 | * Threshold for head-coverage method in rule mining. If null, support method is used. 66 | */ 67 | private Double mining; 68 | 69 | /** 70 | * Maximum number of rules to mine. 71 | */ 72 | private Integer maxRules; 73 | 74 | // ------------------------------------------------------------------------- 75 | 76 | private NameMapper map; 77 | 78 | /** 79 | * Iteration for the Gibbs sampling. 
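* (According to the README, this defaults to 100 x the evidence size when --sampling is not given.)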
80 | */ 81 | private Integer sampling; 82 | 83 | /** 84 | * @param workspace workspace path 85 | * @param csInputPaths comma-separated input paths 86 | * @param aimRelation aim relation URI 87 | * @param thrMin 88 | * @param thrStep 89 | * @param thrMax 90 | * @param enableOnt 91 | * @param enableFwc 92 | * @param enableSim 93 | */ 94 | public Mandolin(String workspace, String csInputPaths, String aimRelation, int thrMin, int thrStep, int thrMax, boolean enableOnt, boolean enableFwc, boolean enableSim) { 95 | super(); 96 | 97 | this.workspace = workspace; 98 | this.inputPaths = csInputPaths.split(","); 99 | this.aimRelation = aimRelation; 100 | this.thrMin = thrMin; 101 | this.thrStep = thrStep; 102 | this.thrMax = thrMax; 103 | this.enableOnt = enableOnt; 104 | this.enableFwc = enableFwc; 105 | this.enableSim = enableSim; 106 | 107 | map = new NameMapper(aimRelation); 108 | 109 | } 110 | 111 | /** 112 | * @throws Exception 113 | */ 114 | public void run() throws Exception { 115 | 116 | logger.info("Mandolin started!"); 117 | printInfo(); 118 | 119 | // create working directory 120 | new File(workspace).mkdirs(); 121 | 122 | if(enableOnt) { 123 | // inputs -> model-tmp.nt 124 | OntoImporter.run(workspace, inputPaths); 125 | } 126 | 127 | // inputs (or model-tmp.nt) -> model.nt (or model-fwc.nt) 128 | Validator.run(workspace, inputPaths, enableFwc, enableOnt); 129 | if(enableFwc) { 130 | // model.nt -> model-fwc.nt 131 | PelletReasoner.run(workspace); 132 | } 133 | 134 | // model-fwc.nt -> map (classes) 135 | Classes.build(map, workspace); 136 | // model-fwc.nt -> map (other) 137 | if(enableSim) 138 | Evidence.build(map, workspace, thrMin, thrMax, thrStep); 139 | else 140 | Evidence.build(map, workspace); 141 | 142 | if(logger.isTraceEnabled()) 143 | map.pretty(); 144 | 145 | logger.info("# entClasses: " + map.getEntClasses().size()); 146 | logger.info("# relClasses: " + map.getRelClasses().size()); 147 | logger.info("# relationships: " + map.getRelationships().size()); 148 | 149 | // map -> KB description csv 150 | ProbKBData.buildCSV(map, workspace); 151 | 152 | // model-fwc.nt -> model.tsv 153 | RDFToTSV.run(workspace); 154 | // model.tsv -> MLN csv 155 | RuleMiner.run(map, workspace, mining, maxRules); 156 | 157 | // csv -> Postgre factors 158 | Grounding.ground(workspace); 159 | 160 | // Postgre factors -> predictions 161 | PredictionSet pset = new ProbKBToRockitGibbsSampling(map).infer(sampling); 162 | 163 | pset.saveTo(workspace + "/predictions.dat"); 164 | 165 | for(int th=THETA_MIN; th<=THETA_MAX; th+=1) { 166 | double theta = th / 10.0; 167 | logger.info("theta = "+theta); 168 | 169 | // get set of predicted (just outputted) links 170 | String knowledge = workspace + "/model-fwc.nt"; 171 | String predicted = workspace + "/output_" + theta + ".nt"; 172 | pset.saveLinkset(map, theta, predicted); 173 | 174 | // compute set of discovered (emergent) links 175 | String discovered = workspace + "/discovered_" + theta + ".nt"; 176 | SetUtils.minus(predicted, knowledge, discovered); 177 | logger.debug("+++ DISCOVERED +++"); 178 | Scanner in = new Scanner(new File(discovered)); 179 | int size = 0; 180 | while(in.hasNextLine()) { 181 | logger.debug(in.nextLine()); 182 | size++; 183 | } 184 | in.close(); 185 | logger.info("Discovered triples size: "+size); 186 | } 187 | 188 | 189 | logger.info("Mandolin done."); 190 | 191 | } 192 | 193 | /** 194 | * 195 | */ 196 | private void printInfo() { 197 | logger.info("BASE = "+workspace); 198 | logger.info("INPUT_PATHS:"); 199 | for(String ip : 
inputPaths) 200 | logger.info("\t" + ip); 201 | logger.info("AIM_RELATION = "+aimRelation); 202 | logger.info("ONTO_IMPORT = "+enableOnt); 203 | logger.info("FORWARD_CHAIN = "+enableFwc); 204 | logger.info("SIMILARITIES = "+enableSim); 205 | logger.info("THR = [min="+thrMin+", step="+thrStep+", max="+thrMax+"]"); 206 | logger.info("MINING_THR = "+mining); 207 | logger.info("MAX_RULES = "+maxRules); 208 | logger.info("SAMPLING_ITER = "+sampling); 209 | } 210 | 211 | 212 | public NameMapper getMap() { 213 | return map; 214 | } 215 | 216 | public static void main(String[] args) throws Exception { 217 | 218 | logger.info("Mandolin initialized with args = {}", Arrays.toString(args)); 219 | 220 | String output = null, input = null, aim = "false", rules = null, 221 | onto = "false", fwc = "false", mining = null, sampling = null; 222 | String[] simVal = {"-1", "-1", "-1"}; 223 | 224 | for(int i=0; i 34 | * 35 | */ 36 | public class MandolinCommon { 37 | 38 | public static final String SRC_PATH = "datasets/DBLPL3S.nt"; 39 | public static final String TGT_PATH = "datasets/LinkedACM.nt"; 40 | public static final String LINKSET_PATH = "linksets/DBLPL3S-LinkedACM.nt"; 41 | public static final String GOLD_STANDARD_PATH = "linksets/DBLPL3S-LinkedACM-GoldStandard.nt"; 42 | 43 | public static final String BASE = "eval/09_publi-tuffy"; 44 | 45 | public static final String EVIDENCE_DB = BASE + "/evidence.db"; 46 | public static final String QUERY_DB = BASE + "/query.db"; 47 | public static final String PROG_MLN = BASE + "/prog.mln"; 48 | 49 | public static final int TRAINING_SIZE = Integer.MAX_VALUE; // TODO restore: 50 | // (int) (47 * 51 | // 0.9); 52 | 53 | private static final int THR_MIN = 80; 54 | private static final int THR_MAX = 90; 55 | private static final int THR_STEP = 10; 56 | 57 | private TreeSet unary = new TreeSet<>(); 58 | 59 | private NameMapperCommon map; 60 | 61 | public NameMapperCommon getMap() { 62 | return map; 63 | } 64 | 65 | public MandolinCommon() { 66 | 67 | map = new NameMapperCommon(); 68 | 69 | } 70 | 71 | private void run() throws FileNotFoundException { 72 | 73 | new File(BASE).mkdirs(); 74 | 75 | PrintWriter pwEvid = new PrintWriter(new File(EVIDENCE_DB)); 76 | graphEvidence(pwEvid); 77 | mappingEvidence(pwEvid, 0, TRAINING_SIZE); 78 | pwEvid.close(); 79 | 80 | buildQueryDB(new PrintWriter(new File(QUERY_DB))); 81 | 82 | buildProgMLN(new PrintWriter(new File(PROG_MLN))); 83 | 84 | } 85 | 86 | public void buildProgMLN(PrintWriter pwProg) { 87 | 88 | String sameAs = map.getName(URLs.OWL_SAMEAS); 89 | for (String name : map.getNamesByType(Type.PROPERTY)) { 90 | // closed world assumption is false for owl:sameAs 91 | String cw = name.equals(sameAs) ? "" : "*"; 92 | pwProg.write(cw + name + "(res, res)\n"); 93 | } 94 | for (int thr = THR_MIN; thr <= THR_MAX; thr += THR_STEP) 95 | pwProg.write("*Sim" + thr + "(res, res)\n"); 96 | for (String u : unary) 97 | pwProg.write("*" + u + "(res)\n"); 98 | pwProg.write("\n"); 99 | for (String name : map.getNamesByType(Type.PROPERTY)) { 100 | // symmetric property 101 | pwProg.write("1 !" + name + "(x, y) v " + name + "(y, x)\n"); 102 | pwProg.write("1 !" + name + "(y, x) v " + name + "(x, y)\n"); 103 | // transitive property 104 | pwProg.write("1 !" + name + "(x, y) v !" 
+ name + "(y, z) v " 105 | + name + "(x, z)\n"); 106 | } 107 | pwProg.close(); 108 | 109 | } 110 | 111 | public void buildQueryDB(PrintWriter pwQuery) { 112 | 113 | String sameAs = map.getName(URLs.OWL_SAMEAS); 114 | pwQuery.write(sameAs); 115 | pwQuery.close(); 116 | 117 | } 118 | 119 | public void graphEvidence(PrintWriter pwEvid) { 120 | 121 | final Cache cache = new Cache(); 122 | 123 | PPJoin ppjoin = new PPJoin(); 124 | 125 | Tokenizer tok = ppjoin.getTokenizer(); 126 | HashMap dataset = new HashMap<>(); 127 | 128 | // use a TreeSet to deduplicate 129 | final TreeSet setOfStrings = new TreeSet<>(); 130 | 131 | StreamRDF dataStream = new StreamRDF() { 132 | 133 | @Override 134 | public void base(String arg0) { 135 | } 136 | 137 | @Override 138 | public void finish() { 139 | } 140 | 141 | @Override 142 | public void prefix(String arg0, String arg1) { 143 | } 144 | 145 | @Override 146 | public void quad(Quad arg0) { 147 | } 148 | 149 | @Override 150 | public void start() { 151 | } 152 | 153 | @Override 154 | public void triple(Triple arg0) { 155 | String s = map.add(arg0.getSubject().getURI(), Type.RESOURCE); 156 | // System.out.println("Added "+s+" - "+map.getURI(s)); 157 | String p = map.add(arg0.getPredicate().getURI(), Type.PROPERTY); 158 | // System.out.println("Added "+p+" - "+map.getURI(p)); 159 | String o = map.add(arg0.getObject().toString(), Type.RESOURCE); 160 | // System.out.println("Added "+o+" - "+map.getURI(o)); 161 | 162 | if (pwEvid != null) { 163 | 164 | if (arg0.getPredicate().getURI() 165 | .equals(Commons.RDF_TYPE.getURI())) { 166 | System.out.println(o + "(" + s + ")"); 167 | System.out.println("NEWCLASS\t" + o + "\t" 168 | + arg0.getObject().toString()); 169 | pwEvid.write(o + "(" + s + ")\n"); 170 | unary.add(o); 171 | } else { 172 | System.out.println(p + "(" + s + ", " + o + ")"); 173 | pwEvid.write(p + "(" + s + ", " + o + ")\n"); 174 | } 175 | } 176 | 177 | if (arg0.getObject().isLiteral()) { 178 | String dtURI = arg0.getObject().getLiteralDatatypeURI(); 179 | 180 | boolean considerString; 181 | if (dtURI == null) 182 | considerString = true; 183 | else 184 | considerString = dtURI.equals(XSD.xstring.getURI()); 185 | 186 | if (considerString) { 187 | ComparableLiteral lit = new ComparableLiteral(arg0 188 | .getObject().getLiteral().toString(true), arg0 189 | .getObject().getLiteral().getValue().toString()); 190 | setOfStrings.add(lit); 191 | } 192 | } 193 | 194 | } 195 | 196 | }; 197 | 198 | RDFDataMgr.parse(dataStream, SRC_PATH); 199 | RDFDataMgr.parse(dataStream, TGT_PATH); 200 | 201 | map.pretty(); 202 | 203 | Iterator it = setOfStrings.iterator(); 204 | for (int i = 0; it.hasNext(); i++) { 205 | ComparableLiteral lit = it.next(); 206 | String val = lit.getVal(); 207 | cache.stringItems.add(new StringItem(tok.tokenize(val, false), i)); 208 | dataset.put(i, lit); 209 | } 210 | 211 | System.out.println(cache.stringItems.size()); 212 | List stringItems = cache.stringItems; 213 | 214 | StringItem[] strDatum = stringItems.toArray(new StringItem[stringItems 215 | .size()]); 216 | Arrays.sort(strDatum); 217 | 218 | ppjoin.setUseSortAtExtractPairs(false); 219 | 220 | for (int thr = THR_MIN; thr <= THR_MAX; thr += THR_STEP) { 221 | System.out.println("thr = " + (thr / 100.0)); 222 | List> result = ppjoin.extractPairs( 223 | strDatum, thr / 100.0); 224 | for (Entry entry : result) { 225 | ComparableLiteral lit1 = dataset.get(entry.getKey().getId()); 226 | ComparableLiteral lit2 = dataset.get(entry.getValue().getId()); 227 | pwEvid.write("Sim" + thr + "(" + 
map.getName(lit1.getUri()) 228 | + ", " + map.getName(lit2.getUri()) + ")\n"); 229 | System.out.println(lit1.getUri() + " <=> " + lit2.getUri()); 230 | System.out.println(lit1.getVal() + " <=> " + lit2.getVal()); 231 | } 232 | } 233 | 234 | } 235 | 236 | public void mappingEvidence(PrintWriter pwEvid, final int START, 237 | final int END) { 238 | 239 | final Cache training = new Cache(); 240 | 241 | StreamRDF mapStream = new StreamRDF() { 242 | 243 | @Override 244 | public void base(String arg0) { 245 | } 246 | 247 | @Override 248 | public void finish() { 249 | } 250 | 251 | @Override 252 | public void prefix(String arg0, String arg1) { 253 | } 254 | 255 | @Override 256 | public void quad(Quad arg0) { 257 | } 258 | 259 | @Override 260 | public void start() { 261 | } 262 | 263 | @Override 264 | public void triple(Triple arg0) { 265 | String s = map.add(arg0.getSubject().getURI(), Type.RESOURCE); 266 | // System.out.println("Added "+s+" - "+map.getURI(s)); 267 | String p = map.add(arg0.getPredicate().getURI(), Type.PROPERTY); 268 | // System.out.println("Added "+p+" - "+map.getURI(p)); 269 | String o = map.add(arg0.getObject().toString(), Type.RESOURCE); 270 | // System.out.println("Added "+o+" - "+map.getURI(o)); 271 | 272 | if (pwEvid != null) { 273 | int c = ++training.count; 274 | if (START <= c && c <= END) { 275 | System.out.println(training.count + "\t" + p + "(" + s 276 | + ", " + o + ")"); 277 | pwEvid.write(p + "(" + s + ", " + o + ")\n"); 278 | } 279 | } 280 | } 281 | 282 | }; 283 | 284 | RDFDataMgr.parse(mapStream, LINKSET_PATH); 285 | 286 | } 287 | 288 | public void closureEvidence(PrintWriter pwEvid) { 289 | 290 | StreamRDF mapStream = new StreamRDF() { 291 | 292 | @Override 293 | public void base(String arg0) { 294 | } 295 | 296 | @Override 297 | public void finish() { 298 | } 299 | 300 | @Override 301 | public void prefix(String arg0, String arg1) { 302 | } 303 | 304 | @Override 305 | public void quad(Quad arg0) { 306 | } 307 | 308 | @Override 309 | public void start() { 310 | } 311 | 312 | @Override 313 | public void triple(Triple arg0) { 314 | String s = map.getName(arg0.getSubject().getURI()); 315 | String p = map.getName(arg0.getPredicate().getURI()); 316 | String o = map.getName(arg0.getObject().toString()); 317 | 318 | if (s == null || p == null || o == null) 319 | System.err.println("HALT!"); 320 | 321 | if (pwEvid != null) { 322 | pwEvid.write(p + "(" + s + ", " + o + ")\n"); 323 | } 324 | } 325 | 326 | }; 327 | 328 | RDFDataMgr.parse(mapStream, GOLD_STANDARD_PATH); 329 | 330 | } 331 | 332 | public static void main(String[] args) throws FileNotFoundException { 333 | 334 | // System.err.println("Launch line commented to prevent file overwrite."); 335 | new MandolinCommon().run(); 336 | 337 | } 338 | 339 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/common/NameMapperCommon.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.common; 2 | 3 | import java.util.HashMap; 4 | import java.util.TreeSet; 5 | 6 | /** 7 | * @author Tommaso Soru 8 | * 9 | */ 10 | public class NameMapperCommon { 11 | 12 | private HashMap mlnToUri = new HashMap<>(); 13 | private HashMap uriToMln = new HashMap<>(); 14 | 15 | private HashMap> listByType = new HashMap<>(); 16 | 17 | public enum Type { 18 | RESOURCE("Res"), PROPERTY("Prop"); 19 | private String str; 20 | Type(String str) { 21 | this.str = str; 22 | } 23 | public String toString() { 24 | return str; 25 | 
} 26 | } 27 | 28 | private HashMap count = new HashMap<>(); 29 | 30 | public NameMapperCommon() { 31 | super(); 32 | count.put(Type.RESOURCE, 0); 33 | count.put(Type.PROPERTY, 0); 34 | listByType.put(Type.RESOURCE, new TreeSet<>()); 35 | listByType.put(Type.PROPERTY, new TreeSet<>()); 36 | } 37 | 38 | /** 39 | * Add an URI to the map and return the MLN name. 40 | * 41 | * @param uri 42 | * @return 43 | */ 44 | public String add(String uri, Type type) { 45 | 46 | if(uriToMln.containsKey(uri)) 47 | return uriToMln.get(uri); 48 | 49 | String name = type.toString() + count.get(type); 50 | mlnToUri.put(name, uri); 51 | uriToMln.put(uri, name); 52 | listByType.get(type).add(name); 53 | increase(type); 54 | return name; 55 | } 56 | 57 | private void increase(Type type) { 58 | count.put(type, count.get(type) + 1); 59 | } 60 | 61 | public String getURI(String name) { 62 | return mlnToUri.get(name); 63 | } 64 | 65 | public String getName(String uri) { 66 | return uriToMln.get(uri); 67 | } 68 | 69 | public TreeSet getNamesByType(Type type) { 70 | return listByType.get(type); 71 | } 72 | 73 | public void pretty() { 74 | for(String key : mlnToUri.keySet()) 75 | // if(listByType.get(Type.PROPERTY).contains(key)) // TODO remove me! 76 | System.out.println(key + "\t" + mlnToUri.get(key)); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/Classes.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import org.aksw.mandolin.controller.NameMapper.Type; 4 | import org.aksw.mandolin.util.URIHandler; 5 | import org.apache.jena.riot.RDFDataMgr; 6 | import org.apache.jena.riot.system.StreamRDF; 7 | import org.apache.logging.log4j.LogManager; 8 | import org.apache.logging.log4j.Logger; 9 | 10 | import com.hp.hpl.jena.graph.Triple; 11 | import com.hp.hpl.jena.sparql.core.Quad; 12 | import com.hp.hpl.jena.vocabulary.OWL; 13 | import com.hp.hpl.jena.vocabulary.RDF; 14 | import com.hp.hpl.jena.vocabulary.RDFS; 15 | 16 | /** 17 | * @author Tommaso Soru 18 | * 19 | */ 20 | public class Classes { 21 | 22 | private final static Logger logger = LogManager.getLogger(Classes.class); 23 | 24 | private final static Cache size = new Cache(); 25 | 26 | /** 27 | * @param map 28 | * @param SRC_PATH 29 | * @param TGT_PATH 30 | */ 31 | public static void build(final NameMapper map, final String BASE) { 32 | 33 | final CollectionCache nodes = new CollectionCache(); 34 | // final CollectionCache classes = new CollectionCache(); 35 | 36 | // reader implementation 37 | StreamRDF dataStream = new StreamRDF() { 38 | 39 | @Override 40 | public void base(String arg0) { 41 | } 42 | 43 | @Override 44 | public void finish() { 45 | } 46 | 47 | @Override 48 | public void prefix(String arg0, String arg1) { 49 | } 50 | 51 | @Override 52 | public void quad(Quad arg0) { 53 | } 54 | 55 | @Override 56 | public void start() { 57 | } 58 | 59 | @Override 60 | public void triple(Triple arg0) { 61 | String s = URIHandler.parse(arg0.getSubject()); 62 | String p = arg0.getPredicate().getURI(); 63 | String o = arg0.getObject().toString(); 64 | 65 | // if property is rdf:type... 66 | if(p.equals(RDF.type.getURI())) { 67 | // then object is always a class 68 | String className = map.add(o, Type.CLASS); 69 | // if object is :Class... 
70 | if(o.equals(OWL.Class.getURI()) || 71 | o.equals(RDFS.Class.getURI())) { 72 | // then also subject is a class 73 | map.add(s, Type.CLASS); 74 | } else { 75 | // else subject is an entity 76 | // XXX subject could be even a property! 77 | String entName = map.add(s, Type.ENTITY); 78 | map.addEntClass(entName, className); 79 | } 80 | // // save class 81 | // // TODO this could be extended to all properties with domain or range = rdfs:Class 82 | // classes.set.add(o); 83 | } 84 | 85 | map.add(s, Type.ENTITY); 86 | map.add(o, Type.ENTITY); 87 | 88 | // save nodes 89 | nodes.set.add(s); 90 | nodes.set.add(o); 91 | 92 | // // save property 93 | // properties.set.add(p); 94 | // count triples 95 | size.value++; 96 | } 97 | 98 | }; 99 | 100 | RDFDataMgr.parse(dataStream, BASE + "/model-fwc.nt"); 101 | 102 | logger.info("Adding owl:Thing type to {} nodes.", nodes.set.size()); 103 | for(String s : nodes.set) 104 | map.addEntClass(map.toName(s), map.getOwlThingName()); 105 | 106 | map.setCollisionDelta(collisionDelta()); 107 | 108 | } 109 | 110 | /** 111 | * Compute the upper bound for the order of magnitude of entities and return the sum to add to avoid ID collision. 112 | * 113 | * @return 114 | */ 115 | public static int collisionDelta() { 116 | int upper = (int) Math.log10(size.value * 2) + 1; 117 | return (int) Math.pow(10, upper); 118 | } 119 | 120 | 121 | } 122 | 123 | class Cache { 124 | int value = 0; 125 | } 126 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/Evidence.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.TreeSet; 7 | 8 | import org.aksw.mandolin.controller.NameMapper.Type; 9 | import org.aksw.mandolin.model.Cache; 10 | import org.aksw.mandolin.model.ComparableLiteral; 11 | import org.aksw.mandolin.util.URIHandler; 12 | import org.apache.jena.riot.Lang; 13 | import org.apache.jena.riot.RDFDataMgr; 14 | import org.apache.jena.riot.system.StreamRDF; 15 | import org.apache.jena.riot.system.StreamRDFWriter; 16 | import org.apache.logging.log4j.LogManager; 17 | import org.apache.logging.log4j.Logger; 18 | 19 | import com.hp.hpl.jena.graph.Node; 20 | import com.hp.hpl.jena.graph.Triple; 21 | import com.hp.hpl.jena.sparql.core.Quad; 22 | import com.hp.hpl.jena.vocabulary.RDF; 23 | import com.hp.hpl.jena.vocabulary.RDFS; 24 | import com.hp.hpl.jena.vocabulary.XSD; 25 | 26 | /** 27 | * @author Tommaso Soru 28 | * 29 | */ 30 | public class Evidence { 31 | 32 | private final static Logger logger = LogManager.getLogger(Evidence.class); 33 | 34 | /** 35 | * @param map 36 | * @param SRC_PATH 37 | * @param TGT_PATH 38 | * @param LNK_PATH 39 | * @param THR_MIN 40 | * @param THR_MAX 41 | * @param THR_STEP 42 | */ 43 | public static void build(final NameMapper map, final String BASE, 44 | final int THR_MIN, final int THR_MAX, final int THR_STEP) { 45 | 46 | // for similarity join 47 | final Cache cache = new Cache(); 48 | 49 | final TreeSet setOfStrings = build(map, BASE); 50 | 51 | // call similarity join 52 | SimilarityJoin.build(map, setOfStrings, cache, BASE, THR_MIN, THR_MAX, 53 | THR_STEP); 54 | 55 | // append model-sim-fwc.nt to model-fwc.nt 56 | final FileOutputStream output; 57 | try { 58 | output = new FileOutputStream(new File(BASE + "/model-sim-temp.nt")); 59 | } catch (FileNotFoundException e) { 
60 | logger.fatal(e.getMessage()); 61 | throw new RuntimeException("File " + BASE + "/model-sim-temp.nt not found!"); 62 | } 63 | 64 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 65 | writer.start(); 66 | 67 | StreamRDF reader = new StreamRDF() { 68 | 69 | @Override 70 | public void triple(Triple triple) { 71 | writer.triple(triple); 72 | } 73 | 74 | @Override 75 | public void start() { 76 | } 77 | 78 | @Override 79 | public void quad(Quad quad) { 80 | } 81 | 82 | @Override 83 | public void prefix(String prefix, String iri) { 84 | } 85 | 86 | @Override 87 | public void finish() { 88 | } 89 | 90 | @Override 91 | public void base(String base) { 92 | } 93 | 94 | }; 95 | 96 | RDFDataMgr.parse(reader, BASE + "/model-fwc.nt"); 97 | 98 | StreamRDF readerSim = new StreamRDF() { 99 | 100 | @Override 101 | public void triple(Triple triple) { 102 | writer.triple(triple); 103 | String s = triple.getSubject().getURI(); 104 | String p = triple.getPredicate().getURI(); 105 | 106 | String o = parse(triple.getObject()); 107 | if(o == null) 108 | return; 109 | // String relName = 110 | map.add(p, Type.RELATION); 111 | // String name1 = 112 | map.add(s, Type.ENTITY); 113 | // String name2 = 114 | map.add(o, Type.ENTITY); 115 | 116 | // XXX oddly this shall be off 117 | // map.addRelationship(relName, name1, name2); 118 | } 119 | 120 | @Override 121 | public void start() { 122 | } 123 | 124 | @Override 125 | public void quad(Quad quad) { 126 | } 127 | 128 | @Override 129 | public void prefix(String prefix, String iri) { 130 | } 131 | 132 | @Override 133 | public void finish() { 134 | } 135 | 136 | @Override 137 | public void base(String base) { 138 | } 139 | 140 | }; 141 | 142 | RDFDataMgr.parse(readerSim, BASE + "/model-sim-fwc.nt"); 143 | 144 | writer.finish(); 145 | 146 | 147 | // delete old file, rename temp file 148 | new File(BASE + "/model-fwc.nt").delete(); 149 | new File(BASE + "/model-sim-temp.nt").renameTo(new File(BASE + "/model-fwc.nt")); 150 | 151 | } 152 | 153 | /** 154 | * @param map 155 | * @param BASE 156 | */ 157 | public static final TreeSet build(final NameMapper map, final String BASE) { 158 | 159 | final TreeSet setOfStrings = new TreeSet<>(); 160 | 161 | // reader implementation 162 | StreamRDF dataStream = new StreamRDF() { 163 | 164 | @Override 165 | public void base(String arg0) { 166 | } 167 | 168 | @Override 169 | public void finish() { 170 | } 171 | 172 | @Override 173 | public void prefix(String arg0, String arg1) { 174 | } 175 | 176 | @Override 177 | public void quad(Quad arg0) { 178 | } 179 | 180 | @Override 181 | public void start() { 182 | } 183 | 184 | @Override 185 | public void triple(Triple arg0) { 186 | String s = URIHandler.parse(arg0.getSubject()); 187 | String p = arg0.getPredicate().getURI(); 188 | // TODO if (o.isBlankNode) => URIHandler 189 | String o = parse(arg0.getObject()); 190 | if(o == null) 191 | return; 192 | 193 | String relName = map.add(p, Type.RELATION); 194 | String subjName = map.add(s, Type.ENTITY); 195 | String objName = map.add(o, Type.ENTITY); 196 | 197 | // now check for non-instantiations... 
198 | if (!p.equals(RDF.type.getURI())) { 199 | // it is supposed that the map contains only classes 200 | // and instances of these classes (see Classes.build) 201 | // assume non-instantiated resources are entities 202 | 203 | // domain/range specification 204 | if (p.equals(RDFS.domain.getURI())) { 205 | subjName = map.add(s, Type.RELATION); 206 | // property name, target class, is domain 207 | map.addRelClass(subjName, objName, true); 208 | } 209 | if (p.equals(RDFS.range.getURI())) { 210 | subjName = map.add(s, Type.RELATION); 211 | // property name, target class, is range 212 | map.addRelClass(subjName, objName, false); 213 | } 214 | 215 | // if subject or object are not found, it means that they 216 | // have not been instantiated earlier (see Classes.build) 217 | if (subjName == null) 218 | // not found => instance subject, create entity 219 | subjName = map.add(s, Type.ENTITY); 220 | else { 221 | // create entity form for class 222 | if (subjName.startsWith(Type.CLASS.toString())) 223 | subjName = map.classToEntityForm(subjName); 224 | // create stable entity form for properties 225 | if (subjName.startsWith(Type.RELATION.toString())) 226 | subjName = map.relationToEntityForm(subjName); 227 | 228 | } 229 | if (objName == null) 230 | // not found => instance/datatype object, create entity 231 | objName = map.add(o, Type.ENTITY); 232 | else { 233 | // create entity form for class 234 | if (objName.startsWith(Type.CLASS.toString())) 235 | objName = map.classToEntityForm(objName); 236 | // create stable entity form for properties 237 | if (objName.startsWith(Type.RELATION.toString())) 238 | objName = map.relationToEntityForm(objName); 239 | } 240 | 241 | // property, subject (entity), object (entity) names 242 | map.addRelationship(relName, subjName, objName); 243 | 244 | } 245 | 246 | if (arg0.getObject().isLiteral()) { 247 | String dtURI = arg0.getObject().getLiteralDatatypeURI(); 248 | 249 | boolean considerString; 250 | if (dtURI == null) 251 | considerString = true; 252 | else 253 | considerString = dtURI.equals(XSD.xstring.getURI()); 254 | 255 | if (considerString) { 256 | // ComparableLiteral lit = new ComparableLiteral(arg0 257 | // .getObject().getLiteral().toString(true), arg0 258 | // .getObject().getLiteral().getValue().toString()); 259 | ComparableLiteral lit = new ComparableLiteral(o, o); 260 | logger.trace(lit.getVal()); 261 | setOfStrings.add(lit); 262 | } 263 | 264 | map.addRelationship(relName, subjName, objName); 265 | } 266 | 267 | } 268 | 269 | }; 270 | 271 | RDFDataMgr.parse(dataStream, BASE + "/model-fwc.nt"); 272 | 273 | return setOfStrings; 274 | 275 | } 276 | 277 | private static String parse(Node obj) { 278 | try { 279 | if(obj.isURI()) 280 | return obj.getURI(); 281 | if(obj.isLiteral()) 282 | return obj.getLiteralValue().toString(); 283 | if(obj.isBlank()) 284 | return obj.getBlankNodeLabel(); 285 | } catch(Exception e) { 286 | logger.warn("Cannot parse node: "+obj); 287 | } 288 | return null; 289 | } 290 | } 291 | 292 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/NameMapper.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.util.HashMap; 4 | import java.util.TreeSet; 5 | 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | import com.hp.hpl.jena.vocabulary.OWL; 10 | import com.hp.hpl.jena.vocabulary.RDF; 11 | 12 | /** 13 | * @author 
Tommaso Soru 14 | * 15 | */ 16 | public class NameMapper { 17 | 18 | private final static Logger logger = LogManager.getLogger(NameMapper.class); 19 | 20 | private HashMap mlnToUri = new HashMap<>(); 21 | private HashMap uriToMln = new HashMap<>(); 22 | 23 | private HashMap> listByType = new HashMap<>(); 24 | 25 | private String RDF_TYPE_NAME; 26 | private String OWL_THING_NAME; 27 | private String AIM_NAME; 28 | 29 | public String getOwlThingId() { 30 | return OWL_THING_NAME.substring(ProbKBData.CLS_LENGTH); 31 | } 32 | 33 | public String getOwlThingName() { 34 | return OWL_THING_NAME; 35 | } 36 | 37 | public TreeSet getEntClasses() { 38 | return entClasses; 39 | } 40 | 41 | public TreeSet getRelClasses() { 42 | return relClasses; 43 | } 44 | 45 | public TreeSet getRelationships() { 46 | return relationships; 47 | } 48 | 49 | // TODO change to HashMap> 50 | private TreeSet entClasses = new TreeSet<>(); 51 | private TreeSet relClasses = new TreeSet<>(); 52 | private TreeSet relationships = new TreeSet<>(); 53 | 54 | public enum Type { 55 | CLASS, ENTITY, RELATION; 56 | public String toString() { 57 | return this.name(); 58 | } 59 | } 60 | 61 | private HashMap count = new HashMap<>(); 62 | 63 | private String aimURI; 64 | private int cDelta; 65 | 66 | public NameMapper(String aimURI) { 67 | super(); 68 | 69 | for(Type t : Type.values()) { 70 | count.put(t, 1); 71 | listByType.put(t, new TreeSet<>()); 72 | } 73 | // for comodity, the first element is always rdf:type 74 | RDF_TYPE_NAME = this.add(RDF.type.getURI(), Type.RELATION); 75 | logger.debug("Alias for rdf:type is " + RDF_TYPE_NAME); 76 | // same for owl:Thing 77 | OWL_THING_NAME = this.add(OWL.Thing.getURI(), Type.CLASS); 78 | logger.debug("Alias for owl:Thing is " + OWL_THING_NAME); 79 | 80 | this.aimURI = aimURI; 81 | if(!aimURI.equals("*")) { 82 | AIM_NAME = this.add(aimURI, Type.RELATION); 83 | logger.debug("Alias for AIM ("+aimURI+") is " + AIM_NAME); 84 | } 85 | } 86 | 87 | /** 88 | * Add the instantiation of an entity. Duality class-entity: a class with ID=x has an entity counterpart with ID=-x. 89 | * 90 | * @param entName 91 | * @param className 92 | */ 93 | public void addEntClass(String entName, String className) { 94 | 95 | if(entName.startsWith(Type.CLASS.name())) 96 | entName = classToEntityForm(entName); 97 | if(entName.startsWith(Type.RELATION.name())) 98 | entName = relationToEntityForm(entName); 99 | 100 | logger.trace("ENTCLASS: "+entName+", "+className); 101 | entClasses.add(entName + "#" + className); 102 | entClasses.add(entName + "#" + OWL_THING_NAME); 103 | // add an rdf:type relationship 104 | this.addRelationship(RDF_TYPE_NAME, entName, Type.ENTITY.toString() + "-" + className.substring(ProbKBData.CLS_LENGTH)); 105 | // add rdf:type owl:Thing 106 | this.addRelationship(RDF_TYPE_NAME, entName, Type.ENTITY.toString() + "-" + OWL_THING_NAME.substring(ProbKBData.CLS_LENGTH)); 107 | } 108 | 109 | /** 110 | * Add domain or range for a relation. 
111 | * 112 | * @param relName 113 | * @param className 114 | * @param isDomain 115 | */ 116 | public void addRelClass(String relName, String className, boolean isDomain) { 117 | relClasses.add(relName + "#" + className + "#" + isDomain); 118 | } 119 | 120 | public void addRelationship(String relName, String name1, String name2) { 121 | if(relName.startsWith(Type.ENTITY.toString())) { 122 | String before = relName; 123 | relName = entityToRelationForm(relName); 124 | // some properties had been recognised as entities before 125 | String uri = mlnToUri.get(before); 126 | mlnToUri.put(relName, uri); 127 | uriToMln.put(uri, relName); 128 | } 129 | relationships.add(relName + "#" + name1 + "#" + name2); 130 | } 131 | 132 | public String entityToRelationForm(String relName) { 133 | String idr = String.valueOf(Integer.parseInt(relName.substring(ProbKBData.ENT_LENGTH)) + cDelta); 134 | logger.trace(relName+" => "+idr); 135 | relName = Type.RELATION.toString() + idr; 136 | return relName; 137 | } 138 | 139 | public String relationToEntityForm(String relName) { 140 | String idr = String.valueOf(Integer.parseInt(relName.substring(ProbKBData.REL_LENGTH)) + cDelta); 141 | logger.trace(relName+" => "+idr); 142 | relName = Type.ENTITY.toString() + idr; 143 | return relName; 144 | } 145 | 146 | public String classToEntityForm(String className) { 147 | return Type.ENTITY.toString() + "-" 148 | + className.substring(ProbKBData.CLS_LENGTH); 149 | } 150 | 151 | /** 152 | * Add an URI to the map and return the MLN name. 153 | * 154 | * @param uri 155 | * @return 156 | */ 157 | public String add(String uri, Type type) { 158 | 159 | if(uriToMln.containsKey(uri)) 160 | return uriToMln.get(uri); 161 | 162 | String name = type.toString() + count.get(type); 163 | mlnToUri.put(name, uri); 164 | uriToMln.put(uri, name); 165 | listByType.get(type).add(name); 166 | increase(type); 167 | return name; 168 | } 169 | 170 | public HashMap getNamesToURIs() { 171 | return mlnToUri; 172 | } 173 | 174 | private void increase(Type type) { 175 | count.put(type, count.get(type) + 1); 176 | } 177 | 178 | public String getURI(String name) { 179 | return mlnToUri.get(name); 180 | } 181 | 182 | public String getName(String uri) { 183 | return uriToMln.get(uri); 184 | } 185 | 186 | public boolean containsURI(String name) { 187 | return mlnToUri.containsKey(name); 188 | } 189 | 190 | public boolean containsName(String uri) { 191 | return uriToMln.containsKey(uri); 192 | } 193 | 194 | public TreeSet getNamesByType(Type type) { 195 | return listByType.get(type); 196 | } 197 | 198 | public void pretty() { 199 | for(String key : mlnToUri.keySet()) 200 | logger.trace(key + "\t" + mlnToUri.get(key)); 201 | } 202 | 203 | /** 204 | * Return only the ID (number after the Type) of the class the given entity belongs to. If not found, return the ID of owl:Thing. 
205 | * 206 | * @param entityName 207 | * @return 208 | */ 209 | public String classIdOf(String entityName) { 210 | for(String ec : entClasses) 211 | if(ec.startsWith(entityName+"#")) 212 | return ec.substring(ProbKBData.CLS_LENGTH); 213 | return OWL_THING_NAME.substring(ProbKBData.CLS_LENGTH); 214 | } 215 | 216 | public String getAim() { 217 | return aimURI; 218 | } 219 | 220 | public String getAimName() { 221 | return AIM_NAME; 222 | } 223 | 224 | public void setCollisionDelta(int cDelta) { 225 | logger.debug("Collision delta: "+cDelta); 226 | this.cDelta = cDelta; 227 | } 228 | 229 | public String toName(String uri) { 230 | String name = uriToMln.get(uri); 231 | if(name.startsWith(Type.ENTITY.name())) 232 | return name; 233 | if(name.startsWith(Type.CLASS.name())) 234 | return classToEntityForm(name); 235 | // relation 236 | return relationToEntityForm(name); 237 | } 238 | 239 | public static int parse(String string) { 240 | String sub = null; 241 | if(string.startsWith(Type.CLASS.name())) 242 | sub = Type.CLASS.name(); 243 | if(string.startsWith(Type.ENTITY.name())) 244 | sub = Type.ENTITY.name(); 245 | if(string.startsWith(Type.RELATION.name())) 246 | sub = Type.RELATION.name(); 247 | return Integer.parseInt(string.substring(sub.length())); 248 | } 249 | 250 | } 251 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/OntoImporter.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.net.URL; 8 | import java.util.TreeSet; 9 | 10 | import org.aksw.mandolin.util.PrettyRandom; 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.jena.riot.Lang; 13 | import org.apache.jena.riot.RDFDataMgr; 14 | import org.apache.jena.riot.RiotException; 15 | import org.apache.jena.riot.system.StreamRDF; 16 | import org.apache.jena.riot.system.StreamRDFWriter; 17 | import org.apache.logging.log4j.LogManager; 18 | import org.apache.logging.log4j.Logger; 19 | 20 | import com.hp.hpl.jena.graph.Triple; 21 | import com.hp.hpl.jena.rdf.model.Model; 22 | import com.hp.hpl.jena.rdf.model.ModelFactory; 23 | import com.hp.hpl.jena.rdf.model.Statement; 24 | import com.hp.hpl.jena.rdf.model.StmtIterator; 25 | import com.hp.hpl.jena.sparql.core.Quad; 26 | import com.hp.hpl.jena.vocabulary.OWL; 27 | import com.hp.hpl.jena.vocabulary.RDF; 28 | import com.hp.hpl.jena.vocabulary.RDFS; 29 | 30 | /** 31 | * Ontologies cannot be imported using Jena, because most datasets are not 32 | * actual OWL files and thus do not provide meta-information about used and 33 | * imported ontologies. For instance, an N-Triples file may use URIs 34 | * which are referenced only in the file itself. Physically visiting these URIs is 35 | * a way to retrieve their definitions. In this version, we limit the URIs to 36 | * classes and properties. 
37 | * 38 | * @author Tommaso Soru 39 | * @version 0.0.1 40 | * 41 | */ 42 | public class OntoImporter { 43 | 44 | private final static Logger logger = LogManager.getLogger(OntoImporter.class); 45 | 46 | private static final Lang[] LANG_ATTEMPTS = {Lang.RDFXML, Lang.TTL, Lang.NT}; 47 | 48 | /** 49 | * @param BASE 50 | * @param paths 51 | */ 52 | public static void run(final String BASE, final String[] paths) { 53 | 54 | final CollectionCache properties = new CollectionCache(); 55 | final CollectionCache classes = new CollectionCache(); 56 | 57 | final FileOutputStream output; 58 | try { 59 | output = new FileOutputStream(new File(BASE + "/model-tmp.nt")); 60 | } catch (FileNotFoundException e) { 61 | e.printStackTrace(); 62 | return; 63 | } 64 | 65 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 66 | 67 | // reader implementation 68 | StreamRDF dataStream = new StreamRDF() { 69 | 70 | @Override 71 | public void base(String arg0) { 72 | } 73 | 74 | @Override 75 | public void finish() { 76 | } 77 | 78 | @Override 79 | public void prefix(String arg0, String arg1) { 80 | } 81 | 82 | @Override 83 | public void quad(Quad arg0) { 84 | } 85 | 86 | @Override 87 | public void start() { 88 | writer.start(); 89 | } 90 | 91 | @Override 92 | public void triple(Triple triple) { 93 | String s = triple.getSubject().getURI(); 94 | String p = triple.getPredicate().getURI(); 95 | String o = triple.getObject().toString(); 96 | 97 | // if property is rdf:type... 98 | if (p.equals(RDF.type.getURI())) { 99 | // save class 100 | // TODO this could be extended to all properties with domain 101 | // or range = rdfs:Class 102 | classes.set.add(o); 103 | // as well as all super-classes of rdfs:Class 104 | if(o.equals(RDFS.Class.getURI()) || 105 | o.equals(OWL.Class.getURI())) 106 | classes.set.add(s); 107 | } 108 | // save property 109 | properties.set.add(p); 110 | 111 | // write triple 112 | writer.triple(triple); 113 | 114 | } 115 | 116 | }; 117 | 118 | for(String path : paths) 119 | RDFDataMgr.parse(dataStream, path); 120 | 121 | logger.info("# classes collected = "+classes.set.size()); 122 | logger.info("# properties collected = "+properties.set.size()); 123 | 124 | // ontology importer 125 | for(String uri : classes.set) { 126 | logger.trace("Crawling <"+uri+">..."); 127 | Model model = ModelFactory.createDefaultModel(); 128 | // visit URIs in classes and properties 129 | String path = BASE + "/temp-file-" + PrettyRandom.get(6) + ""; 130 | File file = new File(path); 131 | try { 132 | FileUtils.copyURLToFile(new URL(uri), file); 133 | } catch (IOException e) { 134 | logger.warn("Cannot download <"+uri+">."); 135 | continue; 136 | } 137 | logger.trace("Saved to "+path+"."); 138 | for(Lang lang : LANG_ATTEMPTS) { 139 | try { 140 | logger.trace("Trying with "+lang); 141 | RDFDataMgr.read(model, path, lang); 142 | break; 143 | } catch (RiotException e) { 144 | logger.warn("Cannot interpret <"+uri+"> using "+lang+"."); 145 | } 146 | } 147 | logger.trace("# statements: "+model.size()); 148 | StmtIterator list = model.listStatements(); 149 | // append NT files to model... 
150 | while(list.hasNext()) { 151 | // save wanted part of RDF files 152 | Statement stmt = list.next(); 153 | 154 | logger.trace(stmt); 155 | 156 | boolean imprt = stmt.getPredicate().getURI().equals(uri); 157 | 158 | if(!imprt) 159 | if(stmt.getSubject().isURIResource()) 160 | if(stmt.getSubject().getURI().equals(uri)) 161 | imprt = true; 162 | if(!imprt) 163 | if(stmt.getObject().isURIResource()) 164 | if(stmt.getObject().asResource().getURI().equals(uri)) 165 | imprt = true; 166 | 167 | if(imprt) { 168 | Triple t = stmt.asTriple(); 169 | logger.trace(t); 170 | writer.triple(t); 171 | } 172 | } 173 | } 174 | writer.finish(); 175 | 176 | } 177 | 178 | } 179 | 180 | class CollectionCache { 181 | TreeSet set = new TreeSet<>(); 182 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/ProbKBData.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.Arrays; 7 | import java.util.HashMap; 8 | import java.util.Iterator; 9 | 10 | import org.aksw.mandolin.controller.NameMapper.Type; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import com.opencsv.CSVWriter; 15 | 16 | /** 17 | * @author Tommaso Soru 18 | * 19 | */ 20 | public class ProbKBData { 21 | 22 | private final static Logger logger = LogManager.getLogger(ProbKBData.class); 23 | 24 | public final static int ENT_LENGTH = Type.ENTITY.name().length(); 25 | public final static int CLS_LENGTH = Type.CLASS.name().length(); 26 | public final static int REL_LENGTH = Type.RELATION.name().length(); 27 | 28 | private static String base; 29 | private static NameMapper map; 30 | 31 | public static void buildCSV(NameMapper theMap, String theBase) throws IOException { 32 | 33 | base = theBase; 34 | map = theMap; 35 | 36 | allNodes(); 37 | 38 | entClasses(); 39 | relClasses(); 40 | relationships(); 41 | functionals(); 42 | 43 | } 44 | 45 | 46 | private static void functionals() throws IOException { 47 | 48 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + "/functionals.csv"))); 49 | 50 | // TODO 51 | 52 | writer.close(); 53 | 54 | } 55 | 56 | 57 | private static void allNodes() throws IOException { 58 | 59 | CSVWriter entWriter = new CSVWriter(new FileWriter(new File(base + "/entities.csv"))); 60 | CSVWriter clsWriter = new CSVWriter(new FileWriter(new File(base + "/classes.csv"))); 61 | CSVWriter relWriter = new CSVWriter(new FileWriter(new File(base + "/relations.csv"))); 62 | 63 | HashMap hmap = map.getNamesToURIs(); 64 | 65 | for(String key : hmap.keySet()) { 66 | String id = ""; 67 | if(key.startsWith(Type.ENTITY.name())) { 68 | id = key.substring(ENT_LENGTH); 69 | entWriter.writeNext(new String[] {id, hmap.get(key)}); 70 | } 71 | if(key.startsWith(Type.CLASS.name())) { 72 | id = key.substring(CLS_LENGTH); 73 | clsWriter.writeNext(new String[] {id, hmap.get(key)}); 74 | entWriter.writeNext(new String[] {"-" + id, hmap.get(key)}); 75 | } 76 | if(key.startsWith(Type.RELATION.name())) { 77 | id = key.substring(REL_LENGTH); 78 | relWriter.writeNext(new String[] {id, hmap.get(key)}); 79 | } 80 | } 81 | 82 | relWriter.close(); 83 | clsWriter.close(); 84 | entWriter.close(); 85 | 86 | } 87 | 88 | 89 | private static void entClasses() throws IOException { 90 | 91 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + 
"/entClasses.csv"))); 92 | 93 | for(String line : map.getEntClasses()) { 94 | String[] arr = line.split("#"); 95 | // entity_id+"|"+class_id 96 | String id1 = arr[0].substring(ENT_LENGTH); 97 | String id2 = arr[1].substring(CLS_LENGTH); 98 | // TODO find a fix for these relationships 99 | try { 100 | Integer.parseInt(id1); 101 | Integer.parseInt(id2); 102 | } catch(NumberFormatException e) { 103 | continue; 104 | } 105 | writer.writeNext(new String[] {id1, id2}); 106 | } 107 | 108 | writer.close(); 109 | 110 | } 111 | 112 | 113 | /** 114 | * Domain and range information, as required by ProbKB. 115 | * 116 | * @throws IOException 117 | */ 118 | private static void relClasses() throws IOException { 119 | 120 | HashMap entries = new HashMap<>(); 121 | 122 | String owlThing = map.getOwlThingId(); 123 | 124 | // set defaults 125 | for(String prop : map.getNamesByType(Type.RELATION)) { 126 | String rel = prop.substring(REL_LENGTH); 127 | entries.put(rel, new String[] {rel, owlThing, owlThing}); 128 | } 129 | 130 | for(String line : map.getRelClasses()) { 131 | String[] arr = line.split("#"); 132 | // rel_id+"#"+class_id+"#"+is_domain 133 | String rel = arr[0].startsWith(Type.RELATION.name()) ? arr[0].substring(REL_LENGTH) : arr[0].substring(ENT_LENGTH); 134 | String cl = arr[1].substring(CLS_LENGTH); 135 | Boolean isDomain = Boolean.parseBoolean(arr[2]); 136 | 137 | String[] obj; 138 | if(entries.containsKey(rel)) 139 | obj = entries.get(rel); 140 | else { 141 | obj = new String[] {rel, owlThing, owlThing}; 142 | entries.put(rel, obj); 143 | } 144 | obj[isDomain ? 1 : 2] = cl; 145 | logger.trace((isDomain ? "domain" : "range") + " => " + Arrays.toString(obj)); 146 | 147 | } 148 | 149 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + "/relClasses.csv"))); 150 | 151 | for(String entry : entries.keySet()) { 152 | String[] obj = entries.get(entry); 153 | // TODO find a fix for these relationships 154 | try { 155 | for(String o : obj) 156 | Integer.parseInt(o); 157 | } catch(NumberFormatException e) { 158 | continue; 159 | } 160 | writer.writeNext(obj); 161 | } 162 | 163 | writer.close(); 164 | 165 | } 166 | 167 | 168 | private static void relationships() throws IOException { 169 | 170 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + "/relationships.csv"))); 171 | 172 | Iterator it = map.getRelationships().iterator(); 173 | while(it.hasNext()) { 174 | String line = it.next(); 175 | String[] arr = line.split("#"); 176 | // relation_id+"|"+entity_id+"|"+entity_id 177 | String id1 = arr[0].substring(REL_LENGTH); 178 | String id2 = arr[1].substring(ENT_LENGTH); 179 | String id3 = arr[2].substring(ENT_LENGTH); 180 | 181 | // System.out.println(line); 182 | // if(arr[0].startsWith(Type.ENTITY.toString())) { 183 | // id1 = String.valueOf(Integer.parseInt(arr[0].substring(ENT_LENGTH)) + 10000); 184 | // System.out.println("rel = "+id1); 185 | // } 186 | 187 | // TODO find a fix for these relationships 188 | try { 189 | Integer.parseInt(id1); 190 | Integer.parseInt(id2); 191 | Integer.parseInt(id3); 192 | } catch(NumberFormatException e) { 193 | continue; 194 | } 195 | 196 | writer.writeNext(new String[] {id1, id2, id3, "1.0", "http://"}); 197 | } 198 | 199 | writer.close(); 200 | 201 | } 202 | 203 | 204 | } 205 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/SimilarityJoin.java: -------------------------------------------------------------------------------- 1 | package 
org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.util.Arrays; 8 | import java.util.HashMap; 9 | import java.util.Iterator; 10 | import java.util.List; 11 | import java.util.Map.Entry; 12 | import java.util.TreeSet; 13 | 14 | import jp.ndca.similarity.join.PPJoin; 15 | import jp.ndca.similarity.join.StringItem; 16 | import jp.ndca.similarity.join.Tokenizer; 17 | 18 | import org.aksw.mandolin.controller.NameMapper.Type; 19 | import org.aksw.mandolin.model.Cache; 20 | import org.aksw.mandolin.model.ComparableLiteral; 21 | import org.aksw.mandolin.reasoner.PelletReasoner; 22 | import org.apache.commons.codec.digest.DigestUtils; 23 | import org.apache.commons.io.FileUtils; 24 | import org.apache.jena.riot.Lang; 25 | import org.apache.jena.riot.system.StreamRDF; 26 | import org.apache.jena.riot.system.StreamRDFWriter; 27 | import org.apache.logging.log4j.LogManager; 28 | import org.apache.logging.log4j.Logger; 29 | 30 | import com.hp.hpl.jena.graph.Node; 31 | import com.hp.hpl.jena.graph.NodeFactory; 32 | import com.hp.hpl.jena.graph.Triple; 33 | import com.hp.hpl.jena.vocabulary.OWL; 34 | import com.hp.hpl.jena.vocabulary.RDF; 35 | import com.hp.hpl.jena.vocabulary.RDFS; 36 | 37 | /** 38 | * @author Tommaso Soru 39 | * 40 | */ 41 | public class SimilarityJoin { 42 | 43 | private final static Logger logger = LogManager 44 | .getLogger(SimilarityJoin.class); 45 | 46 | public static final String SIMILAR_PREFIX = "http://mandolin.aksw.org/ontology#similar"; 47 | 48 | public static final String SIMILAR_TO_PREFIX = "http://mandolin.aksw.org/ontology#similarTo"; 49 | 50 | static HashMap hashes = new HashMap<>(); 51 | 52 | /** 53 | * This could be left turned off, as the similarity join algorithm already 54 | * finds all pairs. A mere copy of the file is executed if false. 55 | */ 56 | private static boolean useClosure = false; 57 | 58 | public static final String similarCompositePropertyURI(int thr, String uri) { 59 | 60 | String s; 61 | 62 | if (hashes.containsKey(uri)) 63 | s = hashes.get(uri); 64 | else { 65 | s = DigestUtils.sha1Hex(uri); 66 | hashes.put(uri, s); 67 | } 68 | 69 | return SIMILAR_PREFIX + thr + "-" + s; 70 | } 71 | 72 | public static final String similarToURI(int thr) { 73 | // no such property 74 | if (thr <= 0 || thr >= 100) 75 | return null; 76 | return SIMILAR_TO_PREFIX + thr; 77 | } 78 | 79 | public static void build(NameMapper map, 80 | TreeSet setOfStrings, Cache cache, 81 | final String BASE, final int THR_MIN, final int THR_MAX, 82 | final int THR_STEP) { 83 | 84 | PPJoin ppjoin = new PPJoin(); 85 | Tokenizer tok = ppjoin.getTokenizer(); 86 | HashMap dataset = new HashMap<>(); 87 | 88 | Iterator it = setOfStrings.iterator(); 89 | for (int i = 0; it.hasNext(); i++) { 90 | ComparableLiteral lit = it.next(); 91 | String val = lit.getVal(); 92 | cache.stringItems.add(new StringItem(tok.tokenize(val, false), i)); 93 | dataset.put(i, lit); 94 | } 95 | 96 | logger.trace(cache.stringItems.size()); 97 | List stringItems = cache.stringItems; 98 | 99 | StringItem[] strDatum = stringItems.toArray(new StringItem[stringItems 100 | .size()]); 101 | Arrays.sort(strDatum); 102 | 103 | ppjoin.setUseSortAtExtractPairs(false); 104 | 105 | // open NT file of similarity joins. 
106 | final FileOutputStream output; 107 | try { 108 | output = new FileOutputStream(new File(BASE + "/model-sim.nt")); 109 | } catch (FileNotFoundException e) { 110 | logger.fatal(e.getMessage()); 111 | throw new RuntimeException("Cannot open file " + BASE 112 | + "/model-sim.nt of similarity joins."); 113 | } 114 | 115 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, 116 | Lang.NT); 117 | writer.start(); 118 | 119 | int cTBox = 0, cABox = 0; 120 | 121 | for (int thr = THR_MIN; thr <= THR_MAX; thr += THR_STEP) { 122 | 123 | String rel = similarToURI(thr); 124 | if (rel == null) 125 | continue; 126 | if (rel.isEmpty()) 127 | continue; 128 | Node relNode = NodeFactory.createURI(rel); 129 | 130 | writer.triple(new Triple(relNode, RDF.type.asNode(), 131 | OWL.SymmetricProperty.asNode())); 132 | writer.triple(new Triple(relNode, RDF.type.asNode(), 133 | OWL.TransitiveProperty.asNode())); 134 | cTBox += 2; 135 | 136 | for (int thrj = THR_MIN; thrj < thr; thrj += THR_STEP) { 137 | Triple t = new Triple(relNode, RDFS.subPropertyOf.asNode(), 138 | NodeFactory.createURI(similarToURI(thrj))); 139 | logger.trace(t); 140 | writer.triple(t); 141 | cTBox++; 142 | } 143 | 144 | List> result = ppjoin.extractPairs( 145 | strDatum, thr / 100.0); 146 | for (Entry entry : result) { 147 | ComparableLiteral lit1 = dataset.get(entry.getKey().getId()); 148 | ComparableLiteral lit2 = dataset.get(entry.getValue().getId()); 149 | String relName = map.add(rel, Type.RELATION); 150 | map.addRelationship(relName, map.getName(lit1.getUri()), 151 | map.getName(lit2.getUri())); 152 | 153 | // add similarTo relationship 154 | writer.triple(new Triple(NodeFactory.createURI(lit1.getUri()), 155 | relNode, NodeFactory.createURI(lit2.getUri()))); 156 | 157 | int c = compositeRelations(writer, map, thr, lit1.getUri(), 158 | lit2.getUri()); 159 | cABox += c; 160 | 161 | logger.trace(lit1.getUri() + " <=> " + lit2.getUri()); 162 | logger.trace(lit1.getVal() + " <=> " + lit2.getVal()); 163 | } 164 | 165 | cABox += result.size(); 166 | 167 | } 168 | 169 | // close NT file 170 | writer.finish(); 171 | 172 | logger.info("Triples added after similarity join: TBox=" + cTBox 173 | + ", ABox=" + cABox); 174 | 175 | if (useClosure) { 176 | // computing closure on similarity joins 177 | PelletReasoner.closure(BASE + "/model-sim.nt", BASE 178 | + "/model-sim-fwc.nt"); 179 | } else { 180 | try { 181 | FileUtils.copyFile(new File(BASE + "/model-sim.nt"), new File( 182 | BASE + "/model-sim-fwc.nt")); 183 | } catch (IOException e) { 184 | e.printStackTrace(); 185 | } 186 | } 187 | 188 | } 189 | 190 | private static int compositeRelations(StreamRDF writer, NameMapper map, 191 | int thr, String wURI, String zURI) { 192 | 193 | String w = map.getName(wURI), z = map.getName(zURI); 194 | Node wNode = NodeFactory.createURI(wURI); 195 | Node zNode = NodeFactory.createURI(zURI); 196 | 197 | TreeSet rships = map.getRelationships(); 198 | TreeSet wTree = new TreeSet<>(); 199 | TreeSet zTree = new TreeSet<>(); 200 | for (String rship : rships) { 201 | String[] rsh = rship.split("#"); 202 | // w and z can be only in 2nd position, as they are datatypes 203 | if (rsh[2].equals(w)) 204 | wTree.add(rship); 205 | if (rsh[2].equals(z)) 206 | zTree.add(rship); 207 | } 208 | 209 | logger.trace("wTree = " + wTree); 210 | logger.trace("zTree = " + zTree); 211 | 212 | // forall x : (x, rel, w) . 
add (x, extRel, z) 213 | for (String rship : wTree) { 214 | String[] rsh = rship.split("#"); 215 | String rel = rsh[0], subj = rsh[1]; 216 | 217 | String extRelURI = similarCompositePropertyURI(thr, rel); 218 | Node extRelNode = NodeFactory.createURI(extRelURI); 219 | String extRelName = map.add(extRelURI, Type.RELATION); 220 | logger.trace(rel + " => " + extRelURI + " => " + extRelName); 221 | 222 | map.addRelationship(extRelName, subj, z); 223 | logger.trace(extRelName + "#" + subj + "#" + z); 224 | 225 | // add composite-relation triple 226 | Triple t = new Triple(NodeFactory.createURI(map.getURI(subj)), 227 | extRelNode, zNode); 228 | logger.trace(t); 229 | writer.triple(t); 230 | 231 | } 232 | 233 | // forall y : (y, rel, z) . add (y, extRel, w) 234 | for (String rship : zTree) { 235 | String[] rsh = rship.split("#"); 236 | String rel = rsh[0], subj = rsh[1]; 237 | 238 | String extRelURI = similarCompositePropertyURI(thr, rel); 239 | Node extRelNode = NodeFactory.createURI(extRelURI); 240 | String extRelName = map.add(extRelURI, Type.RELATION); 241 | logger.trace(rel + " => " + extRelURI + " => " + extRelName); 242 | 243 | map.addRelationship(extRelName, subj, w); 244 | logger.trace(extRelName + "#" + subj + "#" + w); 245 | 246 | // add composite-relation triple 247 | Triple t = new Triple(NodeFactory.createURI(map.getURI(subj)), 248 | extRelNode, wNode); 249 | logger.trace(t); 250 | writer.triple(t); 251 | 252 | } 253 | 254 | return wTree.size() + zTree.size(); 255 | 256 | } 257 | 258 | } 259 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/Validator.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | 7 | import org.apache.jena.riot.Lang; 8 | import org.apache.jena.riot.RDFDataMgr; 9 | import org.apache.jena.riot.system.StreamRDF; 10 | import org.apache.jena.riot.system.StreamRDFWriter; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import com.hp.hpl.jena.graph.Node; 15 | import com.hp.hpl.jena.graph.NodeFactory; 16 | import com.hp.hpl.jena.graph.Triple; 17 | import com.hp.hpl.jena.sparql.core.Quad; 18 | import com.hp.hpl.jena.vocabulary.XSD; 19 | 20 | /** 21 | * @author Tommaso Soru 22 | * 23 | */ 24 | public class Validator { 25 | 26 | private final static Logger logger = LogManager.getLogger(Validator.class); 27 | 28 | /** 29 | * @param base 30 | * @param input 31 | * @param enableFwc 32 | * @param enableOnt 33 | */ 34 | public static void run(String base, String[] input, boolean enableFwc, boolean enableOnt) { 35 | 36 | String outputFile = enableFwc ? 
"model.nt" : "model-fwc.nt"; 37 | 38 | final FileOutputStream output; 39 | try { 40 | output = new FileOutputStream(new File(base + "/" + outputFile)); 41 | } catch (FileNotFoundException e) { 42 | e.printStackTrace(); 43 | return; 44 | } 45 | 46 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 47 | 48 | StreamRDF dataStream = new StreamRDF() { 49 | 50 | @Override 51 | public void start() { 52 | writer.start(); 53 | } 54 | 55 | @Override 56 | public void quad(Quad quad) { 57 | } 58 | 59 | @Override 60 | public void base(String base) { 61 | } 62 | 63 | @Override 64 | public void prefix(String prefix, String iri) { 65 | } 66 | 67 | @Override 68 | public void finish() { 69 | writer.finish(); 70 | } 71 | 72 | @Override 73 | public void triple(Triple triple) { 74 | Node node = triple.getObject(); 75 | if(node.isLiteral()) { 76 | if(!node.getLiteral().isWellFormed()) { 77 | // known issue: fix gYear literals 78 | if(node.getLiteralDatatypeURI() != null) { 79 | if(node.getLiteralDatatypeURI().equals(XSD.gYear.getURI()) || 80 | node.getLiteralDatatypeURI().equals(XSD.gYear.getLocalName())) { 81 | Node newNode = NodeFactory.createLiteral( 82 | node.getLiteral().toString().substring(0, 4) + "^^" + XSD.gYear); 83 | triple = new Triple(triple.getSubject(), triple.getPredicate(), 84 | newNode); 85 | logger.warn("Bad-formed literal: "+node+" - Using: "+newNode); 86 | } 87 | } 88 | } 89 | } 90 | writer.triple(triple); 91 | } 92 | 93 | }; 94 | 95 | if(enableOnt) { 96 | String inputFile = base + "/model-tmp.nt"; 97 | RDFDataMgr.parse(dataStream, inputFile); 98 | 99 | new File(inputFile).delete(); 100 | } else { 101 | for(String path : input) 102 | RDFDataMgr.parse(dataStream, path); 103 | } 104 | 105 | if(!enableFwc) 106 | new File(base + "/model.nt").delete(); 107 | } 108 | 109 | 110 | @SuppressWarnings("unused") 111 | private static void validate(String in, String out) { 112 | 113 | final FileOutputStream output; 114 | try { 115 | output = new FileOutputStream(new File(out)); 116 | } catch (FileNotFoundException e) { 117 | e.printStackTrace(); 118 | return; 119 | } 120 | 121 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 122 | 123 | StreamRDF dataStream = new StreamRDF() { 124 | 125 | @Override 126 | public void start() { 127 | writer.start(); 128 | } 129 | 130 | @Override 131 | public void quad(Quad quad) { 132 | } 133 | 134 | @Override 135 | public void base(String base) { 136 | } 137 | 138 | @Override 139 | public void prefix(String prefix, String iri) { 140 | } 141 | 142 | @Override 143 | public void finish() { 144 | writer.finish(); 145 | } 146 | 147 | @Override 148 | public void triple(Triple triple) { 149 | Node node = triple.getObject(); 150 | if(node.isLiteral()) { 151 | if(!node.getLiteral().isWellFormed()) { 152 | // known issue: fix gYear literals 153 | if(node.getLiteralDatatypeURI() != null) { 154 | if(node.getLiteralDatatypeURI().equals(XSD.gYear.getURI()) || 155 | node.getLiteralDatatypeURI().equals(XSD.gYear.getLocalName())) { 156 | Node newNode = NodeFactory.createLiteral( 157 | node.getLiteral().toString().substring(0, 4) + "^^" + XSD.gYear); 158 | triple = new Triple(triple.getSubject(), triple.getPredicate(), 159 | newNode); 160 | // System.out.println("Bad-formed literal: "+node+" - Using: "+newNode); 161 | } 162 | } 163 | } 164 | } 165 | writer.triple(triple); 166 | } 167 | 168 | }; 169 | 170 | RDFDataMgr.parse(dataStream, in); 171 | 172 | } 173 | 174 | 175 | } 176 | 
-------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/Dataset.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | enum Dataset { 8 | 9 | FB15K("benchmark/fb15k/freebase_mtr100_mte100-test.nt", "fb15k_"), 10 | WN18("benchmark/wn18/wordnet-mlj12-test.nt", "wn18_"); 11 | 12 | String ref, prefix; 13 | 14 | Dataset(String ref, String prefix) { 15 | this.ref = ref; 16 | this.prefix = prefix; 17 | } 18 | 19 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/FMeasureEvaluation.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | import java.util.TreeSet; 4 | 5 | import org.apache.jena.riot.RDFDataMgr; 6 | import org.apache.jena.riot.system.StreamRDF; 7 | 8 | import com.hp.hpl.jena.graph.Triple; 9 | import com.hp.hpl.jena.sparql.core.Quad; 10 | 11 | /** 12 | * @author Tommaso Soru 13 | * 14 | */ 15 | public class FMeasureEvaluation { 16 | 17 | 18 | private String psetPath, hsetPath; 19 | 20 | private int tp, fp, fn; 21 | private double pre, rec, f1; 22 | 23 | public FMeasureEvaluation(String psetPath, String hsetPath) { 24 | super(); 25 | this.psetPath = psetPath; 26 | this.hsetPath = hsetPath; 27 | } 28 | 29 | public void run() { 30 | 31 | System.out.println("Running evaluation on set "+psetPath+" against set "+hsetPath); 32 | 33 | TreeSet spoP = read(psetPath); 34 | TreeSet spoH = read(hsetPath); 35 | 36 | System.out.println("Predicted"); 37 | for(String s : spoP) 38 | System.out.println("\t"+s); 39 | System.out.println("Hidden"); 40 | for(String s : spoH) 41 | System.out.println("\t"+s); 42 | 43 | TreeSet fpSet = new TreeSet<>(spoP); 44 | fpSet.removeAll(spoH); 45 | fp = fpSet.size(); 46 | 47 | TreeSet fnSet = new TreeSet<>(spoH); 48 | fnSet.removeAll(spoP); 49 | fn = fnSet.size(); 50 | 51 | tp = spoP.size() - fp; 52 | 53 | System.out.println("TP = "+tp+"; FP = "+fp+"; FN = "+fn); 54 | 55 | pre = (tp + fp) == 0 ? 0d : (double) tp / (tp + fp); 56 | rec = (tp + fn) == 0 ? 0d : (double) tp / (tp + fn); 57 | f1 = (pre + rec) == 0d ? 
0d : 2 * pre * rec / (pre + rec); 58 | 59 | System.out.println("F1 = "+f1+"; Pre = "+pre+"; Rec = "+rec); 60 | 61 | } 62 | 63 | private TreeSet read(String path) { 64 | 65 | TreeSet spo = new TreeSet<>(); 66 | 67 | StreamRDF dataStream = new StreamRDF() { 68 | 69 | @Override 70 | public void start() { 71 | } 72 | 73 | @Override 74 | public void triple(Triple triple) { 75 | spo.add(triple.getSubject().getURI()+" "+ 76 | triple.getPredicate().getURI()+" "+ 77 | triple.getObject().toString()); 78 | } 79 | 80 | @Override 81 | public void quad(Quad quad) { 82 | } 83 | 84 | @Override 85 | public void base(String base) { 86 | } 87 | 88 | @Override 89 | public void prefix(String prefix, String iri) { 90 | } 91 | 92 | @Override 93 | public void finish() { 94 | } 95 | 96 | }; 97 | 98 | RDFDataMgr.parse(dataStream, path); 99 | 100 | return spo; 101 | } 102 | 103 | public String getPsetPath() { 104 | return psetPath; 105 | } 106 | 107 | public String getHsetPath() { 108 | return hsetPath; 109 | } 110 | 111 | public int getTp() { 112 | return tp; 113 | } 114 | 115 | public int getFp() { 116 | return fp; 117 | } 118 | 119 | public int getFn() { 120 | return fn; 121 | } 122 | 123 | public double getPre() { 124 | return pre; 125 | } 126 | 127 | public double getRec() { 128 | return rec; 129 | } 130 | 131 | public double getF1() { 132 | return f1; 133 | } 134 | 135 | public static void main(String[] args) { 136 | new FMeasureEvaluation("/Users/tom/PhD/srl/Mandolin/eval/0002/cv/run0/output_1.0.nt", 137 | "/Users/tom/PhD/srl/Mandolin/eval/0002/cv/partitions/0.nt").run(); 138 | } 139 | 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/LinkPredictionEvaluation.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | 6 | /** 7 | * @author Tommaso Soru 8 | * 9 | */ 10 | public class LinkPredictionEvaluation { 11 | 12 | /** 13 | * Dataset = First parameter. 14 | */ 15 | static Dataset DATASET; 16 | 17 | /** 18 | * Experiment code = Second parameter. 
19 | */ 20 | static String EXP_CODE; 21 | 22 | /** 23 | * @param args 24 | * @throws IOException 25 | */ 26 | public static void main(String[] args) throws IOException { 27 | 28 | if(args.length > 0) { 29 | DATASET = Dataset.valueOf(args[0]); 30 | EXP_CODE = args[1]; 31 | } else { 32 | // demo values 33 | DATASET = Dataset.FB15K; 34 | EXP_CODE = "09_?m_v"; 35 | } 36 | 37 | final String REF = DATASET.ref; 38 | final String BASE = "eval/" + DATASET.prefix + EXP_CODE; 39 | 40 | ArrayList meanranks = new ArrayList<>(); 41 | 42 | for(int i=1; i<=5; i++) { 43 | 44 | String testSet = REF; 45 | String output = BASE.replace("?", String.valueOf(i)); 46 | 47 | MeanRankCalc mr = new MeanRankCalc(testSet, output); 48 | mr.setMinThr(0); 49 | mr.partitionData(); 50 | meanranks.add(mr.start()); 51 | 52 | } 53 | 54 | System.out.println("\nmeanranks = " + meanranks); 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/MeanRankCalc.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.io.PrintWriter; 7 | import java.text.DecimalFormat; 8 | import java.util.ArrayList; 9 | import java.util.Scanner; 10 | import java.util.TreeSet; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | import org.apache.jena.riot.RDFDataMgr; 14 | import org.apache.jena.riot.system.StreamRDF; 15 | 16 | import com.hp.hpl.jena.graph.Triple; 17 | import com.hp.hpl.jena.rdf.model.Model; 18 | import com.hp.hpl.jena.rdf.model.Property; 19 | import com.hp.hpl.jena.rdf.model.RDFNode; 20 | import com.hp.hpl.jena.rdf.model.Resource; 21 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 22 | import com.hp.hpl.jena.sparql.core.Quad; 23 | import com.hp.hpl.jena.util.iterator.ExtendedIterator; 24 | 25 | /** 26 | * @author Tommaso Soru 27 | * 28 | */ 29 | public class MeanRankCalc { 30 | 31 | private PrintWriter pw; 32 | 33 | private String testSet, mandolinOut; 34 | 35 | private int minThr = 1; 36 | 37 | public int getMinThr() { 38 | return minThr; 39 | } 40 | 41 | public void setMinThr(int minThr) { 42 | this.minThr = minThr; 43 | } 44 | 45 | public static void main(String[] args) throws IOException { 46 | 47 | String testSet = args[0]; 48 | String mandolinOut = args[1]; 49 | 50 | MeanRankCalc mr = new MeanRankCalc(testSet, mandolinOut); 51 | 52 | mr.partitionData(); 53 | mr.start(); 54 | 55 | } 56 | 57 | public MeanRankCalc(String testSet, String mandolinOut) { 58 | super(); 59 | this.testSet = testSet; 60 | this.mandolinOut = mandolinOut; 61 | } 62 | 63 | public double start() throws FileNotFoundException { 64 | 65 | pw = new PrintWriter(new File(mandolinOut + "/evaluation.csv")); 66 | 67 | Scanner in = new Scanner(new File(mandolinOut + "/entities.csv")); 68 | int entities = 0; 69 | while(in.hasNextLine()) { 70 | in.nextLine(); 71 | entities++; 72 | } 73 | in.close(); 74 | 75 | final Integer MEDIAN_RANK = entities / 2; 76 | System.out.println("Median rank = "+MEDIAN_RANK); 77 | 78 | DecimalFormat df = new DecimalFormat("0.0"); 79 | 80 | // load N=max-min+1 models in descending order 81 | final Model[] m = new Model[10 - minThr + 1]; 82 | for(int i=m.length; i>=1; i--) { 83 | String thr = String.valueOf(df.format((double) (i+minThr-1) / 10.0)); 84 | String discovered = mandolinOut + "/ranked_" + thr + ".nt"; 85 | System.out.println("Loading model "+i+"..."); 86 
| m[m.length-i] = RDFDataMgr.loadModel(discovered); 87 | } 88 | 89 | final ArrayList ranks = new ArrayList<>(); 90 | 91 | final MRCache cache = new MRCache(); 92 | 93 | StreamRDF dataStream = new StreamRDF() { 94 | 95 | @Override 96 | public void start() { 97 | } 98 | 99 | private Integer check(Model mdl, MRCache cache, Triple triple, boolean forward) { 100 | 101 | Resource s = ResourceFactory.createResource(triple.getSubject() 102 | .getURI()); 103 | Property p = ResourceFactory.createProperty(triple 104 | .getPredicate().getURI()); 105 | Resource o = ResourceFactory.createResource(triple.getObject() 106 | .getURI()); 107 | 108 | ExtendedIterator it = forward ? mdl 109 | .listObjectsOfProperty(s, p) : mdl 110 | .listSubjectsWithProperty(p, o); 111 | // initialize count 112 | int y = 0; 113 | while (it.hasNext()) { 114 | 115 | Resource res = it.next().asResource(); 116 | String uri = forward ? triple.getObject().getURI() : triple 117 | .getSubject().getURI(); 118 | // if triple is found 119 | if (res.getURI().equals(uri)) { 120 | // rank[triple] = x + 1 121 | int rank = cache.x+1; 122 | System.out.println(triple+" >>> "+rank); 123 | ranks.add(cache.x + 1); 124 | // next triple 125 | return rank; 126 | } 127 | y++; 128 | } 129 | // add up to rank value 130 | cache.x += y; 131 | 132 | return null; 133 | } 134 | 135 | @Override 136 | public void triple(Triple triple) { 137 | // initialize rank value 138 | cache.x = 0; 139 | 140 | // for each model 141 | for(int i=0; i>> (median)"); 162 | } 163 | 164 | @Override 165 | public void quad(Quad quad) { 166 | } 167 | 168 | @Override 169 | public void base(String base) { 170 | } 171 | 172 | @Override 173 | public void prefix(String prefix, String iri) { 174 | } 175 | 176 | @Override 177 | public void finish() { 178 | } 179 | 180 | }; 181 | 182 | // stream test set 183 | RDFDataMgr.parse(dataStream, testSet); 184 | 185 | System.out.println("\n=== FILTERED SETTING === "+mandolinOut.substring(mandolinOut.lastIndexOf('/')+1)); 186 | // compute mean rank 187 | int sum = 0, sumR = 0; 188 | System.out.println(ranks); 189 | for(Integer i : ranks) { 190 | sum += i; 191 | sumR += 1.0 / i; 192 | } 193 | 194 | double mr = (double) sum / (double) ranks.size(); 195 | double mrr = (double) sumR / (double) ranks.size(); 196 | System.out.println("\nMeanRank = "+mr); 197 | System.out.println("\nMRR = "+mrr); 198 | 199 | double h1 = (double) cache.hitsAt1 * 100 / (double) ranks.size(); 200 | double h3 = (double) cache.hitsAt3 * 100 / (double) ranks.size(); 201 | double h10 = (double) cache.hitsAt10 * 100 / (double) ranks.size(); 202 | System.out.println("\nHits@1 = "+h1); 203 | System.out.println("Hits@3 = "+h3); 204 | System.out.println("Hits@10 = "+h10); 205 | 206 | pw.println(mandolinOut + "," + mrr + "," + h1 + "," + h3 + "," + h10); 207 | pw.close(); 208 | 209 | return mr; 210 | } 211 | 212 | public void partitionData() throws IOException { 213 | 214 | System.out.println("Partitioning data..."); 215 | 216 | DecimalFormat df = new DecimalFormat("0.0"); 217 | for (int i = minThr; i <= 9; i++) { 218 | String thrA = String.valueOf(df.format((double) i / 10.0)); 219 | String thrB = String.valueOf(df.format((double) (i+1) / 10.0)); 220 | System.out.println(thrA+","+thrB); 221 | String outA = mandolinOut + "/discovered_" + thrA + ".nt"; 222 | String outB = mandolinOut + "/discovered_" + thrB + ".nt"; 223 | String ranked = mandolinOut + "/ranked_" + thrA + ".nt"; 224 | 225 | if(new File(ranked).exists()) { 226 | System.out.println("Partitions exist. 
Skipping..."); 227 | return; 228 | } 229 | 230 | Scanner inB = new Scanner(new File(outB)); 231 | TreeSet indexB = new TreeSet<>(); 232 | while(inB.hasNextLine()) 233 | indexB.add(inB.nextLine()); 234 | inB.close(); 235 | 236 | Scanner inA = new Scanner(new File(outA)); 237 | PrintWriter pw = new PrintWriter(new File(ranked)); 238 | while(inA.hasNextLine()) { 239 | String line = inA.nextLine(); 240 | if(!indexB.contains(line)) 241 | pw.println(line); 242 | } 243 | pw.close(); 244 | inA.close(); 245 | } 246 | FileUtils.copyFile(new File(mandolinOut + "/discovered_1.0.nt"), new File(mandolinOut + "/ranked_1.0.nt")); 247 | 248 | 249 | } 250 | 251 | } 252 | 253 | class MRCache { 254 | int x; 255 | int hitsAt1 = 0; 256 | int hitsAt3 = 0; 257 | int hitsAt10 = 0; 258 | } 259 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/grounding/Grounding.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.grounding; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.PrintWriter; 6 | import java.util.Scanner; 7 | 8 | import org.aksw.mandolin.util.Bundle; 9 | import org.aksw.mandolin.util.Shell; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | /** 14 | * @author Tommaso Soru 15 | * 16 | */ 17 | public class Grounding { 18 | 19 | private final static Logger logger = LogManager.getLogger(Grounding.class); 20 | 21 | public static void ground(String base) throws FileNotFoundException { 22 | // prepare SQL files 23 | prepare(base); 24 | // generate tables and procedures 25 | generate(base); 26 | // run scripts for grounding 27 | run(); 28 | } 29 | 30 | private static void run() { 31 | 32 | logger.info("Grounding..."); 33 | 34 | String[] cmd = { 35 | // Drop schema 36 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 37 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 38 | + System.getProperty("user.dir") + "/pgsql/sql/run.sql", }; 39 | for (String c : cmd) { 40 | logger.debug("> " + c); 41 | Shell.execute(c, true); 42 | } 43 | } 44 | 45 | private static void generate(String base) { 46 | String[] cmd = { 47 | // Drop schema 48 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 49 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 50 | + System.getProperty("user.dir") 51 | + "/pgsql/sql/drop.sql", 52 | // // Create db 53 | // Bundle.getString("pgsql_home") + "/bin/createdb probkb -h " 54 | // + Bundle.getString("pgsql_url") + " -p 5432", 55 | // Create the probkb schema and tables. 56 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 57 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 58 | + System.getProperty("user.dir") 59 | + "/pgsql/sql/create.sql", 60 | // Create quality control procedures. 61 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 62 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 63 | + System.getProperty("user.dir") + "/pgsql/sql/qc.sql", 64 | // Load the files in CSV format. 65 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 66 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 67 | + System.getProperty("user.dir") + "/" + base 68 | + "/load.sql", 69 | // Create grounding procedures. 
70 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 71 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 72 | + System.getProperty("user.dir") 73 | + "/pgsql/sql/ground.sql" }; 74 | for (String c : cmd) { 75 | logger.debug("> " + c); 76 | Shell.execute(c, true); 77 | } 78 | } 79 | 80 | private static void prepare(String base) throws FileNotFoundException { 81 | 82 | PrintWriter load = new PrintWriter(new File(base + "/load.sql")); 83 | 84 | // write head 85 | write("pgsql/sql/load-head.sql", load); 86 | 87 | // write graph tables 88 | String[] tables = { "classes", "entities", "relations", "entClasses", 89 | "relClasses", "functionals", "extractions", }; 90 | // due to a stylistic choice from ProbKB, table `extractions` 91 | // corresponds to file `relationships.csv` 92 | String[] csv = { "classes", "entities", "relations", "entClasses", 93 | "relClasses", "functionals", "relationships", }; 94 | for (int i = 0; i < tables.length; i++) 95 | load.write("COPY probkb." + tables[i] + " FROM '" 96 | + System.getProperty("user.dir") + "/" + base + "/" 97 | + csv[i] + ".csv' DELIMITERS ',' CSV;\n"); 98 | 99 | // write body 100 | write("pgsql/sql/load-body.sql", load); 101 | 102 | // write MLN tables 103 | for (int i = 1; i <= 6; i++) 104 | load.write("COPY probkb.mln" + i + " FROM '" 105 | + System.getProperty("user.dir") + "/" + base + "/mln" + i 106 | + ".csv' DELIMITERS ',' CSV;\n"); 107 | 108 | // write tail 109 | write("pgsql/sql/load-tail.sql", load); 110 | load.close(); 111 | 112 | } 113 | 114 | private static void write(String filename, PrintWriter pw) 115 | throws FileNotFoundException { 116 | Scanner in = new Scanner(new File(filename)); 117 | while (in.hasNextLine()) 118 | pw.write(in.nextLine() + "\n"); 119 | in.close(); 120 | } 121 | 122 | // public static void main(String[] args) throws FileNotFoundException { 123 | // 124 | // ground("eval/0001"); 125 | // 126 | // } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/Factors.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.ResultSet; 4 | import java.sql.SQLException; 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | 8 | import org.aksw.mandolin.controller.ProbKBData; 9 | import org.aksw.mandolin.controller.NameMapper.Type; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.solver.pojo.Clause; 14 | import com.googlecode.rockit.app.solver.pojo.Literal; 15 | import com.googlecode.rockit.javaAPI.HerbrandUniverse; 16 | 17 | /** 18 | * The "factors" singleton makes the three collections needed by the RockIt 19 | * inference out of ProbKB output. 20 | * 21 | * @author Tommaso Soru 22 | * 23 | */ 24 | public class Factors { 25 | 26 | private final static Logger logger = LogManager.getLogger(Factors.class); 27 | 28 | private static Factors instance = null; 29 | 30 | private ArrayList consistentStartingPoints; 31 | private ArrayList clauses; 32 | private Collection evidence; 33 | 34 | private PostgreDB db; 35 | 36 | private static HerbrandUniverse u = HerbrandUniverse.getInstance(); 37 | 38 | protected Factors() { 39 | super(); 40 | } 41 | 42 | public static Factors getInstance() { 43 | if (instance == null) 44 | instance = new Factors(); 45 | return instance; 46 | } 47 | 48 | /** 49 | * Preprocess factors from ProbKB for RockIt. 
50 | * 51 | * @param aimName 52 | */ 53 | public void preprocess(String aimName) { 54 | 55 | db = new PostgreDB(); 56 | db.connect(); 57 | 58 | buildClauses(); 59 | buildEvidence(aimName); 60 | 61 | db.close(); 62 | } 63 | 64 | private void buildEvidence(String aimName) { 65 | 66 | evidence = new ArrayList<>(); 67 | consistentStartingPoints = new ArrayList<>(); 68 | 69 | ResultSet rs; 70 | if(aimName == null) { 71 | rs = db.evidence(); 72 | } else { 73 | int aimNumber = Integer.parseInt(aimName 74 | .substring(ProbKBData.REL_LENGTH)); 75 | rs = db.evidence(aimNumber); 76 | } 77 | 78 | try { 79 | while (rs.next()) { 80 | String a1 = u.getKey(Type.ENTITY.name() + rs.getInt("ent1")); 81 | String b1 = u.getKey(Type.ENTITY.name() + rs.getInt("ent2")); 82 | String r = Type.RELATION.name() + rs.getInt("rel"); 83 | // String string = aimName + "|" + a1 + "|" + b1; 84 | String string = r + "|" + a1 + "|" + b1; 85 | // As the Semantic Web deals only with true statements, 86 | // all literals are set to true and belong to the starting 87 | // points. 88 | consistentStartingPoints.add(string); 89 | evidence.add(new Literal(string, true)); 90 | } 91 | } catch (SQLException e) { 92 | e.printStackTrace(); 93 | } 94 | 95 | logger.info("EVIDENCE SIZE = "+evidence.size()); 96 | // logger.debug("EVIDENCE"); 97 | // for (Literal l : evidence) 98 | // logger.debug(l); 99 | 100 | } 101 | 102 | private void buildClauses() { 103 | 104 | clauses = new ArrayList<>(); 105 | 106 | for (int i = 1; i <= 3; i++) { 107 | 108 | logger.debug("Selecting type "+i+" factors..."); 109 | ResultSet rs = db.factors(i); 110 | try { 111 | while (rs.next()) { 112 | 113 | ArrayList lit = new ArrayList<>(); 114 | boolean positive = true; 115 | 116 | // first restriction 117 | String r1 = Type.RELATION.name() + rs.getInt("r1"); 118 | String a1 = u.getKey(Type.ENTITY.name() + rs.getInt("a1")); 119 | String b1 = u.getKey(Type.ENTITY.name() + rs.getInt("b1")); 120 | lit.add(new Literal(r1 + "|" + a1 + "|" + b1, positive)); 121 | 122 | if (i >= 2) { 123 | // second restriction 124 | String r2 = Type.RELATION.name() + rs.getInt("r2"); 125 | String a2 = u.getKey(Type.ENTITY.name() + rs.getInt("a2")); 126 | String b2 = u.getKey(Type.ENTITY.name() + rs.getInt("b2")); 127 | lit.add(new Literal(r2 + "|" + a2 + "|" + b2, positive)); 128 | 129 | if (i >= 3) { 130 | // third restriction 131 | String r3 = Type.RELATION.name() + rs.getInt("r3"); 132 | String a3 = u.getKey(Type.ENTITY.name() + rs.getInt("a3")); 133 | String b3 = u.getKey(Type.ENTITY.name() + rs.getInt("b3")); 134 | lit.add(new Literal(r3 + "|" + a3 + "|" + b3, 135 | positive)); 136 | } 137 | } 138 | 139 | // XXX Since there is a weight, its value is finite 140 | // (hard=false). 
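// (A hard clause would carry no finite weight; every factor row read here provides one, so the clause is always marked as soft.)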
141 | boolean hard = false; 142 | 143 | Clause clause = new Clause(rs.getDouble("weight"), lit, hard); 144 | clauses.add(clause); 145 | logger.trace(clause); 146 | 147 | } 148 | } catch (SQLException e) { 149 | e.printStackTrace(); 150 | } 151 | 152 | logger.debug(clauses.size() + " clauses collected until type "+i+"."); 153 | } 154 | 155 | logger.info(clauses.size() + " clauses collected."); 156 | 157 | } 158 | 159 | public ArrayList getConsistentStartingPoints() { 160 | return consistentStartingPoints; 161 | } 162 | 163 | public ArrayList getClauses() { 164 | return clauses; 165 | } 166 | 167 | public Collection getEvidence() { 168 | return evidence; 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/PostgreDB.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DriverManager; 5 | import java.sql.ResultSet; 6 | import java.sql.SQLException; 7 | import java.sql.Statement; 8 | 9 | import org.aksw.mandolin.util.Bundle; 10 | import org.aksw.mandolin.util.PostgreNotStartedException; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | /** 15 | * @author Tommaso Soru 16 | * 17 | */ 18 | public class PostgreDB { 19 | 20 | private final static Logger logger = LogManager.getLogger(PostgreDB.class); 21 | private Connection con = null; 22 | private Statement st = null; 23 | 24 | public PostgreDB() { 25 | super(); 26 | } 27 | 28 | public void connect() { 29 | 30 | String host = Bundle.getString("pgsql_url"); 31 | // String db = Bundle.getString("pgsql_database"); 32 | String url = "jdbc:postgresql://" + host + "/probkb"; 33 | String user = Bundle.getString("pgsql_username"); 34 | String password = Bundle.getString("pgsql_password"); 35 | 36 | try { 37 | con = DriverManager.getConnection(url, user, password); 38 | st = con.createStatement(); 39 | 40 | } catch (SQLException ex) { 41 | logger.fatal(ex.getMessage() + "\n\n" 42 | + "Maybe PostgreSQL was not started?" + "\n" 43 | + "Open a console and run:" + "\n" + "\tsh pgsql-start.sh" 44 | + "\n"); 45 | throw new PostgreNotStartedException(); 46 | } 47 | 48 | } 49 | 50 | /** 51 | * A factor graph is composed by factors connected with one, two, or three 52 | * clauses (i.e., relationships). 53 | * 54 | * @param n 55 | * size of the restriction, i.e. number of clauses (1, 2, 3). 56 | * @return 57 | */ 58 | public ResultSet factors(int n) { 59 | 60 | try { 61 | 62 | switch (n) { 63 | case 1: 64 | // one... 65 | return st 66 | .executeQuery("select rs1.rel as r1, rs1.ent1 as a1, rs1.ent2 as b1, " 67 | + "f.weight from probkb.relationships as rs1, probkb.factors as f " 68 | + "where f.id1 = rs1.id and f.id2 is null and f.id3 is null;"); 69 | case 2: 70 | // two... 71 | return st 72 | .executeQuery("select rs1.rel as r1, rs1.ent1 as a1, rs1.ent2 as b1, " 73 | + "rs2.rel as r2, rs2.ent1 as a2, rs2.ent2 as b2, " 74 | + "f.weight from probkb.relationships as rs1, " 75 | + "probkb.relationships as rs2, probkb.factors as f " 76 | + "where f.id1 = rs1.id and f.id2 = rs2.id and f.id3 is null;"); 77 | case 3: 78 | // three... 
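// joins three relationships through the factor's id1, id2 and id3 columns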
79 | return st 80 | .executeQuery("select rs1.rel as r1, rs1.ent1 as a1, rs1.ent2 as b1, " 81 | + "rs2.rel as r2, rs2.ent1 as a2, rs2.ent2 as b2, " 82 | + "rs3.rel as r3, rs3.ent1 as a3, rs3.ent2 as b3, " 83 | + "f.weight from probkb.relationships as rs1, " 84 | + "probkb.relationships as rs2, probkb.relationships as rs3, " 85 | + "probkb.factors as f " 86 | + "where f.id1 = rs1.id and f.id2 = rs2.id and f.id3 = rs3.id;"); 87 | } 88 | } catch (SQLException ex) { 89 | logger.warn(ex.getMessage(), ex); 90 | } 91 | 92 | return null; 93 | 94 | } 95 | 96 | public void close() { 97 | try { 98 | 99 | if (st != null) { 100 | st.close(); 101 | } 102 | if (con != null) { 103 | con.close(); 104 | } 105 | 106 | } catch (SQLException ex) { 107 | logger.warn(ex.getMessage(), ex); 108 | } 109 | } 110 | 111 | public ResultSet evidence(int aimNumber) { 112 | 113 | ResultSet rs = null; 114 | try { 115 | rs = st.executeQuery("select rel, ent1, ent2 from probkb.relationships where rel = " 116 | + aimNumber + ";"); 117 | 118 | } catch (SQLException ex) { 119 | logger.warn(ex.getMessage(), ex); 120 | } 121 | return rs; 122 | 123 | } 124 | 125 | public ResultSet evidence() { 126 | 127 | ResultSet rs = null; 128 | try { 129 | rs = st.executeQuery("select rel, ent1, ent2 from probkb.extractions;"); 130 | 131 | } catch (SQLException ex) { 132 | logger.warn(ex.getMessage(), ex); 133 | } 134 | return rs; 135 | } 136 | 137 | } 138 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/ProbKBToRockitGibbsSampling.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.SQLException; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | 7 | import org.aksw.mandolin.controller.NameMapper; 8 | import org.aksw.mandolin.model.PredictionLiteral; 9 | import org.aksw.mandolin.model.PredictionSet; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.solver.pojo.Clause; 14 | import com.googlecode.rockit.app.solver.pojo.Literal; 15 | import com.googlecode.rockit.exception.ParseException; 16 | import com.googlecode.rockit.exception.SolveException; 17 | import com.hp.hpl.jena.vocabulary.OWL; 18 | 19 | /** 20 | * Manager for the Gibbs-Sampling inference. Ground rules can be extracted from 21 | * the Postgre database after being generated by ProbKB (faster) or generated 22 | * through standard grounding by RockIt (slower). 23 | * 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public class ProbKBToRockitGibbsSampling extends RockitGibbsSampling { 28 | 29 | private final static Logger logger = LogManager.getLogger(ProbKBToRockitGibbsSampling.class); 30 | 31 | public static void main(String[] args) { 32 | 33 | PredictionSet ps = new ProbKBToRockitGibbsSampling( 34 | new NameMapper(OWL.sameAs.getURI())).infer(null); 35 | for (PredictionLiteral lit : ps) 36 | logger.info(lit); 37 | 38 | } 39 | 40 | public ProbKBToRockitGibbsSampling(NameMapper map) { 41 | super(map); 42 | } 43 | 44 | /** 45 | * Call ProbKB for grounding and preprocess its input for Gibbs sampling by 46 | * RockIt. 
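* Retrieves the consistent starting points, the clauses and the evidence from the Factors singleton and hands them to the inherited Gibbs sampler.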
47 | */ 48 | public PredictionSet infer(Integer sampling) { 49 | 50 | Factors factors = Factors.getInstance(); 51 | factors.preprocess(map.getAimName()); 52 | 53 | // +++ STARTING POINTS +++ 54 | // Prop2|alb|nob 55 | ArrayList consistentStartingPoints = factors 56 | .getConsistentStartingPoints(); 57 | 58 | // +++ CLAUSES +++ 59 | // Clause [weight=0.0, restriction=[[Prop2|b|e]], hard=true] 60 | ArrayList clauses = factors.getClauses(); 61 | 62 | // +++ EVIDENCE +++ 63 | // [Prop2|2db|h0e] 64 | Collection evidence = factors.getEvidence(); 65 | 66 | logger.debug("Evidence: "+evidence); 67 | 68 | // call Gibbs sampler 69 | PredictionSet ps = null; 70 | try { 71 | ps = gibbsSampling(consistentStartingPoints, clauses, evidence, sampling); 72 | } catch (SQLException | SolveException | ParseException e) { 73 | e.printStackTrace(); 74 | } 75 | 76 | return ps; 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/RockitGibbsSampling.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.SQLException; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | 7 | import org.aksw.mandolin.controller.NameMapper; 8 | import org.aksw.mandolin.model.PredictionLiteral; 9 | import org.aksw.mandolin.model.PredictionSet; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.Parameters; 14 | import com.googlecode.rockit.app.sampler.gibbs.GIBBSLiteral; 15 | import com.googlecode.rockit.app.sampler.gibbs.GIBBSSampler; 16 | import com.googlecode.rockit.app.solver.pojo.Clause; 17 | import com.googlecode.rockit.app.solver.pojo.Literal; 18 | import com.googlecode.rockit.exception.ParseException; 19 | import com.googlecode.rockit.exception.ReadOrWriteToFileException; 20 | import com.googlecode.rockit.exception.SolveException; 21 | import com.googlecode.rockit.parser.SyntaxReader; 22 | 23 | /** 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public abstract class RockitGibbsSampling { 28 | 29 | private final static Logger logger = LogManager.getLogger(RockitGibbsSampling.class); 30 | 31 | protected static SyntaxReader reader; 32 | 33 | protected NameMapper map; 34 | 35 | // Sampling only 36 | /** 37 | * The maximum number of iterations for sampling. 38 | */ 39 | public static final int MAX_ITERATIONS = 10000000; 40 | 41 | protected GIBBSSampler gibbsSampler; 42 | 43 | protected RockitGibbsSampling(NameMapper map) { 44 | super(); 45 | 46 | this.map = map; 47 | 48 | try { 49 | Parameters.readPropertyFile(); 50 | } catch (ReadOrWriteToFileException e) { 51 | logger.error(e.getMessage()); 52 | } 53 | Parameters.USE_CUTTING_PLANE_AGGREGATION = false; 54 | Parameters.USE_CUTTING_PLANE_INFERENCE = false; 55 | reader = new SyntaxReader(); 56 | 57 | } 58 | 59 | public abstract PredictionSet infer(Integer samples); 60 | 61 | /** 62 | * Gibbs Sampling by RockIt. 
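* The iteration count is the given sampling value when present, otherwise 1000 times the number of clauses and evidence literals, capped at MAX_ITERATIONS.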
63 | * 64 | * @param consistentStartingPoints 65 | * @param clauses 66 | * @param evidence 67 | * @throws SQLException 68 | * @throws SolveException 69 | * @throws ParseException 70 | */ 71 | public PredictionSet gibbsSampling( 72 | ArrayList consistentStartingPoints, 73 | ArrayList clauses, Collection evidence, Integer sampling) 74 | throws SQLException, SolveException, ParseException { 75 | 76 | PredictionSet ps = new PredictionSet(map.getAim()); 77 | 78 | gibbsSampler = new GIBBSSampler(); 79 | int iter = iterations(clauses.size() + evidence.size(), sampling); 80 | ArrayList gibbsOutput = gibbsSampler.sample(iter, 81 | clauses, evidence, consistentStartingPoints); 82 | 83 | for (GIBBSLiteral l : gibbsOutput) 84 | ps.add(new PredictionLiteral(l, iter)); 85 | 86 | return ps; 87 | } 88 | 89 | /** 90 | * Get number of iterations. 91 | * @param sampling 92 | * 93 | * @param i 94 | * @return 95 | */ 96 | private int iterations(int literals, Integer sampling) { 97 | 98 | 99 | int iterations; 100 | 101 | long iter = (long) literals * 1000; 102 | 103 | if(sampling != null) // pre-assigned 104 | iterations = sampling; 105 | else if(iter >= Integer.MAX_VALUE) // overflow 106 | iterations = MAX_ITERATIONS; 107 | else if(iter >= MAX_ITERATIONS) // not overflow, but still too high 108 | iterations = MAX_ITERATIONS; 109 | else 110 | iterations = (int) iter; // acceptable value 111 | 112 | logger.info("literals={}, supposed_iter={}, actual_iter={}", literals, iter, iterations); 113 | return iterations; 114 | 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/RockitGroundingAndGibbsSampling.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.io.IOException; 4 | import java.sql.SQLException; 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | 8 | import org.aksw.mandolin.controller.NameMapper; 9 | import org.aksw.mandolin.model.PredictionSet; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.solver.StandardSolver; 14 | import com.googlecode.rockit.app.solver.pojo.Clause; 15 | import com.googlecode.rockit.app.solver.pojo.Literal; 16 | import com.googlecode.rockit.exception.ParseException; 17 | import com.googlecode.rockit.exception.ReadOrWriteToFileException; 18 | import com.googlecode.rockit.exception.SolveException; 19 | import com.googlecode.rockit.javaAPI.Model; 20 | import com.hp.hpl.jena.vocabulary.OWL; 21 | 22 | /** 23 | * Manager for the Gibbs-Sampling inference. Ground rules can be extracted from 24 | * the Postgre database after being generated by ProbKB (faster) or generated 25 | * through standard grounding by RockIt (slower). 26 | * 27 | * TODO missing link: AMIE rules to MLN file (now using dumb rules)! 28 | * 29 | * @author Tommaso Soru 30 | * 31 | */ 32 | public class RockitGroundingAndGibbsSampling extends RockitGibbsSampling { 33 | 34 | private final static Logger logger = LogManager.getLogger(RockitGroundingAndGibbsSampling.class); 35 | 36 | /** 37 | * MLN file. 38 | */ 39 | private String input; 40 | 41 | /** 42 | * DB file. 
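* (RockIt evidence database, e.g. the evidence.db file used in the test launcher below.)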
43 | */ 44 | private String groundings; 45 | 46 | private Model model; 47 | 48 | public static void main(String[] args) throws ReadOrWriteToFileException, 49 | ParseException, IOException { 50 | 51 | // launch test 52 | new RockitGroundingAndGibbsSampling(new NameMapper( 53 | OWL.sameAs.getURI()), "eval/11_publi-mln/prog.mln", 54 | "eval/11_publi-mln/evidence.db").infer(null); 55 | 56 | } 57 | 58 | public RockitGroundingAndGibbsSampling(NameMapper map, String input, 59 | String groundings) throws ReadOrWriteToFileException, 60 | ParseException, IOException { 61 | super(map); 62 | 63 | this.input = input; 64 | this.groundings = groundings; 65 | 66 | } 67 | 68 | /** 69 | * Call RockIt for both standard grounding and Gibbs-sampling inference. 70 | */ 71 | public PredictionSet infer(Integer samples) { 72 | 73 | PredictionSet ps = null; 74 | 75 | try { 76 | model = reader.getModel(input, groundings); 77 | 78 | // standard grounding... 79 | logger.info("Input: " + this.input); 80 | StandardSolver solver = new StandardSolver(model); 81 | // ground MLN and retrieve Clauses 82 | ArrayList consistentStartingPoints = solver.solve(); 83 | logger.info("+++ STARTING POINTS +++"); 84 | for (String s : consistentStartingPoints) 85 | logger.info(s); 86 | ArrayList clauses = solver.getAllClauses(); 87 | logger.info("+++ CLAUSES +++"); 88 | for (Clause c : clauses) 89 | logger.info(c); 90 | Collection evidence = solver.getEvidenceAxioms(); 91 | logger.info("+++ EVIDENCE +++"); 92 | for (Literal l : evidence) 93 | logger.info(l); 94 | solver = null; // free memory 95 | 96 | // call Gibbs sampler 97 | ps = gibbsSampling(consistentStartingPoints, clauses, evidence, samples); 98 | 99 | } catch (ParseException | IOException | SQLException | SolveException e) { 100 | e.printStackTrace(); 101 | } 102 | 103 | return ps; 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/Cache.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import jp.ndca.similarity.join.StringItem; 7 | 8 | /** 9 | * @author Tommaso Soru 10 | * 11 | */ 12 | public class Cache { 13 | 14 | public int count = 0; 15 | public List stringItems = new ArrayList(); 16 | 17 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/ComparableLiteral.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class ComparableLiteral implements Comparable { 8 | 9 | private String uri; 10 | private String val; 11 | 12 | public ComparableLiteral(String uri, String val) { 13 | this.uri = uri; 14 | this.val = val; 15 | } 16 | 17 | public String getUri() { 18 | return uri; 19 | } 20 | 21 | public String getVal() { 22 | return val; 23 | } 24 | 25 | @Override 26 | public int compareTo(ComparableLiteral o) { 27 | return this.getUri().compareTo(o.getUri()); 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/PredictionLiteral.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | import java.io.Serializable; 4 | 5 | import com.googlecode.rockit.app.sampler.gibbs.GIBBSLiteral; 6 | 
import com.googlecode.rockit.javaAPI.HerbrandUniverse; 7 | 8 | /** 9 | * @author Tommaso Soru 10 | * 11 | */ 12 | public class PredictionLiteral implements Comparable, Serializable { 13 | 14 | /** 15 | * 16 | */ 17 | private static final long serialVersionUID = 4558540244149162506L; 18 | 19 | private static HerbrandUniverse u = HerbrandUniverse.getInstance(); 20 | private String p, x, y; 21 | private String id; 22 | private double prob; 23 | 24 | public PredictionLiteral(String input, double prob) { 25 | String[] name = input.split("\\|"); 26 | p = name[0]; 27 | x = u.getConstant(name[1]); 28 | y = u.getConstant(name[2]); 29 | id = p + "(" + x + ", " + y + ")"; 30 | this.prob = prob; 31 | } 32 | 33 | public PredictionLiteral(GIBBSLiteral l, int iter) { 34 | this(l.getName(), l.return_my_probability(iter)); 35 | } 36 | 37 | public String getP() { 38 | return p; 39 | } 40 | 41 | public String getX() { 42 | return x; 43 | } 44 | 45 | public String getY() { 46 | return y; 47 | } 48 | 49 | public double getProb() { 50 | return prob; 51 | } 52 | 53 | public String toString() { 54 | return "P[ " + id + " = true ] = " + prob; 55 | } 56 | 57 | @Override 58 | public int compareTo(PredictionLiteral o) { 59 | return this.id.compareTo(o.id); 60 | } 61 | 62 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/PredictionSet.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.io.ObjectOutputStream; 8 | import java.io.Serializable; 9 | import java.net.URI; 10 | import java.net.URISyntaxException; 11 | import java.util.TreeSet; 12 | 13 | import org.aksw.mandolin.controller.NameMapper; 14 | import org.aksw.mandolin.controller.NameMapper.Type; 15 | import org.apache.jena.riot.Lang; 16 | import org.apache.jena.riot.system.StreamRDF; 17 | import org.apache.jena.riot.system.StreamRDFWriter; 18 | import org.apache.logging.log4j.LogManager; 19 | import org.apache.logging.log4j.Logger; 20 | 21 | import com.hp.hpl.jena.graph.NodeFactory; 22 | import com.hp.hpl.jena.graph.Triple; 23 | import com.hp.hpl.jena.shared.JenaException; 24 | 25 | /** 26 | * @author Tommaso Soru 27 | * 28 | */ 29 | public class PredictionSet extends TreeSet implements 30 | Serializable { 31 | 32 | private final static Logger logger = LogManager.getLogger(PredictionSet.class); 33 | 34 | /** 35 | * 36 | */ 37 | private static final long serialVersionUID = 864082651004354757L; 38 | 39 | /** 40 | * Internal name only. 
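* The wildcard value "*" makes saveLinkset keep predictions for every predicate.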
41 | */ 42 | private String aim; 43 | 44 | public PredictionSet(String aim) { 45 | this.aim = aim; 46 | logger.info("Created prediction set with aim: " + aim); 47 | } 48 | 49 | public String getAim() { 50 | return aim; 51 | } 52 | 53 | public void saveTo(String path) { 54 | ObjectOutputStream oos; 55 | try { 56 | oos = new ObjectOutputStream(new FileOutputStream(path)); 57 | oos.writeObject(this); 58 | oos.close(); 59 | logger.info("Predictions saved to " + path); 60 | } catch (IOException e) { 61 | logger.warn("Cannot save " + this.toString() + ": " 62 | + e.getMessage()); 63 | } 64 | } 65 | 66 | public void saveLinkset(NameMapper map, double theta, String path) { 67 | 68 | FileOutputStream output; 69 | try { 70 | output = new FileOutputStream(new File(path)); 71 | } catch (FileNotFoundException e) { 72 | e.printStackTrace(); 73 | return; 74 | } 75 | 76 | StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 77 | writer.start(); 78 | 79 | double max = Double.MIN_VALUE; 80 | double min = Double.MAX_VALUE; 81 | for (PredictionLiteral lit : this) { 82 | if(lit.getProb() > max) 83 | max = lit.getProb(); 84 | if(lit.getProb() < min) 85 | min = lit.getProb(); 86 | } 87 | double delta = max - min; 88 | logger.debug("Normalization extrema: max = "+max+", min = "+min+", delta = "+delta); 89 | 90 | logger.info("Inferred triples size: "+this.size()); 91 | for (PredictionLiteral lit : this) { 92 | 93 | // filter only aim relation from pset 94 | String p = map.getURI(lit.getP()); 95 | if (!p.equals(aim) && !aim.equals("*")) 96 | continue; 97 | 98 | // relative value for probability 99 | double relprob = (lit.getProb() - min) / delta; 100 | 101 | if (relprob >= theta) { 102 | logger.debug(lit + " (" + relprob + ")"); 103 | String s = map.getURI(lit.getX()); 104 | if(s == null) { 105 | int a = NameMapper.parse(lit.getX()); 106 | String str = String.valueOf(-a); 107 | s = map.getURI(Type.CLASS.name() + str); 108 | } 109 | 110 | // filter out illegal triples... 111 | try { 112 | new URI(s); 113 | } catch (URISyntaxException e) { 114 | logger.debug("A predicted triple has a subject " 115 | + "(" + s + ") which is not a URI. 
Skipping triple..."); 116 | continue; 117 | } catch (NullPointerException e) { 118 | logger.debug("Error on lit.X="+lit.getX()+ " lit.Y="+lit.getY()); 119 | continue; 120 | } 121 | 122 | String o = map.getURI(lit.getY()); 123 | if(o == null) { 124 | int b = NameMapper.parse(lit.getY()); 125 | String str = String.valueOf(-b); 126 | o = map.getURI(Type.CLASS.name() + str); 127 | } 128 | Triple t; 129 | try { 130 | t = new Triple(NodeFactory.createURI(s), 131 | NodeFactory.createURI(p), NodeFactory.createURI(o)); 132 | } catch (JenaException e) { 133 | logger.debug("Some of the following is not a URI: s="+s+", p="+p+", o="+o); 134 | continue; 135 | } 136 | 137 | logger.debug(lit.getProb() + "\t" + t); 138 | 139 | writer.triple(t); 140 | } 141 | } 142 | 143 | writer.finish(); 144 | 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/reasoner/PelletReasoner.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.reasoner; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.Iterator; 7 | 8 | import org.aksw.mandolin.util.Timer; 9 | import org.apache.jena.riot.Lang; 10 | import org.apache.jena.riot.RDFDataMgr; 11 | import org.apache.jena.riot.system.StreamRDF; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | import org.mindswap.pellet.jena.PelletReasonerFactory; 15 | 16 | import com.hp.hpl.jena.graph.Node; 17 | import com.hp.hpl.jena.graph.NodeFactory; 18 | import com.hp.hpl.jena.graph.Triple; 19 | import com.hp.hpl.jena.ontology.OntModel; 20 | import com.hp.hpl.jena.rdf.model.InfModel; 21 | import com.hp.hpl.jena.rdf.model.ModelFactory; 22 | import com.hp.hpl.jena.rdf.model.Property; 23 | import com.hp.hpl.jena.rdf.model.RDFNode; 24 | import com.hp.hpl.jena.rdf.model.Resource; 25 | import com.hp.hpl.jena.reasoner.Reasoner; 26 | import com.hp.hpl.jena.reasoner.ValidityReport; 27 | import com.hp.hpl.jena.shared.Lock; 28 | import com.hp.hpl.jena.sparql.core.Quad; 29 | import com.hp.hpl.jena.vocabulary.XSD; 30 | 31 | /** 32 | * Pellet-Jena reasoner. The inferred closure model is saved in file; it will 33 | * not be available as an in-memory object. 34 | * 35 | * @author Tommaso Soru 36 | * 37 | */ 38 | public class PelletReasoner { 39 | 40 | private final static Logger logger = LogManager.getLogger(PelletReasoner.class); 41 | 42 | public static void main(String[] args) { 43 | testThis(); 44 | // run("eval/0001"); 45 | } 46 | 47 | /** 48 | * Add OWL rules and compute the forward chain. 
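* Loads base/model.nt into a Pellet-backed inference model, validates it, writes the inferred closure to base/model-fwc.nt and deletes the original model file.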
49 | * 50 | * @param base 51 | * @param datasetPaths 52 | */ 53 | public static void run(String base) { 54 | 55 | Reasoner reasoner = PelletReasonerFactory.theInstance().create(); 56 | OntModel ontModel = ModelFactory 57 | .createOntologyModel(PelletReasonerFactory.THE_SPEC); 58 | InfModel infModel = ModelFactory.createInfModel(reasoner, ontModel); 59 | 60 | String path = System.getProperty("user.dir"); 61 | RDFDataMgr.read(infModel, "file://" + path + "/" + base + "/model.nt"); 62 | 63 | logger.info("Model size = " + ontModel.size()); 64 | 65 | ValidityReport report = infModel.validate(); 66 | printIterator(report.getReports(), "Validation Results"); 67 | 68 | logger.info("Inferred model size = " + infModel.size()); 69 | 70 | infModel.enterCriticalSection(Lock.READ); 71 | 72 | try { 73 | RDFDataMgr.write(new FileOutputStream(new File(base 74 | + "/model-fwc.nt")), infModel, Lang.NT); 75 | logger.info("Model generated."); 76 | } catch (FileNotFoundException e) { 77 | logger.fatal(e.getMessage()); 78 | throw new RuntimeException("Necessary file model-fwc.nt was not generated."); 79 | } finally { 80 | infModel.leaveCriticalSection(); 81 | } 82 | 83 | new File(base + "/model.nt").delete(); 84 | 85 | } 86 | 87 | public static void closure(String input, String output) { 88 | 89 | Reasoner reasoner = PelletReasonerFactory.theInstance().create(); 90 | OntModel ontModel = ModelFactory 91 | .createOntologyModel(PelletReasonerFactory.THE_SPEC); 92 | InfModel infModel = ModelFactory.createInfModel(reasoner, ontModel); 93 | 94 | String path = System.getProperty("user.dir"); 95 | RDFDataMgr.read(infModel, "file://" + path + "/" + input); 96 | 97 | logger.info("Model = "+input+", size = " + ontModel.size()); 98 | 99 | ValidityReport report = infModel.validate(); 100 | printIterator(report.getReports(), "Validation Results"); 101 | 102 | logger.info("Inferred model size = " + infModel.size()); 103 | 104 | infModel.enterCriticalSection(Lock.READ); 105 | 106 | try { 107 | RDFDataMgr.write(new FileOutputStream(new File(output)), 108 | infModel, Lang.NT); 109 | logger.info("Model generated at "+output); 110 | } catch (FileNotFoundException e) { 111 | logger.fatal(e.getMessage()); 112 | throw new RuntimeException("Necessary file "+output+" was not generated."); 113 | } finally { 114 | infModel.leaveCriticalSection(); 115 | } 116 | 117 | } 118 | 119 | private static void testThis() { 120 | 121 | Timer t = new Timer(); 122 | 123 | Reasoner reasoner = PelletReasonerFactory.theInstance().create(); 124 | OntModel ontModel = ModelFactory 125 | .createOntologyModel(PelletReasonerFactory.THE_SPEC); 126 | InfModel infModel = ModelFactory.createInfModel(reasoner, ontModel); 127 | 128 | t.lap(); 129 | 130 | String path = System.getProperty("user.dir"); 131 | 132 | String[] paths = { "file://" + path + "/datasets/DBLPL3S-100.nt", 133 | "file://" + path + "/datasets/LinkedACM-100.nt", 134 | "file://" + path + "/linksets/DBLPL3S-LinkedACM-100.nt" }; 135 | 136 | StreamRDF dataStream = new StreamRDF() { 137 | 138 | @Override 139 | public void start() { 140 | } 141 | 142 | @Override 143 | public void quad(Quad quad) { 144 | } 145 | 146 | @Override 147 | public void base(String base) { 148 | } 149 | 150 | @Override 151 | public void prefix(String prefix, String iri) { 152 | } 153 | 154 | @Override 155 | public void finish() { 156 | } 157 | 158 | @Override 159 | public void triple(Triple triple) { 160 | Node node = triple.getObject(); 161 | if (node.isLiteral()) { 162 | if (!node.getLiteral().isWellFormed()) { 163 | // known 
issue: fix gYear literals 164 | if (node.getLiteralDatatypeURI() != null) { 165 | if (node.getLiteralDatatypeURI().equals( 166 | XSD.gYear.getURI()) 167 | || node.getLiteralDatatypeURI().equals( 168 | XSD.gYear.getLocalName())) { 169 | Node newNode = NodeFactory.createLiteral(node 170 | .getLiteral().toString() 171 | .substring(0, 4) 172 | + "^^" + XSD.gYear); 173 | triple = new Triple(triple.getSubject(), 174 | triple.getPredicate(), newNode); 175 | // logger.warn("Bad-formed literal: " 176 | // + node + " - Using: " + newNode); 177 | } 178 | } 179 | } 180 | } 181 | 182 | Resource s = infModel.createResource(triple.getSubject() 183 | .getURI()); 184 | Property p = infModel.createProperty(triple.getPredicate() 185 | .getURI()); 186 | RDFNode o = infModel.asRDFNode(triple.getObject()); 187 | 188 | infModel.add(s, p, o); 189 | } 190 | 191 | }; 192 | 193 | for (String p : paths) 194 | RDFDataMgr.parse(dataStream, p); 195 | 196 | t.lap(); 197 | 198 | logger.info("Model size = " + ontModel.size()); 199 | 200 | ValidityReport report = infModel.validate(); 201 | printIterator(report.getReports(), "Validation Results"); 202 | 203 | logger.info("Inferred model size = " + infModel.size()); 204 | 205 | infModel.enterCriticalSection(Lock.READ); 206 | 207 | String f = "tmp/test-this.nt"; 208 | try { 209 | RDFDataMgr.write(new FileOutputStream(new File(f)), 210 | infModel, Lang.NT); 211 | logger.info("Model generated."); 212 | } catch (FileNotFoundException e) { 213 | logger.fatal(e.getMessage()); 214 | throw new RuntimeException("Necessary file "+f+" was not generated."); 215 | } finally { 216 | infModel.leaveCriticalSection(); 217 | } 218 | 219 | t.lap(); 220 | 221 | logger.info("Reasoner init (ms): " + t.getLapMillis(0)); 222 | logger.info("Model load (ms): " + t.getLapMillis(1)); 223 | logger.info("Model load (ms/triple): " + t.getLapMillis(1) 224 | / infModel.size()); 225 | logger.info("Validation (ms): " + t.getLapMillis(2)); 226 | logger.info("Save inferred model (ms): " + t.getLapMillis(3)); 227 | printIterator(report.getReports(), "Validation Results"); 228 | 229 | } 230 | 231 | private static void printIterator(Iterator i, String header) { 232 | logger.info(header); 233 | 234 | if (i.hasNext()) { 235 | while (i.hasNext()) 236 | logger.info(i.next()); 237 | } else 238 | logger.info(""); 239 | 240 | logger.info(""); 241 | } 242 | 243 | } 244 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/AmieHandler.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.util.List; 4 | 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | 8 | import javatools.parsers.NumberFormatter; 9 | import amie.mining.AMIE; 10 | import amie.rules.Rule; 11 | 12 | /** 13 | * @author Tommaso Soru 14 | * 15 | */ 16 | public class AmieHandler { 17 | 18 | private final static Logger logger = LogManager.getLogger(AmieHandler.class); 19 | 20 | public static enum MiningStrategy { 21 | HEAD_COVERAGE, SUPPORT; 22 | } 23 | 24 | private String ontology; 25 | private List rules = null; 26 | private Double miningThr = 0.01; 27 | 28 | public AmieHandler(String ontology) { 29 | super(); 30 | this.ontology = ontology; 31 | } 32 | 33 | public void run(MiningStrategy ms) throws Exception { 34 | 35 | AMIE miner; 36 | switch(ms) { 37 | case HEAD_COVERAGE: 38 | miner = AMIE.getInstance(new String[] { ontology, "-minhc", 
String.valueOf(miningThr) }); 39 | break; 40 | case SUPPORT: 41 | miner = AMIE.getInstance(new String[] { ontology, "-pm", "support", "-mins", "0" }); 42 | break; 43 | default: 44 | throw new RuntimeException("MiningStrategy does not exist: " + ms.name()); 45 | } 46 | 47 | logger.info("Starting the mining phase"); 48 | 49 | long time = System.currentTimeMillis(); 50 | 51 | rules = miner.mine(); 52 | 53 | if (!miner.isRealTime()) { 54 | Rule.printRuleHeaders(); 55 | for (Rule rule : rules) { 56 | logger.info(rule.getFullRuleString()); 57 | } 58 | } 59 | 60 | long miningTime = System.currentTimeMillis() - time; 61 | logger.info("Mining done in " 62 | + NumberFormatter.formatMS(miningTime)); 63 | logger.info(rules.size() + " rules mined."); 64 | 65 | } 66 | 67 | public List getRules() { 68 | return rules; 69 | } 70 | 71 | public void setMiningThr(Double mining) { 72 | this.miningThr = mining; 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/RDFToTSV.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.PrintWriter; 6 | 7 | import org.aksw.mandolin.util.URIHandler; 8 | import org.apache.jena.riot.RDFDataMgr; 9 | import org.apache.jena.riot.system.StreamRDF; 10 | 11 | import com.hp.hpl.jena.graph.Triple; 12 | import com.hp.hpl.jena.sparql.core.Quad; 13 | 14 | /** 15 | * Generate input for AMIE. 16 | * 17 | * @author Tommaso Soru 18 | * 19 | */ 20 | public class RDFToTSV { 21 | 22 | public static void main(String[] args) throws Exception { 23 | 24 | run("eval/0001"); 25 | 26 | } 27 | 28 | public static void run(String base) 29 | throws FileNotFoundException { 30 | 31 | PrintWriter pw = new PrintWriter(new File(base + "/model.tsv")); 32 | 33 | StreamRDF stream = new StreamRDF() { 34 | 35 | @Override 36 | public void triple(Triple triple) { 37 | pw.write(URIHandler.parse(triple.getSubject()) + "\t" 38 | + triple.getPredicate().getURI() + "\t" 39 | + triple.getObject().toString() + "\n"); 40 | } 41 | 42 | @Override 43 | public void start() { 44 | } 45 | 46 | @Override 47 | public void quad(Quad quad) { 48 | } 49 | 50 | @Override 51 | public void prefix(String prefix, String iri) { 52 | } 53 | 54 | @Override 55 | public void finish() { 56 | } 57 | 58 | @Override 59 | public void base(String base) { 60 | } 61 | 62 | }; 63 | 64 | RDFDataMgr.parse(stream, base + "/model-fwc.nt"); 65 | 66 | pw.close(); 67 | 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/RuleDriver.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.io.File; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | 9 | import javatools.datatypes.ByteString; 10 | 11 | import org.aksw.mandolin.controller.NameMapper; 12 | import org.aksw.mandolin.controller.ProbKBData; 13 | import org.apache.logging.log4j.LogManager; 14 | import org.apache.logging.log4j.Logger; 15 | 16 | import amie.rules.Rule; 17 | 18 | import com.opencsv.CSVWriter; 19 | 20 | /** 21 | * Driver of rules from Amie to ProbKB. 
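* Each mined rule is mapped onto one of ProbKB's six MLN rule shapes and buffered as a CSV row; buildCSV() then writes the mln1..mln6 files.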
22 | * 23 | * @author Tommaso Soru 24 | * 25 | */ 26 | public class RuleDriver { 27 | 28 | private final static Logger logger = LogManager.getLogger(RuleDriver.class); 29 | 30 | private NameMapper map; 31 | private String base; 32 | 33 | private static final String HEAD_LEFT = "?a"; 34 | private static final String HEAD_RIGHT = "?b"; 35 | 36 | private HashMap> csvContent = new HashMap<>(); 37 | 38 | public RuleDriver(NameMapper map, String base) { 39 | super(); 40 | this.map = map; 41 | this.base = base; 42 | for(int i=1; i<=6; i++) 43 | csvContent.put(base + "/mln"+i+".csv", new ArrayList<>()); 44 | } 45 | 46 | public void process(Rule rule) throws IOException { 47 | 48 | int size = rule.getBody().size(); 49 | 50 | if (size == 1) { // call one or two 51 | 52 | ByteString[] b = rule.getBody().get(0); 53 | // subject, predicate, object 54 | String pHead = rule.getHeadRelation(); 55 | String pBody = b[1].toString(); // TODO check me! 56 | if (b[0].toString().equals(HEAD_LEFT)) 57 | addTypeOne(pHead, pBody, toWeight(rule.getPcaConfidence())); 58 | else 59 | addTypeTwo(pHead, pBody, toWeight(rule.getPcaConfidence())); 60 | } else { // call three to six 61 | 62 | ByteString[] b1 = rule.getBody().get(0); 63 | ByteString[] b2 = rule.getBody().get(1); 64 | 65 | String pHead = rule.getHeadRelation(); 66 | String pBody1 = b1[1].toString(); 67 | String pBody2 = b2[1].toString(); 68 | 69 | if (b1[0].toString().equals(HEAD_LEFT) && b2[0].toString().equals(HEAD_RIGHT)) 70 | addTypeThree(pHead, pBody1, pBody2, 71 | toWeight(rule.getPcaConfidence())); 72 | if (b1[0].toString().equals(HEAD_RIGHT) && b2[0].toString().equals(HEAD_LEFT)) 73 | addTypeThree(pHead, pBody2, pBody1, 74 | toWeight(rule.getPcaConfidence())); 75 | 76 | if (b1[0].toString().equals(HEAD_LEFT) && b2[2].toString().equals(HEAD_RIGHT)) 77 | addTypeFour(pHead, pBody1, pBody2, 78 | toWeight(rule.getPcaConfidence())); 79 | if (b1[2].toString().equals(HEAD_RIGHT) && b2[0].toString().equals(HEAD_LEFT)) 80 | addTypeFour(pHead, pBody2, pBody1, 81 | toWeight(rule.getPcaConfidence())); 82 | 83 | if (b1[2].toString().equals(HEAD_LEFT) && b2[0].toString().equals(HEAD_RIGHT)) 84 | addTypeFive(pHead, pBody1, pBody2, 85 | toWeight(rule.getPcaConfidence())); 86 | if (b1[0].toString().equals(HEAD_RIGHT) && b2[2].toString().equals(HEAD_LEFT)) 87 | addTypeFive(pHead, pBody2, pBody1, 88 | toWeight(rule.getPcaConfidence())); 89 | 90 | if (b1[2].toString().equals(HEAD_LEFT) && b2[2].toString().equals(HEAD_RIGHT)) 91 | addTypeSix(pHead, pBody1, pBody2, 92 | toWeight(rule.getPcaConfidence())); 93 | if (b1[2].toString().equals(HEAD_RIGHT) && b2[2].toString().equals(HEAD_LEFT)) 94 | addTypeSix(pHead, pBody2, pBody1, 95 | toWeight(rule.getPcaConfidence())); 96 | 97 | } 98 | } 99 | 100 | /** 101 | * @param pcaConfidence 102 | * @return 103 | */ 104 | private double toWeight(double pcaConfidence) { 105 | return pcaConfidence; 106 | } 107 | 108 | /** 109 | * p(x,y) <- q(x,y) 110 | * 111 | * @param pHead 112 | * @param pBody 113 | * @param weight 114 | * @throws IOException 115 | */ 116 | private void addTypeOne(String pHead, String pBody, double weight) { 117 | logger.trace("Adding type one: "+pHead+", "+pBody+", "+weight); 118 | String headName = map.getName(pHead).substring(ProbKBData.REL_LENGTH); 119 | String bodyName = map.getName(pBody).substring(ProbKBData.REL_LENGTH); 120 | String str[] = { 121 | headName, 122 | bodyName, 123 | "1", // TODO class of x 124 | "1", // TODO class of y 125 | "" + weight 126 | }; 127 | csvContent.get(base + "/mln1.csv").add(str); 128 
| } 129 | 130 | /** 131 | * p(x,y) <- q(y,x) 132 | * 133 | * @param pHead 134 | * @param pBody 135 | * @param weight 136 | */ 137 | private void addTypeTwo(String pHead, String pBody, double weight) { 138 | logger.trace("Adding type two: "+pHead+", "+pBody+", "+weight); 139 | String str[] = { 140 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 141 | map.getName(pBody).substring(ProbKBData.REL_LENGTH), 142 | "1", // TODO class of x 143 | "1", // TODO class of y 144 | "" + weight 145 | }; 146 | csvContent.get(base + "/mln2.csv").add(str); 147 | } 148 | 149 | /** 150 | * p(x,y) <- q(x,z), r(y,z) 151 | * 152 | * @param pHead 153 | * @param pBodyQ 154 | * @param pBodyR 155 | * @param weight 156 | */ 157 | private void addTypeThree(String pHead, String pBodyQ, String pBodyR, 158 | double weight) { 159 | logger.trace("Adding type three: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 160 | String str[] = { 161 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 162 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 163 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 164 | "1", // TODO class of x 165 | "1", // TODO class of y 166 | "1", // TODO class of z 167 | "" + weight 168 | }; 169 | csvContent.get(base + "/mln3.csv").add(str); 170 | } 171 | 172 | /** 173 | * p(x,y) <- q(x,z), r(z,y) 174 | * @param pHead 175 | * @param pBodyQ 176 | * @param pBodyR 177 | * @param weight 178 | */ 179 | private void addTypeFour(String pHead, String pBodyQ, String pBodyR, 180 | double weight) { 181 | logger.trace("Adding type four: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 182 | String str[] = { 183 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 184 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 185 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 186 | "1", // TODO class of x 187 | "1", // TODO class of y 188 | "1", // TODO class of z 189 | "" + weight 190 | }; 191 | csvContent.get(base + "/mln4.csv").add(str); 192 | } 193 | 194 | /** 195 | * p(x,y) <- q(z,x), r(y,z) 196 | * @param pHead 197 | * @param pBodyQ 198 | * @param pBodyR 199 | * @param weight 200 | */ 201 | private void addTypeFive(String pHead, String pBodyQ, String pBodyR, 202 | double weight) { 203 | logger.trace("Adding type five: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 204 | String str[] = { 205 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 206 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 207 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 208 | "1", // TODO class of x 209 | "1", // TODO class of y 210 | "1", // TODO class of z 211 | "" + weight 212 | }; 213 | csvContent.get(base + "/mln5.csv").add(str); 214 | } 215 | 216 | /** 217 | * p(x,y) <- q(z,x), r(z,y) 218 | * @param pHead 219 | * @param pBodyQ 220 | * @param pBodyR 221 | * @param weight 222 | */ 223 | private void addTypeSix(String pHead, String pBodyQ, String pBodyR, 224 | double weight) { 225 | logger.trace("Adding type six: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 226 | String str[] = { 227 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 228 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 229 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 230 | "1", // TODO class of x 231 | "1", // TODO class of y 232 | "1", // TODO class of z 233 | "" + weight 234 | }; 235 | csvContent.get(base + "/mln6.csv").add(str); 236 | } 237 | 238 | public void buildCSV() { 239 | 240 | for(String key : csvContent.keySet()) { 241 | CSVWriter writer = null; 242 | try { 243 | writer = new CSVWriter(new 
FileWriter(new File(key))); 244 | for(String[] line : csvContent.get(key)) 245 | writer.writeNext(line); 246 | writer.close(); 247 | } catch (IOException e) { 248 | logger.error(e.getMessage()); 249 | // XXX RuntimeException? 250 | } 251 | } 252 | } 253 | 254 | } 255 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/RuleMiner.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.util.Comparator; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.TreeMap; 8 | import java.util.TreeSet; 9 | 10 | import javatools.datatypes.ByteString; 11 | 12 | import org.aksw.mandolin.controller.NameMapper; 13 | import org.aksw.mandolin.rulemining.AmieHandler.MiningStrategy; 14 | import org.apache.logging.log4j.LogManager; 15 | import org.apache.logging.log4j.Logger; 16 | 17 | import amie.rules.Rule; 18 | 19 | import com.hp.hpl.jena.vocabulary.OWL; 20 | import com.hp.hpl.jena.vocabulary.RDF; 21 | import com.hp.hpl.jena.vocabulary.RDFS; 22 | 23 | /** 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public class RuleMiner { 28 | 29 | private final static Logger logger = LogManager.getLogger(RuleMiner.class); 30 | 31 | /** 32 | * @param map 33 | * @param base 34 | * @param mining 35 | * @param maxRules 36 | * @throws Exception 37 | */ 38 | public static void run(NameMapper map, String base, Double mining, Integer maxRules) throws Exception { 39 | 40 | boolean support = (mining == null); 41 | 42 | AmieHandler h = new AmieHandler(base + "/model.tsv"); 43 | 44 | if(!support) { 45 | h.setMiningThr(mining); 46 | h.run(MiningStrategy.HEAD_COVERAGE); 47 | if(h.getRules().isEmpty()) 48 | support = true; 49 | } 50 | 51 | if(support) { 52 | h.run(MiningStrategy.SUPPORT); 53 | if(h.getRules().isEmpty()) { 54 | logger.fatal("Rules size = 0"); 55 | throw new RuntimeException("Mandolin cannot continue without MLN rules!"); 56 | } 57 | } 58 | 59 | List rules = h.getRules(); 60 | if(rules.isEmpty()) { 61 | logger.fatal("Rules size = 0"); 62 | throw new RuntimeException("Mandolin cannot continue without MLN rules!"); 63 | } 64 | 65 | TreeSet topNRules = new TreeSet<>(); 66 | if(maxRules != null) { 67 | HashMap rank = new HashMap<>(); 68 | for(Rule rule : rules) 69 | rank.put(rule.toString(), rule.getPcaConfidence()); 70 | ValueComparator bvc = new ValueComparator(rank); 71 | TreeMap sortedRank = new TreeMap(bvc); 72 | sortedRank.putAll(rank); 73 | int i=0; 74 | for(String key : sortedRank.keySet()) { 75 | topNRules.add(key); 76 | logger.trace(key + ", " + rank.get(key)); 77 | if(++i == maxRules) 78 | break; 79 | } 80 | } 81 | 82 | RuleDriver driver = new RuleDriver(map, base); 83 | 84 | for(Rule rule : rules) { 85 | 86 | if(maxRules != null) 87 | if(!topNRules.contains(rule.toString())) 88 | continue; 89 | 90 | // filter out RDF/RDFS/OWL-only rules 91 | if(isUpper(rule.getHeadRelation())) { 92 | boolean skip = true; 93 | for(ByteString[] bs : rule.getBody()) 94 | if(!isUpper(bs[1].toString())) { 95 | skip = false; 96 | break; 97 | } 98 | if(skip) { 99 | logger.trace("Skipping upper-ontology rule..."); 100 | continue; 101 | } 102 | } 103 | 104 | // send rule to driver 105 | driver.process(rule); 106 | // print rule information 107 | printInfo(rule); 108 | } 109 | 110 | // make CSVs 111 | driver.buildCSV(); 112 | 113 | } 114 | 115 | /** 116 | * @param rule 117 | */ 118 | private static void printInfo(Rule rule) 
{ 119 | String str = ""; 120 | for(ByteString[] bs : rule.getBody()) { 121 | String bstr = ""; 122 | for(ByteString b : bs) 123 | bstr += b + ","; 124 | str += bstr + " | "; 125 | } 126 | logger.info(rule.getHeadRelation() + "\t" + str + "\t" + rule.getPcaConfidence()); 127 | } 128 | 129 | /** 130 | * @param headRelation 131 | * @return 132 | */ 133 | private static boolean isUpper(String headRelation) { 134 | if(headRelation.startsWith(OWL.NS)) 135 | return true; 136 | if(headRelation.startsWith(RDF.getURI())) 137 | return true; 138 | if(headRelation.startsWith(RDFS.getURI())) 139 | return true; 140 | return false; 141 | } 142 | 143 | } 144 | 145 | class ValueComparator implements Comparator { 146 | 147 | Map base; 148 | 149 | public ValueComparator(Map base) { 150 | this.base = base; 151 | } 152 | 153 | // Note: this comparator imposes orderings that are inconsistent with 154 | // equals. 155 | public int compare(String a, String b) { 156 | if (base.get(a) >= base.get(b)) { 157 | return -1; 158 | } else { 159 | return 1; 160 | } // returning 0 would merge keys 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/Commons.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.FileOutputStream; 4 | 5 | import com.hp.hpl.jena.query.Query; 6 | import com.hp.hpl.jena.query.QueryExecution; 7 | import com.hp.hpl.jena.query.QueryExecutionFactory; 8 | import com.hp.hpl.jena.query.QueryFactory; 9 | import com.hp.hpl.jena.query.ResultSet; 10 | import com.hp.hpl.jena.query.Syntax; 11 | import com.hp.hpl.jena.rdf.model.Model; 12 | import com.hp.hpl.jena.rdf.model.Property; 13 | import com.hp.hpl.jena.rdf.model.Resource; 14 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 15 | 16 | /** 17 | * Database builder common constants and methods. 
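* Collects the SPARQL endpoints, graph names, namespaces, vocabulary terms and I/O paths shared by the DatasetBuild* classes.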
18 | * 19 | * @author Tommaso Soru 20 | * 21 | */ 22 | public class Commons { 23 | 24 | // SPARQL 25 | // used to be "http://dblp.l3s.de/d2r/sparql" 26 | public static final String DBLPL3S_ENDPOINT = "http://139.18.8.97:8890/sparql"; 27 | // used to be "" 28 | public static final String DBLPL3S_GRAPH = "http://dblp.l3s.de"; 29 | public static final String ACMRKB_ENDPOINT = "http://139.18.8.97:8890/sparql"; 30 | public static final String ACMRKB_GRAPH = "http://acm.rkbexplorer.com"; 31 | 32 | public static final String DBLP_NAMESPACE = "http://dblp.rkbexplorer.com/id/"; 33 | public static final String DBLPL3S_NAMESPACE = "http://dblp.l3s.de/d2r/resource/publications/"; 34 | public static final String ACMRKB_NAMESPACE = "http://acm.rkbexplorer.com/id/"; 35 | public static final String LINKEDACM_NAMESPACE = "http://mandolin.aksw.org/acm/"; 36 | 37 | public static final String OLD_AUTHOR_PREFIX = "http://acm.rkbexplorer.com/id/person-"; 38 | 39 | // URIs 40 | public static final Resource DBLPL3S_PUBLICATION_CLASS = ResourceFactory 41 | .createResource("http://xmlns.com/foaf/0.1/Document"); 42 | public static final Resource ACMRKB_PUBLICATION_CLASS = ResourceFactory 43 | .createResource("http://www.aktors.org/ontology/portal#Article-Reference"); 44 | public static final Resource DBLPL3S_AUTHOR_CLASS = ResourceFactory 45 | .createResource("http://xmlns.com/foaf/0.1/Agent"); 46 | public static final Resource ACMRKB_AUTHOR_CLASS = ResourceFactory 47 | .createResource("http://www.aktors.org/ontology/portal#Person"); 48 | public static final Property RDF_TYPE = ResourceFactory 49 | .createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); 50 | public static final Property OWL_SAMEAS = ResourceFactory 51 | .createProperty("http://www.w3.org/2002/07/owl#sameAs"); 52 | public static final Property RDFS_LABEL = ResourceFactory 53 | .createProperty("http://www.w3.org/2000/01/rdf-schema#label"); 54 | public static final Property HAS_AUTHOR = ResourceFactory 55 | .createProperty("http://www.aktors.org/ontology/portal#has-author"); 56 | public static final Property HAS_TITLE = ResourceFactory 57 | .createProperty("http://www.aktors.org/ontology/portal#has-title"); 58 | public static final Property FULL_NAME = ResourceFactory 59 | .createProperty("http://www.aktors.org/ontology/portal#full-name"); 60 | public static final Property DC_CREATOR = ResourceFactory 61 | .createProperty("http://purl.org/dc/elements/1.1/creator"); 62 | 63 | 64 | // I/O 65 | public static final String DBLP_ACM_CSV = "mappings/dblp-acm.csv"; 66 | public static final String DBLP_ACM_FIXED_CSV = "mappings/dblp-acm-fixed.csv"; 67 | public static final String DBLP_ACM_REMOVED_CSV = "tmp/removed-publications.csv"; 68 | 69 | public static final String PUBS_WITH_AUTHORS_MAP = "tmp/pubs-with-authors.dblp-l3s.map"; 70 | public static final String AUTHORS_SAMEAS_MAP = "tmp/authors-sameas.map"; 71 | 72 | public static final String TO_BE_DELETED_ID = "tmp/to-be-deleted-id.txt"; 73 | public static final String TO_BE_DELETED = "tmp/to-be-deleted.txt"; 74 | public static final String DISTANCES_CSV = "tmp/distances.csv"; 75 | 76 | public static final String LINKEDACM_NT = "datasets/LinkedACM.nt"; 77 | public static final String DBLPL3S_LINKEDACM_NT = "linksets/DBLPL3S-LinkedACM.nt"; 78 | public static final String DBLPL3S_NT = "datasets/DBLPL3S.nt"; 79 | 80 | 81 | 82 | /** 83 | * Perform SPARQL query against an endpoint on a given graph. 
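* The QueryExecution is created per call and not closed here; the ResultSet is handed back to the caller as-is.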
84 | * 85 | * @param query 86 | * @param endpoint 87 | * @param graph 88 | * @return 89 | */ 90 | public static ResultSet sparql(String query, String endpoint, String graph) { 91 | 92 | Query sparqlQuery = QueryFactory.create(query, Syntax.syntaxARQ); 93 | QueryExecution qexec = QueryExecutionFactory.sparqlService(endpoint, 94 | sparqlQuery, graph); 95 | return qexec.execSelect(); 96 | 97 | } 98 | 99 | /** 100 | * Save the model to N-Triple file. 101 | * 102 | * @param m 103 | * @param name 104 | */ 105 | public static void save(Model m, String name) { 106 | 107 | // save to TURTLE/N3 108 | try { 109 | FileOutputStream fout = new FileOutputStream(name); 110 | m.write(fout, "N-TRIPLES"); 111 | fout.close(); 112 | } catch (Exception e) { 113 | System.out.println("Exception caught" + e.getMessage()); 114 | e.printStackTrace(); 115 | } 116 | 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuildFixer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | import java.io.PrintWriter; 9 | import java.util.HashMap; 10 | import java.util.Scanner; 11 | import java.util.TreeSet; 12 | 13 | import org.simmetrics.metrics.Levenshtein; 14 | 15 | import com.hp.hpl.jena.query.QuerySolution; 16 | import com.hp.hpl.jena.query.ResultSet; 17 | import com.opencsv.CSVReader; 18 | import com.opencsv.CSVWriter; 19 | 20 | /** 21 | * Removes faulty mappings from the gold standard, e.g. when the authors cannot 22 | * be linked because one of them is missing in one dataset. 
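* run() derives the blacklist of ACM publication ids from the faulty author pairs; fix() then rewrites the gold-standard CSV, diverting the blacklisted rows to a separate file.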
23 | * 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public class DatasetBuildFixer { 28 | 29 | public static void main(String[] args) throws IOException, ClassNotFoundException { 30 | 31 | // System.out.println(new Levenshtein().distance("Query Execution Techniques for Caching Expensive Methods.", "2Q")); 32 | 33 | new DatasetBuildFixer().run(); 34 | new DatasetBuildFixer().fix(); 35 | } 36 | 37 | public void fix() throws IOException { 38 | 39 | TreeSet ids = new TreeSet<>(); 40 | Scanner in = new Scanner(new File(Commons.TO_BE_DELETED_ID)); 41 | while (in.hasNextLine()) 42 | ids.add(in.nextLine()); 43 | in.close(); 44 | 45 | System.out.println("-----------\n"+ids); 46 | 47 | CSVReader reader = new CSVReader(new FileReader(new File(Commons.DBLP_ACM_CSV))); 48 | CSVWriter writer = new CSVWriter(new FileWriter(new File(Commons.DBLP_ACM_FIXED_CSV))); 49 | CSVWriter removed = new CSVWriter(new FileWriter(new File(Commons.DBLP_ACM_REMOVED_CSV))); 50 | String[] nextLine = reader.readNext(); 51 | writer.writeNext(nextLine); 52 | removed.writeNext(nextLine); 53 | while ((nextLine = reader.readNext()) != null) { 54 | if(ids.contains(nextLine[1])) { 55 | removed.writeNext(nextLine); 56 | System.out.println("Removed: "+nextLine[0]+" | "+nextLine[1]); 57 | } else 58 | writer.writeNext(nextLine); 59 | } 60 | removed.close(); 61 | writer.close(); 62 | reader.close(); 63 | 64 | } 65 | 66 | public void run() throws FileNotFoundException { 67 | 68 | TreeSet blacklist = new TreeSet<>(); 69 | PrintWriter pw = new PrintWriter(new File(Commons.TO_BE_DELETED_ID)); 70 | 71 | // get list of faulty authors 72 | TreeSet pairs = new TreeSet<>(); 73 | Scanner in = new Scanner(new File(Commons.TO_BE_DELETED)); 74 | while (in.hasNextLine()) 75 | pairs.add(in.nextLine()); 76 | in.close(); 77 | 78 | for (String pair : pairs) { 79 | String dblp = pair.split(",")[0]; 80 | String acm = pair.split(",")[1]; 81 | 82 | System.out.println(dblp+" | "+acm); 83 | 84 | // query for DBLP-L3S publications 85 | HashMap dblpLabelToURI = new HashMap<>(); 86 | ResultSet rs1 = Commons.sparql( 87 | "select ?p ?t where { ?p <"+Commons.DC_CREATOR+"> <" + dblp 88 | + "> . ?p <"+Commons.RDFS_LABEL+"> ?t }", 89 | Commons.DBLPL3S_ENDPOINT, Commons.DBLPL3S_GRAPH); 90 | while(rs1.hasNext()) { 91 | QuerySolution qs = rs1.next(); 92 | dblpLabelToURI.put(qs.getLiteral("t").getString(), qs.getResource("p").getURI()); 93 | } 94 | 95 | // query for ACM publications 96 | HashMap acmLabelToURI = new HashMap<>(); 97 | ResultSet rs2 = Commons.sparql( 98 | "select ?p ?t where { ?p <"+Commons.HAS_AUTHOR+"> <" + acm 99 | + "> . 
?p <"+Commons.HAS_TITLE+"> ?t }", 100 | Commons.ACMRKB_ENDPOINT, Commons.ACMRKB_GRAPH); 101 | while(rs2.hasNext()) { 102 | QuerySolution qs = rs2.next(); 103 | acmLabelToURI.put(qs.getLiteral("t").getString(), qs.getResource("p").getURI()); 104 | } 105 | 106 | // Round-Robin among labels, checking also for substrings (e.g., to cut off undertitles) 107 | float dMin = Float.MAX_VALUE, dMinSub = Float.MAX_VALUE; 108 | String l1min = null, l2min = null, l1minSub = null, l2minSub = null; 109 | Levenshtein lev = new Levenshtein(); 110 | for(String l1 : dblpLabelToURI.keySet()) { 111 | for(String l2 : acmLabelToURI.keySet()) { 112 | float d = lev.distance(l1.toLowerCase(), l2.toLowerCase()); 113 | if(d < dMin) { 114 | dMin = d; 115 | l1min = l1; 116 | l2min = l2; 117 | } 118 | for(int i=0; i 2.0) { 138 | System.out.println("Using substring comparison (dMin = "+dMin+")"); 139 | dMin = dMinSub; 140 | l1min = l1minSub; 141 | l2min = l2minSub; 142 | } 143 | 144 | 145 | // add publications to the blacklist 146 | System.out.println("DISTANCE = " + dMin + "\n" + l1min + "\n" + l2min); 147 | String l2URI = acmLabelToURI.get(l2min); 148 | System.out.println("URI: "+l2URI + "\n"); 149 | blacklist.add(l2URI.substring(l2URI.lastIndexOf("/") + 1)); 150 | 151 | // break; 152 | } 153 | 154 | System.out.println(blacklist); 155 | for(String id : blacklist) 156 | pw.write(id+"\n"); 157 | pw.close(); 158 | } 159 | 160 | } 161 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuildSatellites.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.TreeSet; 7 | 8 | import org.apache.jena.riot.Lang; 9 | import org.apache.jena.riot.RDFDataMgr; 10 | import org.apache.jena.riot.system.StreamRDF; 11 | import org.apache.jena.riot.system.StreamRDFWriter; 12 | 13 | import com.hp.hpl.jena.graph.Triple; 14 | import com.hp.hpl.jena.query.Query; 15 | import com.hp.hpl.jena.query.QueryExecution; 16 | import com.hp.hpl.jena.query.QueryExecutionFactory; 17 | import com.hp.hpl.jena.query.QueryFactory; 18 | import com.hp.hpl.jena.query.Syntax; 19 | import com.hp.hpl.jena.rdf.model.Model; 20 | import com.hp.hpl.jena.rdf.model.StmtIterator; 21 | import com.hp.hpl.jena.sparql.core.Quad; 22 | import com.hp.hpl.jena.vocabulary.OWL; 23 | import com.hp.hpl.jena.vocabulary.RDF; 24 | 25 | /** 26 | * Add datatype properties and satellites (URIs belonging to the CBD) for each 27 | * author. The only tolerated predicates for satellites are defined in the 28 | * 'predicates' set. 
29 | * 30 | * @author Tommaso Soru 31 | * 32 | */ 33 | public class DatasetBuildSatellites { 34 | 35 | private static final String ENDPOINT = "http://localhost:8890/sparql"; 36 | 37 | // private static final String GRAPH = "http://mandolin.aksw.org/acm"; 38 | // private static final String FILE = "LinkedACM-10.nt"; 39 | // private static final String ARTICLE = "http://www.aktors.org/ontology/portal#Article-Reference"; 40 | 41 | private static final String GRAPH = "http://dblp.l3s.de"; 42 | private static final String FILE = "DBLPL3S.nt"; 43 | private static final String ARTICLE = "http://xmlns.com/foaf/0.1/Document"; 44 | 45 | 46 | private static TreeSet predicates = new TreeSet<>(); 47 | 48 | static { 49 | // tolerate only these two types of object properties of satellites 50 | predicates.add(RDF.type.getURI()); 51 | predicates.add(OWL.sameAs.getURI()); 52 | } 53 | 54 | public static void main(String[] args) { 55 | 56 | run(); 57 | deduplicate(); 58 | 59 | } 60 | 61 | public static void deduplicate() { 62 | 63 | File old = new File("datasets2/" + FILE); 64 | 65 | Model m = RDFDataMgr.loadModel(old.getPath()); 66 | System.out.println("Model has "+m.size()+" deduplicated triples."); 67 | File tmp = new File("datasets2/tmp_" + FILE); 68 | try { 69 | m.write(new FileOutputStream(tmp), "N-TRIPLE"); 70 | } catch (FileNotFoundException e) { 71 | e.printStackTrace(); 72 | return; 73 | } 74 | 75 | old.delete(); 76 | tmp.renameTo(old); 77 | 78 | System.out.println("Done."); 79 | 80 | } 81 | 82 | public static void run() { 83 | 84 | new File("datasets2/").mkdirs(); 85 | 86 | FileOutputStream output; 87 | try { 88 | output = new FileOutputStream(new File("datasets2/" + FILE)); 89 | } catch (FileNotFoundException e) { 90 | e.printStackTrace(); 91 | return; 92 | } 93 | 94 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, 95 | Lang.NT); 96 | 97 | TreeSet articleIDs = new TreeSet<>(); 98 | TreeSet satelliteIDs = new TreeSet<>(); 99 | 100 | // stream dataset 101 | // search for ?s a 102 | // collect article IDs 103 | collectWrite(articleIDs, writer); 104 | 105 | System.out.println("file = " + FILE); 106 | System.out.println("articles = " + articleIDs.size()); 107 | 108 | // for each article ID: 109 | // add its CBD and 110 | // collect satellite IDs 111 | for (String a : articleIDs) { 112 | System.out.print(a + "..."); 113 | cbd(a, writer, articleIDs, satelliteIDs, true); 114 | System.out.println(" OK"); 115 | } 116 | 117 | System.out.println(); 118 | 119 | System.out.println("satellites = " + satelliteIDs.size()); 120 | 121 | // for each satellite ID: 122 | // launch describe query 123 | // write out triples 124 | for (String aut : satelliteIDs) { 125 | System.out.print(aut + "..."); 126 | boolean success = cbd(aut, writer, articleIDs, satelliteIDs, false); 127 | if(success) 128 | System.out.println(" OK"); 129 | else 130 | System.out.println(" skipped"); 131 | } 132 | 133 | writer.finish(); 134 | System.out.println("\nDone."); 135 | 136 | } 137 | 138 | private static boolean cbd(String uri, StreamRDF writer, 139 | TreeSet articleIDs, TreeSet satelliteIDs, 140 | boolean addAll) { 141 | String query = "DESCRIBE <" + uri + ">"; 142 | Query sparqlQuery = QueryFactory.create(query, Syntax.syntaxARQ); 143 | QueryExecution qexec = QueryExecutionFactory.sparqlService(ENDPOINT, 144 | sparqlQuery, GRAPH); 145 | Model m; 146 | try { 147 | m = qexec.execDescribe(); 148 | } catch (Exception e1) { 149 | return false; 150 | } 151 | StmtIterator it = m.listStatements(); 152 | while (it.hasNext()) { 153 
| Triple t = it.next().asTriple(); 154 | 155 | if (addAll) { 156 | writer.triple(t); 157 | String s = t.getSubject().getURI(); 158 | boolean isUri = t.getObject().isURI(); 159 | if (isUri) { 160 | String o = t.getObject().getURI(); 161 | if (s.equals(uri)) 162 | satelliteIDs.add(o); 163 | if (o.equals(uri)) 164 | satelliteIDs.add(s); 165 | } 166 | 167 | } else { 168 | String s = t.getSubject().getURI(); 169 | String p = t.getPredicate().getURI(); 170 | boolean isUri = t.getObject().isURI(); 171 | 172 | if (!isUri) { 173 | writer.triple(t); 174 | } else { 175 | String o = t.getObject().getURI(); 176 | if (articleIDs.contains(o)) 177 | writer.triple(t); 178 | else if (articleIDs.contains(s)) 179 | writer.triple(t); 180 | else if (predicates.contains(p)) 181 | writer.triple(t); 182 | } 183 | } 184 | 185 | } 186 | return true; 187 | } 188 | 189 | private static void collectWrite(TreeSet articleIDs, 190 | StreamRDF writer) { 191 | 192 | StreamRDF dataStream = new StreamRDF() { 193 | 194 | @Override 195 | public void start() { 196 | writer.start(); 197 | } 198 | 199 | @Override 200 | public void triple(Triple triple) { 201 | 202 | if (triple.getPredicate().getURI().equals(RDF.type.getURI())) 203 | if (triple.getObject().getURI().equals(ARTICLE)) 204 | articleIDs.add(triple.getSubject().getURI()); 205 | 206 | } 207 | 208 | @Override 209 | public void quad(Quad quad) { 210 | } 211 | 212 | @Override 213 | public void base(String base) { 214 | } 215 | 216 | @Override 217 | public void prefix(String prefix, String iri) { 218 | } 219 | 220 | @Override 221 | public void finish() { 222 | // finishes later 223 | } 224 | 225 | }; 226 | 227 | RDFDataMgr.parse(dataStream, "datasets/" + FILE); 228 | 229 | } 230 | 231 | } 232 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuildStarter.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.io.Serializable; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | import java.util.Scanner; 10 | 11 | import org.aksw.mandolin.util.DataIO; 12 | 13 | import com.hp.hpl.jena.query.QuerySolution; 14 | import com.hp.hpl.jena.query.ResultSet; 15 | 16 | /** 17 | * Build the Commons.PUBS_WITH_AUTHORS_MAP containing all DBLPL3S publications 18 | * with their respective authors, if any. The process could have been carried 19 | * out by replacing namespaces (RKBExplorer to L3S), however this is a safer way 20 | * to do it. 21 | * 22 | * @author Tommaso Soru 23 | * 24 | */ 25 | public class DatasetBuildStarter { 26 | 27 | public static void main(String[] args) throws IOException, 28 | ClassNotFoundException { 29 | new DatasetBuildStarter().run(); 30 | } 31 | 32 | public void run() throws IOException, ClassNotFoundException { 33 | 34 | ArrayList data = new ArrayList<>(); 35 | 36 | for (String rkbURI : getRKBURIs()) { 37 | 38 | Elements e = getElements(rkbURI, Commons.DC_CREATOR.getURI(), 39 | Commons.DBLPL3S_ENDPOINT, Commons.DBLPL3S_GRAPH); 40 | 41 | System.out.println(e.getURI()); 42 | 43 | if (e.getURI() != null) { 44 | // should always happen 45 | data.add(e); 46 | } 47 | 48 | } 49 | 50 | DataIO.serialize(data, Commons.PUBS_WITH_AUTHORS_MAP); 51 | 52 | } 53 | 54 | /** 55 | * Get publication URIs from the perfect mapping. 
56 | * 57 | * @return 58 | * @throws FileNotFoundException 59 | */ 60 | private ArrayList getRKBURIs() throws FileNotFoundException { 61 | 62 | ArrayList list = new ArrayList<>(); 63 | 64 | Scanner in = new Scanner(new File(Commons.DBLP_ACM_CSV)); 65 | in.nextLine(); // skip header 66 | while (in.hasNextLine()) { 67 | String[] line = in.nextLine().split(","); 68 | String rkb = line[0].replaceAll("\"", ""); 69 | list.add(Commons.DBLP_NAMESPACE + rkb); 70 | } 71 | in.close(); 72 | 73 | return list; 74 | } 75 | 76 | /** 77 | * Get the publication associated with a list of elements (e.g., authors). 78 | * 79 | * @param rkbURI 80 | * @param relation 81 | * @param endpoint 82 | * @return 83 | */ 84 | private Elements getElements(String rkbURI, String relation, String endpoint, String graph) { 85 | 86 | String query = "SELECT ?cr ?pub WHERE { ?pub <" + Commons.OWL_SAMEAS 87 | + "> <" + rkbURI + "> ; <" + relation + "> ?cr }"; 88 | System.out.println(query); 89 | 90 | ResultSet rs = Commons.sparql(query, endpoint, graph); 91 | 92 | ArrayList list = new ArrayList<>(); 93 | String l3sURI = null; 94 | 95 | while (rs.hasNext()) { 96 | QuerySolution qs = rs.next(); 97 | l3sURI = qs.getResource("?pub").getURI(); 98 | list.add(qs.getResource("?cr").getURI()); 99 | } 100 | 101 | Elements elem; 102 | 103 | if (l3sURI == null) { 104 | elem = getElementsNoCreator(rkbURI, relation, endpoint, graph); 105 | } else { 106 | elem = new Elements(l3sURI); 107 | elem.setElements(list); 108 | } 109 | 110 | return elem; 111 | 112 | } 113 | 114 | private Elements getElementsNoCreator(String rkbURI, String relation, 115 | String endpoint, String graph) { 116 | String query = "SELECT ?pub WHERE { ?pub <" + Commons.OWL_SAMEAS 117 | + "> <" + rkbURI + "> }"; 118 | System.out.println(query); 119 | 120 | ResultSet rs = Commons.sparql(query, endpoint, graph); 121 | 122 | String l3sURI = null; 123 | 124 | while (rs.hasNext()) { 125 | l3sURI = rs.next().getResource("?pub").getURI(); 126 | } 127 | 128 | Elements elem = new Elements(l3sURI); 129 | elem.setElements(new ArrayList<>()); 130 | 131 | return elem; 132 | } 133 | 134 | } 135 | 136 | class Elements implements Serializable { 137 | 138 | private static final long serialVersionUID = -4523439946804741035L; 139 | 140 | private String uri; 141 | private List elements; 142 | 143 | public void setElements(List elements) { 144 | this.elements = elements; 145 | } 146 | 147 | Elements(String uri) { 148 | this.uri = uri; 149 | elements = new ArrayList(); 150 | } 151 | 152 | public String getURI() { 153 | return uri; 154 | } 155 | 156 | public List getElements() { 157 | return elements; 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuilderAlgorithm.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.BufferedInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.File; 6 | import java.io.FileNotFoundException; 7 | import java.io.FileOutputStream; 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.io.PrintWriter; 11 | import java.net.URL; 12 | import java.util.ArrayList; 13 | import java.util.HashMap; 14 | import java.util.Scanner; 15 | import java.util.TreeSet; 16 | 17 | import org.aksw.mandolin.util.DataIO; 18 | import org.apache.jena.riot.RDFDataMgr; 19 | import org.apache.jena.riot.RiotNotFoundException; 20 | import 
org.simmetrics.metrics.Levenshtein; 21 | 22 | import com.hp.hpl.jena.query.QuerySolution; 23 | import com.hp.hpl.jena.query.ResultSet; 24 | import com.hp.hpl.jena.rdf.model.Model; 25 | import com.hp.hpl.jena.rdf.model.NodeIterator; 26 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 27 | 28 | /** 29 | * @author Tommaso Soru 30 | * 31 | */ 32 | public class DatasetBuilderAlgorithm { 33 | 34 | private int N_EXAMPLES; 35 | 36 | public DatasetBuilderAlgorithm(int n) { 37 | this.N_EXAMPLES = n; 38 | } 39 | 40 | public static void main(String[] args) throws FileNotFoundException, 41 | ClassNotFoundException, IOException { 42 | new DatasetBuilderAlgorithm(100).run(); 43 | } 44 | 45 | public void run() throws FileNotFoundException, ClassNotFoundException, 46 | IOException { 47 | 48 | // load DBLP l3s to ACM rkb 49 | HashMap l3sMap = l3sToACMRkb(); 50 | 51 | // build reverse map 52 | HashMap> map = new HashMap<>(); 53 | 54 | ArrayList data = DataIO 55 | .readList(Commons.PUBS_WITH_AUTHORS_MAP); 56 | for (Elements e : data) { 57 | System.out.println(e.getURI()); 58 | // TODO remove me! 59 | if (!l3sMap.containsKey(e.getURI())) 60 | continue; 61 | 62 | for (String el : e.getElements()) { 63 | TreeSet pubSet; 64 | if (map.containsKey(el)) 65 | pubSet = map.get(el); 66 | else { 67 | pubSet = new TreeSet<>(); 68 | map.put(el, pubSet); 69 | } 70 | pubSet.add(e.getURI()); 71 | } 72 | } 73 | 74 | HashMap> sameAsMap = new HashMap<>(); 75 | 76 | PrintWriter pw = new PrintWriter(new File(Commons.DISTANCES_CSV)); 77 | 78 | // algorithm starts here 79 | for (String author : map.keySet()) { 80 | 81 | String authorName = getName(author); 82 | 83 | System.out.println("Listing " + authorName + " (" + author + "): " 84 | + map.get(author)); 85 | 86 | TreeSet sameAs = new TreeSet<>(); 87 | 88 | nextPub: for (String l3s : map.get(author)) { 89 | 90 | System.out.println("L3S: " + l3s); 91 | 92 | String acmRkb = l3sMap.get(l3s); 93 | 94 | float distMin = Float.MAX_VALUE; 95 | Entity entity = null; 96 | 97 | ArrayList rkb; 98 | final int MAX_TRIES = 3; 99 | int tries = 0; 100 | do { 101 | 102 | rkb = getCreators(acmRkb); 103 | tries++; 104 | for (Entity e : rkb) { 105 | Levenshtein lev = new Levenshtein(); 106 | float d = lev.distance(authorName, e.getLabel()); 107 | if (d <= distMin) { 108 | distMin = d; 109 | entity = e; 110 | } 111 | System.out.println("d(" + authorName + ", " 112 | + e.getLabel() + ") = " + d); 113 | } 114 | 115 | if (entity == null) { 116 | System.out.println("URI " + acmRkb 117 | + " is deprecated or has issues."); 118 | 119 | acmRkb = getRedirect(acmRkb.substring(acmRkb 120 | .lastIndexOf('/') + 1)); 121 | 122 | if(acmRkb == null) { 123 | System.out.println("*** No redirects available. 
Skipping "+l3s); 124 | continue nextPub; 125 | } 126 | System.out.println("*** Redirected to: " + acmRkb); 127 | 128 | } 129 | 130 | } while (rkb.isEmpty() && tries < MAX_TRIES); 131 | 132 | if (distMin >= 5.0) 133 | pw.write(authorName + "," + entity.getLabel() + "," 134 | + author + "," + entity.getUri() + "\n"); 135 | 136 | System.out.println("sameAs = " + entity.getUri()); 137 | sameAs.add(entity.getUri()); 138 | 139 | } 140 | 141 | if (!sameAs.isEmpty()) 142 | sameAsMap.put( 143 | Commons.LINKEDACM_NAMESPACE + author.substring(32), 144 | new ArrayList<>(sameAs)); 145 | else 146 | System.out.println("*** " + Commons.LINKEDACM_NAMESPACE 147 | + author.substring(32) + " had an empty sameAs set."); 148 | 149 | // System.out.println(sameAsMap); 150 | // break; 151 | 152 | } 153 | 154 | pw.close(); 155 | 156 | DataIO.serialize(sameAsMap, Commons.AUTHORS_SAMEAS_MAP); 157 | 158 | } 159 | 160 | private String getRedirect(String acmID) { 161 | 162 | // get remote file 163 | String uri = Commons.ACMRKB_NAMESPACE + acmID; 164 | String fileIn = "http://acm.rkbexplorer.com/data/" + acmID; 165 | String fileOut = "tmp/" + acmID + ".rdf"; 166 | try { 167 | download(fileIn, fileOut); 168 | } catch (IOException e) { 169 | } 170 | 171 | Model model = null; 172 | try { 173 | model = RDFDataMgr.loadModel(fileOut); 174 | } catch (RiotNotFoundException e) { 175 | // There is no information about the requested URI in this repository. 176 | return null; 177 | } 178 | NodeIterator it = model.listObjectsOfProperty( 179 | ResourceFactory.createResource(uri), Commons.OWL_SAMEAS); 180 | 181 | if (it.hasNext()) 182 | return it.nextNode().asResource().getURI(); 183 | 184 | return null; 185 | } 186 | 187 | private void download(String url, String file) throws IOException { 188 | URL link = new URL(url); 189 | InputStream in = new BufferedInputStream(link.openStream()); 190 | ByteArrayOutputStream out = new ByteArrayOutputStream(); 191 | byte[] buf = new byte[1024]; 192 | int n = 0; 193 | while (-1 != (n = in.read(buf))) { 194 | out.write(buf, 0, n); 195 | } 196 | out.close(); 197 | in.close(); 198 | byte[] response = out.toByteArray(); 199 | 200 | FileOutputStream fos = new FileOutputStream(file); 201 | fos.write(response); 202 | fos.close(); 203 | } 204 | 205 | private String getName(String uri) { 206 | 207 | String query = "SELECT * WHERE { <" + uri + "> <" + Commons.RDFS_LABEL 208 | + "> ?l }"; 209 | System.out.println(query); 210 | 211 | ResultSet rs = Commons.sparql(query, Commons.DBLPL3S_ENDPOINT, Commons.DBLPL3S_GRAPH); 212 | 213 | if (rs.hasNext()) { 214 | QuerySolution qs = rs.next(); 215 | return qs.getLiteral("?l").getString(); 216 | } 217 | 218 | return ""; 219 | } 220 | 221 | private ArrayList getCreators(String acmRkb) { 222 | 223 | String query = "SELECT DISTINCT * WHERE { <" + acmRkb + "> " + "<" 224 | + Commons.HAS_AUTHOR + "> ?s . 
" + "?s <" + Commons.FULL_NAME 225 | + "> ?l }"; 226 | System.out.println(query); 227 | 228 | ResultSet rs = Commons.sparql(query, Commons.ACMRKB_ENDPOINT, 229 | Commons.ACMRKB_GRAPH); 230 | 231 | ArrayList ent = new ArrayList<>(); 232 | 233 | while (rs.hasNext()) { 234 | QuerySolution qs = rs.next(); 235 | Entity e = new Entity(qs.getResource("?s").getURI(), qs.getLiteral( 236 | "?l").getString()); 237 | ent.add(e); 238 | } 239 | 240 | return ent; 241 | } 242 | 243 | private HashMap l3sToACMRkb() throws FileNotFoundException { 244 | HashMap map = new HashMap<>(); 245 | 246 | Scanner in = new Scanner(new File(Commons.DBLP_ACM_CSV)); 247 | in.nextLine(); 248 | int i = 0; 249 | while (in.hasNextLine()) { 250 | String[] line = in.nextLine().split(","); 251 | map.put(Commons.DBLPL3S_NAMESPACE + line[0].replaceAll("\"", ""), 252 | Commons.ACMRKB_NAMESPACE + line[1].replaceAll("\"", "")); 253 | if (++i == N_EXAMPLES) 254 | break; 255 | } 256 | in.close(); 257 | 258 | return map; 259 | } 260 | 261 | } 262 | 263 | class Entity { 264 | 265 | String uri, label; 266 | 267 | Entity(String uri, String label) { 268 | this.uri = uri; 269 | this.label = label; 270 | } 271 | 272 | public String getUri() { 273 | return uri; 274 | } 275 | 276 | public String getLabel() { 277 | return label; 278 | } 279 | 280 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/SemantifierPipeline.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.IOException; 4 | 5 | /** 6 | * 7 | * @author Tommaso Soru 8 | * 9 | */ 10 | public class SemantifierPipeline { 11 | 12 | public static void main(String[] args) throws ClassNotFoundException, 13 | IOException { 14 | 15 | int n = Integer.parseInt(args[0]); 16 | 17 | if (args[1].equals("part1")) { 18 | 19 | new DatasetBuildStarter().run(); 20 | new DatasetBuilderAlgorithm(n).run(); 21 | 22 | } else if (args[1].equals("part2")) { 23 | 24 | System.out.println("SECTION START: FIXER"); 25 | DatasetBuildFixer fixr = new DatasetBuildFixer(); 26 | fixr.run(); 27 | fixr.fix(); 28 | System.out.println("SECTION START: SEMANTIFIER"); 29 | DatasetBuildSemantifier semr = new DatasetBuildSemantifier(n); 30 | semr.linkedDBLP(); 31 | semr.mapping(); 32 | // semr.linkedACM(); 33 | DatasetBuildSatellites.run(); 34 | // System.out.println("SECTION START: CLOSURE"); 35 | // DatasetBuildClosure clsr = new DatasetBuildClosure(); 36 | // clsr.runReflSymmTransClosure(); 37 | 38 | } else 39 | System.out.println("Second argument is {part1, part2}."); 40 | 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/Bundle.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.util.MissingResourceException; 6 | import java.util.ResourceBundle; 7 | 8 | /** 9 | * @author Tommaso Soru 10 | * 11 | */ 12 | public class Bundle { 13 | 14 | private static String bundleName; 15 | private static ResourceBundle resBundle; 16 | 17 | public static String getBundleName() { 18 | return bundleName; 19 | } 20 | 21 | public static void setBundleName(String bName) { 22 | bundleName = bName; 23 | resBundle = ResourceBundle.getBundle(bundleName); 24 | } 25 | 26 | public static int getArrayValue(String key, int pos) { 27 | 
return Integer.parseInt(getString(key).split(",")[pos]); 28 | } 29 | 30 | public static String getString(String key) { 31 | try { 32 | return resBundle.getString(key); 33 | } catch (MissingResourceException | NullPointerException e) { 34 | String str = getConfig(key); 35 | if(str != null) 36 | return str; 37 | else 38 | return '!' + key + '!'; 39 | } 40 | } 41 | 42 | private static String getConfig(String key) { 43 | // load the application properties via java.util.Properties 44 | java.util.Properties mainProperties = new java.util.Properties(); 45 | 46 | FileInputStream file; 47 | 48 | // mandolin.properties is expected in the working directory 49 | String path = "./mandolin.properties"; 50 | 51 | try { 52 | // open the properties file 53 | file = new FileInputStream(path); 54 | 55 | // load all the properties from this file 56 | mainProperties.load(file); 57 | 58 | // close the file handle once the properties are loaded 59 | file.close(); 60 | // retrieve the property we are interested in 61 | return mainProperties.getProperty(key); 62 | } catch (IOException e) { 63 | return null; 64 | } 65 | 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/CustomQuoteMode.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import org.supercsv.prefs.CsvPreference; 4 | import org.supercsv.quote.NormalQuoteMode; 5 | import org.supercsv.util.CsvContext; 6 | 7 | /** 8 | * This SuperCSV preference mode adds quotes for strings containing a space. 9 | * 10 | * @author Tommaso Soru 11 | * 12 | */ 13 | public class CustomQuoteMode extends NormalQuoteMode { 14 | 15 | public boolean quotesRequired(String csvColumn, CsvContext context, 16 | CsvPreference preference) { 17 | if (csvColumn.contains(" ")) 18 | return true; 19 | else 20 | return super.quotesRequired(csvColumn, context, preference); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/DataIO.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.io.ObjectInputStream; 8 | import java.io.ObjectOutputStream; 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | 12 | /** 13 | * @author Tommaso Soru 14 | * 15 | */ 16 | public class DataIO { 17 | 18 | public static void serialize(ArrayList list, String filepath) throws FileNotFoundException, IOException { 19 | ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(filepath)); 20 | oos.writeObject(list); 21 | oos.close(); 22 | } 23 | 24 | public static void serialize(HashMap map, String filepath) throws FileNotFoundException, IOException { 25 | ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(filepath)); 26 | oos.writeObject(map); 27 | oos.close(); 28 | } 29 | 30 | @SuppressWarnings("unchecked") 31 | public static ArrayList readList(String filepath) throws FileNotFoundException, IOException, ClassNotFoundException { 32 | ObjectInputStream ois = new ObjectInputStream(new FileInputStream(filepath)); 33 | ArrayList list = (ArrayList) ois.readObject(); 34 | ois.close(); 35 | return list; 36 | } 37 | 38 | @SuppressWarnings("unchecked") 39 | public static HashMap 
readMap(String filepath) throws FileNotFoundException, IOException, ClassNotFoundException { 40 | ObjectInputStream ois = new ObjectInputStream(new FileInputStream(filepath)); 41 | HashMap map = (HashMap) ois.readObject(); 42 | ois.close(); 43 | return map; 44 | } 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/PostgreNotStartedException.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class PostgreNotStartedException extends RuntimeException { 8 | 9 | /** 10 | * 11 | */ 12 | private static final long serialVersionUID = 2109773019615897856L; 13 | 14 | public PostgreNotStartedException() { 15 | super(); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/PrettyRandom.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class PrettyRandom { 8 | 9 | public static String get(int digits) { 10 | String r = ""; 11 | while (r.equals("") || r.length() < digits) 12 | r = String.valueOf(Math.random()).substring(2); 13 | return r.substring(0, digits); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/SetUtils.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.TreeSet; 7 | 8 | import org.apache.jena.riot.Lang; 9 | import org.apache.jena.riot.RDFDataMgr; 10 | import org.apache.jena.riot.system.StreamRDF; 11 | import org.apache.jena.riot.system.StreamRDFWriter; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | 15 | import com.hp.hpl.jena.graph.Triple; 16 | import com.hp.hpl.jena.sparql.core.Quad; 17 | import com.hp.hpl.jena.vocabulary.OWL; 18 | import com.hp.hpl.jena.vocabulary.RDF; 19 | 20 | /** 21 | * @author Tommaso Soru 22 | * 23 | */ 24 | public class SetUtils { 25 | 26 | private final static Logger logger = LogManager.getLogger(SetUtils.class); 27 | 28 | /** 29 | * Set difference. 
30 | * 31 | * @param setA 32 | * @param setB 33 | * @param output 34 | */ 35 | public static void minus(String setA, String setB, String output) { 36 | 37 | TreeSet setBindex = new TreeSet<>(); 38 | StreamRDF bStream = new StreamRDF() { 39 | 40 | @Override 41 | public void start() { 42 | } 43 | 44 | @Override 45 | public void triple(Triple triple) { 46 | setBindex.add(triple.toString()); 47 | } 48 | 49 | @Override 50 | public void quad(Quad quad) { 51 | } 52 | 53 | @Override 54 | public void base(String base) { 55 | } 56 | 57 | @Override 58 | public void prefix(String prefix, String iri) { 59 | } 60 | 61 | @Override 62 | public void finish() { 63 | } 64 | 65 | }; 66 | RDFDataMgr.parse(bStream, setB); 67 | 68 | final FileOutputStream out; 69 | final StreamRDF outStream; 70 | try { 71 | out = new FileOutputStream(new File(output)); 72 | outStream = StreamRDFWriter.getWriterStream(out, Lang.NT); 73 | } catch (FileNotFoundException e) { 74 | logger.error(e.getMessage()); 75 | return; 76 | } 77 | 78 | outStream.start(); 79 | 80 | StreamRDF aStream = new StreamRDF() { 81 | 82 | @Override 83 | public void start() { 84 | } 85 | 86 | @Override 87 | public void triple(Triple triple) { 88 | boolean trivial = triple.getPredicate().hasURI(RDF.type.getURI()) 89 | && triple.getObject().hasURI(OWL.Thing.getURI()); 90 | boolean known = setBindex.contains(triple.toString()); 91 | if(!known && !trivial) // save discovered triple 92 | outStream.triple(triple); 93 | logger.trace("\tknown=" + known + "\ttrivial=" + trivial + "\t" + triple.toString()); 94 | } 95 | @Override 96 | public void quad(Quad quad) { 97 | } 98 | 99 | @Override 100 | public void base(String base) { 101 | } 102 | 103 | @Override 104 | public void prefix(String prefix, String iri) { 105 | } 106 | 107 | @Override 108 | public void finish() { 109 | } 110 | 111 | }; 112 | RDFDataMgr.parse(aStream, setA); 113 | 114 | outStream.finish(); 115 | 116 | } 117 | 118 | /** 119 | * @param setA 120 | * @param setB 121 | * @param output 122 | */ 123 | public static void union(String setA, String setB, String output) { 124 | 125 | final FileOutputStream out; 126 | final StreamRDF outStream; 127 | try { 128 | out = new FileOutputStream(new File(output)); 129 | outStream = StreamRDFWriter.getWriterStream(out, Lang.NT); 130 | } catch (FileNotFoundException e) { 131 | logger.error(e.getMessage()); 132 | return; 133 | } 134 | 135 | outStream.start(); 136 | 137 | StreamRDF dataStream = new StreamRDF() { 138 | 139 | @Override 140 | public void start() { 141 | } 142 | 143 | @Override 144 | public void triple(Triple triple) { 145 | outStream.triple(triple); 146 | } 147 | 148 | @Override 149 | public void quad(Quad quad) { 150 | } 151 | 152 | @Override 153 | public void base(String base) { 154 | } 155 | 156 | @Override 157 | public void prefix(String prefix, String iri) { 158 | } 159 | 160 | @Override 161 | public void finish() { 162 | } 163 | 164 | }; 165 | 166 | RDFDataMgr.parse(dataStream, setA); 167 | RDFDataMgr.parse(dataStream, setB); 168 | 169 | outStream.finish(); 170 | 171 | } 172 | 173 | public static void keepOnly(String relation, String in, 174 | String out) { 175 | 176 | final FileOutputStream output; 177 | try { 178 | output = new FileOutputStream(new File(out)); 179 | } catch (FileNotFoundException e) { 180 | logger.error(e.getMessage()); 181 | return; 182 | } 183 | 184 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 185 | 186 | StreamRDF dataStream = new StreamRDF() { 187 | 188 | @Override 189 | public void start() { 190 
| writer.start(); 191 | } 192 | 193 | @Override 194 | public void quad(Quad quad) { 195 | } 196 | 197 | @Override 198 | public void base(String base) { 199 | } 200 | 201 | @Override 202 | public void prefix(String prefix, String iri) { 203 | } 204 | 205 | @Override 206 | public void finish() { 207 | writer.finish(); 208 | } 209 | 210 | @Override 211 | public void triple(Triple triple) { 212 | if(triple.getPredicate().getURI().equals(relation)) 213 | writer.triple(triple); 214 | } 215 | 216 | }; 217 | 218 | RDFDataMgr.parse(dataStream, in); 219 | } 220 | 221 | } 222 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/Shell.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.InputStreamReader; 5 | 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | /** 10 | * @author Tommaso Soru 11 | * 12 | */ 13 | public class Shell { 14 | 15 | private final static Logger logger = LogManager.getLogger(Shell.class); 16 | 17 | /** 18 | * Execute a command which expects an output. 19 | * 20 | * @param command 21 | * @param show 22 | * @return 23 | */ 24 | public static String execute(String command, boolean show) { 25 | StringBuffer sb = new StringBuffer(); 26 | Process p; 27 | try { 28 | p = Runtime.getRuntime().exec(command); 29 | p.waitFor(); 30 | 31 | BufferedReader reader = new BufferedReader(new InputStreamReader( 32 | p.getInputStream())); 33 | 34 | String line = ""; 35 | while ((line = reader.readLine()) != null) { 36 | if (show) 37 | logger.debug(line); 38 | sb.append(line + "\n"); 39 | } 40 | } catch (Exception e) { 41 | e.printStackTrace(); 42 | } 43 | return sb.toString(); 44 | } 45 | 46 | /** 47 | * Execute a command which expects no output. 
48 | * 49 | * @param command 50 | * @return 51 | */ 52 | public static String execute(String command) { 53 | return execute(command, false); 54 | } 55 | 56 | /** 57 | * @param args 58 | */ 59 | public static void main(String[] args) { 60 | logger.debug("Streamed:"); 61 | String output = execute("ls -l", true); 62 | logger.debug("\nBuffered:\n" + output); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/StringClean.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru <> 5 | * 6 | */ 7 | public class StringClean { 8 | 9 | public static String clean(String string) { 10 | return string.replaceAll("[^\\dA-Za-z]", ""); 11 | } 12 | 13 | public static String oneRow(String string) { 14 | return string.replaceAll("\n", " ").replaceAll("\t", " "); 15 | } 16 | 17 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/Timer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.util.ArrayList; 4 | 5 | import org.apache.log4j.Logger; 6 | 7 | /** 8 | * @author Tommaso Soru 9 | * 10 | */ 11 | public class Timer { 12 | 13 | private final static Logger LOGGER = Logger.getLogger("ROCKER"); 14 | 15 | private ArrayList stops; 16 | 17 | public Timer() { 18 | stops = new ArrayList<>(); 19 | stops.add(System.currentTimeMillis()); 20 | } 21 | 22 | public void lap() { 23 | stops.add(System.currentTimeMillis()); 24 | } 25 | 26 | public double getLapSeconds(int lap) { 27 | if(stops.size() < 2) 28 | return Double.NaN; 29 | return (stops.get(lap + 1) - stops.get(lap)) / 1000.0; 30 | } 31 | 32 | public double getLapMillis(int lap) { 33 | if(stops.size() < 2) 34 | return Double.NaN; 35 | return (stops.get(lap + 1) - stops.get(lap)); 36 | } 37 | 38 | public double getLastLapSeconds() { 39 | if(stops.size() < 2) 40 | return Double.NaN; 41 | return (stops.get(stops.size() - 1) - stops.get(stops.size() - 2)) / 1000.0; 42 | } 43 | 44 | public double getLastLapMillis() { 45 | if(stops.size() < 2) 46 | return Double.NaN; 47 | return stops.get(stops.size() - 1) - stops.get(stops.size() - 2); 48 | } 49 | 50 | public int getSize() { 51 | return stops.size() - 1; 52 | } 53 | 54 | public static void main(String[] args) throws InterruptedException { 55 | Timer t = new Timer(); 56 | Thread.sleep(1000); 57 | t.lap(); 58 | LOGGER.info(t.getLastLapMillis()); 59 | LOGGER.info(t.getLapMillis(0)); 60 | Thread.sleep(500); 61 | t.lap(); 62 | LOGGER.info(t.getLastLapSeconds()); 63 | LOGGER.info(t.getLapSeconds(1)); 64 | } 65 | 66 | } 67 | 68 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/URIHandler.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import org.apache.logging.log4j.LogManager; 4 | import org.apache.logging.log4j.Logger; 5 | 6 | import com.hp.hpl.jena.graph.Node; 7 | 8 | /** 9 | * Add blank-node support. 
10 | * 11 | * @author Tommaso Soru 12 | * 13 | */ 14 | public class URIHandler { 15 | 16 | private final static Logger logger = LogManager.getLogger(URIHandler.class); 17 | 18 | public static String parse(Node r) { 19 | String s; 20 | try { 21 | s = r.getURI(); 22 | } catch (UnsupportedOperationException e) { 23 | logger.debug(e.getMessage()); 24 | s = r.getBlankNodeLabel(); 25 | logger.debug("Changing to "+s); 26 | } 27 | return s; 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/URLs.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class URLs { 8 | 9 | public static final String 10 | RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", 11 | RDF_CLASS = "http://www.w3.org/2000/01/rdf-schema#Class", 12 | OWL_CLASS = "http://www.w3.org/2002/07/owl#Class", 13 | RDFS_SUBCLASSOF = "http://www.w3.org/2000/01/rdf-schema#subClassOf", 14 | OWL_SAMEAS = "http://www.w3.org/2002/07/owl#sameAs"; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=INFO, A1 3 | log4j.logger.org.apache.jena.riot=ERROR, A1 4 | log4j.logger.org.slf4j.impl.Log4jLoggerAdapter=ERROR, A1 5 | 6 | # A1 is set to be a ConsoleAppender. 7 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 8 | 9 | # A1 uses PatternLayout. 10 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 11 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 12 | 13 | -------------------------------------------------------------------------------- /src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %-5level %d [%t] %c:%M(%L): %m%n 5 | logs 6 | ${LOG_DIR}/archive 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/resources/publications.properties: -------------------------------------------------------------------------------- 1 | ############## Required Settings ############# 2 | 3 | mapping_files=mappings/dblp-acm.csv 4 | csv_link_type=http://www.w3.org/2002/07/owl#sameAs 5 | local_namespace=http://aksw.org/Groups/SIMBA/SemSRL/ 6 | 7 | # can be {1, 2} 8 | query_depth=1 9 | 10 | ################ Data Sources ################ 11 | 12 | dblp_namespace=http://dblp.rkbexplorer.com/id/ 13 | dblp_store_type=sparql 14 | dblp_store_path=http://dblp.rkbexplorer.com/sparql 15 | 16 | acm_namespace=http://acm.rkbexplorer.com/id/ 17 | acm_store_type=sparql 18 | acm_store_path=http://acm.rkbexplorer.com/sparql 19 | 20 | scholar_namespace=http://scholar.google.com/id/ 21 | scholar_store_type=csv 22 | scholar_store_path=datasets/scholar.csv 23 | 24 | ############### Do Not Modify ################ 25 | 26 | owl_same_as=http://www.w3.org/2002/07/owl#sameAs 27 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/mandolin/MandolinTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin; 2 | 3 | import static org.junit.Assert.fail; 4 | 5 | import 
org.junit.Test; 6 | 7 | /** 8 | * @author Tommaso Soru 9 | * 10 | */ 11 | public class MandolinTest { 12 | 13 | @Test 14 | public void aimRelation() throws Exception { 15 | 16 | String theArgs = "--output eval/mandolin-test --input AKSW-one-out.nt " 17 | + "--aim http://mandolin.aksw.org/example/topic"; 18 | 19 | run(theArgs); 20 | 21 | } 22 | 23 | @Test 24 | public void aimAnything() throws Exception { 25 | 26 | String theArgs = "--output eval/mandolin-test --input AKSW-one-out.nt " 27 | + "--aim *"; 28 | 29 | run(theArgs); 30 | 31 | } 32 | 33 | private void run(String theArgs) { 34 | 35 | String[] theArgsArray = theArgs.split(" "); 36 | 37 | try { 38 | Mandolin.main(theArgsArray); 39 | } catch (Exception e) { 40 | fail(); 41 | } 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/resources/AKSW-one-out.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=INFO, A1 3 | log4j.logger.org.apache.jena.riot=ERROR, A1 4 | log4j.logger.org.slf4j.impl.Log4jLoggerAdapter=ERROR, A1 5 | 6 | # A1 is set to be a ConsoleAppender. 7 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 8 | 9 | # A1 uses PatternLayout. 10 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 11 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 12 | 13 | -------------------------------------------------------------------------------- /src/test/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %-5level %d [%t] %c:%M(%L): %m%n 5 | logs 6 | ${LOG_DIR}/archive 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | --------------------------------------------------------------------------------
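The utility classes above are self-contained, so they can be exercised outside the main pipeline. The snippet below is a minimal, illustrative sketch, not a file from the repository: it shows how `SetUtils` and `Timer` might be combined to isolate newly inferred `owl:sameAs` links, and the file names `inferred.nt`, `training.nt`, `sameas-only.nt`, and `new-links.nt` are placeholders.

```java
import org.aksw.mandolin.util.SetUtils;
import org.aksw.mandolin.util.Timer;

// Illustrative usage sketch; the N-Triples file names are placeholders.
public class SetUtilsExample {

	public static void main(String[] args) {
		Timer timer = new Timer();

		// keep only owl:sameAs statements from the inferred triples
		SetUtils.keepOnly("http://www.w3.org/2002/07/owl#sameAs",
				"inferred.nt", "sameas-only.nt");

		// drop statements that already occur in the training data
		SetUtils.minus("sameas-only.nt", "training.nt", "new-links.nt");

		timer.lap();
		System.out.println("Done in " + timer.getLastLapSeconds() + " s.");
	}

}
```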