├── .gitignore ├── .gitmodules ├── LICENSE ├── Mandolin-Evaluation-and-Discussion.pdf ├── README.md ├── install.sh ├── mandolin-400px.png ├── mandolin-500x500px.png ├── mandolin.properties ├── pgsql-create.sh ├── pgsql-init.sh ├── pgsql-start.sh ├── pgsql-stop.sh ├── pgsql ├── LICENSE ├── drop.sh ├── initdb.sh ├── sql │ ├── create.sql │ ├── debug.sql │ ├── drop.sql │ ├── ground.sql │ ├── load-body.sql │ ├── load-head.sql │ ├── load-tail.sql │ ├── load.sql │ ├── qc.sql │ └── run.sql ├── start.sh └── stop.sh ├── pom.xml ├── rockit.properties └── src ├── main ├── java │ └── org │ │ └── aksw │ │ └── mandolin │ │ ├── MainDolin.java │ │ ├── Mandolin.java │ │ ├── common │ │ ├── MandolinCommon.java │ │ └── NameMapperCommon.java │ │ ├── controller │ │ ├── Classes.java │ │ ├── Evidence.java │ │ ├── NameMapper.java │ │ ├── OntoImporter.java │ │ ├── ProbKBData.java │ │ ├── SimilarityJoin.java │ │ └── Validator.java │ │ ├── eval │ │ ├── CrossValidation.java │ │ ├── Dataset.java │ │ ├── FMeasureEvaluation.java │ │ ├── LinkPredictionEvaluation.java │ │ └── MeanRankCalc.java │ │ ├── grounding │ │ └── Grounding.java │ │ ├── inference │ │ ├── Factors.java │ │ ├── PostgreDB.java │ │ ├── ProbKBToRockitGibbsSampling.java │ │ ├── RockitGibbsSampling.java │ │ └── RockitGroundingAndGibbsSampling.java │ │ ├── model │ │ ├── Cache.java │ │ ├── ComparableLiteral.java │ │ ├── PredictionLiteral.java │ │ └── PredictionSet.java │ │ ├── reasoner │ │ └── PelletReasoner.java │ │ ├── rulemining │ │ ├── AmieHandler.java │ │ ├── RDFToTSV.java │ │ ├── RuleDriver.java │ │ └── RuleMiner.java │ │ ├── semantifier │ │ ├── Commons.java │ │ ├── DatasetBuildFixer.java │ │ ├── DatasetBuildSatellites.java │ │ ├── DatasetBuildSemantifier.java │ │ ├── DatasetBuildStarter.java │ │ ├── DatasetBuilderAlgorithm.java │ │ └── SemantifierPipeline.java │ │ └── util │ │ ├── Bundle.java │ │ ├── CustomQuoteMode.java │ │ ├── DataIO.java │ │ ├── PostgreNotStartedException.java │ │ ├── PrettyRandom.java │ │ ├── SetUtils.java │ │ ├── Shell.java │ │ ├── StringClean.java │ │ ├── Timer.java │ │ ├── URIHandler.java │ │ └── URLs.java └── resources │ ├── log4j.properties │ ├── log4j2.xml │ └── publications.properties └── test ├── java └── org │ └── aksw │ └── mandolin │ └── MandolinTest.java └── resources ├── AKSW-one-out.nt ├── log4j.properties └── log4j2.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | bin/ 3 | target/ 4 | 5 | .classpath 6 | .project 7 | .settings/ 8 | 9 | data/ 10 | data.zip 11 | logs/ 12 | eval/ 13 | pgsql/db/ 14 | 15 | .DS_Store 16 | *.log 17 | *.lp 18 | *.mps -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "horn-concerto"] 2 | path = horn-concerto 3 | url = https://github.com/mommi84/horn-concerto.git 4 | -------------------------------------------------------------------------------- /Mandolin-Evaluation-and-Discussion.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKSW/Mandolin/9b8266d600b83c6368625af669d3fb355296564b/Mandolin-Evaluation-and-Discussion.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](https://github.com/mommi84/Mandolin/raw/master/mandolin-400px.png "Mandolin logo") 2 | 3 | MANDOLIN 4 | ======== 5 | 6 | *The 
best modules for Markov Logic Networks - rule mining, grounding, inference - condensed in one framework.* 7 | 8 | ## Requirements 9 | 10 | * Java 1.8+ 11 | * PostgreSQL 9.4.x 12 | * Gurobi solver 13 | * Maven 14 | * Wget, Unzip 15 | 16 | ## Quick start 17 | 18 | * Download and decompress [Mandolin v0.4.0-alpha](https://github.com/AKSW/Mandolin/releases/download/v0.4.0-alpha/mandolin-binaries-v0.4.0-alpha.zip) 19 | * Run `bash install.sh` 20 | 21 | ## Experiments 22 | 23 | The following command discovers new links of any predicate (`--aim`) on the WordNet dataset (`--input`) with a rule-mining threshold of 0.8 (`--mining`) and 1 million Gibbs sampling iterations (`--sampling`). 24 | 25 | ```bash 26 | java -Xmx1g -jar target/Mandolin-0.4.0-jar-with-dependencies.jar plain --input data/benchmark/wn18/wordnet-mlj12-train.nt,data/benchmark/wn18/wordnet-mlj12-valid.nt --output eval/wn18 --mining 0.8 --sampling 1000000 --aim "*" 27 | ``` 28 | 29 | Discovered links can be found in the `--output` folder at `./eval/wn18/discovered_X.nt`, where `X` is the output threshold: each file contains all links whose confidence is greater than or equal to `X`. 30 | 31 | An excerpt of the discovered **rules and weights**: 32 | 33 | ```text 34 | 0.990517419 wn18:_part_of(b, a) => wn18:_has_part(a, b) 35 | 0.862068966 wn18:_instance_hypernym(a, c) AND wn18:_synset_domain_topic_of(f, b) => wn18:_synset_domain_topic_of(a, b) 36 | ``` 37 | 38 | An excerpt of the discovered **links** with confidence > 0.9: 39 | 40 | ```text 41 | wn18:08131530 wn18:_has_part wn18:08132046 . 42 | wn18:09189411 wn18:_has_part wn18:08707917 . 43 | wn18:10484858 wn18:_synset_domain_topic_of wn18:08441203 . 44 | wn18:01941987 wn18:_synset_domain_topic_of wn18:00300441 . 45 | ``` 46 | 47 | ### Basic documentation 48 | 49 | Mandolin can be launched as follows. 50 | 51 | ```bash 52 | java -Xmx1g -jar target/Mandolin-0.4.0-jar-with-dependencies.jar 53 | ``` 54 | 55 | #### Goals 56 | 57 | **Goal**|**Description** 58 | :-----|:----- 59 | `plain`|Launch a plain Mandolin execution. 60 | `eval`|Evaluate MRR and hits@k. 61 | 62 | #### Plain execution 63 | 64 | Parameters for the `plain` goal: 65 | 66 | **Parameter**|**Description**|**Example value** 67 | :-----|:-----|:----- 68 | `--input`|Comma-separated N-Triples files.|`data1.nt,data2.nt` 69 | `--output`|Workspace and output folder.|`eval/experiment1` 70 | `--aim`|Aim predicate. Use the wildcard `*` for all predicates.|`http://www.w3.org/2002/07/owl#sameAs` 71 | `--mining`|Rule mining threshold.|`0.9` (default: `0.0` support) 72 | `--sampling`|Gibbs sampling iterations.|`1000000` (default: 100 x evidence size) 73 | `--rules`|Maximum number of rules.|`1500` (default: none) 74 | `--sim`|Enable similarity among literals as `min,step,max`.|`0.8,0.1,0.9` (default: none) 75 | `--onto`|Enable ontology import.|`true` (default: `false`) 76 | `--fwc`|Enable forward chaining.|`true` (default: `false`) 77 | 78 | #### Evaluation 79 | 80 | The `eval` goal takes two parameters: the N-Triples file of the test set and Mandolin's output directory. 
81 | 82 | Example run: 83 | 84 | ```bash 85 | java -Xmx1g -jar target/Mandolin-0.4.0-jar-with-dependencies.jar eval data/benchmark/wn18/wordnet-mlj12-test.nt eval/wn18 86 | ``` 87 | 88 | ## Manual install 89 | 90 | * Clone the project: 91 | 92 | ```bash 93 | git clone https://github.com/mommi84/Mandolin.git 94 | cd Mandolin 95 | ``` 96 | 97 | * Get PostgreSQL 9.4.x - [Ubuntu/Debian binaries](http://oscg-downloads.s3.amazonaws.com/packages/postgresql-9.4.8-1-x64-bigsql.deb) 98 | 99 | ### Alternative 1 100 | 101 | * Launch `bash install.sh -c` 102 | 103 | ### Alternative 2 104 | 105 | * Insert the PostgreSQL settings into a `./mandolin.properties` file. Example: 106 | 107 | ```properties 108 | # GENERAL CONFIGURATION FOR MANDOLIN 109 | pgsql_home=/usr/local/Cellar/postgresql/9.4.1 110 | pgsql_username=tom 111 | pgsql_password= 112 | pgsql_url=localhost 113 | ``` 114 | 115 | * Download [data](https://s3-eu-west-1.amazonaws.com/anonymous-folder/data.zip) 116 | 117 | * Compile the project: 118 | 119 | ```bash 120 | export MAVEN_OPTS=-Xss4m 121 | mvn clean compile assembly:single 122 | ``` 123 | 124 | ## Database handler 125 | 126 | After using Mandolin, stop the DB instance with: 127 | 128 | ```bash 129 | sh pgsql-stop.sh 130 | ``` 131 | 132 | The instance can be restarted with: 133 | 134 | ```bash 135 | sh pgsql-start.sh 136 | ``` 137 | 138 | ## Citing 139 | 140 | ``` 141 | @article{soru2017mandolin, 142 | title={Mandolin: A Knowledge Discovery Framework for the Web of Data}, 143 | author={Soru, Tommaso and Esteves, Diego and Marx, Edgard and Ngomo, Axel-Cyrille Ngonga}, 144 | journal={arXiv preprint arXiv:1711.01283}, 145 | year={2017} 146 | } 147 | ``` 148 | 149 | ## License(s) 150 | 151 | **Mandolin** is licensed under the GNU General Public License v2.0. 152 | **AMIE** is licensed under the Creative Commons Attribution-NonCommercial license v3.0. 153 | **ProbKB** is licensed under the BSD license. 154 | **RockIt** is licensed under the MIT License. 155 | **Gurobi** can be activated using a [free academic license](http://www.gurobi.com/academia/academia-center). 156 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "" 3 | echo "=== Mandolin Installer ===" 4 | echo "" 5 | 6 | if [[ $1 == "-c" ]] 7 | then 8 | echo "Compiling Mandolin..." 9 | export MAVEN_OPTS=-Xss4m 10 | mvn -q clean compile assembly:single 11 | fi 12 | 13 | read -p "Download datasets into ./data/? " -n 1 -r 14 | echo # (optional) move to a new line 15 | if [[ $REPLY =~ ^[Yy]$ ]] 16 | then 17 | # do stuff 18 | echo "Downloading datasets..." 19 | wget -q https://s3-eu-west-1.amazonaws.com/anonymous-folder/data.zip 20 | unzip -qq data.zip && rm -rf data.zip 21 | fi 22 | 23 | read -p "Download and install PostgreSQL? [Ubuntu systems only] " -n 1 -r 24 | echo # (optional) move to a new line 25 | if [[ $REPLY =~ ^[Yy]$ ]] 26 | then 27 | # do stuff 28 | echo "Downloading PostgreSQL..." 
29 | wget -q http://oscg-downloads.s3.amazonaws.com/packages/postgresql-9.4.8-1-x64-bigsql.deb 30 | pgdr=`pwd`"/postgres/" 31 | echo "Installing PostgreSQL in "$pgdr 32 | dpkg-deb -x postgresql-9.4.8-1-x64-bigsql.deb $pgdr && rm -rf postgresql-9.4.8-1-x64-bigsql.deb 33 | pgdir=$pgdr"opt/postgresql/pg94" # changing to home 34 | echo "# GENERAL CONFIGURATION FOR MANDOLIN" > mandolin.properties 35 | echo "pgsql_home="$pgdir >> mandolin.properties 36 | echo "pgsql_username="`whoami` >> mandolin.properties 37 | echo "pgsql_password=" >> mandolin.properties 38 | echo "pgsql_url=localhost" >> mandolin.properties 39 | else 40 | read -p "PostgreSQL home? " pgdir 41 | echo "# GENERAL CONFIGURATION FOR MANDOLIN" > mandolin.properties 42 | echo "pgsql_home="$pgdir >> mandolin.properties 43 | read -p "PostgreSQL username? " puname 44 | echo "pgsql_username="$puname >> mandolin.properties 45 | read -sp "PostgreSQL password? " ppwd 46 | echo "pgsql_password="$ppwd >> mandolin.properties 47 | read -p "PostgreSQL host? " phost 48 | echo "pgsql_url="$phost >> mandolin.properties 49 | fi 50 | 51 | echo "Initializing database..." 52 | cd pgsql && $pgdir/bin/initdb db -E utf8 53 | echo "Starting server and creating DB..." 54 | $pgdir/bin/pg_ctl start -D db/ && sleep 5s && $pgdir/bin/createdb probkb && cd .. 55 | 56 | echo "Done." 57 | -------------------------------------------------------------------------------- /mandolin-400px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKSW/Mandolin/9b8266d600b83c6368625af669d3fb355296564b/mandolin-400px.png -------------------------------------------------------------------------------- /mandolin-500x500px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKSW/Mandolin/9b8266d600b83c6368625af669d3fb355296564b/mandolin-500x500px.png -------------------------------------------------------------------------------- /mandolin.properties: -------------------------------------------------------------------------------- 1 | # GENERAL CONFIGURATION FOR MANDOLIN 2 | pgsql_home=/usr/local/Cellar/postgresql/9.4.1 3 | pgsql_username=tom 4 | pgsql_password= 5 | pgsql_url=localhost 6 | -------------------------------------------------------------------------------- /pgsql-create.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | createdb probkb -------------------------------------------------------------------------------- /pgsql-init.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd pgsql && sh initdb.sh && cd .. -------------------------------------------------------------------------------- /pgsql-start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "\n\tStarting PostgreSQL database... Run 'sh pgsql-stop.sh' to terminate it.\n" && cd pgsql && sh start.sh && cd .. 3 | -------------------------------------------------------------------------------- /pgsql-stop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd pgsql && sh stop.sh && cd .. 
3 | -------------------------------------------------------------------------------- /pgsql/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, University of Florida 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /pgsql/drop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | psql probkb -f sql/drop.sql 3 | -------------------------------------------------------------------------------- /pgsql/initdb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | initdb db -E utf8 3 | -------------------------------------------------------------------------------- /pgsql/sql/create.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA probkb; 2 | 3 | -- data tables 4 | CREATE TABLE probkb.classes(id INT PRIMARY KEY, name TEXT); 5 | CREATE TABLE probkb.entities(id INT PRIMARY KEY, name TEXT); 6 | CREATE TABLE probkb.relations(id INT PRIMARY KEY, name TEXT); 7 | 8 | CREATE TABLE probkb.entClasses( 9 | ent INT, class INT, 10 | PRIMARY KEY(ent, class) 11 | ); 12 | 13 | CREATE TABLE probkb.relClasses( 14 | rel INT, class1 INT, class2 INT, 15 | PRIMARY KEY(rel, class1, class2) 16 | ); 17 | 18 | CREATE TABLE probkb.extractions( 19 | rel INT, ent1 INT, ent2 INT, weight DOUBLE PRECISION, url TEXT, 20 | PRIMARY KEY(rel, ent1, ent2) 21 | ); 22 | 23 | CREATE TABLE probkb.functionals( 24 | rel INT, arg INT, deg INT, 25 | PRIMARY KEY (rel, arg) 26 | ); 27 | 28 | CREATE TABLE probkb.ambiguities( 29 | ent INT, class INT, 30 | PRIMARY KEY(ent, class) 31 | ); 32 | 33 | CREATE TABLE probkb.trash( 34 | id INT PRIMARY KEY 35 | ); 36 | 37 | CREATE SEQUENCE probkb.relids; 38 | 39 | -- mln tables 40 | CREATE TABLE probkb.mln1(head INT, body INT, class1 INT, class2 INT, weight DOUBLE PRECISION); 41 | CREATE TABLE probkb.mln2(head INT, body INT, class1 INT, class2 INT, weight DOUBLE PRECISION); 42 | CREATE TABLE probkb.mln3(head INT, body1 INT, body2 INT, 43 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 44 | 
CREATE TABLE probkb.mln4(head INT, body1 INT, body2 INT, 45 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 46 | CREATE TABLE probkb.mln5(head INT, body1 INT, body2 INT, 47 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 48 | CREATE TABLE probkb.mln6(head INT, body1 INT, body2 INT, 49 | class1 INT, class2 INT, class3 INT, weight DOUBLE PRECISION); 50 | -------------------------------------------------------------------------------- /pgsql/sql/debug.sql: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------- 2 | -- DEBUGGING utilities 3 | ------------------------------------------------------- 4 | CREATE OR REPLACE FUNCTION probkb.trace(rsid INT) RETURNS VOID AS $$ 5 | DECLARE 6 | cnt INT := 1; 7 | target INT; 8 | rule RECORD; 9 | BEGIN 10 | DROP TABLE IF EXISTS probkb.tr, probkb.queue; 11 | CREATE TABLE probkb.tr(head TEXT, body1 TEXT, body2 TEXT); 12 | 13 | CREATE TABLE probkb.queue(id INT PRIMARY KEY); 14 | INSERT INTO probkb.queue VALUES (rsid); 15 | 16 | WHILE cnt > 0 LOOP 17 | SELECT MAX(id) INTO target FROM probkb.queue; 18 | SELECT (probkb.traceStep(target)).* INTO rule; 19 | RAISE INFO '(%) %:-%,%', rule.id1, rule.name1, rule.name2, rule.name3; 20 | 21 | INSERT INTO probkb.tr(head, body1, body2) VALUES (rule.name1, rule.name2, rule.name3); 22 | INSERT INTO probkb.queue 23 | SELECT rule.id2 WHERE rule.id2 IS NOT NULL 24 | UNION 25 | SELECT rule.id3 WHERE rule.id3 IS NOT NULL 26 | EXCEPT 27 | SELECT id FROM probkb.queue; 28 | 29 | DELETE FROM probkb.queue WHERE id = target; 30 | SELECT COUNT(*) INTO cnt FROM probkb.queue; 31 | END LOOP; 32 | END; 33 | $$ LANGUAGE plpgsql; 34 | 35 | CREATE OR REPLACE FUNCTION probkb.traceStep(rsid INT, OUT id1 INT, OUT name1 TEXT, 36 | OUT id2 INT, OUT name2 TEXT, 37 | OUT id3 INT, OUT name3 TEXT) AS $$ 38 | DECLARE 39 | factor RECORD; 40 | BEGIN 41 | SELECT factors.id1, factors.id2, factors.id3 INTO factor FROM probkb.factors 42 | WHERE factors.id1 = rsid AND factors.id2 < rsid AND factors.id3 < rsid 43 | ORDER BY (factors.id2+factors.id3) LIMIT 1; 44 | 45 | SELECT INTO id1, name1 46 | r.id, relations.name || '(' || e1.name || ',' || e2.name || ')' 47 | FROM probkb.relationships r JOIN probkb.relations ON r.rel = relations.id 48 | JOIN probkb.entities e1 ON r.ent1 = e1.id 49 | JOIN probkb.entities e2 ON r.ent2 = e2.id 50 | WHERE r.id = rsid; 51 | 52 | SELECT INTO id2, name2 53 | r.id, relations.name || '(' || e1.name || ',' || e2.name || ')' 54 | FROM probkb.relationships r JOIN probkb.relations ON r.rel = relations.id 55 | JOIN probkb.entities e1 ON r.ent1 = e1.id 56 | JOIN probkb.entities e2 ON r.ent2 = e2.id 57 | WHERE r.id = factor.id2; 58 | 59 | SELECT INTO id3, name3 60 | r.id, relations.name || '(' || e1.name || ',' || e2.name || ')' 61 | FROM probkb.relationships r JOIN probkb.relations ON r.rel = relations.id 62 | JOIN probkb.entities e1 ON r.ent1 = e1.id 63 | JOIN probkb.entities e2 ON r.ent2 = e2.id 64 | WHERE r.id = factor.id3; 65 | 66 | IF id2 IS NULL THEN -- look for urls 67 | SELECT extractions.url INTO name2 68 | FROM probkb.relationships JOIN probkb.extractions ON relationships.rel = extractions.rel 69 | AND relationships.ent1 = extractions.ent1 AND relationships.ent2 = extractions.ent2 70 | WHERE relationships.id = rsid LIMIT 1; 71 | END IF; 72 | END; 73 | $$ LANGUAGE plpgsql; 74 | -------------------------------------------------------------------------------- /pgsql/sql/drop.sql: 
-------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS probkb CASCADE; 2 | -------------------------------------------------------------------------------- /pgsql/sql/load-body.sql: -------------------------------------------------------------------------------- 1 | 2 | -- build relationships table with type information 3 | CREATE TABLE probkb.relationships AS 4 | SELECT nextval('probkb.relids') AS id, r.rel AS rel, 5 | r.ent1 AS ent1, rc.class1 AS class1, 6 | r.ent2 AS ent2, rc.class2 AS class2, AVG(weight) AS weight 7 | FROM probkb.extractions r, probkb.relClasses rc, probkb.entClasses ec1, probkb.entClasses ec2 8 | WHERE r.rel = rc.rel 9 | AND r.ent1 = ec1.ent AND ec1.class = rc.class1 10 | AND r.ent2 = ec2.ent AND ec2.class = rc.class2 11 | GROUP BY r.rel, r.ent1, rc.class1, r.ent2, rc.class2; 12 | CREATE INDEX relationships_rel_idx ON probkb.relationships(rel); 13 | CLUSTER probkb.relationships USING relationships_rel_idx; 14 | 15 | DELETE FROM probkb.relationships WHERE ent1 = ent2; 16 | 17 | SELECT probkb.qc(); 18 | 19 | -------------------------------------------------------------------------------- /pgsql/sql/load-head.sql: -------------------------------------------------------------------------------- 1 | SET work_mem='4GB'; 2 | SET enable_mergejoin=OFF; 3 | 4 | -- generate random types for 0 typed entities 5 | --INSERT INTO entClasses 6 | --SELECT tt.ent, trunc(random()*156) AS class 7 | --FROM (SELECT id AS ent FROM entities 8 | -- EXCEPT 9 | -- SELECT ent FROM entClasses) tt; 10 | 11 | -- import csv 12 | -------------------------------------------------------------------------------- /pgsql/sql/load-tail.sql: -------------------------------------------------------------------------------- 1 | 2 | ANALYZE probkb.relationships; -- gather statistics for better query plan 3 | ANALYZE probkb.mln1; 4 | ANALYZE probkb.mln2; 5 | ANALYZE probkb.mln3; 6 | ANALYZE probkb.mln4; 7 | ANALYZE probkb.mln5; 8 | ANALYZE probkb.mln6; 9 | -------------------------------------------------------------------------------- /pgsql/sql/load.sql: -------------------------------------------------------------------------------- 1 | -- WARNING: This file is not used. Check `load-*.sql` files instead. 
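-- A sketch of how the split load files appear to fit together (inferred from their contents,
-- not from the Java code): load-head.sql sets the session parameters and ends where the COPY
-- statements for the CSV files generated in the workspace (cf. ProbKBData.buildCSV) are
-- presumably appended at runtime; load-body.sql then builds probkb.relationships and runs
-- probkb.qc(); load-tail.sql issues the final ANALYZE statements. The hard-coded paths below
-- show one concrete instance of that sequence.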
2 | 3 | SET work_mem='4GB'; 4 | SET enable_mergejoin=OFF; 5 | 6 | -- generate random types for 0 typed entities 7 | --INSERT INTO entClasses 8 | --SELECT tt.ent, trunc(random()*156) AS class 9 | --FROM (SELECT id AS ent FROM entities 10 | -- EXCEPT 11 | -- SELECT ent FROM entClasses) tt; 12 | 13 | -- import csv 14 | COPY probkb.classes FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/classes.csv' DELIMITERS ',' CSV; 15 | COPY probkb.entities FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/entities.csv' DELIMITERS ',' CSV; 16 | COPY probkb.relations FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/relations.csv' DELIMITERS ',' CSV; 17 | COPY probkb.entClasses FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/entClasses.csv' DELIMITERS ',' CSV; 18 | COPY probkb.relClasses FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/relClasses.csv' DELIMITERS ',' CSV; 19 | COPY probkb.functionals FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/functionals.csv' DELIMITERS ',' CSV; 20 | COPY probkb.extractions FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/relationships.csv' DELIMITERS ',' CSV; 21 | 22 | -- build relationships table with type information 23 | CREATE TABLE probkb.relationships AS 24 | SELECT nextval('probkb.relids') AS id, r.rel AS rel, 25 | r.ent1 AS ent1, rc.class1 AS class1, 26 | r.ent2 AS ent2, rc.class2 AS class2, AVG(weight) AS weight 27 | FROM probkb.extractions r, probkb.relClasses rc, probkb.entClasses ec1, probkb.entClasses ec2 28 | WHERE r.rel = rc.rel 29 | AND r.ent1 = ec1.ent AND ec1.class = rc.class1 30 | AND r.ent2 = ec2.ent AND ec2.class = rc.class2 31 | GROUP BY r.rel, r.ent1, rc.class1, r.ent2, rc.class2; 32 | CREATE INDEX relationships_rel_idx ON probkb.relationships(rel); 33 | CLUSTER probkb.relationships USING relationships_rel_idx; 34 | 35 | DELETE FROM probkb.relationships WHERE ent1 = ent2; 36 | 37 | SELECT probkb.qc(); 38 | 39 | COPY probkb.mln1 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln1.csv' DELIMITERS ',' CSV; 40 | COPY probkb.mln2 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln2.csv' DELIMITERS ',' CSV; 41 | COPY probkb.mln3 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln3.csv' DELIMITERS ',' CSV; 42 | COPY probkb.mln4 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln4.csv' DELIMITERS ',' CSV; 43 | COPY probkb.mln5 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln5.csv' DELIMITERS ',' CSV; 44 | COPY probkb.mln6 FROM '/Users/tom/PhD/srl/Mandolin/eval/10_publi-probkb/mln6.csv' DELIMITERS ',' CSV; 45 | 46 | ANALYZE probkb.relationships; -- gather statistics for better query plan 47 | ANALYZE probkb.mln1; 48 | ANALYZE probkb.mln2; 49 | ANALYZE probkb.mln3; 50 | ANALYZE probkb.mln4; 51 | ANALYZE probkb.mln5; 52 | ANALYZE probkb.mln6; 53 | -------------------------------------------------------------------------------- /pgsql/sql/qc.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION probkb.qc() RETURNS BIGINT AS $$ 2 | DECLARE 3 | deleted1 BIGINT := 0; 4 | deleted2 BIGINT := 0; 5 | BEGIN 6 | -- Detecting ambiguity. 7 | INSERT INTO probkb.ambiguities 8 | SELECT DISTINCT r.ent1, r.class1 9 | FROM probkb.relationships r JOIN probkb.functionals f ON r.rel = f.rel 10 | WHERE f.arg = 1 11 | GROUP BY r.rel, ent1, class1, class2 12 | HAVING COUNT(*) > MIN(f.deg) 13 | EXCEPT 14 | SELECT ent, class FROM probkb.ambiguities; 15 | 16 | -- Remove ambiguous entities. 
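-- An (entity, class) pair is flagged as ambiguous above when it participates in a functional
-- relation more often than its declared degree allows (HAVING COUNT(*) > MIN(f.deg));
-- the two DELETE statements below drop every relationship involving such a pair, and the
-- function returns the total number of deleted rows.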
17 | DELETE FROM probkb.relationships 18 | WHERE (ent1, class1) IN ( 19 | SELECT ent, class FROM probkb.ambiguities 20 | ); 21 | GET DIAGNOSTICS deleted1 = ROW_COUNT; 22 | 23 | DELETE FROM probkb.relationships 24 | WHERE (ent2, class2) IN ( 25 | SELECT ent, class FROM probkb.ambiguities 26 | ); 27 | GET DIAGNOSTICS deleted2 = ROW_COUNT; 28 | 29 | RETURN deleted1 + deleted2; 30 | END; 31 | $$ LANGUAGE plpgsql; 32 | -------------------------------------------------------------------------------- /pgsql/sql/run.sql: -------------------------------------------------------------------------------- 1 | -- Run grounding phase... 2 | 3 | SELECT probkb.ground(); 4 | SELECT probkb.groundFactors(); 5 | -------------------------------------------------------------------------------- /pgsql/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | postgres -D db/ & 3 | -------------------------------------------------------------------------------- /pgsql/stop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | pg_ctl stop -D db/ 3 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | org.aksw.mandolin 5 | Mandolin 6 | 0.4.0 7 | Mandolin 8 | 9 | UTF-8 10 | 11 | 12 | 13 | 14 | maven-compiler-plugin 15 | 3.1 16 | 17 | 1.8 18 | 1.8 19 | 20 | 21 | 22 | maven-assembly-plugin 23 | 24 | 25 | 26 | org.aksw.mandolin.MainDolin 27 | 28 | 29 | 30 | jar-with-dependencies 31 | 32 | 33 | 34 | 35 | package 36 | 37 | single 38 | 39 | 40 | 41 | 42 | 43 | org.apache.maven.plugins 44 | maven-surefire-plugin 45 | 2.14.1 46 | 47 | 48 | **/*Test.java 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | maven.aksw.internal 57 | University Leipzig, AKSW Maven2 Repository 58 | http://maven.aksw.org/repository/internal 59 | 60 | 61 | 62 | maven.aksw.snapshots 63 | University Leipzig, AKSW Maven2 Repository 64 | http://maven.aksw.org/repository/snapshots 65 | 66 | 67 | 68 | 69 | 70 | org.apache.jena 71 | jena-core 72 | 2.13.0 73 | 74 | 75 | org.apache.jena 76 | jena-arq 77 | 2.13.0 78 | 79 | 80 | com.opencsv 81 | opencsv 82 | 3.3 83 | 84 | 85 | org.apache.logging.log4j 86 | log4j-core 87 | 2.4.1 88 | 89 | 90 | net.sf.supercsv 91 | super-csv 92 | 2.3.1 93 | 94 | 95 | com.github.mpkorstanje 96 | simmetrics-core 97 | 3.2.0 98 | 99 | 100 | it.unimi.dsi 101 | fastutil 102 | 6.3 103 | 104 | 105 | com.github.ansell.aterms 106 | aterm-java 107 | 1.8.2 108 | 109 | 110 | com.github.ansell.aterms 111 | shared-objects 112 | 1.4.9-p1 113 | 114 | 115 | com.clarkparsia.pellet 116 | pellet-core 117 | 2.4.0-SNAPSHOT 118 | 119 | 120 | com.clarkparsia.pellet 121 | pellet-jena 122 | 2.4.0-SNAPSHOT 123 | 124 | 125 | org.postgresql 126 | postgresql 127 | 9.4-1205-jdbc42 128 | 129 | 130 | it.tsoru.ppjoinhandler 131 | ppjoin-handler 132 | 0.1.1 133 | 134 | 135 | com.googlecode.rockit 136 | tsoru-rockit 137 | 0.5.277 138 | 139 | 140 | org.jgrapht 141 | jgrapht-core 142 | 0.9.1 143 | 144 | 145 | de.mpg.mpi-inf.amie 146 | amie-plus 147 | 2015-08-26 148 | 149 | 150 | commons-cli 151 | commons-cli 152 | 1.2 153 | 154 | 155 | Markov Logic Networks for the Discovery of Links. 
156 | http://mandolin.aksw.org/ 157 | 2014 158 | 159 | 160 | -------------------------------------------------------------------------------- /rockit.properties: -------------------------------------------------------------------------------- 1 | # CONFIGURATION FILE FOR ROCKIT 2 | # 3 | # Pay attention that you do not use spaces at the end of the line (do not write "root ", but only "root") 4 | # 5 | # 6 | # Specify your data how to access your MySQL installation 7 | #sql_username= 8 | #sql_password= 9 | #sql_url= 10 | #?useCursorFetch=true&defaultFetchSize=1000 11 | # 12 | # 13 | # Specify a temp folder where files are created and deleted during runtime. 14 | temp_path=tmp/ 15 | # 16 | # Here you can set the name of the database that will be used. 17 | # All data in this database will be deleted 18 | #sql_database= 19 | # 20 | # ================================================================== 21 | # PARAMETERS 22 | # ================================================================== 23 | # The standard setting is usually optimal. If your problem is too complex, you might want to increase the gurobi_tollerance parameter. 24 | # 25 | # ------------------------------------------------------------------ 26 | # Maximum a-posteriori inference with ILP 27 | # ------------------------------------------------------------------ 28 | # 29 | # Sets the gap of the Gurobi solver. This gives exacter (value around 0.000001) or 30 | # more approximative (value 0.01) solutions. 31 | # If the gap is set to -1, the standard Gurobi gap is used. 32 | # HINT: If your problem does not terminate, then increase this value (take for instance 0.01) 33 | gap=-1 34 | # gap=0.01 35 | # gap=0.001 36 | #... 37 | # gap=0.000001 38 | # 39 | # Enables cutting-plane inference (do not add all constraints at once but add them step by step) 40 | # Reference: http://arxiv.org/abs/1206.3282 41 | # Advantage: Problems are much smaller, thus much faster to solve 42 | # Disadvantage: More than one problem has to be solved (but usually the overall time is still smaller) 43 | # use_cutting_plane_inference=true 44 | use_cutting_plane_inference=true 45 | # 46 | # If activated, variables will be aggregated. 47 | # Reference: https://rockit.googlecode.com/files/rockit.pdf 48 | # Advantage: Less formulas in ILP, ILP runs faster. 49 | # Disadvantage: Maybe a slightly slower Java Code (but usually the overall time is faster when activated) 50 | # use_cutting_plane_aggregation=true 51 | use_cutting_plane_aggregation=true 52 | # 53 | # Number of threads (integer). Determines the number of threads created for SQL processing. 54 | # If value is -1 then the number of threads is your number of CPUs + 1. 55 | number_of_threads=-1 56 | # 57 | # 58 | #Time limit in seconds. Limits the total time expended for the gurobi solver (in seconds). 59 | #Note that this only limits the time of the gurobi solver. Since there can be multiple calls of the gurobi solver, the overall runtime 60 | #is usually larger. 61 | #If -1 is set, time limit is infinitive. 62 | time_limit=-1 63 | # 64 | # 65 | # ------------------------------------------------------------------ 66 | # Marginal inference using Gibbs sampling in combination with symmetry detection 67 | # ------------------------------------------------------------------ 68 | # 69 | # Leverage Symmetries: 70 | # Reference: http://arxiv.org/abs/1304.2694 71 | # Advantage: The resulting marginal probabilities are of a higher quality since all their symmetries 72 | # are leveraged. 
This usually leads to a significantly better result in fewer samples (proven). 73 | # Disadvantage: Minimally longer runtime and needs to run saucy in the background (not possible on Apple iOS machines) 74 | use_symmetries_in_marginal_inference=false 75 | # 76 | # ------------------------------------------------------------------ 77 | # General Parameters 78 | # ------------------------------------------------------------------ 79 | # 80 | # 81 | # Enables larger (debug) output. Additionally, it prints the ILP into the file "model.lp" before solving. 82 | # Important: Set this to false for runtime evaluations. 83 | # debug_output=false 84 | debug_output=true 85 | # 86 | # Simplifies formulas with negative weights, as is done in Tuffy. 87 | # Example: 88 | # -2 student(a1) v !advisedBy(a2,a1) v !advisedBy(a1,a2) 89 | # --> 90 | # 2 student(a1) n advisedBy(a2,a1) n advisedBy(a1,a2) 91 | # --> 92 | # 1 student(a1) v advisedBy(a1,a2) 93 | # 1 student(a1) v advisedBy(a2,a1) 94 | # 95 | # The formulas with negative weights are changed so that: 96 | # - Their weight is multiplied by -1 (made positive). 97 | # - Each non-negated hidden predicate (in the restriction part) is set to its negation. 98 | # - Each negated hidden predicate (in the restriction part) is set to be non-negated. 99 | # - The formula becomes a conjunction 100 | # 101 | # The formulas with conjunctions are changed so that: 102 | # - The weight is divided by the number of hidden predicates. 103 | # - For each predicate a new formula is created. 104 | # 105 | # If deactivated, negative weights and conjunctions are left as they are (no changes are made) 106 | simplify_negative_weight_and_conjunction=false 107 | #simplify_negative_weight_and_conjunction=false 108 | # 109 | # Changes the handling of String variables: 110 | # converts string values to extra observed predicates if they occur in positive predicates. 111 | # Advice: leave this value as it is 112 | convert_string_values=true 113 | # 114 | # Select ILP solver: 115 | # GUROBI, CPLEX, SCIP 116 | # default: GUROBI 117 | ilp_solver=GUROBI 118 | # 119 | scip= 120 | # it was C:\\scip-3.1.0.exe -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/MainDolin.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin; 2 | 3 | import java.util.Arrays; 4 | 5 | import org.aksw.mandolin.eval.CrossValidation; 6 | import org.aksw.mandolin.eval.MeanRankCalc; 7 | 8 | /** 9 | * Main controller for Mandolin. 
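* Dispatches on the first command-line argument: "plain" runs the Mandolin pipeline, "eval" runs MeanRankCalc, and "cv" runs CrossValidation; the remaining arguments are forwarded to the selected goal.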
10 | * 11 | * @author Tommaso Soru 12 | * 13 | */ 14 | public class MainDolin { 15 | 16 | public static void main(String[] args) throws NumberFormatException, Exception { 17 | 18 | String[] argsw = Arrays.copyOfRange(args, 1, args.length); 19 | 20 | switch(args[0]) { 21 | case "plain": 22 | Mandolin.main(argsw); 23 | break; 24 | case "eval": 25 | MeanRankCalc.main(argsw); 26 | break; 27 | case "cv": 28 | CrossValidation.main(argsw); 29 | break; 30 | default: 31 | } 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/Mandolin.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin; 2 | 3 | import java.io.File; 4 | import java.util.Arrays; 5 | import java.util.Scanner; 6 | 7 | import org.aksw.mandolin.controller.Classes; 8 | import org.aksw.mandolin.controller.Evidence; 9 | import org.aksw.mandolin.controller.NameMapper; 10 | import org.aksw.mandolin.controller.OntoImporter; 11 | import org.aksw.mandolin.controller.ProbKBData; 12 | import org.aksw.mandolin.controller.Validator; 13 | import org.aksw.mandolin.grounding.Grounding; 14 | import org.aksw.mandolin.inference.ProbKBToRockitGibbsSampling; 15 | import org.aksw.mandolin.model.PredictionSet; 16 | import org.aksw.mandolin.reasoner.PelletReasoner; 17 | import org.aksw.mandolin.rulemining.RDFToTSV; 18 | import org.aksw.mandolin.rulemining.RuleMiner; 19 | import org.aksw.mandolin.util.PostgreNotStartedException; 20 | import org.aksw.mandolin.util.SetUtils; 21 | import org.apache.logging.log4j.LogManager; 22 | import org.apache.logging.log4j.Logger; 23 | 24 | /** 25 | * The final pipeline for MANDOLIN, a scalable join of several 26 | * statistical-relational-learning algorithms to predict RDF links of any type 27 | * (i.e., triples) in one or more RDF datasets using rule mining of Horn 28 | * clauses, Markov Logic Networks, and Gibbs Sampling. 29 | * 30 | * @author Tommaso Soru 31 | * 32 | */ 33 | public class Mandolin { 34 | 35 | private final static Logger logger = LogManager.getLogger(Mandolin.class); 36 | 37 | private static final int THETA_MIN = 0; 38 | private static final int THETA_MAX = 10; 39 | // input datasets 40 | private String[] inputPaths; 41 | private String workspace; 42 | private String aimRelation; 43 | 44 | // thresholds for similarity joins among datatype values 45 | private int thrMin; 46 | private int thrMax; 47 | private int thrStep; 48 | 49 | /** 50 | * Enable ontology import. 51 | */ 52 | private boolean enableOnt; 53 | 54 | /** 55 | * Enable forward chain. 56 | */ 57 | private boolean enableFwc; 58 | 59 | /** 60 | * Enable similarity graph enrichment. 61 | */ 62 | private boolean enableSim; 63 | 64 | /* 65 | * Threshold for head-coverage method in rule mining. If null, support method is used. 66 | */ 67 | private Double mining; 68 | 69 | /** 70 | * Maximum number of rules to mine. 71 | */ 72 | private Integer maxRules; 73 | 74 | // ------------------------------------------------------------------------- 75 | 76 | private NameMapper map; 77 | 78 | /** 79 | * Iteration for the Gibbs sampling. 
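* (According to the README, this defaults to 100 x the evidence size when --sampling is not given.)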
80 | */ 81 | private Integer sampling; 82 | 83 | /** 84 | * @param workspace workspace path 85 | * @param csInputPaths comma-separated input paths 86 | * @param aimRelation aim relation URI 87 | * @param thrMin 88 | * @param thrStep 89 | * @param thrMax 90 | * @param enableOnt 91 | * @param enableFwc 92 | * @param enableSim 93 | */ 94 | public Mandolin(String workspace, String csInputPaths, String aimRelation, int thrMin, int thrStep, int thrMax, boolean enableOnt, boolean enableFwc, boolean enableSim) { 95 | super(); 96 | 97 | this.workspace = workspace; 98 | this.inputPaths = csInputPaths.split(","); 99 | this.aimRelation = aimRelation; 100 | this.thrMin = thrMin; 101 | this.thrStep = thrStep; 102 | this.thrMax = thrMax; 103 | this.enableOnt = enableOnt; 104 | this.enableFwc = enableFwc; 105 | this.enableSim = enableSim; 106 | 107 | map = new NameMapper(aimRelation); 108 | 109 | } 110 | 111 | /** 112 | * @throws Exception 113 | */ 114 | public void run() throws Exception { 115 | 116 | logger.info("Mandolin started!"); 117 | printInfo(); 118 | 119 | // create working directory 120 | new File(workspace).mkdirs(); 121 | 122 | if(enableOnt) { 123 | // inputs -> model-tmp.nt 124 | OntoImporter.run(workspace, inputPaths); 125 | } 126 | 127 | // inputs (or model-tmp.nt) -> model.nt (or model-fwc.nt) 128 | Validator.run(workspace, inputPaths, enableFwc, enableOnt); 129 | if(enableFwc) { 130 | // model.nt -> model-fwc.nt 131 | PelletReasoner.run(workspace); 132 | } 133 | 134 | // model-fwc.nt -> map (classes) 135 | Classes.build(map, workspace); 136 | // model-fwc.nt -> map (other) 137 | if(enableSim) 138 | Evidence.build(map, workspace, thrMin, thrMax, thrStep); 139 | else 140 | Evidence.build(map, workspace); 141 | 142 | if(logger.isTraceEnabled()) 143 | map.pretty(); 144 | 145 | logger.info("# entClasses: " + map.getEntClasses().size()); 146 | logger.info("# relClasses: " + map.getRelClasses().size()); 147 | logger.info("# relationships: " + map.getRelationships().size()); 148 | 149 | // map -> KB description csv 150 | ProbKBData.buildCSV(map, workspace); 151 | 152 | // model-fwc.nt -> model.tsv 153 | RDFToTSV.run(workspace); 154 | // model.tsv -> MLN csv 155 | RuleMiner.run(map, workspace, mining, maxRules); 156 | 157 | // csv -> Postgre factors 158 | Grounding.ground(workspace); 159 | 160 | // Postgre factors -> predictions 161 | PredictionSet pset = new ProbKBToRockitGibbsSampling(map).infer(sampling); 162 | 163 | pset.saveTo(workspace + "/predictions.dat"); 164 | 165 | for(int th=THETA_MIN; th<=THETA_MAX; th+=1) { 166 | double theta = th / 10.0; 167 | logger.info("theta = "+theta); 168 | 169 | // get set of predicted (just outputted) links 170 | String knowledge = workspace + "/model-fwc.nt"; 171 | String predicted = workspace + "/output_" + theta + ".nt"; 172 | pset.saveLinkset(map, theta, predicted); 173 | 174 | // compute set of discovered (emergent) links 175 | String discovered = workspace + "/discovered_" + theta + ".nt"; 176 | SetUtils.minus(predicted, knowledge, discovered); 177 | logger.debug("+++ DISCOVERED +++"); 178 | Scanner in = new Scanner(new File(discovered)); 179 | int size = 0; 180 | while(in.hasNextLine()) { 181 | logger.debug(in.nextLine()); 182 | size++; 183 | } 184 | in.close(); 185 | logger.info("Discovered triples size: "+size); 186 | } 187 | 188 | 189 | logger.info("Mandolin done."); 190 | 191 | } 192 | 193 | /** 194 | * 195 | */ 196 | private void printInfo() { 197 | logger.info("BASE = "+workspace); 198 | logger.info("INPUT_PATHS:"); 199 | for(String ip : 
inputPaths) 200 | logger.info("\t" + ip); 201 | logger.info("AIM_RELATION = "+aimRelation); 202 | logger.info("ONTO_IMPORT = "+enableOnt); 203 | logger.info("FORWARD_CHAIN = "+enableFwc); 204 | logger.info("SIMILARITIES = "+enableSim); 205 | logger.info("THR = [min="+thrMin+", step="+thrStep+", max="+thrMax+"]"); 206 | logger.info("MINING_THR = "+mining); 207 | logger.info("MAX_RULES = "+maxRules); 208 | logger.info("SAMPLING_ITER = "+sampling); 209 | } 210 | 211 | 212 | public NameMapper getMap() { 213 | return map; 214 | } 215 | 216 | public static void main(String[] args) throws Exception { 217 | 218 | logger.info("Mandolin initialized with args = {}", Arrays.toString(args)); 219 | 220 | String output = null, input = null, aim = "false", rules = null, 221 | onto = "false", fwc = "false", mining = null, sampling = null; 222 | String[] simVal = {"-1", "-1", "-1"}; 223 | 224 | for(int i=0; i 34 | * 35 | */ 36 | public class MandolinCommon { 37 | 38 | public static final String SRC_PATH = "datasets/DBLPL3S.nt"; 39 | public static final String TGT_PATH = "datasets/LinkedACM.nt"; 40 | public static final String LINKSET_PATH = "linksets/DBLPL3S-LinkedACM.nt"; 41 | public static final String GOLD_STANDARD_PATH = "linksets/DBLPL3S-LinkedACM-GoldStandard.nt"; 42 | 43 | public static final String BASE = "eval/09_publi-tuffy"; 44 | 45 | public static final String EVIDENCE_DB = BASE + "/evidence.db"; 46 | public static final String QUERY_DB = BASE + "/query.db"; 47 | public static final String PROG_MLN = BASE + "/prog.mln"; 48 | 49 | public static final int TRAINING_SIZE = Integer.MAX_VALUE; // TODO restore: 50 | // (int) (47 * 51 | // 0.9); 52 | 53 | private static final int THR_MIN = 80; 54 | private static final int THR_MAX = 90; 55 | private static final int THR_STEP = 10; 56 | 57 | private TreeSet unary = new TreeSet<>(); 58 | 59 | private NameMapperCommon map; 60 | 61 | public NameMapperCommon getMap() { 62 | return map; 63 | } 64 | 65 | public MandolinCommon() { 66 | 67 | map = new NameMapperCommon(); 68 | 69 | } 70 | 71 | private void run() throws FileNotFoundException { 72 | 73 | new File(BASE).mkdirs(); 74 | 75 | PrintWriter pwEvid = new PrintWriter(new File(EVIDENCE_DB)); 76 | graphEvidence(pwEvid); 77 | mappingEvidence(pwEvid, 0, TRAINING_SIZE); 78 | pwEvid.close(); 79 | 80 | buildQueryDB(new PrintWriter(new File(QUERY_DB))); 81 | 82 | buildProgMLN(new PrintWriter(new File(PROG_MLN))); 83 | 84 | } 85 | 86 | public void buildProgMLN(PrintWriter pwProg) { 87 | 88 | String sameAs = map.getName(URLs.OWL_SAMEAS); 89 | for (String name : map.getNamesByType(Type.PROPERTY)) { 90 | // closed world assumption is false for owl:sameAs 91 | String cw = name.equals(sameAs) ? "" : "*"; 92 | pwProg.write(cw + name + "(res, res)\n"); 93 | } 94 | for (int thr = THR_MIN; thr <= THR_MAX; thr += THR_STEP) 95 | pwProg.write("*Sim" + thr + "(res, res)\n"); 96 | for (String u : unary) 97 | pwProg.write("*" + u + "(res)\n"); 98 | pwProg.write("\n"); 99 | for (String name : map.getNamesByType(Type.PROPERTY)) { 100 | // symmetric property 101 | pwProg.write("1 !" + name + "(x, y) v " + name + "(y, x)\n"); 102 | pwProg.write("1 !" + name + "(y, x) v " + name + "(x, y)\n"); 103 | // transitive property 104 | pwProg.write("1 !" + name + "(x, y) v !" 
+ name + "(y, z) v " 105 | + name + "(x, z)\n"); 106 | } 107 | pwProg.close(); 108 | 109 | } 110 | 111 | public void buildQueryDB(PrintWriter pwQuery) { 112 | 113 | String sameAs = map.getName(URLs.OWL_SAMEAS); 114 | pwQuery.write(sameAs); 115 | pwQuery.close(); 116 | 117 | } 118 | 119 | public void graphEvidence(PrintWriter pwEvid) { 120 | 121 | final Cache cache = new Cache(); 122 | 123 | PPJoin ppjoin = new PPJoin(); 124 | 125 | Tokenizer tok = ppjoin.getTokenizer(); 126 | HashMap dataset = new HashMap<>(); 127 | 128 | // use a TreeSet to deduplicate 129 | final TreeSet setOfStrings = new TreeSet<>(); 130 | 131 | StreamRDF dataStream = new StreamRDF() { 132 | 133 | @Override 134 | public void base(String arg0) { 135 | } 136 | 137 | @Override 138 | public void finish() { 139 | } 140 | 141 | @Override 142 | public void prefix(String arg0, String arg1) { 143 | } 144 | 145 | @Override 146 | public void quad(Quad arg0) { 147 | } 148 | 149 | @Override 150 | public void start() { 151 | } 152 | 153 | @Override 154 | public void triple(Triple arg0) { 155 | String s = map.add(arg0.getSubject().getURI(), Type.RESOURCE); 156 | // System.out.println("Added "+s+" - "+map.getURI(s)); 157 | String p = map.add(arg0.getPredicate().getURI(), Type.PROPERTY); 158 | // System.out.println("Added "+p+" - "+map.getURI(p)); 159 | String o = map.add(arg0.getObject().toString(), Type.RESOURCE); 160 | // System.out.println("Added "+o+" - "+map.getURI(o)); 161 | 162 | if (pwEvid != null) { 163 | 164 | if (arg0.getPredicate().getURI() 165 | .equals(Commons.RDF_TYPE.getURI())) { 166 | System.out.println(o + "(" + s + ")"); 167 | System.out.println("NEWCLASS\t" + o + "\t" 168 | + arg0.getObject().toString()); 169 | pwEvid.write(o + "(" + s + ")\n"); 170 | unary.add(o); 171 | } else { 172 | System.out.println(p + "(" + s + ", " + o + ")"); 173 | pwEvid.write(p + "(" + s + ", " + o + ")\n"); 174 | } 175 | } 176 | 177 | if (arg0.getObject().isLiteral()) { 178 | String dtURI = arg0.getObject().getLiteralDatatypeURI(); 179 | 180 | boolean considerString; 181 | if (dtURI == null) 182 | considerString = true; 183 | else 184 | considerString = dtURI.equals(XSD.xstring.getURI()); 185 | 186 | if (considerString) { 187 | ComparableLiteral lit = new ComparableLiteral(arg0 188 | .getObject().getLiteral().toString(true), arg0 189 | .getObject().getLiteral().getValue().toString()); 190 | setOfStrings.add(lit); 191 | } 192 | } 193 | 194 | } 195 | 196 | }; 197 | 198 | RDFDataMgr.parse(dataStream, SRC_PATH); 199 | RDFDataMgr.parse(dataStream, TGT_PATH); 200 | 201 | map.pretty(); 202 | 203 | Iterator it = setOfStrings.iterator(); 204 | for (int i = 0; it.hasNext(); i++) { 205 | ComparableLiteral lit = it.next(); 206 | String val = lit.getVal(); 207 | cache.stringItems.add(new StringItem(tok.tokenize(val, false), i)); 208 | dataset.put(i, lit); 209 | } 210 | 211 | System.out.println(cache.stringItems.size()); 212 | List stringItems = cache.stringItems; 213 | 214 | StringItem[] strDatum = stringItems.toArray(new StringItem[stringItems 215 | .size()]); 216 | Arrays.sort(strDatum); 217 | 218 | ppjoin.setUseSortAtExtractPairs(false); 219 | 220 | for (int thr = THR_MIN; thr <= THR_MAX; thr += THR_STEP) { 221 | System.out.println("thr = " + (thr / 100.0)); 222 | List> result = ppjoin.extractPairs( 223 | strDatum, thr / 100.0); 224 | for (Entry entry : result) { 225 | ComparableLiteral lit1 = dataset.get(entry.getKey().getId()); 226 | ComparableLiteral lit2 = dataset.get(entry.getValue().getId()); 227 | pwEvid.write("Sim" + thr + "(" + 
map.getName(lit1.getUri()) 228 | + ", " + map.getName(lit2.getUri()) + ")\n"); 229 | System.out.println(lit1.getUri() + " <=> " + lit2.getUri()); 230 | System.out.println(lit1.getVal() + " <=> " + lit2.getVal()); 231 | } 232 | } 233 | 234 | } 235 | 236 | public void mappingEvidence(PrintWriter pwEvid, final int START, 237 | final int END) { 238 | 239 | final Cache training = new Cache(); 240 | 241 | StreamRDF mapStream = new StreamRDF() { 242 | 243 | @Override 244 | public void base(String arg0) { 245 | } 246 | 247 | @Override 248 | public void finish() { 249 | } 250 | 251 | @Override 252 | public void prefix(String arg0, String arg1) { 253 | } 254 | 255 | @Override 256 | public void quad(Quad arg0) { 257 | } 258 | 259 | @Override 260 | public void start() { 261 | } 262 | 263 | @Override 264 | public void triple(Triple arg0) { 265 | String s = map.add(arg0.getSubject().getURI(), Type.RESOURCE); 266 | // System.out.println("Added "+s+" - "+map.getURI(s)); 267 | String p = map.add(arg0.getPredicate().getURI(), Type.PROPERTY); 268 | // System.out.println("Added "+p+" - "+map.getURI(p)); 269 | String o = map.add(arg0.getObject().toString(), Type.RESOURCE); 270 | // System.out.println("Added "+o+" - "+map.getURI(o)); 271 | 272 | if (pwEvid != null) { 273 | int c = ++training.count; 274 | if (START <= c && c <= END) { 275 | System.out.println(training.count + "\t" + p + "(" + s 276 | + ", " + o + ")"); 277 | pwEvid.write(p + "(" + s + ", " + o + ")\n"); 278 | } 279 | } 280 | } 281 | 282 | }; 283 | 284 | RDFDataMgr.parse(mapStream, LINKSET_PATH); 285 | 286 | } 287 | 288 | public void closureEvidence(PrintWriter pwEvid) { 289 | 290 | StreamRDF mapStream = new StreamRDF() { 291 | 292 | @Override 293 | public void base(String arg0) { 294 | } 295 | 296 | @Override 297 | public void finish() { 298 | } 299 | 300 | @Override 301 | public void prefix(String arg0, String arg1) { 302 | } 303 | 304 | @Override 305 | public void quad(Quad arg0) { 306 | } 307 | 308 | @Override 309 | public void start() { 310 | } 311 | 312 | @Override 313 | public void triple(Triple arg0) { 314 | String s = map.getName(arg0.getSubject().getURI()); 315 | String p = map.getName(arg0.getPredicate().getURI()); 316 | String o = map.getName(arg0.getObject().toString()); 317 | 318 | if (s == null || p == null || o == null) 319 | System.err.println("HALT!"); 320 | 321 | if (pwEvid != null) { 322 | pwEvid.write(p + "(" + s + ", " + o + ")\n"); 323 | } 324 | } 325 | 326 | }; 327 | 328 | RDFDataMgr.parse(mapStream, GOLD_STANDARD_PATH); 329 | 330 | } 331 | 332 | public static void main(String[] args) throws FileNotFoundException { 333 | 334 | // System.err.println("Launch line commented to prevent file overwrite."); 335 | new MandolinCommon().run(); 336 | 337 | } 338 | 339 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/common/NameMapperCommon.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.common; 2 | 3 | import java.util.HashMap; 4 | import java.util.TreeSet; 5 | 6 | /** 7 | * @author Tommaso Soru 8 | * 9 | */ 10 | public class NameMapperCommon { 11 | 12 | private HashMap mlnToUri = new HashMap<>(); 13 | private HashMap uriToMln = new HashMap<>(); 14 | 15 | private HashMap> listByType = new HashMap<>(); 16 | 17 | public enum Type { 18 | RESOURCE("Res"), PROPERTY("Prop"); 19 | private String str; 20 | Type(String str) { 21 | this.str = str; 22 | } 23 | public String toString() { 24 | return str; 25 | 
} 26 | } 27 | 28 | private HashMap count = new HashMap<>(); 29 | 30 | public NameMapperCommon() { 31 | super(); 32 | count.put(Type.RESOURCE, 0); 33 | count.put(Type.PROPERTY, 0); 34 | listByType.put(Type.RESOURCE, new TreeSet<>()); 35 | listByType.put(Type.PROPERTY, new TreeSet<>()); 36 | } 37 | 38 | /** 39 | * Add an URI to the map and return the MLN name. 40 | * 41 | * @param uri 42 | * @return 43 | */ 44 | public String add(String uri, Type type) { 45 | 46 | if(uriToMln.containsKey(uri)) 47 | return uriToMln.get(uri); 48 | 49 | String name = type.toString() + count.get(type); 50 | mlnToUri.put(name, uri); 51 | uriToMln.put(uri, name); 52 | listByType.get(type).add(name); 53 | increase(type); 54 | return name; 55 | } 56 | 57 | private void increase(Type type) { 58 | count.put(type, count.get(type) + 1); 59 | } 60 | 61 | public String getURI(String name) { 62 | return mlnToUri.get(name); 63 | } 64 | 65 | public String getName(String uri) { 66 | return uriToMln.get(uri); 67 | } 68 | 69 | public TreeSet getNamesByType(Type type) { 70 | return listByType.get(type); 71 | } 72 | 73 | public void pretty() { 74 | for(String key : mlnToUri.keySet()) 75 | // if(listByType.get(Type.PROPERTY).contains(key)) // TODO remove me! 76 | System.out.println(key + "\t" + mlnToUri.get(key)); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/Classes.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import org.aksw.mandolin.controller.NameMapper.Type; 4 | import org.aksw.mandolin.util.URIHandler; 5 | import org.apache.jena.riot.RDFDataMgr; 6 | import org.apache.jena.riot.system.StreamRDF; 7 | import org.apache.logging.log4j.LogManager; 8 | import org.apache.logging.log4j.Logger; 9 | 10 | import com.hp.hpl.jena.graph.Triple; 11 | import com.hp.hpl.jena.sparql.core.Quad; 12 | import com.hp.hpl.jena.vocabulary.OWL; 13 | import com.hp.hpl.jena.vocabulary.RDF; 14 | import com.hp.hpl.jena.vocabulary.RDFS; 15 | 16 | /** 17 | * @author Tommaso Soru 18 | * 19 | */ 20 | public class Classes { 21 | 22 | private final static Logger logger = LogManager.getLogger(Classes.class); 23 | 24 | private final static Cache size = new Cache(); 25 | 26 | /** 27 | * @param map 28 | * @param SRC_PATH 29 | * @param TGT_PATH 30 | */ 31 | public static void build(final NameMapper map, final String BASE) { 32 | 33 | final CollectionCache nodes = new CollectionCache(); 34 | // final CollectionCache classes = new CollectionCache(); 35 | 36 | // reader implementation 37 | StreamRDF dataStream = new StreamRDF() { 38 | 39 | @Override 40 | public void base(String arg0) { 41 | } 42 | 43 | @Override 44 | public void finish() { 45 | } 46 | 47 | @Override 48 | public void prefix(String arg0, String arg1) { 49 | } 50 | 51 | @Override 52 | public void quad(Quad arg0) { 53 | } 54 | 55 | @Override 56 | public void start() { 57 | } 58 | 59 | @Override 60 | public void triple(Triple arg0) { 61 | String s = URIHandler.parse(arg0.getSubject()); 62 | String p = arg0.getPredicate().getURI(); 63 | String o = arg0.getObject().toString(); 64 | 65 | // if property is rdf:type... 66 | if(p.equals(RDF.type.getURI())) { 67 | // then object is always a class 68 | String className = map.add(o, Type.CLASS); 69 | // if object is :Class... 
70 | if(o.equals(OWL.Class.getURI()) || 71 | o.equals(RDFS.Class.getURI())) { 72 | // then also subject is a class 73 | map.add(s, Type.CLASS); 74 | } else { 75 | // else subject is an entity 76 | // XXX subject could be even a property! 77 | String entName = map.add(s, Type.ENTITY); 78 | map.addEntClass(entName, className); 79 | } 80 | // // save class 81 | // // TODO this could be extended to all properties with domain or range = rdfs:Class 82 | // classes.set.add(o); 83 | } 84 | 85 | map.add(s, Type.ENTITY); 86 | map.add(o, Type.ENTITY); 87 | 88 | // save nodes 89 | nodes.set.add(s); 90 | nodes.set.add(o); 91 | 92 | // // save property 93 | // properties.set.add(p); 94 | // count triples 95 | size.value++; 96 | } 97 | 98 | }; 99 | 100 | RDFDataMgr.parse(dataStream, BASE + "/model-fwc.nt"); 101 | 102 | logger.info("Adding owl:Thing type to {} nodes.", nodes.set.size()); 103 | for(String s : nodes.set) 104 | map.addEntClass(map.toName(s), map.getOwlThingName()); 105 | 106 | map.setCollisionDelta(collisionDelta()); 107 | 108 | } 109 | 110 | /** 111 | * Compute the upper bound for the order of magnitude of entities and return the sum to add to avoid ID collision. 112 | * 113 | * @return 114 | */ 115 | public static int collisionDelta() { 116 | int upper = (int) Math.log10(size.value * 2) + 1; 117 | return (int) Math.pow(10, upper); 118 | } 119 | 120 | 121 | } 122 | 123 | class Cache { 124 | int value = 0; 125 | } 126 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/Evidence.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.TreeSet; 7 | 8 | import org.aksw.mandolin.controller.NameMapper.Type; 9 | import org.aksw.mandolin.model.Cache; 10 | import org.aksw.mandolin.model.ComparableLiteral; 11 | import org.aksw.mandolin.util.URIHandler; 12 | import org.apache.jena.riot.Lang; 13 | import org.apache.jena.riot.RDFDataMgr; 14 | import org.apache.jena.riot.system.StreamRDF; 15 | import org.apache.jena.riot.system.StreamRDFWriter; 16 | import org.apache.logging.log4j.LogManager; 17 | import org.apache.logging.log4j.Logger; 18 | 19 | import com.hp.hpl.jena.graph.Node; 20 | import com.hp.hpl.jena.graph.Triple; 21 | import com.hp.hpl.jena.sparql.core.Quad; 22 | import com.hp.hpl.jena.vocabulary.RDF; 23 | import com.hp.hpl.jena.vocabulary.RDFS; 24 | import com.hp.hpl.jena.vocabulary.XSD; 25 | 26 | /** 27 | * @author Tommaso Soru 28 | * 29 | */ 30 | public class Evidence { 31 | 32 | private final static Logger logger = LogManager.getLogger(Evidence.class); 33 | 34 | /** 35 | * @param map 36 | * @param SRC_PATH 37 | * @param TGT_PATH 38 | * @param LNK_PATH 39 | * @param THR_MIN 40 | * @param THR_MAX 41 | * @param THR_STEP 42 | */ 43 | public static void build(final NameMapper map, final String BASE, 44 | final int THR_MIN, final int THR_MAX, final int THR_STEP) { 45 | 46 | // for similarity join 47 | final Cache cache = new Cache(); 48 | 49 | final TreeSet setOfStrings = build(map, BASE); 50 | 51 | // call similarity join 52 | SimilarityJoin.build(map, setOfStrings, cache, BASE, THR_MIN, THR_MAX, 53 | THR_STEP); 54 | 55 | // append model-sim-fwc.nt to model-fwc.nt 56 | final FileOutputStream output; 57 | try { 58 | output = new FileOutputStream(new File(BASE + "/model-sim-temp.nt")); 59 | } catch (FileNotFoundException e) { 
60 | logger.fatal(e.getMessage()); 61 | throw new RuntimeException("File " + BASE + "/model-sim-temp.nt not found!"); 62 | } 63 | 64 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 65 | writer.start(); 66 | 67 | StreamRDF reader = new StreamRDF() { 68 | 69 | @Override 70 | public void triple(Triple triple) { 71 | writer.triple(triple); 72 | } 73 | 74 | @Override 75 | public void start() { 76 | } 77 | 78 | @Override 79 | public void quad(Quad quad) { 80 | } 81 | 82 | @Override 83 | public void prefix(String prefix, String iri) { 84 | } 85 | 86 | @Override 87 | public void finish() { 88 | } 89 | 90 | @Override 91 | public void base(String base) { 92 | } 93 | 94 | }; 95 | 96 | RDFDataMgr.parse(reader, BASE + "/model-fwc.nt"); 97 | 98 | StreamRDF readerSim = new StreamRDF() { 99 | 100 | @Override 101 | public void triple(Triple triple) { 102 | writer.triple(triple); 103 | String s = triple.getSubject().getURI(); 104 | String p = triple.getPredicate().getURI(); 105 | 106 | String o = parse(triple.getObject()); 107 | if(o == null) 108 | return; 109 | // String relName = 110 | map.add(p, Type.RELATION); 111 | // String name1 = 112 | map.add(s, Type.ENTITY); 113 | // String name2 = 114 | map.add(o, Type.ENTITY); 115 | 116 | // XXX oddly this shall be off 117 | // map.addRelationship(relName, name1, name2); 118 | } 119 | 120 | @Override 121 | public void start() { 122 | } 123 | 124 | @Override 125 | public void quad(Quad quad) { 126 | } 127 | 128 | @Override 129 | public void prefix(String prefix, String iri) { 130 | } 131 | 132 | @Override 133 | public void finish() { 134 | } 135 | 136 | @Override 137 | public void base(String base) { 138 | } 139 | 140 | }; 141 | 142 | RDFDataMgr.parse(readerSim, BASE + "/model-sim-fwc.nt"); 143 | 144 | writer.finish(); 145 | 146 | 147 | // delete old file, rename temp file 148 | new File(BASE + "/model-fwc.nt").delete(); 149 | new File(BASE + "/model-sim-temp.nt").renameTo(new File(BASE + "/model-fwc.nt")); 150 | 151 | } 152 | 153 | /** 154 | * @param map 155 | * @param BASE 156 | */ 157 | public static final TreeSet build(final NameMapper map, final String BASE) { 158 | 159 | final TreeSet setOfStrings = new TreeSet<>(); 160 | 161 | // reader implementation 162 | StreamRDF dataStream = new StreamRDF() { 163 | 164 | @Override 165 | public void base(String arg0) { 166 | } 167 | 168 | @Override 169 | public void finish() { 170 | } 171 | 172 | @Override 173 | public void prefix(String arg0, String arg1) { 174 | } 175 | 176 | @Override 177 | public void quad(Quad arg0) { 178 | } 179 | 180 | @Override 181 | public void start() { 182 | } 183 | 184 | @Override 185 | public void triple(Triple arg0) { 186 | String s = URIHandler.parse(arg0.getSubject()); 187 | String p = arg0.getPredicate().getURI(); 188 | // TODO if (o.isBlankNode) => URIHandler 189 | String o = parse(arg0.getObject()); 190 | if(o == null) 191 | return; 192 | 193 | String relName = map.add(p, Type.RELATION); 194 | String subjName = map.add(s, Type.ENTITY); 195 | String objName = map.add(o, Type.ENTITY); 196 | 197 | // now check for non-instantiations... 
198 | if (!p.equals(RDF.type.getURI())) { 199 | // it is supposed that the map contains only classes 200 | // and instances of these classes (see Classes.build) 201 | // assume non-instantiated resources are entities 202 | 203 | // domain/range specification 204 | if (p.equals(RDFS.domain.getURI())) { 205 | subjName = map.add(s, Type.RELATION); 206 | // property name, target class, is domain 207 | map.addRelClass(subjName, objName, true); 208 | } 209 | if (p.equals(RDFS.range.getURI())) { 210 | subjName = map.add(s, Type.RELATION); 211 | // property name, target class, is range 212 | map.addRelClass(subjName, objName, false); 213 | } 214 | 215 | // if subject or object are not found, it means that they 216 | // have not been instantiated earlier (see Classes.build) 217 | if (subjName == null) 218 | // not found => instance subject, create entity 219 | subjName = map.add(s, Type.ENTITY); 220 | else { 221 | // create entity form for class 222 | if (subjName.startsWith(Type.CLASS.toString())) 223 | subjName = map.classToEntityForm(subjName); 224 | // create stable entity form for properties 225 | if (subjName.startsWith(Type.RELATION.toString())) 226 | subjName = map.relationToEntityForm(subjName); 227 | 228 | } 229 | if (objName == null) 230 | // not found => instance/datatype object, create entity 231 | objName = map.add(o, Type.ENTITY); 232 | else { 233 | // create entity form for class 234 | if (objName.startsWith(Type.CLASS.toString())) 235 | objName = map.classToEntityForm(objName); 236 | // create stable entity form for properties 237 | if (objName.startsWith(Type.RELATION.toString())) 238 | objName = map.relationToEntityForm(objName); 239 | } 240 | 241 | // property, subject (entity), object (entity) names 242 | map.addRelationship(relName, subjName, objName); 243 | 244 | } 245 | 246 | if (arg0.getObject().isLiteral()) { 247 | String dtURI = arg0.getObject().getLiteralDatatypeURI(); 248 | 249 | boolean considerString; 250 | if (dtURI == null) 251 | considerString = true; 252 | else 253 | considerString = dtURI.equals(XSD.xstring.getURI()); 254 | 255 | if (considerString) { 256 | // ComparableLiteral lit = new ComparableLiteral(arg0 257 | // .getObject().getLiteral().toString(true), arg0 258 | // .getObject().getLiteral().getValue().toString()); 259 | ComparableLiteral lit = new ComparableLiteral(o, o); 260 | logger.trace(lit.getVal()); 261 | setOfStrings.add(lit); 262 | } 263 | 264 | map.addRelationship(relName, subjName, objName); 265 | } 266 | 267 | } 268 | 269 | }; 270 | 271 | RDFDataMgr.parse(dataStream, BASE + "/model-fwc.nt"); 272 | 273 | return setOfStrings; 274 | 275 | } 276 | 277 | private static String parse(Node obj) { 278 | try { 279 | if(obj.isURI()) 280 | return obj.getURI(); 281 | if(obj.isLiteral()) 282 | return obj.getLiteralValue().toString(); 283 | if(obj.isBlank()) 284 | return obj.getBlankNodeLabel(); 285 | } catch(Exception e) { 286 | logger.warn("Cannot parse node: "+obj); 287 | } 288 | return null; 289 | } 290 | } 291 | 292 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/NameMapper.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.util.HashMap; 4 | import java.util.TreeSet; 5 | 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | import com.hp.hpl.jena.vocabulary.OWL; 10 | import com.hp.hpl.jena.vocabulary.RDF; 11 | 12 | /** 13 | * @author 
Tommaso Soru 14 | * 15 | */ 16 | public class NameMapper { 17 | 18 | private final static Logger logger = LogManager.getLogger(NameMapper.class); 19 | 20 | private HashMap mlnToUri = new HashMap<>(); 21 | private HashMap uriToMln = new HashMap<>(); 22 | 23 | private HashMap> listByType = new HashMap<>(); 24 | 25 | private String RDF_TYPE_NAME; 26 | private String OWL_THING_NAME; 27 | private String AIM_NAME; 28 | 29 | public String getOwlThingId() { 30 | return OWL_THING_NAME.substring(ProbKBData.CLS_LENGTH); 31 | } 32 | 33 | public String getOwlThingName() { 34 | return OWL_THING_NAME; 35 | } 36 | 37 | public TreeSet getEntClasses() { 38 | return entClasses; 39 | } 40 | 41 | public TreeSet getRelClasses() { 42 | return relClasses; 43 | } 44 | 45 | public TreeSet getRelationships() { 46 | return relationships; 47 | } 48 | 49 | // TODO change to HashMap> 50 | private TreeSet entClasses = new TreeSet<>(); 51 | private TreeSet relClasses = new TreeSet<>(); 52 | private TreeSet relationships = new TreeSet<>(); 53 | 54 | public enum Type { 55 | CLASS, ENTITY, RELATION; 56 | public String toString() { 57 | return this.name(); 58 | } 59 | } 60 | 61 | private HashMap count = new HashMap<>(); 62 | 63 | private String aimURI; 64 | private int cDelta; 65 | 66 | public NameMapper(String aimURI) { 67 | super(); 68 | 69 | for(Type t : Type.values()) { 70 | count.put(t, 1); 71 | listByType.put(t, new TreeSet<>()); 72 | } 73 | // for comodity, the first element is always rdf:type 74 | RDF_TYPE_NAME = this.add(RDF.type.getURI(), Type.RELATION); 75 | logger.debug("Alias for rdf:type is " + RDF_TYPE_NAME); 76 | // same for owl:Thing 77 | OWL_THING_NAME = this.add(OWL.Thing.getURI(), Type.CLASS); 78 | logger.debug("Alias for owl:Thing is " + OWL_THING_NAME); 79 | 80 | this.aimURI = aimURI; 81 | if(!aimURI.equals("*")) { 82 | AIM_NAME = this.add(aimURI, Type.RELATION); 83 | logger.debug("Alias for AIM ("+aimURI+") is " + AIM_NAME); 84 | } 85 | } 86 | 87 | /** 88 | * Add the instantiation of an entity. Duality class-entity: a class with ID=x has an entity counterpart with ID=-x. 89 | * 90 | * @param entName 91 | * @param className 92 | */ 93 | public void addEntClass(String entName, String className) { 94 | 95 | if(entName.startsWith(Type.CLASS.name())) 96 | entName = classToEntityForm(entName); 97 | if(entName.startsWith(Type.RELATION.name())) 98 | entName = relationToEntityForm(entName); 99 | 100 | logger.trace("ENTCLASS: "+entName+", "+className); 101 | entClasses.add(entName + "#" + className); 102 | entClasses.add(entName + "#" + OWL_THING_NAME); 103 | // add an rdf:type relationship 104 | this.addRelationship(RDF_TYPE_NAME, entName, Type.ENTITY.toString() + "-" + className.substring(ProbKBData.CLS_LENGTH)); 105 | // add rdf:type owl:Thing 106 | this.addRelationship(RDF_TYPE_NAME, entName, Type.ENTITY.toString() + "-" + OWL_THING_NAME.substring(ProbKBData.CLS_LENGTH)); 107 | } 108 | 109 | /** 110 | * Add domain or range for a relation. 
111 | * 112 | * @param relName 113 | * @param className 114 | * @param isDomain 115 | */ 116 | public void addRelClass(String relName, String className, boolean isDomain) { 117 | relClasses.add(relName + "#" + className + "#" + isDomain); 118 | } 119 | 120 | public void addRelationship(String relName, String name1, String name2) { 121 | if(relName.startsWith(Type.ENTITY.toString())) { 122 | String before = relName; 123 | relName = entityToRelationForm(relName); 124 | // some properties had been recognised as entities before 125 | String uri = mlnToUri.get(before); 126 | mlnToUri.put(relName, uri); 127 | uriToMln.put(uri, relName); 128 | } 129 | relationships.add(relName + "#" + name1 + "#" + name2); 130 | } 131 | 132 | public String entityToRelationForm(String relName) { 133 | String idr = String.valueOf(Integer.parseInt(relName.substring(ProbKBData.ENT_LENGTH)) + cDelta); 134 | logger.trace(relName+" => "+idr); 135 | relName = Type.RELATION.toString() + idr; 136 | return relName; 137 | } 138 | 139 | public String relationToEntityForm(String relName) { 140 | String idr = String.valueOf(Integer.parseInt(relName.substring(ProbKBData.REL_LENGTH)) + cDelta); 141 | logger.trace(relName+" => "+idr); 142 | relName = Type.ENTITY.toString() + idr; 143 | return relName; 144 | } 145 | 146 | public String classToEntityForm(String className) { 147 | return Type.ENTITY.toString() + "-" 148 | + className.substring(ProbKBData.CLS_LENGTH); 149 | } 150 | 151 | /** 152 | * Add an URI to the map and return the MLN name. 153 | * 154 | * @param uri 155 | * @return 156 | */ 157 | public String add(String uri, Type type) { 158 | 159 | if(uriToMln.containsKey(uri)) 160 | return uriToMln.get(uri); 161 | 162 | String name = type.toString() + count.get(type); 163 | mlnToUri.put(name, uri); 164 | uriToMln.put(uri, name); 165 | listByType.get(type).add(name); 166 | increase(type); 167 | return name; 168 | } 169 | 170 | public HashMap getNamesToURIs() { 171 | return mlnToUri; 172 | } 173 | 174 | private void increase(Type type) { 175 | count.put(type, count.get(type) + 1); 176 | } 177 | 178 | public String getURI(String name) { 179 | return mlnToUri.get(name); 180 | } 181 | 182 | public String getName(String uri) { 183 | return uriToMln.get(uri); 184 | } 185 | 186 | public boolean containsURI(String name) { 187 | return mlnToUri.containsKey(name); 188 | } 189 | 190 | public boolean containsName(String uri) { 191 | return uriToMln.containsKey(uri); 192 | } 193 | 194 | public TreeSet getNamesByType(Type type) { 195 | return listByType.get(type); 196 | } 197 | 198 | public void pretty() { 199 | for(String key : mlnToUri.keySet()) 200 | logger.trace(key + "\t" + mlnToUri.get(key)); 201 | } 202 | 203 | /** 204 | * Return only the ID (number after the Type) of the class the given entity belongs to. If not found, return the ID of owl:Thing. 
205 | * 206 | * @param entityName 207 | * @return 208 | */ 209 | public String classIdOf(String entityName) { 210 | for(String ec : entClasses) 211 | if(ec.startsWith(entityName+"#")) 212 | return ec.substring(ProbKBData.CLS_LENGTH); 213 | return OWL_THING_NAME.substring(ProbKBData.CLS_LENGTH); 214 | } 215 | 216 | public String getAim() { 217 | return aimURI; 218 | } 219 | 220 | public String getAimName() { 221 | return AIM_NAME; 222 | } 223 | 224 | public void setCollisionDelta(int cDelta) { 225 | logger.debug("Collision delta: "+cDelta); 226 | this.cDelta = cDelta; 227 | } 228 | 229 | public String toName(String uri) { 230 | String name = uriToMln.get(uri); 231 | if(name.startsWith(Type.ENTITY.name())) 232 | return name; 233 | if(name.startsWith(Type.CLASS.name())) 234 | return classToEntityForm(name); 235 | // relation 236 | return relationToEntityForm(name); 237 | } 238 | 239 | public static int parse(String string) { 240 | String sub = null; 241 | if(string.startsWith(Type.CLASS.name())) 242 | sub = Type.CLASS.name(); 243 | if(string.startsWith(Type.ENTITY.name())) 244 | sub = Type.ENTITY.name(); 245 | if(string.startsWith(Type.RELATION.name())) 246 | sub = Type.RELATION.name(); 247 | return Integer.parseInt(string.substring(sub.length())); 248 | } 249 | 250 | } 251 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/OntoImporter.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.net.URL; 8 | import java.util.TreeSet; 9 | 10 | import org.aksw.mandolin.util.PrettyRandom; 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.jena.riot.Lang; 13 | import org.apache.jena.riot.RDFDataMgr; 14 | import org.apache.jena.riot.RiotException; 15 | import org.apache.jena.riot.system.StreamRDF; 16 | import org.apache.jena.riot.system.StreamRDFWriter; 17 | import org.apache.logging.log4j.LogManager; 18 | import org.apache.logging.log4j.Logger; 19 | 20 | import com.hp.hpl.jena.graph.Triple; 21 | import com.hp.hpl.jena.rdf.model.Model; 22 | import com.hp.hpl.jena.rdf.model.ModelFactory; 23 | import com.hp.hpl.jena.rdf.model.Statement; 24 | import com.hp.hpl.jena.rdf.model.StmtIterator; 25 | import com.hp.hpl.jena.sparql.core.Quad; 26 | import com.hp.hpl.jena.vocabulary.OWL; 27 | import com.hp.hpl.jena.vocabulary.RDF; 28 | import com.hp.hpl.jena.vocabulary.RDFS; 29 | 30 | /** 31 | * Ontologies cannot be imported using Jena, because most datasets are not 32 | * actual OWL files and thus do not provide meta-information about used and 33 | * imported ontologies. For instance, an N-Triples file may use URIs 34 | * which are referenced only in the file itself. Physically visiting these URIs is 35 | * a way to retrieve their definitions. In this version, we limit the URIs to 36 | * classes and properties. 
37 | * 38 | * @author Tommaso Soru 39 | * @version 0.0.1 40 | * 41 | */ 42 | public class OntoImporter { 43 | 44 | private final static Logger logger = LogManager.getLogger(OntoImporter.class); 45 | 46 | private static final Lang[] LANG_ATTEMPTS = {Lang.RDFXML, Lang.TTL, Lang.NT}; 47 | 48 | /** 49 | * @param BASE 50 | * @param paths 51 | */ 52 | public static void run(final String BASE, final String[] paths) { 53 | 54 | final CollectionCache properties = new CollectionCache(); 55 | final CollectionCache classes = new CollectionCache(); 56 | 57 | final FileOutputStream output; 58 | try { 59 | output = new FileOutputStream(new File(BASE + "/model-tmp.nt")); 60 | } catch (FileNotFoundException e) { 61 | e.printStackTrace(); 62 | return; 63 | } 64 | 65 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 66 | 67 | // reader implementation 68 | StreamRDF dataStream = new StreamRDF() { 69 | 70 | @Override 71 | public void base(String arg0) { 72 | } 73 | 74 | @Override 75 | public void finish() { 76 | } 77 | 78 | @Override 79 | public void prefix(String arg0, String arg1) { 80 | } 81 | 82 | @Override 83 | public void quad(Quad arg0) { 84 | } 85 | 86 | @Override 87 | public void start() { 88 | writer.start(); 89 | } 90 | 91 | @Override 92 | public void triple(Triple triple) { 93 | String s = triple.getSubject().getURI(); 94 | String p = triple.getPredicate().getURI(); 95 | String o = triple.getObject().toString(); 96 | 97 | // if property is rdf:type... 98 | if (p.equals(RDF.type.getURI())) { 99 | // save class 100 | // TODO this could be extended to all properties with domain 101 | // or range = rdfs:Class 102 | classes.set.add(o); 103 | // as well as all super-classes of rdfs:Class 104 | if(o.equals(RDFS.Class.getURI()) || 105 | o.equals(OWL.Class.getURI())) 106 | classes.set.add(s); 107 | } 108 | // save property 109 | properties.set.add(p); 110 | 111 | // write triple 112 | writer.triple(triple); 113 | 114 | } 115 | 116 | }; 117 | 118 | for(String path : paths) 119 | RDFDataMgr.parse(dataStream, path); 120 | 121 | logger.info("# classes collected = "+classes.set.size()); 122 | logger.info("# properties collected = "+properties.set.size()); 123 | 124 | // ontology importer 125 | for(String uri : classes.set) { 126 | logger.trace("Crawling <"+uri+">..."); 127 | Model model = ModelFactory.createDefaultModel(); 128 | // visit URIs in classes and properties 129 | String path = BASE + "/temp-file-" + PrettyRandom.get(6) + ""; 130 | File file = new File(path); 131 | try { 132 | FileUtils.copyURLToFile(new URL(uri), file); 133 | } catch (IOException e) { 134 | logger.warn("Cannot download <"+uri+">."); 135 | continue; 136 | } 137 | logger.trace("Saved to "+path+"."); 138 | for(Lang lang : LANG_ATTEMPTS) { 139 | try { 140 | logger.trace("Trying with "+lang); 141 | RDFDataMgr.read(model, path, lang); 142 | break; 143 | } catch (RiotException e) { 144 | logger.warn("Cannot interpret <"+uri+"> using "+lang+"."); 145 | } 146 | } 147 | logger.trace("# statements: "+model.size()); 148 | StmtIterator list = model.listStatements(); 149 | // append NT files to model... 
150 | while(list.hasNext()) { 151 | // save wanted part of RDF files 152 | Statement stmt = list.next(); 153 | 154 | logger.trace(stmt); 155 | 156 | boolean imprt = stmt.getPredicate().getURI().equals(uri); 157 | 158 | if(!imprt) 159 | if(stmt.getSubject().isURIResource()) 160 | if(stmt.getSubject().getURI().equals(uri)) 161 | imprt = true; 162 | if(!imprt) 163 | if(stmt.getObject().isURIResource()) 164 | if(stmt.getObject().asResource().getURI().equals(uri)) 165 | imprt = true; 166 | 167 | if(imprt) { 168 | Triple t = stmt.asTriple(); 169 | logger.trace(t); 170 | writer.triple(t); 171 | } 172 | } 173 | } 174 | writer.finish(); 175 | 176 | } 177 | 178 | } 179 | 180 | class CollectionCache { 181 | TreeSet set = new TreeSet<>(); 182 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/ProbKBData.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.Arrays; 7 | import java.util.HashMap; 8 | import java.util.Iterator; 9 | 10 | import org.aksw.mandolin.controller.NameMapper.Type; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import com.opencsv.CSVWriter; 15 | 16 | /** 17 | * @author Tommaso Soru 18 | * 19 | */ 20 | public class ProbKBData { 21 | 22 | private final static Logger logger = LogManager.getLogger(ProbKBData.class); 23 | 24 | public final static int ENT_LENGTH = Type.ENTITY.name().length(); 25 | public final static int CLS_LENGTH = Type.CLASS.name().length(); 26 | public final static int REL_LENGTH = Type.RELATION.name().length(); 27 | 28 | private static String base; 29 | private static NameMapper map; 30 | 31 | public static void buildCSV(NameMapper theMap, String theBase) throws IOException { 32 | 33 | base = theBase; 34 | map = theMap; 35 | 36 | allNodes(); 37 | 38 | entClasses(); 39 | relClasses(); 40 | relationships(); 41 | functionals(); 42 | 43 | } 44 | 45 | 46 | private static void functionals() throws IOException { 47 | 48 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + "/functionals.csv"))); 49 | 50 | // TODO 51 | 52 | writer.close(); 53 | 54 | } 55 | 56 | 57 | private static void allNodes() throws IOException { 58 | 59 | CSVWriter entWriter = new CSVWriter(new FileWriter(new File(base + "/entities.csv"))); 60 | CSVWriter clsWriter = new CSVWriter(new FileWriter(new File(base + "/classes.csv"))); 61 | CSVWriter relWriter = new CSVWriter(new FileWriter(new File(base + "/relations.csv"))); 62 | 63 | HashMap hmap = map.getNamesToURIs(); 64 | 65 | for(String key : hmap.keySet()) { 66 | String id = ""; 67 | if(key.startsWith(Type.ENTITY.name())) { 68 | id = key.substring(ENT_LENGTH); 69 | entWriter.writeNext(new String[] {id, hmap.get(key)}); 70 | } 71 | if(key.startsWith(Type.CLASS.name())) { 72 | id = key.substring(CLS_LENGTH); 73 | clsWriter.writeNext(new String[] {id, hmap.get(key)}); 74 | entWriter.writeNext(new String[] {"-" + id, hmap.get(key)}); 75 | } 76 | if(key.startsWith(Type.RELATION.name())) { 77 | id = key.substring(REL_LENGTH); 78 | relWriter.writeNext(new String[] {id, hmap.get(key)}); 79 | } 80 | } 81 | 82 | relWriter.close(); 83 | clsWriter.close(); 84 | entWriter.close(); 85 | 86 | } 87 | 88 | 89 | private static void entClasses() throws IOException { 90 | 91 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + 
"/entClasses.csv"))); 92 | 93 | for(String line : map.getEntClasses()) { 94 | String[] arr = line.split("#"); 95 | // entity_id+"|"+class_id 96 | String id1 = arr[0].substring(ENT_LENGTH); 97 | String id2 = arr[1].substring(CLS_LENGTH); 98 | // TODO find a fix for these relationships 99 | try { 100 | Integer.parseInt(id1); 101 | Integer.parseInt(id2); 102 | } catch(NumberFormatException e) { 103 | continue; 104 | } 105 | writer.writeNext(new String[] {id1, id2}); 106 | } 107 | 108 | writer.close(); 109 | 110 | } 111 | 112 | 113 | /** 114 | * Domain and range information, as required by ProbKB. 115 | * 116 | * @throws IOException 117 | */ 118 | private static void relClasses() throws IOException { 119 | 120 | HashMap entries = new HashMap<>(); 121 | 122 | String owlThing = map.getOwlThingId(); 123 | 124 | // set defaults 125 | for(String prop : map.getNamesByType(Type.RELATION)) { 126 | String rel = prop.substring(REL_LENGTH); 127 | entries.put(rel, new String[] {rel, owlThing, owlThing}); 128 | } 129 | 130 | for(String line : map.getRelClasses()) { 131 | String[] arr = line.split("#"); 132 | // rel_id+"#"+class_id+"#"+is_domain 133 | String rel = arr[0].startsWith(Type.RELATION.name()) ? arr[0].substring(REL_LENGTH) : arr[0].substring(ENT_LENGTH); 134 | String cl = arr[1].substring(CLS_LENGTH); 135 | Boolean isDomain = Boolean.parseBoolean(arr[2]); 136 | 137 | String[] obj; 138 | if(entries.containsKey(rel)) 139 | obj = entries.get(rel); 140 | else { 141 | obj = new String[] {rel, owlThing, owlThing}; 142 | entries.put(rel, obj); 143 | } 144 | obj[isDomain ? 1 : 2] = cl; 145 | logger.trace((isDomain ? "domain" : "range") + " => " + Arrays.toString(obj)); 146 | 147 | } 148 | 149 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + "/relClasses.csv"))); 150 | 151 | for(String entry : entries.keySet()) { 152 | String[] obj = entries.get(entry); 153 | // TODO find a fix for these relationships 154 | try { 155 | for(String o : obj) 156 | Integer.parseInt(o); 157 | } catch(NumberFormatException e) { 158 | continue; 159 | } 160 | writer.writeNext(obj); 161 | } 162 | 163 | writer.close(); 164 | 165 | } 166 | 167 | 168 | private static void relationships() throws IOException { 169 | 170 | CSVWriter writer = new CSVWriter(new FileWriter(new File(base + "/relationships.csv"))); 171 | 172 | Iterator it = map.getRelationships().iterator(); 173 | while(it.hasNext()) { 174 | String line = it.next(); 175 | String[] arr = line.split("#"); 176 | // relation_id+"|"+entity_id+"|"+entity_id 177 | String id1 = arr[0].substring(REL_LENGTH); 178 | String id2 = arr[1].substring(ENT_LENGTH); 179 | String id3 = arr[2].substring(ENT_LENGTH); 180 | 181 | // System.out.println(line); 182 | // if(arr[0].startsWith(Type.ENTITY.toString())) { 183 | // id1 = String.valueOf(Integer.parseInt(arr[0].substring(ENT_LENGTH)) + 10000); 184 | // System.out.println("rel = "+id1); 185 | // } 186 | 187 | // TODO find a fix for these relationships 188 | try { 189 | Integer.parseInt(id1); 190 | Integer.parseInt(id2); 191 | Integer.parseInt(id3); 192 | } catch(NumberFormatException e) { 193 | continue; 194 | } 195 | 196 | writer.writeNext(new String[] {id1, id2, id3, "1.0", "http://"}); 197 | } 198 | 199 | writer.close(); 200 | 201 | } 202 | 203 | 204 | } 205 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/SimilarityJoin.java: -------------------------------------------------------------------------------- 1 | package 
org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.util.Arrays; 8 | import java.util.HashMap; 9 | import java.util.Iterator; 10 | import java.util.List; 11 | import java.util.Map.Entry; 12 | import java.util.TreeSet; 13 | 14 | import jp.ndca.similarity.join.PPJoin; 15 | import jp.ndca.similarity.join.StringItem; 16 | import jp.ndca.similarity.join.Tokenizer; 17 | 18 | import org.aksw.mandolin.controller.NameMapper.Type; 19 | import org.aksw.mandolin.model.Cache; 20 | import org.aksw.mandolin.model.ComparableLiteral; 21 | import org.aksw.mandolin.reasoner.PelletReasoner; 22 | import org.apache.commons.codec.digest.DigestUtils; 23 | import org.apache.commons.io.FileUtils; 24 | import org.apache.jena.riot.Lang; 25 | import org.apache.jena.riot.system.StreamRDF; 26 | import org.apache.jena.riot.system.StreamRDFWriter; 27 | import org.apache.logging.log4j.LogManager; 28 | import org.apache.logging.log4j.Logger; 29 | 30 | import com.hp.hpl.jena.graph.Node; 31 | import com.hp.hpl.jena.graph.NodeFactory; 32 | import com.hp.hpl.jena.graph.Triple; 33 | import com.hp.hpl.jena.vocabulary.OWL; 34 | import com.hp.hpl.jena.vocabulary.RDF; 35 | import com.hp.hpl.jena.vocabulary.RDFS; 36 | 37 | /** 38 | * @author Tommaso Soru 39 | * 40 | */ 41 | public class SimilarityJoin { 42 | 43 | private final static Logger logger = LogManager 44 | .getLogger(SimilarityJoin.class); 45 | 46 | public static final String SIMILAR_PREFIX = "http://mandolin.aksw.org/ontology#similar"; 47 | 48 | public static final String SIMILAR_TO_PREFIX = "http://mandolin.aksw.org/ontology#similarTo"; 49 | 50 | static HashMap hashes = new HashMap<>(); 51 | 52 | /** 53 | * This could be left turned off, as the similarity join algorithm already 54 | * finds all pairs. A mere copy of the file is executed if false. 55 | */ 56 | private static boolean useClosure = false; 57 | 58 | public static final String similarCompositePropertyURI(int thr, String uri) { 59 | 60 | String s; 61 | 62 | if (hashes.containsKey(uri)) 63 | s = hashes.get(uri); 64 | else { 65 | s = DigestUtils.sha1Hex(uri); 66 | hashes.put(uri, s); 67 | } 68 | 69 | return SIMILAR_PREFIX + thr + "-" + s; 70 | } 71 | 72 | public static final String similarToURI(int thr) { 73 | // no such property 74 | if (thr <= 0 || thr >= 100) 75 | return null; 76 | return SIMILAR_TO_PREFIX + thr; 77 | } 78 | 79 | public static void build(NameMapper map, 80 | TreeSet setOfStrings, Cache cache, 81 | final String BASE, final int THR_MIN, final int THR_MAX, 82 | final int THR_STEP) { 83 | 84 | PPJoin ppjoin = new PPJoin(); 85 | Tokenizer tok = ppjoin.getTokenizer(); 86 | HashMap dataset = new HashMap<>(); 87 | 88 | Iterator it = setOfStrings.iterator(); 89 | for (int i = 0; it.hasNext(); i++) { 90 | ComparableLiteral lit = it.next(); 91 | String val = lit.getVal(); 92 | cache.stringItems.add(new StringItem(tok.tokenize(val, false), i)); 93 | dataset.put(i, lit); 94 | } 95 | 96 | logger.trace(cache.stringItems.size()); 97 | List stringItems = cache.stringItems; 98 | 99 | StringItem[] strDatum = stringItems.toArray(new StringItem[stringItems 100 | .size()]); 101 | Arrays.sort(strDatum); 102 | 103 | ppjoin.setUseSortAtExtractPairs(false); 104 | 105 | // open NT file of similarity joins. 
106 | final FileOutputStream output; 107 | try { 108 | output = new FileOutputStream(new File(BASE + "/model-sim.nt")); 109 | } catch (FileNotFoundException e) { 110 | logger.fatal(e.getMessage()); 111 | throw new RuntimeException("Cannot open file " + BASE 112 | + "/model-sim.nt of similarity joins."); 113 | } 114 | 115 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, 116 | Lang.NT); 117 | writer.start(); 118 | 119 | int cTBox = 0, cABox = 0; 120 | 121 | for (int thr = THR_MIN; thr <= THR_MAX; thr += THR_STEP) { 122 | 123 | String rel = similarToURI(thr); 124 | if (rel == null) 125 | continue; 126 | if (rel.isEmpty()) 127 | continue; 128 | Node relNode = NodeFactory.createURI(rel); 129 | 130 | writer.triple(new Triple(relNode, RDF.type.asNode(), 131 | OWL.SymmetricProperty.asNode())); 132 | writer.triple(new Triple(relNode, RDF.type.asNode(), 133 | OWL.TransitiveProperty.asNode())); 134 | cTBox += 2; 135 | 136 | for (int thrj = THR_MIN; thrj < thr; thrj += THR_STEP) { 137 | Triple t = new Triple(relNode, RDFS.subPropertyOf.asNode(), 138 | NodeFactory.createURI(similarToURI(thrj))); 139 | logger.trace(t); 140 | writer.triple(t); 141 | cTBox++; 142 | } 143 | 144 | List> result = ppjoin.extractPairs( 145 | strDatum, thr / 100.0); 146 | for (Entry entry : result) { 147 | ComparableLiteral lit1 = dataset.get(entry.getKey().getId()); 148 | ComparableLiteral lit2 = dataset.get(entry.getValue().getId()); 149 | String relName = map.add(rel, Type.RELATION); 150 | map.addRelationship(relName, map.getName(lit1.getUri()), 151 | map.getName(lit2.getUri())); 152 | 153 | // add similarTo relationship 154 | writer.triple(new Triple(NodeFactory.createURI(lit1.getUri()), 155 | relNode, NodeFactory.createURI(lit2.getUri()))); 156 | 157 | int c = compositeRelations(writer, map, thr, lit1.getUri(), 158 | lit2.getUri()); 159 | cABox += c; 160 | 161 | logger.trace(lit1.getUri() + " <=> " + lit2.getUri()); 162 | logger.trace(lit1.getVal() + " <=> " + lit2.getVal()); 163 | } 164 | 165 | cABox += result.size(); 166 | 167 | } 168 | 169 | // close NT file 170 | writer.finish(); 171 | 172 | logger.info("Triples added after similarity join: TBox=" + cTBox 173 | + ", ABox=" + cABox); 174 | 175 | if (useClosure) { 176 | // computing closure on similarity joins 177 | PelletReasoner.closure(BASE + "/model-sim.nt", BASE 178 | + "/model-sim-fwc.nt"); 179 | } else { 180 | try { 181 | FileUtils.copyFile(new File(BASE + "/model-sim.nt"), new File( 182 | BASE + "/model-sim-fwc.nt")); 183 | } catch (IOException e) { 184 | e.printStackTrace(); 185 | } 186 | } 187 | 188 | } 189 | 190 | private static int compositeRelations(StreamRDF writer, NameMapper map, 191 | int thr, String wURI, String zURI) { 192 | 193 | String w = map.getName(wURI), z = map.getName(zURI); 194 | Node wNode = NodeFactory.createURI(wURI); 195 | Node zNode = NodeFactory.createURI(zURI); 196 | 197 | TreeSet rships = map.getRelationships(); 198 | TreeSet wTree = new TreeSet<>(); 199 | TreeSet zTree = new TreeSet<>(); 200 | for (String rship : rships) { 201 | String[] rsh = rship.split("#"); 202 | // w and z can be only in 2nd position, as they are datatypes 203 | if (rsh[2].equals(w)) 204 | wTree.add(rship); 205 | if (rsh[2].equals(z)) 206 | zTree.add(rship); 207 | } 208 | 209 | logger.trace("wTree = " + wTree); 210 | logger.trace("zTree = " + zTree); 211 | 212 | // forall x : (x, rel, w) . 
add (x, extRel, z) 213 | for (String rship : wTree) { 214 | String[] rsh = rship.split("#"); 215 | String rel = rsh[0], subj = rsh[1]; 216 | 217 | String extRelURI = similarCompositePropertyURI(thr, rel); 218 | Node extRelNode = NodeFactory.createURI(extRelURI); 219 | String extRelName = map.add(extRelURI, Type.RELATION); 220 | logger.trace(rel + " => " + extRelURI + " => " + extRelName); 221 | 222 | map.addRelationship(extRelName, subj, z); 223 | logger.trace(extRelName + "#" + subj + "#" + z); 224 | 225 | // add composite-relation triple 226 | Triple t = new Triple(NodeFactory.createURI(map.getURI(subj)), 227 | extRelNode, zNode); 228 | logger.trace(t); 229 | writer.triple(t); 230 | 231 | } 232 | 233 | // forall y : (y, rel, z) . add (y, extRel, w) 234 | for (String rship : zTree) { 235 | String[] rsh = rship.split("#"); 236 | String rel = rsh[0], subj = rsh[1]; 237 | 238 | String extRelURI = similarCompositePropertyURI(thr, rel); 239 | Node extRelNode = NodeFactory.createURI(extRelURI); 240 | String extRelName = map.add(extRelURI, Type.RELATION); 241 | logger.trace(rel + " => " + extRelURI + " => " + extRelName); 242 | 243 | map.addRelationship(extRelName, subj, w); 244 | logger.trace(extRelName + "#" + subj + "#" + w); 245 | 246 | // add composite-relation triple 247 | Triple t = new Triple(NodeFactory.createURI(map.getURI(subj)), 248 | extRelNode, wNode); 249 | logger.trace(t); 250 | writer.triple(t); 251 | 252 | } 253 | 254 | return wTree.size() + zTree.size(); 255 | 256 | } 257 | 258 | } 259 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/controller/Validator.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.controller; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | 7 | import org.apache.jena.riot.Lang; 8 | import org.apache.jena.riot.RDFDataMgr; 9 | import org.apache.jena.riot.system.StreamRDF; 10 | import org.apache.jena.riot.system.StreamRDFWriter; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import com.hp.hpl.jena.graph.Node; 15 | import com.hp.hpl.jena.graph.NodeFactory; 16 | import com.hp.hpl.jena.graph.Triple; 17 | import com.hp.hpl.jena.sparql.core.Quad; 18 | import com.hp.hpl.jena.vocabulary.XSD; 19 | 20 | /** 21 | * @author Tommaso Soru 22 | * 23 | */ 24 | public class Validator { 25 | 26 | private final static Logger logger = LogManager.getLogger(Validator.class); 27 | 28 | /** 29 | * @param base 30 | * @param input 31 | * @param enableFwc 32 | * @param enableOnt 33 | */ 34 | public static void run(String base, String[] input, boolean enableFwc, boolean enableOnt) { 35 | 36 | String outputFile = enableFwc ? 
"model.nt" : "model-fwc.nt"; 37 | 38 | final FileOutputStream output; 39 | try { 40 | output = new FileOutputStream(new File(base + "/" + outputFile)); 41 | } catch (FileNotFoundException e) { 42 | e.printStackTrace(); 43 | return; 44 | } 45 | 46 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 47 | 48 | StreamRDF dataStream = new StreamRDF() { 49 | 50 | @Override 51 | public void start() { 52 | writer.start(); 53 | } 54 | 55 | @Override 56 | public void quad(Quad quad) { 57 | } 58 | 59 | @Override 60 | public void base(String base) { 61 | } 62 | 63 | @Override 64 | public void prefix(String prefix, String iri) { 65 | } 66 | 67 | @Override 68 | public void finish() { 69 | writer.finish(); 70 | } 71 | 72 | @Override 73 | public void triple(Triple triple) { 74 | Node node = triple.getObject(); 75 | if(node.isLiteral()) { 76 | if(!node.getLiteral().isWellFormed()) { 77 | // known issue: fix gYear literals 78 | if(node.getLiteralDatatypeURI() != null) { 79 | if(node.getLiteralDatatypeURI().equals(XSD.gYear.getURI()) || 80 | node.getLiteralDatatypeURI().equals(XSD.gYear.getLocalName())) { 81 | Node newNode = NodeFactory.createLiteral( 82 | node.getLiteral().toString().substring(0, 4) + "^^" + XSD.gYear); 83 | triple = new Triple(triple.getSubject(), triple.getPredicate(), 84 | newNode); 85 | logger.warn("Bad-formed literal: "+node+" - Using: "+newNode); 86 | } 87 | } 88 | } 89 | } 90 | writer.triple(triple); 91 | } 92 | 93 | }; 94 | 95 | if(enableOnt) { 96 | String inputFile = base + "/model-tmp.nt"; 97 | RDFDataMgr.parse(dataStream, inputFile); 98 | 99 | new File(inputFile).delete(); 100 | } else { 101 | for(String path : input) 102 | RDFDataMgr.parse(dataStream, path); 103 | } 104 | 105 | if(!enableFwc) 106 | new File(base + "/model.nt").delete(); 107 | } 108 | 109 | 110 | @SuppressWarnings("unused") 111 | private static void validate(String in, String out) { 112 | 113 | final FileOutputStream output; 114 | try { 115 | output = new FileOutputStream(new File(out)); 116 | } catch (FileNotFoundException e) { 117 | e.printStackTrace(); 118 | return; 119 | } 120 | 121 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 122 | 123 | StreamRDF dataStream = new StreamRDF() { 124 | 125 | @Override 126 | public void start() { 127 | writer.start(); 128 | } 129 | 130 | @Override 131 | public void quad(Quad quad) { 132 | } 133 | 134 | @Override 135 | public void base(String base) { 136 | } 137 | 138 | @Override 139 | public void prefix(String prefix, String iri) { 140 | } 141 | 142 | @Override 143 | public void finish() { 144 | writer.finish(); 145 | } 146 | 147 | @Override 148 | public void triple(Triple triple) { 149 | Node node = triple.getObject(); 150 | if(node.isLiteral()) { 151 | if(!node.getLiteral().isWellFormed()) { 152 | // known issue: fix gYear literals 153 | if(node.getLiteralDatatypeURI() != null) { 154 | if(node.getLiteralDatatypeURI().equals(XSD.gYear.getURI()) || 155 | node.getLiteralDatatypeURI().equals(XSD.gYear.getLocalName())) { 156 | Node newNode = NodeFactory.createLiteral( 157 | node.getLiteral().toString().substring(0, 4) + "^^" + XSD.gYear); 158 | triple = new Triple(triple.getSubject(), triple.getPredicate(), 159 | newNode); 160 | // System.out.println("Bad-formed literal: "+node+" - Using: "+newNode); 161 | } 162 | } 163 | } 164 | } 165 | writer.triple(triple); 166 | } 167 | 168 | }; 169 | 170 | RDFDataMgr.parse(dataStream, in); 171 | 172 | } 173 | 174 | 175 | } 176 | 
-------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/Dataset.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | enum Dataset { 8 | 9 | FB15K("benchmark/fb15k/freebase_mtr100_mte100-test.nt", "fb15k_"), 10 | WN18("benchmark/wn18/wordnet-mlj12-test.nt", "wn18_"); 11 | 12 | String ref, prefix; 13 | 14 | Dataset(String ref, String prefix) { 15 | this.ref = ref; 16 | this.prefix = prefix; 17 | } 18 | 19 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/FMeasureEvaluation.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | import java.util.TreeSet; 4 | 5 | import org.apache.jena.riot.RDFDataMgr; 6 | import org.apache.jena.riot.system.StreamRDF; 7 | 8 | import com.hp.hpl.jena.graph.Triple; 9 | import com.hp.hpl.jena.sparql.core.Quad; 10 | 11 | /** 12 | * @author Tommaso Soru 13 | * 14 | */ 15 | public class FMeasureEvaluation { 16 | 17 | 18 | private String psetPath, hsetPath; 19 | 20 | private int tp, fp, fn; 21 | private double pre, rec, f1; 22 | 23 | public FMeasureEvaluation(String psetPath, String hsetPath) { 24 | super(); 25 | this.psetPath = psetPath; 26 | this.hsetPath = hsetPath; 27 | } 28 | 29 | public void run() { 30 | 31 | System.out.println("Running evaluation on set "+psetPath+" against set "+hsetPath); 32 | 33 | TreeSet spoP = read(psetPath); 34 | TreeSet spoH = read(hsetPath); 35 | 36 | System.out.println("Predicted"); 37 | for(String s : spoP) 38 | System.out.println("\t"+s); 39 | System.out.println("Hidden"); 40 | for(String s : spoH) 41 | System.out.println("\t"+s); 42 | 43 | TreeSet fpSet = new TreeSet<>(spoP); 44 | fpSet.removeAll(spoH); 45 | fp = fpSet.size(); 46 | 47 | TreeSet fnSet = new TreeSet<>(spoH); 48 | fnSet.removeAll(spoP); 49 | fn = fnSet.size(); 50 | 51 | tp = spoP.size() - fp; 52 | 53 | System.out.println("TP = "+tp+"; FP = "+fp+"; FN = "+fn); 54 | 55 | pre = (tp + fp) == 0 ? 0d : (double) tp / (tp + fp); 56 | rec = (tp + fn) == 0 ? 0d : (double) tp / (tp + fn); 57 | f1 = (pre + rec) == 0d ? 
0d : 2 * pre * rec / (pre + rec); 58 | 59 | System.out.println("F1 = "+f1+"; Pre = "+pre+"; Rec = "+rec); 60 | 61 | } 62 | 63 | private TreeSet read(String path) { 64 | 65 | TreeSet spo = new TreeSet<>(); 66 | 67 | StreamRDF dataStream = new StreamRDF() { 68 | 69 | @Override 70 | public void start() { 71 | } 72 | 73 | @Override 74 | public void triple(Triple triple) { 75 | spo.add(triple.getSubject().getURI()+" "+ 76 | triple.getPredicate().getURI()+" "+ 77 | triple.getObject().toString()); 78 | } 79 | 80 | @Override 81 | public void quad(Quad quad) { 82 | } 83 | 84 | @Override 85 | public void base(String base) { 86 | } 87 | 88 | @Override 89 | public void prefix(String prefix, String iri) { 90 | } 91 | 92 | @Override 93 | public void finish() { 94 | } 95 | 96 | }; 97 | 98 | RDFDataMgr.parse(dataStream, path); 99 | 100 | return spo; 101 | } 102 | 103 | public String getPsetPath() { 104 | return psetPath; 105 | } 106 | 107 | public String getHsetPath() { 108 | return hsetPath; 109 | } 110 | 111 | public int getTp() { 112 | return tp; 113 | } 114 | 115 | public int getFp() { 116 | return fp; 117 | } 118 | 119 | public int getFn() { 120 | return fn; 121 | } 122 | 123 | public double getPre() { 124 | return pre; 125 | } 126 | 127 | public double getRec() { 128 | return rec; 129 | } 130 | 131 | public double getF1() { 132 | return f1; 133 | } 134 | 135 | public static void main(String[] args) { 136 | new FMeasureEvaluation("/Users/tom/PhD/srl/Mandolin/eval/0002/cv/run0/output_1.0.nt", 137 | "/Users/tom/PhD/srl/Mandolin/eval/0002/cv/partitions/0.nt").run(); 138 | } 139 | 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/LinkPredictionEvaluation.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | 6 | /** 7 | * @author Tommaso Soru 8 | * 9 | */ 10 | public class LinkPredictionEvaluation { 11 | 12 | /** 13 | * Dataset = First parameter. 14 | */ 15 | static Dataset DATASET; 16 | 17 | /** 18 | * Experiment code = Second parameter. 
19 | */ 20 | static String EXP_CODE; 21 | 22 | /** 23 | * @param args 24 | * @throws IOException 25 | */ 26 | public static void main(String[] args) throws IOException { 27 | 28 | if(args.length > 0) { 29 | DATASET = Dataset.valueOf(args[0]); 30 | EXP_CODE = args[1]; 31 | } else { 32 | // demo values 33 | DATASET = Dataset.FB15K; 34 | EXP_CODE = "09_?m_v"; 35 | } 36 | 37 | final String REF = DATASET.ref; 38 | final String BASE = "eval/" + DATASET.prefix + EXP_CODE; 39 | 40 | ArrayList meanranks = new ArrayList<>(); 41 | 42 | for(int i=1; i<=5; i++) { 43 | 44 | String testSet = REF; 45 | String output = BASE.replace("?", String.valueOf(i)); 46 | 47 | MeanRankCalc mr = new MeanRankCalc(testSet, output); 48 | mr.setMinThr(0); 49 | mr.partitionData(); 50 | meanranks.add(mr.start()); 51 | 52 | } 53 | 54 | System.out.println("\nmeanranks = " + meanranks); 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/eval/MeanRankCalc.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.eval; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.io.PrintWriter; 7 | import java.text.DecimalFormat; 8 | import java.util.ArrayList; 9 | import java.util.Scanner; 10 | import java.util.TreeSet; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | import org.apache.jena.riot.RDFDataMgr; 14 | import org.apache.jena.riot.system.StreamRDF; 15 | 16 | import com.hp.hpl.jena.graph.Triple; 17 | import com.hp.hpl.jena.rdf.model.Model; 18 | import com.hp.hpl.jena.rdf.model.Property; 19 | import com.hp.hpl.jena.rdf.model.RDFNode; 20 | import com.hp.hpl.jena.rdf.model.Resource; 21 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 22 | import com.hp.hpl.jena.sparql.core.Quad; 23 | import com.hp.hpl.jena.util.iterator.ExtendedIterator; 24 | 25 | /** 26 | * @author Tommaso Soru 27 | * 28 | */ 29 | public class MeanRankCalc { 30 | 31 | private PrintWriter pw; 32 | 33 | private String testSet, mandolinOut; 34 | 35 | private int minThr = 1; 36 | 37 | public int getMinThr() { 38 | return minThr; 39 | } 40 | 41 | public void setMinThr(int minThr) { 42 | this.minThr = minThr; 43 | } 44 | 45 | public static void main(String[] args) throws IOException { 46 | 47 | String testSet = args[0]; 48 | String mandolinOut = args[1]; 49 | 50 | MeanRankCalc mr = new MeanRankCalc(testSet, mandolinOut); 51 | 52 | mr.partitionData(); 53 | mr.start(); 54 | 55 | } 56 | 57 | public MeanRankCalc(String testSet, String mandolinOut) { 58 | super(); 59 | this.testSet = testSet; 60 | this.mandolinOut = mandolinOut; 61 | } 62 | 63 | public double start() throws FileNotFoundException { 64 | 65 | pw = new PrintWriter(new File(mandolinOut + "/evaluation.csv")); 66 | 67 | Scanner in = new Scanner(new File(mandolinOut + "/entities.csv")); 68 | int entities = 0; 69 | while(in.hasNextLine()) { 70 | in.nextLine(); 71 | entities++; 72 | } 73 | in.close(); 74 | 75 | final Integer MEDIAN_RANK = entities / 2; 76 | System.out.println("Median rank = "+MEDIAN_RANK); 77 | 78 | DecimalFormat df = new DecimalFormat("0.0"); 79 | 80 | // load N=max-min+1 models in descending order 81 | final Model[] m = new Model[10 - minThr + 1]; 82 | for(int i=m.length; i>=1; i--) { 83 | String thr = String.valueOf(df.format((double) (i+minThr-1) / 10.0)); 84 | String discovered = mandolinOut + "/ranked_" + thr + ".nt"; 85 | System.out.println("Loading model "+i+"..."); 86 
| m[m.length-i] = RDFDataMgr.loadModel(discovered); 87 | } 88 | 89 | final ArrayList ranks = new ArrayList<>(); 90 | 91 | final MRCache cache = new MRCache(); 92 | 93 | StreamRDF dataStream = new StreamRDF() { 94 | 95 | @Override 96 | public void start() { 97 | } 98 | 99 | private Integer check(Model mdl, MRCache cache, Triple triple, boolean forward) { 100 | 101 | Resource s = ResourceFactory.createResource(triple.getSubject() 102 | .getURI()); 103 | Property p = ResourceFactory.createProperty(triple 104 | .getPredicate().getURI()); 105 | Resource o = ResourceFactory.createResource(triple.getObject() 106 | .getURI()); 107 | 108 | ExtendedIterator it = forward ? mdl 109 | .listObjectsOfProperty(s, p) : mdl 110 | .listSubjectsWithProperty(p, o); 111 | // initialize count 112 | int y = 0; 113 | while (it.hasNext()) { 114 | 115 | Resource res = it.next().asResource(); 116 | String uri = forward ? triple.getObject().getURI() : triple 117 | .getSubject().getURI(); 118 | // if triple is found 119 | if (res.getURI().equals(uri)) { 120 | // rank[triple] = x + 1 121 | int rank = cache.x+1; 122 | System.out.println(triple+" >>> "+rank); 123 | ranks.add(cache.x + 1); 124 | // next triple 125 | return rank; 126 | } 127 | y++; 128 | } 129 | // add up to rank value 130 | cache.x += y; 131 | 132 | return null; 133 | } 134 | 135 | @Override 136 | public void triple(Triple triple) { 137 | // initialize rank value 138 | cache.x = 0; 139 | 140 | // for each model 141 | for(int i=0; i>> (median)"); 162 | } 163 | 164 | @Override 165 | public void quad(Quad quad) { 166 | } 167 | 168 | @Override 169 | public void base(String base) { 170 | } 171 | 172 | @Override 173 | public void prefix(String prefix, String iri) { 174 | } 175 | 176 | @Override 177 | public void finish() { 178 | } 179 | 180 | }; 181 | 182 | // stream test set 183 | RDFDataMgr.parse(dataStream, testSet); 184 | 185 | System.out.println("\n=== FILTERED SETTING === "+mandolinOut.substring(mandolinOut.lastIndexOf('/')+1)); 186 | // compute mean rank 187 | int sum = 0, sumR = 0; 188 | System.out.println(ranks); 189 | for(Integer i : ranks) { 190 | sum += i; 191 | sumR += 1.0 / i; 192 | } 193 | 194 | double mr = (double) sum / (double) ranks.size(); 195 | double mrr = (double) sumR / (double) ranks.size(); 196 | System.out.println("\nMeanRank = "+mr); 197 | System.out.println("\nMRR = "+mrr); 198 | 199 | double h1 = (double) cache.hitsAt1 * 100 / (double) ranks.size(); 200 | double h3 = (double) cache.hitsAt3 * 100 / (double) ranks.size(); 201 | double h10 = (double) cache.hitsAt10 * 100 / (double) ranks.size(); 202 | System.out.println("\nHits@1 = "+h1); 203 | System.out.println("Hits@3 = "+h3); 204 | System.out.println("Hits@10 = "+h10); 205 | 206 | pw.println(mandolinOut + "," + mrr + "," + h1 + "," + h3 + "," + h10); 207 | pw.close(); 208 | 209 | return mr; 210 | } 211 | 212 | public void partitionData() throws IOException { 213 | 214 | System.out.println("Partitioning data..."); 215 | 216 | DecimalFormat df = new DecimalFormat("0.0"); 217 | for (int i = minThr; i <= 9; i++) { 218 | String thrA = String.valueOf(df.format((double) i / 10.0)); 219 | String thrB = String.valueOf(df.format((double) (i+1) / 10.0)); 220 | System.out.println(thrA+","+thrB); 221 | String outA = mandolinOut + "/discovered_" + thrA + ".nt"; 222 | String outB = mandolinOut + "/discovered_" + thrB + ".nt"; 223 | String ranked = mandolinOut + "/ranked_" + thrA + ".nt"; 224 | 225 | if(new File(ranked).exists()) { 226 | System.out.println("Partitions exist. 
Skipping..."); 227 | return; 228 | } 229 | 230 | Scanner inB = new Scanner(new File(outB)); 231 | TreeSet indexB = new TreeSet<>(); 232 | while(inB.hasNextLine()) 233 | indexB.add(inB.nextLine()); 234 | inB.close(); 235 | 236 | Scanner inA = new Scanner(new File(outA)); 237 | PrintWriter pw = new PrintWriter(new File(ranked)); 238 | while(inA.hasNextLine()) { 239 | String line = inA.nextLine(); 240 | if(!indexB.contains(line)) 241 | pw.println(line); 242 | } 243 | pw.close(); 244 | inA.close(); 245 | } 246 | FileUtils.copyFile(new File(mandolinOut + "/discovered_1.0.nt"), new File(mandolinOut + "/ranked_1.0.nt")); 247 | 248 | 249 | } 250 | 251 | } 252 | 253 | class MRCache { 254 | int x; 255 | int hitsAt1 = 0; 256 | int hitsAt3 = 0; 257 | int hitsAt10 = 0; 258 | } 259 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/grounding/Grounding.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.grounding; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.PrintWriter; 6 | import java.util.Scanner; 7 | 8 | import org.aksw.mandolin.util.Bundle; 9 | import org.aksw.mandolin.util.Shell; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | /** 14 | * @author Tommaso Soru 15 | * 16 | */ 17 | public class Grounding { 18 | 19 | private final static Logger logger = LogManager.getLogger(Grounding.class); 20 | 21 | public static void ground(String base) throws FileNotFoundException { 22 | // prepare SQL files 23 | prepare(base); 24 | // generate tables and procedures 25 | generate(base); 26 | // run scripts for grounding 27 | run(); 28 | } 29 | 30 | private static void run() { 31 | 32 | logger.info("Grounding..."); 33 | 34 | String[] cmd = { 35 | // Drop schema 36 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 37 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 38 | + System.getProperty("user.dir") + "/pgsql/sql/run.sql", }; 39 | for (String c : cmd) { 40 | logger.debug("> " + c); 41 | Shell.execute(c, true); 42 | } 43 | } 44 | 45 | private static void generate(String base) { 46 | String[] cmd = { 47 | // Drop schema 48 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 49 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 50 | + System.getProperty("user.dir") 51 | + "/pgsql/sql/drop.sql", 52 | // // Create db 53 | // Bundle.getString("pgsql_home") + "/bin/createdb probkb -h " 54 | // + Bundle.getString("pgsql_url") + " -p 5432", 55 | // Create the probkb schema and tables. 56 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 57 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 58 | + System.getProperty("user.dir") 59 | + "/pgsql/sql/create.sql", 60 | // Create quality control procedures. 61 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 62 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 63 | + System.getProperty("user.dir") + "/pgsql/sql/qc.sql", 64 | // Load the files in CSV format. 65 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 66 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 67 | + System.getProperty("user.dir") + "/" + base 68 | + "/load.sql", 69 | // Create grounding procedures. 
70 | Bundle.getString("pgsql_home") + "/bin/psql probkb -h " 71 | + Bundle.getString("pgsql_url") + " -p 5432 -f " 72 | + System.getProperty("user.dir") 73 | + "/pgsql/sql/ground.sql" }; 74 | for (String c : cmd) { 75 | logger.debug("> " + c); 76 | Shell.execute(c, true); 77 | } 78 | } 79 | 80 | private static void prepare(String base) throws FileNotFoundException { 81 | 82 | PrintWriter load = new PrintWriter(new File(base + "/load.sql")); 83 | 84 | // write head 85 | write("pgsql/sql/load-head.sql", load); 86 | 87 | // write graph tables 88 | String[] tables = { "classes", "entities", "relations", "entClasses", 89 | "relClasses", "functionals", "extractions", }; 90 | // due to a stylistic choice from ProbKB, table `extractions` 91 | // corresponds to file `relationships.csv` 92 | String[] csv = { "classes", "entities", "relations", "entClasses", 93 | "relClasses", "functionals", "relationships", }; 94 | for (int i = 0; i < tables.length; i++) 95 | load.write("COPY probkb." + tables[i] + " FROM '" 96 | + System.getProperty("user.dir") + "/" + base + "/" 97 | + csv[i] + ".csv' DELIMITERS ',' CSV;\n"); 98 | 99 | // write body 100 | write("pgsql/sql/load-body.sql", load); 101 | 102 | // write MLN tables 103 | for (int i = 1; i <= 6; i++) 104 | load.write("COPY probkb.mln" + i + " FROM '" 105 | + System.getProperty("user.dir") + "/" + base + "/mln" + i 106 | + ".csv' DELIMITERS ',' CSV;\n"); 107 | 108 | // write tail 109 | write("pgsql/sql/load-tail.sql", load); 110 | load.close(); 111 | 112 | } 113 | 114 | private static void write(String filename, PrintWriter pw) 115 | throws FileNotFoundException { 116 | Scanner in = new Scanner(new File(filename)); 117 | while (in.hasNextLine()) 118 | pw.write(in.nextLine() + "\n"); 119 | in.close(); 120 | } 121 | 122 | // public static void main(String[] args) throws FileNotFoundException { 123 | // 124 | // ground("eval/0001"); 125 | // 126 | // } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/Factors.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.ResultSet; 4 | import java.sql.SQLException; 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | 8 | import org.aksw.mandolin.controller.ProbKBData; 9 | import org.aksw.mandolin.controller.NameMapper.Type; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.solver.pojo.Clause; 14 | import com.googlecode.rockit.app.solver.pojo.Literal; 15 | import com.googlecode.rockit.javaAPI.HerbrandUniverse; 16 | 17 | /** 18 | * The "factors" singleton makes the three collections needed by the RockIt 19 | * inference out of ProbKB output. 20 | * 21 | * @author Tommaso Soru 22 | * 23 | */ 24 | public class Factors { 25 | 26 | private final static Logger logger = LogManager.getLogger(Factors.class); 27 | 28 | private static Factors instance = null; 29 | 30 | private ArrayList consistentStartingPoints; 31 | private ArrayList clauses; 32 | private Collection evidence; 33 | 34 | private PostgreDB db; 35 | 36 | private static HerbrandUniverse u = HerbrandUniverse.getInstance(); 37 | 38 | protected Factors() { 39 | super(); 40 | } 41 | 42 | public static Factors getInstance() { 43 | if (instance == null) 44 | instance = new Factors(); 45 | return instance; 46 | } 47 | 48 | /** 49 | * Preprocess factors from ProbKB for RockIt. 
50 | * 51 | * @param aimName 52 | */ 53 | public void preprocess(String aimName) { 54 | 55 | db = new PostgreDB(); 56 | db.connect(); 57 | 58 | buildClauses(); 59 | buildEvidence(aimName); 60 | 61 | db.close(); 62 | } 63 | 64 | private void buildEvidence(String aimName) { 65 | 66 | evidence = new ArrayList<>(); 67 | consistentStartingPoints = new ArrayList<>(); 68 | 69 | ResultSet rs; 70 | if(aimName == null) { 71 | rs = db.evidence(); 72 | } else { 73 | int aimNumber = Integer.parseInt(aimName 74 | .substring(ProbKBData.REL_LENGTH)); 75 | rs = db.evidence(aimNumber); 76 | } 77 | 78 | try { 79 | while (rs.next()) { 80 | String a1 = u.getKey(Type.ENTITY.name() + rs.getInt("ent1")); 81 | String b1 = u.getKey(Type.ENTITY.name() + rs.getInt("ent2")); 82 | String r = Type.RELATION.name() + rs.getInt("rel"); 83 | // String string = aimName + "|" + a1 + "|" + b1; 84 | String string = r + "|" + a1 + "|" + b1; 85 | // As the Semantic Web deals only with true statements, 86 | // all literals are set to true and belong to the starting 87 | // points. 88 | consistentStartingPoints.add(string); 89 | evidence.add(new Literal(string, true)); 90 | } 91 | } catch (SQLException e) { 92 | e.printStackTrace(); 93 | } 94 | 95 | logger.info("EVIDENCE SIZE = "+evidence.size()); 96 | // logger.debug("EVIDENCE"); 97 | // for (Literal l : evidence) 98 | // logger.debug(l); 99 | 100 | } 101 | 102 | private void buildClauses() { 103 | 104 | clauses = new ArrayList<>(); 105 | 106 | for (int i = 1; i <= 3; i++) { 107 | 108 | logger.debug("Selecting type "+i+" factors..."); 109 | ResultSet rs = db.factors(i); 110 | try { 111 | while (rs.next()) { 112 | 113 | ArrayList lit = new ArrayList<>(); 114 | boolean positive = true; 115 | 116 | // first restriction 117 | String r1 = Type.RELATION.name() + rs.getInt("r1"); 118 | String a1 = u.getKey(Type.ENTITY.name() + rs.getInt("a1")); 119 | String b1 = u.getKey(Type.ENTITY.name() + rs.getInt("b1")); 120 | lit.add(new Literal(r1 + "|" + a1 + "|" + b1, positive)); 121 | 122 | if (i >= 2) { 123 | // second restriction 124 | String r2 = Type.RELATION.name() + rs.getInt("r2"); 125 | String a2 = u.getKey(Type.ENTITY.name() + rs.getInt("a2")); 126 | String b2 = u.getKey(Type.ENTITY.name() + rs.getInt("b2")); 127 | lit.add(new Literal(r2 + "|" + a2 + "|" + b2, positive)); 128 | 129 | if (i >= 3) { 130 | // third restriction 131 | String r3 = Type.RELATION.name() + rs.getInt("r3"); 132 | String a3 = u.getKey(Type.ENTITY.name() + rs.getInt("a3")); 133 | String b3 = u.getKey(Type.ENTITY.name() + rs.getInt("b3")); 134 | lit.add(new Literal(r3 + "|" + a3 + "|" + b3, 135 | positive)); 136 | } 137 | } 138 | 139 | // XXX Since there is a weight, its value is finite 140 | // (hard=false). 
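// (A hard clause would carry no finite weight; every factor row read here provides one, so the clause is always marked as soft.)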
141 | boolean hard = false; 142 | 143 | Clause clause = new Clause(rs.getDouble("weight"), lit, hard); 144 | clauses.add(clause); 145 | logger.trace(clause); 146 | 147 | } 148 | } catch (SQLException e) { 149 | e.printStackTrace(); 150 | } 151 | 152 | logger.debug(clauses.size() + " clauses collected until type "+i+"."); 153 | } 154 | 155 | logger.info(clauses.size() + " clauses collected."); 156 | 157 | } 158 | 159 | public ArrayList getConsistentStartingPoints() { 160 | return consistentStartingPoints; 161 | } 162 | 163 | public ArrayList getClauses() { 164 | return clauses; 165 | } 166 | 167 | public Collection getEvidence() { 168 | return evidence; 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/PostgreDB.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DriverManager; 5 | import java.sql.ResultSet; 6 | import java.sql.SQLException; 7 | import java.sql.Statement; 8 | 9 | import org.aksw.mandolin.util.Bundle; 10 | import org.aksw.mandolin.util.PostgreNotStartedException; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | /** 15 | * @author Tommaso Soru 16 | * 17 | */ 18 | public class PostgreDB { 19 | 20 | private final static Logger logger = LogManager.getLogger(PostgreDB.class); 21 | private Connection con = null; 22 | private Statement st = null; 23 | 24 | public PostgreDB() { 25 | super(); 26 | } 27 | 28 | public void connect() { 29 | 30 | String host = Bundle.getString("pgsql_url"); 31 | // String db = Bundle.getString("pgsql_database"); 32 | String url = "jdbc:postgresql://" + host + "/probkb"; 33 | String user = Bundle.getString("pgsql_username"); 34 | String password = Bundle.getString("pgsql_password"); 35 | 36 | try { 37 | con = DriverManager.getConnection(url, user, password); 38 | st = con.createStatement(); 39 | 40 | } catch (SQLException ex) { 41 | logger.fatal(ex.getMessage() + "\n\n" 42 | + "Maybe PostgreSQL was not started?" + "\n" 43 | + "Open a console and run:" + "\n" + "\tsh pgsql-start.sh" 44 | + "\n"); 45 | throw new PostgreNotStartedException(); 46 | } 47 | 48 | } 49 | 50 | /** 51 | * A factor graph is composed by factors connected with one, two, or three 52 | * clauses (i.e., relationships). 53 | * 54 | * @param n 55 | * size of the restriction, i.e. number of clauses (1, 2, 3). 56 | * @return 57 | */ 58 | public ResultSet factors(int n) { 59 | 60 | try { 61 | 62 | switch (n) { 63 | case 1: 64 | // one... 65 | return st 66 | .executeQuery("select rs1.rel as r1, rs1.ent1 as a1, rs1.ent2 as b1, " 67 | + "f.weight from probkb.relationships as rs1, probkb.factors as f " 68 | + "where f.id1 = rs1.id and f.id2 is null and f.id3 is null;"); 69 | case 2: 70 | // two... 71 | return st 72 | .executeQuery("select rs1.rel as r1, rs1.ent1 as a1, rs1.ent2 as b1, " 73 | + "rs2.rel as r2, rs2.ent1 as a2, rs2.ent2 as b2, " 74 | + "f.weight from probkb.relationships as rs1, " 75 | + "probkb.relationships as rs2, probkb.factors as f " 76 | + "where f.id1 = rs1.id and f.id2 = rs2.id and f.id3 is null;"); 77 | case 3: 78 | // three... 
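// joins three relationships through the factor's id1, id2 and id3 columns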
79 | return st 80 | .executeQuery("select rs1.rel as r1, rs1.ent1 as a1, rs1.ent2 as b1, " 81 | + "rs2.rel as r2, rs2.ent1 as a2, rs2.ent2 as b2, " 82 | + "rs3.rel as r3, rs3.ent1 as a3, rs3.ent2 as b3, " 83 | + "f.weight from probkb.relationships as rs1, " 84 | + "probkb.relationships as rs2, probkb.relationships as rs3, " 85 | + "probkb.factors as f " 86 | + "where f.id1 = rs1.id and f.id2 = rs2.id and f.id3 = rs3.id;"); 87 | } 88 | } catch (SQLException ex) { 89 | logger.warn(ex.getMessage(), ex); 90 | } 91 | 92 | return null; 93 | 94 | } 95 | 96 | public void close() { 97 | try { 98 | 99 | if (st != null) { 100 | st.close(); 101 | } 102 | if (con != null) { 103 | con.close(); 104 | } 105 | 106 | } catch (SQLException ex) { 107 | logger.warn(ex.getMessage(), ex); 108 | } 109 | } 110 | 111 | public ResultSet evidence(int aimNumber) { 112 | 113 | ResultSet rs = null; 114 | try { 115 | rs = st.executeQuery("select rel, ent1, ent2 from probkb.relationships where rel = " 116 | + aimNumber + ";"); 117 | 118 | } catch (SQLException ex) { 119 | logger.warn(ex.getMessage(), ex); 120 | } 121 | return rs; 122 | 123 | } 124 | 125 | public ResultSet evidence() { 126 | 127 | ResultSet rs = null; 128 | try { 129 | rs = st.executeQuery("select rel, ent1, ent2 from probkb.extractions;"); 130 | 131 | } catch (SQLException ex) { 132 | logger.warn(ex.getMessage(), ex); 133 | } 134 | return rs; 135 | } 136 | 137 | } 138 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/ProbKBToRockitGibbsSampling.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.SQLException; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | 7 | import org.aksw.mandolin.controller.NameMapper; 8 | import org.aksw.mandolin.model.PredictionLiteral; 9 | import org.aksw.mandolin.model.PredictionSet; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.solver.pojo.Clause; 14 | import com.googlecode.rockit.app.solver.pojo.Literal; 15 | import com.googlecode.rockit.exception.ParseException; 16 | import com.googlecode.rockit.exception.SolveException; 17 | import com.hp.hpl.jena.vocabulary.OWL; 18 | 19 | /** 20 | * Manager for the Gibbs-Sampling inference. Ground rules can be extracted from 21 | * the Postgre database after being generated by ProbKB (faster) or generated 22 | * through standard grounding by RockIt (slower). 23 | * 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public class ProbKBToRockitGibbsSampling extends RockitGibbsSampling { 28 | 29 | private final static Logger logger = LogManager.getLogger(ProbKBToRockitGibbsSampling.class); 30 | 31 | public static void main(String[] args) { 32 | 33 | PredictionSet ps = new ProbKBToRockitGibbsSampling( 34 | new NameMapper(OWL.sameAs.getURI())).infer(null); 35 | for (PredictionLiteral lit : ps) 36 | logger.info(lit); 37 | 38 | } 39 | 40 | public ProbKBToRockitGibbsSampling(NameMapper map) { 41 | super(map); 42 | } 43 | 44 | /** 45 | * Call ProbKB for grounding and preprocess its input for Gibbs sampling by 46 | * RockIt. 
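* Retrieves the consistent starting points, the clauses and the evidence from the Factors singleton and hands them to the inherited Gibbs sampler.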
47 | */ 48 | public PredictionSet infer(Integer sampling) { 49 | 50 | Factors factors = Factors.getInstance(); 51 | factors.preprocess(map.getAimName()); 52 | 53 | // +++ STARTING POINTS +++ 54 | // Prop2|alb|nob 55 | ArrayList consistentStartingPoints = factors 56 | .getConsistentStartingPoints(); 57 | 58 | // +++ CLAUSES +++ 59 | // Clause [weight=0.0, restriction=[[Prop2|b|e]], hard=true] 60 | ArrayList clauses = factors.getClauses(); 61 | 62 | // +++ EVIDENCE +++ 63 | // [Prop2|2db|h0e] 64 | Collection evidence = factors.getEvidence(); 65 | 66 | logger.debug("Evidence: "+evidence); 67 | 68 | // call Gibbs sampler 69 | PredictionSet ps = null; 70 | try { 71 | ps = gibbsSampling(consistentStartingPoints, clauses, evidence, sampling); 72 | } catch (SQLException | SolveException | ParseException e) { 73 | e.printStackTrace(); 74 | } 75 | 76 | return ps; 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/RockitGibbsSampling.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.sql.SQLException; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | 7 | import org.aksw.mandolin.controller.NameMapper; 8 | import org.aksw.mandolin.model.PredictionLiteral; 9 | import org.aksw.mandolin.model.PredictionSet; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.Parameters; 14 | import com.googlecode.rockit.app.sampler.gibbs.GIBBSLiteral; 15 | import com.googlecode.rockit.app.sampler.gibbs.GIBBSSampler; 16 | import com.googlecode.rockit.app.solver.pojo.Clause; 17 | import com.googlecode.rockit.app.solver.pojo.Literal; 18 | import com.googlecode.rockit.exception.ParseException; 19 | import com.googlecode.rockit.exception.ReadOrWriteToFileException; 20 | import com.googlecode.rockit.exception.SolveException; 21 | import com.googlecode.rockit.parser.SyntaxReader; 22 | 23 | /** 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public abstract class RockitGibbsSampling { 28 | 29 | private final static Logger logger = LogManager.getLogger(RockitGibbsSampling.class); 30 | 31 | protected static SyntaxReader reader; 32 | 33 | protected NameMapper map; 34 | 35 | // Sampling only 36 | /** 37 | * The maximum number of iterations for sampling. 38 | */ 39 | public static final int MAX_ITERATIONS = 10000000; 40 | 41 | protected GIBBSSampler gibbsSampler; 42 | 43 | protected RockitGibbsSampling(NameMapper map) { 44 | super(); 45 | 46 | this.map = map; 47 | 48 | try { 49 | Parameters.readPropertyFile(); 50 | } catch (ReadOrWriteToFileException e) { 51 | logger.error(e.getMessage()); 52 | } 53 | Parameters.USE_CUTTING_PLANE_AGGREGATION = false; 54 | Parameters.USE_CUTTING_PLANE_INFERENCE = false; 55 | reader = new SyntaxReader(); 56 | 57 | } 58 | 59 | public abstract PredictionSet infer(Integer samples); 60 | 61 | /** 62 | * Gibbs Sampling by RockIt. 
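* The iteration count is the given sampling value when present, otherwise 1000 times the number of clauses and evidence literals, capped at MAX_ITERATIONS.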
63 | * 64 | * @param consistentStartingPoints 65 | * @param clauses 66 | * @param evidence 67 | * @throws SQLException 68 | * @throws SolveException 69 | * @throws ParseException 70 | */ 71 | public PredictionSet gibbsSampling( 72 | ArrayList consistentStartingPoints, 73 | ArrayList clauses, Collection evidence, Integer sampling) 74 | throws SQLException, SolveException, ParseException { 75 | 76 | PredictionSet ps = new PredictionSet(map.getAim()); 77 | 78 | gibbsSampler = new GIBBSSampler(); 79 | int iter = iterations(clauses.size() + evidence.size(), sampling); 80 | ArrayList gibbsOutput = gibbsSampler.sample(iter, 81 | clauses, evidence, consistentStartingPoints); 82 | 83 | for (GIBBSLiteral l : gibbsOutput) 84 | ps.add(new PredictionLiteral(l, iter)); 85 | 86 | return ps; 87 | } 88 | 89 | /** 90 | * Get number of iterations. 91 | * @param sampling 92 | * 93 | * @param i 94 | * @return 95 | */ 96 | private int iterations(int literals, Integer sampling) { 97 | 98 | 99 | int iterations; 100 | 101 | long iter = (long) literals * 1000; 102 | 103 | if(sampling != null) // pre-assigned 104 | iterations = sampling; 105 | else if(iter >= Integer.MAX_VALUE) // overflow 106 | iterations = MAX_ITERATIONS; 107 | else if(iter >= MAX_ITERATIONS) // not overflow, but still too high 108 | iterations = MAX_ITERATIONS; 109 | else 110 | iterations = (int) iter; // acceptable value 111 | 112 | logger.info("literals={}, supposed_iter={}, actual_iter={}", literals, iter, iterations); 113 | return iterations; 114 | 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/inference/RockitGroundingAndGibbsSampling.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.inference; 2 | 3 | import java.io.IOException; 4 | import java.sql.SQLException; 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | 8 | import org.aksw.mandolin.controller.NameMapper; 9 | import org.aksw.mandolin.model.PredictionSet; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.googlecode.rockit.app.solver.StandardSolver; 14 | import com.googlecode.rockit.app.solver.pojo.Clause; 15 | import com.googlecode.rockit.app.solver.pojo.Literal; 16 | import com.googlecode.rockit.exception.ParseException; 17 | import com.googlecode.rockit.exception.ReadOrWriteToFileException; 18 | import com.googlecode.rockit.exception.SolveException; 19 | import com.googlecode.rockit.javaAPI.Model; 20 | import com.hp.hpl.jena.vocabulary.OWL; 21 | 22 | /** 23 | * Manager for the Gibbs-Sampling inference. Ground rules can be extracted from 24 | * the Postgre database after being generated by ProbKB (faster) or generated 25 | * through standard grounding by RockIt (slower). 26 | * 27 | * TODO missing link: AMIE rules to MLN file (now using dumb rules)! 28 | * 29 | * @author Tommaso Soru 30 | * 31 | */ 32 | public class RockitGroundingAndGibbsSampling extends RockitGibbsSampling { 33 | 34 | private final static Logger logger = LogManager.getLogger(RockitGroundingAndGibbsSampling.class); 35 | 36 | /** 37 | * MLN file. 38 | */ 39 | private String input; 40 | 41 | /** 42 | * DB file. 
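* (RockIt evidence database, e.g. the evidence.db file used in the test launcher below.)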
43 | */ 44 | private String groundings; 45 | 46 | private Model model; 47 | 48 | public static void main(String[] args) throws ReadOrWriteToFileException, 49 | ParseException, IOException { 50 | 51 | // launch test 52 | new RockitGroundingAndGibbsSampling(new NameMapper( 53 | OWL.sameAs.getURI()), "eval/11_publi-mln/prog.mln", 54 | "eval/11_publi-mln/evidence.db").infer(null); 55 | 56 | } 57 | 58 | public RockitGroundingAndGibbsSampling(NameMapper map, String input, 59 | String groundings) throws ReadOrWriteToFileException, 60 | ParseException, IOException { 61 | super(map); 62 | 63 | this.input = input; 64 | this.groundings = groundings; 65 | 66 | } 67 | 68 | /** 69 | * Call RockIt for both standard grounding and Gibbs-sampling inference. 70 | */ 71 | public PredictionSet infer(Integer samples) { 72 | 73 | PredictionSet ps = null; 74 | 75 | try { 76 | model = reader.getModel(input, groundings); 77 | 78 | // standard grounding... 79 | logger.info("Input: " + this.input); 80 | StandardSolver solver = new StandardSolver(model); 81 | // ground MLN and retrieve Clauses 82 | ArrayList consistentStartingPoints = solver.solve(); 83 | logger.info("+++ STARTING POINTS +++"); 84 | for (String s : consistentStartingPoints) 85 | logger.info(s); 86 | ArrayList clauses = solver.getAllClauses(); 87 | logger.info("+++ CLAUSES +++"); 88 | for (Clause c : clauses) 89 | logger.info(c); 90 | Collection evidence = solver.getEvidenceAxioms(); 91 | logger.info("+++ EVIDENCE +++"); 92 | for (Literal l : evidence) 93 | logger.info(l); 94 | solver = null; // free memory 95 | 96 | // call Gibbs sampler 97 | ps = gibbsSampling(consistentStartingPoints, clauses, evidence, samples); 98 | 99 | } catch (ParseException | IOException | SQLException | SolveException e) { 100 | e.printStackTrace(); 101 | } 102 | 103 | return ps; 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/Cache.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import jp.ndca.similarity.join.StringItem; 7 | 8 | /** 9 | * @author Tommaso Soru 10 | * 11 | */ 12 | public class Cache { 13 | 14 | public int count = 0; 15 | public List stringItems = new ArrayList(); 16 | 17 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/ComparableLiteral.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class ComparableLiteral implements Comparable { 8 | 9 | private String uri; 10 | private String val; 11 | 12 | public ComparableLiteral(String uri, String val) { 13 | this.uri = uri; 14 | this.val = val; 15 | } 16 | 17 | public String getUri() { 18 | return uri; 19 | } 20 | 21 | public String getVal() { 22 | return val; 23 | } 24 | 25 | @Override 26 | public int compareTo(ComparableLiteral o) { 27 | return this.getUri().compareTo(o.getUri()); 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/PredictionLiteral.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | import java.io.Serializable; 4 | 5 | import com.googlecode.rockit.app.sampler.gibbs.GIBBSLiteral; 6 | 
import com.googlecode.rockit.javaAPI.HerbrandUniverse; 7 | 8 | /** 9 | * @author Tommaso Soru 10 | * 11 | */ 12 | public class PredictionLiteral implements Comparable, Serializable { 13 | 14 | /** 15 | * 16 | */ 17 | private static final long serialVersionUID = 4558540244149162506L; 18 | 19 | private static HerbrandUniverse u = HerbrandUniverse.getInstance(); 20 | private String p, x, y; 21 | private String id; 22 | private double prob; 23 | 24 | public PredictionLiteral(String input, double prob) { 25 | String[] name = input.split("\\|"); 26 | p = name[0]; 27 | x = u.getConstant(name[1]); 28 | y = u.getConstant(name[2]); 29 | id = p + "(" + x + ", " + y + ")"; 30 | this.prob = prob; 31 | } 32 | 33 | public PredictionLiteral(GIBBSLiteral l, int iter) { 34 | this(l.getName(), l.return_my_probability(iter)); 35 | } 36 | 37 | public String getP() { 38 | return p; 39 | } 40 | 41 | public String getX() { 42 | return x; 43 | } 44 | 45 | public String getY() { 46 | return y; 47 | } 48 | 49 | public double getProb() { 50 | return prob; 51 | } 52 | 53 | public String toString() { 54 | return "P[ " + id + " = true ] = " + prob; 55 | } 56 | 57 | @Override 58 | public int compareTo(PredictionLiteral o) { 59 | return this.id.compareTo(o.id); 60 | } 61 | 62 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/model/PredictionSet.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.model; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.io.ObjectOutputStream; 8 | import java.io.Serializable; 9 | import java.net.URI; 10 | import java.net.URISyntaxException; 11 | import java.util.TreeSet; 12 | 13 | import org.aksw.mandolin.controller.NameMapper; 14 | import org.aksw.mandolin.controller.NameMapper.Type; 15 | import org.apache.jena.riot.Lang; 16 | import org.apache.jena.riot.system.StreamRDF; 17 | import org.apache.jena.riot.system.StreamRDFWriter; 18 | import org.apache.logging.log4j.LogManager; 19 | import org.apache.logging.log4j.Logger; 20 | 21 | import com.hp.hpl.jena.graph.NodeFactory; 22 | import com.hp.hpl.jena.graph.Triple; 23 | import com.hp.hpl.jena.shared.JenaException; 24 | 25 | /** 26 | * @author Tommaso Soru 27 | * 28 | */ 29 | public class PredictionSet extends TreeSet implements 30 | Serializable { 31 | 32 | private final static Logger logger = LogManager.getLogger(PredictionSet.class); 33 | 34 | /** 35 | * 36 | */ 37 | private static final long serialVersionUID = 864082651004354757L; 38 | 39 | /** 40 | * Internal name only. 
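* The wildcard value "*" makes saveLinkset keep predictions for every predicate.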
41 | */ 42 | private String aim; 43 | 44 | public PredictionSet(String aim) { 45 | this.aim = aim; 46 | logger.info("Created prediction set with aim: " + aim); 47 | } 48 | 49 | public String getAim() { 50 | return aim; 51 | } 52 | 53 | public void saveTo(String path) { 54 | ObjectOutputStream oos; 55 | try { 56 | oos = new ObjectOutputStream(new FileOutputStream(path)); 57 | oos.writeObject(this); 58 | oos.close(); 59 | logger.info("Predictions saved to " + path); 60 | } catch (IOException e) { 61 | logger.warn("Cannot save " + this.toString() + ": " 62 | + e.getMessage()); 63 | } 64 | } 65 | 66 | public void saveLinkset(NameMapper map, double theta, String path) { 67 | 68 | FileOutputStream output; 69 | try { 70 | output = new FileOutputStream(new File(path)); 71 | } catch (FileNotFoundException e) { 72 | e.printStackTrace(); 73 | return; 74 | } 75 | 76 | StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 77 | writer.start(); 78 | 79 | double max = Double.MIN_VALUE; 80 | double min = Double.MAX_VALUE; 81 | for (PredictionLiteral lit : this) { 82 | if(lit.getProb() > max) 83 | max = lit.getProb(); 84 | if(lit.getProb() < min) 85 | min = lit.getProb(); 86 | } 87 | double delta = max - min; 88 | logger.debug("Normalization extrema: max = "+max+", min = "+min+", delta = "+delta); 89 | 90 | logger.info("Inferred triples size: "+this.size()); 91 | for (PredictionLiteral lit : this) { 92 | 93 | // filter only aim relation from pset 94 | String p = map.getURI(lit.getP()); 95 | if (!p.equals(aim) && !aim.equals("*")) 96 | continue; 97 | 98 | // relative value for probability 99 | double relprob = (lit.getProb() - min) / delta; 100 | 101 | if (relprob >= theta) { 102 | logger.debug(lit + " (" + relprob + ")"); 103 | String s = map.getURI(lit.getX()); 104 | if(s == null) { 105 | int a = NameMapper.parse(lit.getX()); 106 | String str = String.valueOf(-a); 107 | s = map.getURI(Type.CLASS.name() + str); 108 | } 109 | 110 | // filter out illegal triples... 111 | try { 112 | new URI(s); 113 | } catch (URISyntaxException e) { 114 | logger.debug("A predicted triple has a subject " 115 | + "(" + s + ") which is not a URI. 
Skipping triple..."); 116 | continue; 117 | } catch (NullPointerException e) { 118 | logger.debug("Error on lit.X="+lit.getX()+ " lit.Y="+lit.getY()); 119 | continue; 120 | } 121 | 122 | String o = map.getURI(lit.getY()); 123 | if(o == null) { 124 | int b = NameMapper.parse(lit.getY()); 125 | String str = String.valueOf(-b); 126 | o = map.getURI(Type.CLASS.name() + str); 127 | } 128 | Triple t; 129 | try { 130 | t = new Triple(NodeFactory.createURI(s), 131 | NodeFactory.createURI(p), NodeFactory.createURI(o)); 132 | } catch (JenaException e) { 133 | logger.debug("Some of the following is not a URI: s="+s+", p="+p+", o="+o); 134 | continue; 135 | } 136 | 137 | logger.debug(lit.getProb() + "\t" + t); 138 | 139 | writer.triple(t); 140 | } 141 | } 142 | 143 | writer.finish(); 144 | 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/reasoner/PelletReasoner.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.reasoner; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.Iterator; 7 | 8 | import org.aksw.mandolin.util.Timer; 9 | import org.apache.jena.riot.Lang; 10 | import org.apache.jena.riot.RDFDataMgr; 11 | import org.apache.jena.riot.system.StreamRDF; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | import org.mindswap.pellet.jena.PelletReasonerFactory; 15 | 16 | import com.hp.hpl.jena.graph.Node; 17 | import com.hp.hpl.jena.graph.NodeFactory; 18 | import com.hp.hpl.jena.graph.Triple; 19 | import com.hp.hpl.jena.ontology.OntModel; 20 | import com.hp.hpl.jena.rdf.model.InfModel; 21 | import com.hp.hpl.jena.rdf.model.ModelFactory; 22 | import com.hp.hpl.jena.rdf.model.Property; 23 | import com.hp.hpl.jena.rdf.model.RDFNode; 24 | import com.hp.hpl.jena.rdf.model.Resource; 25 | import com.hp.hpl.jena.reasoner.Reasoner; 26 | import com.hp.hpl.jena.reasoner.ValidityReport; 27 | import com.hp.hpl.jena.shared.Lock; 28 | import com.hp.hpl.jena.sparql.core.Quad; 29 | import com.hp.hpl.jena.vocabulary.XSD; 30 | 31 | /** 32 | * Pellet-Jena reasoner. The inferred closure model is saved in file; it will 33 | * not be available as an in-memory object. 34 | * 35 | * @author Tommaso Soru 36 | * 37 | */ 38 | public class PelletReasoner { 39 | 40 | private final static Logger logger = LogManager.getLogger(PelletReasoner.class); 41 | 42 | public static void main(String[] args) { 43 | testThis(); 44 | // run("eval/0001"); 45 | } 46 | 47 | /** 48 | * Add OWL rules and compute the forward chain. 
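* Loads base/model.nt into a Pellet-backed inference model, validates it, writes the inferred closure to base/model-fwc.nt and deletes the original model file.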
49 | * 50 | * @param base 51 | * @param datasetPaths 52 | */ 53 | public static void run(String base) { 54 | 55 | Reasoner reasoner = PelletReasonerFactory.theInstance().create(); 56 | OntModel ontModel = ModelFactory 57 | .createOntologyModel(PelletReasonerFactory.THE_SPEC); 58 | InfModel infModel = ModelFactory.createInfModel(reasoner, ontModel); 59 | 60 | String path = System.getProperty("user.dir"); 61 | RDFDataMgr.read(infModel, "file://" + path + "/" + base + "/model.nt"); 62 | 63 | logger.info("Model size = " + ontModel.size()); 64 | 65 | ValidityReport report = infModel.validate(); 66 | printIterator(report.getReports(), "Validation Results"); 67 | 68 | logger.info("Inferred model size = " + infModel.size()); 69 | 70 | infModel.enterCriticalSection(Lock.READ); 71 | 72 | try { 73 | RDFDataMgr.write(new FileOutputStream(new File(base 74 | + "/model-fwc.nt")), infModel, Lang.NT); 75 | logger.info("Model generated."); 76 | } catch (FileNotFoundException e) { 77 | logger.fatal(e.getMessage()); 78 | throw new RuntimeException("Necessary file model-fwc.nt was not generated."); 79 | } finally { 80 | infModel.leaveCriticalSection(); 81 | } 82 | 83 | new File(base + "/model.nt").delete(); 84 | 85 | } 86 | 87 | public static void closure(String input, String output) { 88 | 89 | Reasoner reasoner = PelletReasonerFactory.theInstance().create(); 90 | OntModel ontModel = ModelFactory 91 | .createOntologyModel(PelletReasonerFactory.THE_SPEC); 92 | InfModel infModel = ModelFactory.createInfModel(reasoner, ontModel); 93 | 94 | String path = System.getProperty("user.dir"); 95 | RDFDataMgr.read(infModel, "file://" + path + "/" + input); 96 | 97 | logger.info("Model = "+input+", size = " + ontModel.size()); 98 | 99 | ValidityReport report = infModel.validate(); 100 | printIterator(report.getReports(), "Validation Results"); 101 | 102 | logger.info("Inferred model size = " + infModel.size()); 103 | 104 | infModel.enterCriticalSection(Lock.READ); 105 | 106 | try { 107 | RDFDataMgr.write(new FileOutputStream(new File(output)), 108 | infModel, Lang.NT); 109 | logger.info("Model generated at "+output); 110 | } catch (FileNotFoundException e) { 111 | logger.fatal(e.getMessage()); 112 | throw new RuntimeException("Necessary file "+output+" was not generated."); 113 | } finally { 114 | infModel.leaveCriticalSection(); 115 | } 116 | 117 | } 118 | 119 | private static void testThis() { 120 | 121 | Timer t = new Timer(); 122 | 123 | Reasoner reasoner = PelletReasonerFactory.theInstance().create(); 124 | OntModel ontModel = ModelFactory 125 | .createOntologyModel(PelletReasonerFactory.THE_SPEC); 126 | InfModel infModel = ModelFactory.createInfModel(reasoner, ontModel); 127 | 128 | t.lap(); 129 | 130 | String path = System.getProperty("user.dir"); 131 | 132 | String[] paths = { "file://" + path + "/datasets/DBLPL3S-100.nt", 133 | "file://" + path + "/datasets/LinkedACM-100.nt", 134 | "file://" + path + "/linksets/DBLPL3S-LinkedACM-100.nt" }; 135 | 136 | StreamRDF dataStream = new StreamRDF() { 137 | 138 | @Override 139 | public void start() { 140 | } 141 | 142 | @Override 143 | public void quad(Quad quad) { 144 | } 145 | 146 | @Override 147 | public void base(String base) { 148 | } 149 | 150 | @Override 151 | public void prefix(String prefix, String iri) { 152 | } 153 | 154 | @Override 155 | public void finish() { 156 | } 157 | 158 | @Override 159 | public void triple(Triple triple) { 160 | Node node = triple.getObject(); 161 | if (node.isLiteral()) { 162 | if (!node.getLiteral().isWellFormed()) { 163 | // known 
issue: fix gYear literals 164 | if (node.getLiteralDatatypeURI() != null) { 165 | if (node.getLiteralDatatypeURI().equals( 166 | XSD.gYear.getURI()) 167 | || node.getLiteralDatatypeURI().equals( 168 | XSD.gYear.getLocalName())) { 169 | Node newNode = NodeFactory.createLiteral(node 170 | .getLiteral().toString() 171 | .substring(0, 4) 172 | + "^^" + XSD.gYear); 173 | triple = new Triple(triple.getSubject(), 174 | triple.getPredicate(), newNode); 175 | // logger.warn("Bad-formed literal: " 176 | // + node + " - Using: " + newNode); 177 | } 178 | } 179 | } 180 | } 181 | 182 | Resource s = infModel.createResource(triple.getSubject() 183 | .getURI()); 184 | Property p = infModel.createProperty(triple.getPredicate() 185 | .getURI()); 186 | RDFNode o = infModel.asRDFNode(triple.getObject()); 187 | 188 | infModel.add(s, p, o); 189 | } 190 | 191 | }; 192 | 193 | for (String p : paths) 194 | RDFDataMgr.parse(dataStream, p); 195 | 196 | t.lap(); 197 | 198 | logger.info("Model size = " + ontModel.size()); 199 | 200 | ValidityReport report = infModel.validate(); 201 | printIterator(report.getReports(), "Validation Results"); 202 | 203 | logger.info("Inferred model size = " + infModel.size()); 204 | 205 | infModel.enterCriticalSection(Lock.READ); 206 | 207 | String f = "tmp/test-this.nt"; 208 | try { 209 | RDFDataMgr.write(new FileOutputStream(new File(f)), 210 | infModel, Lang.NT); 211 | logger.info("Model generated."); 212 | } catch (FileNotFoundException e) { 213 | logger.fatal(e.getMessage()); 214 | throw new RuntimeException("Necessary file "+f+" was not generated."); 215 | } finally { 216 | infModel.leaveCriticalSection(); 217 | } 218 | 219 | t.lap(); 220 | 221 | logger.info("Reasoner init (ms): " + t.getLapMillis(0)); 222 | logger.info("Model load (ms): " + t.getLapMillis(1)); 223 | logger.info("Model load (ms/triple): " + t.getLapMillis(1) 224 | / infModel.size()); 225 | logger.info("Validation (ms): " + t.getLapMillis(2)); 226 | logger.info("Save inferred model (ms): " + t.getLapMillis(3)); 227 | printIterator(report.getReports(), "Validation Results"); 228 | 229 | } 230 | 231 | private static void printIterator(Iterator i, String header) { 232 | logger.info(header); 233 | 234 | if (i.hasNext()) { 235 | while (i.hasNext()) 236 | logger.info(i.next()); 237 | } else 238 | logger.info(""); 239 | 240 | logger.info(""); 241 | } 242 | 243 | } 244 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/AmieHandler.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.util.List; 4 | 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | 8 | import javatools.parsers.NumberFormatter; 9 | import amie.mining.AMIE; 10 | import amie.rules.Rule; 11 | 12 | /** 13 | * @author Tommaso Soru 14 | * 15 | */ 16 | public class AmieHandler { 17 | 18 | private final static Logger logger = LogManager.getLogger(AmieHandler.class); 19 | 20 | public static enum MiningStrategy { 21 | HEAD_COVERAGE, SUPPORT; 22 | } 23 | 24 | private String ontology; 25 | private List rules = null; 26 | private Double miningThr = 0.01; 27 | 28 | public AmieHandler(String ontology) { 29 | super(); 30 | this.ontology = ontology; 31 | } 32 | 33 | public void run(MiningStrategy ms) throws Exception { 34 | 35 | AMIE miner; 36 | switch(ms) { 37 | case HEAD_COVERAGE: 38 | miner = AMIE.getInstance(new String[] { ontology, "-minhc", 
String.valueOf(miningThr) }); 39 | break; 40 | case SUPPORT: 41 | miner = AMIE.getInstance(new String[] { ontology, "-pm", "support", "-mins", "0" }); 42 | break; 43 | default: 44 | throw new RuntimeException("MiningStrategy does not exist: " + ms.name()); 45 | } 46 | 47 | logger.info("Starting the mining phase"); 48 | 49 | long time = System.currentTimeMillis(); 50 | 51 | rules = miner.mine(); 52 | 53 | if (!miner.isRealTime()) { 54 | Rule.printRuleHeaders(); 55 | for (Rule rule : rules) { 56 | logger.info(rule.getFullRuleString()); 57 | } 58 | } 59 | 60 | long miningTime = System.currentTimeMillis() - time; 61 | logger.info("Mining done in " 62 | + NumberFormatter.formatMS(miningTime)); 63 | logger.info(rules.size() + " rules mined."); 64 | 65 | } 66 | 67 | public List getRules() { 68 | return rules; 69 | } 70 | 71 | public void setMiningThr(Double mining) { 72 | this.miningThr = mining; 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/RDFToTSV.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.PrintWriter; 6 | 7 | import org.aksw.mandolin.util.URIHandler; 8 | import org.apache.jena.riot.RDFDataMgr; 9 | import org.apache.jena.riot.system.StreamRDF; 10 | 11 | import com.hp.hpl.jena.graph.Triple; 12 | import com.hp.hpl.jena.sparql.core.Quad; 13 | 14 | /** 15 | * Generate input for AMIE. 16 | * 17 | * @author Tommaso Soru 18 | * 19 | */ 20 | public class RDFToTSV { 21 | 22 | public static void main(String[] args) throws Exception { 23 | 24 | run("eval/0001"); 25 | 26 | } 27 | 28 | public static void run(String base) 29 | throws FileNotFoundException { 30 | 31 | PrintWriter pw = new PrintWriter(new File(base + "/model.tsv")); 32 | 33 | StreamRDF stream = new StreamRDF() { 34 | 35 | @Override 36 | public void triple(Triple triple) { 37 | pw.write(URIHandler.parse(triple.getSubject()) + "\t" 38 | + triple.getPredicate().getURI() + "\t" 39 | + triple.getObject().toString() + "\n"); 40 | } 41 | 42 | @Override 43 | public void start() { 44 | } 45 | 46 | @Override 47 | public void quad(Quad quad) { 48 | } 49 | 50 | @Override 51 | public void prefix(String prefix, String iri) { 52 | } 53 | 54 | @Override 55 | public void finish() { 56 | } 57 | 58 | @Override 59 | public void base(String base) { 60 | } 61 | 62 | }; 63 | 64 | RDFDataMgr.parse(stream, base + "/model-fwc.nt"); 65 | 66 | pw.close(); 67 | 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/RuleDriver.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.io.File; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | 9 | import javatools.datatypes.ByteString; 10 | 11 | import org.aksw.mandolin.controller.NameMapper; 12 | import org.aksw.mandolin.controller.ProbKBData; 13 | import org.apache.logging.log4j.LogManager; 14 | import org.apache.logging.log4j.Logger; 15 | 16 | import amie.rules.Rule; 17 | 18 | import com.opencsv.CSVWriter; 19 | 20 | /** 21 | * Driver of rules from Amie to ProbKB. 
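* Each mined rule is mapped onto one of ProbKB's six MLN rule shapes and buffered as a CSV row; buildCSV() then writes the mln1..mln6 files.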
22 | * 23 | * @author Tommaso Soru 24 | * 25 | */ 26 | public class RuleDriver { 27 | 28 | private final static Logger logger = LogManager.getLogger(RuleDriver.class); 29 | 30 | private NameMapper map; 31 | private String base; 32 | 33 | private static final String HEAD_LEFT = "?a"; 34 | private static final String HEAD_RIGHT = "?b"; 35 | 36 | private HashMap> csvContent = new HashMap<>(); 37 | 38 | public RuleDriver(NameMapper map, String base) { 39 | super(); 40 | this.map = map; 41 | this.base = base; 42 | for(int i=1; i<=6; i++) 43 | csvContent.put(base + "/mln"+i+".csv", new ArrayList<>()); 44 | } 45 | 46 | public void process(Rule rule) throws IOException { 47 | 48 | int size = rule.getBody().size(); 49 | 50 | if (size == 1) { // call one or two 51 | 52 | ByteString[] b = rule.getBody().get(0); 53 | // subject, predicate, object 54 | String pHead = rule.getHeadRelation(); 55 | String pBody = b[1].toString(); // TODO check me! 56 | if (b[0].toString().equals(HEAD_LEFT)) 57 | addTypeOne(pHead, pBody, toWeight(rule.getPcaConfidence())); 58 | else 59 | addTypeTwo(pHead, pBody, toWeight(rule.getPcaConfidence())); 60 | } else { // call three to six 61 | 62 | ByteString[] b1 = rule.getBody().get(0); 63 | ByteString[] b2 = rule.getBody().get(1); 64 | 65 | String pHead = rule.getHeadRelation(); 66 | String pBody1 = b1[1].toString(); 67 | String pBody2 = b2[1].toString(); 68 | 69 | if (b1[0].toString().equals(HEAD_LEFT) && b2[0].toString().equals(HEAD_RIGHT)) 70 | addTypeThree(pHead, pBody1, pBody2, 71 | toWeight(rule.getPcaConfidence())); 72 | if (b1[0].toString().equals(HEAD_RIGHT) && b2[0].toString().equals(HEAD_LEFT)) 73 | addTypeThree(pHead, pBody2, pBody1, 74 | toWeight(rule.getPcaConfidence())); 75 | 76 | if (b1[0].toString().equals(HEAD_LEFT) && b2[2].toString().equals(HEAD_RIGHT)) 77 | addTypeFour(pHead, pBody1, pBody2, 78 | toWeight(rule.getPcaConfidence())); 79 | if (b1[2].toString().equals(HEAD_RIGHT) && b2[0].toString().equals(HEAD_LEFT)) 80 | addTypeFour(pHead, pBody2, pBody1, 81 | toWeight(rule.getPcaConfidence())); 82 | 83 | if (b1[2].toString().equals(HEAD_LEFT) && b2[0].toString().equals(HEAD_RIGHT)) 84 | addTypeFive(pHead, pBody1, pBody2, 85 | toWeight(rule.getPcaConfidence())); 86 | if (b1[0].toString().equals(HEAD_RIGHT) && b2[2].toString().equals(HEAD_LEFT)) 87 | addTypeFive(pHead, pBody2, pBody1, 88 | toWeight(rule.getPcaConfidence())); 89 | 90 | if (b1[2].toString().equals(HEAD_LEFT) && b2[2].toString().equals(HEAD_RIGHT)) 91 | addTypeSix(pHead, pBody1, pBody2, 92 | toWeight(rule.getPcaConfidence())); 93 | if (b1[2].toString().equals(HEAD_RIGHT) && b2[2].toString().equals(HEAD_LEFT)) 94 | addTypeSix(pHead, pBody2, pBody1, 95 | toWeight(rule.getPcaConfidence())); 96 | 97 | } 98 | } 99 | 100 | /** 101 | * @param pcaConfidence 102 | * @return 103 | */ 104 | private double toWeight(double pcaConfidence) { 105 | return pcaConfidence; 106 | } 107 | 108 | /** 109 | * p(x,y) <- q(x,y) 110 | * 111 | * @param pHead 112 | * @param pBody 113 | * @param weight 114 | * @throws IOException 115 | */ 116 | private void addTypeOne(String pHead, String pBody, double weight) { 117 | logger.trace("Adding type one: "+pHead+", "+pBody+", "+weight); 118 | String headName = map.getName(pHead).substring(ProbKBData.REL_LENGTH); 119 | String bodyName = map.getName(pBody).substring(ProbKBData.REL_LENGTH); 120 | String str[] = { 121 | headName, 122 | bodyName, 123 | "1", // TODO class of x 124 | "1", // TODO class of y 125 | "" + weight 126 | }; 127 | csvContent.get(base + "/mln1.csv").add(str); 128 
| } 129 | 130 | /** 131 | * p(x,y) <- q(y,x) 132 | * 133 | * @param pHead 134 | * @param pBody 135 | * @param weight 136 | */ 137 | private void addTypeTwo(String pHead, String pBody, double weight) { 138 | logger.trace("Adding type two: "+pHead+", "+pBody+", "+weight); 139 | String str[] = { 140 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 141 | map.getName(pBody).substring(ProbKBData.REL_LENGTH), 142 | "1", // TODO class of x 143 | "1", // TODO class of y 144 | "" + weight 145 | }; 146 | csvContent.get(base + "/mln2.csv").add(str); 147 | } 148 | 149 | /** 150 | * p(x,y) <- q(x,z), r(y,z) 151 | * 152 | * @param pHead 153 | * @param pBodyQ 154 | * @param pBodyR 155 | * @param weight 156 | */ 157 | private void addTypeThree(String pHead, String pBodyQ, String pBodyR, 158 | double weight) { 159 | logger.trace("Adding type three: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 160 | String str[] = { 161 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 162 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 163 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 164 | "1", // TODO class of x 165 | "1", // TODO class of y 166 | "1", // TODO class of z 167 | "" + weight 168 | }; 169 | csvContent.get(base + "/mln3.csv").add(str); 170 | } 171 | 172 | /** 173 | * p(x,y) <- q(x,z), r(z,y) 174 | * @param pHead 175 | * @param pBodyQ 176 | * @param pBodyR 177 | * @param weight 178 | */ 179 | private void addTypeFour(String pHead, String pBodyQ, String pBodyR, 180 | double weight) { 181 | logger.trace("Adding type four: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 182 | String str[] = { 183 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 184 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 185 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 186 | "1", // TODO class of x 187 | "1", // TODO class of y 188 | "1", // TODO class of z 189 | "" + weight 190 | }; 191 | csvContent.get(base + "/mln4.csv").add(str); 192 | } 193 | 194 | /** 195 | * p(x,y) <- q(z,x), r(y,z) 196 | * @param pHead 197 | * @param pBodyQ 198 | * @param pBodyR 199 | * @param weight 200 | */ 201 | private void addTypeFive(String pHead, String pBodyQ, String pBodyR, 202 | double weight) { 203 | logger.trace("Adding type five: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 204 | String str[] = { 205 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 206 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 207 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 208 | "1", // TODO class of x 209 | "1", // TODO class of y 210 | "1", // TODO class of z 211 | "" + weight 212 | }; 213 | csvContent.get(base + "/mln5.csv").add(str); 214 | } 215 | 216 | /** 217 | * p(x,y) <- q(z,x), r(z,y) 218 | * @param pHead 219 | * @param pBodyQ 220 | * @param pBodyR 221 | * @param weight 222 | */ 223 | private void addTypeSix(String pHead, String pBodyQ, String pBodyR, 224 | double weight) { 225 | logger.trace("Adding type six: "+pHead+", "+pBodyQ+", "+pBodyR+", "+weight); 226 | String str[] = { 227 | map.getName(pHead).substring(ProbKBData.REL_LENGTH), 228 | map.getName(pBodyQ).substring(ProbKBData.REL_LENGTH), 229 | map.getName(pBodyR).substring(ProbKBData.REL_LENGTH), 230 | "1", // TODO class of x 231 | "1", // TODO class of y 232 | "1", // TODO class of z 233 | "" + weight 234 | }; 235 | csvContent.get(base + "/mln6.csv").add(str); 236 | } 237 | 238 | public void buildCSV() { 239 | 240 | for(String key : csvContent.keySet()) { 241 | CSVWriter writer = null; 242 | try { 243 | writer = new CSVWriter(new 
FileWriter(new File(key))); 244 | for(String[] line : csvContent.get(key)) 245 | writer.writeNext(line); 246 | writer.close(); 247 | } catch (IOException e) { 248 | logger.error(e.getMessage()); 249 | // XXX RuntimeException? 250 | } 251 | } 252 | } 253 | 254 | } 255 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/rulemining/RuleMiner.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.rulemining; 2 | 3 | import java.util.Comparator; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.TreeMap; 8 | import java.util.TreeSet; 9 | 10 | import javatools.datatypes.ByteString; 11 | 12 | import org.aksw.mandolin.controller.NameMapper; 13 | import org.aksw.mandolin.rulemining.AmieHandler.MiningStrategy; 14 | import org.apache.logging.log4j.LogManager; 15 | import org.apache.logging.log4j.Logger; 16 | 17 | import amie.rules.Rule; 18 | 19 | import com.hp.hpl.jena.vocabulary.OWL; 20 | import com.hp.hpl.jena.vocabulary.RDF; 21 | import com.hp.hpl.jena.vocabulary.RDFS; 22 | 23 | /** 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public class RuleMiner { 28 | 29 | private final static Logger logger = LogManager.getLogger(RuleMiner.class); 30 | 31 | /** 32 | * @param map 33 | * @param base 34 | * @param mining 35 | * @param maxRules 36 | * @throws Exception 37 | */ 38 | public static void run(NameMapper map, String base, Double mining, Integer maxRules) throws Exception { 39 | 40 | boolean support = (mining == null); 41 | 42 | AmieHandler h = new AmieHandler(base + "/model.tsv"); 43 | 44 | if(!support) { 45 | h.setMiningThr(mining); 46 | h.run(MiningStrategy.HEAD_COVERAGE); 47 | if(h.getRules().isEmpty()) 48 | support = true; 49 | } 50 | 51 | if(support) { 52 | h.run(MiningStrategy.SUPPORT); 53 | if(h.getRules().isEmpty()) { 54 | logger.fatal("Rules size = 0"); 55 | throw new RuntimeException("Mandolin cannot continue without MLN rules!"); 56 | } 57 | } 58 | 59 | List rules = h.getRules(); 60 | if(rules.isEmpty()) { 61 | logger.fatal("Rules size = 0"); 62 | throw new RuntimeException("Mandolin cannot continue without MLN rules!"); 63 | } 64 | 65 | TreeSet topNRules = new TreeSet<>(); 66 | if(maxRules != null) { 67 | HashMap rank = new HashMap<>(); 68 | for(Rule rule : rules) 69 | rank.put(rule.toString(), rule.getPcaConfidence()); 70 | ValueComparator bvc = new ValueComparator(rank); 71 | TreeMap sortedRank = new TreeMap(bvc); 72 | sortedRank.putAll(rank); 73 | int i=0; 74 | for(String key : sortedRank.keySet()) { 75 | topNRules.add(key); 76 | logger.trace(key + ", " + rank.get(key)); 77 | if(++i == maxRules) 78 | break; 79 | } 80 | } 81 | 82 | RuleDriver driver = new RuleDriver(map, base); 83 | 84 | for(Rule rule : rules) { 85 | 86 | if(maxRules != null) 87 | if(!topNRules.contains(rule.toString())) 88 | continue; 89 | 90 | // filter out RDF/RDFS/OWL-only rules 91 | if(isUpper(rule.getHeadRelation())) { 92 | boolean skip = true; 93 | for(ByteString[] bs : rule.getBody()) 94 | if(!isUpper(bs[1].toString())) { 95 | skip = false; 96 | break; 97 | } 98 | if(skip) { 99 | logger.trace("Skipping upper-ontology rule..."); 100 | continue; 101 | } 102 | } 103 | 104 | // send rule to driver 105 | driver.process(rule); 106 | // print rule information 107 | printInfo(rule); 108 | } 109 | 110 | // make CSVs 111 | driver.buildCSV(); 112 | 113 | } 114 | 115 | /** 116 | * @param rule 117 | */ 118 | private static void printInfo(Rule rule) 
{ 119 | String str = ""; 120 | for(ByteString[] bs : rule.getBody()) { 121 | String bstr = ""; 122 | for(ByteString b : bs) 123 | bstr += b + ","; 124 | str += bstr + " | "; 125 | } 126 | logger.info(rule.getHeadRelation() + "\t" + str + "\t" + rule.getPcaConfidence()); 127 | } 128 | 129 | /** 130 | * @param headRelation 131 | * @return 132 | */ 133 | private static boolean isUpper(String headRelation) { 134 | if(headRelation.startsWith(OWL.NS)) 135 | return true; 136 | if(headRelation.startsWith(RDF.getURI())) 137 | return true; 138 | if(headRelation.startsWith(RDFS.getURI())) 139 | return true; 140 | return false; 141 | } 142 | 143 | } 144 | 145 | class ValueComparator implements Comparator { 146 | 147 | Map base; 148 | 149 | public ValueComparator(Map base) { 150 | this.base = base; 151 | } 152 | 153 | // Note: this comparator imposes orderings that are inconsistent with 154 | // equals. 155 | public int compare(String a, String b) { 156 | if (base.get(a) >= base.get(b)) { 157 | return -1; 158 | } else { 159 | return 1; 160 | } // returning 0 would merge keys 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/Commons.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.FileOutputStream; 4 | 5 | import com.hp.hpl.jena.query.Query; 6 | import com.hp.hpl.jena.query.QueryExecution; 7 | import com.hp.hpl.jena.query.QueryExecutionFactory; 8 | import com.hp.hpl.jena.query.QueryFactory; 9 | import com.hp.hpl.jena.query.ResultSet; 10 | import com.hp.hpl.jena.query.Syntax; 11 | import com.hp.hpl.jena.rdf.model.Model; 12 | import com.hp.hpl.jena.rdf.model.Property; 13 | import com.hp.hpl.jena.rdf.model.Resource; 14 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 15 | 16 | /** 17 | * Database builder common constants and methods. 
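* Collects the SPARQL endpoints, graph names, namespaces, vocabulary terms and I/O paths shared by the DatasetBuild* classes.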
18 | * 19 | * @author Tommaso Soru 20 | * 21 | */ 22 | public class Commons { 23 | 24 | // SPARQL 25 | // used to be "http://dblp.l3s.de/d2r/sparql" 26 | public static final String DBLPL3S_ENDPOINT = "http://139.18.8.97:8890/sparql"; 27 | // used to be "" 28 | public static final String DBLPL3S_GRAPH = "http://dblp.l3s.de"; 29 | public static final String ACMRKB_ENDPOINT = "http://139.18.8.97:8890/sparql"; 30 | public static final String ACMRKB_GRAPH = "http://acm.rkbexplorer.com"; 31 | 32 | public static final String DBLP_NAMESPACE = "http://dblp.rkbexplorer.com/id/"; 33 | public static final String DBLPL3S_NAMESPACE = "http://dblp.l3s.de/d2r/resource/publications/"; 34 | public static final String ACMRKB_NAMESPACE = "http://acm.rkbexplorer.com/id/"; 35 | public static final String LINKEDACM_NAMESPACE = "http://mandolin.aksw.org/acm/"; 36 | 37 | public static final String OLD_AUTHOR_PREFIX = "http://acm.rkbexplorer.com/id/person-"; 38 | 39 | // URIs 40 | public static final Resource DBLPL3S_PUBLICATION_CLASS = ResourceFactory 41 | .createResource("http://xmlns.com/foaf/0.1/Document"); 42 | public static final Resource ACMRKB_PUBLICATION_CLASS = ResourceFactory 43 | .createResource("http://www.aktors.org/ontology/portal#Article-Reference"); 44 | public static final Resource DBLPL3S_AUTHOR_CLASS = ResourceFactory 45 | .createResource("http://xmlns.com/foaf/0.1/Agent"); 46 | public static final Resource ACMRKB_AUTHOR_CLASS = ResourceFactory 47 | .createResource("http://www.aktors.org/ontology/portal#Person"); 48 | public static final Property RDF_TYPE = ResourceFactory 49 | .createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); 50 | public static final Property OWL_SAMEAS = ResourceFactory 51 | .createProperty("http://www.w3.org/2002/07/owl#sameAs"); 52 | public static final Property RDFS_LABEL = ResourceFactory 53 | .createProperty("http://www.w3.org/2000/01/rdf-schema#label"); 54 | public static final Property HAS_AUTHOR = ResourceFactory 55 | .createProperty("http://www.aktors.org/ontology/portal#has-author"); 56 | public static final Property HAS_TITLE = ResourceFactory 57 | .createProperty("http://www.aktors.org/ontology/portal#has-title"); 58 | public static final Property FULL_NAME = ResourceFactory 59 | .createProperty("http://www.aktors.org/ontology/portal#full-name"); 60 | public static final Property DC_CREATOR = ResourceFactory 61 | .createProperty("http://purl.org/dc/elements/1.1/creator"); 62 | 63 | 64 | // I/O 65 | public static final String DBLP_ACM_CSV = "mappings/dblp-acm.csv"; 66 | public static final String DBLP_ACM_FIXED_CSV = "mappings/dblp-acm-fixed.csv"; 67 | public static final String DBLP_ACM_REMOVED_CSV = "tmp/removed-publications.csv"; 68 | 69 | public static final String PUBS_WITH_AUTHORS_MAP = "tmp/pubs-with-authors.dblp-l3s.map"; 70 | public static final String AUTHORS_SAMEAS_MAP = "tmp/authors-sameas.map"; 71 | 72 | public static final String TO_BE_DELETED_ID = "tmp/to-be-deleted-id.txt"; 73 | public static final String TO_BE_DELETED = "tmp/to-be-deleted.txt"; 74 | public static final String DISTANCES_CSV = "tmp/distances.csv"; 75 | 76 | public static final String LINKEDACM_NT = "datasets/LinkedACM.nt"; 77 | public static final String DBLPL3S_LINKEDACM_NT = "linksets/DBLPL3S-LinkedACM.nt"; 78 | public static final String DBLPL3S_NT = "datasets/DBLPL3S.nt"; 79 | 80 | 81 | 82 | /** 83 | * Perform SPARQL query against an endpoint on a given graph. 
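* The QueryExecution is created per call and not closed here; the ResultSet is handed back to the caller as-is.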
84 | * 85 | * @param query 86 | * @param endpoint 87 | * @param graph 88 | * @return 89 | */ 90 | public static ResultSet sparql(String query, String endpoint, String graph) { 91 | 92 | Query sparqlQuery = QueryFactory.create(query, Syntax.syntaxARQ); 93 | QueryExecution qexec = QueryExecutionFactory.sparqlService(endpoint, 94 | sparqlQuery, graph); 95 | return qexec.execSelect(); 96 | 97 | } 98 | 99 | /** 100 | * Save the model to N-Triple file. 101 | * 102 | * @param m 103 | * @param name 104 | */ 105 | public static void save(Model m, String name) { 106 | 107 | // save to TURTLE/N3 108 | try { 109 | FileOutputStream fout = new FileOutputStream(name); 110 | m.write(fout, "N-TRIPLES"); 111 | fout.close(); 112 | } catch (Exception e) { 113 | System.out.println("Exception caught" + e.getMessage()); 114 | e.printStackTrace(); 115 | } 116 | 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuildFixer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | import java.io.PrintWriter; 9 | import java.util.HashMap; 10 | import java.util.Scanner; 11 | import java.util.TreeSet; 12 | 13 | import org.simmetrics.metrics.Levenshtein; 14 | 15 | import com.hp.hpl.jena.query.QuerySolution; 16 | import com.hp.hpl.jena.query.ResultSet; 17 | import com.opencsv.CSVReader; 18 | import com.opencsv.CSVWriter; 19 | 20 | /** 21 | * Removes faulty mappings from the gold standard, e.g. when the authors cannot 22 | * be linked because one of them is missing in one dataset. 
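* run() derives the blacklist of ACM publication ids from the faulty author pairs; fix() then rewrites the gold-standard CSV, diverting the blacklisted rows to a separate file.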
23 | * 24 | * @author Tommaso Soru 25 | * 26 | */ 27 | public class DatasetBuildFixer { 28 | 29 | public static void main(String[] args) throws IOException, ClassNotFoundException { 30 | 31 | // System.out.println(new Levenshtein().distance("Query Execution Techniques for Caching Expensive Methods.", "2Q")); 32 | 33 | new DatasetBuildFixer().run(); 34 | new DatasetBuildFixer().fix(); 35 | } 36 | 37 | public void fix() throws IOException { 38 | 39 | TreeSet ids = new TreeSet<>(); 40 | Scanner in = new Scanner(new File(Commons.TO_BE_DELETED_ID)); 41 | while (in.hasNextLine()) 42 | ids.add(in.nextLine()); 43 | in.close(); 44 | 45 | System.out.println("-----------\n"+ids); 46 | 47 | CSVReader reader = new CSVReader(new FileReader(new File(Commons.DBLP_ACM_CSV))); 48 | CSVWriter writer = new CSVWriter(new FileWriter(new File(Commons.DBLP_ACM_FIXED_CSV))); 49 | CSVWriter removed = new CSVWriter(new FileWriter(new File(Commons.DBLP_ACM_REMOVED_CSV))); 50 | String[] nextLine = reader.readNext(); 51 | writer.writeNext(nextLine); 52 | removed.writeNext(nextLine); 53 | while ((nextLine = reader.readNext()) != null) { 54 | if(ids.contains(nextLine[1])) { 55 | removed.writeNext(nextLine); 56 | System.out.println("Removed: "+nextLine[0]+" | "+nextLine[1]); 57 | } else 58 | writer.writeNext(nextLine); 59 | } 60 | removed.close(); 61 | writer.close(); 62 | reader.close(); 63 | 64 | } 65 | 66 | public void run() throws FileNotFoundException { 67 | 68 | TreeSet blacklist = new TreeSet<>(); 69 | PrintWriter pw = new PrintWriter(new File(Commons.TO_BE_DELETED_ID)); 70 | 71 | // get list of faulty authors 72 | TreeSet pairs = new TreeSet<>(); 73 | Scanner in = new Scanner(new File(Commons.TO_BE_DELETED)); 74 | while (in.hasNextLine()) 75 | pairs.add(in.nextLine()); 76 | in.close(); 77 | 78 | for (String pair : pairs) { 79 | String dblp = pair.split(",")[0]; 80 | String acm = pair.split(",")[1]; 81 | 82 | System.out.println(dblp+" | "+acm); 83 | 84 | // query for DBLP-L3S publications 85 | HashMap dblpLabelToURI = new HashMap<>(); 86 | ResultSet rs1 = Commons.sparql( 87 | "select ?p ?t where { ?p <"+Commons.DC_CREATOR+"> <" + dblp 88 | + "> . ?p <"+Commons.RDFS_LABEL+"> ?t }", 89 | Commons.DBLPL3S_ENDPOINT, Commons.DBLPL3S_GRAPH); 90 | while(rs1.hasNext()) { 91 | QuerySolution qs = rs1.next(); 92 | dblpLabelToURI.put(qs.getLiteral("t").getString(), qs.getResource("p").getURI()); 93 | } 94 | 95 | // query for ACM publications 96 | HashMap acmLabelToURI = new HashMap<>(); 97 | ResultSet rs2 = Commons.sparql( 98 | "select ?p ?t where { ?p <"+Commons.HAS_AUTHOR+"> <" + acm 99 | + "> . 
?p <"+Commons.HAS_TITLE+"> ?t }", 100 | Commons.ACMRKB_ENDPOINT, Commons.ACMRKB_GRAPH); 101 | while(rs2.hasNext()) { 102 | QuerySolution qs = rs2.next(); 103 | acmLabelToURI.put(qs.getLiteral("t").getString(), qs.getResource("p").getURI()); 104 | } 105 | 106 | // Round-Robin among labels, checking also for substrings (e.g., to cut off undertitles) 107 | float dMin = Float.MAX_VALUE, dMinSub = Float.MAX_VALUE; 108 | String l1min = null, l2min = null, l1minSub = null, l2minSub = null; 109 | Levenshtein lev = new Levenshtein(); 110 | for(String l1 : dblpLabelToURI.keySet()) { 111 | for(String l2 : acmLabelToURI.keySet()) { 112 | float d = lev.distance(l1.toLowerCase(), l2.toLowerCase()); 113 | if(d < dMin) { 114 | dMin = d; 115 | l1min = l1; 116 | l2min = l2; 117 | } 118 | for(int i=0; i 2.0) { 138 | System.out.println("Using substring comparison (dMin = "+dMin+")"); 139 | dMin = dMinSub; 140 | l1min = l1minSub; 141 | l2min = l2minSub; 142 | } 143 | 144 | 145 | // add publications to the blacklist 146 | System.out.println("DISTANCE = " + dMin + "\n" + l1min + "\n" + l2min); 147 | String l2URI = acmLabelToURI.get(l2min); 148 | System.out.println("URI: "+l2URI + "\n"); 149 | blacklist.add(l2URI.substring(l2URI.lastIndexOf("/") + 1)); 150 | 151 | // break; 152 | } 153 | 154 | System.out.println(blacklist); 155 | for(String id : blacklist) 156 | pw.write(id+"\n"); 157 | pw.close(); 158 | } 159 | 160 | } 161 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuildSatellites.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.TreeSet; 7 | 8 | import org.apache.jena.riot.Lang; 9 | import org.apache.jena.riot.RDFDataMgr; 10 | import org.apache.jena.riot.system.StreamRDF; 11 | import org.apache.jena.riot.system.StreamRDFWriter; 12 | 13 | import com.hp.hpl.jena.graph.Triple; 14 | import com.hp.hpl.jena.query.Query; 15 | import com.hp.hpl.jena.query.QueryExecution; 16 | import com.hp.hpl.jena.query.QueryExecutionFactory; 17 | import com.hp.hpl.jena.query.QueryFactory; 18 | import com.hp.hpl.jena.query.Syntax; 19 | import com.hp.hpl.jena.rdf.model.Model; 20 | import com.hp.hpl.jena.rdf.model.StmtIterator; 21 | import com.hp.hpl.jena.sparql.core.Quad; 22 | import com.hp.hpl.jena.vocabulary.OWL; 23 | import com.hp.hpl.jena.vocabulary.RDF; 24 | 25 | /** 26 | * Add datatype properties and satellites (URIs belonging to the CBD) for each 27 | * author. The only tolerated predicates for satellites are defined in the 28 | * 'predicates' set. 
29 | * 30 | * @author Tommaso Soru 31 | * 32 | */ 33 | public class DatasetBuildSatellites { 34 | 35 | private static final String ENDPOINT = "http://localhost:8890/sparql"; 36 | 37 | // private static final String GRAPH = "http://mandolin.aksw.org/acm"; 38 | // private static final String FILE = "LinkedACM-10.nt"; 39 | // private static final String ARTICLE = "http://www.aktors.org/ontology/portal#Article-Reference"; 40 | 41 | private static final String GRAPH = "http://dblp.l3s.de"; 42 | private static final String FILE = "DBLPL3S.nt"; 43 | private static final String ARTICLE = "http://xmlns.com/foaf/0.1/Document"; 44 | 45 | 46 | private static TreeSet predicates = new TreeSet<>(); 47 | 48 | static { 49 | // tolerate only these two types of object properties of satellites 50 | predicates.add(RDF.type.getURI()); 51 | predicates.add(OWL.sameAs.getURI()); 52 | } 53 | 54 | public static void main(String[] args) { 55 | 56 | run(); 57 | deduplicate(); 58 | 59 | } 60 | 61 | public static void deduplicate() { 62 | 63 | File old = new File("datasets2/" + FILE); 64 | 65 | Model m = RDFDataMgr.loadModel(old.getPath()); 66 | System.out.println("Model has "+m.size()+" deduplicated triples."); 67 | File tmp = new File("datasets2/tmp_" + FILE); 68 | try { 69 | m.write(new FileOutputStream(tmp), "N-TRIPLE"); 70 | } catch (FileNotFoundException e) { 71 | e.printStackTrace(); 72 | return; 73 | } 74 | 75 | old.delete(); 76 | tmp.renameTo(old); 77 | 78 | System.out.println("Done."); 79 | 80 | } 81 | 82 | public static void run() { 83 | 84 | new File("datasets2/").mkdirs(); 85 | 86 | FileOutputStream output; 87 | try { 88 | output = new FileOutputStream(new File("datasets2/" + FILE)); 89 | } catch (FileNotFoundException e) { 90 | e.printStackTrace(); 91 | return; 92 | } 93 | 94 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, 95 | Lang.NT); 96 | 97 | TreeSet articleIDs = new TreeSet<>(); 98 | TreeSet satelliteIDs = new TreeSet<>(); 99 | 100 | // stream dataset 101 | // search for ?s a 102 | // collect article IDs 103 | collectWrite(articleIDs, writer); 104 | 105 | System.out.println("file = " + FILE); 106 | System.out.println("articles = " + articleIDs.size()); 107 | 108 | // for each article ID: 109 | // add its CBD and 110 | // collect satellite IDs 111 | for (String a : articleIDs) { 112 | System.out.print(a + "..."); 113 | cbd(a, writer, articleIDs, satelliteIDs, true); 114 | System.out.println(" OK"); 115 | } 116 | 117 | System.out.println(); 118 | 119 | System.out.println("satellites = " + satelliteIDs.size()); 120 | 121 | // for each satellite ID: 122 | // launch describe query 123 | // write out triples 124 | for (String aut : satelliteIDs) { 125 | System.out.print(aut + "..."); 126 | boolean success = cbd(aut, writer, articleIDs, satelliteIDs, false); 127 | if(success) 128 | System.out.println(" OK"); 129 | else 130 | System.out.println(" skipped"); 131 | } 132 | 133 | writer.finish(); 134 | System.out.println("\nDone."); 135 | 136 | } 137 | 138 | private static boolean cbd(String uri, StreamRDF writer, 139 | TreeSet articleIDs, TreeSet satelliteIDs, 140 | boolean addAll) { 141 | String query = "DESCRIBE <" + uri + ">"; 142 | Query sparqlQuery = QueryFactory.create(query, Syntax.syntaxARQ); 143 | QueryExecution qexec = QueryExecutionFactory.sparqlService(ENDPOINT, 144 | sparqlQuery, GRAPH); 145 | Model m; 146 | try { 147 | m = qexec.execDescribe(); 148 | } catch (Exception e1) { 149 | return false; 150 | } 151 | StmtIterator it = m.listStatements(); 152 | while (it.hasNext()) { 153 
| Triple t = it.next().asTriple(); 154 | 155 | if (addAll) { 156 | writer.triple(t); 157 | String s = t.getSubject().getURI(); 158 | boolean isUri = t.getObject().isURI(); 159 | if (isUri) { 160 | String o = t.getObject().getURI(); 161 | if (s.equals(uri)) 162 | satelliteIDs.add(o); 163 | if (o.equals(uri)) 164 | satelliteIDs.add(s); 165 | } 166 | 167 | } else { 168 | String s = t.getSubject().getURI(); 169 | String p = t.getPredicate().getURI(); 170 | boolean isUri = t.getObject().isURI(); 171 | 172 | if (!isUri) { 173 | writer.triple(t); 174 | } else { 175 | String o = t.getObject().getURI(); 176 | if (articleIDs.contains(o)) 177 | writer.triple(t); 178 | else if (articleIDs.contains(s)) 179 | writer.triple(t); 180 | else if (predicates.contains(p)) 181 | writer.triple(t); 182 | } 183 | } 184 | 185 | } 186 | return true; 187 | } 188 | 189 | private static void collectWrite(TreeSet articleIDs, 190 | StreamRDF writer) { 191 | 192 | StreamRDF dataStream = new StreamRDF() { 193 | 194 | @Override 195 | public void start() { 196 | writer.start(); 197 | } 198 | 199 | @Override 200 | public void triple(Triple triple) { 201 | 202 | if (triple.getPredicate().getURI().equals(RDF.type.getURI())) 203 | if (triple.getObject().getURI().equals(ARTICLE)) 204 | articleIDs.add(triple.getSubject().getURI()); 205 | 206 | } 207 | 208 | @Override 209 | public void quad(Quad quad) { 210 | } 211 | 212 | @Override 213 | public void base(String base) { 214 | } 215 | 216 | @Override 217 | public void prefix(String prefix, String iri) { 218 | } 219 | 220 | @Override 221 | public void finish() { 222 | // finishes later 223 | } 224 | 225 | }; 226 | 227 | RDFDataMgr.parse(dataStream, "datasets/" + FILE); 228 | 229 | } 230 | 231 | } 232 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuildStarter.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.io.Serializable; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | import java.util.Scanner; 10 | 11 | import org.aksw.mandolin.util.DataIO; 12 | 13 | import com.hp.hpl.jena.query.QuerySolution; 14 | import com.hp.hpl.jena.query.ResultSet; 15 | 16 | /** 17 | * Build the Commons.PUBS_WITH_AUTHORS_MAP containing all DBLPL3S publications 18 | * with their respective authors, if any. The process could have been carried 19 | * out by replacing namespaces (RKBExplorer to L3S), however this is a safer way 20 | * to do it. 21 | * 22 | * @author Tommaso Soru 23 | * 24 | */ 25 | public class DatasetBuildStarter { 26 | 27 | public static void main(String[] args) throws IOException, 28 | ClassNotFoundException { 29 | new DatasetBuildStarter().run(); 30 | } 31 | 32 | public void run() throws IOException, ClassNotFoundException { 33 | 34 | ArrayList data = new ArrayList<>(); 35 | 36 | for (String rkbURI : getRKBURIs()) { 37 | 38 | Elements e = getElements(rkbURI, Commons.DC_CREATOR.getURI(), 39 | Commons.DBLPL3S_ENDPOINT, Commons.DBLPL3S_GRAPH); 40 | 41 | System.out.println(e.getURI()); 42 | 43 | if (e.getURI() != null) { 44 | // should always happen 45 | data.add(e); 46 | } 47 | 48 | } 49 | 50 | DataIO.serialize(data, Commons.PUBS_WITH_AUTHORS_MAP); 51 | 52 | } 53 | 54 | /** 55 | * Get publication URIs from the perfect mapping. 
56 | * 57 | * @return 58 | * @throws FileNotFoundException 59 | */ 60 | private ArrayList getRKBURIs() throws FileNotFoundException { 61 | 62 | ArrayList list = new ArrayList<>(); 63 | 64 | Scanner in = new Scanner(new File(Commons.DBLP_ACM_CSV)); 65 | in.nextLine(); // skip header 66 | while (in.hasNextLine()) { 67 | String[] line = in.nextLine().split(","); 68 | String rkb = line[0].replaceAll("\"", ""); 69 | list.add(Commons.DBLP_NAMESPACE + rkb); 70 | } 71 | in.close(); 72 | 73 | return list; 74 | } 75 | 76 | /** 77 | * Get the publication associated with a list of elements (e.g., authors). 78 | * 79 | * @param rkbURI 80 | * @param relation 81 | * @param endpoint 82 | * @return 83 | */ 84 | private Elements getElements(String rkbURI, String relation, String endpoint, String graph) { 85 | 86 | String query = "SELECT ?cr ?pub WHERE { ?pub <" + Commons.OWL_SAMEAS 87 | + "> <" + rkbURI + "> ; <" + relation + "> ?cr }"; 88 | System.out.println(query); 89 | 90 | ResultSet rs = Commons.sparql(query, endpoint, graph); 91 | 92 | ArrayList list = new ArrayList<>(); 93 | String l3sURI = null; 94 | 95 | while (rs.hasNext()) { 96 | QuerySolution qs = rs.next(); 97 | l3sURI = qs.getResource("?pub").getURI(); 98 | list.add(qs.getResource("?cr").getURI()); 99 | } 100 | 101 | Elements elem; 102 | 103 | if (l3sURI == null) { 104 | elem = getElementsNoCreator(rkbURI, relation, endpoint, graph); 105 | } else { 106 | elem = new Elements(l3sURI); 107 | elem.setElements(list); 108 | } 109 | 110 | return elem; 111 | 112 | } 113 | 114 | private Elements getElementsNoCreator(String rkbURI, String relation, 115 | String endpoint, String graph) { 116 | String query = "SELECT ?pub WHERE { ?pub <" + Commons.OWL_SAMEAS 117 | + "> <" + rkbURI + "> }"; 118 | System.out.println(query); 119 | 120 | ResultSet rs = Commons.sparql(query, endpoint, graph); 121 | 122 | String l3sURI = null; 123 | 124 | while (rs.hasNext()) { 125 | l3sURI = rs.next().getResource("?pub").getURI(); 126 | } 127 | 128 | Elements elem = new Elements(l3sURI); 129 | elem.setElements(new ArrayList<>()); 130 | 131 | return elem; 132 | } 133 | 134 | } 135 | 136 | class Elements implements Serializable { 137 | 138 | private static final long serialVersionUID = -4523439946804741035L; 139 | 140 | private String uri; 141 | private List elements; 142 | 143 | public void setElements(List elements) { 144 | this.elements = elements; 145 | } 146 | 147 | Elements(String uri) { 148 | this.uri = uri; 149 | elements = new ArrayList(); 150 | } 151 | 152 | public String getURI() { 153 | return uri; 154 | } 155 | 156 | public List getElements() { 157 | return elements; 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/DatasetBuilderAlgorithm.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.BufferedInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.File; 6 | import java.io.FileNotFoundException; 7 | import java.io.FileOutputStream; 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.io.PrintWriter; 11 | import java.net.URL; 12 | import java.util.ArrayList; 13 | import java.util.HashMap; 14 | import java.util.Scanner; 15 | import java.util.TreeSet; 16 | 17 | import org.aksw.mandolin.util.DataIO; 18 | import org.apache.jena.riot.RDFDataMgr; 19 | import org.apache.jena.riot.RiotNotFoundException; 20 | import 
org.simmetrics.metrics.Levenshtein; 21 | 22 | import com.hp.hpl.jena.query.QuerySolution; 23 | import com.hp.hpl.jena.query.ResultSet; 24 | import com.hp.hpl.jena.rdf.model.Model; 25 | import com.hp.hpl.jena.rdf.model.NodeIterator; 26 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 27 | 28 | /** 29 | * @author Tommaso Soru 30 | * 31 | */ 32 | public class DatasetBuilderAlgorithm { 33 | 34 | private int N_EXAMPLES; 35 | 36 | public DatasetBuilderAlgorithm(int n) { 37 | this.N_EXAMPLES = n; 38 | } 39 | 40 | public static void main(String[] args) throws FileNotFoundException, 41 | ClassNotFoundException, IOException { 42 | new DatasetBuilderAlgorithm(100).run(); 43 | } 44 | 45 | public void run() throws FileNotFoundException, ClassNotFoundException, 46 | IOException { 47 | 48 | // load DBLP l3s to ACM rkb 49 | HashMap l3sMap = l3sToACMRkb(); 50 | 51 | // build reverse map 52 | HashMap> map = new HashMap<>(); 53 | 54 | ArrayList data = DataIO 55 | .readList(Commons.PUBS_WITH_AUTHORS_MAP); 56 | for (Elements e : data) { 57 | System.out.println(e.getURI()); 58 | // TODO remove me! 59 | if (!l3sMap.containsKey(e.getURI())) 60 | continue; 61 | 62 | for (String el : e.getElements()) { 63 | TreeSet pubSet; 64 | if (map.containsKey(el)) 65 | pubSet = map.get(el); 66 | else { 67 | pubSet = new TreeSet<>(); 68 | map.put(el, pubSet); 69 | } 70 | pubSet.add(e.getURI()); 71 | } 72 | } 73 | 74 | HashMap> sameAsMap = new HashMap<>(); 75 | 76 | PrintWriter pw = new PrintWriter(new File(Commons.DISTANCES_CSV)); 77 | 78 | // algorithm starts here 79 | for (String author : map.keySet()) { 80 | 81 | String authorName = getName(author); 82 | 83 | System.out.println("Listing " + authorName + " (" + author + "): " 84 | + map.get(author)); 85 | 86 | TreeSet sameAs = new TreeSet<>(); 87 | 88 | nextPub: for (String l3s : map.get(author)) { 89 | 90 | System.out.println("L3S: " + l3s); 91 | 92 | String acmRkb = l3sMap.get(l3s); 93 | 94 | float distMin = Float.MAX_VALUE; 95 | Entity entity = null; 96 | 97 | ArrayList rkb; 98 | final int MAX_TRIES = 3; 99 | int tries = 0; 100 | do { 101 | 102 | rkb = getCreators(acmRkb); 103 | tries++; 104 | for (Entity e : rkb) { 105 | Levenshtein lev = new Levenshtein(); 106 | float d = lev.distance(authorName, e.getLabel()); 107 | if (d <= distMin) { 108 | distMin = d; 109 | entity = e; 110 | } 111 | System.out.println("d(" + authorName + ", " 112 | + e.getLabel() + ") = " + d); 113 | } 114 | 115 | if (entity == null) { 116 | System.out.println("URI " + acmRkb 117 | + " is deprecated or has issues."); 118 | 119 | acmRkb = getRedirect(acmRkb.substring(acmRkb 120 | .lastIndexOf('/') + 1)); 121 | 122 | if(acmRkb == null) { 123 | System.out.println("*** No redirects available. 
Skipping "+l3s); 124 | continue nextPub; 125 | } 126 | System.out.println("*** Redirected to: " + acmRkb); 127 | 128 | } 129 | 130 | } while (rkb.isEmpty() && tries < MAX_TRIES); 131 | 132 | if (distMin >= 5.0) 133 | pw.write(authorName + "," + entity.getLabel() + "," 134 | + author + "," + entity.getUri() + "\n"); 135 | 136 | System.out.println("sameAs = " + entity.getUri()); 137 | sameAs.add(entity.getUri()); 138 | 139 | } 140 | 141 | if (!sameAs.isEmpty()) 142 | sameAsMap.put( 143 | Commons.LINKEDACM_NAMESPACE + author.substring(32), 144 | new ArrayList<>(sameAs)); 145 | else 146 | System.out.println("*** " + Commons.LINKEDACM_NAMESPACE 147 | + author.substring(32) + " had an empty sameAs set."); 148 | 149 | // System.out.println(sameAsMap); 150 | // break; 151 | 152 | } 153 | 154 | pw.close(); 155 | 156 | DataIO.serialize(sameAsMap, Commons.AUTHORS_SAMEAS_MAP); 157 | 158 | } 159 | 160 | private String getRedirect(String acmID) { 161 | 162 | // get remote file 163 | String uri = Commons.ACMRKB_NAMESPACE + acmID; 164 | String fileIn = "http://acm.rkbexplorer.com/data/" + acmID; 165 | String fileOut = "tmp/" + acmID + ".rdf"; 166 | try { 167 | download(fileIn, fileOut); 168 | } catch (IOException e) { 169 | } 170 | 171 | Model model = null; 172 | try { 173 | model = RDFDataMgr.loadModel(fileOut); 174 | } catch (RiotNotFoundException e) { 175 | // There is no information about the requested URI in this repository. 176 | return null; 177 | } 178 | NodeIterator it = model.listObjectsOfProperty( 179 | ResourceFactory.createResource(uri), Commons.OWL_SAMEAS); 180 | 181 | if (it.hasNext()) 182 | return it.nextNode().asResource().getURI(); 183 | 184 | return null; 185 | } 186 | 187 | private void download(String url, String file) throws IOException { 188 | URL link = new URL(url); 189 | InputStream in = new BufferedInputStream(link.openStream()); 190 | ByteArrayOutputStream out = new ByteArrayOutputStream(); 191 | byte[] buf = new byte[1024]; 192 | int n = 0; 193 | while (-1 != (n = in.read(buf))) { 194 | out.write(buf, 0, n); 195 | } 196 | out.close(); 197 | in.close(); 198 | byte[] response = out.toByteArray(); 199 | 200 | FileOutputStream fos = new FileOutputStream(file); 201 | fos.write(response); 202 | fos.close(); 203 | } 204 | 205 | private String getName(String uri) { 206 | 207 | String query = "SELECT * WHERE { <" + uri + "> <" + Commons.RDFS_LABEL 208 | + "> ?l }"; 209 | System.out.println(query); 210 | 211 | ResultSet rs = Commons.sparql(query, Commons.DBLPL3S_ENDPOINT, Commons.DBLPL3S_GRAPH); 212 | 213 | if (rs.hasNext()) { 214 | QuerySolution qs = rs.next(); 215 | return qs.getLiteral("?l").getString(); 216 | } 217 | 218 | return ""; 219 | } 220 | 221 | private ArrayList getCreators(String acmRkb) { 222 | 223 | String query = "SELECT DISTINCT * WHERE { <" + acmRkb + "> " + "<" 224 | + Commons.HAS_AUTHOR + "> ?s . 
" + "?s <" + Commons.FULL_NAME 225 | + "> ?l }"; 226 | System.out.println(query); 227 | 228 | ResultSet rs = Commons.sparql(query, Commons.ACMRKB_ENDPOINT, 229 | Commons.ACMRKB_GRAPH); 230 | 231 | ArrayList ent = new ArrayList<>(); 232 | 233 | while (rs.hasNext()) { 234 | QuerySolution qs = rs.next(); 235 | Entity e = new Entity(qs.getResource("?s").getURI(), qs.getLiteral( 236 | "?l").getString()); 237 | ent.add(e); 238 | } 239 | 240 | return ent; 241 | } 242 | 243 | private HashMap l3sToACMRkb() throws FileNotFoundException { 244 | HashMap map = new HashMap<>(); 245 | 246 | Scanner in = new Scanner(new File(Commons.DBLP_ACM_CSV)); 247 | in.nextLine(); 248 | int i = 0; 249 | while (in.hasNextLine()) { 250 | String[] line = in.nextLine().split(","); 251 | map.put(Commons.DBLPL3S_NAMESPACE + line[0].replaceAll("\"", ""), 252 | Commons.ACMRKB_NAMESPACE + line[1].replaceAll("\"", "")); 253 | if (++i == N_EXAMPLES) 254 | break; 255 | } 256 | in.close(); 257 | 258 | return map; 259 | } 260 | 261 | } 262 | 263 | class Entity { 264 | 265 | String uri, label; 266 | 267 | Entity(String uri, String label) { 268 | this.uri = uri; 269 | this.label = label; 270 | } 271 | 272 | public String getUri() { 273 | return uri; 274 | } 275 | 276 | public String getLabel() { 277 | return label; 278 | } 279 | 280 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/semantifier/SemantifierPipeline.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.semantifier; 2 | 3 | import java.io.IOException; 4 | 5 | /** 6 | * 7 | * @author Tommaso Soru 8 | * 9 | */ 10 | public class SemantifierPipeline { 11 | 12 | public static void main(String[] args) throws ClassNotFoundException, 13 | IOException { 14 | 15 | int n = Integer.parseInt(args[0]); 16 | 17 | if (args[1].equals("part1")) { 18 | 19 | new DatasetBuildStarter().run(); 20 | new DatasetBuilderAlgorithm(n).run(); 21 | 22 | } else if (args[1].equals("part2")) { 23 | 24 | System.out.println("SECTION START: FIXER"); 25 | DatasetBuildFixer fixr = new DatasetBuildFixer(); 26 | fixr.run(); 27 | fixr.fix(); 28 | System.out.println("SECTION START: SEMANTIFIER"); 29 | DatasetBuildSemantifier semr = new DatasetBuildSemantifier(n); 30 | semr.linkedDBLP(); 31 | semr.mapping(); 32 | // semr.linkedACM(); 33 | DatasetBuildSatellites.run(); 34 | // System.out.println("SECTION START: CLOSURE"); 35 | // DatasetBuildClosure clsr = new DatasetBuildClosure(); 36 | // clsr.runReflSymmTransClosure(); 37 | 38 | } else 39 | System.out.println("Second argument is {part1, part2}."); 40 | 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/Bundle.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.util.MissingResourceException; 6 | import java.util.ResourceBundle; 7 | 8 | /** 9 | * @author Tommaso Soru 10 | * 11 | */ 12 | public class Bundle { 13 | 14 | private static String bundleName; 15 | private static ResourceBundle resBundle; 16 | 17 | public static String getBundleName() { 18 | return bundleName; 19 | } 20 | 21 | public static void setBundleName(String bName) { 22 | bundleName = bName; 23 | resBundle = ResourceBundle.getBundle(bundleName); 24 | } 25 | 26 | public static int getArrayValue(String key, int pos) { 27 | 
return Integer.parseInt(getString(key).split(",")[pos]); 28 | } 29 | 30 | public static String getString(String key) { 31 | try { 32 | return resBundle.getString(key); 33 | } catch (MissingResourceException | NullPointerException e) { 34 | String str = getConfig(key); 35 | if(str != null) 36 | return str; 37 | else 38 | return '!' + key + '!'; 39 | } 40 | } 41 | 42 | private static String getConfig(String key) { 43 | // load the application properties via java.util.Properties 44 | java.util.Properties mainProperties = new java.util.Properties(); 45 | 46 | FileInputStream file; 47 | 48 | // mandolin.properties is expected in the working directory 49 | String path = "./mandolin.properties"; 50 | 51 | try { 52 | // open the properties file 53 | file = new FileInputStream(path); 54 | 55 | // load all the properties from this file 56 | mainProperties.load(file); 57 | 58 | // close the file handle once the properties are loaded 59 | file.close(); 60 | // retrieve the property we are interested in 61 | return mainProperties.getProperty(key); 62 | } catch (IOException e) { 63 | return null; 64 | } 65 | 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/CustomQuoteMode.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import org.supercsv.prefs.CsvPreference; 4 | import org.supercsv.quote.NormalQuoteMode; 5 | import org.supercsv.util.CsvContext; 6 | 7 | /** 8 | * This SuperCSV preference mode adds quotes for strings containing a space. 9 | * 10 | * @author Tommaso Soru 11 | * 12 | */ 13 | public class CustomQuoteMode extends NormalQuoteMode { 14 | 15 | public boolean quotesRequired(String csvColumn, CsvContext context, 16 | CsvPreference preference) { 17 | if (csvColumn.contains(" ")) 18 | return true; 19 | else 20 | return super.quotesRequired(csvColumn, context, preference); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/DataIO.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.io.ObjectInputStream; 8 | import java.io.ObjectOutputStream; 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | 12 | /** 13 | * @author Tommaso Soru 14 | * 15 | */ 16 | public class DataIO { 17 | 18 | public static void serialize(ArrayList list, String filepath) throws FileNotFoundException, IOException { 19 | ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(filepath)); 20 | oos.writeObject(list); 21 | oos.close(); 22 | } 23 | 24 | public static void serialize(HashMap map, String filepath) throws FileNotFoundException, IOException { 25 | ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(filepath)); 26 | oos.writeObject(map); 27 | oos.close(); 28 | } 29 | 30 | @SuppressWarnings("unchecked") 31 | public static ArrayList readList(String filepath) throws FileNotFoundException, IOException, ClassNotFoundException { 32 | ObjectInputStream ois = new ObjectInputStream(new FileInputStream(filepath)); 33 | ArrayList list = (ArrayList) ois.readObject(); 34 | ois.close(); 35 | return list; 36 | } 37 | 38 | @SuppressWarnings("unchecked") 39 | public static HashMap 
readMap(String filepath) throws FileNotFoundException, IOException, ClassNotFoundException { 40 | ObjectInputStream ois = new ObjectInputStream(new FileInputStream(filepath)); 41 | HashMap map = (HashMap) ois.readObject(); 42 | ois.close(); 43 | return map; 44 | } 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/PostgreNotStartedException.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class PostgreNotStartedException extends RuntimeException { 8 | 9 | /** 10 | * 11 | */ 12 | private static final long serialVersionUID = 2109773019615897856L; 13 | 14 | public PostgreNotStartedException() { 15 | super(); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/PrettyRandom.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class PrettyRandom { 8 | 9 | public static String get(int digits) { 10 | String r = ""; 11 | while (r.equals("") || r.length() < digits) 12 | r = String.valueOf(Math.random()).substring(2); 13 | return r.substring(0, digits); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/SetUtils.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.util.TreeSet; 7 | 8 | import org.apache.jena.riot.Lang; 9 | import org.apache.jena.riot.RDFDataMgr; 10 | import org.apache.jena.riot.system.StreamRDF; 11 | import org.apache.jena.riot.system.StreamRDFWriter; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | 15 | import com.hp.hpl.jena.graph.Triple; 16 | import com.hp.hpl.jena.sparql.core.Quad; 17 | import com.hp.hpl.jena.vocabulary.OWL; 18 | import com.hp.hpl.jena.vocabulary.RDF; 19 | 20 | /** 21 | * @author Tommaso Soru 22 | * 23 | */ 24 | public class SetUtils { 25 | 26 | private final static Logger logger = LogManager.getLogger(SetUtils.class); 27 | 28 | /** 29 | * Set difference. 
30 | * 31 | * @param setA 32 | * @param setB 33 | * @param output 34 | */ 35 | public static void minus(String setA, String setB, String output) { 36 | 37 | TreeSet setBindex = new TreeSet<>(); 38 | StreamRDF bStream = new StreamRDF() { 39 | 40 | @Override 41 | public void start() { 42 | } 43 | 44 | @Override 45 | public void triple(Triple triple) { 46 | setBindex.add(triple.toString()); 47 | } 48 | 49 | @Override 50 | public void quad(Quad quad) { 51 | } 52 | 53 | @Override 54 | public void base(String base) { 55 | } 56 | 57 | @Override 58 | public void prefix(String prefix, String iri) { 59 | } 60 | 61 | @Override 62 | public void finish() { 63 | } 64 | 65 | }; 66 | RDFDataMgr.parse(bStream, setB); 67 | 68 | final FileOutputStream out; 69 | final StreamRDF outStream; 70 | try { 71 | out = new FileOutputStream(new File(output)); 72 | outStream = StreamRDFWriter.getWriterStream(out, Lang.NT); 73 | } catch (FileNotFoundException e) { 74 | logger.error(e.getMessage()); 75 | return; 76 | } 77 | 78 | outStream.start(); 79 | 80 | StreamRDF aStream = new StreamRDF() { 81 | 82 | @Override 83 | public void start() { 84 | } 85 | 86 | @Override 87 | public void triple(Triple triple) { 88 | boolean trivial = triple.getPredicate().hasURI(RDF.type.getURI()) 89 | && triple.getObject().hasURI(OWL.Thing.getURI()); 90 | boolean known = setBindex.contains(triple.toString()); 91 | if(!known && !trivial) // save discovered triple 92 | outStream.triple(triple); 93 | logger.trace("\tknown=" + known + "\ttrivial=" + trivial + "\t" + triple.toString()); 94 | } 95 | @Override 96 | public void quad(Quad quad) { 97 | } 98 | 99 | @Override 100 | public void base(String base) { 101 | } 102 | 103 | @Override 104 | public void prefix(String prefix, String iri) { 105 | } 106 | 107 | @Override 108 | public void finish() { 109 | } 110 | 111 | }; 112 | RDFDataMgr.parse(aStream, setA); 113 | 114 | outStream.finish(); 115 | 116 | } 117 | 118 | /** 119 | * @param setA 120 | * @param setB 121 | * @param output 122 | */ 123 | public static void union(String setA, String setB, String output) { 124 | 125 | final FileOutputStream out; 126 | final StreamRDF outStream; 127 | try { 128 | out = new FileOutputStream(new File(output)); 129 | outStream = StreamRDFWriter.getWriterStream(out, Lang.NT); 130 | } catch (FileNotFoundException e) { 131 | logger.error(e.getMessage()); 132 | return; 133 | } 134 | 135 | outStream.start(); 136 | 137 | StreamRDF dataStream = new StreamRDF() { 138 | 139 | @Override 140 | public void start() { 141 | } 142 | 143 | @Override 144 | public void triple(Triple triple) { 145 | outStream.triple(triple); 146 | } 147 | 148 | @Override 149 | public void quad(Quad quad) { 150 | } 151 | 152 | @Override 153 | public void base(String base) { 154 | } 155 | 156 | @Override 157 | public void prefix(String prefix, String iri) { 158 | } 159 | 160 | @Override 161 | public void finish() { 162 | } 163 | 164 | }; 165 | 166 | RDFDataMgr.parse(dataStream, setA); 167 | RDFDataMgr.parse(dataStream, setB); 168 | 169 | outStream.finish(); 170 | 171 | } 172 | 173 | public static void keepOnly(String relation, String in, 174 | String out) { 175 | 176 | final FileOutputStream output; 177 | try { 178 | output = new FileOutputStream(new File(out)); 179 | } catch (FileNotFoundException e) { 180 | logger.error(e.getMessage()); 181 | return; 182 | } 183 | 184 | final StreamRDF writer = StreamRDFWriter.getWriterStream(output, Lang.NT); 185 | 186 | StreamRDF dataStream = new StreamRDF() { 187 | 188 | @Override 189 | public void start() { 190 
| writer.start(); 191 | } 192 | 193 | @Override 194 | public void quad(Quad quad) { 195 | } 196 | 197 | @Override 198 | public void base(String base) { 199 | } 200 | 201 | @Override 202 | public void prefix(String prefix, String iri) { 203 | } 204 | 205 | @Override 206 | public void finish() { 207 | writer.finish(); 208 | } 209 | 210 | @Override 211 | public void triple(Triple triple) { 212 | if(triple.getPredicate().getURI().equals(relation)) 213 | writer.triple(triple); 214 | } 215 | 216 | }; 217 | 218 | RDFDataMgr.parse(dataStream, in); 219 | } 220 | 221 | } 222 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/Shell.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.InputStreamReader; 5 | 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | /** 10 | * @author Tommaso Soru 11 | * 12 | */ 13 | public class Shell { 14 | 15 | private final static Logger logger = LogManager.getLogger(Shell.class); 16 | 17 | /** 18 | * Execute a command which expects an output. 19 | * 20 | * @param command 21 | * @param show 22 | * @return 23 | */ 24 | public static String execute(String command, boolean show) { 25 | StringBuffer sb = new StringBuffer(); 26 | Process p; 27 | try { 28 | p = Runtime.getRuntime().exec(command); 29 | p.waitFor(); 30 | 31 | BufferedReader reader = new BufferedReader(new InputStreamReader( 32 | p.getInputStream())); 33 | 34 | String line = ""; 35 | while ((line = reader.readLine()) != null) { 36 | if (show) 37 | logger.debug(line); 38 | sb.append(line + "\n"); 39 | } 40 | } catch (Exception e) { 41 | e.printStackTrace(); 42 | } 43 | return sb.toString(); 44 | } 45 | 46 | /** 47 | * Execute a command which expects no output. 
48 | * 49 | * @param command 50 | * @return 51 | */ 52 | public static String execute(String command) { 53 | return execute(command, false); 54 | } 55 | 56 | /** 57 | * @param args 58 | */ 59 | public static void main(String[] args) { 60 | logger.debug("Streamed:"); 61 | String output = execute("ls -l", true); 62 | logger.debug("\nBuffered:\n" + output); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/StringClean.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru <> 5 | * 6 | */ 7 | public class StringClean { 8 | 9 | public static String clean(String string) { 10 | return string.replaceAll("[^\\dA-Za-z]", ""); 11 | } 12 | 13 | public static String oneRow(String string) { 14 | return string.replaceAll("\n", " ").replaceAll("\t", " "); 15 | } 16 | 17 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/Timer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import java.util.ArrayList; 4 | 5 | import org.apache.log4j.Logger; 6 | 7 | /** 8 | * @author Tommaso Soru 9 | * 10 | */ 11 | public class Timer { 12 | 13 | private final static Logger LOGGER = Logger.getLogger("ROCKER"); 14 | 15 | private ArrayList stops; 16 | 17 | public Timer() { 18 | stops = new ArrayList<>(); 19 | stops.add(System.currentTimeMillis()); 20 | } 21 | 22 | public void lap() { 23 | stops.add(System.currentTimeMillis()); 24 | } 25 | 26 | public double getLapSeconds(int lap) { 27 | if(stops.size() < 2) 28 | return Double.NaN; 29 | return (stops.get(lap + 1) - stops.get(lap)) / 1000.0; 30 | } 31 | 32 | public double getLapMillis(int lap) { 33 | if(stops.size() < 2) 34 | return Double.NaN; 35 | return (stops.get(lap + 1) - stops.get(lap)); 36 | } 37 | 38 | public double getLastLapSeconds() { 39 | if(stops.size() < 2) 40 | return Double.NaN; 41 | return (stops.get(stops.size() - 1) - stops.get(stops.size() - 2)) / 1000.0; 42 | } 43 | 44 | public double getLastLapMillis() { 45 | if(stops.size() < 2) 46 | return Double.NaN; 47 | return stops.get(stops.size() - 1) - stops.get(stops.size() - 2); 48 | } 49 | 50 | public int getSize() { 51 | return stops.size() - 1; 52 | } 53 | 54 | public static void main(String[] args) throws InterruptedException { 55 | Timer t = new Timer(); 56 | Thread.sleep(1000); 57 | t.lap(); 58 | LOGGER.info(t.getLastLapMillis()); 59 | LOGGER.info(t.getLapMillis(0)); 60 | Thread.sleep(500); 61 | t.lap(); 62 | LOGGER.info(t.getLastLapSeconds()); 63 | LOGGER.info(t.getLapSeconds(1)); 64 | } 65 | 66 | } 67 | 68 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/URIHandler.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | import org.apache.logging.log4j.LogManager; 4 | import org.apache.logging.log4j.Logger; 5 | 6 | import com.hp.hpl.jena.graph.Node; 7 | 8 | /** 9 | * Add blank-node support. 
10 | * 11 | * @author Tommaso Soru 12 | * 13 | */ 14 | public class URIHandler { 15 | 16 | private final static Logger logger = LogManager.getLogger(URIHandler.class); 17 | 18 | public static String parse(Node r) { 19 | String s; 20 | try { 21 | s = r.getURI(); 22 | } catch (UnsupportedOperationException e) { 23 | logger.debug(e.getMessage()); 24 | s = r.getBlankNodeLabel(); 25 | logger.debug("Changing to "+s); 26 | } 27 | return s; 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/mandolin/util/URLs.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin.util; 2 | 3 | /** 4 | * @author Tommaso Soru 5 | * 6 | */ 7 | public class URLs { 8 | 9 | public static final String 10 | RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", 11 | RDF_CLASS = "http://www.w3.org/2000/01/rdf-schema#Class", 12 | OWL_CLASS = "http://www.w3.org/2002/07/owl#Class", 13 | RDFS_SUBCLASSOF = "http://www.w3.org/2000/01/rdf-schema#subClassOf", 14 | OWL_SAMEAS = "http://www.w3.org/2002/07/owl#sameAs"; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=INFO, A1 3 | log4j.logger.org.apache.jena.riot=ERROR, A1 4 | log4j.logger.org.slf4j.impl.Log4jLoggerAdapter=ERROR, A1 5 | 6 | # A1 is set to be a ConsoleAppender. 7 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 8 | 9 | # A1 uses PatternLayout. 10 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 11 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 12 | 13 | -------------------------------------------------------------------------------- /src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %-5level %d [%t] %c:%M(%L): %m%n 5 | logs 6 | ${LOG_DIR}/archive 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/resources/publications.properties: -------------------------------------------------------------------------------- 1 | ############## Required Settings ############# 2 | 3 | mapping_files=mappings/dblp-acm.csv 4 | csv_link_type=http://www.w3.org/2002/07/owl#sameAs 5 | local_namespace=http://aksw.org/Groups/SIMBA/SemSRL/ 6 | 7 | # can be {1, 2} 8 | query_depth=1 9 | 10 | ################ Data Sources ################ 11 | 12 | dblp_namespace=http://dblp.rkbexplorer.com/id/ 13 | dblp_store_type=sparql 14 | dblp_store_path=http://dblp.rkbexplorer.com/sparql 15 | 16 | acm_namespace=http://acm.rkbexplorer.com/id/ 17 | acm_store_type=sparql 18 | acm_store_path=http://acm.rkbexplorer.com/sparql 19 | 20 | scholar_namespace=http://scholar.google.com/id/ 21 | scholar_store_type=csv 22 | scholar_store_path=datasets/scholar.csv 23 | 24 | ############### Do Not Modify ################ 25 | 26 | owl_same_as=http://www.w3.org/2002/07/owl#sameAs 27 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/mandolin/MandolinTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.mandolin; 2 | 3 | import static org.junit.Assert.fail; 4 | 5 | import 
org.junit.Test; 6 | 7 | /** 8 | * @author Tommaso Soru 9 | * 10 | */ 11 | public class MandolinTest { 12 | 13 | @Test 14 | public void aimRelation() throws Exception { 15 | 16 | String theArgs = "--output eval/mandolin-test --input AKSW-one-out.nt " 17 | + "--aim http://mandolin.aksw.org/example/topic"; 18 | 19 | run(theArgs); 20 | 21 | } 22 | 23 | @Test 24 | public void aimAnything() throws Exception { 25 | 26 | String theArgs = "--output eval/mandolin-test --input AKSW-one-out.nt " 27 | + "--aim *"; 28 | 29 | run(theArgs); 30 | 31 | } 32 | 33 | private void run(String theArgs) { 34 | 35 | String[] theArgsArray = theArgs.split(" "); 36 | 37 | try { 38 | Mandolin.main(theArgsArray); 39 | } catch (Exception e) { 40 | fail(); 41 | } 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/resources/AKSW-one-out.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=INFO, A1 3 | log4j.logger.org.apache.jena.riot=ERROR, A1 4 | log4j.logger.org.slf4j.impl.Log4jLoggerAdapter=ERROR, A1 5 | 6 | # A1 is set to be a ConsoleAppender. 7 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 8 | 9 | # A1 uses PatternLayout. 10 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 11 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 12 | 13 | -------------------------------------------------------------------------------- /src/test/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %-5level %d [%t] %c:%M(%L): %m%n 5 | logs 6 | ${LOG_DIR}/archive 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | --------------------------------------------------------------------------------
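The utility classes above are self-contained, so they can be exercised outside the main pipeline. The snippet below is a minimal, illustrative sketch, not a file from the repository: it shows how `SetUtils` and `Timer` might be combined to isolate newly inferred `owl:sameAs` links, and the file names `inferred.nt`, `training.nt`, `sameas-only.nt`, and `new-links.nt` are placeholders.

```java
import org.aksw.mandolin.util.SetUtils;
import org.aksw.mandolin.util.Timer;

// Illustrative usage sketch; the N-Triples file names are placeholders.
public class SetUtilsExample {

	public static void main(String[] args) {
		Timer timer = new Timer();

		// keep only owl:sameAs statements from the inferred triples
		SetUtils.keepOnly("http://www.w3.org/2002/07/owl#sameAs",
				"inferred.nt", "sameas-only.nt");

		// drop statements that already occur in the training data
		SetUtils.minus("sameas-only.nt", "training.nt", "new-links.nt");

		timer.lap();
		System.out.println("Done in " + timer.getLastLapSeconds() + " s.");
	}

}
```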