├── sansa-examples-flink ├── config │ ├── csswrapper │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── Dockerfile │ │ ├── scripts │ │ │ └── inject-etc-hosts.sh │ │ └── frontend │ │ │ ├── css │ │ │ ├── bde-hdfs.css │ │ │ ├── bde-hadoop.css │ │ │ └── bde-flink.css │ │ │ └── default.conf │ ├── integrator │ │ └── user-interfaces │ └── hadoop │ │ └── hadoop.env ├── Dockerfile ├── .gitignore ├── src │ └── main │ │ ├── resources │ │ ├── Clustering_sampledata.nt │ │ ├── ont_functional.owl │ │ └── ont_manchester.owl │ │ └── scala │ │ └── net │ │ └── sansa_stack │ │ └── examples │ │ └── flink │ │ ├── rdf │ │ ├── TripleReader.scala │ │ ├── TripleWriter.scala │ │ ├── RDFStats.scala │ │ └── TripleOps.scala │ │ ├── ml │ │ └── clustering │ │ │ └── RDFByModularityClustering.scala │ │ ├── owl │ │ └── OWLReaderDataSet.scala │ │ └── inference │ │ └── RDFGraphInference.scala ├── README.md ├── docker-compose.yml └── pom.xml ├── sansa-examples-spark ├── src │ └── main │ │ ├── resources │ │ ├── datalake │ │ │ ├── data │ │ │ │ ├── offer.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-86272586-5266-423f-b936-829019bf5d8c.csv │ │ │ │ │ ├── .part-00001-86272586-5266-423f-b936-829019bf5d8c.csv.crc │ │ │ │ │ └── .part-00000-86272586-5266-423f-b936-829019bf5d8c.csv.crc │ │ │ │ ├── person.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv │ │ │ │ │ ├── .part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc │ │ │ │ │ ├── .part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc │ │ │ │ │ └── part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv │ │ │ │ ├── product.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv │ │ │ │ │ ├── .part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc │ │ │ │ │ ├── .part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc │ │ │ │ │ └── part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv │ │ │ │ ├── review.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv │ │ │ │ │ ├── .part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc │ │ │ │ │ └── .part-00000-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc │ │ │ │ └── producer.csv │ │ │ │ │ └── producer.csv │ │ │ ├── queries │ │ │ │ └── Q1.sparql │ │ │ ├── config │ │ │ └── mappings.ttl │ │ ├── BorderFlow_Sample1.txt │ │ ├── metrics.conf │ │ ├── log4j.properties │ │ ├── Clustering_sampledata.nt │ │ ├── ont_functional.owl │ │ └── ont_manchester.owl │ │ └── scala │ │ └── net │ │ └── sansa_stack │ │ └── examples │ │ └── spark │ │ ├── rdf │ │ ├── TripleReader.scala │ │ ├── TripleWriter.scala │ │ ├── RDFStats.scala │ │ ├── PageRank.scala │ │ ├── TripleOps.scala │ │ └── RDFQualityAssessment.scala │ │ ├── query │ │ ├── HDTQuery.scala │ │ ├── Semantic.scala │ │ ├── DataLake.scala │ │ ├── Sparklify.scala │ │ └── GraphQuery.scala │ │ ├── ml │ │ ├── clustering │ │ │ ├── BorderFlowClustering.scala │ │ │ ├── SilviaClustering.scala │ │ │ ├── RDFByModularityClustering.scala │ │ │ └── RDFGraphPIClustering.scala │ │ ├── kernel │ │ │ └── RDFGraphKernel.scala │ │ ├── mining │ │ │ └── MineRules.scala │ │ ├── kge │ │ │ └── CrossValidation.scala │ │ └── outliers │ │ │ └── anomalydetection │ │ │ └── AnomalyDetection.scala │ │ ├── owl │ │ ├── OWLReaderRDD.scala │ │ └── OWLReaderDataset.scala │ │ └── inference │ │ ├── axioms │ │ └── RDFGraphInference.scala │ │ └── triples │ │ └── RDFGraphInference.scala ├── config │ ├── csswrapper │ │ ├── Makefile │ │ ├── Dockerfile │ │ ├── scripts │ │ │ └── inject-etc-hosts.sh │ │ └── frontend │ │ │ 
├── bde-css │ │ │ ├── bde-hdfs.css │ │ │ ├── bde-spark-master.css │ │ │ └── bde-hadoop.css │ │ │ └── default.conf │ ├── integrator │ │ └── user-interfaces │ └── hadoop │ │ └── hadoop.env ├── .gitignore ├── Dockerfile ├── docker-compose-sansa-examples.yml ├── README.md └── docker-compose.yml ├── .travis.yml ├── .gitignore ├── run-examples-wip.sh ├── README.md ├── pom.xml └── LICENSE /sansa-examples-flink/config/csswrapper/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/Makefile: -------------------------------------------------------------------------------- 1 | hosts: 2 | bash scripts/inject-etc-hosts.sh 3 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/Makefile: -------------------------------------------------------------------------------- 1 | hosts: 2 | bash scripts/inject-etc-hosts.sh 3 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/part-00001-86272586-5266-423f-b936-829019bf5d8c.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/.part-00001-86272586-5266-423f-b936-829019bf5d8c.csv.crc: -------------------------------------------------------------------------------- 1 | crc 
-------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/.part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/.part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/.part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/BorderFlow_Sample1.txt: -------------------------------------------------------------------------------- 1 | 52 1412 2 | 53 2542 3 | 48 52 4 | 47 2385 5 | 46 46 6 | 48 1412 7 | 48 46 8 | 4315 48 9 | 481 1412 10 | 1412 52 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | sudo: false 3 | cache: 4 | directories: 5 | - $HOME/.m2 6 | scala: 7 | - 2.11.11 8 | jdk: 9 | - openjdk8 10 | script: 11 | - mvn scalastyle:check 12 | - mvn clean install -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/.part-00000-86272586-5266-423f-b936-829019bf5d8c.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/offer.csv/.part-00000-86272586-5266-423f-b936-829019bf5d8c.csv.crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/.part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/person.csv/.part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/.part-00000-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/review.csv/.part-00000-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc -------------------------------------------------------------------------------- /sansa-examples-spark/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | pom.xml.tag 3 | pom.xml.releaseBackup 4 | pom.xml.versionsBackup 5 | pom.xml.next 6 | release.properties 7 | dependency-reduced-pom.xml 8 | buildNumber.properties 9 | *.iml 10 | /graph 11 | *.idea 12 | .cache-main 13 | .classpath 14 | .project 15 | .settings 16 | 
-------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/.part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/product.csv/.part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | 3 | MAINTAINER "Aad Versteden 4 | MAINTAINER "Ivan Ermilov " 5 | MAINTAINER "Gezim Sejdiu " 6 | 7 | COPY frontend/default.conf /etc/nginx/conf.d/default.conf 8 | COPY frontend/css /data/bde-css 9 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | 3 | MAINTAINER "Aad Versteden 4 | MAINTAINER "Ivan Ermilov " 5 | MAINTAINER "Gezim Sejdiu " 6 | 7 | COPY frontend/default.conf /etc/nginx/conf.d/default.conf 8 | COPY frontend/bde-css /data/bde-css 9 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/scripts/inject-etc-hosts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "# SANSA-Examples-Flink-data-integrator-ui" | sudo tee -a /etc/hosts 4 | echo "127.0.0.1 hdfs.demo.sansa-stack.local hue.demo.sansa-stack.local flink-master.demo.sansa-stack.local flink-worker.demo.sansa-stack.local demo.sansa-stack.local" | sudo tee -a /etc/hosts; 5 | 6 | 7 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/scripts/inject-etc-hosts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "# SANSA-Examples-Spark-data-integrator-ui" | sudo tee -a /etc/hosts 4 | echo "127.0.0.1 hdfs.demo.sansa-stack.local hue.demo.sansa-stack.local spark-master.demo.sansa-stack.local spark-worker.demo.sansa-stack.local demo.sansa-stack.local " | sudo tee -a /etc/hosts; 5 | 6 | 7 | -------------------------------------------------------------------------------- /sansa-examples-flink/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bde2020/flink-maven-template:1.1.3-hadoop2.7 2 | 3 | MAINTAINER Gezim Sejdiu 4 | 5 | ENV FLINK_APPLICATION_JAR_NAME sansa-examples-flink-1.1-with-dependencies 6 | ENV FLINK_APPLICATION_MAIN_CLASS net.sansa_stack.examples.flink.rdf.TripleReader 7 | ENV FLINK_APPLICATION_ARGS "hdfs://namenode:8020/user/root/input/rdf.nt" 8 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/producer.csv/producer.csv: -------------------------------------------------------------------------------- 1 | nr,label,comment,homepage,country,publisher,publishDate 2 | 1,"enzymologist neb falsehoods","smashes leavening beauticians novitiates peaks nonhistoric fluorinations seductresses promotions corresponding denuder wispier laboriousness mechanisms skepsis tulips barstools demobs bandmasters pallbearer","http://www.Producer1.com/","DE",1,"2003-06-15" 
3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .settings 7 | .history 8 | .lib/ 9 | .project 10 | dist/* 11 | target/ 12 | lib_managed/ 13 | src_managed/ 14 | project/boot/ 15 | project/plugins/project/ 16 | 17 | # Scala-IDE specific 18 | .scala_dependencies 19 | .worksheet 20 | .idea/ 21 | *.iml 22 | 23 | deptree.txt 24 | scalastyle-output.xml 25 | 26 | 27 | # Files generated for spark runs 28 | sansa-examples-spark/data/ 29 | 30 | -------------------------------------------------------------------------------- /sansa-examples-spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bde2020/spark-java-template:2.0.1-hadoop2.7 2 | 3 | MAINTAINER Gezim Sejdiu 4 | 5 | ENV SPARK_APPLICATION_JAR_NAME sansa-examples-spark-1.1-with-dependencies 6 | ENV SPARK_APPLICATION_MAIN_CLASS net.sansa_stack.examples.spark.rdf.TripleReader 7 | ENV SPARK_APPLICATION_ARGS "hdfs://namenode:8020/user/hue/input/rdf.nt hdfs://namenode:8020/user/hue/output/result.nt" 8 | 9 | ENV HDFS_URL=hdfs://hdfs:9000 10 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/metrics.conf: -------------------------------------------------------------------------------- 1 | # This configuration file contains the settings for the assessment. 2 | rdf.qualityassessment.dataset.prefixes=["http://dbpedia.org/"] 3 | 4 | rdf.qualityassessment.dataset.subject="http://dbpedia.org/ontology/Person" 5 | rdf.qualityassessment.dataset.property="http://commons.dbpedia.org/property/source" 6 | 7 | rdf.qualityassessment.dataset.lowerBound=0.1 8 | rdf.qualityassessment.dataset.upperBound=0.9 9 | 10 | rdf.qualityassessment.dataset.shortUri.threshold = 95 -------------------------------------------------------------------------------- /sansa-examples-spark/docker-compose-sansa-examples.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | sansa-examples: 4 | build: . 5 | hostname: sansa-examples 6 | container_name: sansa-examples 7 | networks: 8 | - hadoop 9 | environment: 10 | - HDFS_URL=hdfs://namenode:8020 11 | - SPARK_APPLICATION_ARGS = $SPARK_APPLICATION_ARGS 12 | - SPARK_APPLICATION_MAIN_CLASS = $SPARK_APPLICATION_MAIN_CLASS 13 | # env_file: 14 | # - ./config/hadoop/hadoop.env 15 | # links: 16 | # - "spark-master" 17 | 18 | networks: 19 | hadoop: 20 | external: true 21 | -------------------------------------------------------------------------------- /sansa-examples-flink/.gitignore: -------------------------------------------------------------------------------- 1 | # use glob syntax. 2 | syntax: glob 3 | *.ser 4 | *.class 5 | *~ 6 | *.bak 7 | #*.off 8 | *.old 9 | 10 | # eclipse conf file 11 | .settings 12 | .classpath 13 | .project 14 | .manager 15 | .scala_dependencies 16 | 17 | # idea 18 | .idea 19 | *.iml 20 | 21 | # building 22 | target 23 | build 24 | null 25 | tmp* 26 | temp* 27 | dist 28 | test-output 29 | build.log 30 | 31 | # other scm 32 | .svn 33 | .CVS 34 | .hg* 35 | 36 | # switch to regexp syntax. 
37 | # syntax: regexp 38 | # ^\.pc/ 39 | 40 | #SHITTY output not in target directory 41 | build.log 42 | -------------------------------------------------------------------------------- /run-examples-wip.sh: -------------------------------------------------------------------------------- 1 | LAYER=query 2 | EXAMPLE=Sparklify 3 | 4 | JAR=`ls sansa-examples-spark/target/sansa-examples-spark_*-dist.jar` 5 | BASE_URL="file://"`pwd`"/" 6 | echo "Using jar file $JAR" 7 | echo "Base URL: $BASE_URL" 8 | 9 | spark-submit \ 10 | --class net.sansa_stack.examples.spark.$LAYER.$EXAMPLE \ 11 | --master spark://spark-master:7077 \ 12 | "$JAR" -i "$BASE_URL/sansa-examples-spark/src/main/resources/rdf.nt" \ 13 | 14 | # TODO Validate the output 15 | curl -LH 'Accept: application/sparql-results+json' 'http://localhost:7531/sparql?query=SELECT%20%2A%20%7B%20%3Fs%20%3Fp%20%3Fo%20%7D' 16 | 17 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv: -------------------------------------------------------------------------------- 1 | nr,name,mbox_sha1sum,country,publisher,publishDate 2 | 1,'Ruggiero-Delane','fb3efd92e3c7a8d775a895ba476e11a3e8f3fac','US',1,2008-09-05 3 | 2,'Eyana-Aurelianus','df1cf8e68d49e5b65f1507dbecec6b61e9dc98','JP',1,2008-08-07 4 | 3,'Danijela-Adalbrand','9b9d4b8dcf7ada3c181b4bed1fa3c53d29caf65','US',1,2008-07-21 5 | 4,'Allegra-Walburga','619b2f69a01a7d86c0eca3f5e910c5b559ff3a','RU',1,2008-06-23 6 | 5,'Przemek-Berte','c3b1c82511908f706153319688a7a5599b8ad8c0','ES',1,2008-08-19 7 | 6,'Caryn','d6deee088e99af0f7c65fb7cca9bdfbbe3d7343','CN',1,2008-06-29 8 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/css/bde-hdfs.css: -------------------------------------------------------------------------------- 1 | body { 2 | padding: 0; 3 | } 4 | 5 | .navigator { 6 | position: fixed; 7 | top: 0; 8 | left: 0; 9 | width: 100%; 10 | height: 50px; 11 | margin: 0 auto; 12 | padding: 10px; 13 | background: #A94F74; 14 | box-sizing: border-box; 15 | } 16 | 17 | .navbar { 18 | position: relative; 19 | top: auto; 20 | margin-top: 50px; 21 | } 22 | 23 | .container-fluid .card { 24 | width: 1280px; 25 | margin: 2rem auto; 26 | padding: 1rem; 27 | border: none; 28 | background: white; 29 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 30 | } -------------------------------------------------------------------------------- /sansa-examples-flink/config/integrator/user-interfaces: -------------------------------------------------------------------------------- 1 | { "data": [ 2 | { 3 | "id": 1, 4 | "type": "user-interfaces", 5 | "attributes": { 6 | "label": "Apache Flink Dashboard", 7 | "base-url": "http://flink-master.demo.sansa-stack.local" 8 | } 9 | }, 10 | { 11 | "id": 2, 12 | "type": "user-interfaces", 13 | "attributes": { 14 | "label": "HDFS", 15 | "base-url": "http://hdfs.demo.sansa-stack.local" 16 | } 17 | }, 18 | { 19 | "id": 3, 20 | "type": "user-interfaces", 21 | "attributes": { 22 | "label": "Hue", 23 | "base-url": "http://hue.demo.sansa-stack.local", 24 | "append-path": "/home" 25 | } 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/bde-css/bde-hdfs.css: -------------------------------------------------------------------------------- 1 | body { 2 | 
padding: 0; 3 | } 4 | 5 | .navigator { 6 | position: fixed; 7 | top: 0; 8 | left: 0; 9 | width: 100%; 10 | height: 50px; 11 | margin: 0 auto; 12 | padding: 10px; 13 | background: #A94F74; 14 | box-sizing: border-box; 15 | } 16 | 17 | .navbar { 18 | position: relative; 19 | top: auto; 20 | margin-top: 50px; 21 | } 22 | 23 | .container-fluid .card { 24 | width: 1280px; 25 | margin: 2rem auto; 26 | padding: 1rem; 27 | border: none; 28 | background: white; 29 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 30 | } -------------------------------------------------------------------------------- /sansa-examples-spark/config/integrator/user-interfaces: -------------------------------------------------------------------------------- 1 | { "data": [ 2 | { 3 | "id": 1, 4 | "type": "user-interfaces", 5 | "attributes": { 6 | "label": "Spark Master", 7 | "base-url": "http://spark-master.demo.sansa-stack.local", 8 | "append-path": "/" 9 | } 10 | }, 11 | { 12 | "id": 2, 13 | "type": "user-interfaces", 14 | "attributes": { 15 | "label": "Spark Worker", 16 | "base-url": "http://spark-worker.demo.sansa-stack.local" 17 | } 18 | }, 19 | { 20 | "id": 3, 21 | "type": "user-interfaces", 22 | "attributes": { 23 | "label": "HDFS", 24 | "base-url": "http://hdfs.demo.sansa-stack.local" 25 | } 26 | }, 27 | { 28 | "id": 4, 29 | "type": "user-interfaces", 30 | "attributes": { 31 | "label": "Hue", 32 | "base-url": "http://hue.demo.sansa-stack.local", 33 | "append-path": "/home" 34 | } 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=INFO stdout 3 | 4 | # Direct log messages to a log file 5 | log4j.appender.file=org.apache.log4j.RollingFileAppender 6 | log4j.appender.file.File=C:\\logging.log 7 | log4j.appender.file.MaxFileSize=10MB 8 | log4j.appender.file.MaxBackupIndex=10 9 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 10 | log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %C{1}:%L - %m%n 11 | 12 | # Direct log messages to stdout 13 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 14 | log4j.appender.stdout.Target=System.out 15 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %C:%L - %m%n 17 | 18 | log4j.logger.akka.event.slf4j.Slf4jLogger=ERROR 19 | log4j.logger.akka.remote.Remoting=ERROR 20 | log4j.logger.org.spark_project.jetty=ERROR 21 | log4j.logger.org.apache.spark=ERROR 22 | log4j.logger.org.apache.hadoop=ERROR -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/resources/Clustering_sampledata.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | . 9 | . 10 | . 11 | . 12 | . 13 | . -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/Clustering_sampledata.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | . 9 | . 10 | . 11 | . 12 | . 13 | . 
-------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/bde-css/bde-spark-master.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | strong { 6 | font-weight: 700; 7 | } 8 | 9 | .row-fluid { 10 | width: 1280px; 11 | margin: 2rem auto; 12 | padding: 1rem; 13 | background: white; 14 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 15 | } 16 | 17 | .row-fluid:first-of-type { 18 | position: fixed; 19 | top: 0; 20 | left: 0; 21 | width: 100%; 22 | margin: 0 auto; 23 | padding: 0; 24 | background: #A94F74; 25 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 26 | } 27 | 28 | .row-fluid:nth-of-type(2) { 29 | margin-top: 5rem; 30 | } 31 | 32 | .row-fluid:first-of-type .span12 { 33 | float: none; 34 | width: 1280px; 35 | margin: auto; 36 | } 37 | 38 | .row-fluid:first-of-type h3 { 39 | font-size: 1rem; 40 | line-height: 1; 41 | margin: auto; 42 | color: white; 43 | } 44 | 45 | .row-fluid:first-of-type span { 46 | color: white; 47 | margin-right: 50px !important; 48 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/queries/Q1.sparql: -------------------------------------------------------------------------------- 1 | PREFIX rdfs: 2 | PREFIX rdf: 3 | PREFIX foaf: 4 | PREFIX schema: 5 | PREFIX rev: 6 | PREFIX edm: 7 | PREFIX dc: 8 | PREFIX gr: 9 | PREFIX dcterms: 10 | PREFIX xsd: 11 | PREFIX bsbm: 12 | 13 | SELECT DISTINCT ?label ?value 14 | WHERE { 15 | ?product rdfs:label ?label . 16 | ?product bsbm:productPropertyNumeric1 ?value . 17 | ?product rdf:type bsbm:Product . 18 | ?product bsbm:producer ?producer . 19 | ?producer rdf:type bsbm:Producer . 20 | ?producer foaf:homepage ?hp . 21 | ?review bsbm:reviewFor ?product . 22 | ?review rdf:type schema:Review . 23 | ?review rev:reviewer ?pers . 24 | ?pers foaf:name ?fn . 25 | ?pers edm:country ?cn . 26 | ?offer bsbm:product ?product . 27 | ?offer rdf:type schema:Offer . 
28 | FILTER (?value > 102) 29 | } 30 | ORDER BY ?label 31 | LIMIT 10 -------------------------------------------------------------------------------- /sansa-examples-flink/config/hadoop/hadoop.env: -------------------------------------------------------------------------------- 1 | CORE_CONF_fs_defaultFS=hdfs://namenode:8020 2 | CORE_CONF_hadoop_http_staticuser_user=root 3 | CORE_CONF_hadoop_proxyuser_hue_hosts=* 4 | CORE_CONF_hadoop_proxyuser_hue_groups=* 5 | HDFS_CONF_dfs_webhdfs_enabled=true 6 | HDFS_CONF_dfs_permissions_enabled=false 7 | 8 | YARN_CONF_yarn_log___aggregation___enable=true 9 | YARN_CONF_yarn_resourcemanager_recovery_enabled=true 10 | YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore 11 | YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate 12 | YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs 13 | YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ 14 | YARN_CONF_yarn_timeline___service_enabled=true 15 | YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true 16 | YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true 17 | YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 18 | YARN_CONF_yarn_timeline___service_hostname=historyserver 19 | YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 20 | YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 21 | YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 22 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/hadoop/hadoop.env: -------------------------------------------------------------------------------- 1 | CORE_CONF_fs_defaultFS=hdfs://namenode:8020 2 | CORE_CONF_hadoop_http_staticuser_user=root 3 | CORE_CONF_hadoop_proxyuser_hue_hosts=* 4 | CORE_CONF_hadoop_proxyuser_hue_groups=* 5 | 6 | HDFS_CONF_dfs_webhdfs_enabled=true 7 | HDFS_CONF_dfs_permissions_enabled=false 8 | 9 | YARN_CONF_yarn_log___aggregation___enable=true 10 | YARN_CONF_yarn_resourcemanager_recovery_enabled=true 11 | YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore 12 | YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate 13 | YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs 14 | YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ 15 | YARN_CONF_yarn_timeline___service_enabled=true 16 | YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true 17 | YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true 18 | YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 19 | YARN_CONF_yarn_timeline___service_hostname=historyserver 20 | YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 21 | YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 22 | YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Archived Repository - Do not use this repository anymore! 2 | 3 | SANSA got easier to use! 
All its code has been consolidated into a single repository at https://github.com/SANSA-Stack/SANSA-Stack 4 | 5 | 6 | 7 | # SANSA-Examples 8 | [![Build Status](https://ci.aksw.org/jenkins/job/SANSA%20Examples/job/develop/badge/icon)](https://ci.aksw.org/jenkins/job/SANSA%20Examples/job/develop/) 9 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 10 | [![Twitter](https://img.shields.io/twitter/follow/SANSA_Stack.svg?style=social)](https://twitter.com/SANSA_Stack) 11 | 12 | This directory contains code examples for various SANSA functionality. 13 | 14 | ### [sansa-examples-spark](https://github.com/SANSA-Stack/SANSA-Examples/tree/master/sansa-examples-spark) 15 | Contains the SANSA Examples for [Apache Spark](http://spark.apache.org/). 16 | 17 | ### [sansa-examples-flink](https://github.com/SANSA-Stack/SANSA-Examples/tree/master/sansa-examples-flink) 18 | Contains the SANSA Examples for [Apache Flink](http://flink.apache.org/). 19 | 20 | ## How to Contribute 21 | We always welcome new contributors to the project! Please see [our contribution guide](http://sansa-stack.net/contributing-to-sansa/) for more details on how to get started contributing to SANSA. 22 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/TripleReader.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.flink.io._ 6 | import net.sansa_stack.rdf.flink.model._ 7 | import org.apache.flink.api.scala.ExecutionEnvironment 8 | import org.apache.jena.riot.Lang 9 | 10 | object TripleReader { 11 | 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String): Unit = { 22 | 23 | println("======================================") 24 | println("| Triple reader example |") 25 | println("======================================") 26 | 27 | val env = ExecutionEnvironment.getExecutionEnvironment 28 | 29 | val triples = env.rdf(Lang.NTRIPLES)(input) 30 | triples.getTriples().first(10).print() 31 | } 32 | 33 | case class Config(in: String = "") 34 | 35 | val parser = new scopt.OptionParser[Config]("Triple reader example") { 36 | 37 | head(" Triple reader example") 38 | 39 | opt[String]('i', "input").required().valueName(""). 40 | action((x, c) => c.copy(in = x)). 
41 | text("path to file that contains the data (in N-Triples format)") 42 | 43 | help("help").text("prints this usage text") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/css/bde-hadoop.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | body > .container { 6 | margin: 5rem auto; 7 | background: white; 8 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 9 | } 10 | 11 | header.bs-docs-nav { 12 | position: fixed; 13 | top: 0; 14 | left: 0; 15 | width: 100%; 16 | height: 3rem; 17 | border: none; 18 | background: #A94F74; 19 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 20 | } 21 | 22 | header.bs-docs-nav .navbar-brand { 23 | background: inherit; 24 | } 25 | 26 | #ui-tabs .active a { 27 | background: #B96A8B; 28 | } 29 | 30 | #ui-tabs > li > a { 31 | color: white; 32 | } 33 | 34 | .navbar-inverse .navbar-nav > .dropdown > a .caret { 35 | border-top-color: white; 36 | border-bottom-color: white; 37 | } 38 | 39 | .navbar-inverse .navbar-nav > .open > a, 40 | .navbar-inverse .navbar-nav > .open > a:hover, 41 | .navbar-inverse .navbar-nav > .open > a:focus { 42 | background-color: #B96A8B; 43 | } 44 | 45 | .dropdown-menu > li > a { 46 | color: #A94F74; 47 | } 48 | 49 | .modal-dialog .panel-success { 50 | border-color: lightgrey; 51 | } 52 | 53 | .modal-dialog .panel-heading { 54 | background-color: #A94F74 !important; 55 | } 56 | 57 | .modal-dialog .panel-heading select { 58 | margin-top: 1rem; 59 | } -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/bde-css/bde-hadoop.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | body > .container { 6 | margin: 5rem auto; 7 | background: white; 8 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 9 | } 10 | 11 | header.bs-docs-nav { 12 | position: fixed; 13 | top: 0; 14 | left: 0; 15 | width: 100%; 16 | height: 3rem; 17 | border: none; 18 | background: #A94F74; 19 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 20 | } 21 | 22 | header.bs-docs-nav .navbar-brand { 23 | background: inherit; 24 | } 25 | 26 | #ui-tabs .active a { 27 | background: #B96A8B; 28 | } 29 | 30 | #ui-tabs > li > a { 31 | color: white; 32 | } 33 | 34 | .navbar-inverse .navbar-nav > .dropdown > a .caret { 35 | border-top-color: white; 36 | border-bottom-color: white; 37 | } 38 | 39 | .navbar-inverse .navbar-nav > .open > a, 40 | .navbar-inverse .navbar-nav > .open > a:hover, 41 | .navbar-inverse .navbar-nav > .open > a:focus { 42 | background-color: #B96A8B; 43 | } 44 | 45 | .dropdown-menu > li > a { 46 | color: #A94F74; 47 | } 48 | 49 | .modal-dialog .panel-success { 50 | border-color: lightgrey; 51 | } 52 | 53 | .modal-dialog .panel-heading { 54 | background-color: #A94F74 !important; 55 | } 56 | 57 | .modal-dialog .panel-heading select { 58 | margin-top: 1rem; 59 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/TripleReader.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import net.sansa_stack.rdf.spark.io._ 4 | import 
org.apache.jena.riot.Lang 5 | import org.apache.spark.sql.SparkSession 6 | 7 | 8 | object TripleReader { 9 | 10 | def main(args: Array[String]) { 11 | parser.parse(args, Config()) match { 12 | case Some(config) => 13 | run(config.in) 14 | case None => 15 | println(parser.usage) 16 | } 17 | } 18 | 19 | def run(input: String): Unit = { 20 | 21 | val spark = SparkSession.builder 22 | .appName(s"Triple reader example $input") 23 | .master("local[*]") 24 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 25 | .getOrCreate() 26 | 27 | println("======================================") 28 | println("| Triple reader example |") 29 | println("======================================") 30 | 31 | val lang = Lang.NTRIPLES 32 | val triples = spark.rdf(lang)(input) 33 | 34 | triples.take(5).foreach(println(_)) 35 | 36 | // triples.saveAsNTriplesFile(output) 37 | 38 | spark.stop 39 | 40 | } 41 | 42 | case class Config(in: String = "") 43 | 44 | val parser = new scopt.OptionParser[Config]("Triple reader example") { 45 | 46 | head(" Triple reader example") 47 | 48 | opt[String]('i', "input").required().valueName(""). 49 | action((x, c) => c.copy(in = x)). 50 | text("path to file that contains the data (in N-Triples format)") 51 | 52 | help("help").text("prints this usage text") 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/TripleWriter.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.flink.io._ 6 | import org.apache.flink.api.scala.ExecutionEnvironment 7 | import org.apache.jena.riot.Lang 8 | 9 | object TripleWriter { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.in, config.out) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(input: String, output: String): Unit = { 21 | 22 | println("======================================") 23 | println("| Triple writer example |") 24 | println("======================================") 25 | 26 | val env = ExecutionEnvironment.getExecutionEnvironment 27 | 28 | val triples = env.rdf(Lang.NTRIPLES)(input) 29 | triples.saveAsNTriplesFile(output) 30 | 31 | env.execute(s"Triple writer example ($input)") 32 | 33 | } 34 | 35 | case class Config( 36 | in: String = "", 37 | out: String = "") 38 | 39 | val parser = new scopt.OptionParser[Config]("Triple writer example ") { 40 | 41 | head("Triple writer example ") 42 | 43 | opt[String]('i', "input").required().valueName(""). 44 | action((x, c) => c.copy(in = x)). 45 | text("path to file that contains the data (in N-Triples format)") 46 | 47 | opt[String]('o', "out").required().valueName(""). 48 | action((x, c) => c.copy(out = x)). 
49 | text("the output directory") 50 | 51 | help("help").text("prints this usage text") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/RDFStats.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import java.io.File 4 | 5 | import scala.collection.mutable 6 | 7 | import net.sansa_stack.rdf.flink.io._ 8 | import net.sansa_stack.rdf.flink.stats._ 9 | import org.apache.flink.api.scala.ExecutionEnvironment 10 | import org.apache.jena.riot.Lang 11 | 12 | object RDFStats { 13 | 14 | def main(args: Array[String]) { 15 | parser.parse(args, Config()) match { 16 | case Some(config) => 17 | run(config.in, config.out) 18 | case None => 19 | println(parser.usage) 20 | } 21 | } 22 | 23 | def run(input: String, output: String): Unit = { 24 | 25 | val rdf_stats_file = new File(input).getName 26 | 27 | println("======================================") 28 | println("| RDF Statistic example |") 29 | println("======================================") 30 | 31 | val env = ExecutionEnvironment.getExecutionEnvironment 32 | 33 | val triples = env.rdf(Lang.NTRIPLES)(input) 34 | 35 | // compute stats 36 | val rdf_statistics = triples.stats 37 | .voidify(rdf_stats_file, output) 38 | } 39 | 40 | case class Config( 41 | in: String = "", 42 | out: String = "") 43 | 44 | // the CLI parser 45 | val parser = new scopt.OptionParser[Config]("RDF Dataset Statistics Example") { 46 | 47 | head("RDF Dataset Statistics Example") 48 | 49 | opt[String]('i', "input").required().valueName(""). 50 | action((x, c) => c.copy(in = x)). 51 | text("path to file that contains the data (in N-Triples format)") 52 | 53 | opt[String]('o', "out").required().valueName(""). 54 | action((x, c) => c.copy(out = x)). 
55 | text("the output directory") 56 | 57 | help("help").text("prints this usage text") 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/config: -------------------------------------------------------------------------------- 1 | { 2 | "sources": [ 3 | { 4 | "type": "csv", 5 | "options": { 6 | "header": "true", 7 | "delimiter": ",", 8 | "mode": "DROPMALFORMED" 9 | }, 10 | "source": "src/main/resources/Data/person.csv", 11 | "entity": "Person" 12 | } 13 | , { 14 | "type": "parquet", 15 | "options": { 16 | "spark_sql_parquet_filterPushdown": "true" 17 | }, 18 | "source": "src/main/resources/Data/review.parquet", 19 | "entity": "Review" 20 | } 21 | , { 22 | "type": "mongodb", 23 | "options": { 24 | "url": "127.0.0.1", 25 | "database": "bsbm", 26 | "collection": "offer", 27 | "options": "" 28 | }, 29 | "source": "//Offer", 30 | "entity": "Offer" 31 | } 32 | , { 33 | "type": "cassandra", 34 | "options": { 35 | "keyspace": "db", 36 | "table": "product" 37 | }, 38 | "source": "//Product", 39 | "entity": "Product" 40 | } 41 | , { 42 | "type": "jdbc", 43 | "options": { 44 | "url": "jdbc:mysql://localhost:3306/benchmark?useUnicode=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&autoReconnect=true&useSSL=false", 45 | 46 | "driver": "com.mysql.cj.jdbc.Driver", 47 | "dbtable": "producer", 48 | "user": "root", 49 | "password": "root" 50 | }, 51 | "source": "//Producer", 52 | "entity": "Producer" 53 | } 54 | ], 55 | "weights": [ 56 | { 57 | "datasource": "cassandra", 58 | "weight": 1 59 | }, 60 | { 61 | "datasource": "mongodb", 62 | "weight": 1 63 | }, 64 | { 65 | "datasource": "parquet", 66 | "weight": 1 67 | }, 68 | { 69 | "datasource": "csv", 70 | "weight": 1 71 | }, 72 | { 73 | "datasource": "jdbc", 74 | "weight": 1 75 | } 76 | ] 77 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/TripleWriter.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import java.io.File 4 | import java.net.URI 5 | 6 | import scala.collection.mutable 7 | 8 | import net.sansa_stack.rdf.spark.io._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.spark.sql.SparkSession 11 | 12 | object TripleWriter { 13 | 14 | def main(args: Array[String]) { 15 | parser.parse(args, Config()) match { 16 | case Some(config) => 17 | run(config.in, config.out) 18 | case None => 19 | println(parser.usage) 20 | } 21 | } 22 | 23 | def run(input: String, output: String): Unit = { 24 | 25 | val spark = SparkSession.builder 26 | .appName(s"Triple writer example ( $input )") 27 | .master("local[*]") 28 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 29 | .getOrCreate() 30 | 31 | println("======================================") 32 | println("| Triple writer example |") 33 | println("======================================") 34 | 35 | val lang = Lang.NTRIPLES 36 | val triples = spark.rdf(lang)(input) 37 | 38 | triples.saveAsNTriplesFile(output) 39 | 40 | spark.stop 41 | 42 | } 43 | 44 | case class Config( 45 | in: String = "", 46 | out: String = "") 47 | 48 | // the CLI parser 49 | val parser = new scopt.OptionParser[Config]("Triple writer example ") { 50 | 51 | head("Triple writer example ") 52 | 53 | opt[String]('i', "input").required().valueName(""). 
54 | action((x, c) => c.copy(in = x)). 55 | text("path to file that contains the data (in N-Triples format)") 56 | 57 | opt[String]('o', "out").required().valueName(""). 58 | action((x, c) => c.copy(out = x)). 59 | text("the output directory") 60 | 61 | help("help").text("prints this usage text") 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/HDTQuery.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import net.sansa_stack.query.spark.query._ 4 | import net.sansa_stack.rdf.spark.io._ 5 | import net.sansa_stack.rdf.spark.model._ 6 | import org.apache.jena.riot.Lang 7 | import org.apache.spark.sql.SparkSession 8 | 9 | object HDTQuery { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.in, config.query) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(input: String, query: String): Unit = { 21 | 22 | println("===========================================") 23 | println("| SANSA - HDT example |") 24 | println("===========================================") 25 | 26 | val spark = SparkSession.builder 27 | .master("local[*]") 28 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 29 | .appName("SANSA - HDT") 30 | .getOrCreate() 31 | 32 | val lang = Lang.NTRIPLES 33 | val triples = spark.rdf(lang)(input) 34 | 35 | val triples_hdt = triples.asHDT() 36 | 37 | val result = triples_hdt.sparqlHDT(query) 38 | 39 | result.show() 40 | 41 | spark.close() 42 | 43 | } 44 | 45 | case class Config(in: String = "", query: String = "") 46 | 47 | val parser = new scopt.OptionParser[Config]("SANSA - HDT example") { 48 | 49 | head(" SANSA - HDT example") 50 | 51 | opt[String]('i', "input").required().valueName(""). 52 | action((x, c) => c.copy(in = x)). 53 | text("path to file that contains the data (in N-Triples format)") 54 | 55 | opt[String]('q', "query").required().valueName("SPARQL query"). 56 | action((x, c) => c.copy(query = x)). 
57 | text("the SPARQL query") 58 | 59 | help("help").text("prints this usage text") 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/BorderFlowClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering._ 6 | import net.sansa_stack.ml.spark.clustering.algorithms.BorderFlow 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.model._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.log4j.{ Level, Logger } 11 | import org.apache.spark.sql.SparkSession 12 | 13 | object BorderFlowClustering { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String): Unit = { 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"BorderFlow example: ( $input )") 28 | .master("local[*]") 29 | .config("spark.hadoop.validateOutputSpecs", "false") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | println("============================================") 34 | println(s"| Border Flow example |") 35 | println("============================================") 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val borderflow = triples.cluster(ClusteringAlgorithm.BorderFlow).asInstanceOf[BorderFlow].run() 41 | 42 | borderflow.collect().foreach(println) 43 | 44 | spark.stop 45 | 46 | } 47 | 48 | case class Config(in: String = "") 49 | 50 | val parser = new scopt.OptionParser[Config]("BorderFlow") { 51 | 52 | head("BorderFlow: an example BorderFlow app.") 53 | 54 | opt[String]('i', "input").required().valueName(""). 55 | action((x, c) => c.copy(in = x)). 
56 | text("path to file contains the input files") 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/ml/clustering/RDFByModularityClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.flink.clustering.{ RDFByModularityClustering => RDFByModularityClusteringAlg } 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.api.scala.ExecutionEnvironment 8 | 9 | object RDFByModularityClustering { 10 | def main(args: Array[String]) { 11 | parser.parse(args, Config()) match { 12 | case Some(config) => 13 | run(config.in, config.out, config.numIterations) 14 | case None => 15 | println(parser.usage) 16 | } 17 | } 18 | 19 | def run(input: String, output: String, numIterations: Int): Unit = { 20 | 21 | println("============================================") 22 | println("| RDF By Modularity Clustering example |") 23 | println("============================================") 24 | 25 | val env = ExecutionEnvironment.getExecutionEnvironment 26 | 27 | RDFByModularityClusteringAlg(env, numIterations, input, output) 28 | 29 | } 30 | 31 | case class Config(in: String = "", out: String = "", numIterations: Int = 100) 32 | 33 | val defaultParams = Config() 34 | 35 | val parser = new scopt.OptionParser[Config]("RDF By Modularity Clustering") { 36 | 37 | head("RDF By Modularity Clustering: an example RDF By Modularity Clustering app using RDF Graph.") 38 | 39 | opt[String]('i', "input").required().valueName("") 40 | .text(s"path to file that contains the input files (in N-Triple format)") 41 | .action((x, c) => c.copy(in = x)) 42 | 43 | opt[String]('o', "output").valueName("") 44 | .text("the output directory") 45 | .action((x, c) => c.copy(out = x)) 46 | 47 | opt[Int]("numIterations") 48 | .text(s"number of iterations, default: ${defaultParams.numIterations}") 49 | .action((x, c) => c.copy(numIterations = x)) 50 | 51 | help("help").text("prints this usage text") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/RDFStats.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import java.io.File 4 | 5 | import scala.collection.mutable 6 | 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.stats._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.spark.sql.SparkSession 11 | 12 | 13 | object RDFStats { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in, config.out) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String, output: String): Unit = { 25 | 26 | val rdf_stats_file = new File(input).getName 27 | 28 | val spark = SparkSession.builder 29 | .appName(s"RDF Dataset Statistics example $rdf_stats_file") 30 | .master("local[*]") 31 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 32 | .getOrCreate() 33 | 34 | println("======================================") 35 | println("| RDF Statistic example |") 36 | println("======================================") 37 | 38 | val lang = Lang.NTRIPLES 39 | val triples = spark.rdf(lang)(input) 
40 | 41 | // compute criterias 42 | val stats = triples.stats 43 | .voidify(rdf_stats_file, output) 44 | } 45 | 46 | // the config object 47 | case class Config(in: String = "", out: String = "") 48 | 49 | // the CLI parser 50 | val parser = new scopt.OptionParser[Config]("RDF Dataset Statistics Example") { 51 | 52 | head("RDF Dataset Statistics Example") 53 | 54 | opt[String]('i', "input").required().valueName(""). 55 | action((x, c) => c.copy(in = x)). 56 | text("path to file that contains the data (in N-Triples format)") 57 | 58 | opt[String]('o', "out").required().valueName(""). 59 | action((x, c) => c.copy(out = x)). 60 | text("the output directory") 61 | 62 | help("help").text("prints this usage text") 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/css/bde-flink.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | .mainHeading { 6 | margin: 0; 7 | color: white; 8 | border: none; 9 | background: #A94F74; 10 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 11 | } 12 | 13 | .mainHeading > h1 > div { 14 | top: 20px !important; 15 | right: 20px !important; 16 | } 17 | 18 | #jobsContents { 19 | padding: 10px; 20 | border: 0; 21 | border-radius: 2px; 22 | background: white; 23 | box-shadow: 0 1px 3px 0 rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 2px 1px -1px rgba(0,0,0,.12); 24 | } 25 | 26 | #jobsContents + div { 27 | padding: 10px; 28 | border: 0; 29 | border-radius: 2px; 30 | background: white; 31 | box-shadow: 0 1px 3px 0 rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 2px 1px -1px rgba(0,0,0,.12); 32 | } 33 | 34 | div.canvas { 35 | margin: 0; 36 | } 37 | 38 | .footer .boxed { 39 | height: auto !important; 40 | width: auto !important; 41 | padding: 0 20px; 42 | border: none; 43 | border-radius: 2px; 44 | background: white; 45 | box-shadow: 0 1px 3px 0 rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 2px 1px -1px rgba(0,0,0,.12); 46 | } 47 | 48 | .footer .boxed > div { 49 | margin-top: 20px !important; 50 | } 51 | 52 | .footer .boxed > div #suspendedOption > span { 53 | position: absolute; 54 | } 55 | 56 | .footer .boxed:first-of-type > .footer { 57 | padding: 0 20px 10px; 58 | } 59 | 60 | .footer .boxed:nth-of-type(2) > .footer { 61 | position: relative; 62 | padding: 0 0 10px; 63 | } 64 | 65 | #upload_file_name_text { 66 | width: auto !important; 67 | } 68 | 69 | #upload_form > div { 70 | margin-top: 20px !important; 71 | } 72 | 73 | #upload_file_name_text { 74 | position: relative !important; 75 | top: auto !important; 76 | right: auto !important; 77 | width: calc(100% - 85px) !important; 78 | } 79 | 80 | #upload_file_input { 81 | height: 100% !important; 82 | width: 75px; 83 | padding: 0; 84 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/PageRank.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.spark.io._ 6 | import net.sansa_stack.rdf.spark.model._ 7 | import org.apache.jena.riot.Lang 8 | import org.apache.spark.graphx.Graph 9 | import org.apache.spark.sql.SparkSession 10 | 11 | /* 12 | * Computes the PageRank of Resources from an input .nt file. 
13 | */ 14 | object PageRank { 15 | 16 | def main(args: Array[String]) { 17 | parser.parse(args, Config()) match { 18 | case Some(config) => 19 | run(config.in) 20 | case None => 21 | println(parser.usage) 22 | } 23 | } 24 | def run(input: String): Unit = { 25 | 26 | println("======================================") 27 | println("| PageRank of resources example |") 28 | println("======================================") 29 | 30 | val spark = SparkSession.builder 31 | .appName(s"PageRank of resources example ( $input )") 32 | .master("local[*]") 33 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 34 | // .config("spark.kryo.registrator", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator") 35 | .getOrCreate() 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val graph = triples.asGraph() 41 | 42 | val pagerank = graph.pageRank(0.00001).vertices 43 | val report = pagerank.join(graph.vertices) 44 | .map({ case (k, (r, v)) => (r, v, k) }) 45 | .sortBy(50 - _._1) 46 | 47 | report.take(50).foreach(println) 48 | 49 | spark.stop 50 | 51 | } 52 | case class Config(in: String = "") 53 | 54 | // the CLI parser 55 | val parser = new scopt.OptionParser[Config]("PageRank of resources example") { 56 | 57 | head(" PageRank of resources example") 58 | 59 | opt[String]('i', "input").required().valueName(""). 60 | action((x, c) => c.copy(in = x)). 61 | text("path to file that contains the data (in N-Triples format)") 62 | help("help").text("prints this usage text") 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/owl/OWLReaderRDD.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.owl 2 | 3 | import net.sansa_stack.owl.spark.owl._ 4 | import org.apache.spark.sql.SparkSession 5 | 6 | 7 | object OWLReaderRDD { 8 | 9 | def main(args: Array[String]) { 10 | parser.parse(args, Config()) match { 11 | case Some(config) => 12 | run(config.in, config.syntax) 13 | case None => 14 | println(parser.usage) 15 | } 16 | } 17 | 18 | def run(input: String, syntax: String): Unit = { 19 | 20 | println(".============================================.") 21 | println("| RDD OWL reader example (" + syntax + " syntax)|") 22 | println("============================================") 23 | 24 | val spark = SparkSession.builder 25 | .appName(s"OWL reader example ( $input + )($syntax)") 26 | .master("local[*]") 27 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 28 | .config("spark.kryo.registrator", "net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator") 29 | .getOrCreate() 30 | 31 | val rdd = syntax match { 32 | case "fun" => spark.owl(Syntax.FUNCTIONAL)(input) 33 | case "manch" => spark.owl(Syntax.MANCHESTER)(input) 34 | case "owl_xml" => spark.owl(Syntax.OWLXML)(input) 35 | case _ => 36 | throw new RuntimeException("Invalid syntax type: '" + syntax + "'") 37 | } 38 | 39 | rdd.take(10).foreach(println(_)) 40 | spark.stop() 41 | } 42 | 43 | case class Config( 44 | in: String = "", 45 | syntax: String = "") 46 | 47 | // the CLI parser 48 | val parser = new scopt.OptionParser[Config]("RDD OWL reader example") { 49 | 50 | head("RDD OWL reader example") 51 | 52 | opt[String]('i', "input").required().valueName(""). 53 | action((x, c) => c.copy(in = x)). 
54 | text("path to file that contains the data") 55 | 56 | opt[String]('s', "syntax").required().valueName("{fun | manch | owl_xml}"). 57 | action((x, c) => c.copy(syntax = x)). 58 | text("the syntax format") 59 | 60 | help("help").text("prints this usage text") 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/Semantic.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import net.sansa_stack.query.spark.semantic.QuerySystem 4 | import net.sansa_stack.rdf.spark.io._ 5 | import net.sansa_stack.rdf.spark.partition._ 6 | import org.apache.jena.riot.Lang 7 | import org.apache.spark.sql.SparkSession 8 | 9 | /** 10 | * Run SPARQL queries over Spark using Semantic partitioning approach. 11 | * 12 | * @author Gezim Sejdiu 13 | */ 14 | object Semantic { 15 | 16 | def main(args: Array[String]) { 17 | parser.parse(args, Config()) match { 18 | case Some(config) => 19 | run(config.in, config.queries) 20 | case None => 21 | println(parser.usage) 22 | } 23 | } 24 | 25 | def run(input: String, queries: String): Unit = { 26 | 27 | println("===========================================") 28 | println("| SANSA - Semantic Partitioning example |") 29 | println("===========================================") 30 | 31 | val spark = SparkSession.builder 32 | .master("local[*]") 33 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 34 | .appName("SANSA - Semantic Partitioning") 35 | .getOrCreate() 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val partitionData = triples.partitionGraphAsSemantic() 41 | 42 | val result = new QuerySystem(partitionData, queries).run() 43 | result.take(5).foreach(println) 44 | 45 | spark.close() 46 | 47 | } 48 | 49 | case class Config(in: String = "", queries: String = "") 50 | 51 | val parser = new scopt.OptionParser[Config]("SANSA - Semantic Partitioning example") { 52 | 53 | head(" SANSA - Semantic Partitioning example") 54 | 55 | opt[String]('i', "input").required().valueName(""). 56 | action((x, c) => c.copy(in = x)). 57 | text("path to file that contains the data (in N-Triples format)") 58 | 59 | opt[String]('q', "queries").required().valueName(""). 60 | action((x, c) => c.copy(queries = x)).
61 | text("path to the file containing the SPARQL query") 62 | 63 | help("help").text("prints this usage text") 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/owl/OWLReaderDataSet.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.owl 2 | 3 | import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer 4 | 5 | import scala.collection.mutable 6 | import net.sansa_stack.owl.flink.owl._ 7 | import org.apache.flink.api.scala.ExecutionEnvironment 8 | 9 | 10 | object OWLReaderDataSet { 11 | 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in, config.syntax) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String, syntax: String): Unit = { 22 | 23 | println(".============================================.") 24 | println("| Dataset OWL reader example (" + syntax + " syntax)|") 25 | println(".============================================.") 26 | 27 | val env = ExecutionEnvironment.getExecutionEnvironment 28 | // scalastyle:off classforname 29 | env.getConfig.addDefaultKryoSerializer( 30 | Class.forName("java.util.Collections$UnmodifiableCollection"), 31 | classOf[UnmodifiableCollectionsSerializer]) 32 | // scalastyle:on classforname 33 | 34 | val dataSet = syntax match { 35 | case "fun" => env.owl(Syntax.FUNCTIONAL)(input) 36 | case "manch" => env.owl(Syntax.MANCHESTER)(input) 37 | case "owl_xml" => 38 | throw new RuntimeException("'" + syntax + "' - Not supported, yet.") 39 | case _ => 40 | throw new RuntimeException("Invalid syntax type: '" + syntax + "'") 41 | } 42 | 43 | dataSet.first(10).print() 44 | 45 | } 46 | 47 | case class Config( 48 | in: String = "", 49 | syntax: String = "") 50 | 51 | // the CLI parser 52 | val parser = new scopt.OptionParser[Config]("Dataset OWL reader example") { 53 | 54 | head("Dataset OWL reader example") 55 | 56 | opt[String]('i', "input").required().valueName(""). 57 | action((x, c) => c.copy(in = x)). 58 | text("path to file that contains the data") 59 | 60 | opt[String]('s', "syntax").required().valueName("{fun | manch | owl_xml}"). 61 | action((x, c) => c.copy(syntax = x)). 
62 | text("the syntax format") 63 | 64 | help("help").text("prints this usage text") 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/SilviaClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering._ 6 | import net.sansa_stack.ml.spark.clustering.algorithms.SilviaClustering 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.model._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.log4j.{ Level, Logger } 11 | import org.apache.spark.sql.SparkSession 12 | 13 | object SilviaClusteringExample { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in, config.out) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String, output: String): Unit = { 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"SilviaClustering example ( $input )") 28 | .master("local[*]") 29 | .config("spark.hadoop.validateOutputSpecs", "false") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | println("============================================") 34 | println("| Silvia Clustering example |") 35 | println("============================================") 36 | 37 | Logger.getRootLogger.setLevel(Level.WARN) 38 | 39 | val lang = Lang.NTRIPLES 40 | val triples = spark.rdf(lang)(input) 41 | 42 | val silvia = triples.cluster(ClusteringAlgorithm.SilviaClustering).asInstanceOf[SilviaClustering].run() 43 | 44 | silvia.collect.foreach(println) 45 | 46 | spark.stop 47 | 48 | } 49 | 50 | case class Config(in: String = "", out: String = "") 51 | 52 | val parser = new scopt.OptionParser[Config]("SilviaClustering") { 53 | 54 | head("SilviaClustering: an example SilviaClustering app.") 55 | 56 | opt[String]('i', "input").required().valueName(""). 57 | action((x, c) => c.copy(in = x)). 58 | text("path to file contains the input files") 59 | 60 | opt[String]('o', "output").optional().valueName(""). 61 | action((x, c) => c.copy(out = x)). 
62 | text("the output directory") 63 | 64 | help("help").text("prints this usage text") 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/RDFByModularityClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering.algorithms.{ RDFByModularityClustering => RDFByModularityClusteringAlg } 6 | import org.apache.log4j.{ Level, Logger } 7 | import org.apache.spark.sql.SparkSession 8 | 9 | 10 | object RDFByModularityClustering { 11 | 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in, config.out, config.numIterations) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String, output: String, numIterations: Int): Unit = { 22 | 23 | val spark = SparkSession.builder 24 | .appName(s"RDF By Modularity Clustering example example ( $input )") 25 | .master("local[*]") 26 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 27 | .getOrCreate() 28 | 29 | println("============================================") 30 | println("| RDF By Modularity Clustering example |") 31 | println("============================================") 32 | 33 | Logger.getRootLogger.setLevel(Level.ERROR) 34 | 35 | RDFByModularityClusteringAlg(spark.sparkContext, numIterations, input, output) 36 | 37 | spark.stop 38 | 39 | } 40 | 41 | case class Config(in: String = "", out: String = "", numIterations: Int = 100) 42 | 43 | val defaultParams = Config() 44 | 45 | val parser = new scopt.OptionParser[Config]("RDF By Modularity Clustering") { 46 | 47 | head("RDF By Modularity Clustering: an example RDF By Modularity Clustering app using RDF Graph.") 48 | 49 | opt[String]('i', "input").required().valueName("") 50 | .text(s"path to file that contains the input files (in N-Triple format)") 51 | .action((x, c) => c.copy(in = x)) 52 | 53 | opt[String]('o', "output").valueName("") 54 | .text("the output directory") 55 | .action((x, c) => c.copy(out = x)) 56 | 57 | opt[Int]("numIterations") 58 | .text(s"number of iterations, default: ${defaultParams.numIterations}") 59 | .action((x, c) => c.copy(numIterations = x)) 60 | 61 | help("help").text("prints this usage text") 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/owl/OWLReaderDataset.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.owl 2 | 3 | import net.sansa_stack.owl.spark.dataset.{ FunctionalSyntaxOWLAxiomsDatasetBuilder, ManchesterSyntaxOWLAxiomsDatasetBuilder } 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object OWLReaderDataset { 7 | 8 | def main(args: Array[String]) { 9 | parser.parse(args, Config()) match { 10 | case Some(config) => 11 | run(config.in, config.syntax) 12 | case None => 13 | println(parser.usage) 14 | } 15 | } 16 | 17 | def run(input: String, syntax: String): Unit = { 18 | 19 | println(".============================================.") 20 | println("| Dataset OWL reader example (" + syntax + " syntax)|") 21 | println(".============================================.") 22 | 23 | val spark = SparkSession.builder 24 | 
.appName(s"Dataset OWL reader ( $input + )($syntax)") 25 | .master("local[*]") 26 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 27 | .config("spark.kryo.registrator", "net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator") 28 | .getOrCreate() 29 | 30 | val dataset = syntax match { 31 | case "fun" => FunctionalSyntaxOWLAxiomsDatasetBuilder.build(spark, input) 32 | case "manch" => ManchesterSyntaxOWLAxiomsDatasetBuilder.build(spark, input) 33 | case "owl_xml" => 34 | throw new RuntimeException("'" + syntax + "' - Not supported, yet.") 35 | case _ => 36 | throw new RuntimeException("Invalid syntax type: '" + syntax + "'") 37 | } 38 | 39 | dataset.take(10).foreach(println(_)) 40 | spark.stop() 41 | } 42 | 43 | case class Config( 44 | in: String = "", 45 | syntax: String = "") 46 | 47 | // the CLI parser 48 | val parser = new scopt.OptionParser[Config]("Dataset OWL reader example") { 49 | 50 | head("Dataset OWL reader example") 51 | 52 | opt[String]('i', "input").required().valueName(""). 53 | action((x, c) => c.copy(in = x)). 54 | text("path to file that contains the data") 55 | 56 | opt[String]('s', "syntax").required().valueName("{fun | manch | owl_xml}"). 57 | action((x, c) => c.copy(syntax = x)). 58 | text("the syntax format") 59 | 60 | help("help").text("prints this usage text") 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/DataLake.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import net.sansa_stack.query.spark.datalake.DataLakeEngine 4 | import org.apache.spark.sql.SparkSession 5 | 6 | /** 7 | * Run SPARQL queries over Spark using Data Lake approach. 8 | */ 9 | object DataLake { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.queryFile, config.mappingsFile, config.configFile) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(queryFile: String, mappingsFile: String, configFile: String): Unit = { 21 | 22 | println("======================================") 23 | println("| DataLake (CSV) example |") 24 | println("======================================") 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"DataLake (CSV) example") 28 | .master("local[*]") 29 | .getOrCreate() 30 | 31 | // val result = spark.sparqlDL(queryFile, mappingsFile, configFile) 32 | val result = DataLakeEngine.run(queryFile, mappingsFile, configFile, spark) 33 | result.show() 34 | 35 | spark.stop 36 | 37 | } 38 | 39 | case class Config( 40 | queryFile: String = getClass.getResource("/datalake/queries/Q1.sparql").getPath, 41 | mappingsFile: String = getClass.getResource("/datalake/config").getPath, 42 | configFile: String = getClass.getResource("/datalake/mappings.ttl").getPath) 43 | 44 | val parser = new scopt.OptionParser[Config]("Sparqlify example") { 45 | 46 | head(" DataLake (CSV) example") 47 | 48 | opt[String]('f', "queryFile").valueName(""). 49 | action((x, c) => c.copy(queryFile = x)). 50 | text("a file containing SPARQL queries or a single query, default: /queries/Q1.sparql") 51 | 52 | opt[String]('m', "mappingsFile").valueName(""). 53 | action((x, c) => c.copy(mappingsFile = x)). 54 | text("the mappings to the target sources, default: /config_csv-only") 55 | 56 | opt[String]('c', "configFile").optional().valueName(""). 
57 | action((x, c) => c.copy(configFile = x)). 58 | text("configuration file for different data sources, default: /mappings_csv-only.ttl") 59 | 60 | help("help").text("prints this usage text") 61 | } 62 | 63 | } 64 | 65 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/kernel/RDFGraphKernel.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.kernel 2 | 3 | import net.sansa_stack.ml.spark.kernel._ 4 | import net.sansa_stack.rdf.spark.io._ 5 | import org.apache.jena.riot.Lang 6 | import org.apache.spark.sql.SparkSession 7 | 8 | /** 9 | * RDF Graph Kernel example. 10 | */ 11 | object RDFGraphKernel { 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in, config.iteration) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String, iteration: Int = 5): Unit = { 22 | 23 | println("======================================") 24 | println("| RDF Graph Kernel example |") 25 | println("======================================") 26 | 27 | val spark = SparkSession.builder 28 | .appName(s" RDF Graph Kernel example ( $input )") 29 | .master("local[*]") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | val t0 = System.nanoTime 34 | val lang = Lang.NTRIPLES 35 | 36 | val triples = spark.rdf(lang)(input) 37 | .filter(_.getPredicate.getURI != "http://swrc.ontoware.org/ontology#employs") 38 | 39 | val rdfFastGraphKernel = RDFFastGraphKernel(spark, triples, "http://swrc.ontoware.org/ontology#affiliation") 40 | val data = rdfFastGraphKernel.getMLLibLabeledPoints 41 | 42 | val t1 = System.nanoTime 43 | RDFFastTreeGraphKernelUtil.printTime("Initialization", t0, t1) 44 | 45 | RDFFastTreeGraphKernelUtil.predictLogisticRegressionMLLIB(data, 4, iteration) 46 | 47 | val t2 = System.nanoTime 48 | RDFFastTreeGraphKernelUtil.printTime("Run Prediction", t1, t2) 49 | 50 | } 51 | 52 | case class Config( 53 | in: String = "", 54 | iteration: Int = 5) 55 | 56 | val parser = new scopt.OptionParser[Config]("Mines the Rules example") { 57 | 58 | head("Mines the Rules example") 59 | 60 | opt[String]('i', "input").required().valueName(""). 61 | action((x, c) => c.copy(in = x)). 62 | text("path to file that contains the data") 63 | 64 | opt[Int]('k', "iteration").required().valueName(""). 65 | action((x, c) => c.copy(iteration = x)). 66 | text("the iteration or folding on validation") 67 | 68 | help("help").text("prints this usage text") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /sansa-examples-spark/README.md: -------------------------------------------------------------------------------- 1 | # SANSA-Examples on Apache Spark 2 | This is a SANSA-Examples repo for Apache Spark. 3 | 4 | ## Running the application on a Spark standalone cluster 5 | 6 | To run the application on a standalone Spark cluster 7 | 8 | 1. Setup a Spark cluster 9 | 2. Build the application with Maven 10 | 11 | ``` 12 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 13 | cd SANSA-Examples/sansa-examples-spark 14 | 15 | mvn clean package 16 | 17 | ``` 18 | 19 | 3. Submit the application to the Spark cluster 20 | 21 | ``` 22 | spark-submit \ 23 | --class net.sansa_stack.examples.spark.. 
\ 24 | --master spark://spark-master:7077 \ 25 | /app/application.jar \ 26 | SPARK_APPLICATION_ARGUMENTS 27 | ``` 28 | 29 | ## Running the application on a Spark standalone cluster via Spark Docker using BDE Platform 30 | 31 | To run the SANSA-Examples application on the BDE platform, execute the following commands: 32 | 33 | ``` 34 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 35 | cd SANSA-Examples/sansa-examples-spark 36 | 37 | make --directory config/csswrapper/ hosts 38 | 39 | docker network create hadoop 40 | 41 | docker-compose up -d 42 | ``` 43 | Note: To make it run, you may need to modify your /etc/hosts file. There is a Makefile that will do this automatically for you (you should clean up your /etc/hosts after the demo). 44 | 45 | After the BDE platform is up and running, let’s load some data into HDFS using the Hue FileBrowser running in our network. To perform these actions, navigate to the 'hue' tab at http://demo.sansa-stack.local. Use the “hue” username with any password to log in to the FileBrowser (the “hue” user is set up as a proxy user for HDFS, see hadoop.env for the configuration parameters). Click on “File Browser” in the upper right corner of the screen and use the GUI to create the /user/root/input and /user/root/output folders and upload the data file into the /input folder. 46 | Go to the HDFS tab at http://demo.sansa-stack.local and check that the file exists under the path ‘/user/root/input/yourfile’. 47 | 48 | Now that all the configuration needed for our example is in place, let’s run sansa-examples. 49 | 50 | ``` 51 | docker build --rm=true -t sansa/sansa-examples-spark . 52 | ``` 53 | Then run the image: 54 | ``` 55 | docker run --name sansa-examples-spark-app --net hadoop --link spark-master:spark-master \ 56 | -e ENABLE_INIT_DAEMON=false \ 57 | -d sansa/sansa-examples-spark 58 | 59 | ``` 60 | 61 | -------------------------------------------------------------------------------- /sansa-examples-flink/README.md: -------------------------------------------------------------------------------- 1 | # SANSA-Examples on Apache Flink 2 | This is a SANSA-Examples repo for Apache Flink. 3 | 4 | ## Running the application on a Flink standalone cluster 5 | 6 | To run the application on a standalone Flink cluster 7 | 8 | 1. Setup a Flink cluster 9 | 2. Build the application with Maven 10 | 11 | ``` 12 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 13 | cd SANSA-Examples/sansa-examples-flink 14 | 15 | mvn clean package 16 | 17 | ``` 18 | 19 | 3. Submit the application to the Flink cluster 20 | 21 | ``` 22 | cd /path/to/flink/installation 23 | ./bin/flink run -c \ 24 | net.sansa_stack.examples.flink.. \ 25 | /app/application.jar \ 26 | FLINK_APPLICATION_ARGUMENTS 27 | ``` 28 | 29 | ## Running the application on a Flink standalone cluster via Flink Docker using BDE Platform 30 | 31 | To run the SANSA-Examples application on the BDE platform, execute the following commands: 32 | 33 | ``` 34 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 35 | cd SANSA-Examples/sansa-examples-flink 36 | 37 | make --directory config/csswrapper/ hosts 38 | 39 | docker network create hadoop 40 | 41 | docker-compose up -d 42 | ``` 43 | Note: To make it run, you may need to modify your /etc/hosts file. There is a Makefile that will do this automatically for you (you should clean up your /etc/hosts after the demo). 44 | 45 | After the BDE platform is up and running, let’s load some data into HDFS using the Hue FileBrowser running in our network.
To perform these actions, navigate to the 'hue' tab at http://demo.sansa-stack.local. Use the “hue” username with any password to log in to the FileBrowser (the “hue” user is set up as a proxy user for HDFS, see hadoop.env for the configuration parameters). Click on “File Browser” in the upper right corner of the screen and use the GUI to create the /user/root/input and /user/root/output folders and upload the data file into the /input folder. 46 | Go to the HDFS tab at http://demo.sansa-stack.local and check that the file exists under the path ‘/user/root/input/yourfile’. 47 | 48 | Now that all the configuration needed for our example is in place, let’s run sansa-examples. 49 | 50 | ``` 51 | docker build --rm=true -t sansa/sansa-examples-flink . 52 | ``` 53 | Then run the image: 54 | ``` 55 | docker run --name flink-entityrank-app --net hadoop --link flink-master:flink-master \ 56 | -e ENABLE_INIT_DAEMON=false \ 57 | -e FLINK_MASTER_PORT_6123_TCP_ADDR=flink-master \ 58 | -e FLINK_MASTER_PORT_6123_TCP_PORT=6123 \ 59 | -d sansa/sansa-examples-flink 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/TripleOps.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.flink.io._ 6 | import net.sansa_stack.rdf.flink.model._ 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.api.scala.ExecutionEnvironment 9 | import org.apache.jena.graph.{Node, NodeFactory} 10 | import org.apache.jena.riot.Lang 11 | 12 | object TripleOps { 13 | def main(args: Array[String]) { 14 | parser.parse(args, Config()) match { 15 | case Some(config) => 16 | run(config.in) 17 | case None => 18 | println(parser.usage) 19 | } 20 | } 21 | 22 | def run(input: String): Unit = { 23 | 24 | println("======================================") 25 | println("| Triple Ops example |") 26 | println("======================================") 27 | val env = ExecutionEnvironment.getExecutionEnvironment 28 | 29 | val triples = env.rdf(Lang.NTRIPLES)(input) 30 | 31 | triples.getTriples().collect().take(4).foreach(println(_)) 32 | // Triples filtered by subject ( "http://commons.dbpedia.org/resource/Category:Places" ) 33 | println("All triples with subject Category:Places:\n" + triples.find(Some(NodeFactory.createURI("http://commons.dbpedia.org/resource/Category:Places")), None, None).collect().mkString("\n")) 34 | 35 | // Triples filtered by predicate ( "http://dbpedia.org/ontology/influenced" ) 36 | println("All triples for predicate influenced:\n" + triples.find(None, Some(NodeFactory.createURI("http://dbpedia.org/ontology/influenced")), None).collect().mkString("\n")) 37 | 38 | // Triples filtered by object ( ) 39 | println("All triples influenced by Henry_James:\n" + triples.find(None, None, Some(NodeFactory.createURI(""))).collect().mkString("\n")) 40 | 41 | // println("Number of triples: " + rdfgraph.triples.distinct.count()) 42 | println("Number of subjects: " + triples.getSubjects.map(_.toString).distinct().count) 43 | println("Number of predicates: " + triples.getPredicates.map(_.toString).distinct.count()) 44 | println("Number of objects: " + triples.getObjects.map(_.toString).distinct.count()) 45 | 46 | } 47 | case class Config(in: String = "") 48 | 49 | val parser = new scopt.OptionParser[Config]("Triple Ops example") { 50 | 51 | head(" Triple Ops example") 52 | 53 | opt[String]('i',
"input").required().valueName(""). 54 | action((x, c) => c.copy(in = x)). 55 | text("path to file that contains the data (in N-Triples format)") 56 | help("help").text("prints this usage text") 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/RDFGraphPIClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering._ 6 | import net.sansa_stack.ml.spark.clustering.algorithms.RDFGraphPowerIterationClustering 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.model._ 9 | import org.apache.jena.riot.{ Lang, RDFDataMgr } 10 | import org.apache.log4j.{ Level, Logger } 11 | import org.apache.spark.sql.SparkSession 12 | 13 | object RDFGraphPIClustering { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in, config.out, config.k, config.maxIterations) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String, output: String, k: Int, maxIterations: Int): Unit = { 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"Power Iteration Clustering example ( $input )") 28 | .master("local[*]") 29 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 30 | .getOrCreate() 31 | System.setProperty("spark.akka.frameSize", "2000") 32 | 33 | println("============================================") 34 | println("| Power Iteration Clustering example |") 35 | println("============================================") 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val cluster = triples.cluster(ClusteringAlgorithm.RDFGraphPowerIterationClustering).asInstanceOf[RDFGraphPowerIterationClustering] 41 | .setK(k).setMaxIterations(maxIterations).run() 42 | 43 | cluster.collect.foreach(println) 44 | 45 | spark.stop 46 | 47 | } 48 | 49 | case class Config(in: String = "", out: String = "", k: Int = 2, maxIterations: Int = 5) 50 | 51 | val defaultParams = Config() 52 | 53 | val parser = new scopt.OptionParser[Config]("RDFGraphPIClustering") { 54 | 55 | head("PowerIterationClusteringExample: an example PIC app using concentric circles.") 56 | 57 | opt[String]('i', "input").required().valueName("") 58 | .text(s"path (local/hdfs) to file that contains the input files (in N-Triple format)") 59 | .action((x, c) => c.copy(in = x)) 60 | 61 | opt[String]('o', "out").required().valueName(""). 62 | action((x, c) => c.copy(out = x)). 
63 | text("the output directory") 64 | 65 | opt[Int]('k', "k") 66 | .text(s"number of circles (/clusters), default: ${defaultParams.k}") 67 | .action((x, c) => c.copy(k = x)) 68 | 69 | opt[Int]("maxIterations") 70 | .text(s"number of iterations, default: ${defaultParams.maxIterations}") 71 | .action((x, c) => c.copy(maxIterations = x)) 72 | 73 | help("help").text("prints this usage text") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/mining/MineRules.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.mining 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.mining.amieSpark.{ DfLoader, RDFGraphLoader } 6 | import net.sansa_stack.ml.spark.mining.amieSpark.KBObject.KB 7 | import net.sansa_stack.ml.spark.mining.amieSpark.MineRules.Algorithm 8 | import org.apache.spark.sql.SparkSession 9 | 10 | /* 11 | * Mine Rules 12 | * 13 | */ 14 | object MineRules { 15 | 16 | def main(args: Array[String]) { 17 | parser.parse(args, Config()) match { 18 | case Some(config) => 19 | run(config.in, config.out) 20 | case None => 21 | println(parser.usage) 22 | } 23 | } 24 | 25 | def run(input: String, outputPath: String): Unit = { 26 | 27 | println("======================================") 28 | println("| Mines the Rules example |") 29 | println("======================================") 30 | 31 | val spark = SparkSession.builder 32 | .appName(s" Mines the Rules example ( $input )") 33 | .master("local[*]") 34 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 35 | .getOrCreate() 36 | 37 | val hdfsPath = outputPath + "/" 38 | 39 | val know = new KB() 40 | know.sethdfsPath(hdfsPath) 41 | know.setKbSrc(input) 42 | 43 | know.setKbGraph(RDFGraphLoader.loadFromFile(input, spark.sparkContext, 2)) 44 | know.setDFTable(DfLoader.loadFromFileDF(know.getKbSrc, spark.sparkContext, spark.sqlContext, 2)) 45 | 46 | val algo = new Algorithm(know, 0.01, 3, 0.1, hdfsPath) 47 | 48 | // var erg = algo.ruleMining(sparkSession.sparkContext, sparkSession.sqlContext) 49 | // println(erg) 50 | var output = algo.ruleMining(spark.sparkContext, spark.sqlContext) 51 | 52 | var outString = output.map { x => 53 | var rdfTrp = x.getRule() 54 | var temp = "" 55 | for (i <- 0 to rdfTrp.length - 1) { 56 | if (i == 0) { 57 | temp = rdfTrp(i) + " <= " 58 | } else { 59 | temp += rdfTrp(i) + """ \u2227 """ 60 | } 61 | } 62 | temp = temp.stripSuffix(" \u2227 ") 63 | temp 64 | }.toSeq 65 | var rddOut = spark.sparkContext.parallelize(outString).repartition(1) 66 | 67 | rddOut.saveAsTextFile(outputPath + "/testOut") 68 | } 69 | 70 | case class Config( 71 | in: String = "", 72 | out: String = "") 73 | 74 | val parser = new scopt.OptionParser[Config]("Mines the Rules example") { 75 | 76 | head("Mines the Rules example") 77 | 78 | opt[String]('i', "input").required().valueName(""). 79 | action((x, c) => c.copy(in = x)). 80 | text("path to file that contains the data") 81 | 82 | opt[String]('o', "out").required().valueName(""). 83 | action((x, c) => c.copy(out = x)). 
84 | text("the output directory") 85 | 86 | help("help").text("prints this usage text") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/TripleOps.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.spark.io._ 6 | import net.sansa_stack.rdf.spark.model._ 7 | import org.apache.jena.graph.NodeFactory 8 | import org.apache.jena.riot.Lang 9 | import org.apache.spark.sql.SparkSession 10 | 11 | object TripleOps { 12 | 13 | def main(args: Array[String]) { 14 | parser.parse(args, Config()) match { 15 | case Some(config) => 16 | run(config.in) 17 | case None => 18 | println(parser.usage) 19 | } 20 | } 21 | 22 | def run(input: String): Unit = { 23 | 24 | val spark = SparkSession.builder 25 | .appName(s"Triple Ops example $input") 26 | .master("local[*]") 27 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 28 | .getOrCreate() 29 | 30 | println("======================================") 31 | println("| Triple Ops example |") 32 | println("======================================") 33 | 34 | val lang = Lang.NTRIPLES 35 | val triples = spark.rdf(lang)(input) 36 | 37 | // Triples filtered by subject ( "http://dbpedia.org/resource/Charles_Dickens" ) 38 | println("All triples related to Dickens:\n" + triples.find(Some(NodeFactory.createURI("http://dbpedia.org/resource/Charles_Dickens")), None, None).collect().mkString("\n")) 39 | 40 | // Triples filtered by predicate ( "http://dbpedia.org/ontology/influenced" ) 41 | println("All triples for predicate influenced:\n" + triples.find(None, Some(NodeFactory.createURI("http://dbpedia.org/ontology/influenced")), None).collect().mkString("\n")) 42 | 43 | // Triples filtered by object ( ) 44 | println("All triples influenced by Henry_James:\n" + triples.find(None, None, Some(NodeFactory.createURI("http://dbpedia.org/resource/Henry_James"))).collect().mkString("\n")) 45 | 46 | println("Number of triples: " + triples.distinct.count()) 47 | println("Number of subjects: " + triples.getSubjects.distinct.count()) 48 | println("Number of predicates: " + triples.getPredicates.distinct.count()) 49 | println("Number of objects: " + triples.getObjects.distinct.count()) 50 | 51 | val subjects = triples.filterSubjects(_.isURI()).collect.mkString("\n") 52 | 53 | val predicates = triples.filterPredicates(_.isVariable()).collect.mkString("\n") 54 | val objects = triples.filterObjects(_.isLiteral()).collect.mkString("\n") 55 | 56 | // graph.getTriples.take(5).foreach(println(_)) 57 | 58 | spark.stop 59 | 60 | } 61 | // the config object 62 | case class Config(in: String = "") 63 | 64 | // the CLI parser 65 | val parser = new scopt.OptionParser[Config]("Triple Ops example") { 66 | 67 | head(" Triple Ops example") 68 | 69 | opt[String]('i', "input").required().valueName(""). 70 | action((x, c) => c.copy(in = x)). 
71 | text("path to file that contains the data (in N-Triples format)") 72 | help("help").text("prints this usage text") 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/inference/axioms/RDFGraphInference.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.inference.axioms 2 | 3 | import net.sansa_stack.inference.rules.ReasoningProfile 4 | import net.sansa_stack.inference.rules.ReasoningProfile._ 5 | import net.sansa_stack.inference.spark.forwardchaining.axioms.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS} 6 | import net.sansa_stack.owl.spark.owl._ 7 | import org.apache.spark.sql.SparkSession 8 | 9 | object RDFGraphInference { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.in, config.profile, config.parallelism) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(input: String, profile: ReasoningProfile, parallelism: Int): Unit = { 21 | 22 | // the SPARK config 23 | val spark = SparkSession.builder 24 | .appName(s"SPARK $profile Reasoning") 25 | .master("local[*]") 26 | .config("spark.hadoop.validateOutputSpecs", "false") // override output files 27 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 28 | .config("spark.default.parallelism", parallelism) 29 | .config("spark.ui.showConsoleProgress", "false") 30 | .config("spark.sql.shuffle.partitions", parallelism) 31 | .getOrCreate() 32 | 33 | // load axioms from disk 34 | var owlAxioms = spark.owl(Syntax.FUNCTIONAL)(input) 35 | println(s"|G| = ${owlAxioms.count()}") 36 | // create reasoner and compute inferred graph 37 | val inferredGraph = profile match { 38 | case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism)(owlAxioms) 39 | case OWL_HORST => new ForwardRuleReasonerOWLHorst(spark.sparkContext, parallelism)(owlAxioms) 40 | case _ => 41 | throw new RuntimeException("Invalid profile: '" + profile + "'") 42 | } 43 | 44 | println(s"|G_inf| = ${inferredGraph.count()}") 45 | 46 | spark.stop() 47 | } 48 | 49 | case class Config( 50 | in: String = "", 51 | profile: ReasoningProfile = ReasoningProfile.RDFS, 52 | parallelism: Int = 4) 53 | 54 | // read ReasoningProfile enum 55 | implicit val profilesRead: scopt.Read[ReasoningProfile.Value] = 56 | scopt.Read.reads(ReasoningProfile forName _.toLowerCase()) 57 | 58 | // the CLI parser 59 | val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") { 60 | 61 | head("RDFGraphMaterializer (axioms)", "0.5.0") 62 | 63 | opt[String]('i', "input").required().valueName(""). 64 | action((x, c) => c.copy(in = x)). 65 | text("path to file or directory that contains the input files") 66 | 67 | opt[ReasoningProfile]('p', "profile").required().valueName("{rdfs | owl-horst}"). 68 | action((x, c) => c.copy(profile = x)). 69 | text("the reasoning profile") 70 | 71 | opt[Int]("parallelism").optional().action((x, c) => 72 | c.copy(parallelism = x)).text("the degree of parallelism, i.e. 
the number of Spark partitions used in the Spark operations") 73 | 74 | help("help").text("prints this usage text") 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/kge/CrossValidation.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.kge 2 | 3 | import net.sansa_stack.ml.spark.kge.linkprediction.crossvalidation.{ kFold, Bootstrapping, Holdout } 4 | import net.sansa_stack.rdf.spark.kge.convertor.ByIndex 5 | import net.sansa_stack.rdf.spark.kge.triples.Triples 6 | import org.apache.spark.sql._ 7 | 8 | object CrossValidation { 9 | 10 | def main(args: Array[String]) { 11 | parser.parse(args, Config()) match { 12 | case Some(config) => 13 | run(config.in, config.technique, config.k) 14 | case None => 15 | println(parser.usage) 16 | } 17 | } 18 | 19 | def run(input: String, technique: String, k: Int): Unit = { 20 | 21 | val spark = SparkSession.builder 22 | .appName(s"Cross validation techniques example $input") 23 | .master("local[*]") 24 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 25 | .getOrCreate() 26 | 27 | println("==============================================") 28 | println("|Cross validation techniques example |") 29 | println("==============================================") 30 | 31 | val data = new Triples(input, "\t", false, false, spark) 32 | 33 | // getting 10 distinct entities in (s,p,o) i.e. subjects + objects and printing them 34 | data.getEntities().take(10).foreach(println) 35 | 36 | // getting 10 distinct predicates in (s,p,o) and printing them 37 | data.getEntities().take(10).foreach(println) 38 | 39 | // converting the original data to indexData 40 | val indexedData = new ByIndex(data.triples, spark) 41 | val numericData = indexedData.numeric() 42 | 43 | // getting 10 distinct (s,p,o) in their numeric (indexed) form and print them 44 | indexedData.numeric.take(10).foreach(println) 45 | 46 | val (train, test) = technique match { 47 | case "holdout" => new Holdout(numericData, 0.6f).crossValidation() 48 | case "bootstrapping" => new Bootstrapping(numericData).crossValidation() 49 | case "kFold" => new kFold(numericData, k, spark).crossValidation() 50 | case _ => 51 | throw new RuntimeException("'" + technique + "' - Not supported, yet.") 52 | } 53 | 54 | println("<< DONE >>") 55 | 56 | spark.stop 57 | 58 | } 59 | 60 | case class Config(in: String = "", technique: String = "", k: Int = 0) 61 | 62 | val parser = new scopt.OptionParser[Config]("Cross validation techniques example") { 63 | 64 | head("Cross validation techniques example") 65 | 66 | opt[String]('i', "input").required().valueName(""). 67 | action((x, c) => c.copy(in = x)). 68 | text("path to file that contains the data") 69 | 70 | opt[String]('t', "technique").required().valueName("{holdout | bootstrapping | kFold}"). 71 | action((x, c) => c.copy(technique = x)). 72 | text("cross validation techniques") 73 | 74 | opt[Int]("k").optional().valueName(""). 75 | action((x, c) => { 76 | c.copy(k = x) 77 | }). 
78 | text("The k value (used only for technique'kFold')") 79 | 80 | checkConfig(c => 81 | if (c.technique == "kFold" && c.k == 0) failure("Option --k-Fold must not be empty if technique 'kFold' is set") 82 | else success) 83 | 84 | help("help").text("prints this usage text") 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /sansa-examples-spark/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | namenode: 4 | image: bde2020/hadoop-namenode:1.0.0 5 | hostname: namenode 6 | container_name: namenode 7 | networks: 8 | - hadoop 9 | volumes: 10 | - ./data/namenode:/hadoop/dfs/name 11 | environment: 12 | - CLUSTER_NAME=test 13 | - INIT_DAEMON_STEP=setup_hdfs 14 | - VIRTUAL_HOST=hdfs.demo.sansa-stack.local 15 | env_file: 16 | - ./config/hadoop/hadoop.env 17 | ports: 18 | - "50070:50070" 19 | - "8020:8020" 20 | datanode1: 21 | image: bde2020/hadoop-datanode:1.0.0 22 | hostname: datanode1 23 | container_name: datanode1 24 | networks: 25 | - hadoop 26 | volumes: 27 | - ./data/datanode1:/hadoop/dfs/data 28 | env_file: 29 | - ./config/hadoop/hadoop.env 30 | 31 | datanode2: 32 | image: bde2020/hadoop-datanode:1.0.0 33 | hostname: datanode2 34 | container_name: datanode2 35 | networks: 36 | - hadoop 37 | volumes: 38 | - ./data/datanode2:/hadoop/dfs/data 39 | env_file: 40 | - ./config/hadoop/hadoop.env 41 | 42 | filebrowser: 43 | image: bde2020/hdfs-filebrowser:3.9 44 | hostname: filebrowser 45 | container_name: filebrowser 46 | networks: 47 | - hadoop 48 | environment: 49 | - NAMENODE_HOST=namenode 50 | - VIRTUAL_HOST=hue.demo.sansa-stack.local 51 | - VIRTUAL_PORT=8088 52 | # ports: 53 | # - "8088:8088" 54 | 55 | master: 56 | image: bde2020/spark-master:2.0.1-hadoop2.7 57 | hostname: spark-master 58 | container_name: spark-master 59 | networks: 60 | - hadoop 61 | environment: 62 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 63 | - VIRTUAL_HOST=spark-master.demo.sansa-stack.local 64 | - VIRTUAL_PORT=8080 65 | env_file: 66 | - ./config/hadoop/hadoop.env 67 | 68 | worker: 69 | image: bde2020/spark-worker:2.0.1-hadoop2.7 70 | hostname: spark-worker 71 | container_name: spark-worker 72 | networks: 73 | - hadoop 74 | environment: 75 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 76 | - VIRTUAL_HOST=spark-worker.demo.sansa-stack.local 77 | - VIRTUAL_PORT=8081 78 | env_file: 79 | - ./config/hadoop/hadoop.env 80 | links: 81 | - "master:spark-master" 82 | 83 | integratorui: 84 | image: bde2020/integrator-ui:latest 85 | # build: ./config/csswrapper/integrator-ui/ 86 | hostname: integratorui 87 | container_name: integratorui 88 | networks: 89 | - hadoop 90 | volumes: 91 | - ./config/integrator:/app/config 92 | environment: 93 | - VIRTUAL_HOST=demo.sansa-stack.local 94 | 95 | csswrapper: 96 | build: ./config/csswrapper/ 97 | # image: bde2020/nginx-proxy-with-css:latest 98 | hostname: csswrapper 99 | container_name: csswrapper 100 | networks: 101 | - hadoop 102 | ports: 103 | - 80:80 104 | # volumes: 105 | # - /var/run/docker.sock:/tmp/docker.sock:ro 106 | links: 107 | - namenode:namenode 108 | - filebrowser:filebrowser 109 | - master:master 110 | - worker:worker 111 | - integratorui:integratorui 112 | depends_on: 113 | - namenode 114 | - filebrowser 115 | - master 116 | - worker 117 | - integratorui 118 | 119 | networks: 120 | hadoop: 121 | external: true 122 | -------------------------------------------------------------------------------- 
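All of the Spark example apps in this module share the same scaffolding: a scopt Config case class with an OptionParser, a SparkSession built with the Kryo serializer, and the RDF input loaded via spark.rdf(lang)(input) from net.sansa_stack.rdf.spark.io. The following minimal sketch distills that pattern into a template for adding a new example; it is not a file shipped in this repo, and the object name TripleCount, the app name, and the banner strings are illustrative placeholders.

package net.sansa_stack.examples.spark.rdf

import net.sansa_stack.rdf.spark.io._
import org.apache.jena.riot.Lang
import org.apache.spark.sql.SparkSession

// Minimal sketch of the scopt + SparkSession pattern used by the examples in this module.
// "TripleCount" is a placeholder name, not an example shipped with SANSA-Examples.
object TripleCount {

  def main(args: Array[String]) {
    parser.parse(args, Config()) match {
      case Some(config) =>
        run(config.in)
      case None =>
        println(parser.usage)
    }
  }

  def run(input: String): Unit = {

    val spark = SparkSession.builder
      .appName(s"Triple count sketch ( $input )")
      .master("local[*]")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()

    // load the N-Triples file into an RDD of Jena triples, as the other examples do
    val lang = Lang.NTRIPLES
    val triples = spark.rdf(lang)(input)

    // a trivial action, standing in for the example-specific logic
    println("Number of triples: " + triples.count())

    spark.stop
  }

  // the config object
  case class Config(in: String = "")

  // the CLI parser
  val parser = new scopt.OptionParser[Config]("Triple count sketch") {

    head("Triple count sketch")

    opt[String]('i', "input").required().
      action((x, c) => c.copy(in = x)).
      text("path to file that contains the data (in N-Triples format)")

    help("help").text("prints this usage text")
  }
}

Such a class would be run the same way as the bundled examples, either locally or by passing its fully qualified name to spark-submit via --class against the cluster defined in the docker-compose.yml above, as described in the README.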
/sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/RDFQualityAssessment.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import java.io.File 4 | 5 | import scala.collection.mutable 6 | 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.qualityassessment._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.spark.sql.SparkSession 11 | 12 | object RDFQualityAssessment { 13 | 14 | def main(args: Array[String]) { 15 | parser.parse(args, Config()) match { 16 | case Some(config) => 17 | run(config.in, config.out) 18 | case None => 19 | println(parser.usage) 20 | } 21 | } 22 | 23 | def run(input: String, output: String): Unit = { 24 | 25 | val rdf_quality_file = new File(input).getName 26 | 27 | val spark = SparkSession.builder 28 | .appName(s"RDF Quality Assessment Example $rdf_quality_file") 29 | .master("local[*]") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | println("======================================") 34 | println("| RDF Quality Assessment Example |") 35 | println("======================================") 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | // compute quality assessment 41 | val completeness_schema = triples.assessSchemaCompleteness() 42 | val completeness_interlinking = triples.assessInterlinkingCompleteness() 43 | val completeness_property = triples.assessPropertyCompleteness() 44 | 45 | val syntacticvalidity_literalnumeric = triples.assessLiteralNumericRangeChecker() 46 | val syntacticvalidity_XSDDatatypeCompatibleLiterals = triples.assessXSDDatatypeCompatibleLiterals() 47 | 48 | val availability_DereferenceableUris = triples.assessDereferenceableUris() 49 | 50 | val relevancy_CoverageDetail = triples.assessCoverageDetail() 51 | val relevancy_CoverageScope = triples.assessCoverageScope() 52 | val relevancy_AmountOfTriples = triples.assessAmountOfTriples() 53 | 54 | val performance_NoHashURIs = triples.assessNoHashUris() 55 | val understandability_LabeledResources = triples.assessLabeledResources() 56 | 57 | val AssessQualityStr = s""" 58 | completeness_schema:$completeness_schema 59 | completeness_interlinking:$completeness_interlinking 60 | completeness_property:$completeness_property 61 | syntacticvalidity_literalnumeric:$syntacticvalidity_literalnumeric 62 | syntacticvalidity_XSDDatatypeCompatibleLiterals:$syntacticvalidity_XSDDatatypeCompatibleLiterals 63 | availability_DereferenceableUris:$availability_DereferenceableUris 64 | relevancy_CoverageDetail:$relevancy_CoverageDetail 65 | relevancy_CoverageScope:$relevancy_CoverageScope 66 | relevancy_AmountOfTriples:$relevancy_AmountOfTriples 67 | performance_NoHashURIs:$performance_NoHashURIs 68 | understandability_LabeledResources:$understandability_LabeledResources 69 | """ 70 | 71 | println(s"\n AssessQuality for $rdf_quality_file :\n $AssessQualityStr") 72 | } 73 | 74 | case class Config( 75 | in: String = "", 76 | out: String = "") 77 | 78 | val parser = new scopt.OptionParser[Config]("RDF Quality Assessment Example") { 79 | 80 | head("RDF Quality Assessment Example") 81 | 82 | opt[String]('i', "input").required().valueName(""). 83 | action((x, c) => c.copy(in = x)). 84 | text("path to file that contains the data (in N-Triples format)") 85 | 86 | opt[String]('o', "out").required().valueName(""). 87 | action((x, c) => c.copy(out = x)). 
88 | text("the output directory") 89 | 90 | help("help").text("prints this usage text") 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /sansa-examples-flink/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | namenode: 4 | image: bde2020/hadoop-namenode:1.0.0 5 | hostname: namenode 6 | container_name: namenode 7 | domainname: hadoop 8 | networks: 9 | - hadoop 10 | volumes: 11 | - ./data/namenode:/hadoop/dfs/name 12 | environment: 13 | - CLUSTER_NAME=test 14 | - VIRTUAL_HOST=hdfs.demo.sansa-stack.local 15 | env_file: 16 | - ./config/hadoop/hadoop.env 17 | ports: 18 | - "50070:50070" 19 | - "8020:8020" 20 | datanode1: 21 | image: bde2020/hadoop-datanode:1.0.0 22 | hostname: datanode1 23 | container_name: datanode1 24 | domainname: hadoop 25 | networks: 26 | - hadoop 27 | volumes: 28 | - ./data/datanode1:/hadoop/dfs/data 29 | env_file: 30 | - ./config/hadoop/hadoop.env 31 | 32 | datanode2: 33 | image: bde2020/hadoop-datanode:1.0.0 34 | hostname: datanode2 35 | container_name: datanode2 36 | domainname: hadoop 37 | networks: 38 | - hadoop 39 | volumes: 40 | - ./data/datanode2:/hadoop/dfs/data 41 | env_file: 42 | - ./config/hadoop/hadoop.env 43 | 44 | filebrowser: 45 | image: bde2020/hdfs-filebrowser:3.9 46 | hostname: filebrowser 47 | container_name: filebrowser 48 | domainname: hadoop 49 | networks: 50 | - hadoop 51 | environment: 52 | - NAMENODE_HOST=namenode 53 | - VIRTUAL_HOST=hue.demo.sansa-stack.local 54 | - VIRTUAL_PORT=8088 55 | # ports: 56 | # - "8088:8088" 57 | 58 | flink-master: 59 | image: bde2020/flink-master:1.1.3-hadoop2.7 60 | hostname: flink-master 61 | container_name: flink-master 62 | domainname: hadoop 63 | networks: 64 | - hadoop 65 | environment: 66 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 67 | - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 68 | - VIRTUAL_HOST=flink-master.demo.sansa-stack.local 69 | - VIRTUAL_PORT=8080 70 | env_file: 71 | - ./config/hadoop/hadoop.env 72 | ports: 73 | - "8080:8080" 74 | - "8081:8081" 75 | 76 | flink-worker: 77 | image: bde2020/flink-worker:1.1.3-hadoop2.7 78 | hostname: flink-worker 79 | container_name: flink-worker 80 | domainname: hadoop 81 | networks: 82 | - hadoop 83 | environment: 84 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 85 | - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 86 | - FLINK_MASTER_PORT_6123_TCP_ADDR=flink-master 87 | - VIRTUAL_HOST=flink-worker.demo.sansa-stack.local 88 | - VIRTUAL_PORT=8081 89 | env_file: 90 | - ./config/hadoop/hadoop.env 91 | links: 92 | - "flink-master" 93 | 94 | integratorui: 95 | image: bde2020/integrator-ui:latest 96 | hostname: integratorui 97 | container_name: integratorui 98 | domainname: hadoop 99 | networks: 100 | - hadoop 101 | volumes: 102 | - ./config/integrator:/app/config 103 | environment: 104 | - VIRTUAL_HOST=demo.sansa-stack.local 105 | 106 | csswrapper: 107 | # image: gezim/flink-starter-integrator-css-wrapper 108 | build: ./config/csswrapper/ 109 | hostname: csswrapper 110 | container_name: csswrapper 111 | domainname: hadoop 112 | networks: 113 | - hadoop 114 | ports: 115 | - 80:80 116 | links: 117 | - namenode:namenode 118 | - filebrowser:filebrowser 119 | - flink-master:flink-master 120 | - flink-worker:flink-worker 121 | - integratorui:integratorui 122 | depends_on: 123 | - namenode 124 | - filebrowser 125 | - flink-master 126 | - flink-worker 127 | - integratorui 128 | 129 | networks: 130 | hadoop: 131 | 
external: true 132 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/Sparklify.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import java.awt.Desktop 4 | import java.net.URI 5 | 6 | import net.sansa_stack.query.spark.sparqlify.{QueryExecutionFactorySparqlifySpark, SparqlifyUtils3} 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.partition.core.RdfPartitionUtilsSpark 9 | import org.aksw.jena_sparql_api.server.utils.FactoryBeanSparqlServer 10 | import org.apache.jena.riot.Lang 11 | import org.apache.spark.sql.SparkSession 12 | 13 | /** 14 | * Run SPARQL queries over Spark using Sparqlify approach. 15 | */ 16 | object Sparklify { 17 | 18 | def main(args: Array[String]) { 19 | parser.parse(args, Config()) match { 20 | case Some(config) => 21 | run(config.in, config.sparql, config.run, config.port) 22 | case None => 23 | println(parser.usage) 24 | } 25 | } 26 | 27 | def run(input: String, sparqlQuery: String = "", run: String = "cli", port: String = "7531"): Unit = { 28 | 29 | println("======================================") 30 | println("| Sparklify example |") 31 | println("======================================") 32 | 33 | val spark = SparkSession.builder 34 | .appName(s"Sparklify example ( $input )") 35 | .master("local[*]") 36 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 37 | .config("spark.kryo.registrator", String.join( 38 | ", ", 39 | "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", 40 | "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify")) 41 | .getOrCreate() 42 | 43 | val lang = Lang.NTRIPLES 44 | val graphRdd = spark.rdf(lang)(input) 45 | 46 | run match { 47 | case "cli" => 48 | import net.sansa_stack.query.spark.query._ 49 | // val sparqlQuery = "SELECT * WHERE {?s ?p ?o} LIMIT 10" 50 | val result = graphRdd.sparql(sparqlQuery) 51 | result.rdd.foreach(println) 52 | case _ => 53 | val partitions = RdfPartitionUtilsSpark.partitionGraph(graphRdd) 54 | val rewriter = SparqlifyUtils3.createSparqlSqlRewriter(spark, partitions) 55 | 56 | val port = 7531 57 | 58 | val qef = new QueryExecutionFactorySparqlifySpark(spark, rewriter) 59 | val server = FactoryBeanSparqlServer.newInstance.setSparqlServiceFactory(qef).setPort(port).create() 60 | if (Desktop.isDesktopSupported) { 61 | Desktop.getDesktop.browse(URI.create("http://localhost:" + port + "/sparql")) 62 | } 63 | server.join() 64 | } 65 | 66 | spark.stop 67 | 68 | } 69 | 70 | case class Config(in: String = "", sparql: String = "SELECT * WHERE {?s ?p ?o} LIMIT 10", run: String = "cli", port: String = "7531") 71 | 72 | val parser = new scopt.OptionParser[Config]("Sparklify example") { 73 | 74 | head(" Sparqlify example") 75 | 76 | opt[String]('i', "input").required().valueName(""). 77 | action((x, c) => c.copy(in = x)). 78 | text("path to file that contains the data (in N-Triples format)") 79 | 80 | opt[String]('q', "sparql").optional().valueName(""). 81 | action((x, c) => c.copy(sparql = x)). 82 | text("a SPARQL query") 83 | 84 | opt[String]('r', "run").optional().valueName("Runner"). 85 | action((x, c) => c.copy(run = x)). 86 | text("Runner method, default:'cli'") 87 | 88 | opt[String]('p', "port").optional().valueName("port"). 89 | action((x, c) => c.copy(port = x)). 
90 | text("port that SPARQL endpoint will be exposed, default:'7531'") 91 | 92 | checkConfig(c => 93 | if (c.run == "cli" && c.sparql.isEmpty) failure("Option --sparql must not be empty if cli is enabled") 94 | else success) 95 | 96 | help("help").text("prints this usage text") 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/resources/ont_functional.owl: -------------------------------------------------------------------------------- 1 | Prefix(:=) 2 | Prefix(foo:=) 3 | Prefix(bar:=) 4 | Prefix(xsd:=) 5 | 6 | Ontology( 7 | 8 | #Import() 9 | # annotations 10 | Annotation(foo:hasName "Name") 11 | Annotation(bar:hasTitle "Title") 12 | Annotation(:description "A longer 13 | description running over 14 | several lines") 15 | 16 | ## declarations 17 | # class declarations 18 | Declaration(Annotation(foo:ann "some annotation") Class(bar:Cls1)) 19 | Declaration(Class(bar:Cls2)) 20 | # datatype declarations 21 | Declaration(Datatype(bar:dtype1)) 22 | Declaration(Datatype(bar:dtype2)) 23 | # object property declarations 24 | Declaration(ObjectProperty(bar:objProp1)) 25 | Declaration(ObjectProperty(bar:objProp2)) 26 | # data property declarations 27 | Declaration(DataProperty(bar:dataProp1)) 28 | Declaration(DataProperty(bar:dataProp2)) 29 | # annotation property declarations 30 | Declaration(AnnotationProperty(bar:annProp1)) 31 | Declaration(AnnotationProperty(bar:annProp2)) 32 | # named individual declarations 33 | Declaration(NamedIndividual(foo:indivA)) 34 | Declaration(NamedIndividual(foo:indivB)) 35 | 36 | ## annotation axioms 37 | AnnotationAssertion(bar:label bar:Cls1 "Class 1") 38 | SubAnnotationPropertyOf(bar:annProp1 bar:annProp2) 39 | AnnotationPropertyDomain(bar:annProp1 bar:Cls1) 40 | AnnotationPropertyRange(bar:annProp1 bar:Cls2) 41 | 42 | ## class expressions -- 20 43 | EquivalentClasses(bar:IntersectionCls ObjectIntersectionOf(bar:Cls1 bar:Cls2)) 44 | EquivalentClasses(bar:UnionCls ObjectUnionOf(bar:Cls1 bar:Cls2)) 45 | EquivalentClasses(bar:ComplementCls ObjectComplementOf(bar:Cls1)) 46 | EquivalentClasses(bar:AllIndividualsCls ObjectOneOf(foo:indivA foo:indivB)) 47 | EquivalentClasses(bar:SomeProp1Cls1 ObjectSomeValuesFrom(bar:objProp1 bar:Cls1)) 48 | EquivalentClasses(bar:AllProp1Cls1 ObjectAllValuesFrom(bar:objProp1 bar:Cls1)) 49 | EquivalentClasses(bar:HasValProp1IndivB ObjectHasValue(bar:objProp1 foo:indivB)) 50 | EquivalentClasses(bar:HasSelfProp1 ObjectHasSelf(bar:objProp1)) 51 | EquivalentClasses(bar:Min2Prop1Cls1 ObjectMinCardinality(2 bar:objProp1 bar:Cls1)) 52 | EquivalentClasses(bar:Max3Prop1Cls1 ObjectMaxCardinality(3 bar:objProp1 bar:Cls1)) 53 | EquivalentClasses(bar:Exact5Prop1Cls1 ObjectExactCardinality(5 bar:objProp1 bar:Cls1)) 54 | EquivalentClasses(bar:DataSomeIntLT20 DataSomeValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:maxExclusive "20"^^xsd:integer))) 55 | EquivalentClasses(bar:DataAllIntGT10 DataAllValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:minInclusive "10"^^xsd:integer))) 56 | EquivalentClasses(bar:DataHasVal5 DataHasValue(bar:dataProp2 "5"^^xsd:integer)) 57 | EquivalentClasses(bar:DataMin3Prop1 DataMinCardinality(3 bar:dataProp1)) 58 | EquivalentClasses(bar:DataMax2Prop1 DataMaxCardinality(2 bar:dataProp1)) 59 | EquivalentClasses(bar:DataExact5Prop1 DataExactCardinality(5 bar:dataProp1)) 60 | SubClassOf(bar:Cls1 bar:UnionCls) 61 | DisjointClasses(bar:DataMin3Prop1 bar:DataMax2Prop1) 62 | DisjointUnion(bar:Cl1OrNegate bar:Cls1 
bar:ComplementCls1) 63 | 64 | ## object property axioms -- 40 65 | EquivalentObjectProperties(bar:invObjProp1 ObjectInverseOf(bar:objProp1)) 66 | SubObjectPropertyOf(bar:subObjProp1 bar:objProp1) 67 | EquivalentObjectProperties(bar:sameAsObjProp1 bar:objProp1) 68 | DisjointObjectProperties(bar:objProp1 bar:objProp2) 69 | InverseObjectProperties(bar:invObjProp1 bar:objProp1) 70 | ObjectPropertyDomain(bar:objProp1 bar:Cls1) 71 | ObjectPropertyRange(bar:objProp1 bar:AllIndividualsCls) 72 | FunctionalObjectProperty(bar:objProp2) 73 | InverseFunctionalObjectProperty(bar:invObjProp1) 74 | ReflexiveObjectProperty(bar:objProp1) 75 | IrreflexiveObjectProperty(bar:objProp2) 76 | SymmetricObjectProperty(bar:objProp2) 77 | AsymmetricObjectProperty(bar:asymmObjProp) 78 | TransitiveObjectProperty(bar:objProp1) 79 | 80 | ## data property axioms 81 | DataPropertyRange(bar:dataProp1 xsd:string) 82 | DataPropertyDomain(bar:dataProp1 bar:Cls1) 83 | DataPropertyRange(bar:dataProp2 xsd:int) 84 | DataPropertyDomain(bar:dataProp2 bar:Cls1) 85 | SubDataPropertyOf(bar:subDataProp1 bar:dataProp1) 86 | EquivalentDataProperties(bar:sameAsDataProp1 bar:dataProp1) 87 | DisjointDataProperties(bar:dataProp1 bar:dataProp2) 88 | FunctionalDataProperty(bar:dataProp1) 89 | HasKey(bar:Cls1 () (bar:dataProp1)) 90 | 91 | ## assertions -- 63 92 | SameIndividual(foo:sameAsIndivA foo:indivA) 93 | DifferentIndividuals(foo:indivA foo:indivB) 94 | ClassAssertion(bar:Cls1 foo:indivA) 95 | ObjectPropertyAssertion(bar:objProp1 foo:indivA foo:indivB) 96 | NegativeObjectPropertyAssertion(bar:Prop2 foo:indivB foo:indivA) 97 | DataPropertyAssertion(bar:dataProp1 foo:indivA "ABCD") 98 | DataPropertyAssertion(bar:dataProp1 foo:indivB "BCDE") 99 | NegativeDataPropertyAssertion(bar:dataProp2 foo:indivA "23"^^xsd:integer) 100 | ) 101 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/ont_functional.owl: -------------------------------------------------------------------------------- 1 | Prefix(:=) 2 | Prefix(foo:=) 3 | Prefix(bar:=) 4 | Prefix(xsd:=) 5 | 6 | Ontology( 7 | 8 | #Import() 9 | # annotations 10 | Annotation(foo:hasName "Name") 11 | Annotation(bar:hasTitle "Title") 12 | Annotation(:description "A longer 13 | description running over 14 | several lines") 15 | 16 | ## declarations 17 | # class declarations 18 | Declaration(Annotation(foo:ann "some annotation") Class(bar:Cls1)) 19 | Declaration(Class(bar:Cls2)) 20 | # datatype declarations 21 | Declaration(Datatype(bar:dtype1)) 22 | Declaration(Datatype(bar:dtype2)) 23 | # object property declarations 24 | Declaration(ObjectProperty(bar:objProp1)) 25 | Declaration(ObjectProperty(bar:objProp2)) 26 | # data property declarations 27 | Declaration(DataProperty(bar:dataProp1)) 28 | Declaration(DataProperty(bar:dataProp2)) 29 | # annotation property declarations 30 | Declaration(AnnotationProperty(bar:annProp1)) 31 | Declaration(AnnotationProperty(bar:annProp2)) 32 | # named individual declarations 33 | Declaration(NamedIndividual(foo:indivA)) 34 | Declaration(NamedIndividual(foo:indivB)) 35 | 36 | ## annotation axioms 37 | AnnotationAssertion(bar:label bar:Cls1 "Class 1") 38 | SubAnnotationPropertyOf(bar:annProp1 bar:annProp2) 39 | AnnotationPropertyDomain(bar:annProp1 bar:Cls1) 40 | AnnotationPropertyRange(bar:annProp1 bar:Cls2) 41 | 42 | ## class expressions -- 20 43 | EquivalentClasses(bar:IntersectionCls ObjectIntersectionOf(bar:Cls1 bar:Cls2)) 44 | EquivalentClasses(bar:UnionCls ObjectUnionOf(bar:Cls1 bar:Cls2)) 45 
| EquivalentClasses(bar:ComplementCls ObjectComplementOf(bar:Cls1)) 46 | EquivalentClasses(bar:AllIndividualsCls ObjectOneOf(foo:indivA foo:indivB)) 47 | EquivalentClasses(bar:SomeProp1Cls1 ObjectSomeValuesFrom(bar:objProp1 bar:Cls1)) 48 | EquivalentClasses(bar:AllProp1Cls1 ObjectAllValuesFrom(bar:objProp1 bar:Cls1)) 49 | EquivalentClasses(bar:HasValProp1IndivB ObjectHasValue(bar:objProp1 foo:indivB)) 50 | EquivalentClasses(bar:HasSelfProp1 ObjectHasSelf(bar:objProp1)) 51 | EquivalentClasses(bar:Min2Prop1Cls1 ObjectMinCardinality(2 bar:objProp1 bar:Cls1)) 52 | EquivalentClasses(bar:Max3Prop1Cls1 ObjectMaxCardinality(3 bar:objProp1 bar:Cls1)) 53 | EquivalentClasses(bar:Exact5Prop1Cls1 ObjectExactCardinality(5 bar:objProp1 bar:Cls1)) 54 | EquivalentClasses(bar:DataSomeIntLT20 DataSomeValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:maxExclusive "20"^^xsd:integer))) 55 | EquivalentClasses(bar:DataAllIntGT10 DataAllValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:minInclusive "10"^^xsd:integer))) 56 | EquivalentClasses(bar:DataHasVal5 DataHasValue(bar:dataProp2 "5"^^xsd:integer)) 57 | EquivalentClasses(bar:DataMin3Prop1 DataMinCardinality(3 bar:dataProp1)) 58 | EquivalentClasses(bar:DataMax2Prop1 DataMaxCardinality(2 bar:dataProp1)) 59 | EquivalentClasses(bar:DataExact5Prop1 DataExactCardinality(5 bar:dataProp1)) 60 | SubClassOf(bar:Cls1 bar:UnionCls) 61 | DisjointClasses(bar:DataMin3Prop1 bar:DataMax2Prop1) 62 | DisjointUnion(bar:Cl1OrNegate bar:Cls1 bar:ComplementCls1) 63 | 64 | ## object property axioms -- 40 65 | EquivalentObjectProperties(bar:invObjProp1 ObjectInverseOf(bar:objProp1)) 66 | SubObjectPropertyOf(bar:subObjProp1 bar:objProp1) 67 | EquivalentObjectProperties(bar:sameAsObjProp1 bar:objProp1) 68 | DisjointObjectProperties(bar:objProp1 bar:objProp2) 69 | InverseObjectProperties(bar:invObjProp1 bar:objProp1) 70 | ObjectPropertyDomain(bar:objProp1 bar:Cls1) 71 | ObjectPropertyRange(bar:objProp1 bar:AllIndividualsCls) 72 | FunctionalObjectProperty(bar:objProp2) 73 | InverseFunctionalObjectProperty(bar:invObjProp1) 74 | ReflexiveObjectProperty(bar:objProp1) 75 | IrreflexiveObjectProperty(bar:objProp2) 76 | SymmetricObjectProperty(bar:objProp2) 77 | AsymmetricObjectProperty(bar:asymmObjProp) 78 | TransitiveObjectProperty(bar:objProp1) 79 | 80 | ## data property axioms 81 | DataPropertyRange(bar:dataProp1 xsd:string) 82 | DataPropertyDomain(bar:dataProp1 bar:Cls1) 83 | DataPropertyRange(bar:dataProp2 xsd:int) 84 | DataPropertyDomain(bar:dataProp2 bar:Cls1) 85 | SubDataPropertyOf(bar:subDataProp1 bar:dataProp1) 86 | EquivalentDataProperties(bar:sameAsDataProp1 bar:dataProp1) 87 | DisjointDataProperties(bar:dataProp1 bar:dataProp2) 88 | FunctionalDataProperty(bar:dataProp1) 89 | HasKey(bar:Cls1 () (bar:dataProp1)) 90 | 91 | ## assertions -- 63 92 | SameIndividual(foo:sameAsIndivA foo:indivA) 93 | DifferentIndividuals(foo:indivA foo:indivB) 94 | ClassAssertion(bar:Cls1 foo:indivA) 95 | ObjectPropertyAssertion(bar:objProp1 foo:indivA foo:indivB) 96 | NegativeObjectPropertyAssertion(bar:Prop2 foo:indivB foo:indivA) 97 | DataPropertyAssertion(bar:dataProp1 foo:indivA "ABCD") 98 | DataPropertyAssertion(bar:dataProp1 foo:indivB "BCDE") 99 | NegativeDataPropertyAssertion(bar:dataProp2 foo:indivA "23"^^xsd:integer) 100 | ) 101 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/default.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Hadoop 
block 3 | # 4 | 5 | server { 6 | listen 80; 7 | server_name hdfs.demo.sansa-stack.local 8 | 9 | root /data; 10 | gzip on; 11 | 12 | location / { 13 | proxy_pass http://127.0.0.1:8000; 14 | proxy_set_header Accept-Encoding ""; 15 | } 16 | 17 | location /bde-css/ { 18 | } 19 | } 20 | 21 | server { 22 | listen 127.0.0.1:8000; 23 | location / { 24 | proxy_pass http://127.0.0.1:8001; 25 | sub_filter '' ' 26 | '; 27 | sub_filter_once on; 28 | proxy_set_header Accept-Encoding ""; 29 | } 30 | } 31 | 32 | server { 33 | listen 127.0.0.1:8001; 34 | gunzip on; 35 | location / { 36 | proxy_pass http://namenode:50070; 37 | proxy_set_header Accept-Encoding gzip; 38 | } 39 | } 40 | 41 | 42 | # 43 | # HDFS Hue 44 | # 45 | 46 | server { 47 | listen 80; 48 | server_name hue.demo.sansa-stack.local 49 | root /data; 50 | gzip on; 51 | 52 | location / { 53 | proxy_pass http://127.0.0.1:8006; 54 | proxy_set_header Accept-Encoding ""; 55 | proxy_hide_header X-Frame-Options; 56 | } 57 | 58 | location /bde-css/ { 59 | } 60 | } 61 | 62 | server { 63 | listen 127.0.0.1:8006; 64 | location / { 65 | proxy_pass http://127.0.0.1:8007; 66 | # sub_filter '' ' 67 | # '; 68 | # sub_filter_once on; 69 | sub_filter 'if (self == top){' 'if (true){'; 70 | sub_filter_once on; 71 | proxy_set_header Accept-Encoding ""; 72 | } 73 | } 74 | 75 | server { 76 | listen 127.0.0.1:8007; 77 | gunzip on; 78 | location / { 79 | proxy_pass http://filebrowser:8088; 80 | proxy_set_header Accept-Encoding gzip; 81 | } 82 | } 83 | 84 | # 85 | # Flink Master 86 | # 87 | 88 | server { 89 | listen 80; 90 | server_name flink-master.demo.sansa-stack.local 91 | 92 | root /data; 93 | gzip on; 94 | 95 | location / { 96 | proxy_pass http://127.0.0.1:8008; 97 | proxy_set_header Accept-Encoding ""; 98 | } 99 | 100 | location /bde-css/ { 101 | } 102 | } 103 | 104 | server { 105 | listen 127.0.0.1:8008; 106 | location / { 107 | proxy_pass http://127.0.0.1:8009; 108 | sub_filter '' ' 109 | '; 110 | sub_filter_once on; 111 | proxy_set_header Accept-Encoding ""; 112 | } 113 | } 114 | 115 | server { 116 | listen 127.0.0.1:8009; 117 | gunzip on; 118 | location / { 119 | proxy_pass http://flink-master:8081; 120 | proxy_set_header Accept-Encoding gzip; 121 | } 122 | } 123 | 124 | # 125 | # Flink Worker 126 | # 127 | 128 | server { 129 | listen 80; 130 | server_name flink-worker.demo.sansa-stack.local 131 | 132 | root /data; 133 | gzip on; 134 | 135 | location / { 136 | proxy_pass http://127.0.0.1:8010; 137 | proxy_set_header Accept-Encoding ""; 138 | } 139 | 140 | location /bde-css/ { 141 | } 142 | } 143 | 144 | server { 145 | listen 127.0.0.1:8010; 146 | location / { 147 | proxy_pass http://127.0.0.1:8011; 148 | # sub_filter '' ' 149 | # '; 150 | # sub_filter_once on; 151 | proxy_set_header Accept-Encoding ""; 152 | } 153 | } 154 | 155 | server { 156 | listen 127.0.0.1:8011; 157 | gunzip on; 158 | location / { 159 | proxy_pass http://flink-worker:8080; 160 | proxy_set_header Accept-Encoding gzip; 161 | } 162 | } 163 | 164 | # 165 | # Integrator 166 | # 167 | 168 | server { 169 | listen 80; 170 | server_name demo.sansa-stack.local 171 | 172 | root /data; 173 | gzip on; 174 | 175 | location / { 176 | proxy_pass http://127.0.0.1:8012; 177 | proxy_set_header Accept-Encoding ""; 178 | } 179 | 180 | location /bde-css/ { 181 | } 182 | } 183 | 184 | server { 185 | listen 127.0.0.1:8012; 186 | location / { 187 | proxy_pass http://127.0.0.1:8013; 188 | # sub_filter '' ' 189 | # '; 190 | # sub_filter_once on; 191 | proxy_set_header Accept-Encoding ""; 192 | } 193 | } 194 | 195 | 
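# Each web UI in this file is wrapped by the same three-hop proxy chain: the public
# :80 server, a local "injector" server where sub_filter rewrites the page (e.g. to
# splice in the BDE CSS), and a final helper server like the one below that gunzips
# the upstream response, because sub_filter can only rewrite uncompressed bodies.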
server { 196 | listen 127.0.0.1:8013; 197 | gunzip on; 198 | location / { 199 | proxy_pass http://integratorui:80; 200 | proxy_set_header Accept-Encoding gzip; 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/default.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Hadoop block 3 | # 4 | 5 | server { 6 | listen 80; 7 | server_name hdfs.demo.sansa-stack.local 8 | 9 | root /data; 10 | gzip on; 11 | 12 | location / { 13 | proxy_pass http://127.0.0.1:8000; 14 | proxy_set_header Accept-Encoding ""; 15 | } 16 | 17 | location /bde-css/ { 18 | } 19 | } 20 | 21 | server { 22 | listen 127.0.0.1:8000; 23 | location / { 24 | proxy_pass http://127.0.0.1:8001; 25 | sub_filter '' ' 26 | '; 27 | sub_filter_once on; 28 | proxy_set_header Accept-Encoding ""; 29 | } 30 | } 31 | 32 | server { 33 | listen 127.0.0.1:8001; 34 | gunzip on; 35 | location / { 36 | proxy_pass http://namenode:50070; 37 | proxy_set_header Accept-Encoding gzip; 38 | } 39 | } 40 | 41 | 42 | # 43 | # HDFS Hue 44 | # 45 | 46 | server { 47 | listen 80; 48 | server_name hue.demo.sansa-stack.local 49 | root /data; 50 | gzip on; 51 | 52 | location / { 53 | proxy_pass http://127.0.0.1:8006; 54 | proxy_set_header Accept-Encoding ""; 55 | proxy_hide_header X-Frame-Options; 56 | } 57 | 58 | location /bde-css/ { 59 | } 60 | } 61 | 62 | server { 63 | listen 127.0.0.1:8006; 64 | location / { 65 | proxy_pass http://127.0.0.1:8007; 66 | # sub_filter '' ' 67 | # '; 68 | # sub_filter_once on; 69 | sub_filter 'if (self == top){' 'if (true){'; 70 | sub_filter_once on; 71 | proxy_set_header Accept-Encoding ""; 72 | } 73 | } 74 | 75 | server { 76 | listen 127.0.0.1:8007; 77 | gunzip on; 78 | location / { 79 | proxy_pass http://filebrowser:8088; 80 | proxy_set_header Accept-Encoding gzip; 81 | } 82 | } 83 | 84 | # 85 | # Spark Master 86 | # 87 | 88 | server { 89 | listen 80; 90 | server_name spark-master.demo.sansa-stack.local 91 | 92 | root /data; 93 | gzip on; 94 | 95 | location / { 96 | proxy_pass http://127.0.0.1:8008; 97 | proxy_set_header Accept-Encoding ""; 98 | } 99 | 100 | location /bde-css/ { 101 | } 102 | } 103 | 104 | server { 105 | listen 127.0.0.1:8008; 106 | location / { 107 | proxy_pass http://127.0.0.1:8009; 108 | # sub_filter '' ' 109 | # '; 110 | # sub_filter_once on; 111 | proxy_set_header Accept-Encoding ""; 112 | } 113 | } 114 | 115 | server { 116 | listen 127.0.0.1:8009; 117 | gunzip on; 118 | location / { 119 | proxy_pass http://master:8080/home; 120 | proxy_set_header Accept-Encoding gzip; 121 | } 122 | } 123 | # 124 | # Spark Worker 125 | # 126 | 127 | server { 128 | listen 80; 129 | server_name spark-worker.demo.sansa-stack.local 130 | 131 | root /data; 132 | gzip on; 133 | 134 | location / { 135 | proxy_pass http://127.0.0.1:8010; 136 | proxy_set_header Accept-Encoding ""; 137 | } 138 | 139 | location /bde-css/ { 140 | } 141 | } 142 | 143 | server { 144 | listen 127.0.0.1:8010; 145 | location / { 146 | proxy_pass http://127.0.0.1:8011; 147 | # sub_filter '' ' 148 | # '; 149 | # sub_filter_once on; 150 | proxy_set_header Accept-Encoding ""; 151 | } 152 | } 153 | 154 | server { 155 | listen 127.0.0.1:8011; 156 | gunzip on; 157 | location / { 158 | proxy_pass http://worker:8081; 159 | proxy_set_header Accept-Encoding gzip; 160 | } 161 | } 162 | 163 | # 164 | # Integrator 165 | # 166 | 167 | server { 168 | listen 80; 169 | server_name demo.sansa-stack.local 170 | 171 | root /data; 172 
| gzip on; 173 | 174 | location / { 175 | proxy_pass http://127.0.0.1:8012; 176 | proxy_set_header Accept-Encoding ""; 177 | } 178 | 179 | location /bde-css/ { 180 | } 181 | } 182 | 183 | server { 184 | listen 127.0.0.1:8012; 185 | location / { 186 | proxy_pass http://127.0.0.1:8013; 187 | # sub_filter '' ' 188 | # '; 189 | # sub_filter_once on; 190 | proxy_set_header Accept-Encoding ""; 191 | } 192 | } 193 | 194 | server { 195 | listen 127.0.0.1:8013; 196 | gunzip on; 197 | location / { 198 | proxy_pass http://integratorui:80; 199 | proxy_set_header Accept-Encoding gzip; 200 | } 201 | } 202 | 203 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/inference/triples/RDFGraphInference.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.inference.triples 2 | 3 | import java.net.URI 4 | 5 | import scala.collection.Seq 6 | 7 | import net.sansa_stack.inference.rules.{ RDFSLevel, ReasoningProfile } 8 | import net.sansa_stack.inference.rules.ReasoningProfile._ 9 | import net.sansa_stack.inference.spark.data.loader.RDFGraphLoader 10 | import net.sansa_stack.inference.spark.data.writer.RDFGraphWriter 11 | import net.sansa_stack.inference.spark.forwardchaining.triples.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS, TransitiveReasoner} 12 | import org.apache.jena.graph.{ Node, NodeFactory } 13 | import org.apache.spark.sql.SparkSession 14 | 15 | object RDFGraphInference { 16 | 17 | def main(args: Array[String]) { 18 | parser.parse(args, Config()) match { 19 | case Some(config) => 20 | run(config.in, config.out, config.profile, config.properties, config.writeToSingleFile, config.sortedOutput, config.parallelism) 21 | case None => 22 | println(parser.usage) 23 | } 24 | } 25 | 26 | def run(input: Seq[URI], output: URI, profile: ReasoningProfile, properties: Seq[Node] = Seq(), 27 | writeToSingleFile: Boolean, sortedOutput: Boolean, parallelism: Int): Unit = { 28 | 29 | // the SPARK config 30 | val spark = SparkSession.builder 31 | .appName(s"SPARK $profile Reasoning") 32 | .master("local[*]") 33 | .config("spark.hadoop.validateOutputSpecs", "false") // override output files 34 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 35 | .config("spark.default.parallelism", parallelism) 36 | .config("spark.ui.showConsoleProgress", "false") 37 | .config("spark.sql.shuffle.partitions", parallelism) 38 | .getOrCreate() 39 | 40 | // load triples from disk 41 | val graph = RDFGraphLoader.loadFromDisk(spark, input, parallelism) 42 | println(s"|G| = ${graph.size()}") 43 | 44 | // create reasoner 45 | val reasoner = profile match { 46 | case TRANSITIVE => new TransitiveReasoner(spark.sparkContext, properties, parallelism) 47 | case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism) 48 | case RDFS_SIMPLE => 49 | val r = new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism) 50 | r.level = RDFSLevel.SIMPLE 51 | r 52 | case OWL_HORST => new ForwardRuleReasonerOWLHorst(spark.sparkContext) 53 | } 54 | 55 | // compute inferred graph 56 | val inferredGraph = reasoner.apply(graph) 57 | println(s"|G_inf| = ${inferredGraph.size()}") 58 | 59 | // write triples to disk 60 | RDFGraphWriter.writeToDisk(inferredGraph, output.toString, writeToSingleFile, sortedOutput) 61 | 62 | spark.stop() 63 | } 64 | 65 | case class Config( 66 | in: Seq[URI] = Seq(), 67 | out: URI = new URI("."), 68 | properties: 
Seq[Node] = Seq(), 69 | profile: ReasoningProfile = ReasoningProfile.RDFS, 70 | writeToSingleFile: Boolean = false, 71 | sortedOutput: Boolean = false, 72 | parallelism: Int = 4) 73 | 74 | // read ReasoningProfile enum 75 | implicit val profilesRead: scopt.Read[ReasoningProfile.Value] = 76 | scopt.Read.reads(ReasoningProfile forName _.toLowerCase()) 77 | 78 | // read ReasoningProfile enum 79 | implicit val nodeRead: scopt.Read[Node] = 80 | scopt.Read.reads(NodeFactory.createURI(_)) 81 | 82 | // the CLI parser 83 | val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") { 84 | 85 | head("RDFGraphMaterializer", "0.1.0") 86 | 87 | opt[Seq[URI]]('i', "input").required().valueName(",,..."). 88 | action((x, c) => c.copy(in = x)). 89 | text("path to file or directory that contains the input files (in N-Triples format)") 90 | 91 | opt[URI]('o', "out").required().valueName(""). 92 | action((x, c) => c.copy(out = x)). 93 | text("the output directory") 94 | 95 | opt[Seq[Node]]("properties").optional().valueName(",,..."). 96 | action((x, c) => { 97 | c.copy(properties = x) 98 | }). 99 | text("list of properties for which the transitive closure will be computed (used only for profile 'transitive')") 100 | 101 | opt[ReasoningProfile]('p', "profile").required().valueName("{rdfs | rdfs-simple | owl-horst | transitive}"). 102 | action((x, c) => c.copy(profile = x)). 103 | text("the reasoning profile") 104 | 105 | opt[Unit]("single-file").optional().action((_, c) => 106 | c.copy(writeToSingleFile = true)).text("write the output to a single file in the output directory") 107 | 108 | opt[Unit]("sorted").optional().action((_, c) => 109 | c.copy(sortedOutput = true)).text("sorted output of the triples (per file)") 110 | 111 | opt[Int]("parallelism").optional().action((x, c) => 112 | c.copy(parallelism = x)).text("the degree of parallelism, i.e. 
the number of Spark partitions used in the Spark operations") 113 | 114 | help("help").text("prints this usage text") 115 | 116 | checkConfig(c => 117 | if (c.profile == TRANSITIVE && c.properties.isEmpty) failure("Option --properties must not be empty if profile 'transitive' is set") 118 | else success) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/outliers/anomalydetection/AnomalyDetection.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.outliers.anomalydetection 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.outliers.anomalydetection._ 6 | import net.sansa_stack.rdf.spark.io._ 7 | import org.apache.jena.riot.Lang 8 | import org.apache.spark.rdd.RDD 9 | import org.apache.spark.sql.{ Dataset, Row, SaveMode, SparkSession } 10 | import org.apache.spark.storage.StorageLevel 11 | 12 | object AnomalyDetection { 13 | def main(args: Array[String]) { 14 | parser.parse(args, Config()) match { 15 | case Some(config) => 16 | run(config.in, config.threshold, config.anomalyListLimit, config.numofpartition, config.out) 17 | case None => 18 | println(parser.usage) 19 | } 20 | } 21 | 22 | def run( 23 | input: String, 24 | JSimThreshold: Double, 25 | anomalyListLimit: Int, 26 | numofpartition: Int, 27 | output: String): Unit = { 28 | 29 | println("==================================================") 30 | println("| Distributed Anomaly Detection |") 31 | println("==================================================") 32 | 33 | val spark = SparkSession.builder 34 | .appName(s"Anomaly Detection example ( $input )") 35 | .master("local[*]") 36 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 37 | .getOrCreate() 38 | 39 | // N-Triples Reader 40 | val lang = Lang.NTRIPLES 41 | val triplesRDD = spark.rdf(lang)(input).repartition(numofpartition).persist() 42 | 43 | // predicated that are not interesting for evaluation 44 | val wikiList = List("wikiPageRevisionID,wikiPageID") 45 | 46 | // filtering numeric literal having xsd type double,integer,nonNegativeInteger and squareKilometre 47 | val objList = List( 48 | "http://www.w3.org/2001/XMLSchema#double", 49 | "http://www.w3.org/2001/XMLSchema#integer", 50 | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger", 51 | "http://dbpedia.org/datatype/squareKilometre") 52 | 53 | // helful for considering only Dbpedia type as their will be yago type,wikidata type also 54 | val triplesType = List("http://dbpedia.org/ontology") 55 | 56 | // some of the supertype which are present for most of the subject 57 | val listSuperType = List( 58 | "http://dbpedia.org/ontology/Activity", "http://dbpedia.org/ontology/Organisation", 59 | "http://dbpedia.org/ontology/Agent", "http://dbpedia.org/ontology/SportsLeague", 60 | "http://dbpedia.org/ontology/Person", "http://dbpedia.org/ontology/Athlete", 61 | "http://dbpedia.org/ontology/Event", "http://dbpedia.org/ontology/Place", 62 | "http://dbpedia.org/ontology/PopulatedPlace", "http://dbpedia.org/ontology/Region", 63 | "http://dbpedia.org/ontology/Species", "http://dbpedia.org/ontology/Eukaryote", 64 | "http://dbpedia.org/ontology/Location") 65 | 66 | // hypernym URI 67 | val hypernym = "http://purl.org/linguistics/gold/hypernym" 68 | 69 | var clusterOfSubject: RDD[(Set[(String, String, Object)])] = null 70 | println("AnomalyDetection-using ApproxSimilarityJoin function 
with the help of HashingTF ") 71 | 72 | val outDetection = new AnomalyWithHashingTF(triplesRDD, objList, triplesType, JSimThreshold, listSuperType, spark, hypernym, numofpartition) 73 | clusterOfSubject = outDetection.run() 74 | 75 | val setData = clusterOfSubject.repartition(1000).persist(StorageLevel.MEMORY_AND_DISK) 76 | val setDataStore = setData.map(f => f.toSeq) 77 | 78 | val setDataSize = setDataStore.filter(f => f.size > anomalyListLimit) 79 | 80 | val test = setDataSize.map(f => outDetection.iqr2(f, anomalyListLimit)) 81 | 82 | val testfilter = test.filter(f => f.size > 0) // .distinct() 83 | val testfilterDistinct = testfilter.flatMap(f => f) 84 | testfilterDistinct.saveAsTextFile(output) 85 | setData.unpersist() 86 | 87 | spark.stop() 88 | } 89 | 90 | case class Config( 91 | in: String = "", 92 | threshold: Double = 0.0, 93 | anomalyListLimit: Int = 0, 94 | numofpartition: Int = 0, 95 | out: String = "") 96 | 97 | val parser = new scopt.OptionParser[Config]("SANSA -Outlier Detection") { 98 | 99 | head("Detecting Numerical Outliers in dataset") 100 | 101 | opt[String]('i', "input").required().valueName(""). 102 | action((x, c) => c.copy(in = x)). 103 | text("path to file that contains RDF data (in N-Triples format)") 104 | 105 | // Jaccard similarity threshold value 106 | opt[Double]('t', "threshold").required(). 107 | action((x, c) => c.copy(threshold = x)). 108 | text("the Jaccard Similarity value") 109 | 110 | // number of partition 111 | opt[Int]('a', "numofpartition").required(). 112 | action((x, c) => c.copy(numofpartition = x)). 113 | text("Number of partition") 114 | 115 | // List limit for calculating IQR 116 | opt[Int]('c', "anomalyListLimit").required(). 117 | action((x, c) => c.copy(anomalyListLimit = x)). 118 | text("the outlier List Limit") 119 | 120 | // output file path 121 | opt[String]('o', "output").required().valueName(""). 122 | action((x, c) => c.copy(out = x)). 
123 | text("the output directory") 124 | 125 | help("help").text("prints this usage text") 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | sansa-examples-parent_2.11 6 | 7 | 8 | net.sansa-stack 9 | sansa-parent 10 | 0.7.1 11 | 12 | 13 | pom 14 | SANSA-Examples - Parent 15 | SANSA examples 16 | 17 | 18 | Smart Data Analytics (SDA) Research Group 19 | http://sda.tech/ 20 | 21 | 22 | 2017 23 | 24 | http://sansa-stack.net/ 25 | 26 | 27 | 28 | GNU GENERAL PUBLIC LICENSE, Version 3 29 | http://www.gnu.org/licenses/gpl-3.0.txt 30 | repo 31 | 32 | 33 | 34 | 35 | 36 | jlehmann 37 | Jens Lehmann 38 | jens.lehmann@cs.uni-bonn.de 39 | Department of Computer Science, University of Bonn 40 | http://www.informatik.uni-bonn.de/ 41 | 42 | principle maintainer 43 | 44 | 0 45 | 46 | 47 | 48 | 49 | GitHub 50 | https://github.com/SANSA-Stack/SANSA-Examples 51 | 52 | 53 | 54 | 55 | sansa-examples-flink 56 | sansa-examples-spark 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | ${project.groupId} 65 | sansa-rdf-spark_${scala.binary.version} 66 | ${project.version} 67 | 68 | 69 | 70 | ${project.groupId} 71 | sansa-rdf-flink_${scala.binary.version} 72 | ${project.version} 73 | 74 | 75 | 76 | ${project.groupId} 77 | sansa-owl-spark_${scala.binary.version} 78 | ${project.version} 79 | 80 | 81 | ${project.groupId} 82 | sansa-owl-flink_${scala.binary.version} 83 | ${project.version} 84 | 85 | 86 | 87 | 88 | ${project.groupId} 89 | sansa-query-spark_${scala.binary.version} 90 | 91 | ${project.version} 92 | 93 | 94 | 95 | 96 | ${project.groupId} 97 | sansa-inference-spark_${scala.binary.version} 98 | ${sansa.version} 99 | 100 | 101 | ${project.groupId} 102 | sansa-inference-flink_${scala.binary.version} 103 | ${sansa.version} 104 | 105 | 106 | 107 | 108 | ${project.groupId} 109 | sansa-ml-spark_${scala.binary.version} 110 | ${project.version} 111 | 112 | 113 | ${project.groupId} 114 | sansa-ml-flink_${scala.binary.version} 115 | ${project.version} 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | root-dir 125 | 126 | 127 | ${project.basedir}/../../scalastyle-config.xml 128 | 129 | 130 | 131 | ${project.basedir}/../scalastyle-config.xml 132 | 133 | 134 | 135 | 136 | 137 | 138 | oss-sonatype 139 | oss-sonatype 140 | https://oss.sonatype.org/content/repositories/snapshots/ 141 | 142 | true 143 | 144 | 145 | 146 | apache-snapshot 147 | Apache repository (snapshots) 148 | https://repository.apache.org/content/repositories/snapshots/ 149 | 150 | true 151 | 152 | 153 | 154 | maven.aksw.internal 155 | AKSW Release Repository 156 | http://maven.aksw.org/archiva/repository/internal 157 | 158 | true 159 | 160 | 161 | false 162 | 163 | 164 | 165 | maven.aksw.snapshots 166 | AKSW Snapshot Repository 167 | http://maven.aksw.org/archiva/repository/snapshots 168 | 169 | false 170 | 171 | 172 | true 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/inference/RDFGraphInference.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.inference 2 | 3 | import java.io.{File, FileInputStream} 4 | import java.net.URI 5 | import java.util.Properties 6 | 7 | import scala.io.Source 8 | import com.typesafe.config.ConfigFactory 9 | import 
de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer 10 | import net.sansa_stack.inference.flink.data.{RDFGraphLoader, RDFGraphWriter} 11 | import net.sansa_stack.inference.flink.forwardchaining.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS} 12 | import net.sansa_stack.inference.rules.{RDFSLevel, ReasoningProfile} 13 | import net.sansa_stack.inference.rules.ReasoningProfile._ 14 | import org.apache.flink.api.java.utils.ParameterTool 15 | import org.apache.flink.api.scala.ExecutionEnvironment 16 | import org.apache.flink.configuration.Configuration 17 | import org.apache.flink.runtime.webmonitor.WebMonitorUtils 18 | 19 | object RDFGraphInference { 20 | 21 | def main(args: Array[String]) { 22 | parser.parse(args, Config()) match { 23 | case Some(config) => 24 | run( 25 | args, 26 | config.in, 27 | config.out, 28 | config.profile, 29 | config.writeToSingleFile, 30 | config.sortedOutput, 31 | config.propertiesFile, 32 | config.jobName) 33 | case None => 34 | println(parser.usage) 35 | } 36 | } 37 | 38 | def run( 39 | args: Array[String], 40 | input: Seq[URI], 41 | output: URI, 42 | profile: ReasoningProfile, 43 | writeToSingleFile: Boolean, 44 | sortedOutput: Boolean, 45 | propertiesFile: File, 46 | jobName: String): Unit = { 47 | 48 | // read reasoner optimization properties 49 | val reasonerConf = 50 | if (propertiesFile != null) ConfigFactory.parseFile(propertiesFile) 51 | else ConfigFactory.load("reasoner") 52 | 53 | // get params 54 | val params: ParameterTool = ParameterTool.fromArgs(args) 55 | 56 | println("======================================") 57 | println("| RDF Graph Inference |") 58 | println("======================================") 59 | 60 | val conf = new Configuration() 61 | conf.setInteger("taskmanager.network.numberOfBuffers", 3000) 62 | 63 | // set up the execution environment 64 | val env = ExecutionEnvironment.getExecutionEnvironment 65 | env.getConfig.disableSysoutLogging() 66 | 67 | // make parameters available in the web interface 68 | env.getConfig.setGlobalJobParameters(params) 69 | 70 | // load triples from disk 71 | val graph = RDFGraphLoader.loadFromDisk(input, env) 72 | println(s"|G| = ${graph.size}") 73 | 74 | // create reasoner 75 | val reasoner = profile match { 76 | case RDFS | RDFS_SIMPLE => 77 | val r = new ForwardRuleReasonerRDFS(env) 78 | r.useSchemaBroadCasting = reasonerConf.getBoolean("reasoner.rdfs.schema.broadcast") 79 | r.extractSchemaTriplesInAdvance = 80 | reasonerConf.getBoolean("reasoner.rdfs.schema.extractTriplesInAdvance") 81 | if (profile == RDFS_SIMPLE) r.level = RDFSLevel.SIMPLE 82 | r 83 | case OWL_HORST => new ForwardRuleReasonerOWLHorst(env) 84 | } 85 | 86 | // compute inferred graph 87 | val inferredGraph = reasoner.apply(graph) 88 | println(s"|G_inf| = ${inferredGraph.size}") 89 | 90 | val jn = if (jobName.isEmpty) s"RDF Graph Inference ($profile)" else jobName 91 | } 92 | 93 | // the config object 94 | case class Config( 95 | in: Seq[URI] = Seq(), 96 | out: URI = new URI("."), 97 | profile: ReasoningProfile = ReasoningProfile.RDFS, 98 | writeToSingleFile: Boolean = false, 99 | sortedOutput: Boolean = false, 100 | propertiesFile: File = null, 101 | jobName: String = "") // new File(getClass.getResource("reasoner.properties").toURI) 102 | 103 | // read ReasoningProfile enum 104 | implicit val profilesRead: scopt.Read[ReasoningProfile.Value] = 105 | scopt.Read.reads(ReasoningProfile forName _.toLowerCase()) 106 | 107 | // the CLI parser 108 | val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") { 109 | 
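// Example invocation (a sketch, not part of the original example): only the class name
// and the CLI flags come from this file; the jar name and paths are placeholders.
//   flink run -c net.sansa_stack.examples.flink.inference.RDFGraphInference \
//     sansa-examples-flink_2.11-0.7.1.jar -i /data/input.nt -o /data/output --profile rdfs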
head("RDFGraphMaterializer", "0.1.0") 110 | 111 | opt[Seq[URI]]('i', "input") 112 | .required() 113 | .valueName("") 114 | .action((x, c) => c.copy(in = x)) 115 | .text("path to file or directory that contains the input files (in N-Triple format)") 116 | 117 | opt[URI]('o', "out") 118 | .required() 119 | .valueName("") 120 | .action((x, c) => c.copy(out = x)) 121 | .text("the output directory") 122 | 123 | opt[Unit]("single-file") 124 | .optional() 125 | .action((_, c) => c.copy(writeToSingleFile = true)) 126 | .text("write the output to a single file in the output directory") 127 | 128 | opt[Unit]("sorted") 129 | .optional() 130 | .action((_, c) => c.copy(sortedOutput = true)) 131 | .text("sorted output of the triples (per file)") 132 | 133 | opt[ReasoningProfile]('p', "profile") 134 | .required() 135 | .valueName("{rdfs | rdfs-simple | owl-horst}") 136 | .action((x, c) => c.copy(profile = x)) 137 | .text("the reasoning profile") 138 | 139 | opt[File]('p', "prop") 140 | .optional() 141 | .valueName("") 142 | .action((x, c) => c.copy(propertiesFile = x)) 143 | .text("the (optional) properties file which allows some more advanced options") 144 | 145 | opt[String]('j', "jobName") 146 | .optional() 147 | .valueName("") 148 | .action((x, c) => c.copy(jobName = x)) 149 | .text("the name of the Flink job that occurs also in the Web-UI") 150 | 151 | help("help").text("prints this usage text") 152 | 153 | } 154 | parser.showUsageOnError 155 | } 156 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/GraphQuery.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import scala.concurrent.duration.Duration 4 | 5 | import net.sansa_stack.query.spark.graph.jena.SparqlParser 6 | import net.sansa_stack.query.spark.graph.jena.model.{ Config => modelConfig, IntermediateResult, SparkExecutionModel } 7 | import net.sansa_stack.rdf.spark.partition.graph.algo._ 8 | import org.apache.jena.graph.Node 9 | import org.apache.jena.riot.Lang 10 | import org.apache.log4j.Logger 11 | import org.apache.spark.graphx.Graph 12 | 13 | object GraphQuery { 14 | 15 | def main(args: Array[String]): Unit = { 16 | 17 | parser.parse(args, Config()) match { 18 | case Some(config) => run(config) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(config: Config): Unit = { 25 | 26 | println("===========================================") 27 | println("| SANSA - Graph query example |") 28 | println("===========================================") 29 | 30 | val log = Logger.getLogger(GraphQuery.getClass) 31 | 32 | // set configures for query engine model 33 | modelConfig.setAppName("SANSA Graph Query") 34 | .setInputGraphFile(config.input) 35 | .setInputQueryFile(config.query.head) 36 | .setLang(Lang.NTRIPLES) 37 | .setMaster("local[*]") 38 | 39 | // load graph 40 | log.info("Start to load graph") 41 | 42 | SparkExecutionModel.createSparkSession() 43 | val session = SparkExecutionModel.getSession 44 | 45 | // apply graph partitioning algorithm 46 | val prevG = SparkExecutionModel.getGraph 47 | var g: Graph[Node, Node] = null 48 | var msg: String = null 49 | var numParts: Int = 0 50 | var numIters: Int = 0 51 | 52 | // Set number of partitions (if config.numParts is 0, number of partitions equals to that of previous graph) 53 | config.numParts match { 54 | case 0 => numParts = prevG.edges.partitions.length 55 | case other 
=> numParts = other 56 | } 57 | 58 | config.numIters match { 59 | case 0 => 60 | case other => numIters = other 61 | } 62 | 63 | var partAlgo: PartitionAlgo[Node, Node] = null 64 | 65 | config.algo match { 66 | case "SSHP" => 67 | if (numIters == 0) { 68 | // Partition algorithm will use default number of iterations 69 | partAlgo = new SubjectHashPartition[Node, Node](prevG, session, numParts) 70 | } else { 71 | partAlgo = new SubjectHashPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 72 | } 73 | msg = "Start to execute subject semantic hash partitioning" 74 | case "OSHP" => 75 | if (numIters == 0) { 76 | partAlgo = new ObjectHashPartition[Node, Node](prevG, session, numParts) 77 | } else { 78 | partAlgo = new ObjectHashPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 79 | } 80 | msg = "Start to execute object semantic hash partitioning" 81 | case "SOSHP" => 82 | if (numIters == 0) { 83 | partAlgo = new SOHashPartition[Node, Node](prevG, session, numParts) 84 | } else { 85 | partAlgo = new SOHashPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 86 | } 87 | msg = "Start to execute subject-object semantic hash partitioning" 88 | case "PP" => 89 | if (numIters == 0) { 90 | partAlgo = new PathPartition[Node, Node](prevG, session, numParts) 91 | } else { 92 | partAlgo = new PathPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 93 | } 94 | msg = "Start to execute path partitioning" 95 | case "" => 96 | case other => println(s"the input $other doesn't match any options, no algorithm will be applied.") 97 | } 98 | 99 | var start = 0L 100 | var end = 0L 101 | 102 | if (partAlgo != null) { 103 | log.info(msg) 104 | start = System.currentTimeMillis() 105 | g = partAlgo.partitionBy().cache() 106 | SparkExecutionModel.loadGraph(g) 107 | end = System.currentTimeMillis() 108 | log.info("Graph partitioning execution time: " + Duration(end - start, "millis").toMillis + " ms") 109 | } 110 | 111 | // query executing 112 | log.info("Start to execute queries") 113 | 114 | config.query.foreach { path => 115 | log.info("Query file: " + path) 116 | modelConfig.setInputQueryFile(path) 117 | val sp = new SparqlParser(modelConfig.getInputQueryFile) 118 | sp.getOps.foreach { ops => 119 | val tag = ops.getTag 120 | log.info("Operation " + tag + " start") 121 | start = System.currentTimeMillis() 122 | ops.execute() 123 | end = System.currentTimeMillis() 124 | log.info(tag + " execution time: " + Duration(end - start, "millis").toMillis + " ms") 125 | } 126 | } 127 | 128 | // print results to console 129 | if (config.print) { 130 | log.info("print final result") 131 | val results = IntermediateResult.getFinalResult.cache() 132 | if (results.count() >= 10) { 133 | log.info("Too long results(more than 10)") 134 | } else { 135 | results.collect().foreach(println(_)) 136 | } 137 | results.unpersist() 138 | } 139 | } 140 | 141 | case class Config(input: String = "", query: Seq[String] = null, print: Boolean = false, algo: String = "", 142 | numParts: Int = 0, numIters: Int = 0) 143 | 144 | val parser: scopt.OptionParser[Config] = new scopt.OptionParser[Config]("Spark-Graph-Example") { 145 | 146 | head("SANSA-Query-Graph-Example") 147 | 148 | opt[String]('i', "input").required().valueName(""). 149 | action((x, c) => c.copy(input = x)). 150 | text("path to file that contains the data (in N-Triples format).") 151 | 152 | opt[Seq[String]]('q', "query").required().valueName(", ..."). 153 | action((x, c) => c.copy(query = x)). 
154 | text("files that contain SPARQL queries.") 155 | 156 | opt[Boolean]('p', "print").optional().valueName("Boolean"). 157 | action((_, c) => c.copy(print = true)). 158 | text("print the result to the console(maximum 10 rows), default: false.") 159 | 160 | opt[String]('a', "algorithm").optional().valueName(""). 161 | action((x, c) => c.copy(algo = x)). 162 | text("choose one graph partitioning algorithm, default: no algorithm applied.") 163 | 164 | opt[Int]('n', "number of partitions").optional().valueName("") 165 | .action((x, c) => c.copy(numParts = x)) 166 | .text("set the number of partitions.") 167 | 168 | opt[Int]('t', "number of iterations").optional().valueName("") 169 | .action((x, c) => c.copy(numIters = x)) 170 | .text("set the number of iterations.") 171 | 172 | help("help").text("prints this usage text") 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/resources/ont_manchester.owl: -------------------------------------------------------------------------------- 1 | Prefix: : 2 | Prefix: bar: 3 | Prefix: foo: 4 | Prefix: owl: 5 | Prefix: rdf: 6 | Prefix: rdfs: 7 | Prefix: xml: 8 | Prefix: xsd: 9 | 10 | 11 | 12 | Ontology: 13 | 14 | 15 | Annotations: 16 | bar:hasTitle "Title", 17 | description "A longer 18 | description running over 19 | several lines", 20 | foo:hasName "Name" 21 | 22 | AnnotationProperty: bar:annProp1 23 | 24 | SubPropertyOf: 25 | bar:annProp2 26 | 27 | Domain: 28 | 29 | 30 | Range: 31 | 32 | 33 | 34 | AnnotationProperty: bar:annProp2 35 | 36 | 37 | AnnotationProperty: bar:hasTitle 38 | 39 | 40 | AnnotationProperty: bar:label 41 | 42 | 43 | AnnotationProperty: description 44 | 45 | 46 | AnnotationProperty: foo:ann 47 | 48 | 49 | AnnotationProperty: foo:hasName 50 | 51 | 52 | Datatype: bar:dtype1 53 | 54 | 55 | Datatype: bar:dtype2 56 | 57 | 58 | Datatype: rdf:PlainLiteral 59 | 60 | 61 | Datatype: rdfs:Literal 62 | 63 | 64 | Datatype: xsd:int 65 | 66 | 67 | Datatype: xsd:integer 68 | 69 | 70 | Datatype: xsd:string 71 | 72 | 73 | ObjectProperty: bar:Prop2 74 | 75 | 76 | ObjectProperty: bar:asymmObjProp 77 | 78 | Characteristics: 79 | Asymmetric 80 | 81 | 82 | ObjectProperty: bar:invObjProp1 83 | 84 | EquivalentTo: 85 | inverse (bar:objProp1) 86 | 87 | Characteristics: 88 | InverseFunctional 89 | 90 | InverseOf: 91 | bar:objProp1 92 | 93 | 94 | ObjectProperty: bar:objProp1 95 | 96 | EquivalentTo: 97 | bar:sameAsObjProp1 98 | 99 | DisjointWith: 100 | bar:objProp2 101 | 102 | Characteristics: 103 | Transitive, 104 | Reflexive 105 | 106 | Domain: 107 | bar:Cls1 108 | 109 | Range: 110 | bar:AllIndividualsCls 111 | 112 | InverseOf: 113 | bar:invObjProp1 114 | 115 | 116 | ObjectProperty: bar:objProp2 117 | 118 | DisjointWith: 119 | bar:objProp1 120 | 121 | Characteristics: 122 | Functional, 123 | Symmetric, 124 | Irreflexive 125 | 126 | 127 | ObjectProperty: bar:sameAsObjProp1 128 | 129 | EquivalentTo: 130 | bar:objProp1 131 | 132 | 133 | ObjectProperty: bar:subObjProp1 134 | 135 | SubPropertyOf: 136 | bar:objProp1 137 | 138 | 139 | DataProperty: bar:dataProp1 140 | 141 | Characteristics: 142 | Functional 143 | 144 | Domain: 145 | bar:Cls1 146 | 147 | Range: 148 | xsd:string 149 | 150 | EquivalentTo: 151 | bar:sameAsDataProp1 152 | 153 | DisjointWith: 154 | bar:dataProp2 155 | 156 | 157 | DataProperty: bar:dataProp2 158 | 159 | Domain: 160 | bar:Cls1 161 | 162 | Range: 163 | xsd:int 164 | 165 | DisjointWith: 166 | bar:dataProp1 167 | 168 | 169 | DataProperty: bar:sameAsDataProp1 170 | 
171 | EquivalentTo: 172 | bar:dataProp1 173 | 174 | 175 | DataProperty: bar:subDataProp1 176 | 177 | SubPropertyOf: 178 | bar:dataProp1 179 | 180 | 181 | Class: bar:AllIndividualsCls 182 | 183 | EquivalentTo: 184 | {foo:indivA , foo:indivB} 185 | 186 | 187 | Class: bar:AllProp1Cls1 188 | 189 | EquivalentTo: 190 | bar:objProp1 only bar:Cls1 191 | 192 | 193 | Class: bar:Cl1OrNegate 194 | 195 | DisjointUnionOf: 196 | bar:Cls1, bar:ComplementCls1 197 | 198 | 199 | Class: bar:Cls1 200 | 201 | Annotations: 202 | bar:label "Class 1" 203 | 204 | SubClassOf: 205 | bar:UnionCls 206 | 207 | HasKey: 208 | bar:dataProp1 209 | 210 | 211 | Class: bar:Cls2 212 | 213 | 214 | Class: bar:ComplementCls 215 | 216 | EquivalentTo: 217 | not (bar:Cls1) 218 | 219 | 220 | Class: bar:ComplementCls1 221 | 222 | 223 | Class: bar:DataAllIntGT10 224 | 225 | EquivalentTo: 226 | bar:dataProp2 only xsd:integer[>= 10] 227 | 228 | 229 | Class: bar:DataExact5Prop1 230 | 231 | EquivalentTo: 232 | bar:dataProp1 exactly 5 rdfs:Literal 233 | 234 | 235 | Class: bar:DataHasVal5 236 | 237 | EquivalentTo: 238 | bar:dataProp2 value 5 239 | 240 | 241 | Class: bar:DataMax2Prop1 242 | 243 | EquivalentTo: 244 | bar:dataProp1 max 2 rdfs:Literal 245 | 246 | DisjointWith: 247 | bar:DataMin3Prop1 248 | 249 | 250 | Class: bar:DataMin3Prop1 251 | 252 | EquivalentTo: 253 | bar:dataProp1 min 3 rdfs:Literal 254 | 255 | DisjointWith: 256 | bar:DataMax2Prop1 257 | 258 | 259 | Class: bar:DataSomeIntLT20 260 | 261 | EquivalentTo: 262 | bar:dataProp2 some xsd:integer[< 20] 263 | 264 | 265 | Class: bar:Exact5Prop1Cls1 266 | 267 | EquivalentTo: 268 | bar:objProp1 exactly 5 bar:Cls1 269 | 270 | 271 | Class: bar:HasSelfProp1 272 | 273 | EquivalentTo: 274 | bar:objProp1 some Self 275 | 276 | 277 | Class: bar:HasValProp1IndivB 278 | 279 | EquivalentTo: 280 | bar:objProp1 value foo:indivB 281 | 282 | 283 | Class: bar:IntersectionCls 284 | 285 | EquivalentTo: 286 | bar:Cls1 287 | and bar:Cls2 288 | 289 | 290 | Class: bar:Max3Prop1Cls1 291 | 292 | EquivalentTo: 293 | bar:objProp1 max 3 bar:Cls1 294 | 295 | 296 | Class: bar:Min2Prop1Cls1 297 | 298 | EquivalentTo: 299 | bar:objProp1 min 2 bar:Cls1 300 | 301 | 302 | Class: bar:SomeProp1Cls1 303 | 304 | EquivalentTo: 305 | bar:objProp1 some bar:Cls1 306 | 307 | 308 | Class: bar:UnionCls 309 | 310 | EquivalentTo: 311 | bar:Cls1 or bar:Cls2 312 | 313 | 314 | Individual: foo:indivA 315 | 316 | Types: 317 | bar:Cls1 318 | 319 | Facts: 320 | bar:objProp1 foo:indivB, 321 | bar:dataProp1 "ABCD", 322 | not bar:dataProp2 23 323 | 324 | SameAs: 325 | foo:sameAsIndivA 326 | 327 | DifferentFrom: 328 | foo:indivB 329 | 330 | 331 | Individual: foo:indivB 332 | 333 | Facts: 334 | not bar:Prop2 foo:indivA, 335 | bar:dataProp1 "BCDE" 336 | 337 | DifferentFrom: 338 | foo:indivA 339 | 340 | 341 | Individual: foo:sameAsIndivA 342 | 343 | SameAs: 344 | foo:indivA 345 | 346 | 347 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/ont_manchester.owl: -------------------------------------------------------------------------------- 1 | Prefix: : 2 | Prefix: bar: 3 | Prefix: foo: 4 | Prefix: owl: 5 | Prefix: rdf: 6 | Prefix: rdfs: 7 | Prefix: xml: 8 | Prefix: xsd: 9 | 10 | 11 | 12 | Ontology: 13 | 14 | 15 | Annotations: 16 | bar:hasTitle "Title", 17 | description "A longer 18 | description running over 19 | several lines", 20 | foo:hasName "Name" 21 | 22 | AnnotationProperty: bar:annProp1 23 | 24 | SubPropertyOf: 25 | bar:annProp2 26 | 27 | Domain: 28 | 29 | 30 | 
Range: 31 | 32 | 33 | 34 | AnnotationProperty: bar:annProp2 35 | 36 | 37 | AnnotationProperty: bar:hasTitle 38 | 39 | 40 | AnnotationProperty: bar:label 41 | 42 | 43 | AnnotationProperty: description 44 | 45 | 46 | AnnotationProperty: foo:ann 47 | 48 | 49 | AnnotationProperty: foo:hasName 50 | 51 | 52 | Datatype: bar:dtype1 53 | 54 | 55 | Datatype: bar:dtype2 56 | 57 | 58 | Datatype: rdf:PlainLiteral 59 | 60 | 61 | Datatype: rdfs:Literal 62 | 63 | 64 | Datatype: xsd:int 65 | 66 | 67 | Datatype: xsd:integer 68 | 69 | 70 | Datatype: xsd:string 71 | 72 | 73 | ObjectProperty: bar:Prop2 74 | 75 | 76 | ObjectProperty: bar:asymmObjProp 77 | 78 | Characteristics: 79 | Asymmetric 80 | 81 | 82 | ObjectProperty: bar:invObjProp1 83 | 84 | EquivalentTo: 85 | inverse (bar:objProp1) 86 | 87 | Characteristics: 88 | InverseFunctional 89 | 90 | InverseOf: 91 | bar:objProp1 92 | 93 | 94 | ObjectProperty: bar:objProp1 95 | 96 | EquivalentTo: 97 | bar:sameAsObjProp1 98 | 99 | DisjointWith: 100 | bar:objProp2 101 | 102 | Characteristics: 103 | Transitive, 104 | Reflexive 105 | 106 | Domain: 107 | bar:Cls1 108 | 109 | Range: 110 | bar:AllIndividualsCls 111 | 112 | InverseOf: 113 | bar:invObjProp1 114 | 115 | 116 | ObjectProperty: bar:objProp2 117 | 118 | DisjointWith: 119 | bar:objProp1 120 | 121 | Characteristics: 122 | Functional, 123 | Symmetric, 124 | Irreflexive 125 | 126 | 127 | ObjectProperty: bar:sameAsObjProp1 128 | 129 | EquivalentTo: 130 | bar:objProp1 131 | 132 | 133 | ObjectProperty: bar:subObjProp1 134 | 135 | SubPropertyOf: 136 | bar:objProp1 137 | 138 | 139 | DataProperty: bar:dataProp1 140 | 141 | Characteristics: 142 | Functional 143 | 144 | Domain: 145 | bar:Cls1 146 | 147 | Range: 148 | xsd:string 149 | 150 | EquivalentTo: 151 | bar:sameAsDataProp1 152 | 153 | DisjointWith: 154 | bar:dataProp2 155 | 156 | 157 | DataProperty: bar:dataProp2 158 | 159 | Domain: 160 | bar:Cls1 161 | 162 | Range: 163 | xsd:int 164 | 165 | DisjointWith: 166 | bar:dataProp1 167 | 168 | 169 | DataProperty: bar:sameAsDataProp1 170 | 171 | EquivalentTo: 172 | bar:dataProp1 173 | 174 | 175 | DataProperty: bar:subDataProp1 176 | 177 | SubPropertyOf: 178 | bar:dataProp1 179 | 180 | 181 | Class: bar:AllIndividualsCls 182 | 183 | EquivalentTo: 184 | {foo:indivA , foo:indivB} 185 | 186 | 187 | Class: bar:AllProp1Cls1 188 | 189 | EquivalentTo: 190 | bar:objProp1 only bar:Cls1 191 | 192 | 193 | Class: bar:Cl1OrNegate 194 | 195 | DisjointUnionOf: 196 | bar:Cls1, bar:ComplementCls1 197 | 198 | 199 | Class: bar:Cls1 200 | 201 | Annotations: 202 | bar:label "Class 1" 203 | 204 | SubClassOf: 205 | bar:UnionCls 206 | 207 | HasKey: 208 | bar:dataProp1 209 | 210 | 211 | Class: bar:Cls2 212 | 213 | 214 | Class: bar:ComplementCls 215 | 216 | EquivalentTo: 217 | not (bar:Cls1) 218 | 219 | 220 | Class: bar:ComplementCls1 221 | 222 | 223 | Class: bar:DataAllIntGT10 224 | 225 | EquivalentTo: 226 | bar:dataProp2 only xsd:integer[>= 10] 227 | 228 | 229 | Class: bar:DataExact5Prop1 230 | 231 | EquivalentTo: 232 | bar:dataProp1 exactly 5 rdfs:Literal 233 | 234 | 235 | Class: bar:DataHasVal5 236 | 237 | EquivalentTo: 238 | bar:dataProp2 value 5 239 | 240 | 241 | Class: bar:DataMax2Prop1 242 | 243 | EquivalentTo: 244 | bar:dataProp1 max 2 rdfs:Literal 245 | 246 | DisjointWith: 247 | bar:DataMin3Prop1 248 | 249 | 250 | Class: bar:DataMin3Prop1 251 | 252 | EquivalentTo: 253 | bar:dataProp1 min 3 rdfs:Literal 254 | 255 | DisjointWith: 256 | bar:DataMax2Prop1 257 | 258 | 259 | Class: bar:DataSomeIntLT20 260 | 261 | EquivalentTo: 262 | bar:dataProp2 
some xsd:integer[< 20] 263 | 264 | 265 | Class: bar:Exact5Prop1Cls1 266 | 267 | EquivalentTo: 268 | bar:objProp1 exactly 5 bar:Cls1 269 | 270 | 271 | Class: bar:HasSelfProp1 272 | 273 | EquivalentTo: 274 | bar:objProp1 some Self 275 | 276 | 277 | Class: bar:HasValProp1IndivB 278 | 279 | EquivalentTo: 280 | bar:objProp1 value foo:indivB 281 | 282 | 283 | Class: bar:IntersectionCls 284 | 285 | EquivalentTo: 286 | bar:Cls1 287 | and bar:Cls2 288 | 289 | 290 | Class: bar:Max3Prop1Cls1 291 | 292 | EquivalentTo: 293 | bar:objProp1 max 3 bar:Cls1 294 | 295 | 296 | Class: bar:Min2Prop1Cls1 297 | 298 | EquivalentTo: 299 | bar:objProp1 min 2 bar:Cls1 300 | 301 | 302 | Class: bar:SomeProp1Cls1 303 | 304 | EquivalentTo: 305 | bar:objProp1 some bar:Cls1 306 | 307 | 308 | Class: bar:UnionCls 309 | 310 | EquivalentTo: 311 | bar:Cls1 or bar:Cls2 312 | 313 | 314 | Individual: foo:indivA 315 | 316 | Types: 317 | bar:Cls1 318 | 319 | Facts: 320 | bar:objProp1 foo:indivB, 321 | bar:dataProp1 "ABCD", 322 | not bar:dataProp2 23 323 | 324 | SameAs: 325 | foo:sameAsIndivA 326 | 327 | DifferentFrom: 328 | foo:indivB 329 | 330 | 331 | Individual: foo:indivB 332 | 333 | Facts: 334 | not bar:Prop2 foo:indivA, 335 | bar:dataProp1 "BCDE" 336 | 337 | DifferentFrom: 338 | foo:indivA 339 | 340 | 341 | Individual: foo:sameAsIndivA 342 | 343 | SameAs: 344 | foo:indivA 345 | 346 | 347 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/mappings.ttl: -------------------------------------------------------------------------------- 1 | @prefix exp: 2 | @prefix rdfs: 3 | @prefix dcterms: 4 | @prefix schema: 5 | @prefix gr: 6 | @prefix npg: 7 | @prefix foaf: # correct http://xmlns.com/foaf/0.1/ 8 | @prefix edm: 9 | @prefix rr: 10 | @prefix rml: 11 | @prefix nosql: 12 | @prefix bsbm: 13 | @prefix dc: 14 | @prefix rev: 15 | 16 | <#ProducerMapping> 17 | rml:logicalSource [ 18 | rml:source "//Producer"; 19 | nosql:store nosql:jdbc 20 | ]; 21 | rr:subjectMap [ 22 | rr:template "http://example.com/{nr}"; 23 | rr:class bsbm:Producer 24 | ]; 25 | 26 | rr:predicateObjectMap [ 27 | rr:predicate edm:country; 28 | rr:objectMap [rml:reference "country"] 29 | ]; 30 | 31 | rr:predicateObjectMap [ 32 | rr:predicate rdfs:label; 33 | rr:objectMap [rml:reference "label"] 34 | ]; 35 | 36 | rr:predicateObjectMap [ 37 | rr:predicate bsbm:publisher; 38 | rr:objectMap [rml:reference "publisher"] 39 | ]; 40 | 41 | rr:predicateObjectMap [ 42 | rr:predicate foaf:homepage; 43 | rr:objectMap [rml:reference "homepage"] 44 | ]; 45 | 46 | rr:predicateObjectMap [ 47 | rr:predicate exp:publishDate; 48 | rr:objectMap [rml:reference "publishDate"] 49 | ]; 50 | 51 | rr:predicateObjectMap [ 52 | rr:predicate rdfs:comment; 53 | rr:objectMap [rml:reference "comment"] 54 | ]. 
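# Every mapping in this file follows the same pattern: rml:logicalSource names the
# source (a table, collection or file) together with its backing store via nosql:store
# (jdbc, parquet, csv, mongodb, cassandra); rr:subjectMap mints the entity IRI from a
# key attribute through rr:template; and each rr:predicateObjectMap binds one RDF
# predicate to one source attribute via rml:reference.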
55 | 56 | <#ReviewMapping> 57 | rml:logicalSource [ 58 | rml:source "src/main/resources/Data/review.parquet"; 59 | nosql:store nosql:parquet 60 | ]; 61 | rr:subjectMap [ 62 | rr:template "http://example.com/{nr}"; 63 | rr:class schema:Review 64 | ]; 65 | 66 | rr:predicateObjectMap [ 67 | rr:predicate dc:publisher; 68 | rr:objectMap [rml:reference "publisher"] 69 | ]; 70 | 71 | rr:predicateObjectMap [ 72 | rr:predicate rev:text; 73 | rr:objectMap [rml:reference "text"] 74 | ]; 75 | 76 | rr:predicateObjectMap [ 77 | rr:predicate bsbm:producer; 78 | rr:objectMap [rml:reference "producer"] 79 | ]; 80 | 81 | rr:predicateObjectMap [ 82 | rr:predicate rev:reviewer; 83 | rr:objectMap [rml:reference "person"] 84 | ]; 85 | 86 | rr:predicateObjectMap [ 87 | rr:predicate bsbm:rating3; 88 | rr:objectMap [rml:reference "rating3"] 89 | ]; 90 | 91 | rr:predicateObjectMap [ 92 | rr:predicate dcterms:language; 93 | rr:objectMap [rml:reference "language"] 94 | ]; 95 | 96 | rr:predicateObjectMap [ 97 | rr:predicate bsbm:reviewFor; 98 | rr:objectMap [rml:reference "product"] 99 | ]; 100 | 101 | rr:predicateObjectMap [ 102 | rr:predicate dc:title; 103 | rr:objectMap [rml:reference "title"] 104 | ]; 105 | 106 | rr:predicateObjectMap [ 107 | rr:predicate bsbm:rating2; 108 | rr:objectMap [rml:reference "rating2"] 109 | ]; 110 | 111 | rr:predicateObjectMap [ 112 | rr:predicate bsbm:reviewDate; 113 | rr:objectMap [rml:reference "reviewDate"] 114 | ]; 115 | 116 | rr:predicateObjectMap [ 117 | rr:predicate exp:publishDate; 118 | rr:objectMap [rml:reference "publishDate"] 119 | ]; 120 | 121 | rr:predicateObjectMap [ 122 | rr:predicate bsbm:rating1; 123 | rr:objectMap [rml:reference "rating1"] 124 | ]; 125 | 126 | rr:predicateObjectMap [ 127 | rr:predicate bsbm:rating4; 128 | rr:objectMap [rml:reference "rating4"] 129 | ]. 130 | 131 | <#PersonMapping> 132 | rml:logicalSource [ 133 | rml:source "src/main/resources/Data/person.csv"; 134 | nosql:store nosql:csv 135 | ]; 136 | rr:subjectMap [ 137 | rr:template "http://example.com/{nr}"; 138 | rr:class foaf:Person 139 | ]; 140 | 141 | rr:predicateObjectMap [ 142 | rr:predicate edm:country; 143 | rr:objectMap [rml:reference "country"] 144 | ]; 145 | 146 | rr:predicateObjectMap [ 147 | rr:predicate dc:publisher; 148 | rr:objectMap [rml:reference "publisher"] 149 | ]; 150 | 151 | rr:predicateObjectMap [ 152 | rr:predicate foaf:mbox_sha1sum; 153 | rr:objectMap [rml:reference "mbox_sha1sum"] 154 | ]; 155 | 156 | rr:predicateObjectMap [ 157 | rr:predicate exp:publishDate; 158 | rr:objectMap [rml:reference "publishDate"] 159 | ]; 160 | 161 | rr:predicateObjectMap [ 162 | rr:predicate foaf:name; 163 | rr:objectMap [rml:reference "name"] 164 | ]. 
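# Note that the subject template key follows the source: the MongoDB-backed Offer
# mapping below uses the document's {_id}, while the JDBC/Parquet/CSV/Cassandra
# sources use their {nr} column.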
165 | 166 | <#OfferMapping> 167 | rml:logicalSource [ 168 | rml:source "//Offer"; 169 | nosql:store nosql:mongodb 170 | ]; 171 | rr:subjectMap [ 172 | rr:template "http://example.com/{_id}"; 173 | rr:class schema:Offer 174 | ]; 175 | 176 | rr:predicateObjectMap [ 177 | rr:predicate bsbm:validTo; 178 | rr:objectMap [rml:reference "validTo"] 179 | ]; 180 | 181 | rr:predicateObjectMap [ 182 | rr:predicate dc:publisher; 183 | rr:objectMap [rml:reference "publisher"] 184 | ]; 185 | 186 | rr:predicateObjectMap [ 187 | rr:predicate bsbm:producer; 188 | rr:objectMap [rml:reference "producer"] 189 | ]; 190 | 191 | rr:predicateObjectMap [ 192 | rr:predicate bsbm:product; 193 | rr:objectMap [rml:reference "product"] 194 | ]; 195 | 196 | rr:predicateObjectMap [ 197 | rr:predicate gr:validFrom; 198 | rr:objectMap [rml:reference "validFrom"] 199 | ]; 200 | 201 | rr:predicateObjectMap [ 202 | rr:predicate bsbm:deliveryDays; 203 | rr:objectMap [rml:reference "deliveryDays"] 204 | ]; 205 | 206 | rr:predicateObjectMap [ 207 | rr:predicate exp:publishDate; 208 | rr:objectMap [rml:reference "publishDate"] 209 | ]; 210 | 211 | rr:predicateObjectMap [ 212 | rr:predicate npg:webpage; 213 | rr:objectMap [rml:reference "offerWebpage"] 214 | ]; 215 | 216 | rr:predicateObjectMap [ 217 | rr:predicate bsbm:price; 218 | rr:objectMap [rml:reference "price"] 219 | ]; 220 | 221 | rr:predicateObjectMap [ 222 | rr:predicate bsbm:vendor; 223 | rr:objectMap [rml:reference "vendor"] 224 | ]. 225 | 226 | <#ProductMapping> 227 | rml:logicalSource [ 228 | rml:source "//Product"; 229 | nosql:store nosql:cassandra 230 | ]; 231 | rr:subjectMap [ 232 | rr:template "http://example.com/{nr}"; 233 | rr:class bsbm:Product 234 | ]; 235 | 236 | rr:predicateObjectMap [ 237 | rr:predicate rdfs:label; 238 | rr:objectMap [rml:reference "label"] 239 | ]; 240 | 241 | rr:predicateObjectMap [ 242 | rr:predicate dc:publisher; 243 | rr:objectMap [rml:reference "publisher"] 244 | ]; 245 | 246 | rr:predicateObjectMap [ 247 | rr:predicate bsbm:producer; 248 | rr:objectMap [rml:reference "producer"] 249 | ]; 250 | 251 | rr:predicateObjectMap [ 252 | rr:predicate bsbm:productPropertyNumeric4; 253 | rr:objectMap [rml:reference "propertyNum4"] 254 | ]; 255 | 256 | rr:predicateObjectMap [ 257 | rr:predicate bsbm:productPropertyNumeric3; 258 | rr:objectMap [rml:reference "propertyNum3"] 259 | ]; 260 | 261 | rr:predicateObjectMap [ 262 | rr:predicate bsbm:productPropertyTextual6; 263 | rr:objectMap [rml:reference "propertyTex6"] 264 | ]; 265 | 266 | rr:predicateObjectMap [ 267 | rr:predicate bsbm:productPropertyTextual1; 268 | rr:objectMap [rml:reference "propertyTex1"] 269 | ]; 270 | 271 | rr:predicateObjectMap [ 272 | rr:predicate bsbm:productPropertyNumeric1; 273 | rr:objectMap [rml:reference "propertyNum1"] 274 | ]; 275 | 276 | rr:predicateObjectMap [ 277 | rr:predicate bsbm:productPropertyTextual4; 278 | rr:objectMap [rml:reference "propertyTex4"] 279 | ]; 280 | 281 | rr:predicateObjectMap [ 282 | rr:predicate bsbm:productPropertyNumeric5; 283 | rr:objectMap [rml:reference "propertyNum5"] 284 | ]; 285 | 286 | rr:predicateObjectMap [ 287 | rr:predicate bsbm:productPropertyNumeric2; 288 | rr:objectMap [rml:reference "propertyNum2"] 289 | ]; 290 | 291 | rr:predicateObjectMap [ 292 | rr:predicate exp:publishDate; 293 | rr:objectMap [rml:reference "publishDate"] 294 | ]; 295 | 296 | rr:predicateObjectMap [ 297 | rr:predicate bsbm:productPropertyTextual5; 298 | rr:objectMap [rml:reference "propertyTex5"] 299 | ]; 300 | 301 | rr:predicateObjectMap [ 302 | 
rr:predicate bsbm:productPropertyTextual3; 303 | rr:objectMap [rml:reference "propertyTex3"] 304 | ]; 305 | 306 | rr:predicateObjectMap [ 307 | rr:predicate rdfs:comment; 308 | rr:objectMap [rml:reference "comment"] 309 | ]; 310 | 311 | rr:predicateObjectMap [ 312 | rr:predicate bsbm:productPropertyNumeric6; 313 | rr:objectMap [rml:reference "propertyNum6"] 314 | ]; 315 | 316 | rr:predicateObjectMap [ 317 | rr:predicate bsbm:productPropertyTextual2; 318 | rr:objectMap [rml:reference "propertyTex2"] 319 | ]. -------------------------------------------------------------------------------- /sansa-examples-flink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | 7 | 8 | net.sansa-stack 9 | sansa-examples-parent_2.11 10 | 0.7.1 11 | 12 | 13 | sansa-examples-flink_2.11 14 | SANSA Examples - Apache Flink 15 | SANSA examples for Apache Flink 16 | 17 | 18 | 19 | 20 | ${project.groupId} 21 | sansa-rdf-flink_${scala.binary.version} 22 | 23 | 24 | 25 | 26 | ${project.groupId} 27 | sansa-owl-flink_${scala.binary.version} 28 | 29 | 30 | org.mortbay.jetty 31 | jetty 32 | 33 | 34 | org.mortbay.jetty 35 | jetty-util 36 | 37 | 38 | 39 | 40 | 41 | 42 | ${project.groupId} 43 | sansa-inference-flink_${scala.binary.version} 44 | 45 | 46 | 47 | 48 | 49 | 50 | ${project.groupId} 51 | sansa-ml-flink_${scala.binary.version} 52 | 53 | 54 | 55 | 56 | org.scala-lang 57 | scala-library 58 | 59 | 60 | 61 | 62 | org.apache.flink 63 | flink-scala_${scala.binary.version} 64 | 65 | 66 | org.apache.flink 67 | flink-streaming-scala_${scala.binary.version} 68 | 69 | 70 | org.apache.flink 71 | flink-clients_${scala.binary.version} 72 | 73 | 74 | 75 | 76 | com.typesafe.scala-logging 77 | scala-logging_${scala.binary.version} 78 | 79 | 80 | 81 | 82 | com.github.scopt 83 | scopt_${scala.binary.version} 84 | 85 | 86 | 87 | 88 | junit 89 | junit 90 | test 91 | 92 | 93 | org.specs2 94 | specs2-core_${scala.binary.version} 95 | test 96 | 97 | 98 | org.scalatest 99 | scalatest_${scala.binary.version} 100 | test 101 | 102 | 103 | 104 | 105 | src/main/scala 106 | src/test/scala 107 | 108 | 109 | org.apache.maven.plugins 110 | maven-compiler-plugin 111 | 112 | 113 | net.alchim31.maven 114 | scala-maven-plugin 115 | 116 | 117 | 118 | compile 119 | testCompile 120 | 121 | 122 | 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-shade-plugin 127 | 128 | 129 | package 130 | 131 | shade 132 | 133 | 134 | 135 | 136 | com.esotericsoftware.kryo:kryo 137 | com.esotericsoftware.minlog:minlog 138 | com.fasterxml.jackson.core:jackson* 139 | com.github.scopt:scopt_2.11 140 | com.google.code.findbugs:jsr305 141 | com.google.protobuf:protobuf-java 142 | com.jamesmurty.utils:java-xmlbuilder 143 | com.jcraft:jsch 144 | com.sun.jersey:jersey-core 145 | com.thoughtworks.paranamer:paranamer 146 | com.typesafe:config 147 | com.typesafe.akka:* 148 | com.twitter:chill_2.11 149 | commons-beanutils:commons-beanutils-bean-collections 150 | commons-cli:commons-cli 151 | commons-codec:commons-codec 152 | commons-configuration:commons-configuration 153 | commons-daemon:commons-daemon 154 | commons-digester:commons-digester 155 | commons-el:commons-el 156 | commons-lang:commons-lang 157 | commons-logging:commons-logging 158 | commons-net:commons-net 159 | commons-collections:commons-collections 160 | io.netty:netty* 161 | io.dropwizard.metrics:metrics* 162 | javax.activation:activation 163 | javax.servlet:servlet-api 164 | javax.xml.bind:jaxb-api 165 | javax.xml.stream:stax-api 166 | jline:jline 167 
| junit:junit 168 | org.apache.avro:avro 169 | org.apache.commons:commons-compress 170 | org.apache.commons:commons-lang3 171 | org.apache.commons:commons-math3 172 | org.apache.flink:* 173 | org.apache.zookeeper:zookeeper 174 | org.clapper:grizzled-slf4j_2.11 175 | org.codehaus.jackson:jackson-* 176 | org.javassist:javassist 177 | org.mortbay.jetty:jetty-util 178 | org.objenesis:objenesis 179 | org.scala-lang:* 180 | org.uncommons.maths:uncommons-maths 181 | org.xerial.snappy:snappy-java 182 | xmlenc:xmlenc 183 | 184 | 185 | 186 | 187 | *:* 188 | 189 | META-INF/*.SF 190 | META-INF/*.DSA 191 | META-INF/*.RSA 192 | 193 | 194 | 195 | false 196 | 197 | 198 | 199 | 200 | 201 | org.scalastyle 202 | scalastyle-maven-plugin 203 | 204 | 205 | 206 | 207 | 208 | 209 | 211 | build-jar 212 | 213 | false 214 | 215 | 216 | 217 | org.apache.flink 218 | flink-scala_${scala.binary.version} 219 | provided 220 | 221 | 222 | org.apache.flink 223 | flink-streaming-scala_${scala.binary.version} 224 | provided 225 | 226 | 227 | org.apache.flink 228 | flink-clients_${scala.binary.version} 229 | provided 230 | 231 | 232 | 233 | 234 | 235 | 236 | maven.aksw.snapshots 237 | AKSW Snapshot Repository 238 | http://maven.aksw.org/archiva/repository/snapshots 239 | 240 | false 241 | 242 | 243 | true 244 | 245 | 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright, Smart Data Analytics Research Group (http://sda.tech) 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 
191 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv: -------------------------------------------------------------------------------- 1 | nr,label,comment,producer,propertyNum1,propertyNum2,propertyNum3,propertyNum4,propertyNum5,propertyNum6,propertyTex1,propertyTex2,propertyTex3,propertyTex4,propertyTex5,propertyTex6,publisher,publishDate 2 | 1,'manner gatemen','lordlings dialyzed hoardings palmitate resisters redesigned trowing fledging disinters occasionally refry objective comedown senders attendance calculous redux zed bidets subacute swinks berhymed pumping overassured outrush corteges chitters civilest chiffonniers kimonos protects epizootic centimos dismast boomage issues aggrieves sociably ammoniacs polliwogs labyrinths infatuates whiteout dissentients newmown flunkey titillated caduceus rediscovered breaststrokes schillings endorsement cheerleaders nonconcurrent intoned outpaces inkle superpowers habaneras subsoils paramours laughed',1,831,312,735,-1,150,-1,'guzzling jillion psychotherapists substantiation nonuple deluded snowmelt interlards overrefinement annoyed stuntedness calcimining stereophonically','recommendation embezzler reconviction misproportions discountenances callings defacers crummiest triglyceride','decentralizations impacting promulgations bibliotherapy murexes professorships locomotes durning lyncher spoonier abhorrence assize goglets','distracts universally trashily enervator',null,null,1,2000-11-01 3 | 2,'coterie','naughtiness illuminating careerers computerese brakeless mesozoa lineate fulminant batholiths mohawks exhalation paraguayan alcaldes foulings primordially almightily placed flukey improvises pommelled sententious bookmark rashers truces mordanted shunter praxeological causable compassed decertified transubstantiation automatize boxful befouling tragedienne visiting alliums triangulates hounders compressively camphorates mammons armories scrapes hanger nucleation loftless refractoriness nonhabitual paperer aridness jingliest sportswriters gained efficiently marshals tomogram tambura pureeing doughty compromised antineutrinos revertible picadors oddballs hominies drek irradiations fearlessness cortin hussy museful pupfish bulletproofing geminates nacre subsistence presifted abhors whereat wanes mooing refused biodegradability oghamic stouter venosities recopying supplantation buxomly foregoers pathologist welches comicality manifestos untangles mongols sluices demits inventers entitled taxability fancifulness claimed gastroenterology geotropically glenwood alack autochthonous nabob preempts alternativeness xviii fruiter deist electorally cooker voce abbeys composts jugsful glowing basset worshipfully rebait bushwhacker implorer jackknifing paraguayan enrolls blazonry dendrological pavilioned cully epistles foreshorten couth usurps legibilities yammered somnolently',1,1891,1040,1731,-1,992,-1,'scalded decoct practitioners infolds levered quartan calcined untransferable auditoria','charred payment linoleums cowsheds preconceive undergrounder nosier sawhorse coerces assn turgidities venins obliged homogenize','componential redemonstrates dewberries pearlers triplicates planked goddaughters largeness citator',null,'palpal thoroughly enactive swimmiest syrups',null,1,2005-03-08 4 | 3,'ahchoo','chanceman ventrals phlegmy vower matureness fictionalize iliads gasman tumours afeared tuneably 
insurrectionaries enfolds cisterns adduction leafage maharajas prancingly mannerless vitrines radiocast insulates stilbestrol compartmented appearers undercurrents gunnel hopes launchings deluder overemotional unfolder bioflavonoid snorter thawed instinctively halidome classed towages unctions carcase recollects germanely disputation ciscoes unsettle calculableness artiest disprovable soporose rankly fuguing pox recontamination windiness hypothermic chutzpahs lilliput cognoscing divestitive misbehaves culpableness mutilators biogeographic inmesh flumed apeak doweling reheels unshut avascular redirected wiverns graveled quae pithiest unities monks boniness dancing gleemen unimpeachably reaming trekked calculabilities sphinxes protozoans toasty understanding elks ultrasonography dreggier slashingly pimpernels survivors cultivating swaybacks immodestly pennsylvanian vitrine unpronounceable gallicism laggers undefeated deers heinousness cocobolo steadily gendarmerie blatantly spinosely totalizes invited preyers bandoleers airmailed quotationally intercuts updates digests accusatorially dusking stoppages littles cadaveric holsteins repertoires',1,594,434,227,-1,-1,-1,'whirs radiation overman violative adulators benumbs disaffectedly cuttages bluebeard','vichies resituates breads visard unfought adjunctly bractlets foamed durability amends retailer creaking inseminator sedatest rodeos','unreels voicer acidifiers shredding fistula uniformer chivies immunological grimacer spoilt admiringly',null,'hyperbolas knouted eulogists',null,1,2001-05-18 5 | 4,'reexhibit wrang tarts','bihourly prosiest matrixes jaggedest violinists dins archipelagos heighths limber azons acceptee husbander ashram relativeness grannies rectangles unearthing conies capered toeshoe fervour domination impishly satirically photonegative kaleidoscopic morticians eyewaters rapturousness animater granting twier geosynclinal relearns cosmopolis maizes gemmy unmixt mumbler laundries selenography unpin findings mistrusts porgy discontentedly bolter hulkier windily whoremaster sovietizes expellees reordain fondness nightspots boggier microvasculature fellatee holders inebrious upping mucking yugoslavs blondness appal premenstrually fiddled disfavors sketchers inhumanities tightest unsatisfied cherubically stonefly mentally buddhists atelier eighteens smartly retaliates marbleizing trappings egomaniacal undercook roadstead reascend dekaliter grinning retakers paintbrushes cichlid ashlars conventual smoothen gombos appurtenance botchy nonchalantly atremble thieve overflows daimons enwinding crystalloidal reproval nontaxable crossbars troupes photoreception tortuousness caromed creamier sphenoid authors nabbing mistimes enactment agoraphobic footslog boycotting overabounding cinerary vixenishly rearwards eczematous chuckler farming drudging ruinable soothest highted incontestabilities archdeacons agendums somatotypology garnished headband curves mows',1,940,290,1788,-1,-1,-1,'skinfuls uncertainty craving incas maenades fitfulness mas creditors fixity churchlier assumed routines','quartered mishmosh booms globoid syndicating orcas','visas deprecatingly conceptualist reactivation comebacks matchmaking disinformation muffled granulator basketries warthogs childbeds empowers',null,null,'pantomimist riverbanks loosens foretime managers updatable unzip bayonets bruins abstemiousness advocates destitutely increasing',1,2006-09-01 6 | 5,'vacillator mortifying','workaholics situating repartees mobilizers anorak magdalen inattentiveness filmstrips 
gusting runways tressiest obeyable lapps mooch defamatory whirs stealer pyramided motivates lapidates syllables showily orientates unhat smelting efficiencies calumniation adolescents loyalest steamboats excitability platy americanist photomicrograph wantonness parabolas massacring heatless episodes hopsack currying kb miscalled unweakened binned compactest pansophies palomino enureses ternate plottage brightly alkalinization underclerk fishmeal moulter valse kaleyards thaws hedonisms veiled tapes recension concusses enlargements mislabel requiems epitomizes clipsheets quixotries uplinking handsprings flexors concurrences snarls postulator involutions cortically upshifts dinette footfalls untaxed personification subdirectors fleabanes greases paradoxical discusses nondiscriminating heaped aroused machining broccolis synergist toppled techie scarlets scattiest magnetometer wiling pretensions impenetrableness argils feedbox halvahs sachems snobbiest impetigos hawknose zulus inadequately potbellied fetters sensuality revalidate elflock bucketful',1,834,450,736,202,27,-1,'egoisms welterweight friendship topsoiling securer reevaluated proclaimer murker awaiting ferried quinone dengue toxoids','unacceptance antibody reinflamed rehardens','steeper aerifies unum overbalanced disciplined vulnerably achromat gustative celebrants nonfreezing kindnesses fresher analogs viviparous cosets',null,null,null,1,2001-09-16 7 | 6,'desolates waging reveilles','poleward sagest impellers enjoyed tailpipes raying prevued flickering inshrining subscript resubmit grandstander pellagras cunts paleographer obsessiveness windlasses songfest inartistically epergnes guarani sonarman hassocks yardages alterability tetralogy treaded elevators anathematize surveyed evolvements juicier suffered commonweals wallpapering ionicity inaptness refreezes pepsine shammes reappraises squishiest ethicists bedraggling shivering crimsoned bondable archness occulter overdramatizes liverishness dandles boosting embalms restamp chaining strugglers skullduggeries hums manger pomades alternately pressurized gamed materfamilias supervenes darkest finer seels triste pronators giddiest tensing frauds vapouring sucroses emulsifiable cesarian octagons stoically suppurative',1,374,536,1567,813,-1,-1,'catarrhs triadisms foreseen wennier scurviest focuser recrowned cropped signatures plotting asks disembarkations rickettsiae clubmen reinduction','chilblains intertribal balsamic exotism reintroduced charade cubage oarless humanoids observingly mudcaps ubiquities decapitating','digged glockenspiels hearted sapid weeny colliers cylindrical treeing ferries proselyte victorians signifies','limeades iterances expressionist sculked supplanter commitment indemnifying sudoral calcifying captivator versified prating pithier daily nearsightedly',null,null,1,2004-07-20 8 | 7,'tither pettiness','buskin recompensable capacitances bootee lockets enticement disservices strategists licensors kennelled barding autocrats lathworks blueings tiresomely outdistanced gothicize ejectable materiels homogeneously paintiest sneezers workboat interphone ascii unconnected instrumentalist topographical disgruntling pederast sceptring racier evens trimeter banes misdemeanor webless rehinge mitigative defamers naturalist accustoms reclean valiancies pilled bearcats tents demultiplexes skulking publicized typecase supervisors escapeway recoinage blinding execs conformism treasures durums rondelle drawing oppressing bores coplots monogamousness zoospores unlikelihood preengaged 
overhands limekiln penetrator ultrasound lymphocytic radiolucencies chefs feoffment conventionalism cascading machos abundantly godchild frequented misplacing repairers surfy antedating chunter cancelers photoflash mistaught testacies',1,1900,774,66,-1,-1,-1,'solidest incarnation arrayers gruelingly honorands slobs','builders preadjusting cpl fosterage trulls fakers toepiece maximally bouldery hampshirites','dermatologies flopover noctambulation frizzler submissively reconsolidate clapping enunciators championed nigglingly tongers liquoring reminder podiatrists tussocks',null,null,null,1,2004-09-03 9 | 8,'resettling uncoagulated lowish','reprice renovating chevrolets refolds fantastical polarity ennuis franchisers undiluted macaroons overexcited habitability reaching ethers gratias biers wretchedly warps poetess forthrightness kinaesthetically lukewarmly decorators viselike ionized pumpernickel durations legitimation hazings protracting beechier monopolizes yids flavonols scaler letup condignly gipsies spoonily forging workups drapers oculist aldehydes subassociation forejudge graciousness carromed mal alpinisms alongshore proceeded institutionally ultraconservatives harpooner vises puffers trainful bordellos wayfarings cumquats jilter strewed imputing sibilantly venosities nosiness wharfs comediennes reprices manias trichroic valvar vaporizing obsolescently feedboxes radiophone antibacterial singlets soaper deists untangle undergraduates kwacha discombobulation chargers slumping servantship vittled jadishly superabundant gibbeting signatary frypan horsehides nonclassical sharecropped friendless flushers corrupted utilized emigres acolytes shouldering rassling surfs carvers braw weatherglasses soakers haggis hoisted cowlicks middlemost caroused reattach tenably tympanic binder foresters agamic busboys',1,1743,136,867,-1,-1,-1,'unrestored kneeler chaplet newts deckle vegas overeducating','replicates corks cinematheques charmers licitation geezer recombines admitters underpasses nighty ensnarled pardonable imperialness','cowiest crimpers unmuzzles repacify poilu',null,null,null,1,2003-07-24 10 | 9,'procreators taiwanese antigene','candors dictaphone youngsters stet millers impecuniousness likability comparts endemics reinsert clerkship halfbeak expansionary metaphysician effrontery helpfully hogs secures micks tacks oilstone guises clangs bendable maladministrative mopped halitoses quirts amortizable buggered dewclaw loners hydrozoon zinging reequips saddlebags exorcises laudability topside novelle chemoreceptor temporized bloodstreams housemaids sharks bannock resuscitative motets gruels choosier apologizes anticyclonic equalities struggled crappiness reuniter immixes ozonizes quashes unlivable backslider feminizing bretons overhauling streaky forejudge weepers dourness viceless hies tubercular refurnish taggers inquisitional rebecs mouthwash deaconing metastasis weans adjectivally troths apperceptive nonprotectively beckoning commiseration guttersnipes doctoring gymkhana sheepmen apologizers ortolan toadyish quatrains revalue straying restacks frivolously pierced fauces babus novelistic gills regresses fossilization lipless sulphurize ohmage procrastinator alienator travelled rivets thrivers pingrasses steps campier mismarks plushier proteinaceous bunco snoozes glitters finical simulants hippies skims engrave soundtracks huffish nonnegotiable eclectically hatsful shaftings disobeyers identifies wingspread',1,1504,1010,1517,90,1980,-1,'exceptionally replan aiming bedstraws tragically 
pollutants prefabricating isolation sextets rewarders approvement productive','salaamed minatory ruleless microsurgeons circumambulate sapphisms nonsexually adjoined noticing deescalated habiting touristy unequalled lucidities discords','matureness hydrocephaloid certifies undyingly doylies spellbinds prenatally govt unpolarized togae overjoying shirts reediness ecclesiastics awakens','arrowing corncake crumbliness recommits viperish monkery','unappetizingly quarrier speaks malthus overlook fatted archaists refreshed gayness',null,1,2005-07-22 11 | 10,'lignites rallying specters','filaree cirque vibrations leukemoid enquirer drossier prescience housewifeliness timed contentiousness constricting scramblers shivarees foilable dreidl tinfuls foolhardier downloading stuccos interpersonal doggish mislabel lowered solubility beguiler aboil slavey strolling prorating dimming descents benthos viced bruising hetero romps polymerically undecided runners libidinal fustic escapements obols sandlots channelizes notational gongs elks misspellings heedfully accelerative labella phlebotomy preeners diviners hugeness zilches amortizable roughness pullers remunerates doomsdays brisks coordinately unequaled stopcocks consistently bafflers drypoints nannies vialing trolleys ologist uncork rigatonis airhead remodification sereneness playsuit microtomy skewness reelecting prevailingly musicians sightings bylined reconveyed preconception overanxious',1,133,141,1580,1194,525,-1,'sinfully scampi slaveries mishandles ailment waggish tonicity ablutions randomizes innervations','healthiness lights disassociates spinel countenancing expedites roped helloed querists halloo assignment rendezvouses dentistries','climaxed filmlands frills poulticing nakedest jabs','chlorites amused psychologists cloches adducers requisitioner gapes tessellation consecratory stilting adders unclothes flabbiest detrain gardening','eyedroppers levied carroms uncourageous tormented destining',null,1,2005-09-29 12 | --------------------------------------------------------------------------------
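The RML mappings and the CSV/Parquet part files above are consumed together at query time: a SPARQL query is evaluated against the heterogeneous sources that the mappings describe, with a separate config file supplying the connection details. The snippet below is a minimal sketch of that flow, assuming the sparqlDL extension that SANSA's Spark query layer (net.sansa_stack.query.spark.query._) exposes in this release; the file paths are illustrative and the exact method signature may differ between SANSA versions.

import scala.io.Source

import org.apache.spark.sql.SparkSession

// Assumption: sparqlDL is provided as an implicit extension on SparkSession
// by SANSA's Spark query layer in the release used by these examples.
import net.sansa_stack.query.spark.query._

object DataLakeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SANSA DataLake sketch")
      .master("local[*]")
      .getOrCreate()

    // Illustrative paths; adjust to wherever the query, mappings and config live.
    val queryFile    = "src/main/resources/datalake/queries/Q1.sparql"
    val mappingsFile = "src/main/resources/datalake/mappings.ttl"
    val configFile   = "src/main/resources/datalake/config"

    val query = Source.fromFile(queryFile).getLines().mkString("\n")

    // The mappings tell the engine which logical source (Parquet, CSV, MongoDB,
    // Cassandra) backs each class, and the config supplies the connection details;
    // the SPARQL query is then decomposed into per-source queries and joined in Spark.
    val result = spark.sparqlDL(query, mappingsFile, configFile)
    result.show(truncate = false)

    spark.stop()
  }
}

The nosql:store values in the mappings are what let the engine choose an appropriate Spark connector for each logical source, so the same SPARQL query can span file-based and NoSQL-backed data without changes.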