├── sansa-examples-flink ├── config │ ├── csswrapper │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── Dockerfile │ │ ├── scripts │ │ │ └── inject-etc-hosts.sh │ │ └── frontend │ │ │ ├── css │ │ │ ├── bde-hdfs.css │ │ │ ├── bde-hadoop.css │ │ │ └── bde-flink.css │ │ │ └── default.conf │ ├── integrator │ │ └── user-interfaces │ └── hadoop │ │ └── hadoop.env ├── Dockerfile ├── .gitignore ├── src │ └── main │ │ ├── resources │ │ ├── Clustering_sampledata.nt │ │ ├── ont_functional.owl │ │ └── ont_manchester.owl │ │ └── scala │ │ └── net │ │ └── sansa_stack │ │ └── examples │ │ └── flink │ │ ├── rdf │ │ ├── TripleReader.scala │ │ ├── TripleWriter.scala │ │ ├── RDFStats.scala │ │ └── TripleOps.scala │ │ ├── ml │ │ └── clustering │ │ │ └── RDFByModularityClustering.scala │ │ ├── owl │ │ └── OWLReaderDataSet.scala │ │ └── inference │ │ └── RDFGraphInference.scala ├── README.md ├── docker-compose.yml └── pom.xml ├── sansa-examples-spark ├── src │ └── main │ │ ├── resources │ │ ├── datalake │ │ │ ├── data │ │ │ │ ├── offer.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-86272586-5266-423f-b936-829019bf5d8c.csv │ │ │ │ │ ├── .part-00001-86272586-5266-423f-b936-829019bf5d8c.csv.crc │ │ │ │ │ └── .part-00000-86272586-5266-423f-b936-829019bf5d8c.csv.crc │ │ │ │ ├── person.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv │ │ │ │ │ ├── .part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc │ │ │ │ │ ├── .part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc │ │ │ │ │ └── part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv │ │ │ │ ├── product.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv │ │ │ │ │ ├── .part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc │ │ │ │ │ ├── .part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc │ │ │ │ │ └── part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv │ │ │ │ ├── review.csv │ │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ │ ├── part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv │ │ │ │ │ ├── .part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc │ │ │ │ │ └── .part-00000-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc │ │ │ │ └── producer.csv │ │ │ │ │ └── producer.csv │ │ │ ├── queries │ │ │ │ └── Q1.sparql │ │ │ ├── config │ │ │ └── mappings.ttl │ │ ├── BorderFlow_Sample1.txt │ │ ├── metrics.conf │ │ ├── log4j.properties │ │ ├── Clustering_sampledata.nt │ │ ├── ont_functional.owl │ │ └── ont_manchester.owl │ │ └── scala │ │ └── net │ │ └── sansa_stack │ │ └── examples │ │ └── spark │ │ ├── rdf │ │ ├── TripleReader.scala │ │ ├── TripleWriter.scala │ │ ├── RDFStats.scala │ │ ├── PageRank.scala │ │ ├── TripleOps.scala │ │ └── RDFQualityAssessment.scala │ │ ├── query │ │ ├── HDTQuery.scala │ │ ├── Semantic.scala │ │ ├── DataLake.scala │ │ ├── Sparklify.scala │ │ └── GraphQuery.scala │ │ ├── ml │ │ ├── clustering │ │ │ ├── BorderFlowClustering.scala │ │ │ ├── SilviaClustering.scala │ │ │ ├── RDFByModularityClustering.scala │ │ │ └── RDFGraphPIClustering.scala │ │ ├── kernel │ │ │ └── RDFGraphKernel.scala │ │ ├── mining │ │ │ └── MineRules.scala │ │ ├── kge │ │ │ └── CrossValidation.scala │ │ └── outliers │ │ │ └── anomalydetection │ │ │ └── AnomalyDetection.scala │ │ ├── owl │ │ ├── OWLReaderRDD.scala │ │ └── OWLReaderDataset.scala │ │ └── inference │ │ ├── axioms │ │ └── RDFGraphInference.scala │ │ └── triples │ │ └── RDFGraphInference.scala ├── config │ ├── csswrapper │ │ ├── Makefile │ │ ├── Dockerfile │ │ ├── scripts │ │ │ └── inject-etc-hosts.sh │ │ └── frontend │ │ │ 
├── bde-css │ │ │ ├── bde-hdfs.css │ │ │ ├── bde-spark-master.css │ │ │ └── bde-hadoop.css │ │ │ └── default.conf │ ├── integrator │ │ └── user-interfaces │ └── hadoop │ │ └── hadoop.env ├── .gitignore ├── Dockerfile ├── docker-compose-sansa-examples.yml ├── README.md └── docker-compose.yml ├── .travis.yml ├── .gitignore ├── run-examples-wip.sh ├── README.md ├── pom.xml └── LICENSE /sansa-examples-flink/config/csswrapper/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/Makefile: -------------------------------------------------------------------------------- 1 | hosts: 2 | bash scripts/inject-etc-hosts.sh 3 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/Makefile: -------------------------------------------------------------------------------- 1 | hosts: 2 | bash scripts/inject-etc-hosts.sh 3 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/part-00001-86272586-5266-423f-b936-829019bf5d8c.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/.part-00001-86272586-5266-423f-b936-829019bf5d8c.csv.crc: -------------------------------------------------------------------------------- 1 | crc 
-------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/.part-00001-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/.part-00001-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/.part-00001-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/BorderFlow_Sample1.txt: -------------------------------------------------------------------------------- 1 | 52 1412 2 | 53 2542 3 | 48 52 4 | 47 2385 5 | 46 46 6 | 48 1412 7 | 48 46 8 | 4315 48 9 | 481 1412 10 | 1412 52 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | sudo: false 3 | cache: 4 | directories: 5 | - $HOME/.m2 6 | scala: 7 | - 2.11.11 8 | jdk: 9 | - openjdk8 10 | script: 11 | - mvn scalastyle:check 12 | - mvn clean install -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/offer.csv/.part-00000-86272586-5266-423f-b936-829019bf5d8c.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/offer.csv/.part-00000-86272586-5266-423f-b936-829019bf5d8c.csv.crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/.part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/person.csv/.part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv.crc -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/review.csv/.part-00000-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/review.csv/.part-00000-0e3df86e-9a8f-4cf3-837d-dfc1c92b5aba.csv.crc -------------------------------------------------------------------------------- /sansa-examples-spark/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | pom.xml.tag 3 | pom.xml.releaseBackup 4 | pom.xml.versionsBackup 5 | pom.xml.next 6 | release.properties 7 | dependency-reduced-pom.xml 8 | buildNumber.properties 9 | *.iml 10 | /graph 11 | *.idea 12 | .cache-main 13 | .classpath 14 | .project 15 | .settings 16 | 
-------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/.part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SANSA-Stack/Archived-SANSA-Examples/HEAD/sansa-examples-spark/src/main/resources/datalake/data/product.csv/.part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv.crc -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | 3 | MAINTAINER "Aad Versteden 4 | MAINTAINER "Ivan Ermilov " 5 | MAINTAINER "Gezim Sejdiu " 6 | 7 | COPY frontend/default.conf /etc/nginx/conf.d/default.conf 8 | COPY frontend/css /data/bde-css 9 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | 3 | MAINTAINER "Aad Versteden 4 | MAINTAINER "Ivan Ermilov " 5 | MAINTAINER "Gezim Sejdiu " 6 | 7 | COPY frontend/default.conf /etc/nginx/conf.d/default.conf 8 | COPY frontend/bde-css /data/bde-css 9 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/scripts/inject-etc-hosts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "# SANSA-Examples-Flink-data-integrator-ui" | sudo tee -a /etc/hosts 4 | echo "127.0.0.1 hdfs.demo.sansa-stack.local hue.demo.sansa-stack.local flink-master.demo.sansa-stack.local flink-worker.demo.sansa-stack.local demo.sansa-stack.local" | sudo tee -a /etc/hosts; 5 | 6 | 7 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/scripts/inject-etc-hosts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "# SANSA-Examples-Spark-data-integrator-ui" | sudo tee -a /etc/hosts 4 | echo "127.0.0.1 hdfs.demo.sansa-stack.local hue.demo.sansa-stack.local spark-master.demo.sansa-stack.local spark-worker.demo.sansa-stack.local demo.sansa-stack.local " | sudo tee -a /etc/hosts; 5 | 6 | 7 | -------------------------------------------------------------------------------- /sansa-examples-flink/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bde2020/flink-maven-template:1.1.3-hadoop2.7 2 | 3 | MAINTAINER Gezim Sejdiu 4 | 5 | ENV FLINK_APPLICATION_JAR_NAME sansa-examples-flink-1.1-with-dependencies 6 | ENV FLINK_APPLICATION_MAIN_CLASS net.sansa_stack.examples.flink.rdf.TripleReader 7 | ENV FLINK_APPLICATION_ARGS "hdfs://namenode:8020/user/root/input/rdf.nt" 8 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/producer.csv/producer.csv: -------------------------------------------------------------------------------- 1 | nr,label,comment,homepage,country,publisher,publishDate 2 | 1,"enzymologist neb falsehoods","smashes leavening beauticians novitiates peaks nonhistoric fluorinations seductresses promotions corresponding denuder wispier laboriousness mechanisms skepsis tulips barstools demobs bandmasters pallbearer","http://www.Producer1.com/","DE",1,"2003-06-15" 
3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .settings 7 | .history 8 | .lib/ 9 | .project 10 | dist/* 11 | target/ 12 | lib_managed/ 13 | src_managed/ 14 | project/boot/ 15 | project/plugins/project/ 16 | 17 | # Scala-IDE specific 18 | .scala_dependencies 19 | .worksheet 20 | .idea/ 21 | *.iml 22 | 23 | deptree.txt 24 | scalastyle-output.xml 25 | 26 | 27 | # Files generated for spark runs 28 | sansa-examples-spark/data/ 29 | 30 | -------------------------------------------------------------------------------- /sansa-examples-spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bde2020/spark-java-template:2.0.1-hadoop2.7 2 | 3 | MAINTAINER Gezim Sejdiu 4 | 5 | ENV SPARK_APPLICATION_JAR_NAME sansa-examples-spark-1.1-with-dependencies 6 | ENV SPARK_APPLICATION_MAIN_CLASS net.sansa_stack.examples.spark.rdf.TripleReader 7 | ENV SPARK_APPLICATION_ARGS "hdfs://namenode:8020/user/hue/input/rdf.nt hdfs://namenode:8020/user/hue/output/result.nt" 8 | 9 | ENV HDFS_URL=hdfs://hdfs:9000 10 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/metrics.conf: -------------------------------------------------------------------------------- 1 | # This configuration file contains the settings for the assessment. 2 | rdf.qualityassessment.dataset.prefixes=["http://dbpedia.org/"] 3 | 4 | rdf.qualityassessment.dataset.subject="http://dbpedia.org/ontology/Person" 5 | rdf.qualityassessment.dataset.property="http://commons.dbpedia.org/property/source" 6 | 7 | rdf.qualityassessment.dataset.lowerBound=0.1 8 | rdf.qualityassessment.dataset.upperBound=0.9 9 | 10 | rdf.qualityassessment.dataset.shortUri.threshold = 95 -------------------------------------------------------------------------------- /sansa-examples-spark/docker-compose-sansa-examples.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | sansa-examples: 4 | build: . 5 | hostname: sansa-examples 6 | container_name: sansa-examples 7 | networks: 8 | - hadoop 9 | environment: 10 | - HDFS_URL=hdfs://namenode:8020 11 | - SPARK_APPLICATION_ARGS = $SPARK_APPLICATION_ARGS 12 | - SPARK_APPLICATION_MAIN_CLASS = $SPARK_APPLICATION_MAIN_CLASS 13 | # env_file: 14 | # - ./config/hadoop/hadoop.env 15 | # links: 16 | # - "spark-master" 17 | 18 | networks: 19 | hadoop: 20 | external: true 21 | -------------------------------------------------------------------------------- /sansa-examples-flink/.gitignore: -------------------------------------------------------------------------------- 1 | # use glob syntax. 2 | syntax: glob 3 | *.ser 4 | *.class 5 | *~ 6 | *.bak 7 | #*.off 8 | *.old 9 | 10 | # eclipse conf file 11 | .settings 12 | .classpath 13 | .project 14 | .manager 15 | .scala_dependencies 16 | 17 | # idea 18 | .idea 19 | *.iml 20 | 21 | # building 22 | target 23 | build 24 | null 25 | tmp* 26 | temp* 27 | dist 28 | test-output 29 | build.log 30 | 31 | # other scm 32 | .svn 33 | .CVS 34 | .hg* 35 | 36 | # switch to regexp syntax. 
37 | # syntax: regexp 38 | # ^\.pc/ 39 | 40 | #SHITTY output not in target directory 41 | build.log 42 | -------------------------------------------------------------------------------- /run-examples-wip.sh: -------------------------------------------------------------------------------- 1 | LAYER=query 2 | EXAMPLE=Sparklify 3 | 4 | JAR=`ls sansa-examples-spark/target/sansa-examples-spark_*-dist.jar` 5 | BASE_URL="file://"`pwd`"/" 6 | echo "Using jar file $JAR" 7 | echo "Base URL: $BASE_URL" 8 | 9 | spark-submit \ 10 | --class net.sansa_stack.examples.spark.$LAYER.$EXAMPLE \ 11 | --master spark://spark-master:7077 \ 12 | "$JAR" -i "$BASE_URL/sansa-examples-spark/src/main/resources/rdf.nt" \ 13 | 14 | # TODO Validate the output 15 | curl -LH 'Accept: application/sparql-results+json' 'http://localhost:7531/sparql?query=SELECT%20%2A%20%7B%20%3Fs%20%3Fp%20%3Fo%20%7D' 16 | 17 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/person.csv/part-00000-212c906c-dbf7-4fb8-8f79-a254f2d587f5.csv: -------------------------------------------------------------------------------- 1 | nr,name,mbox_sha1sum,country,publisher,publishDate 2 | 1,'Ruggiero-Delane','fb3efd92e3c7a8d775a895ba476e11a3e8f3fac','US',1,2008-09-05 3 | 2,'Eyana-Aurelianus','df1cf8e68d49e5b65f1507dbecec6b61e9dc98','JP',1,2008-08-07 4 | 3,'Danijela-Adalbrand','9b9d4b8dcf7ada3c181b4bed1fa3c53d29caf65','US',1,2008-07-21 5 | 4,'Allegra-Walburga','619b2f69a01a7d86c0eca3f5e910c5b559ff3a','RU',1,2008-06-23 6 | 5,'Przemek-Berte','c3b1c82511908f706153319688a7a5599b8ad8c0','ES',1,2008-08-19 7 | 6,'Caryn','d6deee088e99af0f7c65fb7cca9bdfbbe3d7343','CN',1,2008-06-29 8 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/css/bde-hdfs.css: -------------------------------------------------------------------------------- 1 | body { 2 | padding: 0; 3 | } 4 | 5 | .navigator { 6 | position: fixed; 7 | top: 0; 8 | left: 0; 9 | width: 100%; 10 | height: 50px; 11 | margin: 0 auto; 12 | padding: 10px; 13 | background: #A94F74; 14 | box-sizing: border-box; 15 | } 16 | 17 | .navbar { 18 | position: relative; 19 | top: auto; 20 | margin-top: 50px; 21 | } 22 | 23 | .container-fluid .card { 24 | width: 1280px; 25 | margin: 2rem auto; 26 | padding: 1rem; 27 | border: none; 28 | background: white; 29 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 30 | } -------------------------------------------------------------------------------- /sansa-examples-flink/config/integrator/user-interfaces: -------------------------------------------------------------------------------- 1 | { "data": [ 2 | { 3 | "id": 1, 4 | "type": "user-interfaces", 5 | "attributes": { 6 | "label": "Apache Flink Dashboard", 7 | "base-url": "http://flink-master.demo.sansa-stack.local" 8 | } 9 | }, 10 | { 11 | "id": 2, 12 | "type": "user-interfaces", 13 | "attributes": { 14 | "label": "HDFS", 15 | "base-url": "http://hdfs.demo.sansa-stack.local" 16 | } 17 | }, 18 | { 19 | "id": 3, 20 | "type": "user-interfaces", 21 | "attributes": { 22 | "label": "Hue", 23 | "base-url": "http://hue.demo.sansa-stack.local", 24 | "append-path": "/home" 25 | } 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/bde-css/bde-hdfs.css: -------------------------------------------------------------------------------- 1 | body { 2 | 
padding: 0; 3 | } 4 | 5 | .navigator { 6 | position: fixed; 7 | top: 0; 8 | left: 0; 9 | width: 100%; 10 | height: 50px; 11 | margin: 0 auto; 12 | padding: 10px; 13 | background: #A94F74; 14 | box-sizing: border-box; 15 | } 16 | 17 | .navbar { 18 | position: relative; 19 | top: auto; 20 | margin-top: 50px; 21 | } 22 | 23 | .container-fluid .card { 24 | width: 1280px; 25 | margin: 2rem auto; 26 | padding: 1rem; 27 | border: none; 28 | background: white; 29 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 30 | } -------------------------------------------------------------------------------- /sansa-examples-spark/config/integrator/user-interfaces: -------------------------------------------------------------------------------- 1 | { "data": [ 2 | { 3 | "id": 1, 4 | "type": "user-interfaces", 5 | "attributes": { 6 | "label": "Spark Master", 7 | "base-url": "http://spark-master.demo.sansa-stack.local", 8 | "append-path": "/" 9 | } 10 | }, 11 | { 12 | "id": 2, 13 | "type": "user-interfaces", 14 | "attributes": { 15 | "label": "Spark Worker", 16 | "base-url": "http://spark-worker.demo.sansa-stack.local" 17 | } 18 | }, 19 | { 20 | "id": 3, 21 | "type": "user-interfaces", 22 | "attributes": { 23 | "label": "HDFS", 24 | "base-url": "http://hdfs.demo.sansa-stack.local" 25 | } 26 | }, 27 | { 28 | "id": 4, 29 | "type": "user-interfaces", 30 | "attributes": { 31 | "label": "Hue", 32 | "base-url": "http://hue.demo.sansa-stack.local", 33 | "append-path": "/home" 34 | } 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=INFO stdout 3 | 4 | # Direct log messages to a log file 5 | log4j.appender.file=org.apache.log4j.RollingFileAppender 6 | log4j.appender.file.File=C:\\logging.log 7 | log4j.appender.file.MaxFileSize=10MB 8 | log4j.appender.file.MaxBackupIndex=10 9 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 10 | log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %C{1}:%L - %m%n 11 | 12 | # Direct log messages to stdout 13 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 14 | log4j.appender.stdout.Target=System.out 15 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %C:%L - %m%n 17 | 18 | log4j.logger.akka.event.slf4j.Slf4jLogger=ERROR 19 | log4j.logger.akka.remote.Remoting=ERROR 20 | log4j.logger.org.spark_project.jetty=ERROR 21 | log4j.logger.org.apache.spark=ERROR 22 | log4j.logger.org.apache.hadoop=ERROR -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/resources/Clustering_sampledata.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | . 9 | . 10 | . 11 | . 12 | . 13 | . -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/Clustering_sampledata.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | . 9 | . 10 | . 11 | . 12 | . 13 | . 
-------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/bde-css/bde-spark-master.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | strong { 6 | font-weight: 700; 7 | } 8 | 9 | .row-fluid { 10 | width: 1280px; 11 | margin: 2rem auto; 12 | padding: 1rem; 13 | background: white; 14 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 15 | } 16 | 17 | .row-fluid:first-of-type { 18 | position: fixed; 19 | top: 0; 20 | left: 0; 21 | width: 100%; 22 | margin: 0 auto; 23 | padding: 0; 24 | background: #A94F74; 25 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 26 | } 27 | 28 | .row-fluid:nth-of-type(2) { 29 | margin-top: 5rem; 30 | } 31 | 32 | .row-fluid:first-of-type .span12 { 33 | float: none; 34 | width: 1280px; 35 | margin: auto; 36 | } 37 | 38 | .row-fluid:first-of-type h3 { 39 | font-size: 1rem; 40 | line-height: 1; 41 | margin: auto; 42 | color: white; 43 | } 44 | 45 | .row-fluid:first-of-type span { 46 | color: white; 47 | margin-right: 50px !important; 48 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/queries/Q1.sparql: -------------------------------------------------------------------------------- 1 | PREFIX rdfs: 2 | PREFIX rdf: 3 | PREFIX foaf: 4 | PREFIX schema: 5 | PREFIX rev: 6 | PREFIX edm: 7 | PREFIX dc: 8 | PREFIX gr: 9 | PREFIX dcterms: 10 | PREFIX xsd: 11 | PREFIX bsbm: 12 | 13 | SELECT DISTINCT ?label ?value 14 | WHERE { 15 | ?product rdfs:label ?label . 16 | ?product bsbm:productPropertyNumeric1 ?value . 17 | ?product rdf:type bsbm:Product . 18 | ?product bsbm:producer ?producer . 19 | ?producer rdf:type bsbm:Producer . 20 | ?producer foaf:homepage ?hp . 21 | ?review bsbm:reviewFor ?product . 22 | ?review rdf:type schema:Review . 23 | ?review rev:reviewer ?pers . 24 | ?pers foaf:name ?fn . 25 | ?pers edm:country ?cn . 26 | ?offer bsbm:product ?product . 27 | ?offer rdf:type schema:Offer . 
28 | FILTER (?value > 102) 29 | } 30 | ORDER BY ?label 31 | LIMIT 10 -------------------------------------------------------------------------------- /sansa-examples-flink/config/hadoop/hadoop.env: -------------------------------------------------------------------------------- 1 | CORE_CONF_fs_defaultFS=hdfs://namenode:8020 2 | CORE_CONF_hadoop_http_staticuser_user=root 3 | CORE_CONF_hadoop_proxyuser_hue_hosts=* 4 | CORE_CONF_hadoop_proxyuser_hue_groups=* 5 | HDFS_CONF_dfs_webhdfs_enabled=true 6 | HDFS_CONF_dfs_permissions_enabled=false 7 | 8 | YARN_CONF_yarn_log___aggregation___enable=true 9 | YARN_CONF_yarn_resourcemanager_recovery_enabled=true 10 | YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore 11 | YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate 12 | YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs 13 | YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ 14 | YARN_CONF_yarn_timeline___service_enabled=true 15 | YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true 16 | YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true 17 | YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 18 | YARN_CONF_yarn_timeline___service_hostname=historyserver 19 | YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 20 | YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 21 | YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 22 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/hadoop/hadoop.env: -------------------------------------------------------------------------------- 1 | CORE_CONF_fs_defaultFS=hdfs://namenode:8020 2 | CORE_CONF_hadoop_http_staticuser_user=root 3 | CORE_CONF_hadoop_proxyuser_hue_hosts=* 4 | CORE_CONF_hadoop_proxyuser_hue_groups=* 5 | 6 | HDFS_CONF_dfs_webhdfs_enabled=true 7 | HDFS_CONF_dfs_permissions_enabled=false 8 | 9 | YARN_CONF_yarn_log___aggregation___enable=true 10 | YARN_CONF_yarn_resourcemanager_recovery_enabled=true 11 | YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore 12 | YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate 13 | YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs 14 | YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ 15 | YARN_CONF_yarn_timeline___service_enabled=true 16 | YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true 17 | YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true 18 | YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 19 | YARN_CONF_yarn_timeline___service_hostname=historyserver 20 | YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 21 | YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 22 | YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Archived Repository - Do not use this repository anymore! 2 | 3 | SANSA got easier to use! 
All its code has been consolidated into a single repository at https://github.com/SANSA-Stack/SANSA-Stack 4 | 5 | 6 | 7 | # SANSA-Examples 8 | [![Build Status](https://ci.aksw.org/jenkins/job/SANSA%20Examples/job/develop/badge/icon)](https://ci.aksw.org/jenkins/job/SANSA%20Examples/job/develop/) 9 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 10 | [![Twitter](https://img.shields.io/twitter/follow/SANSA_Stack.svg?style=social)](https://twitter.com/SANSA_Stack) 11 | 12 | This directory contains code examples for various SANSA functionality. 13 | 14 | ### [sansa-examples-spark](https://github.com/SANSA-Stack/SANSA-Examples/tree/master/sansa-examples-spark) 15 | Contains the SANSA Examples for [Apache Spark](http://spark.apache.org/). 16 | 17 | ### [sansa-examples-flink](https://github.com/SANSA-Stack/SANSA-Examples/tree/master/sansa-examples-flink) 18 | Contains the SANSA Examples for [Apache Flink](http://flink.apache.org/). 19 | 20 | ## How to Contribute 21 | We always welcome new contributors to the project! Please see [our contribution guide](http://sansa-stack.net/contributing-to-sansa/) for more details on how to get started contributing to SANSA. 22 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/TripleReader.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.flink.io._ 6 | import net.sansa_stack.rdf.flink.model._ 7 | import org.apache.flink.api.scala.ExecutionEnvironment 8 | import org.apache.jena.riot.Lang 9 | 10 | object TripleReader { 11 | 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String): Unit = { 22 | 23 | println("======================================") 24 | println("| Triple reader example |") 25 | println("======================================") 26 | 27 | val env = ExecutionEnvironment.getExecutionEnvironment 28 | 29 | val triples = env.rdf(Lang.NTRIPLES)(input) 30 | triples.getTriples().first(10).print() 31 | } 32 | 33 | case class Config(in: String = "") 34 | 35 | val parser = new scopt.OptionParser[Config]("Triple reader example") { 36 | 37 | head(" Triple reader example") 38 | 39 | opt[String]('i', "input").required().valueName(""). 40 | action((x, c) => c.copy(in = x)). 
41 | text("path to file that contains the data (in N-Triples format)") 42 | 43 | help("help").text("prints this usage text") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/css/bde-hadoop.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | body > .container { 6 | margin: 5rem auto; 7 | background: white; 8 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 9 | } 10 | 11 | header.bs-docs-nav { 12 | position: fixed; 13 | top: 0; 14 | left: 0; 15 | width: 100%; 16 | height: 3rem; 17 | border: none; 18 | background: #A94F74; 19 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 20 | } 21 | 22 | header.bs-docs-nav .navbar-brand { 23 | background: inherit; 24 | } 25 | 26 | #ui-tabs .active a { 27 | background: #B96A8B; 28 | } 29 | 30 | #ui-tabs > li > a { 31 | color: white; 32 | } 33 | 34 | .navbar-inverse .navbar-nav > .dropdown > a .caret { 35 | border-top-color: white; 36 | border-bottom-color: white; 37 | } 38 | 39 | .navbar-inverse .navbar-nav > .open > a, 40 | .navbar-inverse .navbar-nav > .open > a:hover, 41 | .navbar-inverse .navbar-nav > .open > a:focus { 42 | background-color: #B96A8B; 43 | } 44 | 45 | .dropdown-menu > li > a { 46 | color: #A94F74; 47 | } 48 | 49 | .modal-dialog .panel-success { 50 | border-color: lightgrey; 51 | } 52 | 53 | .modal-dialog .panel-heading { 54 | background-color: #A94F74 !important; 55 | } 56 | 57 | .modal-dialog .panel-heading select { 58 | margin-top: 1rem; 59 | } -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/bde-css/bde-hadoop.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | body > .container { 6 | margin: 5rem auto; 7 | background: white; 8 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 9 | } 10 | 11 | header.bs-docs-nav { 12 | position: fixed; 13 | top: 0; 14 | left: 0; 15 | width: 100%; 16 | height: 3rem; 17 | border: none; 18 | background: #A94F74; 19 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 20 | } 21 | 22 | header.bs-docs-nav .navbar-brand { 23 | background: inherit; 24 | } 25 | 26 | #ui-tabs .active a { 27 | background: #B96A8B; 28 | } 29 | 30 | #ui-tabs > li > a { 31 | color: white; 32 | } 33 | 34 | .navbar-inverse .navbar-nav > .dropdown > a .caret { 35 | border-top-color: white; 36 | border-bottom-color: white; 37 | } 38 | 39 | .navbar-inverse .navbar-nav > .open > a, 40 | .navbar-inverse .navbar-nav > .open > a:hover, 41 | .navbar-inverse .navbar-nav > .open > a:focus { 42 | background-color: #B96A8B; 43 | } 44 | 45 | .dropdown-menu > li > a { 46 | color: #A94F74; 47 | } 48 | 49 | .modal-dialog .panel-success { 50 | border-color: lightgrey; 51 | } 52 | 53 | .modal-dialog .panel-heading { 54 | background-color: #A94F74 !important; 55 | } 56 | 57 | .modal-dialog .panel-heading select { 58 | margin-top: 1rem; 59 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/TripleReader.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import net.sansa_stack.rdf.spark.io._ 4 | import 
org.apache.jena.riot.Lang 5 | import org.apache.spark.sql.SparkSession 6 | 7 | 8 | object TripleReader { 9 | 10 | def main(args: Array[String]) { 11 | parser.parse(args, Config()) match { 12 | case Some(config) => 13 | run(config.in) 14 | case None => 15 | println(parser.usage) 16 | } 17 | } 18 | 19 | def run(input: String): Unit = { 20 | 21 | val spark = SparkSession.builder 22 | .appName(s"Triple reader example $input") 23 | .master("local[*]") 24 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 25 | .getOrCreate() 26 | 27 | println("======================================") 28 | println("| Triple reader example |") 29 | println("======================================") 30 | 31 | val lang = Lang.NTRIPLES 32 | val triples = spark.rdf(lang)(input) 33 | 34 | triples.take(5).foreach(println(_)) 35 | 36 | // triples.saveAsNTriplesFile(output) 37 | 38 | spark.stop 39 | 40 | } 41 | 42 | case class Config(in: String = "") 43 | 44 | val parser = new scopt.OptionParser[Config]("Triple reader example") { 45 | 46 | head(" Triple reader example") 47 | 48 | opt[String]('i', "input").required().valueName(""). 49 | action((x, c) => c.copy(in = x)). 50 | text("path to file that contains the data (in N-Triples format)") 51 | 52 | help("help").text("prints this usage text") 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/TripleWriter.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.flink.io._ 6 | import org.apache.flink.api.scala.ExecutionEnvironment 7 | import org.apache.jena.riot.Lang 8 | 9 | object TripleWriter { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.in, config.out) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(input: String, output: String): Unit = { 21 | 22 | println("======================================") 23 | println("| Triple writer example |") 24 | println("======================================") 25 | 26 | val env = ExecutionEnvironment.getExecutionEnvironment 27 | 28 | val triples = env.rdf(Lang.NTRIPLES)(input) 29 | triples.saveAsNTriplesFile(output) 30 | 31 | env.execute(s"Triple writer example ($input)") 32 | 33 | } 34 | 35 | case class Config( 36 | in: String = "", 37 | out: String = "") 38 | 39 | val parser = new scopt.OptionParser[Config]("Triple writer example ") { 40 | 41 | head("Triple writer example ") 42 | 43 | opt[String]('i', "input").required().valueName(""). 44 | action((x, c) => c.copy(in = x)). 45 | text("path to file that contains the data (in N-Triples format)") 46 | 47 | opt[String]('o', "out").required().valueName(""). 48 | action((x, c) => c.copy(out = x)). 
49 | text("the output directory") 50 | 51 | help("help").text("prints this usage text") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/RDFStats.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import java.io.File 4 | 5 | import scala.collection.mutable 6 | 7 | import net.sansa_stack.rdf.flink.io._ 8 | import net.sansa_stack.rdf.flink.stats._ 9 | import org.apache.flink.api.scala.ExecutionEnvironment 10 | import org.apache.jena.riot.Lang 11 | 12 | object RDFStats { 13 | 14 | def main(args: Array[String]) { 15 | parser.parse(args, Config()) match { 16 | case Some(config) => 17 | run(config.in, config.out) 18 | case None => 19 | println(parser.usage) 20 | } 21 | } 22 | 23 | def run(input: String, output: String): Unit = { 24 | 25 | val rdf_stats_file = new File(input).getName 26 | 27 | println("======================================") 28 | println("| RDF Statistic example |") 29 | println("======================================") 30 | 31 | val env = ExecutionEnvironment.getExecutionEnvironment 32 | 33 | val triples = env.rdf(Lang.NTRIPLES)(input) 34 | 35 | // compute stats 36 | val rdf_statistics = triples.stats 37 | .voidify(rdf_stats_file, output) 38 | } 39 | 40 | case class Config( 41 | in: String = "", 42 | out: String = "") 43 | 44 | // the CLI parser 45 | val parser = new scopt.OptionParser[Config]("RDF Dataset Statistics Example") { 46 | 47 | head("RDF Dataset Statistics Example") 48 | 49 | opt[String]('i', "input").required().valueName(""). 50 | action((x, c) => c.copy(in = x)). 51 | text("path to file that contains the data (in N-Triples format)") 52 | 53 | opt[String]('o', "out").required().valueName(""). 54 | action((x, c) => c.copy(out = x)). 
55 | text("the output directory") 56 | 57 | help("help").text("prints this usage text") 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/config: -------------------------------------------------------------------------------- 1 | { 2 | "sources": [ 3 | { 4 | "type": "csv", 5 | "options": { 6 | "header": "true", 7 | "delimiter": ",", 8 | "mode": "DROPMALFORMED" 9 | }, 10 | "source": "src/main/resources/Data/person.csv", 11 | "entity": "Person" 12 | } 13 | , { 14 | "type": "parquet", 15 | "options": { 16 | "spark_sql_parquet_filterPushdown": "true" 17 | }, 18 | "source": "src/main/resources/Data/review.parquet", 19 | "entity": "Review" 20 | } 21 | , { 22 | "type": "mongodb", 23 | "options": { 24 | "url": "127.0.0.1", 25 | "database": "bsbm", 26 | "collection": "offer", 27 | "options": "" 28 | }, 29 | "source": "//Offer", 30 | "entity": "Offer" 31 | } 32 | , { 33 | "type": "cassandra", 34 | "options": { 35 | "keyspace": "db", 36 | "table": "product" 37 | }, 38 | "source": "//Product", 39 | "entity": "Product" 40 | } 41 | , { 42 | "type": "jdbc", 43 | "options": { 44 | "url": "jdbc:mysql://localhost:3306/benchmark?useUnicode=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&autoReconnect=true&useSSL=false", 45 | 46 | "driver": "com.mysql.cj.jdbc.Driver", 47 | "dbtable": "producer", 48 | "user": "root", 49 | "password": "root" 50 | }, 51 | "source": "//Producer", 52 | "entity": "Producer" 53 | } 54 | ], 55 | "weights": [ 56 | { 57 | "datasource": "cassandra", 58 | "weight": 1 59 | }, 60 | { 61 | "datasource": "mongodb", 62 | "weight": 1 63 | }, 64 | { 65 | "datasource": "parquet", 66 | "weight": 1 67 | }, 68 | { 69 | "datasource": "csv", 70 | "weight": 1 71 | }, 72 | { 73 | "datasource": "jdbc", 74 | "weight": 1 75 | } 76 | ] 77 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/TripleWriter.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import java.io.File 4 | import java.net.URI 5 | 6 | import scala.collection.mutable 7 | 8 | import net.sansa_stack.rdf.spark.io._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.spark.sql.SparkSession 11 | 12 | object TripleWriter { 13 | 14 | def main(args: Array[String]) { 15 | parser.parse(args, Config()) match { 16 | case Some(config) => 17 | run(config.in, config.out) 18 | case None => 19 | println(parser.usage) 20 | } 21 | } 22 | 23 | def run(input: String, output: String): Unit = { 24 | 25 | val spark = SparkSession.builder 26 | .appName(s"Triple writer example ( $input )") 27 | .master("local[*]") 28 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 29 | .getOrCreate() 30 | 31 | println("======================================") 32 | println("| Triple writer example |") 33 | println("======================================") 34 | 35 | val lang = Lang.NTRIPLES 36 | val triples = spark.rdf(lang)(input) 37 | 38 | triples.saveAsNTriplesFile(output) 39 | 40 | spark.stop 41 | 42 | } 43 | 44 | case class Config( 45 | in: String = "", 46 | out: String = "") 47 | 48 | // the CLI parser 49 | val parser = new scopt.OptionParser[Config]("Triple writer example ") { 50 | 51 | head("Triple writer example ") 52 | 53 | opt[String]('i', "input").required().valueName(""). 
54 | action((x, c) => c.copy(in = x)). 55 | text("path to file that contains the data (in N-Triples format)") 56 | 57 | opt[String]('o', "out").required().valueName(""). 58 | action((x, c) => c.copy(out = x)). 59 | text("the output directory") 60 | 61 | help("help").text("prints this usage text") 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/HDTQuery.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import net.sansa_stack.query.spark.query._ 4 | import net.sansa_stack.rdf.spark.io._ 5 | import net.sansa_stack.rdf.spark.model._ 6 | import org.apache.jena.riot.Lang 7 | import org.apache.spark.sql.SparkSession 8 | 9 | object HDTQuery { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.in, config.query) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(input: String, query: String): Unit = { 21 | 22 | println("===========================================") 23 | println("| SANSA - HDT example |") 24 | println("===========================================") 25 | 26 | val spark = SparkSession.builder 27 | .master("local[*]") 28 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 29 | .appName("SANSA - HDT") 30 | .getOrCreate() 31 | 32 | val lang = Lang.NTRIPLES 33 | val triples = spark.rdf(lang)(input) 34 | 35 | val triples_hdt = triples.asHDT() 36 | 37 | val result = triples_hdt.sparqlHDT(query) 38 | 39 | result.show() 40 | 41 | spark.close() 42 | 43 | } 44 | 45 | case class Config(in: String = "", query: String = "") 46 | 47 | val parser = new scopt.OptionParser[Config]("SANSA - HDT example") { 48 | 49 | head(" SANSA - HDT example") 50 | 51 | opt[String]('i', "input").required().valueName(""). 52 | action((x, c) => c.copy(in = x)). 53 | text("path to file that contains the data (in N-Triples format)") 54 | 55 | opt[String]('q', "query").required().valueName("SPARQL query"). 56 | action((x, c) => c.copy(query = x)). 
57 | text("the SPARQL query") 58 | 59 | help("help").text("prints this usage text") 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/BorderFlowClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering._ 6 | import net.sansa_stack.ml.spark.clustering.algorithms.BorderFlow 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.model._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.log4j.{ Level, Logger } 11 | import org.apache.spark.sql.SparkSession 12 | 13 | object BorderFlowClustering { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String): Unit = { 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"BorderFlow example: ( $input )") 28 | .master("local[*]") 29 | .config("spark.hadoop.validateOutputSpecs", "false") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | println("============================================") 34 | println(s"| Border Flow example |") 35 | println("============================================") 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val borderflow = triples.cluster(ClusteringAlgorithm.BorderFlow).asInstanceOf[BorderFlow].run() 41 | 42 | borderflow.collect().foreach(println) 43 | 44 | spark.stop 45 | 46 | } 47 | 48 | case class Config(in: String = "") 49 | 50 | val parser = new scopt.OptionParser[Config]("BorderFlow") { 51 | 52 | head("BorderFlow: an example BorderFlow app.") 53 | 54 | opt[String]('i', "input").required().valueName(""). 55 | action((x, c) => c.copy(in = x)). 
56 | text("path to file contains the input files") 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/ml/clustering/RDFByModularityClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.flink.clustering.{ RDFByModularityClustering => RDFByModularityClusteringAlg } 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.api.scala.ExecutionEnvironment 8 | 9 | object RDFByModularityClustering { 10 | def main(args: Array[String]) { 11 | parser.parse(args, Config()) match { 12 | case Some(config) => 13 | run(config.in, config.out, config.numIterations) 14 | case None => 15 | println(parser.usage) 16 | } 17 | } 18 | 19 | def run(input: String, output: String, numIterations: Int): Unit = { 20 | 21 | println("============================================") 22 | println("| RDF By Modularity Clustering example |") 23 | println("============================================") 24 | 25 | val env = ExecutionEnvironment.getExecutionEnvironment 26 | 27 | RDFByModularityClusteringAlg(env, numIterations, input, output) 28 | 29 | } 30 | 31 | case class Config(in: String = "", out: String = "", numIterations: Int = 100) 32 | 33 | val defaultParams = Config() 34 | 35 | val parser = new scopt.OptionParser[Config]("RDF By Modularity Clustering") { 36 | 37 | head("RDF By Modularity Clustering: an example RDF By Modularity Clustering app using RDF Graph.") 38 | 39 | opt[String]('i', "input").required().valueName("") 40 | .text(s"path to file that contains the input files (in N-Triple format)") 41 | .action((x, c) => c.copy(in = x)) 42 | 43 | opt[String]('o', "output").valueName("") 44 | .text("the output directory") 45 | .action((x, c) => c.copy(out = x)) 46 | 47 | opt[Int]("numIterations") 48 | .text(s"number of iterations, default: ${defaultParams.numIterations}") 49 | .action((x, c) => c.copy(numIterations = x)) 50 | 51 | help("help").text("prints this usage text") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/RDFStats.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import java.io.File 4 | 5 | import scala.collection.mutable 6 | 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.stats._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.spark.sql.SparkSession 11 | 12 | 13 | object RDFStats { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in, config.out) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String, output: String): Unit = { 25 | 26 | val rdf_stats_file = new File(input).getName 27 | 28 | val spark = SparkSession.builder 29 | .appName(s"RDF Dataset Statistics example $rdf_stats_file") 30 | .master("local[*]") 31 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 32 | .getOrCreate() 33 | 34 | println("======================================") 35 | println("| RDF Statistic example |") 36 | println("======================================") 37 | 38 | val lang = Lang.NTRIPLES 39 | val triples = spark.rdf(lang)(input) 
40 | 41 | // compute criterias 42 | val stats = triples.stats 43 | .voidify(rdf_stats_file, output) 44 | } 45 | 46 | // the config object 47 | case class Config(in: String = "", out: String = "") 48 | 49 | // the CLI parser 50 | val parser = new scopt.OptionParser[Config]("RDF Dataset Statistics Example") { 51 | 52 | head("RDF Dataset Statistics Example") 53 | 54 | opt[String]('i', "input").required().valueName(""). 55 | action((x, c) => c.copy(in = x)). 56 | text("path to file that contains the data (in N-Triples format)") 57 | 58 | opt[String]('o', "out").required().valueName(""). 59 | action((x, c) => c.copy(out = x)). 60 | text("the output directory") 61 | 62 | help("help").text("prints this usage text") 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/css/bde-flink.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #F1F1F1; 3 | } 4 | 5 | .mainHeading { 6 | margin: 0; 7 | color: white; 8 | border: none; 9 | background: #A94F74; 10 | box-shadow: 0 2px 5px 0 rgba(0,0,0,0.16), 0 2px 10px 0 rgba(0,0,0,0.12); 11 | } 12 | 13 | .mainHeading > h1 > div { 14 | top: 20px !important; 15 | right: 20px !important; 16 | } 17 | 18 | #jobsContents { 19 | padding: 10px; 20 | border: 0; 21 | border-radius: 2px; 22 | background: white; 23 | box-shadow: 0 1px 3px 0 rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 2px 1px -1px rgba(0,0,0,.12); 24 | } 25 | 26 | #jobsContents + div { 27 | padding: 10px; 28 | border: 0; 29 | border-radius: 2px; 30 | background: white; 31 | box-shadow: 0 1px 3px 0 rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 2px 1px -1px rgba(0,0,0,.12); 32 | } 33 | 34 | div.canvas { 35 | margin: 0; 36 | } 37 | 38 | .footer .boxed { 39 | height: auto !important; 40 | width: auto !important; 41 | padding: 0 20px; 42 | border: none; 43 | border-radius: 2px; 44 | background: white; 45 | box-shadow: 0 1px 3px 0 rgba(0,0,0,.2),0 1px 1px 0 rgba(0,0,0,.14),0 2px 1px -1px rgba(0,0,0,.12); 46 | } 47 | 48 | .footer .boxed > div { 49 | margin-top: 20px !important; 50 | } 51 | 52 | .footer .boxed > div #suspendedOption > span { 53 | position: absolute; 54 | } 55 | 56 | .footer .boxed:first-of-type > .footer { 57 | padding: 0 20px 10px; 58 | } 59 | 60 | .footer .boxed:nth-of-type(2) > .footer { 61 | position: relative; 62 | padding: 0 0 10px; 63 | } 64 | 65 | #upload_file_name_text { 66 | width: auto !important; 67 | } 68 | 69 | #upload_form > div { 70 | margin-top: 20px !important; 71 | } 72 | 73 | #upload_file_name_text { 74 | position: relative !important; 75 | top: auto !important; 76 | right: auto !important; 77 | width: calc(100% - 85px) !important; 78 | } 79 | 80 | #upload_file_input { 81 | height: 100% !important; 82 | width: 75px; 83 | padding: 0; 84 | } -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/PageRank.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.spark.io._ 6 | import net.sansa_stack.rdf.spark.model._ 7 | import org.apache.jena.riot.Lang 8 | import org.apache.spark.graphx.Graph 9 | import org.apache.spark.sql.SparkSession 10 | 11 | /* 12 | * Computes the PageRank of Resources from an input .nt file. 
13 | */ 14 | object PageRank { 15 | 16 | def main(args: Array[String]) { 17 | parser.parse(args, Config()) match { 18 | case Some(config) => 19 | run(config.in) 20 | case None => 21 | println(parser.usage) 22 | } 23 | } 24 | def run(input: String): Unit = { 25 | 26 | println("======================================") 27 | println("| PageRank of resources example |") 28 | println("======================================") 29 | 30 | val spark = SparkSession.builder 31 | .appName(s"PageRank of resources example ( $input )") 32 | .master("local[*]") 33 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 34 | // .config("spark.kryo.registrator", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator") 35 | .getOrCreate() 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val graph = triples.asGraph() 41 | 42 | val pagerank = graph.pageRank(0.00001).vertices 43 | val report = pagerank.join(graph.vertices) 44 | .map({ case (k, (r, v)) => (r, v, k) }) 45 | .sortBy(50 - _._1) 46 | 47 | report.take(50).foreach(println) 48 | 49 | spark.stop 50 | 51 | } 52 | case class Config(in: String = "") 53 | 54 | // the CLI parser 55 | val parser = new scopt.OptionParser[Config]("PageRank of resources example") { 56 | 57 | head(" PageRank of resources example") 58 | 59 | opt[String]('i', "input").required().valueName(""). 60 | action((x, c) => c.copy(in = x)). 61 | text("path to file that contains the data (in N-Triples format)") 62 | help("help").text("prints this usage text") 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/owl/OWLReaderRDD.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.owl 2 | 3 | import net.sansa_stack.owl.spark.owl._ 4 | import org.apache.spark.sql.SparkSession 5 | 6 | 7 | object OWLReaderRDD { 8 | 9 | def main(args: Array[String]) { 10 | parser.parse(args, Config()) match { 11 | case Some(config) => 12 | run(config.in, config.syntax) 13 | case None => 14 | println(parser.usage) 15 | } 16 | } 17 | 18 | def run(input: String, syntax: String): Unit = { 19 | 20 | println(".============================================.") 21 | println("| RDD OWL reader example (" + syntax + " syntax)|") 22 | println("============================================") 23 | 24 | val spark = SparkSession.builder 25 | .appName(s"OWL reader example ( $input + )($syntax)") 26 | .master("local[*]") 27 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 28 | .config("spark.kryo.registrator", "net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator") 29 | .getOrCreate() 30 | 31 | val rdd = syntax match { 32 | case "fun" => spark.owl(Syntax.FUNCTIONAL)(input) 33 | case "manch" => spark.owl(Syntax.MANCHESTER)(input) 34 | case "owl_xml" => spark.owl(Syntax.OWLXML)(input) 35 | case _ => 36 | throw new RuntimeException("Invalid syntax type: '" + syntax + "'") 37 | } 38 | 39 | rdd.take(10).foreach(println(_)) 40 | spark.stop() 41 | } 42 | 43 | case class Config( 44 | in: String = "", 45 | syntax: String = "") 46 | 47 | // the CLI parser 48 | val parser = new scopt.OptionParser[Config]("RDD OWL reader example") { 49 | 50 | head("RDD OWL reader example") 51 | 52 | opt[String]('i', "input").required().valueName(""). 53 | action((x, c) => c.copy(in = x)). 
54 | text("path to file that contains the data") 55 | 56 | opt[String]('s', "syntax").required().valueName("{fun | manch | owl_xml}"). 57 | action((x, c) => c.copy(syntax = x)). 58 | text("the syntax format") 59 | 60 | help("help").text("prints this usage text") 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/Semantic.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import net.sansa_stack.query.spark.semantic.QuerySystem 4 | import net.sansa_stack.rdf.spark.io._ 5 | import net.sansa_stack.rdf.spark.partition._ 6 | import org.apache.jena.riot.Lang 7 | import org.apache.spark.sql.SparkSession 8 | 9 | /** 10 | * Run SPARQL queries over Spark using Semantic partitioning approach. 11 | * 12 | * @author Gezim Sejdiu 13 | */ 14 | object Semantic { 15 | 16 | def main(args: Array[String]) { 17 | parser.parse(args, Config()) match { 18 | case Some(config) => 19 | run(config.in, config.queries) 20 | case None => 21 | println(parser.usage) 22 | } 23 | } 24 | 25 | def run(input: String, queries: String): Unit = { 26 | 27 | println("===========================================") 28 | println("| SANSA - Semantic Partitioning example |") 29 | println("===========================================") 30 | 31 | val spark = SparkSession.builder 32 | .master("local[*]") 33 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 34 | .appName("SANSA - Semantic Partitioning") 35 | .getOrCreate() 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val partitionData = triples.partitionGraphAsSemantic() 41 | 42 | val result = new QuerySystem(partitionData, queries).run() 43 | result.take(5).foreach(println) 44 | 45 | spark.close() 46 | 47 | } 48 | 49 | case class Config(in: String = "", queries: String = "") 50 | 51 | val parser = new scopt.OptionParser[Config]("SANSA - Semantic Partitioning example") { 52 | 53 | head(" SANSA - Semantic Partitioning example") 54 | 55 | opt[String]('i', "input").required().valueName(""). 56 | action((x, c) => c.copy(in = x)). 57 | text("path to file that contains the data (in N-Triples format)") 58 | 59 | opt[String]('q', "queries").required().valueName(""). 60 | action((x, c) => c.copy(queries = x)).
61 | text("path to the file containing the SPARQL query") 62 | 63 | help("help").text("prints this usage text") 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/owl/OWLReaderDataSet.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.owl 2 | 3 | import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer 4 | 5 | import scala.collection.mutable 6 | import net.sansa_stack.owl.flink.owl._ 7 | import org.apache.flink.api.scala.ExecutionEnvironment 8 | 9 | 10 | object OWLReaderDataSet { 11 | 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in, config.syntax) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String, syntax: String): Unit = { 22 | 23 | println(".============================================.") 24 | println("| Dataset OWL reader example (" + syntax + " syntax)|") 25 | println(".============================================.") 26 | 27 | val env = ExecutionEnvironment.getExecutionEnvironment 28 | // scalastyle:off classforname 29 | env.getConfig.addDefaultKryoSerializer( 30 | Class.forName("java.util.Collections$UnmodifiableCollection"), 31 | classOf[UnmodifiableCollectionsSerializer]) 32 | // scalastyle:on classforname 33 | 34 | val dataSet = syntax match { 35 | case "fun" => env.owl(Syntax.FUNCTIONAL)(input) 36 | case "manch" => env.owl(Syntax.MANCHESTER)(input) 37 | case "owl_xml" => 38 | throw new RuntimeException("'" + syntax + "' - Not supported, yet.") 39 | case _ => 40 | throw new RuntimeException("Invalid syntax type: '" + syntax + "'") 41 | } 42 | 43 | dataSet.first(10).print() 44 | 45 | } 46 | 47 | case class Config( 48 | in: String = "", 49 | syntax: String = "") 50 | 51 | // the CLI parser 52 | val parser = new scopt.OptionParser[Config]("Dataset OWL reader example") { 53 | 54 | head("Dataset OWL reader example") 55 | 56 | opt[String]('i', "input").required().valueName(""). 57 | action((x, c) => c.copy(in = x)). 58 | text("path to file that contains the data") 59 | 60 | opt[String]('s', "syntax").required().valueName("{fun | manch | owl_xml}"). 61 | action((x, c) => c.copy(syntax = x)). 
62 | text("the syntax format") 63 | 64 | help("help").text("prints this usage text") 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/SilviaClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering._ 6 | import net.sansa_stack.ml.spark.clustering.algorithms.SilviaClustering 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.model._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.log4j.{ Level, Logger } 11 | import org.apache.spark.sql.SparkSession 12 | 13 | object SilviaClusteringExample { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in, config.out) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String, output: String): Unit = { 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"SilviaClustering example ( $input )") 28 | .master("local[*]") 29 | .config("spark.hadoop.validateOutputSpecs", "false") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | println("============================================") 34 | println("| Silvia Clustering example |") 35 | println("============================================") 36 | 37 | Logger.getRootLogger.setLevel(Level.WARN) 38 | 39 | val lang = Lang.NTRIPLES 40 | val triples = spark.rdf(lang)(input) 41 | 42 | val silvia = triples.cluster(ClusteringAlgorithm.SilviaClustering).asInstanceOf[SilviaClustering].run() 43 | 44 | silvia.collect.foreach(println) 45 | 46 | spark.stop 47 | 48 | } 49 | 50 | case class Config(in: String = "", out: String = "") 51 | 52 | val parser = new scopt.OptionParser[Config]("SilviaClustering") { 53 | 54 | head("SilviaClustering: an example SilviaClustering app.") 55 | 56 | opt[String]('i', "input").required().valueName(""). 57 | action((x, c) => c.copy(in = x)). 58 | text("path to file contains the input files") 59 | 60 | opt[String]('o', "output").optional().valueName(""). 61 | action((x, c) => c.copy(out = x)). 
62 | text("the output directory") 63 | 64 | help("help").text("prints this usage text") 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/RDFByModularityClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering.algorithms.{ RDFByModularityClustering => RDFByModularityClusteringAlg } 6 | import org.apache.log4j.{ Level, Logger } 7 | import org.apache.spark.sql.SparkSession 8 | 9 | 10 | object RDFByModularityClustering { 11 | 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in, config.out, config.numIterations) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String, output: String, numIterations: Int): Unit = { 22 | 23 | val spark = SparkSession.builder 24 | .appName(s"RDF By Modularity Clustering example example ( $input )") 25 | .master("local[*]") 26 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 27 | .getOrCreate() 28 | 29 | println("============================================") 30 | println("| RDF By Modularity Clustering example |") 31 | println("============================================") 32 | 33 | Logger.getRootLogger.setLevel(Level.ERROR) 34 | 35 | RDFByModularityClusteringAlg(spark.sparkContext, numIterations, input, output) 36 | 37 | spark.stop 38 | 39 | } 40 | 41 | case class Config(in: String = "", out: String = "", numIterations: Int = 100) 42 | 43 | val defaultParams = Config() 44 | 45 | val parser = new scopt.OptionParser[Config]("RDF By Modularity Clustering") { 46 | 47 | head("RDF By Modularity Clustering: an example RDF By Modularity Clustering app using RDF Graph.") 48 | 49 | opt[String]('i', "input").required().valueName("") 50 | .text(s"path to file that contains the input files (in N-Triple format)") 51 | .action((x, c) => c.copy(in = x)) 52 | 53 | opt[String]('o', "output").valueName("") 54 | .text("the output directory") 55 | .action((x, c) => c.copy(out = x)) 56 | 57 | opt[Int]("numIterations") 58 | .text(s"number of iterations, default: ${defaultParams.numIterations}") 59 | .action((x, c) => c.copy(numIterations = x)) 60 | 61 | help("help").text("prints this usage text") 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/owl/OWLReaderDataset.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.owl 2 | 3 | import net.sansa_stack.owl.spark.dataset.{ FunctionalSyntaxOWLAxiomsDatasetBuilder, ManchesterSyntaxOWLAxiomsDatasetBuilder } 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object OWLReaderDataset { 7 | 8 | def main(args: Array[String]) { 9 | parser.parse(args, Config()) match { 10 | case Some(config) => 11 | run(config.in, config.syntax) 12 | case None => 13 | println(parser.usage) 14 | } 15 | } 16 | 17 | def run(input: String, syntax: String): Unit = { 18 | 19 | println(".============================================.") 20 | println("| Dataset OWL reader example (" + syntax + " syntax)|") 21 | println(".============================================.") 22 | 23 | val spark = SparkSession.builder 24 | 
.appName(s"Dataset OWL reader ( $input + )($syntax)") 25 | .master("local[*]") 26 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 27 | .config("spark.kryo.registrator", "net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator") 28 | .getOrCreate() 29 | 30 | val dataset = syntax match { 31 | case "fun" => FunctionalSyntaxOWLAxiomsDatasetBuilder.build(spark, input) 32 | case "manch" => ManchesterSyntaxOWLAxiomsDatasetBuilder.build(spark, input) 33 | case "owl_xml" => 34 | throw new RuntimeException("'" + syntax + "' - Not supported, yet.") 35 | case _ => 36 | throw new RuntimeException("Invalid syntax type: '" + syntax + "'") 37 | } 38 | 39 | dataset.take(10).foreach(println(_)) 40 | spark.stop() 41 | } 42 | 43 | case class Config( 44 | in: String = "", 45 | syntax: String = "") 46 | 47 | // the CLI parser 48 | val parser = new scopt.OptionParser[Config]("Dataset OWL reader example") { 49 | 50 | head("Dataset OWL reader example") 51 | 52 | opt[String]('i', "input").required().valueName(""). 53 | action((x, c) => c.copy(in = x)). 54 | text("path to file that contains the data") 55 | 56 | opt[String]('s', "syntax").required().valueName("{fun | manch | owl_xml}"). 57 | action((x, c) => c.copy(syntax = x)). 58 | text("the syntax format") 59 | 60 | help("help").text("prints this usage text") 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/DataLake.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import net.sansa_stack.query.spark.datalake.DataLakeEngine 4 | import org.apache.spark.sql.SparkSession 5 | 6 | /** 7 | * Run SPARQL queries over Spark using Data Lake approach. 8 | */ 9 | object DataLake { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.queryFile, config.mappingsFile, config.configFile) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(queryFile: String, mappingsFile: String, configFile: String): Unit = { 21 | 22 | println("======================================") 23 | println("| DataLake (CSV) example |") 24 | println("======================================") 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"DataLake (CSV) example") 28 | .master("local[*]") 29 | .getOrCreate() 30 | 31 | // val result = spark.sparqlDL(queryFile, mappingsFile, configFile) 32 | val result = DataLakeEngine.run(queryFile, mappingsFile, configFile, spark) 33 | result.show() 34 | 35 | spark.stop 36 | 37 | } 38 | 39 | case class Config( 40 | queryFile: String = getClass.getResource("/datalake/queries/Q1.sparql").getPath, 41 | mappingsFile: String = getClass.getResource("/datalake/config").getPath, 42 | configFile: String = getClass.getResource("/datalake/mappings.ttl").getPath) 43 | 44 | val parser = new scopt.OptionParser[Config]("Sparqlify example") { 45 | 46 | head(" DataLake (CSV) example") 47 | 48 | opt[String]('f', "queryFile").valueName(""). 49 | action((x, c) => c.copy(queryFile = x)). 50 | text("a file containing SPARQL queries or a single query, default: /queries/Q1.sparql") 51 | 52 | opt[String]('m', "mappingsFile").valueName(""). 53 | action((x, c) => c.copy(mappingsFile = x)). 54 | text("the mappings to the target sources, default: /config_csv-only") 55 | 56 | opt[String]('c', "configFile").optional().valueName(""). 
57 | action((x, c) => c.copy(configFile = x)). 58 | text("configuration file for different data sources, default: /mappings_csv-only.ttl") 59 | 60 | help("help").text("prints this usage text") 61 | } 62 | 63 | } 64 | 65 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/kernel/RDFGraphKernel.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.kernel 2 | 3 | import net.sansa_stack.ml.spark.kernel._ 4 | import net.sansa_stack.rdf.spark.io._ 5 | import org.apache.jena.riot.Lang 6 | import org.apache.spark.sql.SparkSession 7 | 8 | /** 9 | * RDF Graph Kernel example. 10 | */ 11 | object RDFGraphKernel { 12 | def main(args: Array[String]) { 13 | parser.parse(args, Config()) match { 14 | case Some(config) => 15 | run(config.in, config.iteration) 16 | case None => 17 | println(parser.usage) 18 | } 19 | } 20 | 21 | def run(input: String, iteration: Int = 5): Unit = { 22 | 23 | println("======================================") 24 | println("| RDF Graph Kernel example |") 25 | println("======================================") 26 | 27 | val spark = SparkSession.builder 28 | .appName(s" RDF Graph Kernel example ( $input )") 29 | .master("local[*]") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | val t0 = System.nanoTime 34 | val lang = Lang.NTRIPLES 35 | 36 | val triples = spark.rdf(lang)(input) 37 | .filter(_.getPredicate.getURI != "http://swrc.ontoware.org/ontology#employs") 38 | 39 | val rdfFastGraphKernel = RDFFastGraphKernel(spark, triples, "http://swrc.ontoware.org/ontology#affiliation") 40 | val data = rdfFastGraphKernel.getMLLibLabeledPoints 41 | 42 | val t1 = System.nanoTime 43 | RDFFastTreeGraphKernelUtil.printTime("Initialization", t0, t1) 44 | 45 | RDFFastTreeGraphKernelUtil.predictLogisticRegressionMLLIB(data, 4, iteration) 46 | 47 | val t2 = System.nanoTime 48 | RDFFastTreeGraphKernelUtil.printTime("Run Prediction", t1, t2) 49 | 50 | } 51 | 52 | case class Config( 53 | in: String = "", 54 | iteration: Int = 5) 55 | 56 | val parser = new scopt.OptionParser[Config]("Mines the Rules example") { 57 | 58 | head("Mines the Rules example") 59 | 60 | opt[String]('i', "input").required().valueName(""). 61 | action((x, c) => c.copy(in = x)). 62 | text("path to file that contains the data") 63 | 64 | opt[Int]('k', "iteration").required().valueName(""). 65 | action((x, c) => c.copy(iteration = x)). 66 | text("the iteration or folding on validation") 67 | 68 | help("help").text("prints this usage text") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /sansa-examples-spark/README.md: -------------------------------------------------------------------------------- 1 | # SANSA-Examples on Apache Spark 2 | This is a SANSA-Examples repo for Apache Spark. 3 | 4 | ## Running the application on a Spark standalone cluster 5 | 6 | To run the application on a standalone Spark cluster 7 | 8 | 1. Setup a Spark cluster 9 | 2. Build the application with Maven 10 | 11 | ``` 12 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 13 | cd SANSA-Examples/sansa-examples-spark 14 | 15 | mvn clean package 16 | 17 | ``` 18 | 19 | 3. Submit the application to the Spark cluster 20 | 21 | ``` 22 | spark-submit \ 23 | --class net.sansa_stack.examples.spark.. 
\ 24 | --master spark://spark-master:7077 \ 25 | /app/application.jar \ 26 | SPARK_APPLICATION_ARGUMENTS 27 | ``` 28 | 29 | ## Running the application on a Spark standalone cluster via Spark Docker using BDE Platform 30 | 31 | To run the SANSA-Examples application on the BDE platform, execute the following commands: 32 | 33 | ``` 34 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 35 | cd SANSA-Examples/sansa-examples-spark 36 | 37 | make --directory config/csswrapper/ hosts 38 | 39 | docker network create hadoop 40 | 41 | docker-compose up -d 42 | ``` 43 | Note: To make it run, you may need to modify your /etc/hosts file. There is a Makefile that will do this automatically for you (you should clean up your /etc/hosts after the demo). 44 | 45 | After the BDE platform is up and running, let’s load some data into HDFS using the Hue FileBrowser running in our network. To perform these actions, navigate to the 'hue' tab at http://demo.sansa-stack.local. Use the “hue” username with any password to log in to the FileBrowser (the “hue” user is set up as a proxy user for HDFS, see hadoop.env for the configuration parameters). Click on “File Browser” in the upper right corner of the screen and use the GUI to create the /user/root/input and /user/root/output folders and upload the data file into the /input folder. 46 | Go to the HDFS tab at http://demo.sansa-stack.local and check that the file exists under the path ‘/user/root/input/yourfile’. 47 | 48 | Now that all the configuration needed for our example is in place, let’s run sansa-examples. 49 | 50 | ``` 51 | docker build --rm=true -t sansa/sansa-examples-spark . 52 | ``` 53 | Then run the image: 54 | ``` 55 | docker run --name sansa-examples-spark-app --net hadoop --link spark-master:spark-master \ 56 | -e ENABLE_INIT_DAEMON=false \ 57 | -d sansa/sansa-examples-spark 58 | 59 | ``` 60 | 61 | -------------------------------------------------------------------------------- /sansa-examples-flink/README.md: -------------------------------------------------------------------------------- 1 | # SANSA-Examples on Apache Flink 2 | This is a SANSA-Examples repo for Apache Flink. 3 | 4 | ## Running the application on a Flink standalone cluster 5 | 6 | To run the application on a standalone Flink cluster 7 | 8 | 1. Setup a Flink cluster 9 | 2. Build the application with Maven 10 | 11 | ``` 12 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 13 | cd SANSA-Examples/sansa-examples-flink 14 | 15 | mvn clean package 16 | 17 | ``` 18 | 19 | 3. Submit the application to the Flink cluster 20 | 21 | ``` 22 | cd /path/to/flink/installation 23 | ./bin/flink run -c \ 24 | net.sansa_stack.examples.flink.. \ 25 | /app/application.jar \ 26 | FLINK_APPLICATION_ARGUMENTS 27 | ``` 28 | 29 | ## Running the application on a Flink standalone cluster via Flink Docker using BDE Platform 30 | 31 | To run the SANSA-Examples application on the BDE platform, execute the following commands: 32 | 33 | ``` 34 | git clone https://github.com/SANSA-Stack/SANSA-Examples.git 35 | cd SANSA-Examples/sansa-examples-flink 36 | 37 | make --directory config/csswrapper/ hosts 38 | 39 | docker network create hadoop 40 | 41 | docker-compose up -d 42 | ``` 43 | Note: To make it run, you may need to modify your /etc/hosts file. There is a Makefile that will do this automatically for you (you should clean up your /etc/hosts after the demo). 44 | 45 | After the BDE platform is up and running, let’s load some data into HDFS using the Hue FileBrowser running in our network.
To perform these actions, navigate to the 'hue' tab at http://demo.sansa-stack.local. Use the “hue” username with any password to log in to the FileBrowser (the “hue” user is set up as a proxy user for HDFS, see hadoop.env for the configuration parameters). Click on “File Browser” in the upper right corner of the screen and use the GUI to create the /user/root/input and /user/root/output folders and upload the data file into the /input folder. 46 | Go to the HDFS tab at http://demo.sansa-stack.local and check that the file exists under the path ‘/user/root/input/yourfile’. 47 | 48 | Now that all the configuration needed for our example is in place, let’s run sansa-examples. 49 | 50 | ``` 51 | docker build --rm=true -t sansa/sansa-examples-flink . 52 | ``` 53 | Then run the image: 54 | ``` 55 | docker run --name flink-entityrank-app --net hadoop --link flink-master:flink-master \ 56 | -e ENABLE_INIT_DAEMON=false \ 57 | -e FLINK_MASTER_PORT_6123_TCP_ADDR=flink-master \ 58 | -e FLINK_MASTER_PORT_6123_TCP_PORT=6123 \ 59 | -d sansa/sansa-examples-flink 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/rdf/TripleOps.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.flink.io._ 6 | import net.sansa_stack.rdf.flink.model._ 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.api.scala.ExecutionEnvironment 9 | import org.apache.jena.graph.{Node, NodeFactory} 10 | import org.apache.jena.riot.Lang 11 | 12 | object TripleOps { 13 | def main(args: Array[String]) { 14 | parser.parse(args, Config()) match { 15 | case Some(config) => 16 | run(config.in) 17 | case None => 18 | println(parser.usage) 19 | } 20 | } 21 | 22 | def run(input: String): Unit = { 23 | 24 | println("======================================") 25 | println("| Triple Ops example |") 26 | println("======================================") 27 | val env = ExecutionEnvironment.getExecutionEnvironment 28 | 29 | val triples = env.rdf(Lang.NTRIPLES)(input) 30 | 31 | triples.getTriples().collect().take(4).foreach(println(_)) 32 | // Triples filtered by subject ( "http://commons.dbpedia.org/resource/Category:Places" ) 33 | println("All triples with subject Category:Places:\n" + triples.find(Some(NodeFactory.createURI("http://commons.dbpedia.org/resource/Category:Places")), None, None).collect().mkString("\n")) 34 | 35 | // Triples filtered by predicate ( "http://dbpedia.org/ontology/influenced" ) 36 | println("All triples for predicate influenced:\n" + triples.find(None, Some(NodeFactory.createURI("http://dbpedia.org/ontology/influenced")), None).collect().mkString("\n")) 37 | 38 | // Triples filtered by object ( ) 39 | println("All triples influenced by Henry_James:\n" + triples.find(None, None, Some(NodeFactory.createURI(""))).collect().mkString("\n")) 40 | 41 | // println("Number of triples: " + rdfgraph.triples.distinct.count()) 42 | println("Number of subjects: " + triples.getSubjects.map(_.toString).distinct().count) 43 | println("Number of predicates: " + triples.getPredicates.map(_.toString).distinct.count()) 44 | println("Number of objects: " + triples.getObjects.map(_.toString).distinct.count()) 45 | 46 | } 47 | case class Config(in: String = "") 48 | 49 | val parser = new scopt.OptionParser[Config]("Triple Ops example") { 50 | 51 | head(" Triple Ops example") 52 | 53 | opt[String]('i',
"input").required().valueName(""). 54 | action((x, c) => c.copy(in = x)). 55 | text("path to file that contains the data (in N-Triples format)") 56 | help("help").text("prints this usage text") 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/RDFGraphPIClustering.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.clustering 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.clustering._ 6 | import net.sansa_stack.ml.spark.clustering.algorithms.RDFGraphPowerIterationClustering 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.model._ 9 | import org.apache.jena.riot.{ Lang, RDFDataMgr } 10 | import org.apache.log4j.{ Level, Logger } 11 | import org.apache.spark.sql.SparkSession 12 | 13 | object RDFGraphPIClustering { 14 | 15 | def main(args: Array[String]) { 16 | parser.parse(args, Config()) match { 17 | case Some(config) => 18 | run(config.in, config.out, config.k, config.maxIterations) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(input: String, output: String, k: Int, maxIterations: Int): Unit = { 25 | 26 | val spark = SparkSession.builder 27 | .appName(s"Power Iteration Clustering example ( $input )") 28 | .master("local[*]") 29 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 30 | .getOrCreate() 31 | System.setProperty("spark.akka.frameSize", "2000") 32 | 33 | println("============================================") 34 | println("| Power Iteration Clustering example |") 35 | println("============================================") 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | val cluster = triples.cluster(ClusteringAlgorithm.RDFGraphPowerIterationClustering).asInstanceOf[RDFGraphPowerIterationClustering] 41 | .setK(k).setMaxIterations(maxIterations).run() 42 | 43 | cluster.collect.foreach(println) 44 | 45 | spark.stop 46 | 47 | } 48 | 49 | case class Config(in: String = "", out: String = "", k: Int = 2, maxIterations: Int = 5) 50 | 51 | val defaultParams = Config() 52 | 53 | val parser = new scopt.OptionParser[Config]("RDFGraphPIClustering") { 54 | 55 | head("PowerIterationClusteringExample: an example PIC app using concentric circles.") 56 | 57 | opt[String]('i', "input").required().valueName("") 58 | .text(s"path (local/hdfs) to file that contains the input files (in N-Triple format)") 59 | .action((x, c) => c.copy(in = x)) 60 | 61 | opt[String]('o', "out").required().valueName(""). 62 | action((x, c) => c.copy(out = x)). 
63 | text("the output directory") 64 | 65 | opt[Int]('k', "k") 66 | .text(s"number of circles (/clusters), default: ${defaultParams.k}") 67 | .action((x, c) => c.copy(k = x)) 68 | 69 | opt[Int]("maxIterations") 70 | .text(s"number of iterations, default: ${defaultParams.maxIterations}") 71 | .action((x, c) => c.copy(maxIterations = x)) 72 | 73 | help("help").text("prints this usage text") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/mining/MineRules.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.mining 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.mining.amieSpark.{ DfLoader, RDFGraphLoader } 6 | import net.sansa_stack.ml.spark.mining.amieSpark.KBObject.KB 7 | import net.sansa_stack.ml.spark.mining.amieSpark.MineRules.Algorithm 8 | import org.apache.spark.sql.SparkSession 9 | 10 | /* 11 | * Mine Rules 12 | * 13 | */ 14 | object MineRules { 15 | 16 | def main(args: Array[String]) { 17 | parser.parse(args, Config()) match { 18 | case Some(config) => 19 | run(config.in, config.out) 20 | case None => 21 | println(parser.usage) 22 | } 23 | } 24 | 25 | def run(input: String, outputPath: String): Unit = { 26 | 27 | println("======================================") 28 | println("| Mines the Rules example |") 29 | println("======================================") 30 | 31 | val spark = SparkSession.builder 32 | .appName(s" Mines the Rules example ( $input )") 33 | .master("local[*]") 34 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 35 | .getOrCreate() 36 | 37 | val hdfsPath = outputPath + "/" 38 | 39 | val know = new KB() 40 | know.sethdfsPath(hdfsPath) 41 | know.setKbSrc(input) 42 | 43 | know.setKbGraph(RDFGraphLoader.loadFromFile(input, spark.sparkContext, 2)) 44 | know.setDFTable(DfLoader.loadFromFileDF(know.getKbSrc, spark.sparkContext, spark.sqlContext, 2)) 45 | 46 | val algo = new Algorithm(know, 0.01, 3, 0.1, hdfsPath) 47 | 48 | // var erg = algo.ruleMining(sparkSession.sparkContext, sparkSession.sqlContext) 49 | // println(erg) 50 | var output = algo.ruleMining(spark.sparkContext, spark.sqlContext) 51 | 52 | var outString = output.map { x => 53 | var rdfTrp = x.getRule() 54 | var temp = "" 55 | for (i <- 0 to rdfTrp.length - 1) { 56 | if (i == 0) { 57 | temp = rdfTrp(i) + " <= " 58 | } else { 59 | temp += rdfTrp(i) + """ \u2227 """ 60 | } 61 | } 62 | temp = temp.stripSuffix(" \u2227 ") 63 | temp 64 | }.toSeq 65 | var rddOut = spark.sparkContext.parallelize(outString).repartition(1) 66 | 67 | rddOut.saveAsTextFile(outputPath + "/testOut") 68 | } 69 | 70 | case class Config( 71 | in: String = "", 72 | out: String = "") 73 | 74 | val parser = new scopt.OptionParser[Config]("Mines the Rules example") { 75 | 76 | head("Mines the Rules example") 77 | 78 | opt[String]('i', "input").required().valueName(""). 79 | action((x, c) => c.copy(in = x)). 80 | text("path to file that contains the data") 81 | 82 | opt[String]('o', "out").required().valueName(""). 83 | action((x, c) => c.copy(out = x)). 
84 | text("the output directory") 85 | 86 | help("help").text("prints this usage text") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/TripleOps.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.rdf.spark.io._ 6 | import net.sansa_stack.rdf.spark.model._ 7 | import org.apache.jena.graph.NodeFactory 8 | import org.apache.jena.riot.Lang 9 | import org.apache.spark.sql.SparkSession 10 | 11 | object TripleOps { 12 | 13 | def main(args: Array[String]) { 14 | parser.parse(args, Config()) match { 15 | case Some(config) => 16 | run(config.in) 17 | case None => 18 | println(parser.usage) 19 | } 20 | } 21 | 22 | def run(input: String): Unit = { 23 | 24 | val spark = SparkSession.builder 25 | .appName(s"Triple Ops example $input") 26 | .master("local[*]") 27 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 28 | .getOrCreate() 29 | 30 | println("======================================") 31 | println("| Triple Ops example |") 32 | println("======================================") 33 | 34 | val lang = Lang.NTRIPLES 35 | val triples = spark.rdf(lang)(input) 36 | 37 | // Triples filtered by subject ( "http://dbpedia.org/resource/Charles_Dickens" ) 38 | println("All triples related to Dickens:\n" + triples.find(Some(NodeFactory.createURI("http://dbpedia.org/resource/Charles_Dickens")), None, None).collect().mkString("\n")) 39 | 40 | // Triples filtered by predicate ( "http://dbpedia.org/ontology/influenced" ) 41 | println("All triples for predicate influenced:\n" + triples.find(None, Some(NodeFactory.createURI("http://dbpedia.org/ontology/influenced")), None).collect().mkString("\n")) 42 | 43 | // Triples filtered by object ( ) 44 | println("All triples influenced by Henry_James:\n" + triples.find(None, None, Some(NodeFactory.createURI("http://dbpedia.org/resource/Henry_James"))).collect().mkString("\n")) 45 | 46 | println("Number of triples: " + triples.distinct.count()) 47 | println("Number of subjects: " + triples.getSubjects.distinct.count()) 48 | println("Number of predicates: " + triples.getPredicates.distinct.count()) 49 | println("Number of objects: " + triples.getObjects.distinct.count()) 50 | 51 | val subjects = triples.filterSubjects(_.isURI()).collect.mkString("\n") 52 | 53 | val predicates = triples.filterPredicates(_.isVariable()).collect.mkString("\n") 54 | val objects = triples.filterObjects(_.isLiteral()).collect.mkString("\n") 55 | 56 | // graph.getTriples.take(5).foreach(println(_)) 57 | 58 | spark.stop 59 | 60 | } 61 | // the config object 62 | case class Config(in: String = "") 63 | 64 | // the CLI parser 65 | val parser = new scopt.OptionParser[Config]("Triple Ops example") { 66 | 67 | head(" Triple Ops example") 68 | 69 | opt[String]('i', "input").required().valueName(""). 70 | action((x, c) => c.copy(in = x)). 
71 | text("path to file that contains the data (in N-Triples format)") 72 | help("help").text("prints this usage text") 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/inference/axioms/RDFGraphInference.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.inference.axioms 2 | 3 | import net.sansa_stack.inference.rules.ReasoningProfile 4 | import net.sansa_stack.inference.rules.ReasoningProfile._ 5 | import net.sansa_stack.inference.spark.forwardchaining.axioms.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS} 6 | import net.sansa_stack.owl.spark.owl._ 7 | import org.apache.spark.sql.SparkSession 8 | 9 | object RDFGraphInference { 10 | 11 | def main(args: Array[String]) { 12 | parser.parse(args, Config()) match { 13 | case Some(config) => 14 | run(config.in, config.profile, config.parallelism) 15 | case None => 16 | println(parser.usage) 17 | } 18 | } 19 | 20 | def run(input: String, profile: ReasoningProfile, parallelism: Int): Unit = { 21 | 22 | // the SPARK config 23 | val spark = SparkSession.builder 24 | .appName(s"SPARK $profile Reasoning") 25 | .master("local[*]") 26 | .config("spark.hadoop.validateOutputSpecs", "false") // override output files 27 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 28 | .config("spark.default.parallelism", parallelism) 29 | .config("spark.ui.showConsoleProgress", "false") 30 | .config("spark.sql.shuffle.partitions", parallelism) 31 | .getOrCreate() 32 | 33 | // load axioms from disk 34 | var owlAxioms = spark.owl(Syntax.FUNCTIONAL)(input) 35 | println(s"|G| = ${owlAxioms.count()}") 36 | // create reasoner and compute inferred graph 37 | val inferredGraph = profile match { 38 | case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism)(owlAxioms) 39 | case OWL_HORST => new ForwardRuleReasonerOWLHorst(spark.sparkContext, parallelism)(owlAxioms) 40 | case _ => 41 | throw new RuntimeException("Invalid profile: '" + profile + "'") 42 | } 43 | 44 | println(s"|G_inf| = ${inferredGraph.count()}") 45 | 46 | spark.stop() 47 | } 48 | 49 | case class Config( 50 | in: String = "", 51 | profile: ReasoningProfile = ReasoningProfile.RDFS, 52 | parallelism: Int = 4) 53 | 54 | // read ReasoningProfile enum 55 | implicit val profilesRead: scopt.Read[ReasoningProfile.Value] = 56 | scopt.Read.reads(ReasoningProfile forName _.toLowerCase()) 57 | 58 | // the CLI parser 59 | val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") { 60 | 61 | head("RDFGraphMaterializer (axioms)", "0.5.0") 62 | 63 | opt[String]('i', "input").required().valueName(""). 64 | action((x, c) => c.copy(in = x)). 65 | text("path to file or directory that contains the input files") 66 | 67 | opt[ReasoningProfile]('p', "profile").required().valueName("{rdfs | owl-horst}"). 68 | action((x, c) => c.copy(profile = x)). 69 | text("the reasoning profile") 70 | 71 | opt[Int]("parallelism").optional().action((x, c) => 72 | c.copy(parallelism = x)).text("the degree of parallelism, i.e. 
the number of Spark partitions used in the Spark operations") 73 | 74 | help("help").text("prints this usage text") 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/kge/CrossValidation.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.kge 2 | 3 | import net.sansa_stack.ml.spark.kge.linkprediction.crossvalidation.{ kFold, Bootstrapping, Holdout } 4 | import net.sansa_stack.rdf.spark.kge.convertor.ByIndex 5 | import net.sansa_stack.rdf.spark.kge.triples.Triples 6 | import org.apache.spark.sql._ 7 | 8 | object CrossValidation { 9 | 10 | def main(args: Array[String]) { 11 | parser.parse(args, Config()) match { 12 | case Some(config) => 13 | run(config.in, config.technique, config.k) 14 | case None => 15 | println(parser.usage) 16 | } 17 | } 18 | 19 | def run(input: String, technique: String, k: Int): Unit = { 20 | 21 | val spark = SparkSession.builder 22 | .appName(s"Cross validation techniques example $input") 23 | .master("local[*]") 24 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 25 | .getOrCreate() 26 | 27 | println("==============================================") 28 | println("|Cross validation techniques example |") 29 | println("==============================================") 30 | 31 | val data = new Triples(input, "\t", false, false, spark) 32 | 33 | // getting 10 distinct entities in (s,p,o) i.e. subjects + objects and printing them 34 | data.getEntities().take(10).foreach(println) 35 | 36 | // getting 10 distinct predicates in (s,p,o) and printing them 37 | data.getEntities().take(10).foreach(println) 38 | 39 | // converting the original data to indexData 40 | val indexedData = new ByIndex(data.triples, spark) 41 | val numericData = indexedData.numeric() 42 | 43 | // getting 10 distinct (s,p,o) in their numeric (indexed) form and print them 44 | indexedData.numeric.take(10).foreach(println) 45 | 46 | val (train, test) = technique match { 47 | case "holdout" => new Holdout(numericData, 0.6f).crossValidation() 48 | case "bootstrapping" => new Bootstrapping(numericData).crossValidation() 49 | case "kFold" => new kFold(numericData, k, spark).crossValidation() 50 | case _ => 51 | throw new RuntimeException("'" + technique + "' - Not supported, yet.") 52 | } 53 | 54 | println("<< DONE >>") 55 | 56 | spark.stop 57 | 58 | } 59 | 60 | case class Config(in: String = "", technique: String = "", k: Int = 0) 61 | 62 | val parser = new scopt.OptionParser[Config]("Cross validation techniques example") { 63 | 64 | head("Cross validation techniques example") 65 | 66 | opt[String]('i', "input").required().valueName(""). 67 | action((x, c) => c.copy(in = x)). 68 | text("path to file that contains the data") 69 | 70 | opt[String]('t', "technique").required().valueName("{holdout | bootstrapping | kFold}"). 71 | action((x, c) => c.copy(technique = x)). 72 | text("cross validation techniques") 73 | 74 | opt[Int]("k").optional().valueName(""). 75 | action((x, c) => { 76 | c.copy(k = x) 77 | }). 
78 | text("The k value (used only for technique'kFold')") 79 | 80 | checkConfig(c => 81 | if (c.technique == "kFold" && c.k == 0) failure("Option --k-Fold must not be empty if technique 'kFold' is set") 82 | else success) 83 | 84 | help("help").text("prints this usage text") 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /sansa-examples-spark/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | namenode: 4 | image: bde2020/hadoop-namenode:1.0.0 5 | hostname: namenode 6 | container_name: namenode 7 | networks: 8 | - hadoop 9 | volumes: 10 | - ./data/namenode:/hadoop/dfs/name 11 | environment: 12 | - CLUSTER_NAME=test 13 | - INIT_DAEMON_STEP=setup_hdfs 14 | - VIRTUAL_HOST=hdfs.demo.sansa-stack.local 15 | env_file: 16 | - ./config/hadoop/hadoop.env 17 | ports: 18 | - "50070:50070" 19 | - "8020:8020" 20 | datanode1: 21 | image: bde2020/hadoop-datanode:1.0.0 22 | hostname: datanode1 23 | container_name: datanode1 24 | networks: 25 | - hadoop 26 | volumes: 27 | - ./data/datanode1:/hadoop/dfs/data 28 | env_file: 29 | - ./config/hadoop/hadoop.env 30 | 31 | datanode2: 32 | image: bde2020/hadoop-datanode:1.0.0 33 | hostname: datanode2 34 | container_name: datanode2 35 | networks: 36 | - hadoop 37 | volumes: 38 | - ./data/datanode2:/hadoop/dfs/data 39 | env_file: 40 | - ./config/hadoop/hadoop.env 41 | 42 | filebrowser: 43 | image: bde2020/hdfs-filebrowser:3.9 44 | hostname: filebrowser 45 | container_name: filebrowser 46 | networks: 47 | - hadoop 48 | environment: 49 | - NAMENODE_HOST=namenode 50 | - VIRTUAL_HOST=hue.demo.sansa-stack.local 51 | - VIRTUAL_PORT=8088 52 | # ports: 53 | # - "8088:8088" 54 | 55 | master: 56 | image: bde2020/spark-master:2.0.1-hadoop2.7 57 | hostname: spark-master 58 | container_name: spark-master 59 | networks: 60 | - hadoop 61 | environment: 62 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 63 | - VIRTUAL_HOST=spark-master.demo.sansa-stack.local 64 | - VIRTUAL_PORT=8080 65 | env_file: 66 | - ./config/hadoop/hadoop.env 67 | 68 | worker: 69 | image: bde2020/spark-worker:2.0.1-hadoop2.7 70 | hostname: spark-worker 71 | container_name: spark-worker 72 | networks: 73 | - hadoop 74 | environment: 75 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 76 | - VIRTUAL_HOST=spark-worker.demo.sansa-stack.local 77 | - VIRTUAL_PORT=8081 78 | env_file: 79 | - ./config/hadoop/hadoop.env 80 | links: 81 | - "master:spark-master" 82 | 83 | integratorui: 84 | image: bde2020/integrator-ui:latest 85 | # build: ./config/csswrapper/integrator-ui/ 86 | hostname: integratorui 87 | container_name: integratorui 88 | networks: 89 | - hadoop 90 | volumes: 91 | - ./config/integrator:/app/config 92 | environment: 93 | - VIRTUAL_HOST=demo.sansa-stack.local 94 | 95 | csswrapper: 96 | build: ./config/csswrapper/ 97 | # image: bde2020/nginx-proxy-with-css:latest 98 | hostname: csswrapper 99 | container_name: csswrapper 100 | networks: 101 | - hadoop 102 | ports: 103 | - 80:80 104 | # volumes: 105 | # - /var/run/docker.sock:/tmp/docker.sock:ro 106 | links: 107 | - namenode:namenode 108 | - filebrowser:filebrowser 109 | - master:master 110 | - worker:worker 111 | - integratorui:integratorui 112 | depends_on: 113 | - namenode 114 | - filebrowser 115 | - master 116 | - worker 117 | - integratorui 118 | 119 | networks: 120 | hadoop: 121 | external: true 122 | -------------------------------------------------------------------------------- 
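All of the Spark example apps in this module share the same scaffolding: a scopt Config case class with an OptionParser, a SparkSession built with the Kryo serializer, and the RDF input loaded via spark.rdf(lang)(input) from net.sansa_stack.rdf.spark.io. The following minimal sketch distills that pattern into a template for adding a new example; it is not a file shipped in this repo, and the object name TripleCount, the app name, and the banner strings are illustrative placeholders.

package net.sansa_stack.examples.spark.rdf

import net.sansa_stack.rdf.spark.io._
import org.apache.jena.riot.Lang
import org.apache.spark.sql.SparkSession

// Minimal sketch of the scopt + SparkSession pattern used by the examples in this module.
// "TripleCount" is a placeholder name, not an example shipped with SANSA-Examples.
object TripleCount {

  def main(args: Array[String]) {
    parser.parse(args, Config()) match {
      case Some(config) =>
        run(config.in)
      case None =>
        println(parser.usage)
    }
  }

  def run(input: String): Unit = {

    val spark = SparkSession.builder
      .appName(s"Triple count sketch ( $input )")
      .master("local[*]")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()

    // load the N-Triples file into an RDD of Jena triples, as the other examples do
    val lang = Lang.NTRIPLES
    val triples = spark.rdf(lang)(input)

    // a trivial action, standing in for the example-specific logic
    println("Number of triples: " + triples.count())

    spark.stop
  }

  // the config object
  case class Config(in: String = "")

  // the CLI parser
  val parser = new scopt.OptionParser[Config]("Triple count sketch") {

    head("Triple count sketch")

    opt[String]('i', "input").required().
      action((x, c) => c.copy(in = x)).
      text("path to file that contains the data (in N-Triples format)")

    help("help").text("prints this usage text")
  }
}

Such a class would be run the same way as the bundled examples, either locally or by passing its fully qualified name to spark-submit via --class against the cluster defined in the docker-compose.yml above, as described in the README.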
/sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/rdf/RDFQualityAssessment.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.rdf 2 | 3 | import java.io.File 4 | 5 | import scala.collection.mutable 6 | 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.qualityassessment._ 9 | import org.apache.jena.riot.Lang 10 | import org.apache.spark.sql.SparkSession 11 | 12 | object RDFQualityAssessment { 13 | 14 | def main(args: Array[String]) { 15 | parser.parse(args, Config()) match { 16 | case Some(config) => 17 | run(config.in, config.out) 18 | case None => 19 | println(parser.usage) 20 | } 21 | } 22 | 23 | def run(input: String, output: String): Unit = { 24 | 25 | val rdf_quality_file = new File(input).getName 26 | 27 | val spark = SparkSession.builder 28 | .appName(s"RDF Quality Assessment Example $rdf_quality_file") 29 | .master("local[*]") 30 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 31 | .getOrCreate() 32 | 33 | println("======================================") 34 | println("| RDF Quality Assessment Example |") 35 | println("======================================") 36 | 37 | val lang = Lang.NTRIPLES 38 | val triples = spark.rdf(lang)(input) 39 | 40 | // compute quality assessment 41 | val completeness_schema = triples.assessSchemaCompleteness() 42 | val completeness_interlinking = triples.assessInterlinkingCompleteness() 43 | val completeness_property = triples.assessPropertyCompleteness() 44 | 45 | val syntacticvalidity_literalnumeric = triples.assessLiteralNumericRangeChecker() 46 | val syntacticvalidity_XSDDatatypeCompatibleLiterals = triples.assessXSDDatatypeCompatibleLiterals() 47 | 48 | val availability_DereferenceableUris = triples.assessDereferenceableUris() 49 | 50 | val relevancy_CoverageDetail = triples.assessCoverageDetail() 51 | val relevancy_CoverageScope = triples.assessCoverageScope() 52 | val relevancy_AmountOfTriples = triples.assessAmountOfTriples() 53 | 54 | val performance_NoHashURIs = triples.assessNoHashUris() 55 | val understandability_LabeledResources = triples.assessLabeledResources() 56 | 57 | val AssessQualityStr = s""" 58 | completeness_schema:$completeness_schema 59 | completeness_interlinking:$completeness_interlinking 60 | completeness_property:$completeness_property 61 | syntacticvalidity_literalnumeric:$syntacticvalidity_literalnumeric 62 | syntacticvalidity_XSDDatatypeCompatibleLiterals:$syntacticvalidity_XSDDatatypeCompatibleLiterals 63 | availability_DereferenceableUris:$availability_DereferenceableUris 64 | relevancy_CoverageDetail:$relevancy_CoverageDetail 65 | relevancy_CoverageScope:$relevancy_CoverageScope 66 | relevancy_AmountOfTriples:$relevancy_AmountOfTriples 67 | performance_NoHashURIs:$performance_NoHashURIs 68 | understandability_LabeledResources:$understandability_LabeledResources 69 | """ 70 | 71 | println(s"\n AssessQuality for $rdf_quality_file :\n $AssessQualityStr") 72 | } 73 | 74 | case class Config( 75 | in: String = "", 76 | out: String = "") 77 | 78 | val parser = new scopt.OptionParser[Config]("RDF Quality Assessment Example") { 79 | 80 | head("RDF Quality Assessment Example") 81 | 82 | opt[String]('i', "input").required().valueName(""). 83 | action((x, c) => c.copy(in = x)). 84 | text("path to file that contains the data (in N-Triples format)") 85 | 86 | opt[String]('o', "out").required().valueName(""). 87 | action((x, c) => c.copy(out = x)). 
88 | text("the output directory") 89 | 90 | help("help").text("prints this usage text") 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /sansa-examples-flink/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | namenode: 4 | image: bde2020/hadoop-namenode:1.0.0 5 | hostname: namenode 6 | container_name: namenode 7 | domainname: hadoop 8 | networks: 9 | - hadoop 10 | volumes: 11 | - ./data/namenode:/hadoop/dfs/name 12 | environment: 13 | - CLUSTER_NAME=test 14 | - VIRTUAL_HOST=hdfs.demo.sansa-stack.local 15 | env_file: 16 | - ./config/hadoop/hadoop.env 17 | ports: 18 | - "50070:50070" 19 | - "8020:8020" 20 | datanode1: 21 | image: bde2020/hadoop-datanode:1.0.0 22 | hostname: datanode1 23 | container_name: datanode1 24 | domainname: hadoop 25 | networks: 26 | - hadoop 27 | volumes: 28 | - ./data/datanode1:/hadoop/dfs/data 29 | env_file: 30 | - ./config/hadoop/hadoop.env 31 | 32 | datanode2: 33 | image: bde2020/hadoop-datanode:1.0.0 34 | hostname: datanode2 35 | container_name: datanode2 36 | domainname: hadoop 37 | networks: 38 | - hadoop 39 | volumes: 40 | - ./data/datanode2:/hadoop/dfs/data 41 | env_file: 42 | - ./config/hadoop/hadoop.env 43 | 44 | filebrowser: 45 | image: bde2020/hdfs-filebrowser:3.9 46 | hostname: filebrowser 47 | container_name: filebrowser 48 | domainname: hadoop 49 | networks: 50 | - hadoop 51 | environment: 52 | - NAMENODE_HOST=namenode 53 | - VIRTUAL_HOST=hue.demo.sansa-stack.local 54 | - VIRTUAL_PORT=8088 55 | # ports: 56 | # - "8088:8088" 57 | 58 | flink-master: 59 | image: bde2020/flink-master:1.1.3-hadoop2.7 60 | hostname: flink-master 61 | container_name: flink-master 62 | domainname: hadoop 63 | networks: 64 | - hadoop 65 | environment: 66 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 67 | - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 68 | - VIRTUAL_HOST=flink-master.demo.sansa-stack.local 69 | - VIRTUAL_PORT=8080 70 | env_file: 71 | - ./config/hadoop/hadoop.env 72 | ports: 73 | - "8080:8080" 74 | - "8081:8081" 75 | 76 | flink-worker: 77 | image: bde2020/flink-worker:1.1.3-hadoop2.7 78 | hostname: flink-worker 79 | container_name: flink-worker 80 | domainname: hadoop 81 | networks: 82 | - hadoop 83 | environment: 84 | - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 85 | - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager 86 | - FLINK_MASTER_PORT_6123_TCP_ADDR=flink-master 87 | - VIRTUAL_HOST=flink-worker.demo.sansa-stack.local 88 | - VIRTUAL_PORT=8081 89 | env_file: 90 | - ./config/hadoop/hadoop.env 91 | links: 92 | - "flink-master" 93 | 94 | integratorui: 95 | image: bde2020/integrator-ui:latest 96 | hostname: integratorui 97 | container_name: integratorui 98 | domainname: hadoop 99 | networks: 100 | - hadoop 101 | volumes: 102 | - ./config/integrator:/app/config 103 | environment: 104 | - VIRTUAL_HOST=demo.sansa-stack.local 105 | 106 | csswrapper: 107 | # image: gezim/flink-starter-integrator-css-wrapper 108 | build: ./config/csswrapper/ 109 | hostname: csswrapper 110 | container_name: csswrapper 111 | domainname: hadoop 112 | networks: 113 | - hadoop 114 | ports: 115 | - 80:80 116 | links: 117 | - namenode:namenode 118 | - filebrowser:filebrowser 119 | - flink-master:flink-master 120 | - flink-worker:flink-worker 121 | - integratorui:integratorui 122 | depends_on: 123 | - namenode 124 | - filebrowser 125 | - flink-master 126 | - flink-worker 127 | - integratorui 128 | 129 | networks: 130 | hadoop: 131 | 
external: true 132 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/Sparklify.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import java.awt.Desktop 4 | import java.net.URI 5 | 6 | import net.sansa_stack.query.spark.sparqlify.{QueryExecutionFactorySparqlifySpark, SparqlifyUtils3} 7 | import net.sansa_stack.rdf.spark.io._ 8 | import net.sansa_stack.rdf.spark.partition.core.RdfPartitionUtilsSpark 9 | import org.aksw.jena_sparql_api.server.utils.FactoryBeanSparqlServer 10 | import org.apache.jena.riot.Lang 11 | import org.apache.spark.sql.SparkSession 12 | 13 | /** 14 | * Run SPARQL queries over Spark using Sparqlify approach. 15 | */ 16 | object Sparklify { 17 | 18 | def main(args: Array[String]) { 19 | parser.parse(args, Config()) match { 20 | case Some(config) => 21 | run(config.in, config.sparql, config.run, config.port) 22 | case None => 23 | println(parser.usage) 24 | } 25 | } 26 | 27 | def run(input: String, sparqlQuery: String = "", run: String = "cli", port: String = "7531"): Unit = { 28 | 29 | println("======================================") 30 | println("| Sparklify example |") 31 | println("======================================") 32 | 33 | val spark = SparkSession.builder 34 | .appName(s"Sparklify example ( $input )") 35 | .master("local[*]") 36 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 37 | .config("spark.kryo.registrator", String.join( 38 | ", ", 39 | "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", 40 | "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify")) 41 | .getOrCreate() 42 | 43 | val lang = Lang.NTRIPLES 44 | val graphRdd = spark.rdf(lang)(input) 45 | 46 | run match { 47 | case "cli" => 48 | import net.sansa_stack.query.spark.query._ 49 | // val sparqlQuery = "SELECT * WHERE {?s ?p ?o} LIMIT 10" 50 | val result = graphRdd.sparql(sparqlQuery) 51 | result.rdd.foreach(println) 52 | case _ => 53 | val partitions = RdfPartitionUtilsSpark.partitionGraph(graphRdd) 54 | val rewriter = SparqlifyUtils3.createSparqlSqlRewriter(spark, partitions) 55 | 56 | val port = 7531 57 | 58 | val qef = new QueryExecutionFactorySparqlifySpark(spark, rewriter) 59 | val server = FactoryBeanSparqlServer.newInstance.setSparqlServiceFactory(qef).setPort(port).create() 60 | if (Desktop.isDesktopSupported) { 61 | Desktop.getDesktop.browse(URI.create("http://localhost:" + port + "/sparql")) 62 | } 63 | server.join() 64 | } 65 | 66 | spark.stop 67 | 68 | } 69 | 70 | case class Config(in: String = "", sparql: String = "SELECT * WHERE {?s ?p ?o} LIMIT 10", run: String = "cli", port: String = "7531") 71 | 72 | val parser = new scopt.OptionParser[Config]("Sparklify example") { 73 | 74 | head(" Sparqlify example") 75 | 76 | opt[String]('i', "input").required().valueName(""). 77 | action((x, c) => c.copy(in = x)). 78 | text("path to file that contains the data (in N-Triples format)") 79 | 80 | opt[String]('q', "sparql").optional().valueName(""). 81 | action((x, c) => c.copy(sparql = x)). 82 | text("a SPARQL query") 83 | 84 | opt[String]('r', "run").optional().valueName("Runner"). 85 | action((x, c) => c.copy(run = x)). 86 | text("Runner method, default:'cli'") 87 | 88 | opt[String]('p', "port").optional().valueName("port"). 89 | action((x, c) => c.copy(port = x)). 
90 | text("port that SPARQL endpoint will be exposed, default:'7531'") 91 | 92 | checkConfig(c => 93 | if (c.run == "cli" && c.sparql.isEmpty) failure("Option --sparql must not be empty if cli is enabled") 94 | else success) 95 | 96 | help("help").text("prints this usage text") 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/resources/ont_functional.owl: -------------------------------------------------------------------------------- 1 | Prefix(:=) 2 | Prefix(foo:=) 3 | Prefix(bar:=) 4 | Prefix(xsd:=) 5 | 6 | Ontology( 7 | 8 | #Import() 9 | # annotations 10 | Annotation(foo:hasName "Name") 11 | Annotation(bar:hasTitle "Title") 12 | Annotation(:description "A longer 13 | description running over 14 | several lines") 15 | 16 | ## declarations 17 | # class declarations 18 | Declaration(Annotation(foo:ann "some annotation") Class(bar:Cls1)) 19 | Declaration(Class(bar:Cls2)) 20 | # datatype declarations 21 | Declaration(Datatype(bar:dtype1)) 22 | Declaration(Datatype(bar:dtype2)) 23 | # object property declarations 24 | Declaration(ObjectProperty(bar:objProp1)) 25 | Declaration(ObjectProperty(bar:objProp2)) 26 | # data property declarations 27 | Declaration(DataProperty(bar:dataProp1)) 28 | Declaration(DataProperty(bar:dataProp2)) 29 | # annotation property declarations 30 | Declaration(AnnotationProperty(bar:annProp1)) 31 | Declaration(AnnotationProperty(bar:annProp2)) 32 | # named individual declarations 33 | Declaration(NamedIndividual(foo:indivA)) 34 | Declaration(NamedIndividual(foo:indivB)) 35 | 36 | ## annotation axioms 37 | AnnotationAssertion(bar:label bar:Cls1 "Class 1") 38 | SubAnnotationPropertyOf(bar:annProp1 bar:annProp2) 39 | AnnotationPropertyDomain(bar:annProp1 bar:Cls1) 40 | AnnotationPropertyRange(bar:annProp1 bar:Cls2) 41 | 42 | ## class expressions -- 20 43 | EquivalentClasses(bar:IntersectionCls ObjectIntersectionOf(bar:Cls1 bar:Cls2)) 44 | EquivalentClasses(bar:UnionCls ObjectUnionOf(bar:Cls1 bar:Cls2)) 45 | EquivalentClasses(bar:ComplementCls ObjectComplementOf(bar:Cls1)) 46 | EquivalentClasses(bar:AllIndividualsCls ObjectOneOf(foo:indivA foo:indivB)) 47 | EquivalentClasses(bar:SomeProp1Cls1 ObjectSomeValuesFrom(bar:objProp1 bar:Cls1)) 48 | EquivalentClasses(bar:AllProp1Cls1 ObjectAllValuesFrom(bar:objProp1 bar:Cls1)) 49 | EquivalentClasses(bar:HasValProp1IndivB ObjectHasValue(bar:objProp1 foo:indivB)) 50 | EquivalentClasses(bar:HasSelfProp1 ObjectHasSelf(bar:objProp1)) 51 | EquivalentClasses(bar:Min2Prop1Cls1 ObjectMinCardinality(2 bar:objProp1 bar:Cls1)) 52 | EquivalentClasses(bar:Max3Prop1Cls1 ObjectMaxCardinality(3 bar:objProp1 bar:Cls1)) 53 | EquivalentClasses(bar:Exact5Prop1Cls1 ObjectExactCardinality(5 bar:objProp1 bar:Cls1)) 54 | EquivalentClasses(bar:DataSomeIntLT20 DataSomeValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:maxExclusive "20"^^xsd:integer))) 55 | EquivalentClasses(bar:DataAllIntGT10 DataAllValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:minInclusive "10"^^xsd:integer))) 56 | EquivalentClasses(bar:DataHasVal5 DataHasValue(bar:dataProp2 "5"^^xsd:integer)) 57 | EquivalentClasses(bar:DataMin3Prop1 DataMinCardinality(3 bar:dataProp1)) 58 | EquivalentClasses(bar:DataMax2Prop1 DataMaxCardinality(2 bar:dataProp1)) 59 | EquivalentClasses(bar:DataExact5Prop1 DataExactCardinality(5 bar:dataProp1)) 60 | SubClassOf(bar:Cls1 bar:UnionCls) 61 | DisjointClasses(bar:DataMin3Prop1 bar:DataMax2Prop1) 62 | DisjointUnion(bar:Cl1OrNegate bar:Cls1 
bar:ComplementCls1) 63 | 64 | ## object property axioms -- 40 65 | EquivalentObjectProperties(bar:invObjProp1 ObjectInverseOf(bar:objProp1)) 66 | SubObjectPropertyOf(bar:subObjProp1 bar:objProp1) 67 | EquivalentObjectProperties(bar:sameAsObjProp1 bar:objProp1) 68 | DisjointObjectProperties(bar:objProp1 bar:objProp2) 69 | InverseObjectProperties(bar:invObjProp1 bar:objProp1) 70 | ObjectPropertyDomain(bar:objProp1 bar:Cls1) 71 | ObjectPropertyRange(bar:objProp1 bar:AllIndividualsCls) 72 | FunctionalObjectProperty(bar:objProp2) 73 | InverseFunctionalObjectProperty(bar:invObjProp1) 74 | ReflexiveObjectProperty(bar:objProp1) 75 | IrreflexiveObjectProperty(bar:objProp2) 76 | SymmetricObjectProperty(bar:objProp2) 77 | AsymmetricObjectProperty(bar:asymmObjProp) 78 | TransitiveObjectProperty(bar:objProp1) 79 | 80 | ## data property axioms 81 | DataPropertyRange(bar:dataProp1 xsd:string) 82 | DataPropertyDomain(bar:dataProp1 bar:Cls1) 83 | DataPropertyRange(bar:dataProp2 xsd:int) 84 | DataPropertyDomain(bar:dataProp2 bar:Cls1) 85 | SubDataPropertyOf(bar:subDataProp1 bar:dataProp1) 86 | EquivalentDataProperties(bar:sameAsDataProp1 bar:dataProp1) 87 | DisjointDataProperties(bar:dataProp1 bar:dataProp2) 88 | FunctionalDataProperty(bar:dataProp1) 89 | HasKey(bar:Cls1 () (bar:dataProp1)) 90 | 91 | ## assertions -- 63 92 | SameIndividual(foo:sameAsIndivA foo:indivA) 93 | DifferentIndividuals(foo:indivA foo:indivB) 94 | ClassAssertion(bar:Cls1 foo:indivA) 95 | ObjectPropertyAssertion(bar:objProp1 foo:indivA foo:indivB) 96 | NegativeObjectPropertyAssertion(bar:Prop2 foo:indivB foo:indivA) 97 | DataPropertyAssertion(bar:dataProp1 foo:indivA "ABCD") 98 | DataPropertyAssertion(bar:dataProp1 foo:indivB "BCDE") 99 | NegativeDataPropertyAssertion(bar:dataProp2 foo:indivA "23"^^xsd:integer) 100 | ) 101 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/ont_functional.owl: -------------------------------------------------------------------------------- 1 | Prefix(:=) 2 | Prefix(foo:=) 3 | Prefix(bar:=) 4 | Prefix(xsd:=) 5 | 6 | Ontology( 7 | 8 | #Import() 9 | # annotations 10 | Annotation(foo:hasName "Name") 11 | Annotation(bar:hasTitle "Title") 12 | Annotation(:description "A longer 13 | description running over 14 | several lines") 15 | 16 | ## declarations 17 | # class declarations 18 | Declaration(Annotation(foo:ann "some annotation") Class(bar:Cls1)) 19 | Declaration(Class(bar:Cls2)) 20 | # datatype declarations 21 | Declaration(Datatype(bar:dtype1)) 22 | Declaration(Datatype(bar:dtype2)) 23 | # object property declarations 24 | Declaration(ObjectProperty(bar:objProp1)) 25 | Declaration(ObjectProperty(bar:objProp2)) 26 | # data property declarations 27 | Declaration(DataProperty(bar:dataProp1)) 28 | Declaration(DataProperty(bar:dataProp2)) 29 | # annotation property declarations 30 | Declaration(AnnotationProperty(bar:annProp1)) 31 | Declaration(AnnotationProperty(bar:annProp2)) 32 | # named individual declarations 33 | Declaration(NamedIndividual(foo:indivA)) 34 | Declaration(NamedIndividual(foo:indivB)) 35 | 36 | ## annotation axioms 37 | AnnotationAssertion(bar:label bar:Cls1 "Class 1") 38 | SubAnnotationPropertyOf(bar:annProp1 bar:annProp2) 39 | AnnotationPropertyDomain(bar:annProp1 bar:Cls1) 40 | AnnotationPropertyRange(bar:annProp1 bar:Cls2) 41 | 42 | ## class expressions -- 20 43 | EquivalentClasses(bar:IntersectionCls ObjectIntersectionOf(bar:Cls1 bar:Cls2)) 44 | EquivalentClasses(bar:UnionCls ObjectUnionOf(bar:Cls1 bar:Cls2)) 45 
| EquivalentClasses(bar:ComplementCls ObjectComplementOf(bar:Cls1)) 46 | EquivalentClasses(bar:AllIndividualsCls ObjectOneOf(foo:indivA foo:indivB)) 47 | EquivalentClasses(bar:SomeProp1Cls1 ObjectSomeValuesFrom(bar:objProp1 bar:Cls1)) 48 | EquivalentClasses(bar:AllProp1Cls1 ObjectAllValuesFrom(bar:objProp1 bar:Cls1)) 49 | EquivalentClasses(bar:HasValProp1IndivB ObjectHasValue(bar:objProp1 foo:indivB)) 50 | EquivalentClasses(bar:HasSelfProp1 ObjectHasSelf(bar:objProp1)) 51 | EquivalentClasses(bar:Min2Prop1Cls1 ObjectMinCardinality(2 bar:objProp1 bar:Cls1)) 52 | EquivalentClasses(bar:Max3Prop1Cls1 ObjectMaxCardinality(3 bar:objProp1 bar:Cls1)) 53 | EquivalentClasses(bar:Exact5Prop1Cls1 ObjectExactCardinality(5 bar:objProp1 bar:Cls1)) 54 | EquivalentClasses(bar:DataSomeIntLT20 DataSomeValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:maxExclusive "20"^^xsd:integer))) 55 | EquivalentClasses(bar:DataAllIntGT10 DataAllValuesFrom(bar:dataProp2 DatatypeRestriction(xsd:integer xsd:minInclusive "10"^^xsd:integer))) 56 | EquivalentClasses(bar:DataHasVal5 DataHasValue(bar:dataProp2 "5"^^xsd:integer)) 57 | EquivalentClasses(bar:DataMin3Prop1 DataMinCardinality(3 bar:dataProp1)) 58 | EquivalentClasses(bar:DataMax2Prop1 DataMaxCardinality(2 bar:dataProp1)) 59 | EquivalentClasses(bar:DataExact5Prop1 DataExactCardinality(5 bar:dataProp1)) 60 | SubClassOf(bar:Cls1 bar:UnionCls) 61 | DisjointClasses(bar:DataMin3Prop1 bar:DataMax2Prop1) 62 | DisjointUnion(bar:Cl1OrNegate bar:Cls1 bar:ComplementCls1) 63 | 64 | ## object property axioms -- 40 65 | EquivalentObjectProperties(bar:invObjProp1 ObjectInverseOf(bar:objProp1)) 66 | SubObjectPropertyOf(bar:subObjProp1 bar:objProp1) 67 | EquivalentObjectProperties(bar:sameAsObjProp1 bar:objProp1) 68 | DisjointObjectProperties(bar:objProp1 bar:objProp2) 69 | InverseObjectProperties(bar:invObjProp1 bar:objProp1) 70 | ObjectPropertyDomain(bar:objProp1 bar:Cls1) 71 | ObjectPropertyRange(bar:objProp1 bar:AllIndividualsCls) 72 | FunctionalObjectProperty(bar:objProp2) 73 | InverseFunctionalObjectProperty(bar:invObjProp1) 74 | ReflexiveObjectProperty(bar:objProp1) 75 | IrreflexiveObjectProperty(bar:objProp2) 76 | SymmetricObjectProperty(bar:objProp2) 77 | AsymmetricObjectProperty(bar:asymmObjProp) 78 | TransitiveObjectProperty(bar:objProp1) 79 | 80 | ## data property axioms 81 | DataPropertyRange(bar:dataProp1 xsd:string) 82 | DataPropertyDomain(bar:dataProp1 bar:Cls1) 83 | DataPropertyRange(bar:dataProp2 xsd:int) 84 | DataPropertyDomain(bar:dataProp2 bar:Cls1) 85 | SubDataPropertyOf(bar:subDataProp1 bar:dataProp1) 86 | EquivalentDataProperties(bar:sameAsDataProp1 bar:dataProp1) 87 | DisjointDataProperties(bar:dataProp1 bar:dataProp2) 88 | FunctionalDataProperty(bar:dataProp1) 89 | HasKey(bar:Cls1 () (bar:dataProp1)) 90 | 91 | ## assertions -- 63 92 | SameIndividual(foo:sameAsIndivA foo:indivA) 93 | DifferentIndividuals(foo:indivA foo:indivB) 94 | ClassAssertion(bar:Cls1 foo:indivA) 95 | ObjectPropertyAssertion(bar:objProp1 foo:indivA foo:indivB) 96 | NegativeObjectPropertyAssertion(bar:Prop2 foo:indivB foo:indivA) 97 | DataPropertyAssertion(bar:dataProp1 foo:indivA "ABCD") 98 | DataPropertyAssertion(bar:dataProp1 foo:indivB "BCDE") 99 | NegativeDataPropertyAssertion(bar:dataProp2 foo:indivA "23"^^xsd:integer) 100 | ) 101 | -------------------------------------------------------------------------------- /sansa-examples-flink/config/csswrapper/frontend/default.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Hadoop 
block 3 | # 4 | 5 | server { 6 | listen 80; 7 | server_name hdfs.demo.sansa-stack.local 8 | 9 | root /data; 10 | gzip on; 11 | 12 | location / { 13 | proxy_pass http://127.0.0.1:8000; 14 | proxy_set_header Accept-Encoding ""; 15 | } 16 | 17 | location /bde-css/ { 18 | } 19 | } 20 | 21 | server { 22 | listen 127.0.0.1:8000; 23 | location / { 24 | proxy_pass http://127.0.0.1:8001; 25 | sub_filter '' ' 26 | '; 27 | sub_filter_once on; 28 | proxy_set_header Accept-Encoding ""; 29 | } 30 | } 31 | 32 | server { 33 | listen 127.0.0.1:8001; 34 | gunzip on; 35 | location / { 36 | proxy_pass http://namenode:50070; 37 | proxy_set_header Accept-Encoding gzip; 38 | } 39 | } 40 | 41 | 42 | # 43 | # HDFS Hue 44 | # 45 | 46 | server { 47 | listen 80; 48 | server_name hue.demo.sansa-stack.local 49 | root /data; 50 | gzip on; 51 | 52 | location / { 53 | proxy_pass http://127.0.0.1:8006; 54 | proxy_set_header Accept-Encoding ""; 55 | proxy_hide_header X-Frame-Options; 56 | } 57 | 58 | location /bde-css/ { 59 | } 60 | } 61 | 62 | server { 63 | listen 127.0.0.1:8006; 64 | location / { 65 | proxy_pass http://127.0.0.1:8007; 66 | # sub_filter '' ' 67 | # '; 68 | # sub_filter_once on; 69 | sub_filter 'if (self == top){' 'if (true){'; 70 | sub_filter_once on; 71 | proxy_set_header Accept-Encoding ""; 72 | } 73 | } 74 | 75 | server { 76 | listen 127.0.0.1:8007; 77 | gunzip on; 78 | location / { 79 | proxy_pass http://filebrowser:8088; 80 | proxy_set_header Accept-Encoding gzip; 81 | } 82 | } 83 | 84 | # 85 | # Flink Master 86 | # 87 | 88 | server { 89 | listen 80; 90 | server_name flink-master.demo.sansa-stack.local 91 | 92 | root /data; 93 | gzip on; 94 | 95 | location / { 96 | proxy_pass http://127.0.0.1:8008; 97 | proxy_set_header Accept-Encoding ""; 98 | } 99 | 100 | location /bde-css/ { 101 | } 102 | } 103 | 104 | server { 105 | listen 127.0.0.1:8008; 106 | location / { 107 | proxy_pass http://127.0.0.1:8009; 108 | sub_filter '' ' 109 | '; 110 | sub_filter_once on; 111 | proxy_set_header Accept-Encoding ""; 112 | } 113 | } 114 | 115 | server { 116 | listen 127.0.0.1:8009; 117 | gunzip on; 118 | location / { 119 | proxy_pass http://flink-master:8081; 120 | proxy_set_header Accept-Encoding gzip; 121 | } 122 | } 123 | 124 | # 125 | # Flink Worker 126 | # 127 | 128 | server { 129 | listen 80; 130 | server_name flink-worker.demo.sansa-stack.local 131 | 132 | root /data; 133 | gzip on; 134 | 135 | location / { 136 | proxy_pass http://127.0.0.1:8010; 137 | proxy_set_header Accept-Encoding ""; 138 | } 139 | 140 | location /bde-css/ { 141 | } 142 | } 143 | 144 | server { 145 | listen 127.0.0.1:8010; 146 | location / { 147 | proxy_pass http://127.0.0.1:8011; 148 | # sub_filter '' ' 149 | # '; 150 | # sub_filter_once on; 151 | proxy_set_header Accept-Encoding ""; 152 | } 153 | } 154 | 155 | server { 156 | listen 127.0.0.1:8011; 157 | gunzip on; 158 | location / { 159 | proxy_pass http://flink-worker:8080; 160 | proxy_set_header Accept-Encoding gzip; 161 | } 162 | } 163 | 164 | # 165 | # Integrator 166 | # 167 | 168 | server { 169 | listen 80; 170 | server_name demo.sansa-stack.local 171 | 172 | root /data; 173 | gzip on; 174 | 175 | location / { 176 | proxy_pass http://127.0.0.1:8012; 177 | proxy_set_header Accept-Encoding ""; 178 | } 179 | 180 | location /bde-css/ { 181 | } 182 | } 183 | 184 | server { 185 | listen 127.0.0.1:8012; 186 | location / { 187 | proxy_pass http://127.0.0.1:8013; 188 | # sub_filter '' ' 189 | # '; 190 | # sub_filter_once on; 191 | proxy_set_header Accept-Encoding ""; 192 | } 193 | } 194 | 195 | 
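# Each web UI in this file is wrapped by the same three-hop proxy chain: the public
# :80 server, a local "injector" server where sub_filter rewrites the page (e.g. to
# splice in the BDE CSS), and a final helper server like the one below that gunzips
# the upstream response, because sub_filter can only rewrite uncompressed bodies.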
server { 196 | listen 127.0.0.1:8013; 197 | gunzip on; 198 | location / { 199 | proxy_pass http://integratorui:80; 200 | proxy_set_header Accept-Encoding gzip; 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /sansa-examples-spark/config/csswrapper/frontend/default.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Hadoop block 3 | # 4 | 5 | server { 6 | listen 80; 7 | server_name hdfs.demo.sansa-stack.local 8 | 9 | root /data; 10 | gzip on; 11 | 12 | location / { 13 | proxy_pass http://127.0.0.1:8000; 14 | proxy_set_header Accept-Encoding ""; 15 | } 16 | 17 | location /bde-css/ { 18 | } 19 | } 20 | 21 | server { 22 | listen 127.0.0.1:8000; 23 | location / { 24 | proxy_pass http://127.0.0.1:8001; 25 | sub_filter '' ' 26 | '; 27 | sub_filter_once on; 28 | proxy_set_header Accept-Encoding ""; 29 | } 30 | } 31 | 32 | server { 33 | listen 127.0.0.1:8001; 34 | gunzip on; 35 | location / { 36 | proxy_pass http://namenode:50070; 37 | proxy_set_header Accept-Encoding gzip; 38 | } 39 | } 40 | 41 | 42 | # 43 | # HDFS Hue 44 | # 45 | 46 | server { 47 | listen 80; 48 | server_name hue.demo.sansa-stack.local 49 | root /data; 50 | gzip on; 51 | 52 | location / { 53 | proxy_pass http://127.0.0.1:8006; 54 | proxy_set_header Accept-Encoding ""; 55 | proxy_hide_header X-Frame-Options; 56 | } 57 | 58 | location /bde-css/ { 59 | } 60 | } 61 | 62 | server { 63 | listen 127.0.0.1:8006; 64 | location / { 65 | proxy_pass http://127.0.0.1:8007; 66 | # sub_filter '' ' 67 | # '; 68 | # sub_filter_once on; 69 | sub_filter 'if (self == top){' 'if (true){'; 70 | sub_filter_once on; 71 | proxy_set_header Accept-Encoding ""; 72 | } 73 | } 74 | 75 | server { 76 | listen 127.0.0.1:8007; 77 | gunzip on; 78 | location / { 79 | proxy_pass http://filebrowser:8088; 80 | proxy_set_header Accept-Encoding gzip; 81 | } 82 | } 83 | 84 | # 85 | # Spark Master 86 | # 87 | 88 | server { 89 | listen 80; 90 | server_name spark-master.demo.sansa-stack.local 91 | 92 | root /data; 93 | gzip on; 94 | 95 | location / { 96 | proxy_pass http://127.0.0.1:8008; 97 | proxy_set_header Accept-Encoding ""; 98 | } 99 | 100 | location /bde-css/ { 101 | } 102 | } 103 | 104 | server { 105 | listen 127.0.0.1:8008; 106 | location / { 107 | proxy_pass http://127.0.0.1:8009; 108 | # sub_filter '' ' 109 | # '; 110 | # sub_filter_once on; 111 | proxy_set_header Accept-Encoding ""; 112 | } 113 | } 114 | 115 | server { 116 | listen 127.0.0.1:8009; 117 | gunzip on; 118 | location / { 119 | proxy_pass http://master:8080/home; 120 | proxy_set_header Accept-Encoding gzip; 121 | } 122 | } 123 | # 124 | # Spark Worker 125 | # 126 | 127 | server { 128 | listen 80; 129 | server_name spark-worker.demo.sansa-stack.local 130 | 131 | root /data; 132 | gzip on; 133 | 134 | location / { 135 | proxy_pass http://127.0.0.1:8010; 136 | proxy_set_header Accept-Encoding ""; 137 | } 138 | 139 | location /bde-css/ { 140 | } 141 | } 142 | 143 | server { 144 | listen 127.0.0.1:8010; 145 | location / { 146 | proxy_pass http://127.0.0.1:8011; 147 | # sub_filter '' ' 148 | # '; 149 | # sub_filter_once on; 150 | proxy_set_header Accept-Encoding ""; 151 | } 152 | } 153 | 154 | server { 155 | listen 127.0.0.1:8011; 156 | gunzip on; 157 | location / { 158 | proxy_pass http://worker:8081; 159 | proxy_set_header Accept-Encoding gzip; 160 | } 161 | } 162 | 163 | # 164 | # Integrator 165 | # 166 | 167 | server { 168 | listen 80; 169 | server_name demo.sansa-stack.local 170 | 171 | root /data; 172 
| gzip on; 173 | 174 | location / { 175 | proxy_pass http://127.0.0.1:8012; 176 | proxy_set_header Accept-Encoding ""; 177 | } 178 | 179 | location /bde-css/ { 180 | } 181 | } 182 | 183 | server { 184 | listen 127.0.0.1:8012; 185 | location / { 186 | proxy_pass http://127.0.0.1:8013; 187 | # sub_filter '' ' 188 | # '; 189 | # sub_filter_once on; 190 | proxy_set_header Accept-Encoding ""; 191 | } 192 | } 193 | 194 | server { 195 | listen 127.0.0.1:8013; 196 | gunzip on; 197 | location / { 198 | proxy_pass http://integratorui:80; 199 | proxy_set_header Accept-Encoding gzip; 200 | } 201 | } 202 | 203 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/inference/triples/RDFGraphInference.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.inference.triples 2 | 3 | import java.net.URI 4 | 5 | import scala.collection.Seq 6 | 7 | import net.sansa_stack.inference.rules.{ RDFSLevel, ReasoningProfile } 8 | import net.sansa_stack.inference.rules.ReasoningProfile._ 9 | import net.sansa_stack.inference.spark.data.loader.RDFGraphLoader 10 | import net.sansa_stack.inference.spark.data.writer.RDFGraphWriter 11 | import net.sansa_stack.inference.spark.forwardchaining.triples.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS, TransitiveReasoner} 12 | import org.apache.jena.graph.{ Node, NodeFactory } 13 | import org.apache.spark.sql.SparkSession 14 | 15 | object RDFGraphInference { 16 | 17 | def main(args: Array[String]) { 18 | parser.parse(args, Config()) match { 19 | case Some(config) => 20 | run(config.in, config.out, config.profile, config.properties, config.writeToSingleFile, config.sortedOutput, config.parallelism) 21 | case None => 22 | println(parser.usage) 23 | } 24 | } 25 | 26 | def run(input: Seq[URI], output: URI, profile: ReasoningProfile, properties: Seq[Node] = Seq(), 27 | writeToSingleFile: Boolean, sortedOutput: Boolean, parallelism: Int): Unit = { 28 | 29 | // the SPARK config 30 | val spark = SparkSession.builder 31 | .appName(s"SPARK $profile Reasoning") 32 | .master("local[*]") 33 | .config("spark.hadoop.validateOutputSpecs", "false") // override output files 34 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 35 | .config("spark.default.parallelism", parallelism) 36 | .config("spark.ui.showConsoleProgress", "false") 37 | .config("spark.sql.shuffle.partitions", parallelism) 38 | .getOrCreate() 39 | 40 | // load triples from disk 41 | val graph = RDFGraphLoader.loadFromDisk(spark, input, parallelism) 42 | println(s"|G| = ${graph.size()}") 43 | 44 | // create reasoner 45 | val reasoner = profile match { 46 | case TRANSITIVE => new TransitiveReasoner(spark.sparkContext, properties, parallelism) 47 | case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism) 48 | case RDFS_SIMPLE => 49 | val r = new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism) 50 | r.level = RDFSLevel.SIMPLE 51 | r 52 | case OWL_HORST => new ForwardRuleReasonerOWLHorst(spark.sparkContext) 53 | } 54 | 55 | // compute inferred graph 56 | val inferredGraph = reasoner.apply(graph) 57 | println(s"|G_inf| = ${inferredGraph.size()}") 58 | 59 | // write triples to disk 60 | RDFGraphWriter.writeToDisk(inferredGraph, output.toString, writeToSingleFile, sortedOutput) 61 | 62 | spark.stop() 63 | } 64 | 65 | case class Config( 66 | in: Seq[URI] = Seq(), 67 | out: URI = new URI("."), 68 | properties: 
Seq[Node] = Seq(), 69 | profile: ReasoningProfile = ReasoningProfile.RDFS, 70 | writeToSingleFile: Boolean = false, 71 | sortedOutput: Boolean = false, 72 | parallelism: Int = 4) 73 | 74 | // read ReasoningProfile enum 75 | implicit val profilesRead: scopt.Read[ReasoningProfile.Value] = 76 | scopt.Read.reads(ReasoningProfile forName _.toLowerCase()) 77 | 78 | // read ReasoningProfile enum 79 | implicit val nodeRead: scopt.Read[Node] = 80 | scopt.Read.reads(NodeFactory.createURI(_)) 81 | 82 | // the CLI parser 83 | val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") { 84 | 85 | head("RDFGraphMaterializer", "0.1.0") 86 | 87 | opt[Seq[URI]]('i', "input").required().valueName(",,..."). 88 | action((x, c) => c.copy(in = x)). 89 | text("path to file or directory that contains the input files (in N-Triples format)") 90 | 91 | opt[URI]('o', "out").required().valueName(""). 92 | action((x, c) => c.copy(out = x)). 93 | text("the output directory") 94 | 95 | opt[Seq[Node]]("properties").optional().valueName(",,..."). 96 | action((x, c) => { 97 | c.copy(properties = x) 98 | }). 99 | text("list of properties for which the transitive closure will be computed (used only for profile 'transitive')") 100 | 101 | opt[ReasoningProfile]('p', "profile").required().valueName("{rdfs | rdfs-simple | owl-horst | transitive}"). 102 | action((x, c) => c.copy(profile = x)). 103 | text("the reasoning profile") 104 | 105 | opt[Unit]("single-file").optional().action((_, c) => 106 | c.copy(writeToSingleFile = true)).text("write the output to a single file in the output directory") 107 | 108 | opt[Unit]("sorted").optional().action((_, c) => 109 | c.copy(sortedOutput = true)).text("sorted output of the triples (per file)") 110 | 111 | opt[Int]("parallelism").optional().action((x, c) => 112 | c.copy(parallelism = x)).text("the degree of parallelism, i.e. 
the number of Spark partitions used in the Spark operations") 113 | 114 | help("help").text("prints this usage text") 115 | 116 | checkConfig(c => 117 | if (c.profile == TRANSITIVE && c.properties.isEmpty) failure("Option --properties must not be empty if profile 'transitive' is set") 118 | else success) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/outliers/anomalydetection/AnomalyDetection.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.ml.outliers.anomalydetection 2 | 3 | import scala.collection.mutable 4 | 5 | import net.sansa_stack.ml.spark.outliers.anomalydetection._ 6 | import net.sansa_stack.rdf.spark.io._ 7 | import org.apache.jena.riot.Lang 8 | import org.apache.spark.rdd.RDD 9 | import org.apache.spark.sql.{ Dataset, Row, SaveMode, SparkSession } 10 | import org.apache.spark.storage.StorageLevel 11 | 12 | object AnomalyDetection { 13 | def main(args: Array[String]) { 14 | parser.parse(args, Config()) match { 15 | case Some(config) => 16 | run(config.in, config.threshold, config.anomalyListLimit, config.numofpartition, config.out) 17 | case None => 18 | println(parser.usage) 19 | } 20 | } 21 | 22 | def run( 23 | input: String, 24 | JSimThreshold: Double, 25 | anomalyListLimit: Int, 26 | numofpartition: Int, 27 | output: String): Unit = { 28 | 29 | println("==================================================") 30 | println("| Distributed Anomaly Detection |") 31 | println("==================================================") 32 | 33 | val spark = SparkSession.builder 34 | .appName(s"Anomaly Detection example ( $input )") 35 | .master("local[*]") 36 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 37 | .getOrCreate() 38 | 39 | // N-Triples Reader 40 | val lang = Lang.NTRIPLES 41 | val triplesRDD = spark.rdf(lang)(input).repartition(numofpartition).persist() 42 | 43 | // predicated that are not interesting for evaluation 44 | val wikiList = List("wikiPageRevisionID,wikiPageID") 45 | 46 | // filtering numeric literal having xsd type double,integer,nonNegativeInteger and squareKilometre 47 | val objList = List( 48 | "http://www.w3.org/2001/XMLSchema#double", 49 | "http://www.w3.org/2001/XMLSchema#integer", 50 | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger", 51 | "http://dbpedia.org/datatype/squareKilometre") 52 | 53 | // helful for considering only Dbpedia type as their will be yago type,wikidata type also 54 | val triplesType = List("http://dbpedia.org/ontology") 55 | 56 | // some of the supertype which are present for most of the subject 57 | val listSuperType = List( 58 | "http://dbpedia.org/ontology/Activity", "http://dbpedia.org/ontology/Organisation", 59 | "http://dbpedia.org/ontology/Agent", "http://dbpedia.org/ontology/SportsLeague", 60 | "http://dbpedia.org/ontology/Person", "http://dbpedia.org/ontology/Athlete", 61 | "http://dbpedia.org/ontology/Event", "http://dbpedia.org/ontology/Place", 62 | "http://dbpedia.org/ontology/PopulatedPlace", "http://dbpedia.org/ontology/Region", 63 | "http://dbpedia.org/ontology/Species", "http://dbpedia.org/ontology/Eukaryote", 64 | "http://dbpedia.org/ontology/Location") 65 | 66 | // hypernym URI 67 | val hypernym = "http://purl.org/linguistics/gold/hypernym" 68 | 69 | var clusterOfSubject: RDD[(Set[(String, String, Object)])] = null 70 | println("AnomalyDetection-using ApproxSimilarityJoin function 
with the help of HashingTF ") 71 | 72 | val outDetection = new AnomalyWithHashingTF(triplesRDD, objList, triplesType, JSimThreshold, listSuperType, spark, hypernym, numofpartition) 73 | clusterOfSubject = outDetection.run() 74 | 75 | val setData = clusterOfSubject.repartition(1000).persist(StorageLevel.MEMORY_AND_DISK) 76 | val setDataStore = setData.map(f => f.toSeq) 77 | 78 | val setDataSize = setDataStore.filter(f => f.size > anomalyListLimit) 79 | 80 | val test = setDataSize.map(f => outDetection.iqr2(f, anomalyListLimit)) 81 | 82 | val testfilter = test.filter(f => f.size > 0) // .distinct() 83 | val testfilterDistinct = testfilter.flatMap(f => f) 84 | testfilterDistinct.saveAsTextFile(output) 85 | setData.unpersist() 86 | 87 | spark.stop() 88 | } 89 | 90 | case class Config( 91 | in: String = "", 92 | threshold: Double = 0.0, 93 | anomalyListLimit: Int = 0, 94 | numofpartition: Int = 0, 95 | out: String = "") 96 | 97 | val parser = new scopt.OptionParser[Config]("SANSA -Outlier Detection") { 98 | 99 | head("Detecting Numerical Outliers in dataset") 100 | 101 | opt[String]('i', "input").required().valueName(""). 102 | action((x, c) => c.copy(in = x)). 103 | text("path to file that contains RDF data (in N-Triples format)") 104 | 105 | // Jaccard similarity threshold value 106 | opt[Double]('t', "threshold").required(). 107 | action((x, c) => c.copy(threshold = x)). 108 | text("the Jaccard Similarity value") 109 | 110 | // number of partition 111 | opt[Int]('a', "numofpartition").required(). 112 | action((x, c) => c.copy(numofpartition = x)). 113 | text("Number of partition") 114 | 115 | // List limit for calculating IQR 116 | opt[Int]('c', "anomalyListLimit").required(). 117 | action((x, c) => c.copy(anomalyListLimit = x)). 118 | text("the outlier List Limit") 119 | 120 | // output file path 121 | opt[String]('o', "output").required().valueName(""). 122 | action((x, c) => c.copy(out = x)). 
123 | text("the output directory") 124 | 125 | help("help").text("prints this usage text") 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | sansa-examples-parent_2.11 6 | 7 | 8 | net.sansa-stack 9 | sansa-parent 10 | 0.7.1 11 | 12 | 13 | pom 14 | SANSA-Examples - Parent 15 | SANSA examples 16 | 17 | 18 | Smart Data Analytics (SDA) Research Group 19 | http://sda.tech/ 20 | 21 | 22 | 2017 23 | 24 | http://sansa-stack.net/ 25 | 26 | 27 | 28 | GNU GENERAL PUBLIC LICENSE, Version 3 29 | http://www.gnu.org/licenses/gpl-3.0.txt 30 | repo 31 | 32 | 33 | 34 | 35 | 36 | jlehmann 37 | Jens Lehmann 38 | jens.lehmann@cs.uni-bonn.de 39 | Department of Computer Science, University of Bonn 40 | http://www.informatik.uni-bonn.de/ 41 | 42 | principle maintainer 43 | 44 | 0 45 | 46 | 47 | 48 | 49 | GitHub 50 | https://github.com/SANSA-Stack/SANSA-Examples 51 | 52 | 53 | 54 | 55 | sansa-examples-flink 56 | sansa-examples-spark 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | ${project.groupId} 65 | sansa-rdf-spark_${scala.binary.version} 66 | ${project.version} 67 | 68 | 69 | 70 | ${project.groupId} 71 | sansa-rdf-flink_${scala.binary.version} 72 | ${project.version} 73 | 74 | 75 | 76 | ${project.groupId} 77 | sansa-owl-spark_${scala.binary.version} 78 | ${project.version} 79 | 80 | 81 | ${project.groupId} 82 | sansa-owl-flink_${scala.binary.version} 83 | ${project.version} 84 | 85 | 86 | 87 | 88 | ${project.groupId} 89 | sansa-query-spark_${scala.binary.version} 90 | 91 | ${project.version} 92 | 93 | 94 | 95 | 96 | ${project.groupId} 97 | sansa-inference-spark_${scala.binary.version} 98 | ${sansa.version} 99 | 100 | 101 | ${project.groupId} 102 | sansa-inference-flink_${scala.binary.version} 103 | ${sansa.version} 104 | 105 | 106 | 107 | 108 | ${project.groupId} 109 | sansa-ml-spark_${scala.binary.version} 110 | ${project.version} 111 | 112 | 113 | ${project.groupId} 114 | sansa-ml-flink_${scala.binary.version} 115 | ${project.version} 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | root-dir 125 | 126 | 127 | ${project.basedir}/../../scalastyle-config.xml 128 | 129 | 130 | 131 | ${project.basedir}/../scalastyle-config.xml 132 | 133 | 134 | 135 | 136 | 137 | 138 | oss-sonatype 139 | oss-sonatype 140 | https://oss.sonatype.org/content/repositories/snapshots/ 141 | 142 | true 143 | 144 | 145 | 146 | apache-snapshot 147 | Apache repository (snapshots) 148 | https://repository.apache.org/content/repositories/snapshots/ 149 | 150 | true 151 | 152 | 153 | 154 | maven.aksw.internal 155 | AKSW Release Repository 156 | http://maven.aksw.org/archiva/repository/internal 157 | 158 | true 159 | 160 | 161 | false 162 | 163 | 164 | 165 | maven.aksw.snapshots 166 | AKSW Snapshot Repository 167 | http://maven.aksw.org/archiva/repository/snapshots 168 | 169 | false 170 | 171 | 172 | true 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/inference/RDFGraphInference.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.flink.inference 2 | 3 | import java.io.{File, FileInputStream} 4 | import java.net.URI 5 | import java.util.Properties 6 | 7 | import scala.io.Source 8 | import com.typesafe.config.ConfigFactory 9 | import 
de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer 10 | import net.sansa_stack.inference.flink.data.{RDFGraphLoader, RDFGraphWriter} 11 | import net.sansa_stack.inference.flink.forwardchaining.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS} 12 | import net.sansa_stack.inference.rules.{RDFSLevel, ReasoningProfile} 13 | import net.sansa_stack.inference.rules.ReasoningProfile._ 14 | import org.apache.flink.api.java.utils.ParameterTool 15 | import org.apache.flink.api.scala.ExecutionEnvironment 16 | import org.apache.flink.configuration.Configuration 17 | import org.apache.flink.runtime.webmonitor.WebMonitorUtils 18 | 19 | object RDFGraphInference { 20 | 21 | def main(args: Array[String]) { 22 | parser.parse(args, Config()) match { 23 | case Some(config) => 24 | run( 25 | args, 26 | config.in, 27 | config.out, 28 | config.profile, 29 | config.writeToSingleFile, 30 | config.sortedOutput, 31 | config.propertiesFile, 32 | config.jobName) 33 | case None => 34 | println(parser.usage) 35 | } 36 | } 37 | 38 | def run( 39 | args: Array[String], 40 | input: Seq[URI], 41 | output: URI, 42 | profile: ReasoningProfile, 43 | writeToSingleFile: Boolean, 44 | sortedOutput: Boolean, 45 | propertiesFile: File, 46 | jobName: String): Unit = { 47 | 48 | // read reasoner optimization properties 49 | val reasonerConf = 50 | if (propertiesFile != null) ConfigFactory.parseFile(propertiesFile) 51 | else ConfigFactory.load("reasoner") 52 | 53 | // get params 54 | val params: ParameterTool = ParameterTool.fromArgs(args) 55 | 56 | println("======================================") 57 | println("| RDF Graph Inference |") 58 | println("======================================") 59 | 60 | val conf = new Configuration() 61 | conf.setInteger("taskmanager.network.numberOfBuffers", 3000) 62 | 63 | // set up the execution environment 64 | val env = ExecutionEnvironment.getExecutionEnvironment 65 | env.getConfig.disableSysoutLogging() 66 | 67 | // make parameters available in the web interface 68 | env.getConfig.setGlobalJobParameters(params) 69 | 70 | // load triples from disk 71 | val graph = RDFGraphLoader.loadFromDisk(input, env) 72 | println(s"|G| = ${graph.size}") 73 | 74 | // create reasoner 75 | val reasoner = profile match { 76 | case RDFS | RDFS_SIMPLE => 77 | val r = new ForwardRuleReasonerRDFS(env) 78 | r.useSchemaBroadCasting = reasonerConf.getBoolean("reasoner.rdfs.schema.broadcast") 79 | r.extractSchemaTriplesInAdvance = 80 | reasonerConf.getBoolean("reasoner.rdfs.schema.extractTriplesInAdvance") 81 | if (profile == RDFS_SIMPLE) r.level = RDFSLevel.SIMPLE 82 | r 83 | case OWL_HORST => new ForwardRuleReasonerOWLHorst(env) 84 | } 85 | 86 | // compute inferred graph 87 | val inferredGraph = reasoner.apply(graph) 88 | println(s"|G_inf| = ${inferredGraph.size}") 89 | 90 | val jn = if (jobName.isEmpty) s"RDF Graph Inference ($profile)" else jobName 91 | } 92 | 93 | // the config object 94 | case class Config( 95 | in: Seq[URI] = Seq(), 96 | out: URI = new URI("."), 97 | profile: ReasoningProfile = ReasoningProfile.RDFS, 98 | writeToSingleFile: Boolean = false, 99 | sortedOutput: Boolean = false, 100 | propertiesFile: File = null, 101 | jobName: String = "") // new File(getClass.getResource("reasoner.properties").toURI) 102 | 103 | // read ReasoningProfile enum 104 | implicit val profilesRead: scopt.Read[ReasoningProfile.Value] = 105 | scopt.Read.reads(ReasoningProfile forName _.toLowerCase()) 106 | 107 | // the CLI parser 108 | val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") { 109 | 
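// Example invocation (a sketch, not part of the original example): only the class name
// and the CLI flags come from this file; the jar name and paths are placeholders.
//   flink run -c net.sansa_stack.examples.flink.inference.RDFGraphInference \
//     sansa-examples-flink_2.11-0.7.1.jar -i /data/input.nt -o /data/output --profile rdfs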
head("RDFGraphMaterializer", "0.1.0") 110 | 111 | opt[Seq[URI]]('i', "input") 112 | .required() 113 | .valueName("") 114 | .action((x, c) => c.copy(in = x)) 115 | .text("path to file or directory that contains the input files (in N-Triple format)") 116 | 117 | opt[URI]('o', "out") 118 | .required() 119 | .valueName("") 120 | .action((x, c) => c.copy(out = x)) 121 | .text("the output directory") 122 | 123 | opt[Unit]("single-file") 124 | .optional() 125 | .action((_, c) => c.copy(writeToSingleFile = true)) 126 | .text("write the output to a single file in the output directory") 127 | 128 | opt[Unit]("sorted") 129 | .optional() 130 | .action((_, c) => c.copy(sortedOutput = true)) 131 | .text("sorted output of the triples (per file)") 132 | 133 | opt[ReasoningProfile]('p', "profile") 134 | .required() 135 | .valueName("{rdfs | rdfs-simple | owl-horst}") 136 | .action((x, c) => c.copy(profile = x)) 137 | .text("the reasoning profile") 138 | 139 | opt[File]('p', "prop") 140 | .optional() 141 | .valueName("") 142 | .action((x, c) => c.copy(propertiesFile = x)) 143 | .text("the (optional) properties file which allows some more advanced options") 144 | 145 | opt[String]('j', "jobName") 146 | .optional() 147 | .valueName("") 148 | .action((x, c) => c.copy(jobName = x)) 149 | .text("the name of the Flink job that occurs also in the Web-UI") 150 | 151 | help("help").text("prints this usage text") 152 | 153 | } 154 | parser.showUsageOnError 155 | } 156 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/GraphQuery.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.examples.spark.query 2 | 3 | import scala.concurrent.duration.Duration 4 | 5 | import net.sansa_stack.query.spark.graph.jena.SparqlParser 6 | import net.sansa_stack.query.spark.graph.jena.model.{ Config => modelConfig, IntermediateResult, SparkExecutionModel } 7 | import net.sansa_stack.rdf.spark.partition.graph.algo._ 8 | import org.apache.jena.graph.Node 9 | import org.apache.jena.riot.Lang 10 | import org.apache.log4j.Logger 11 | import org.apache.spark.graphx.Graph 12 | 13 | object GraphQuery { 14 | 15 | def main(args: Array[String]): Unit = { 16 | 17 | parser.parse(args, Config()) match { 18 | case Some(config) => run(config) 19 | case None => 20 | println(parser.usage) 21 | } 22 | } 23 | 24 | def run(config: Config): Unit = { 25 | 26 | println("===========================================") 27 | println("| SANSA - Graph query example |") 28 | println("===========================================") 29 | 30 | val log = Logger.getLogger(GraphQuery.getClass) 31 | 32 | // set configures for query engine model 33 | modelConfig.setAppName("SANSA Graph Query") 34 | .setInputGraphFile(config.input) 35 | .setInputQueryFile(config.query.head) 36 | .setLang(Lang.NTRIPLES) 37 | .setMaster("local[*]") 38 | 39 | // load graph 40 | log.info("Start to load graph") 41 | 42 | SparkExecutionModel.createSparkSession() 43 | val session = SparkExecutionModel.getSession 44 | 45 | // apply graph partitioning algorithm 46 | val prevG = SparkExecutionModel.getGraph 47 | var g: Graph[Node, Node] = null 48 | var msg: String = null 49 | var numParts: Int = 0 50 | var numIters: Int = 0 51 | 52 | // Set number of partitions (if config.numParts is 0, number of partitions equals to that of previous graph) 53 | config.numParts match { 54 | case 0 => numParts = prevG.edges.partitions.length 55 | case other 
=> numParts = other 56 | } 57 | 58 | config.numIters match { 59 | case 0 => 60 | case other => numIters = other 61 | } 62 | 63 | var partAlgo: PartitionAlgo[Node, Node] = null 64 | 65 | config.algo match { 66 | case "SSHP" => 67 | if (numIters == 0) { 68 | // Partition algorithm will use default number of iterations 69 | partAlgo = new SubjectHashPartition[Node, Node](prevG, session, numParts) 70 | } else { 71 | partAlgo = new SubjectHashPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 72 | } 73 | msg = "Start to execute subject semantic hash partitioning" 74 | case "OSHP" => 75 | if (numIters == 0) { 76 | partAlgo = new ObjectHashPartition[Node, Node](prevG, session, numParts) 77 | } else { 78 | partAlgo = new ObjectHashPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 79 | } 80 | msg = "Start to execute object semantic hash partitioning" 81 | case "SOSHP" => 82 | if (numIters == 0) { 83 | partAlgo = new SOHashPartition[Node, Node](prevG, session, numParts) 84 | } else { 85 | partAlgo = new SOHashPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 86 | } 87 | msg = "Start to execute subject-object semantic hash partitioning" 88 | case "PP" => 89 | if (numIters == 0) { 90 | partAlgo = new PathPartition[Node, Node](prevG, session, numParts) 91 | } else { 92 | partAlgo = new PathPartition[Node, Node](prevG, session, numParts).setNumIterations(numIters) 93 | } 94 | msg = "Start to execute path partitioning" 95 | case "" => 96 | case other => println(s"the input $other doesn't match any options, no algorithm will be applied.") 97 | } 98 | 99 | var start = 0L 100 | var end = 0L 101 | 102 | if (partAlgo != null) { 103 | log.info(msg) 104 | start = System.currentTimeMillis() 105 | g = partAlgo.partitionBy().cache() 106 | SparkExecutionModel.loadGraph(g) 107 | end = System.currentTimeMillis() 108 | log.info("Graph partitioning execution time: " + Duration(end - start, "millis").toMillis + " ms") 109 | } 110 | 111 | // query executing 112 | log.info("Start to execute queries") 113 | 114 | config.query.foreach { path => 115 | log.info("Query file: " + path) 116 | modelConfig.setInputQueryFile(path) 117 | val sp = new SparqlParser(modelConfig.getInputQueryFile) 118 | sp.getOps.foreach { ops => 119 | val tag = ops.getTag 120 | log.info("Operation " + tag + " start") 121 | start = System.currentTimeMillis() 122 | ops.execute() 123 | end = System.currentTimeMillis() 124 | log.info(tag + " execution time: " + Duration(end - start, "millis").toMillis + " ms") 125 | } 126 | } 127 | 128 | // print results to console 129 | if (config.print) { 130 | log.info("print final result") 131 | val results = IntermediateResult.getFinalResult.cache() 132 | if (results.count() >= 10) { 133 | log.info("Too long results(more than 10)") 134 | } else { 135 | results.collect().foreach(println(_)) 136 | } 137 | results.unpersist() 138 | } 139 | } 140 | 141 | case class Config(input: String = "", query: Seq[String] = null, print: Boolean = false, algo: String = "", 142 | numParts: Int = 0, numIters: Int = 0) 143 | 144 | val parser: scopt.OptionParser[Config] = new scopt.OptionParser[Config]("Spark-Graph-Example") { 145 | 146 | head("SANSA-Query-Graph-Example") 147 | 148 | opt[String]('i', "input").required().valueName(""). 149 | action((x, c) => c.copy(input = x)). 150 | text("path to file that contains the data (in N-Triples format).") 151 | 152 | opt[Seq[String]]('q', "query").required().valueName(", ..."). 153 | action((x, c) => c.copy(query = x)). 
154 | text("files that contain SPARQL queries.") 155 | 156 | opt[Boolean]('p', "print").optional().valueName("Boolean"). 157 | action((_, c) => c.copy(print = true)). 158 | text("print the result to the console(maximum 10 rows), default: false.") 159 | 160 | opt[String]('a', "algorithm").optional().valueName(""). 161 | action((x, c) => c.copy(algo = x)). 162 | text("choose one graph partitioning algorithm, default: no algorithm applied.") 163 | 164 | opt[Int]('n', "number of partitions").optional().valueName("") 165 | .action((x, c) => c.copy(numParts = x)) 166 | .text("set the number of partitions.") 167 | 168 | opt[Int]('t', "number of iterations").optional().valueName("") 169 | .action((x, c) => c.copy(numIters = x)) 170 | .text("set the number of iterations.") 171 | 172 | help("help").text("prints this usage text") 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /sansa-examples-flink/src/main/resources/ont_manchester.owl: -------------------------------------------------------------------------------- 1 | Prefix: : 2 | Prefix: bar: 3 | Prefix: foo: 4 | Prefix: owl: 5 | Prefix: rdf: 6 | Prefix: rdfs: 7 | Prefix: xml: 8 | Prefix: xsd: 9 | 10 | 11 | 12 | Ontology: 13 | 14 | 15 | Annotations: 16 | bar:hasTitle "Title", 17 | description "A longer 18 | description running over 19 | several lines", 20 | foo:hasName "Name" 21 | 22 | AnnotationProperty: bar:annProp1 23 | 24 | SubPropertyOf: 25 | bar:annProp2 26 | 27 | Domain: 28 | 29 | 30 | Range: 31 | 32 | 33 | 34 | AnnotationProperty: bar:annProp2 35 | 36 | 37 | AnnotationProperty: bar:hasTitle 38 | 39 | 40 | AnnotationProperty: bar:label 41 | 42 | 43 | AnnotationProperty: description 44 | 45 | 46 | AnnotationProperty: foo:ann 47 | 48 | 49 | AnnotationProperty: foo:hasName 50 | 51 | 52 | Datatype: bar:dtype1 53 | 54 | 55 | Datatype: bar:dtype2 56 | 57 | 58 | Datatype: rdf:PlainLiteral 59 | 60 | 61 | Datatype: rdfs:Literal 62 | 63 | 64 | Datatype: xsd:int 65 | 66 | 67 | Datatype: xsd:integer 68 | 69 | 70 | Datatype: xsd:string 71 | 72 | 73 | ObjectProperty: bar:Prop2 74 | 75 | 76 | ObjectProperty: bar:asymmObjProp 77 | 78 | Characteristics: 79 | Asymmetric 80 | 81 | 82 | ObjectProperty: bar:invObjProp1 83 | 84 | EquivalentTo: 85 | inverse (bar:objProp1) 86 | 87 | Characteristics: 88 | InverseFunctional 89 | 90 | InverseOf: 91 | bar:objProp1 92 | 93 | 94 | ObjectProperty: bar:objProp1 95 | 96 | EquivalentTo: 97 | bar:sameAsObjProp1 98 | 99 | DisjointWith: 100 | bar:objProp2 101 | 102 | Characteristics: 103 | Transitive, 104 | Reflexive 105 | 106 | Domain: 107 | bar:Cls1 108 | 109 | Range: 110 | bar:AllIndividualsCls 111 | 112 | InverseOf: 113 | bar:invObjProp1 114 | 115 | 116 | ObjectProperty: bar:objProp2 117 | 118 | DisjointWith: 119 | bar:objProp1 120 | 121 | Characteristics: 122 | Functional, 123 | Symmetric, 124 | Irreflexive 125 | 126 | 127 | ObjectProperty: bar:sameAsObjProp1 128 | 129 | EquivalentTo: 130 | bar:objProp1 131 | 132 | 133 | ObjectProperty: bar:subObjProp1 134 | 135 | SubPropertyOf: 136 | bar:objProp1 137 | 138 | 139 | DataProperty: bar:dataProp1 140 | 141 | Characteristics: 142 | Functional 143 | 144 | Domain: 145 | bar:Cls1 146 | 147 | Range: 148 | xsd:string 149 | 150 | EquivalentTo: 151 | bar:sameAsDataProp1 152 | 153 | DisjointWith: 154 | bar:dataProp2 155 | 156 | 157 | DataProperty: bar:dataProp2 158 | 159 | Domain: 160 | bar:Cls1 161 | 162 | Range: 163 | xsd:int 164 | 165 | DisjointWith: 166 | bar:dataProp1 167 | 168 | 169 | DataProperty: bar:sameAsDataProp1 170 | 
171 | EquivalentTo: 172 | bar:dataProp1 173 | 174 | 175 | DataProperty: bar:subDataProp1 176 | 177 | SubPropertyOf: 178 | bar:dataProp1 179 | 180 | 181 | Class: bar:AllIndividualsCls 182 | 183 | EquivalentTo: 184 | {foo:indivA , foo:indivB} 185 | 186 | 187 | Class: bar:AllProp1Cls1 188 | 189 | EquivalentTo: 190 | bar:objProp1 only bar:Cls1 191 | 192 | 193 | Class: bar:Cl1OrNegate 194 | 195 | DisjointUnionOf: 196 | bar:Cls1, bar:ComplementCls1 197 | 198 | 199 | Class: bar:Cls1 200 | 201 | Annotations: 202 | bar:label "Class 1" 203 | 204 | SubClassOf: 205 | bar:UnionCls 206 | 207 | HasKey: 208 | bar:dataProp1 209 | 210 | 211 | Class: bar:Cls2 212 | 213 | 214 | Class: bar:ComplementCls 215 | 216 | EquivalentTo: 217 | not (bar:Cls1) 218 | 219 | 220 | Class: bar:ComplementCls1 221 | 222 | 223 | Class: bar:DataAllIntGT10 224 | 225 | EquivalentTo: 226 | bar:dataProp2 only xsd:integer[>= 10] 227 | 228 | 229 | Class: bar:DataExact5Prop1 230 | 231 | EquivalentTo: 232 | bar:dataProp1 exactly 5 rdfs:Literal 233 | 234 | 235 | Class: bar:DataHasVal5 236 | 237 | EquivalentTo: 238 | bar:dataProp2 value 5 239 | 240 | 241 | Class: bar:DataMax2Prop1 242 | 243 | EquivalentTo: 244 | bar:dataProp1 max 2 rdfs:Literal 245 | 246 | DisjointWith: 247 | bar:DataMin3Prop1 248 | 249 | 250 | Class: bar:DataMin3Prop1 251 | 252 | EquivalentTo: 253 | bar:dataProp1 min 3 rdfs:Literal 254 | 255 | DisjointWith: 256 | bar:DataMax2Prop1 257 | 258 | 259 | Class: bar:DataSomeIntLT20 260 | 261 | EquivalentTo: 262 | bar:dataProp2 some xsd:integer[< 20] 263 | 264 | 265 | Class: bar:Exact5Prop1Cls1 266 | 267 | EquivalentTo: 268 | bar:objProp1 exactly 5 bar:Cls1 269 | 270 | 271 | Class: bar:HasSelfProp1 272 | 273 | EquivalentTo: 274 | bar:objProp1 some Self 275 | 276 | 277 | Class: bar:HasValProp1IndivB 278 | 279 | EquivalentTo: 280 | bar:objProp1 value foo:indivB 281 | 282 | 283 | Class: bar:IntersectionCls 284 | 285 | EquivalentTo: 286 | bar:Cls1 287 | and bar:Cls2 288 | 289 | 290 | Class: bar:Max3Prop1Cls1 291 | 292 | EquivalentTo: 293 | bar:objProp1 max 3 bar:Cls1 294 | 295 | 296 | Class: bar:Min2Prop1Cls1 297 | 298 | EquivalentTo: 299 | bar:objProp1 min 2 bar:Cls1 300 | 301 | 302 | Class: bar:SomeProp1Cls1 303 | 304 | EquivalentTo: 305 | bar:objProp1 some bar:Cls1 306 | 307 | 308 | Class: bar:UnionCls 309 | 310 | EquivalentTo: 311 | bar:Cls1 or bar:Cls2 312 | 313 | 314 | Individual: foo:indivA 315 | 316 | Types: 317 | bar:Cls1 318 | 319 | Facts: 320 | bar:objProp1 foo:indivB, 321 | bar:dataProp1 "ABCD", 322 | not bar:dataProp2 23 323 | 324 | SameAs: 325 | foo:sameAsIndivA 326 | 327 | DifferentFrom: 328 | foo:indivB 329 | 330 | 331 | Individual: foo:indivB 332 | 333 | Facts: 334 | not bar:Prop2 foo:indivA, 335 | bar:dataProp1 "BCDE" 336 | 337 | DifferentFrom: 338 | foo:indivA 339 | 340 | 341 | Individual: foo:sameAsIndivA 342 | 343 | SameAs: 344 | foo:indivA 345 | 346 | 347 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/ont_manchester.owl: -------------------------------------------------------------------------------- 1 | Prefix: : 2 | Prefix: bar: 3 | Prefix: foo: 4 | Prefix: owl: 5 | Prefix: rdf: 6 | Prefix: rdfs: 7 | Prefix: xml: 8 | Prefix: xsd: 9 | 10 | 11 | 12 | Ontology: 13 | 14 | 15 | Annotations: 16 | bar:hasTitle "Title", 17 | description "A longer 18 | description running over 19 | several lines", 20 | foo:hasName "Name" 21 | 22 | AnnotationProperty: bar:annProp1 23 | 24 | SubPropertyOf: 25 | bar:annProp2 26 | 27 | Domain: 28 | 29 | 30 | 
Range: 31 | 32 | 33 | 34 | AnnotationProperty: bar:annProp2 35 | 36 | 37 | AnnotationProperty: bar:hasTitle 38 | 39 | 40 | AnnotationProperty: bar:label 41 | 42 | 43 | AnnotationProperty: description 44 | 45 | 46 | AnnotationProperty: foo:ann 47 | 48 | 49 | AnnotationProperty: foo:hasName 50 | 51 | 52 | Datatype: bar:dtype1 53 | 54 | 55 | Datatype: bar:dtype2 56 | 57 | 58 | Datatype: rdf:PlainLiteral 59 | 60 | 61 | Datatype: rdfs:Literal 62 | 63 | 64 | Datatype: xsd:int 65 | 66 | 67 | Datatype: xsd:integer 68 | 69 | 70 | Datatype: xsd:string 71 | 72 | 73 | ObjectProperty: bar:Prop2 74 | 75 | 76 | ObjectProperty: bar:asymmObjProp 77 | 78 | Characteristics: 79 | Asymmetric 80 | 81 | 82 | ObjectProperty: bar:invObjProp1 83 | 84 | EquivalentTo: 85 | inverse (bar:objProp1) 86 | 87 | Characteristics: 88 | InverseFunctional 89 | 90 | InverseOf: 91 | bar:objProp1 92 | 93 | 94 | ObjectProperty: bar:objProp1 95 | 96 | EquivalentTo: 97 | bar:sameAsObjProp1 98 | 99 | DisjointWith: 100 | bar:objProp2 101 | 102 | Characteristics: 103 | Transitive, 104 | Reflexive 105 | 106 | Domain: 107 | bar:Cls1 108 | 109 | Range: 110 | bar:AllIndividualsCls 111 | 112 | InverseOf: 113 | bar:invObjProp1 114 | 115 | 116 | ObjectProperty: bar:objProp2 117 | 118 | DisjointWith: 119 | bar:objProp1 120 | 121 | Characteristics: 122 | Functional, 123 | Symmetric, 124 | Irreflexive 125 | 126 | 127 | ObjectProperty: bar:sameAsObjProp1 128 | 129 | EquivalentTo: 130 | bar:objProp1 131 | 132 | 133 | ObjectProperty: bar:subObjProp1 134 | 135 | SubPropertyOf: 136 | bar:objProp1 137 | 138 | 139 | DataProperty: bar:dataProp1 140 | 141 | Characteristics: 142 | Functional 143 | 144 | Domain: 145 | bar:Cls1 146 | 147 | Range: 148 | xsd:string 149 | 150 | EquivalentTo: 151 | bar:sameAsDataProp1 152 | 153 | DisjointWith: 154 | bar:dataProp2 155 | 156 | 157 | DataProperty: bar:dataProp2 158 | 159 | Domain: 160 | bar:Cls1 161 | 162 | Range: 163 | xsd:int 164 | 165 | DisjointWith: 166 | bar:dataProp1 167 | 168 | 169 | DataProperty: bar:sameAsDataProp1 170 | 171 | EquivalentTo: 172 | bar:dataProp1 173 | 174 | 175 | DataProperty: bar:subDataProp1 176 | 177 | SubPropertyOf: 178 | bar:dataProp1 179 | 180 | 181 | Class: bar:AllIndividualsCls 182 | 183 | EquivalentTo: 184 | {foo:indivA , foo:indivB} 185 | 186 | 187 | Class: bar:AllProp1Cls1 188 | 189 | EquivalentTo: 190 | bar:objProp1 only bar:Cls1 191 | 192 | 193 | Class: bar:Cl1OrNegate 194 | 195 | DisjointUnionOf: 196 | bar:Cls1, bar:ComplementCls1 197 | 198 | 199 | Class: bar:Cls1 200 | 201 | Annotations: 202 | bar:label "Class 1" 203 | 204 | SubClassOf: 205 | bar:UnionCls 206 | 207 | HasKey: 208 | bar:dataProp1 209 | 210 | 211 | Class: bar:Cls2 212 | 213 | 214 | Class: bar:ComplementCls 215 | 216 | EquivalentTo: 217 | not (bar:Cls1) 218 | 219 | 220 | Class: bar:ComplementCls1 221 | 222 | 223 | Class: bar:DataAllIntGT10 224 | 225 | EquivalentTo: 226 | bar:dataProp2 only xsd:integer[>= 10] 227 | 228 | 229 | Class: bar:DataExact5Prop1 230 | 231 | EquivalentTo: 232 | bar:dataProp1 exactly 5 rdfs:Literal 233 | 234 | 235 | Class: bar:DataHasVal5 236 | 237 | EquivalentTo: 238 | bar:dataProp2 value 5 239 | 240 | 241 | Class: bar:DataMax2Prop1 242 | 243 | EquivalentTo: 244 | bar:dataProp1 max 2 rdfs:Literal 245 | 246 | DisjointWith: 247 | bar:DataMin3Prop1 248 | 249 | 250 | Class: bar:DataMin3Prop1 251 | 252 | EquivalentTo: 253 | bar:dataProp1 min 3 rdfs:Literal 254 | 255 | DisjointWith: 256 | bar:DataMax2Prop1 257 | 258 | 259 | Class: bar:DataSomeIntLT20 260 | 261 | EquivalentTo: 262 | bar:dataProp2 
some xsd:integer[< 20] 263 | 264 | 265 | Class: bar:Exact5Prop1Cls1 266 | 267 | EquivalentTo: 268 | bar:objProp1 exactly 5 bar:Cls1 269 | 270 | 271 | Class: bar:HasSelfProp1 272 | 273 | EquivalentTo: 274 | bar:objProp1 some Self 275 | 276 | 277 | Class: bar:HasValProp1IndivB 278 | 279 | EquivalentTo: 280 | bar:objProp1 value foo:indivB 281 | 282 | 283 | Class: bar:IntersectionCls 284 | 285 | EquivalentTo: 286 | bar:Cls1 287 | and bar:Cls2 288 | 289 | 290 | Class: bar:Max3Prop1Cls1 291 | 292 | EquivalentTo: 293 | bar:objProp1 max 3 bar:Cls1 294 | 295 | 296 | Class: bar:Min2Prop1Cls1 297 | 298 | EquivalentTo: 299 | bar:objProp1 min 2 bar:Cls1 300 | 301 | 302 | Class: bar:SomeProp1Cls1 303 | 304 | EquivalentTo: 305 | bar:objProp1 some bar:Cls1 306 | 307 | 308 | Class: bar:UnionCls 309 | 310 | EquivalentTo: 311 | bar:Cls1 or bar:Cls2 312 | 313 | 314 | Individual: foo:indivA 315 | 316 | Types: 317 | bar:Cls1 318 | 319 | Facts: 320 | bar:objProp1 foo:indivB, 321 | bar:dataProp1 "ABCD", 322 | not bar:dataProp2 23 323 | 324 | SameAs: 325 | foo:sameAsIndivA 326 | 327 | DifferentFrom: 328 | foo:indivB 329 | 330 | 331 | Individual: foo:indivB 332 | 333 | Facts: 334 | not bar:Prop2 foo:indivA, 335 | bar:dataProp1 "BCDE" 336 | 337 | DifferentFrom: 338 | foo:indivA 339 | 340 | 341 | Individual: foo:sameAsIndivA 342 | 343 | SameAs: 344 | foo:indivA 345 | 346 | 347 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/mappings.ttl: -------------------------------------------------------------------------------- 1 | @prefix exp: 2 | @prefix rdfs: 3 | @prefix dcterms: 4 | @prefix schema: 5 | @prefix gr: 6 | @prefix npg: 7 | @prefix foaf: # correct http://xmlns.com/foaf/0.1/ 8 | @prefix edm: 9 | @prefix rr: 10 | @prefix rml: 11 | @prefix nosql: 12 | @prefix bsbm: 13 | @prefix dc: 14 | @prefix rev: 15 | 16 | <#ProducerMapping> 17 | rml:logicalSource [ 18 | rml:source "//Producer"; 19 | nosql:store nosql:jdbc 20 | ]; 21 | rr:subjectMap [ 22 | rr:template "http://example.com/{nr}"; 23 | rr:class bsbm:Producer 24 | ]; 25 | 26 | rr:predicateObjectMap [ 27 | rr:predicate edm:country; 28 | rr:objectMap [rml:reference "country"] 29 | ]; 30 | 31 | rr:predicateObjectMap [ 32 | rr:predicate rdfs:label; 33 | rr:objectMap [rml:reference "label"] 34 | ]; 35 | 36 | rr:predicateObjectMap [ 37 | rr:predicate bsbm:publisher; 38 | rr:objectMap [rml:reference "publisher"] 39 | ]; 40 | 41 | rr:predicateObjectMap [ 42 | rr:predicate foaf:homepage; 43 | rr:objectMap [rml:reference "homepage"] 44 | ]; 45 | 46 | rr:predicateObjectMap [ 47 | rr:predicate exp:publishDate; 48 | rr:objectMap [rml:reference "publishDate"] 49 | ]; 50 | 51 | rr:predicateObjectMap [ 52 | rr:predicate rdfs:comment; 53 | rr:objectMap [rml:reference "comment"] 54 | ]. 
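# Every mapping in this file follows the same pattern: rml:logicalSource names the
# source (a table, collection or file) together with its backing store via nosql:store
# (jdbc, parquet, csv, mongodb, cassandra); rr:subjectMap mints the entity IRI from a
# key attribute through rr:template; and each rr:predicateObjectMap binds one RDF
# predicate to one source attribute via rml:reference.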
55 | 56 | <#ReviewMapping> 57 | rml:logicalSource [ 58 | rml:source "src/main/resources/Data/review.parquet"; 59 | nosql:store nosql:parquet 60 | ]; 61 | rr:subjectMap [ 62 | rr:template "http://example.com/{nr}"; 63 | rr:class schema:Review 64 | ]; 65 | 66 | rr:predicateObjectMap [ 67 | rr:predicate dc:publisher; 68 | rr:objectMap [rml:reference "publisher"] 69 | ]; 70 | 71 | rr:predicateObjectMap [ 72 | rr:predicate rev:text; 73 | rr:objectMap [rml:reference "text"] 74 | ]; 75 | 76 | rr:predicateObjectMap [ 77 | rr:predicate bsbm:producer; 78 | rr:objectMap [rml:reference "producer"] 79 | ]; 80 | 81 | rr:predicateObjectMap [ 82 | rr:predicate rev:reviewer; 83 | rr:objectMap [rml:reference "person"] 84 | ]; 85 | 86 | rr:predicateObjectMap [ 87 | rr:predicate bsbm:rating3; 88 | rr:objectMap [rml:reference "rating3"] 89 | ]; 90 | 91 | rr:predicateObjectMap [ 92 | rr:predicate dcterms:language; 93 | rr:objectMap [rml:reference "language"] 94 | ]; 95 | 96 | rr:predicateObjectMap [ 97 | rr:predicate bsbm:reviewFor; 98 | rr:objectMap [rml:reference "product"] 99 | ]; 100 | 101 | rr:predicateObjectMap [ 102 | rr:predicate dc:title; 103 | rr:objectMap [rml:reference "title"] 104 | ]; 105 | 106 | rr:predicateObjectMap [ 107 | rr:predicate bsbm:rating2; 108 | rr:objectMap [rml:reference "rating2"] 109 | ]; 110 | 111 | rr:predicateObjectMap [ 112 | rr:predicate bsbm:reviewDate; 113 | rr:objectMap [rml:reference "reviewDate"] 114 | ]; 115 | 116 | rr:predicateObjectMap [ 117 | rr:predicate exp:publishDate; 118 | rr:objectMap [rml:reference "publishDate"] 119 | ]; 120 | 121 | rr:predicateObjectMap [ 122 | rr:predicate bsbm:rating1; 123 | rr:objectMap [rml:reference "rating1"] 124 | ]; 125 | 126 | rr:predicateObjectMap [ 127 | rr:predicate bsbm:rating4; 128 | rr:objectMap [rml:reference "rating4"] 129 | ]. 130 | 131 | <#PersonMapping> 132 | rml:logicalSource [ 133 | rml:source "src/main/resources/Data/person.csv"; 134 | nosql:store nosql:csv 135 | ]; 136 | rr:subjectMap [ 137 | rr:template "http://example.com/{nr}"; 138 | rr:class foaf:Person 139 | ]; 140 | 141 | rr:predicateObjectMap [ 142 | rr:predicate edm:country; 143 | rr:objectMap [rml:reference "country"] 144 | ]; 145 | 146 | rr:predicateObjectMap [ 147 | rr:predicate dc:publisher; 148 | rr:objectMap [rml:reference "publisher"] 149 | ]; 150 | 151 | rr:predicateObjectMap [ 152 | rr:predicate foaf:mbox_sha1sum; 153 | rr:objectMap [rml:reference "mbox_sha1sum"] 154 | ]; 155 | 156 | rr:predicateObjectMap [ 157 | rr:predicate exp:publishDate; 158 | rr:objectMap [rml:reference "publishDate"] 159 | ]; 160 | 161 | rr:predicateObjectMap [ 162 | rr:predicate foaf:name; 163 | rr:objectMap [rml:reference "name"] 164 | ]. 
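# Note that the subject template key follows the source: the MongoDB-backed Offer
# mapping below uses the document's {_id}, while the JDBC/Parquet/CSV/Cassandra
# sources use their {nr} column.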
165 | 166 | <#OfferMapping> 167 | rml:logicalSource [ 168 | rml:source "//Offer"; 169 | nosql:store nosql:mongodb 170 | ]; 171 | rr:subjectMap [ 172 | rr:template "http://example.com/{_id}"; 173 | rr:class schema:Offer 174 | ]; 175 | 176 | rr:predicateObjectMap [ 177 | rr:predicate bsbm:validTo; 178 | rr:objectMap [rml:reference "validTo"] 179 | ]; 180 | 181 | rr:predicateObjectMap [ 182 | rr:predicate dc:publisher; 183 | rr:objectMap [rml:reference "publisher"] 184 | ]; 185 | 186 | rr:predicateObjectMap [ 187 | rr:predicate bsbm:producer; 188 | rr:objectMap [rml:reference "producer"] 189 | ]; 190 | 191 | rr:predicateObjectMap [ 192 | rr:predicate bsbm:product; 193 | rr:objectMap [rml:reference "product"] 194 | ]; 195 | 196 | rr:predicateObjectMap [ 197 | rr:predicate gr:validFrom; 198 | rr:objectMap [rml:reference "validFrom"] 199 | ]; 200 | 201 | rr:predicateObjectMap [ 202 | rr:predicate bsbm:deliveryDays; 203 | rr:objectMap [rml:reference "deliveryDays"] 204 | ]; 205 | 206 | rr:predicateObjectMap [ 207 | rr:predicate exp:publishDate; 208 | rr:objectMap [rml:reference "publishDate"] 209 | ]; 210 | 211 | rr:predicateObjectMap [ 212 | rr:predicate npg:webpage; 213 | rr:objectMap [rml:reference "offerWebpage"] 214 | ]; 215 | 216 | rr:predicateObjectMap [ 217 | rr:predicate bsbm:price; 218 | rr:objectMap [rml:reference "price"] 219 | ]; 220 | 221 | rr:predicateObjectMap [ 222 | rr:predicate bsbm:vendor; 223 | rr:objectMap [rml:reference "vendor"] 224 | ]. 225 | 226 | <#ProductMapping> 227 | rml:logicalSource [ 228 | rml:source "//Product"; 229 | nosql:store nosql:cassandra 230 | ]; 231 | rr:subjectMap [ 232 | rr:template "http://example.com/{nr}"; 233 | rr:class bsbm:Product 234 | ]; 235 | 236 | rr:predicateObjectMap [ 237 | rr:predicate rdfs:label; 238 | rr:objectMap [rml:reference "label"] 239 | ]; 240 | 241 | rr:predicateObjectMap [ 242 | rr:predicate dc:publisher; 243 | rr:objectMap [rml:reference "publisher"] 244 | ]; 245 | 246 | rr:predicateObjectMap [ 247 | rr:predicate bsbm:producer; 248 | rr:objectMap [rml:reference "producer"] 249 | ]; 250 | 251 | rr:predicateObjectMap [ 252 | rr:predicate bsbm:productPropertyNumeric4; 253 | rr:objectMap [rml:reference "propertyNum4"] 254 | ]; 255 | 256 | rr:predicateObjectMap [ 257 | rr:predicate bsbm:productPropertyNumeric3; 258 | rr:objectMap [rml:reference "propertyNum3"] 259 | ]; 260 | 261 | rr:predicateObjectMap [ 262 | rr:predicate bsbm:productPropertyTextual6; 263 | rr:objectMap [rml:reference "propertyTex6"] 264 | ]; 265 | 266 | rr:predicateObjectMap [ 267 | rr:predicate bsbm:productPropertyTextual1; 268 | rr:objectMap [rml:reference "propertyTex1"] 269 | ]; 270 | 271 | rr:predicateObjectMap [ 272 | rr:predicate bsbm:productPropertyNumeric1; 273 | rr:objectMap [rml:reference "propertyNum1"] 274 | ]; 275 | 276 | rr:predicateObjectMap [ 277 | rr:predicate bsbm:productPropertyTextual4; 278 | rr:objectMap [rml:reference "propertyTex4"] 279 | ]; 280 | 281 | rr:predicateObjectMap [ 282 | rr:predicate bsbm:productPropertyNumeric5; 283 | rr:objectMap [rml:reference "propertyNum5"] 284 | ]; 285 | 286 | rr:predicateObjectMap [ 287 | rr:predicate bsbm:productPropertyNumeric2; 288 | rr:objectMap [rml:reference "propertyNum2"] 289 | ]; 290 | 291 | rr:predicateObjectMap [ 292 | rr:predicate exp:publishDate; 293 | rr:objectMap [rml:reference "publishDate"] 294 | ]; 295 | 296 | rr:predicateObjectMap [ 297 | rr:predicate bsbm:productPropertyTextual5; 298 | rr:objectMap [rml:reference "propertyTex5"] 299 | ]; 300 | 301 | rr:predicateObjectMap [ 302 | 
rr:predicate bsbm:productPropertyTextual3; 303 | rr:objectMap [rml:reference "propertyTex3"] 304 | ]; 305 | 306 | rr:predicateObjectMap [ 307 | rr:predicate rdfs:comment; 308 | rr:objectMap [rml:reference "comment"] 309 | ]; 310 | 311 | rr:predicateObjectMap [ 312 | rr:predicate bsbm:productPropertyNumeric6; 313 | rr:objectMap [rml:reference "propertyNum6"] 314 | ]; 315 | 316 | rr:predicateObjectMap [ 317 | rr:predicate bsbm:productPropertyTextual2; 318 | rr:objectMap [rml:reference "propertyTex2"] 319 | ]. -------------------------------------------------------------------------------- /sansa-examples-flink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | 7 | 8 | net.sansa-stack 9 | sansa-examples-parent_2.11 10 | 0.7.1 11 | 12 | 13 | sansa-examples-flink_2.11 14 | SANSA Examples - Apache Flink 15 | SANSA examples for Apache Flink 16 | 17 | 18 | 19 | 20 | ${project.groupId} 21 | sansa-rdf-flink_${scala.binary.version} 22 | 23 | 24 | 25 | 26 | ${project.groupId} 27 | sansa-owl-flink_${scala.binary.version} 28 | 29 | 30 | org.mortbay.jetty 31 | jetty 32 | 33 | 34 | org.mortbay.jetty 35 | jetty-util 36 | 37 | 38 | 39 | 40 | 41 | 42 | ${project.groupId} 43 | sansa-inference-flink_${scala.binary.version} 44 | 45 | 46 | 47 | 48 | 49 | 50 | ${project.groupId} 51 | sansa-ml-flink_${scala.binary.version} 52 | 53 | 54 | 55 | 56 | org.scala-lang 57 | scala-library 58 | 59 | 60 | 61 | 62 | org.apache.flink 63 | flink-scala_${scala.binary.version} 64 | 65 | 66 | org.apache.flink 67 | flink-streaming-scala_${scala.binary.version} 68 | 69 | 70 | org.apache.flink 71 | flink-clients_${scala.binary.version} 72 | 73 | 74 | 75 | 76 | com.typesafe.scala-logging 77 | scala-logging_${scala.binary.version} 78 | 79 | 80 | 81 | 82 | com.github.scopt 83 | scopt_${scala.binary.version} 84 | 85 | 86 | 87 | 88 | junit 89 | junit 90 | test 91 | 92 | 93 | org.specs2 94 | specs2-core_${scala.binary.version} 95 | test 96 | 97 | 98 | org.scalatest 99 | scalatest_${scala.binary.version} 100 | test 101 | 102 | 103 | 104 | 105 | src/main/scala 106 | src/test/scala 107 | 108 | 109 | org.apache.maven.plugins 110 | maven-compiler-plugin 111 | 112 | 113 | net.alchim31.maven 114 | scala-maven-plugin 115 | 116 | 117 | 118 | compile 119 | testCompile 120 | 121 | 122 | 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-shade-plugin 127 | 128 | 129 | package 130 | 131 | shade 132 | 133 | 134 | 135 | 136 | com.esotericsoftware.kryo:kryo 137 | com.esotericsoftware.minlog:minlog 138 | com.fasterxml.jackson.core:jackson* 139 | com.github.scopt:scopt_2.11 140 | com.google.code.findbugs:jsr305 141 | com.google.protobuf:protobuf-java 142 | com.jamesmurty.utils:java-xmlbuilder 143 | com.jcraft:jsch 144 | com.sun.jersey:jersey-core 145 | com.thoughtworks.paranamer:paranamer 146 | com.typesafe:config 147 | com.typesafe.akka:* 148 | com.twitter:chill_2.11 149 | commons-beanutils:commons-beanutils-bean-collections 150 | commons-cli:commons-cli 151 | commons-codec:commons-codec 152 | commons-configuration:commons-configuration 153 | commons-daemon:commons-daemon 154 | commons-digester:commons-digester 155 | commons-el:commons-el 156 | commons-lang:commons-lang 157 | commons-logging:commons-logging 158 | commons-net:commons-net 159 | commons-collections:commons-collections 160 | io.netty:netty* 161 | io.dropwizard.metrics:metrics* 162 | javax.activation:activation 163 | javax.servlet:servlet-api 164 | javax.xml.bind:jaxb-api 165 | javax.xml.stream:stax-api 166 | jline:jline 167 
| junit:junit 168 | org.apache.avro:avro 169 | org.apache.commons:commons-compress 170 | org.apache.commons:commons-lang3 171 | org.apache.commons:commons-math3 172 | org.apache.flink:* 173 | org.apache.zookeeper:zookeeper 174 | org.clapper:grizzled-slf4j_2.11 175 | org.codehaus.jackson:jackson-* 176 | org.javassist:javassist 177 | org.mortbay.jetty:jetty-util 178 | org.objenesis:objenesis 179 | org.scala-lang:* 180 | org.uncommons.maths:uncommons-maths 181 | org.xerial.snappy:snappy-java 182 | xmlenc:xmlenc 183 | 184 | 185 | 186 | 187 | *:* 188 | 189 | META-INF/*.SF 190 | META-INF/*.DSA 191 | META-INF/*.RSA 192 | 193 | 194 | 195 | false 196 | 197 | 198 | 199 | 200 | 201 | org.scalastyle 202 | scalastyle-maven-plugin 203 | 204 | 205 | 206 | 207 | 208 | 209 | 211 | build-jar 212 | 213 | false 214 | 215 | 216 | 217 | org.apache.flink 218 | flink-scala_${scala.binary.version} 219 | provided 220 | 221 | 222 | org.apache.flink 223 | flink-streaming-scala_${scala.binary.version} 224 | provided 225 | 226 | 227 | org.apache.flink 228 | flink-clients_${scala.binary.version} 229 | provided 230 | 231 | 232 | 233 | 234 | 235 | 236 | maven.aksw.snapshots 237 | AKSW Snapshot Repository 238 | http://maven.aksw.org/archiva/repository/snapshots 239 | 240 | false 241 | 242 | 243 | true 244 | 245 | 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright, Smart Data Analytics Research Group (http://sda.tech) 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 
191 | -------------------------------------------------------------------------------- /sansa-examples-spark/src/main/resources/datalake/data/product.csv/part-00000-62a2679b-bfab-43fe-80d3-d775b14cf02c.csv: -------------------------------------------------------------------------------- 1 | nr,label,comment,producer,propertyNum1,propertyNum2,propertyNum3,propertyNum4,propertyNum5,propertyNum6,propertyTex1,propertyTex2,propertyTex3,propertyTex4,propertyTex5,propertyTex6,publisher,publishDate 2 | 1,'manner gatemen','lordlings dialyzed hoardings palmitate resisters redesigned trowing fledging disinters occasionally refry objective comedown senders attendance calculous redux zed bidets subacute swinks berhymed pumping overassured outrush corteges chitters civilest chiffonniers kimonos protects epizootic centimos dismast boomage issues aggrieves sociably ammoniacs polliwogs labyrinths infatuates whiteout dissentients newmown flunkey titillated caduceus rediscovered breaststrokes schillings endorsement cheerleaders nonconcurrent intoned outpaces inkle superpowers habaneras subsoils paramours laughed',1,831,312,735,-1,150,-1,'guzzling jillion psychotherapists substantiation nonuple deluded snowmelt interlards overrefinement annoyed stuntedness calcimining stereophonically','recommendation embezzler reconviction misproportions discountenances callings defacers crummiest triglyceride','decentralizations impacting promulgations bibliotherapy murexes professorships locomotes durning lyncher spoonier abhorrence assize goglets','distracts universally trashily enervator',null,null,1,2000-11-01 3 | 2,'coterie','naughtiness illuminating careerers computerese brakeless mesozoa lineate fulminant batholiths mohawks exhalation paraguayan alcaldes foulings primordially almightily placed flukey improvises pommelled sententious bookmark rashers truces mordanted shunter praxeological causable compassed decertified transubstantiation automatize boxful befouling tragedienne visiting alliums triangulates hounders compressively camphorates mammons armories scrapes hanger nucleation loftless refractoriness nonhabitual paperer aridness jingliest sportswriters gained efficiently marshals tomogram tambura pureeing doughty compromised antineutrinos revertible picadors oddballs hominies drek irradiations fearlessness cortin hussy museful pupfish bulletproofing geminates nacre subsistence presifted abhors whereat wanes mooing refused biodegradability oghamic stouter venosities recopying supplantation buxomly foregoers pathologist welches comicality manifestos untangles mongols sluices demits inventers entitled taxability fancifulness claimed gastroenterology geotropically glenwood alack autochthonous nabob preempts alternativeness xviii fruiter deist electorally cooker voce abbeys composts jugsful glowing basset worshipfully rebait bushwhacker implorer jackknifing paraguayan enrolls blazonry dendrological pavilioned cully epistles foreshorten couth usurps legibilities yammered somnolently',1,1891,1040,1731,-1,992,-1,'scalded decoct practitioners infolds levered quartan calcined untransferable auditoria','charred payment linoleums cowsheds preconceive undergrounder nosier sawhorse coerces assn turgidities venins obliged homogenize','componential redemonstrates dewberries pearlers triplicates planked goddaughters largeness citator',null,'palpal thoroughly enactive swimmiest syrups',null,1,2005-03-08 4 | 3,'ahchoo','chanceman ventrals phlegmy vower matureness fictionalize iliads gasman tumours afeared tuneably 
insurrectionaries enfolds cisterns adduction leafage maharajas prancingly mannerless vitrines radiocast insulates stilbestrol compartmented appearers undercurrents gunnel hopes launchings deluder overemotional unfolder bioflavonoid snorter thawed instinctively halidome classed towages unctions carcase recollects germanely disputation ciscoes unsettle calculableness artiest disprovable soporose rankly fuguing pox recontamination windiness hypothermic chutzpahs lilliput cognoscing divestitive misbehaves culpableness mutilators biogeographic inmesh flumed apeak doweling reheels unshut avascular redirected wiverns graveled quae pithiest unities monks boniness dancing gleemen unimpeachably reaming trekked calculabilities sphinxes protozoans toasty understanding elks ultrasonography dreggier slashingly pimpernels survivors cultivating swaybacks immodestly pennsylvanian vitrine unpronounceable gallicism laggers undefeated deers heinousness cocobolo steadily gendarmerie blatantly spinosely totalizes invited preyers bandoleers airmailed quotationally intercuts updates digests accusatorially dusking stoppages littles cadaveric holsteins repertoires',1,594,434,227,-1,-1,-1,'whirs radiation overman violative adulators benumbs disaffectedly cuttages bluebeard','vichies resituates breads visard unfought adjunctly bractlets foamed durability amends retailer creaking inseminator sedatest rodeos','unreels voicer acidifiers shredding fistula uniformer chivies immunological grimacer spoilt admiringly',null,'hyperbolas knouted eulogists',null,1,2001-05-18 5 | 4,'reexhibit wrang tarts','bihourly prosiest matrixes jaggedest violinists dins archipelagos heighths limber azons acceptee husbander ashram relativeness grannies rectangles unearthing conies capered toeshoe fervour domination impishly satirically photonegative kaleidoscopic morticians eyewaters rapturousness animater granting twier geosynclinal relearns cosmopolis maizes gemmy unmixt mumbler laundries selenography unpin findings mistrusts porgy discontentedly bolter hulkier windily whoremaster sovietizes expellees reordain fondness nightspots boggier microvasculature fellatee holders inebrious upping mucking yugoslavs blondness appal premenstrually fiddled disfavors sketchers inhumanities tightest unsatisfied cherubically stonefly mentally buddhists atelier eighteens smartly retaliates marbleizing trappings egomaniacal undercook roadstead reascend dekaliter grinning retakers paintbrushes cichlid ashlars conventual smoothen gombos appurtenance botchy nonchalantly atremble thieve overflows daimons enwinding crystalloidal reproval nontaxable crossbars troupes photoreception tortuousness caromed creamier sphenoid authors nabbing mistimes enactment agoraphobic footslog boycotting overabounding cinerary vixenishly rearwards eczematous chuckler farming drudging ruinable soothest highted incontestabilities archdeacons agendums somatotypology garnished headband curves mows',1,940,290,1788,-1,-1,-1,'skinfuls uncertainty craving incas maenades fitfulness mas creditors fixity churchlier assumed routines','quartered mishmosh booms globoid syndicating orcas','visas deprecatingly conceptualist reactivation comebacks matchmaking disinformation muffled granulator basketries warthogs childbeds empowers',null,null,'pantomimist riverbanks loosens foretime managers updatable unzip bayonets bruins abstemiousness advocates destitutely increasing',1,2006-09-01 6 | 5,'vacillator mortifying','workaholics situating repartees mobilizers anorak magdalen inattentiveness filmstrips 
gusting runways tressiest obeyable lapps mooch defamatory whirs stealer pyramided motivates lapidates syllables showily orientates unhat smelting efficiencies calumniation adolescents loyalest steamboats excitability platy americanist photomicrograph wantonness parabolas massacring heatless episodes hopsack currying kb miscalled unweakened binned compactest pansophies palomino enureses ternate plottage brightly alkalinization underclerk fishmeal moulter valse kaleyards thaws hedonisms veiled tapes recension concusses enlargements mislabel requiems epitomizes clipsheets quixotries uplinking handsprings flexors concurrences snarls postulator involutions cortically upshifts dinette footfalls untaxed personification subdirectors fleabanes greases paradoxical discusses nondiscriminating heaped aroused machining broccolis synergist toppled techie scarlets scattiest magnetometer wiling pretensions impenetrableness argils feedbox halvahs sachems snobbiest impetigos hawknose zulus inadequately potbellied fetters sensuality revalidate elflock bucketful',1,834,450,736,202,27,-1,'egoisms welterweight friendship topsoiling securer reevaluated proclaimer murker awaiting ferried quinone dengue toxoids','unacceptance antibody reinflamed rehardens','steeper aerifies unum overbalanced disciplined vulnerably achromat gustative celebrants nonfreezing kindnesses fresher analogs viviparous cosets',null,null,null,1,2001-09-16 7 | 6,'desolates waging reveilles','poleward sagest impellers enjoyed tailpipes raying prevued flickering inshrining subscript resubmit grandstander pellagras cunts paleographer obsessiveness windlasses songfest inartistically epergnes guarani sonarman hassocks yardages alterability tetralogy treaded elevators anathematize surveyed evolvements juicier suffered commonweals wallpapering ionicity inaptness refreezes pepsine shammes reappraises squishiest ethicists bedraggling shivering crimsoned bondable archness occulter overdramatizes liverishness dandles boosting embalms restamp chaining strugglers skullduggeries hums manger pomades alternately pressurized gamed materfamilias supervenes darkest finer seels triste pronators giddiest tensing frauds vapouring sucroses emulsifiable cesarian octagons stoically suppurative',1,374,536,1567,813,-1,-1,'catarrhs triadisms foreseen wennier scurviest focuser recrowned cropped signatures plotting asks disembarkations rickettsiae clubmen reinduction','chilblains intertribal balsamic exotism reintroduced charade cubage oarless humanoids observingly mudcaps ubiquities decapitating','digged glockenspiels hearted sapid weeny colliers cylindrical treeing ferries proselyte victorians signifies','limeades iterances expressionist sculked supplanter commitment indemnifying sudoral calcifying captivator versified prating pithier daily nearsightedly',null,null,1,2004-07-20 8 | 7,'tither pettiness','buskin recompensable capacitances bootee lockets enticement disservices strategists licensors kennelled barding autocrats lathworks blueings tiresomely outdistanced gothicize ejectable materiels homogeneously paintiest sneezers workboat interphone ascii unconnected instrumentalist topographical disgruntling pederast sceptring racier evens trimeter banes misdemeanor webless rehinge mitigative defamers naturalist accustoms reclean valiancies pilled bearcats tents demultiplexes skulking publicized typecase supervisors escapeway recoinage blinding execs conformism treasures durums rondelle drawing oppressing bores coplots monogamousness zoospores unlikelihood preengaged 
overhands limekiln penetrator ultrasound lymphocytic radiolucencies chefs feoffment conventionalism cascading machos abundantly godchild frequented misplacing repairers surfy antedating chunter cancelers photoflash mistaught testacies',1,1900,774,66,-1,-1,-1,'solidest incarnation arrayers gruelingly honorands slobs','builders preadjusting cpl fosterage trulls fakers toepiece maximally bouldery hampshirites','dermatologies flopover noctambulation frizzler submissively reconsolidate clapping enunciators championed nigglingly tongers liquoring reminder podiatrists tussocks',null,null,null,1,2004-09-03 9 | 8,'resettling uncoagulated lowish','reprice renovating chevrolets refolds fantastical polarity ennuis franchisers undiluted macaroons overexcited habitability reaching ethers gratias biers wretchedly warps poetess forthrightness kinaesthetically lukewarmly decorators viselike ionized pumpernickel durations legitimation hazings protracting beechier monopolizes yids flavonols scaler letup condignly gipsies spoonily forging workups drapers oculist aldehydes subassociation forejudge graciousness carromed mal alpinisms alongshore proceeded institutionally ultraconservatives harpooner vises puffers trainful bordellos wayfarings cumquats jilter strewed imputing sibilantly venosities nosiness wharfs comediennes reprices manias trichroic valvar vaporizing obsolescently feedboxes radiophone antibacterial singlets soaper deists untangle undergraduates kwacha discombobulation chargers slumping servantship vittled jadishly superabundant gibbeting signatary frypan horsehides nonclassical sharecropped friendless flushers corrupted utilized emigres acolytes shouldering rassling surfs carvers braw weatherglasses soakers haggis hoisted cowlicks middlemost caroused reattach tenably tympanic binder foresters agamic busboys',1,1743,136,867,-1,-1,-1,'unrestored kneeler chaplet newts deckle vegas overeducating','replicates corks cinematheques charmers licitation geezer recombines admitters underpasses nighty ensnarled pardonable imperialness','cowiest crimpers unmuzzles repacify poilu',null,null,null,1,2003-07-24 10 | 9,'procreators taiwanese antigene','candors dictaphone youngsters stet millers impecuniousness likability comparts endemics reinsert clerkship halfbeak expansionary metaphysician effrontery helpfully hogs secures micks tacks oilstone guises clangs bendable maladministrative mopped halitoses quirts amortizable buggered dewclaw loners hydrozoon zinging reequips saddlebags exorcises laudability topside novelle chemoreceptor temporized bloodstreams housemaids sharks bannock resuscitative motets gruels choosier apologizes anticyclonic equalities struggled crappiness reuniter immixes ozonizes quashes unlivable backslider feminizing bretons overhauling streaky forejudge weepers dourness viceless hies tubercular refurnish taggers inquisitional rebecs mouthwash deaconing metastasis weans adjectivally troths apperceptive nonprotectively beckoning commiseration guttersnipes doctoring gymkhana sheepmen apologizers ortolan toadyish quatrains revalue straying restacks frivolously pierced fauces babus novelistic gills regresses fossilization lipless sulphurize ohmage procrastinator alienator travelled rivets thrivers pingrasses steps campier mismarks plushier proteinaceous bunco snoozes glitters finical simulants hippies skims engrave soundtracks huffish nonnegotiable eclectically hatsful shaftings disobeyers identifies wingspread',1,1504,1010,1517,90,1980,-1,'exceptionally replan aiming bedstraws tragically 
pollutants prefabricating isolation sextets rewarders approvement productive','salaamed minatory ruleless microsurgeons circumambulate sapphisms nonsexually adjoined noticing deescalated habiting touristy unequalled lucidities discords','matureness hydrocephaloid certifies undyingly doylies spellbinds prenatally govt unpolarized togae overjoying shirts reediness ecclesiastics awakens','arrowing corncake crumbliness recommits viperish monkery','unappetizingly quarrier speaks malthus overlook fatted archaists refreshed gayness',null,1,2005-07-22 11 | 10,'lignites rallying specters','filaree cirque vibrations leukemoid enquirer drossier prescience housewifeliness timed contentiousness constricting scramblers shivarees foilable dreidl tinfuls foolhardier downloading stuccos interpersonal doggish mislabel lowered solubility beguiler aboil slavey strolling prorating dimming descents benthos viced bruising hetero romps polymerically undecided runners libidinal fustic escapements obols sandlots channelizes notational gongs elks misspellings heedfully accelerative labella phlebotomy preeners diviners hugeness zilches amortizable roughness pullers remunerates doomsdays brisks coordinately unequaled stopcocks consistently bafflers drypoints nannies vialing trolleys ologist uncork rigatonis airhead remodification sereneness playsuit microtomy skewness reelecting prevailingly musicians sightings bylined reconveyed preconception overanxious',1,133,141,1580,1194,525,-1,'sinfully scampi slaveries mishandles ailment waggish tonicity ablutions randomizes innervations','healthiness lights disassociates spinel countenancing expedites roped helloed querists halloo assignment rendezvouses dentistries','climaxed filmlands frills poulticing nakedest jabs','chlorites amused psychologists cloches adducers requisitioner gapes tessellation consecratory stilting adders unclothes flabbiest detrain gardening','eyedroppers levied carroms uncourageous tormented destining',null,1,2005-09-29 12 | --------------------------------------------------------------------------------
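The RML mappings and the CSV/Parquet part files above are consumed together at query time: a SPARQL query is evaluated against the heterogeneous sources that the mappings describe, with a separate config file supplying the connection details. The snippet below is a minimal sketch of that flow, assuming the sparqlDL extension that SANSA's Spark query layer (net.sansa_stack.query.spark.query._) exposes in this release; the file paths are illustrative and the exact method signature may differ between SANSA versions.

import scala.io.Source

import org.apache.spark.sql.SparkSession

// Assumption: sparqlDL is provided as an implicit extension on SparkSession
// by SANSA's Spark query layer in the release used by these examples.
import net.sansa_stack.query.spark.query._

object DataLakeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SANSA DataLake sketch")
      .master("local[*]")
      .getOrCreate()

    // Illustrative paths; adjust to wherever the query, mappings and config live.
    val queryFile    = "src/main/resources/datalake/queries/Q1.sparql"
    val mappingsFile = "src/main/resources/datalake/mappings.ttl"
    val configFile   = "src/main/resources/datalake/config"

    val query = Source.fromFile(queryFile).getLines().mkString("\n")

    // The mappings tell the engine which logical source (Parquet, CSV, MongoDB,
    // Cassandra) backs each class, and the config supplies the connection details;
    // the SPARQL query is then decomposed into per-source queries and joined in Spark.
    val result = spark.sparqlDL(query, mappingsFile, configFile)
    result.show(truncate = false)

    spark.stop()
  }
}

The nosql:store values in the mappings are what let the engine choose an appropriate Spark connector for each logical source, so the same SPARQL query can span file-based and NoSQL-backed data without changes.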