├── test ├── arachne │ ├── aristotle_test.clj │ └── aristotle │ │ ├── reification_test.clj │ │ ├── registry_test.clj │ │ ├── graph_test.clj │ │ ├── inference_test.clj │ │ ├── validation_test.clj │ │ └── query_test.clj ├── .DS_Store ├── sample.rdf.edn ├── logback-test.xml ├── TheFirm.n3 └── foaf.rdf ├── src ├── data_readers.clj └── arachne │ ├── aristotle │ ├── rdf_edn.clj │ ├── validation.clj │ ├── inference.clj │ ├── query.clj │ ├── registry.clj │ ├── query │ │ ├── spec.clj │ │ └── compiler.clj │ └── graph.clj │ └── aristotle.clj ├── .gitignore ├── deps.edn ├── .circleci └── config.yml ├── LICENSE.txt └── readme.md /test/arachne/aristotle_test.clj: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arachne-framework/aristotle/HEAD/test/.DS_Store -------------------------------------------------------------------------------- /src/data_readers.clj: -------------------------------------------------------------------------------- 1 | {rdf/prefix arachne.aristotle.registry/read-prefix 2 | rdf/global-prefix arachne.aristotle.registry/read-global-prefix} 3 | -------------------------------------------------------------------------------- /test/sample.rdf.edn: -------------------------------------------------------------------------------- 1 | [#rdf/prefix [:ex "http://example.com/"] 2 | {:rdf/about :ex/luke 3 | :foaf/name "Luke" 4 | :foaf/knows _jim} 5 | {:rdf/about _jim 6 | :foaf/name "Jim"}] 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | .idea 13 | 
*.iml 14 | jena-docs 15 | .cpcache 16 | .rebl -------------------------------------------------------------------------------- /test/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"] 2 | :deps {org.clojure/clojure {:mvn/version "1.12.0"} 3 | org.apache.jena/apache-jena-libs {:mvn/version "5.3.0" 4 | :extension "pom"} 5 | ont-app/vocabulary {:mvn/version "0.1.7"} 6 | 7 | ;; Adds missing javax.xml.bind.DatatypeConverter in Java 9+ 8 | javax.xml.bind/jaxb-api {:mvn/version "2.4.0-b180830.0359"}} 9 | :aliases {:test {:extra-paths ["test"] 10 | :extra-deps {io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" :git/sha "dfb30dd"} 11 | ch.qos.logback/logback-classic {:mvn/version "1.2.11"}} 12 | :main-opts ["-m" "cognitect.test-runner"] 13 | :exec-fn cognitect.test-runner.api/test}}} 14 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Clojure CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-clojure/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | # specify the version you desire here 10 | - image: circleci/clojure:tools-deps 11 | working_directory: ~/repo 12 | 13 | steps: 14 | - checkout 15 | 16 | # Download and cache dependencies 17 | - restore_cache: 18 | keys: 19 | - v1-dependencies-{{ checksum "deps.edn" }} 20 | # fallback to using the latest cache if no exact match is found 21 | - v1-dependencies- 22 | 23 | - run: clojure -Atest -Srepro -Sdescribe 24 | 25 | - save_cache: 26 | paths: 27 | - ~/.m2 28 
| - ~/.gitlibs 29 | key: v1-dependencies-{{ checksum "deps.edn" }} 30 | 31 | # run tests! 32 | - run: clojure -Atest:run-tests 33 | -------------------------------------------------------------------------------- /src/arachne/aristotle/rdf_edn.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.rdf-edn 2 | "Reader/writer for RDF/EDN" 3 | (:require [clojure.edn :as edn] 4 | [arachne.aristotle.graph :as g]) 5 | (:import [org.apache.jena.riot LangBuilder RDFParserRegistry ReaderRIOTFactory ReaderRIOT] 6 | [org.apache.jena.riot.system ParserProfile StreamRDF] 7 | [org.apache.jena.atlas.web ContentType] 8 | [org.apache.jena.sparql.util Context] 9 | [java.io InputStream Reader InputStreamReader])) 10 | 11 | (def lang (-> (LangBuilder/create) 12 | (.langName "RDF/EDN") 13 | (.contentType "application/edn") 14 | (.addAltContentTypes (into-array String ["application/edn"])) 15 | (.addFileExtensions (into-array String ["edn"])) 16 | (.build))) 17 | 18 | (defn- read-edn 19 | "Read EDN from an input stream or Reader into the given StreamRDF output object" 20 | [input ^StreamRDF output] 21 | (let [data (edn/read-string {:readers *data-readers*} (slurp input)) 22 | triples (g/triples data)] 23 | (.start output) 24 | (doseq [t triples] 25 | (.triple output t)) 26 | (.finish output))) 27 | 28 | (defn riot-reader 29 | "Construct a new RIOT reader for EDN" 30 | [] 31 | (reify ReaderRIOT 32 | (^void read [this 33 | ^InputStream is 34 | ^String base 35 | ^ContentType ct 36 | ^StreamRDF output 37 | ^Context context] 38 | (read-edn is output)) 39 | (^void read [this 40 | ^Reader rdr 41 | ^String base 42 | ^ContentType ct 43 | ^StreamRDF output 44 | ^Context context] 45 | (read-edn rdr output)))) 46 | 47 | (def factory (reify ReaderRIOTFactory 48 | (create [_ lang profile] 49 | (riot-reader)))) 50 | 51 | (RDFParserRegistry/registerLangTriples lang factory) 52 | 
-------------------------------------------------------------------------------- /test/TheFirm.n3: -------------------------------------------------------------------------------- 1 | # Saved by TopBraid on Fri Nov 21 23:52:49 PST 2008 2 | # baseURI: http://www.workingontologist.org/Examples/Chapter6/TheFirm.owl 3 | 4 | @prefix xsd: . 5 | @prefix rdfs: . 6 | @prefix rdf: . 7 | @prefix daml: . 8 | @prefix : . 9 | @prefix owl: . 10 | 11 | 12 | a owl:Ontology ; 13 | owl:versionInfo "Created with TopBraid Composer"^^xsd:string . 14 | 15 | :Company 16 | a owl:Class ; 17 | rdfs:subClassOf owl:Thing . 18 | 19 | :Goldman 20 | a :Person ; 21 | :isEmployedBy :TheFirm . 22 | 23 | :Long 24 | a :Person ; 25 | :indirectlyContractsTo 26 | :TheFirm . 27 | 28 | :Person 29 | a owl:Class ; 30 | rdfs:subClassOf owl:Thing . 31 | 32 | :Spence 33 | a :Person ; 34 | :freeLancesTo :TheFirm . 35 | 36 | :TheFirm 37 | a :Company . 38 | 39 | :contractsTo 40 | a owl:ObjectProperty ; 41 | rdfs:domain :Person ; 42 | rdfs:range :Company ; 43 | rdfs:subPropertyOf :worksFor . 44 | 45 | :freeLancesTo 46 | a owl:ObjectProperty ; 47 | rdfs:domain :Person ; 48 | rdfs:range :Company ; 49 | rdfs:subPropertyOf :contractsTo . 50 | 51 | :indirectlyContractsTo 52 | a owl:ObjectProperty ; 53 | rdfs:domain :Person ; 54 | rdfs:range :Company ; 55 | rdfs:subPropertyOf :contractsTo . 56 | 57 | :isEmployedBy 58 | a owl:ObjectProperty ; 59 | rdfs:domain :Person ; 60 | rdfs:range :Company ; 61 | rdfs:subPropertyOf :worksFor . 62 | 63 | :worksFor 64 | a owl:ObjectProperty ; 65 | rdfs:domain :Person ; 66 | rdfs:range :Company . 
67 | -------------------------------------------------------------------------------- /test/arachne/aristotle/reification_test.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.reification-test 2 | (:require [clojure.test :refer :all] 3 | [arachne.aristotle.registry :as reg] 4 | [arachne.aristotle.graph :as graph] 5 | [arachne.aristotle :as aa])) 6 | 7 | (reg/prefix 'foaf "http://xmlns.com/foaf/0.1/") 8 | 9 | (deftest reification-test 10 | (let [g (aa/add (aa/graph :simple) {:rdf/about "" 11 | :foaf/name "Luke" 12 | :foaf/knows {:rdf/about "" 13 | :foaf/name "Stuart"}})] 14 | 15 | (is (= 3 (count (graph/triples g)))) 16 | (let [g (graph/reify g "" "")] 17 | (is (= 18 (count (graph/triples g))))))) 18 | 19 | (comment 20 | ;; Reification Benchmarking 21 | 22 | (import '[java.util UUID]) 23 | 24 | (def entities (vec (repeatedly 5000 (fn [] 25 | (str ""))))) 26 | 27 | 28 | (def properties (vec (repeatedly 500 (fn [] 29 | (str ""))))) 30 | 31 | 32 | (defn rand-triple 33 | [] 34 | [(rand-nth entities) (rand-nth properties) (case (rand-int 3) 35 | 0 (rand-nth entities) 36 | 1 (rand) 37 | 2 (str (UUID/randomUUID)))]) 38 | 39 | (def n 100000) 40 | 41 | (time 42 | (let [g (aa/add (aa/graph :jena-mini) (repeatedly n rand-triple)) 43 | g (graph/reify g "" (str (UUID/randomUUID)))] 44 | (def the-g g))) 45 | 46 | 47 | (def the-g nil) 48 | 49 | (time (count (graph/triples the-g)))) 50 | 51 | ;; Results: 100k triples (before reification) in a :jena-mini graph 52 | ;; cost about 1.1G of heap. 53 | 54 | ;; :simple is much cheaper, can fit about 1M 55 | ;; triples (before reification) in a 2GB data structure. 
56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /src/arachne/aristotle/validation.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.validation 2 | "Utils for returning inference validation errors in a consistent way" 3 | (:require [arachne.aristotle :as a] 4 | [arachne.aristotle.graph :as g] 5 | [arachne.aristotle.registry :as reg] 6 | [arachne.aristotle.query :as q]) 7 | (:import [org.apache.jena.reasoner InfGraph ValidityReport ValidityReport$Report])) 8 | 9 | 10 | (defn built-in 11 | "Validator which discovers any validation errors returned by the 12 | Reasoner itself" 13 | [^InfGraph graph] 14 | (let [r (.validate graph)] 15 | (if (.isValid r) 16 | [] 17 | (map (fn [^ValidityReport$Report r] 18 | {::error? (boolean (.isError r)) 19 | ::type :inference 20 | ::jena-type (.getType r) 21 | ::description (.getDescription r)}) 22 | (iterator-seq (.getReports r)))))) 23 | 24 | (let [q (q/build 25 | '[:filter (< ?actual ?expected) 26 | [:group [?c ?e ?p ?expected] [?actual (count ?val)] 27 | [:join 28 | [:disjunction 29 | [:bgp [?c :owl/cardinality ?expected]] 30 | [:bgp [?c :owl/minCardinality ?expected]]] 31 | [:conditional 32 | [:bgp 33 | [?c :owl/onProperty ?p] 34 | [?e :rdf/type ?c]] 35 | [:bgp [?e ?p ?val]]]]]])] 36 | 37 | (defn min-cardinality 38 | "Return a validation error for all entities that do not conform to any 39 | minCardinality restrictions on their parent classes. 40 | 41 | This validator is only correct when using Jena's Owl mini 42 | reasoner. The full reasoner uses minCardinality to infer the 43 | existence of blank nodes as values of a minCardinality property 44 | which while technically valid is not helpful for determining if 45 | something is logically missing." 46 | [g] 47 | (mapv (fn [[entity property expected actual]] 48 | {::error? true 49 | ::type ::min-cardinality 50 | ::description (format "Min-cardinality violation on %s. 
Expected at least %s distinct values for property %s, got %s" 51 | entity expected property actual) 52 | ::details {:entity entity 53 | :property property 54 | :expected expected 55 | :actual actual}}) 56 | (q/run g '[?e ?p ?expected ?actual] q)))) 57 | 58 | (defn validate 59 | "Validate the given graph, returning a sequence of validation errors 60 | or warnings. Always returns validation errors from the internal 61 | reasoner's own consistency checks, as well as any additional 62 | validators provided. 63 | 64 | Custom validators are functions which take a graph and return a 65 | collection of maps, each representing a validation error or 66 | warning. 67 | 68 | Unlike the built-in validators, custom validators may perform 69 | arbitrary logic (i.e., perform validations such as minCardinality 70 | that require a closed-world reasoning model instead of OWL's 71 | open-world default.)" 72 | ([g] (validate g [])) 73 | ([g validators] 74 | (mapcat #(% g) (conj validators built-in)))) 75 | -------------------------------------------------------------------------------- /test/arachne/aristotle/registry_test.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.registry-test 2 | (:require [clojure.test :refer :all] 3 | [arachne.aristotle.registry :as reg]) 4 | (:import [clojure.lang ExceptionInfo])) 5 | 6 | 7 | (reg/alias :mike "http://example.com/people/#mike") 8 | (deftest kw-registration 9 | (is (= "http://example.com/people/#mike" (reg/iri :mike)))) 10 | 11 | 12 | (reg/prefix 'foaf "http://xmlns.com/foaf/0.1/") 13 | (deftest direct-prefix-registration 14 | (is (= "http://xmlns.com/foaf/0.1/name" (reg/iri :foaf/name)))) 15 | 16 | (deftest conflicts 17 | (testing "Prefix registration conflicts" 18 | (reg/prefix :aa.bb "http://aa.bb.com/") 19 | (is (thrown-with-msg? 
ExceptionInfo #"namespace is already registered to a different prefix" 20 | (reg/prefix :aa.bb "http://something-else.com/"))) 21 | (is (thrown-with-msg? ExceptionInfo #"prefix is already registered with a different namespace" 22 | (reg/prefix :something.else "http://aa.bb.com/")))) 23 | (testing "Alias registration conflicts" 24 | (reg/alias :abc "http://abc.com/") 25 | (is (thrown-with-msg? ExceptionInfo #"Cannot alias" 26 | (reg/alias :abc "http://something-else.com/"))) 27 | (is (thrown-with-msg? ExceptionInfo #"Cannot alias" 28 | (reg/alias :something-else "http://abc.com/"))))) 29 | 30 | (deftest fails-on-unknown-kw 31 | (is (thrown-with-msg? ExceptionInfo #"Could not determine IRI" 32 | (reg/iri :foaf.bff/bff))) 33 | (is (thrown-with-msg? ExceptionInfo #"Could not determine IRI" 34 | (reg/iri :billy-bob)))) 35 | 36 | 37 | (reg/prefix :fizz.* "http://example.com/fizz/") 38 | (reg/prefix :fizz.buzz.* "http://example.com/fizz/buzz/") 39 | (reg/prefix :fizz.buzz.bazz "http://example.com/fizzbuzzbazz/") 40 | (reg/prefix :fizz.buzz.booz "http://example.com/fizzbuzzbooz/") 41 | 42 | (deftest ns-prefixes 43 | (is (= "http://example.com/fizz/test1" (reg/iri :fizz/test1))) 44 | (is (= "http://example.com/fizz/flop/test1" (reg/iri :fizz.flop/test1))) 45 | (is (= "http://example.com/fizz/buzz/florp/psst/test1" 46 | (reg/iri :fizz.buzz.florp.psst/test1))) 47 | (is (= "http://example.com/fizzbuzzbazz/test1" (reg/iri :fizz.buzz.bazz/test1)))) 48 | 49 | (deftest kw-generation 50 | (is (= :fizz/test1 (reg/kw "http://example.com/fizz/test1"))) 51 | (is (= :fizz.flop/test1 (reg/kw "http://example.com/fizz/flop/test1"))) 52 | (is (= :fizz.buzz.florp.psst/test1 53 | (reg/kw "http://example.com/fizz/buzz/florp/psst/test1"))) 54 | (is (= :fizz.buzz.bazz/test1 (reg/kw "http://example.com/fizzbuzzbazz/test1"))) 55 | (is (nil? 
(reg/kw "http://example.com/fizzbuzzbazz/test1/foo"))) 56 | (is (= :foaf/name (reg/kw "http://xmlns.com/foaf/0.1/name"))) 57 | (is (= :mike (reg/kw "http://example.com/people/#mike"))) 58 | (is (nil? (reg/kw "http://this-is-not-registered#foobar")))) 59 | 60 | 61 | (reg/prefix :flotsam "http://flotsam.com/") 62 | (reg/prefix :flotsam.jetsam "http://flotsam.com/jetsam#") 63 | (reg/prefix :flotsam.jetsam.yep.* "http://flotsam.com/jetsam.yep/") 64 | 65 | (deftest overlapping-generation 66 | (is (= "http://flotsam.com/foo" (reg/iri :flotsam/foo))) 67 | (is (= :flotsam/foo (reg/kw "http://flotsam.com/foo"))) 68 | 69 | (is (= "http://flotsam.com/jetsam#foo" (reg/iri :flotsam.jetsam/foo))) 70 | (is (= :flotsam.jetsam/foo (reg/kw "http://flotsam.com/jetsam#foo"))) 71 | 72 | (is (= "http://flotsam.com/jetsam.yep/yip/foo" (reg/iri :flotsam.jetsam.yep.yip/foo))) 73 | (is (= :flotsam.jetsam.yep.yip/foo (reg/kw "http://flotsam.com/jetsam.yep/yip/foo")))) 74 | -------------------------------------------------------------------------------- /src/arachne/aristotle.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle 2 | "Primary API" 3 | (:require [arachne.aristotle.graph :as g] 4 | [arachne.aristotle.query :as q] 5 | [arachne.aristotle.registry :as reg] 6 | [arachne.aristotle.inference :as inf] 7 | [arachne.aristotle.rdf-edn] 8 | [clojure.java.io :as io]) 9 | (:import [org.apache.jena.reasoner.rulesys GenericRuleReasoner] 10 | [org.apache.jena.graph GraphMemFactory Graph GraphUtil] 11 | [org.apache.jena.riot RDFDataMgr] 12 | [org.apache.jena.riot Lang] 13 | [java.net URL] 14 | [java.io File]) 15 | (:refer-clojure :exclude [read])) 16 | 17 | (defmulti graph 18 | "Build a new, empty graph of the specified type. 19 | 20 | Built-in types are: 21 | 22 | :simple - A basic in-memory RDF graph with no reasoner. 23 | :jena-mini - Jena's partial implementation of OWL Full with 24 | an in-memory store.. 
25 | :jena-rules - Jena's GenericRuleReasoner. Takes a second argument, 26 | which is a collection of rules to use (see 27 | arachne.aristotle.inference for tools to create 28 | rules and some pre-built rulesets.)" 29 | (fn [type & _] type)) 30 | 31 | (defmethod graph :simple 32 | [_] 33 | (GraphMemFactory/createGraphMem)) 34 | 35 | (defmethod graph :jena-mini 36 | [_] 37 | (graph :jena-rules inf/mini-rules)) 38 | 39 | ;; Note: You'll probably want to include the basic tabling rule to 40 | ;; avoid infinite lookups on recursive backchains 41 | 42 | (defmethod graph :jena-rules 43 | [_ initial-rules] 44 | (let [reasoner (GenericRuleReasoner. initial-rules)] 45 | (.setOWLTranslation reasoner true) 46 | (.setTransitiveClosureCaching reasoner true) 47 | (.bind reasoner (GraphMemFactory/createGraphMem)))) 48 | 49 | (defn add 50 | "Add the given data to a graph, returning the graph. Data must satisfy 51 | arachne.aristotle.graph/AsTriples. If the data is a Graph it will be 52 | added directly." 53 | [graph data] 54 | (if (instance? Graph data) 55 | (GraphUtil/addInto ^Graph graph ^Graph data) 56 | (GraphUtil/add ^Graph graph ^java.util.List (g/triples data))) 57 | graph) 58 | 59 | (defn read 60 | "Load a file containing serialized RDF data into a graph, returning 61 | the graph. Throws ExceptionInfo if the file is not one of the supported types below (nil included), instead of silently returning the graph unchanged. The file may be specified using: 62 | 63 | - String URIs, 64 | - java.net.URI, 65 | - java.net.URL 66 | - java.io.File" 67 | [^Graph graph file] 68 | (cond 69 | (string? file) (RDFDataMgr/read ^Graph graph ^String file) 70 | (uri? file) (RDFDataMgr/read ^Graph graph ^String (str file)) 71 | (instance? java.net.URL file) (RDFDataMgr/read graph (str (.toURI ^URL file))) 72 | (instance? 
java.io.File file) (RDFDataMgr/read graph 73 | (-> ^File file 74 | (.getAbsoluteFile) 75 | (.toURI) 76 | (str))) :else (throw (ex-info "Unsupported RDF source; expected a String, java.net.URI, java.net.URL or java.io.File" {:file file :type (type file)}))) 77 | graph) 78 | 79 | 80 | 81 | (def formats {:csv Lang/CSV 82 | :jsonld Lang/JSONLD 83 | :jsonld11 Lang/JSONLD11 84 | :n3 Lang/N3 85 | :nquads Lang/NQUADS 86 | :ntriples Lang/NTRIPLES 87 | :rdfjson Lang/RDFJSON 88 | :null Lang/RDFNULL 89 | :rdfthrift Lang/RDFTHRIFT 90 | :rdfxml Lang/RDFXML 91 | :shaclc Lang/SHACLC 92 | :trig Lang/TRIG 93 | :trix Lang/TRIX 94 | :tsv Lang/TSV 95 | :ttl Lang/TTL 96 | :turtle Lang/TTL}) 97 | 98 | (defn write 99 | "Write the contents of a graph to a file using the specified serialization format. The format must be one of the keys of `formats`; any other value throws ExceptionInfo (before the file is opened) instead of silently writing nothing." 100 | [^Graph graph file format] 101 | (if-let [lang (formats format)] 102 | (with-open [out (io/output-stream file)] 103 | (RDFDataMgr/write out graph lang)) (throw (ex-info "Unknown RDF serialization format" {:format format :supported (set (keys formats))})))) 104 | -------------------------------------------------------------------------------- /src/arachne/aristotle/inference.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.inference 2 | "Tools for adding additional inference rules to a graph. 
3 | 4 | See https://jena.apache.org/documentation/inference/" 5 | (:require [clojure.spec.alpha :as s] 6 | [arachne.aristotle.graph :as g] 7 | [arachne.aristotle.registry :as reg]) 8 | (:import [org.apache.jena.graph Triple Node_Blank Node_Variable] 9 | [org.apache.jena.reasoner.rulesys Rule] 10 | [org.apache.jena.reasoner TriplePattern] 11 | [org.apache.jena.reasoner.rulesys 12 | RuleReasoner FBRuleReasoner OWLFBRuleReasoner Node_RuleVariable] 13 | [org.apache.jena.reasoner InfGraph ReasonerRegistry] 14 | [java.util List])) 15 | 16 | (def ^:private ^:dynamic *assignments*) 17 | 18 | (defn- find-or-assign 19 | "Find an existing var in the same rule, or construct a new one." 20 | [^Node_Variable n] 21 | (let [name (str "?" (.getName n)) 22 | [_ assignments] (swap! 
*assignments* 23 | (fn [[idx assignments :as val]] 24 | (if (get assignments name) 25 | val 26 | [(inc idx) 27 | (assoc assignments name 28 | (Node_RuleVariable. name idx))])))] 29 | (get assignments name))) 30 | 31 | (defn- sub 32 | "Substitute a general RDF node for the type that should be used in a 33 | TriplePattern as part of a rule." 34 | [node] 35 | (cond 36 | (instance? Node_Blank node) (Node_RuleVariable/WILD) 37 | (instance? Node_Variable node) (find-or-assign node) 38 | :else node)) 39 | 40 | (defn- pattern "Convert data into a seq of TriplePatterns: each triple produced by g/triples has its subject, predicate and object passed through `sub`." 41 | [triples] 42 | (for [^Triple t (g/triples triples)] 43 | (TriplePattern. (sub (.getSubject t)) 44 | (sub (.getPredicate t)) 45 | (sub (.getObject t))))) 46 | 47 | (defn- coll-of? "True when coll is seqable and every element of it is an instance of class." [class coll] 48 | (and (seqable? coll) 49 | (every? #(instance? class %) (seq coll)))) 50 | 51 | (defn- extract 52 | "Return a map of variable assignments used in the given object." 53 | [val] 54 | (cond 55 | (instance? Node_RuleVariable val) (if (= Node_RuleVariable/WILD val) 56 | {} 57 | {(.getName ^Node_RuleVariable val) val}) 58 | (instance? TriplePattern val) (merge (extract (.getSubject ^TriplePattern val)) 59 | (extract (.getPredicate ^TriplePattern val)) 60 | (extract (.getObject ^TriplePattern val))) 61 | (instance? Rule val) (apply merge (map extract (concat (.getHead ^Rule val) (.getBody ^Rule val)))) 62 | :else {})) 63 | 64 | (defn add 65 | "Given a graph, return a new graph with a reasoner including the given 66 | rules. This may be expensive, given that it rebuilds the reasoner 67 | for the entire graph. Note that the rules are passed to `.addRules` on the reasoner already attached to g, which is then bound to g's raw graph." 68 | [^InfGraph g rules] 69 | (let [reasoner ^FBRuleReasoner (.getReasoner g)] 70 | (.addRules reasoner ^List rules) 71 | (.bind reasoner (.getRawGraph g)))) 72 | 73 | (defn rule 74 | "Create an implication rule. Takes the following keyword args: 75 | 76 | :name - name of the rule 77 | :body - The premises, or left-hand-side of a rule. Specified as a 78 | data pattern using the `arachne.aristotle.graph/AsTriples` 79 | protocol. 
80 | :head - the consequent, or right-hand-side of a rule. May be a data 81 | pattern or an instance of Rule. 82 | :dir - :forward if the rule is a forward-chaining rule, or :back for 83 | a backward-chaining rule. Defaults to :back." 84 | [& {:keys [^String name body head dir]}] 85 | (binding [*assignments* (let [vars (extract head)] 86 | (atom [(count vars) vars]))] 87 | (let [^List head (if (instance? Rule head) 88 | [head] 89 | (pattern head)) 90 | ^List body (pattern body)] 91 | (doto (Rule. name head body) 92 | (.setBackward (not (= :forward dir))) 93 | (.setNumVars (count (second @*assignments*))))))) 94 | 95 | (def owl-rules 96 | "The maximal set of OWL rules supported by Jena" 97 | (.getRules ^RuleReasoner (ReasonerRegistry/getOWLReasoner))) 98 | 99 | (def mini-rules 100 | "The OWL rules supported by Jena's mini Reasoner" 101 | (.getRules ^RuleReasoner (ReasonerRegistry/getOWLMiniReasoner))) 102 | 103 | (def table-all 104 | "Rule that calls the built in TableAll directive. This is usually 105 | desirable, to prevent infinite circular backwards inferences." 
106 | (Rule/parseRule "-> tableAll().")) 107 | -------------------------------------------------------------------------------- /test/arachne/aristotle/graph_test.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.graph-test 2 | (:require [clojure.test :refer :all] 3 | [arachne.aristotle :as ar] 4 | [arachne.aristotle.registry :as reg] 5 | [arachne.aristotle.graph :as graph] 6 | [arachne.aristotle.query :as q] 7 | [clojure.java.io :as io] 8 | [clojure.edn :as edn])) 9 | 10 | (reg/prefix 'foaf "http://xmlns.com/foaf/0.1/") 11 | (reg/prefix 'test "http://example.com/test/") 12 | (reg/prefix 'arachne "http://arachne-framework.org/#") 13 | 14 | (deftest nested-card-many 15 | (let [data [{:rdf/about :test/jane 16 | :foaf/name "Jane" 17 | :foaf/knows [{:rdf/about :test/bill 18 | :arachne/name "Bill"} 19 | {:rdf/about :test/nicole 20 | :arachne/name "Nicole"}]}] 21 | triples (graph/triples data)] 22 | (is (= 5 (count triples))))) 23 | 24 | (deftest load-rdf-edn 25 | (let [g (ar/read (ar/graph :simple) (io/resource "sample.rdf.edn"))] 26 | (is (= #{["Jim"]} 27 | (q/run g '[?name] 28 | '[:bgp 29 | ["" :foaf/knows ?person] 30 | [?person :foaf/name ?name]]))))) 31 | 32 | (defn- entity-with-name 33 | [data name] 34 | (ffirst 35 | (q/run 36 | (ar/add (ar/graph :simple) data) 37 | '[?p] 38 | '[:bgp [?p :foaf/name ?name]] 39 | {'?name name}))) 40 | 41 | (deftest inline-prefix-test 42 | (let [str "[#rdf/prefix [:foo \"http://foo.com/#\"] 43 | {:rdf/about :foo/luke 44 | :foaf/name \"Luke\"}]" 45 | data (edn/read-string {:readers *data-readers*} str)] 46 | (is (= "" (entity-with-name data "Luke"))))) 47 | 48 | (deftest global-prefix-test 49 | (testing "initial usage" 50 | (let [str "[#rdf/global-prefix [:baz \"http://baz.com/#\"] 51 | {:rdf/about :baz/luke 52 | :foaf/name \"Luke\"}]" 53 | data (edn/read-string {:readers *data-readers*} str)] 54 | (is (= :baz/luke (entity-with-name data "Luke"))))) 55 | (testing "subsequent 
usage" 56 | (let [data {:rdf/about :baz/jim 57 | :foaf/name "Jim"}] 58 | (is (= :baz/jim (entity-with-name data "Jim")))) 59 | (is (reg/prefix :baz "http://baz.com/#"))) 60 | (testing "conflict" 61 | (is (thrown-with-msg? Exception #"namespace is already registered" 62 | (reg/prefix :baz "http://bazbazbaz.com/#"))) 63 | (is (thrown-with-msg? Exception #"namespace is already registered" 64 | (edn/read-string {:readers *data-readers*} 65 | "#rdf/global-prefix [:baz \"http://bazbazbaz.com/#\"]"))))) 66 | 67 | (reg/prefix :ex "http://example2.com") 68 | 69 | (deftest symbol-type-test 70 | (let [data [{:rdf/about :ex/luke 71 | :ex/ctor 'foo.bar/biz}]] 72 | (is (= #{['foo.bar/biz]} 73 | (q/run 74 | (ar/add (ar/graph :simple) data) 75 | '[?ctor] 76 | '[:bgp [:ex/luke :ex/ctor ?ctor]]))))) 77 | 78 | (deftest reverse-keyword-test 79 | (let [data [{:rdf/about :test/luke 80 | :foaf/_knows :test/jon} 81 | {:rdf/about :test/hannah 82 | :foaf/_knows [{:rdf/about :test/luke}]}] 83 | g (ar/add (ar/graph :simple) data)] 84 | (is (= #{[:test/jon :test/luke] 85 | [:test/luke :test/hannah]} 86 | (q/run g '[?a ?b] 87 | '[:bgp [?a :foaf/knows ?b]]))))) 88 | 89 | (deftest empty-map-vals 90 | (let [data [{:rdf/about :test/luke 91 | :rdf/name "Luke" 92 | :foaf/knows [nil]}]] 93 | (is (= 1 (count (graph/triples data)))))) 94 | 95 | (deftest rdf-linked-lists 96 | "Convert Clojure lists to RDF linked lists" 97 | (let [data [{:rdf/about :test/race 98 | :test/starters (graph/rdf-list [:test/luke :test/stuart :test/joe])}] 99 | g (ar/add (ar/graph :simple) data)] 100 | (testing "RDF list triples" 101 | (is (not (empty? 
102 | (q/run g '[?l0 ?l1 ?l2] 103 | '[:bgp 104 | [:test/race :test/starters ?l0] 105 | [?l0 :rdf/first :test/luke] 106 | [?l0 :rdf/rest ?l1] 107 | [?l1 :rdf/first :test/stuart] 108 | [?l1 :rdf/rest ?l2] 109 | [?l2 :rdf/first :test/joe] 110 | [?l2 :rdf/rest :rdf/nil]]))))))) 111 | 112 | (deftest rdf-containers 113 | "Conver Clojure data to RDF containers" 114 | (let [data [{:rdf/about :test/groceries 115 | :test/contents (graph/rdf-bag [:test/apple :test/banana :test/orange])}] 116 | g (ar/add (ar/graph :simple) data)] 117 | (testing "RDF bag triples" 118 | (is (not (empty? 119 | (q/run g '[?l0 ?l1 ?l2] 120 | '[:bgp 121 | [:test/groceries :test/contents ?bag] 122 | [?bag :rdf/_0 :test/apple] 123 | [?bag :rdf/_1 :test/banana] 124 | [?bag :rdf/_2 :test/orange]]))))))) 125 | 126 | -------------------------------------------------------------------------------- /test/arachne/aristotle/inference_test.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.inference-test 2 | (:require [clojure.test :refer :all] 3 | [arachne.aristotle :as aa] 4 | [arachne.aristotle.registry :as reg] 5 | [arachne.aristotle.inference :as inf] 6 | [arachne.aristotle.query :as q] 7 | [clojure.java.io :as io])) 8 | 9 | (reg/prefix 'daml "http://www.daml.org/2001/03/daml+oil#") 10 | (reg/prefix 'wo.tf "http://www.workingontologist.org/Examples/Chapter6/TheFirm.owl#") 11 | (reg/prefix :arachne "http://arachne-framework.org/#") 12 | 13 | (reg/prefix (ns-name *ns*) "http://example.com/#") 14 | 15 | (deftest basic-type-inference 16 | (let [g (aa/read (aa/graph :jena-mini) 17 | (io/resource "TheFirm.n3")) 18 | gls #{[:wo.tf/Goldman] 19 | [:wo.tf/Long] 20 | [:wo.tf/Spence]} 21 | withsmith (conj gls [:arachne/Smith]) 22 | ppl-query '[:bgp 23 | [?person :rdf/type :wo.tf/Person]] 24 | worksfor-query '[:bgp 25 | [?person :wo.tf/worksFor :wo.tf/TheFirm]]] 26 | (is (= gls (set (q/run g '[?person] ppl-query)))) 27 | (is (= gls (set (q/run g '[?person] 
worksfor-query)))) 28 | (let [g (aa/add g {:rdf/about :arachne/Smith 29 | :wo.tf/freeLancesTo :wo.tf/TheFirm})] 30 | (is (= withsmith (set (q/run g '[?person] ppl-query)))) 31 | (is (= withsmith (set (q/run g '[?person] worksfor-query))))))) 32 | 33 | (def pres-props 34 | [{:rdf/about :wo.tf/president 35 | :owl/class :owl/FunctionalProperty 36 | :rdfs/domain :wo.tf/Company 37 | :rdfs/range :wo.tf/Person 38 | :owl/inverseOf :wo.tf/presidentOf} 39 | {:rdf/about :wo.tf/presidentOf 40 | :rdfs/subPropertyOf :wo.tf/isEmployedBy} 41 | {:rdf/about :wo.tf/TheFirm 42 | :wo.tf/president :wo.tf/Flint}]) 43 | 44 | (deftest inverse-properties 45 | (let [g (aa/read (aa/graph :jena-mini) (io/resource "TheFirm.n3")) 46 | g (aa/add g pres-props)] 47 | (is 48 | (= #{[:wo.tf/TheFirm]} 49 | (q/run g '[?firm] 50 | '[:bgp 51 | [:wo.tf/Flint :wo.tf/worksFor ?firm]]))))) 52 | 53 | (def custom-ruleset 54 | [(inf/rule :body '[[?thing :arachne/eats ?food] 55 | [?food :rdf/type :arachne/Animal]] 56 | :head '[[?thing :arachne/carnivore true]])]) 57 | 58 | (deftest custom-rules 59 | (let [g (aa/add (aa/graph :jena-rules (concat inf/owl-rules custom-ruleset)) 60 | [{:rdf/about :arachne/leo 61 | :arachne/name "Leo" 62 | :arachne/eats :arachne/jumper} 63 | {:rdf/about :arachne/jumper 64 | :rdf/type :arachne/Gazelle} 65 | {:rdf/about :arachne/Gazelle 66 | :rdfs/subClassOf :arachne/Animal}])] 67 | (is (= #{[:arachne/leo]} 68 | (q/run g '[?e] '[:bgp 69 | [?e :arachne/carnivore true]]))))) 70 | 71 | (deftest functional-properties 72 | (let [g (aa/add (aa/graph :jena-mini) 73 | [{:rdf/about :arachne/legalSpouse 74 | :rdf/type [:owl/ObjectProperty :owl/FunctionalProperty] 75 | :rdfs/domain :arachne/Person 76 | :rdfs/range :arachne/Person} 77 | {:rdf/about :arachne/jon 78 | :arachne/name "John" 79 | :arachne/legalSpouse [{:rdf/about :arachne/will 80 | :arachne/name "William"}]} 81 | {:rdf/about :arachne/jon 82 | :arachne/legalSpouse [{:rdf/about :arachne/bill 83 | :arachne/name "Bill"}]}])] 84 | 85 | (is 
(= #{[:arachne/will] [:arachne/bill]} 86 | (q/run g '[?b] 87 | '[:bgp 88 | [?b :arachne/name "William"] 89 | [?b :arachne/name "Bill"]]))))) 90 | 91 | (reg/prefix :foaf "http://xmlns.com/foaf/0.1/") 92 | (reg/prefix :dc "http://purl.org/dc/elements/1.1/") 93 | 94 | (deftest custom-forward-rules 95 | (let [inverse-rule (inf/rule :body '[?p :owl/inverseOf ?q] 96 | :head (inf/rule :body '[?y ?q ?x] 97 | :head '[?x ?p ?y]) 98 | :dir :forward) 99 | knows-rule (inf/rule :body '[[?a :foaf/made ?work] 100 | [?b :foaf/made ?work]] 101 | :head '[?a :foaf/knows ?b]) 102 | g (aa/graph :jena-rules [inf/table-all inverse-rule knows-rule]) 103 | g (aa/read g (io/resource "foaf.rdf")) 104 | g (aa/add g [{:rdf/about ::practical-clojure 105 | :dc/title "Practical Clojure" 106 | :foaf/maker [::luke 107 | ::stuart]}])] 108 | (is (= #{[::stuart]} 109 | (q/run g '[?s] 110 | '[:filter (not= ::luke ?s) 111 | [:bgp [::luke :foaf/knows ?s]]]))))) 112 | 113 | (deftest dynamic-rules 114 | (let [g (aa/graph :jena-rules []) 115 | g (aa/read g (io/resource "foaf.rdf")) 116 | g (aa/add g [{:rdf/about ::practical-clojure 117 | :dc/title "Practical Clojure" 118 | :foaf/maker [::luke 119 | ::stuart]}]) 120 | g (inf/add g inf/mini-rules)] 121 | (q/run g '[?a] 122 | '[:bgp [?a :rdf/type :foaf/Agent]]))) 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /src/arachne/aristotle/query.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.query 2 | (:require [arachne.aristotle.registry :as reg] 3 | [arachne.aristotle.query.compiler :as qc] 4 | [arachne.aristotle.graph :as graph] 5 | [arachne.aristotle.query.spec :as qs] 6 | [clojure.spec.alpha :as s] 7 | [clojure.walk :as w]) 8 | (:import [org.apache.jena.query QueryFactory QueryExecutionFactory] 9 | [org.apache.jena.sparql.algebra AlgebraGenerator Algebra OpAsQuery Op] 10 | [org.apache.jena.sparql.algebra.op OpProject Op1 OpSequence] 11 | 
;; NOTE(review): `Mod` is not referenced anywhere in this namespace, and
;; com.sun.org.apache.xpath.internal is a JDK-internal package -- confirm
;; whether this import can be dropped.
[com.sun.org.apache.xpath.internal.operations Mod]
[org.apache.jena.graph Graph Triple Node]
[org.apache.jena.sparql.engine.binding Binding]))


;; Argument shape accepted by `run`: graph, optional projection vars, the
;; query (prebuilt Op or query data), optional initial bindings.
(s/def ::run-args (s/cat :graph #(instance? Graph %)
                         :bindings (s/? (s/coll-of ::graph/variable))
                         :query (s/or :op #(instance? Op %)
                                      :query ::qs/operation)
                         :data (s/? ::qs/bindings)))

(defn build
  "Compile a query expressed as Clojure data into a Jena Op and pass it
  through Jena's algebra optimizer."
  [query]
  (-> query
      qc/op
      Algebra/optimize))

(defn- bind-data
  "Return an OpSequence that first evaluates a table built from `data`
  (via qc/build-table) and then `op`, so the table rows act as initial
  bindings for the operation."
  [op data]
  (let [table (qc/build-table data)]
    (OpSequence/create table op)))

(defn- project
  "Return an OpProject restricting `op` to the given vars."
  [op binding-vars]
  (let [vars (qc/var-seq binding-vars)]
    (OpProject. op vars)))


(defn run
  "Execute a query against a graph and return the results.

  Arguments, in order (validated against ::run-args):
   - the graph (an org.apache.jena.graph.Graph)
   - optionally, a collection of variables to project
   - the query: either a precompiled org.apache.jena.sparql.algebra.Op
     or a query data structure
   - optionally, initial variable bindings; this is how parameterized
     inputs are passed into the query.

  With a projection, the result is a set of vectors holding the values of
  the projected variables in order. Without one, the result is a vector of
  maps from variable to value."
  [& args]
  (let [conformed (s/conform ::run-args args)
        ;; s/assert* throws with an explanation when the args do not conform
        _ (when (= conformed ::s/invalid)
            (s/assert* ::run-args args))
        {:keys [bindings graph query data]} conformed
        [query-kind query-form] query
        base-op (if (= :op query-kind)
                  query-form
                  (build (w/prewalk identity
                                    (s/unform ::qs/operation query-form))))
        ;; each conformed binding entry is a [tag value] pair; keep the values
        table-data (when data (map second data))
        with-data (cond-> base-op
                    table-data (bind-data table-data))
        binding-vars (when bindings (qc/var-seq bindings))
        operation (cond-> with-data
                    binding-vars (project binding-vars))
        result-seq (iterator-seq (Algebra/exec ^Op operation ^Graph graph))
        row->vector (fn [^Binding row]
                      (mapv #(graph/data (.get row %)) binding-vars))
        row->map (fn [^Binding row]
                   (into {}
                         (map (fn [v] [(graph/data v) (graph/data (.get row v))]))
                         (iterator-seq (.vars row))))]
    (if binding-vars
      (set (map row->vector result-seq))
      (mapv row->map result-seq))))

(defn sparql
  "Convert a Jena Op (such as the value returned by `build`) back into a
  query via OpAsQuery. Mostly useful for debugging."
  [op]
  (OpAsQuery/asQuery op))

(defn parse
  "Turn a SPARQL query string into an optimized Jena Op."
  [^String query-str]
  (let [parsed (QueryFactory/create query-str)
        compiled (.compile (AlgebraGenerator.) parsed)]
    (Algebra/optimize compiled)))


(s/def ::pull-pattern
  (s/coll-of ::pull-attr :min-count 1))

(s/def ::pull-attr
  (s/or :wildcard #{'*}
        :attr-name ::graph/iri
        :map ::pull-map))

(s/def ::pull-map
  (s/map-of ::graph/iri (s/or :pattern ::pull-pattern
                              :recur #{'...}
                              :recur-n int?)
:conform-keys true))

;; Example of conforming a pull pattern. Kept for reference inside `comment`
;; so that it is no longer evaluated every time the namespace is loaded.
(comment
  (s/conform ::pull-pattern [:a/b '* :c/d {:foo/bar [:x/y]}]))

;; Query used by `pull*`: every [?subj ?pred ?obj] triple, with ?class
;; optionally bound when ?subj has an rdf:type that is a restriction on ?pred
;; with owl:cardinality 1 or owl:maxCardinality 1.
(def ^:private pull-q
  (build '[:conditional
           [:bgp [?subj ?pred ?obj]]
           [:sequence
            [:bgp
             [?subj :rdf/type ?class]
             [?class :owl/onProperty ?pred]]
            [:disjunction
             [:bgp [?class :owl/cardinality 1]]
             [:bgp [?class :owl/maxCardinality 1]]]]]))

(declare pull*)

(defn- compile-pattern
  "Compile a pull pattern to a function.

  The returned function takes `[graph depth pred val]` and returns:
   - nil when `pred` is not selected by the pattern (and there is no `*`)
   - the result of pulling `val` with the nested pattern, when the pattern
     maps `pred` to a vector
   - for a recursion entry (an int limit or `...`), `val` itself once `depth`
     has reached the limit, otherwise the result of pulling `val` with this
     same function at `depth + 1`
   - `val` unchanged in every other case."
  [pattern]
  (let [pattern (set pattern)
        wild? (pattern '*)
        ;; all map entries of the pattern, merged into a single map
        nested (apply merge (filter map? pattern))
        ;; every attribute named directly or as a key of a pattern map
        selected (->> pattern
                      (mapcat #(cond (map? %) (keys %)
                                     (= '* %) []
                                     :else [%]))
                      set)
        subparsers (into {}
                         (for [[k v] nested
                               :when (vector? v)]
                           [k (compile-pattern v)]))
        ;; `...` means unlimited recursion, represented as Long/MAX_VALUE
        limits (into {}
                     (for [[k v] nested
                           :let [limit (cond
                                         (int? v) v
                                         (= '... v) Long/MAX_VALUE)]
                           :when limit]
                       [k limit]))]
    (fn parse-val [graph depth pred val]
      (when (or wild? (selected pred))
        (let [subparser (subparsers pred)
              limit (limits pred)]
          (cond
            subparser (pull* graph val subparser 0)
            limit (if (<= limit depth)
                    val
                    (pull* graph val parse-val (inc depth)))
            :else val))))))

(defn- pull*
  "Run `pull-q` for `subject` and fold the resulting rows into a map that
  starts as {:rdf/about subject}. Returns nil when the query yields no rows.

  A row whose ?class is bound stores its value directly under the predicate;
  otherwise values are collected into a set. Rows for which `parse-val`
  returns nil are skipped."
  [graph subject parse-val depth]
  (let [results (run graph pull-q {'?subj subject})]
    (when (seq results)
      (reduce (fn [acc {card1 '?class, pred '?pred, val '?obj}]
                ;; if-some rather than if-let: a property whose value is
                ;; boolean false must be kept, only nil means "not selected"
                (if-some [val (parse-val graph depth pred val)]
                  (if card1
                    (assoc acc pred val)
                    (update acc pred (fnil conj #{}) val))
                  acc))
              {:rdf/about subject}
              results))))

(defn pull
  "Get all the properties associated with a subject using syntax similar
  to Datomic's Pull.

  Cardinality-1 properties will be returned as single values,
  otherwise property values will be wrapped in sets. Graph must
  support OWL inferencing to make this determination.

  Returns nil when no triples are found for the subject."
  [graph subject pattern]
  (pull* graph subject (compile-pattern pattern) 0))

--------------------------------------------------------------------------------
/test/arachne/aristotle/validation_test.clj:
--------------------------------------------------------------------------------
(ns arachne.aristotle.validation-test
  (:require [clojure.test :refer :all]
            [arachne.aristotle :as aa]
            [arachne.aristotle.registry :as reg]
            [arachne.aristotle.validation :as v]
            [clojure.java.io :as io]))

(reg/prefix 'daml "http://www.daml.org/2001/03/daml+oil#")
(reg/prefix 'wo.tf "http://www.workingontologist.org/Examples/Chapter6/TheFirm.owl#")
(reg/prefix 'arachne "http://arachne-framework.org/#")

(deftest disjoint-classes
  (let [g (aa/read (aa/graph :jena-mini) (io/resource "TheFirm.n3"))
        g (aa/add g {:rdf/about :wo.tf/TheFirm
                     :wo.tf/freeLancesTo :wo.tf/TheFirm})]

    (is (empty?
(v/validate g)))



    (let [g (aa/add g {:rdf/about :wo.tf/Company
                       :owl/disjointWith :wo.tf/Person})
          errors (v/validate g)]
      (is (= 2 (count errors)))
      (is (re-find #"disjoint" (::v/description (first errors))))
      (is (re-find #"same and different" (::v/description (second errors)))))))

;; Two spouses that are explicitly different individuals, on a functional
;; object property, must be reported.
(deftest functional-object-properties
  (let [facts [{:rdf/about :arachne/legalSpouse
                :rdf/type [:owl/ObjectProperty :owl/FunctionalProperty]
                :rdfs/domain :arachne/Person
                :rdfs/range :arachne/Person}
               {:rdf/about :arachne/jon
                :arachne/name "John"
                :arachne/legalSpouse [{:rdf/about :arachne/will
                                       :arachne/name "William"}]}
               {:rdf/about :arachne/jon
                :arachne/legalSpouse [{:rdf/about :arachne/bill
                                       :arachne/name "Bill"
                                       :owl/differentFrom :arachne/will}]}]
        errors (v/validate (aa/add (aa/graph :jena-mini) facts))]
    (is (seq errors))
    (is (some #(re-find #"too many values" (::v/jena-type %)) errors))))

;; Two distinct literal values on a functional datatype property.
(deftest functional-datatype-properties
  (let [facts [{:rdf/about :arachne/name
                :rdf/type [:owl/DatatypeProperty :owl/FunctionalProperty]
                :rdfs/domain :arachne/Person
                :rdfs/range :xsd/string}
               {:rdf/about :arachne/jon
                :arachne/name #{"John" "Jeff"}}]
        errors (v/validate (aa/add (aa/graph :jena-mini) facts))]
    (is (seq errors))
    (is (some #(re-find #"too many values" (::v/jena-type %)) errors))))

;; Three literal values where the restriction allows at most two.
(deftest max-cardinality-datatype
  (let [facts [{:rdf/about :arachne/Person
                :rdfs/subClassOf {:rdf/type :owl/Restriction
                                  :owl/onProperty :arachne/name
                                  :owl/maxCardinality 2}}
               {:rdf/about :arachne/name
                :rdf/type [:owl/DatatypeProperty]
                :rdfs/domain :arachne/Person
                :rdfs/range :xsd/string}
               {:rdf/about :arachne/jon
                :arachne/name #{"John" "Jeff" "James"}}]
        errors (v/validate (aa/add (aa/graph :jena-mini) facts))]
    (is (seq errors))
    (is (some #(re-find #"too many values" (::v/jena-type %)) errors))))

;; The "max N" context is nested inside "max 1", exactly as in the original
;; form.
(deftest max-cardinality-object
  (testing "max 1"
    (let [facts [{:rdf/about :arachne/Person
                  :rdfs/subClassOf {:rdf/type :owl/Restriction
                                    :owl/onProperty :arachne/friends
                                    :owl/maxCardinality 1}}
                 {:rdf/about :arachne/friends
                  :rdf/type [:owl/ObjectProperty]
                  :rdfs/domain :arachne/Person
                  :rdfs/range :arachne/Person}
                 {:rdf/about :arachne/jon
                  :arachne/name "John"
                  :arachne/friends #{{:rdf/about :arachne/jeff
                                      :arachne/name "Jeff"
                                      :owl/differentFrom :arachne/jim}
                                     {:rdf/about :arachne/jim
                                      :arachne/name "James"}}}]
          errors (v/validate (aa/add (aa/graph :jena-mini) facts))]
      (is (seq errors))
      (is (some #(re-find #"too many values" (::v/jena-type %)) errors)))
    (testing "max N"
      (let [facts [{:rdf/about :arachne/Person
                    :rdfs/subClassOf {:rdf/type :owl/Restriction
                                      :owl/onProperty :arachne/friends
                                      :owl/maxCardinality 2}}
                   {:rdf/about :arachne/friends
                    :rdf/type [:owl/ObjectProperty]
                    :rdfs/domain :arachne/Person
                    :rdfs/range :arachne/Person}
                   {:rdf/about :arachne/jon
                    :arachne/name "John"
                    :arachne/friends #{{:rdf/about :arachne/jeff
                                        :arachne/name "Jeff"
                                        :owl/differentFrom [:arachne/jim :arachne/sara]}
                                       {:rdf/about :arachne/jim
                                        :arachne/name "James"
                                        :owl/differentFrom [:arachne/sara :arachne/jeff]}
                                       {:rdf/about :arachne/sara
                                        :arachne/name "Sarah"
                                        :owl/differentFrom [:arachne/jim :arachne/jeff]}}}]
            errors (v/validate (aa/add (aa/graph :jena-mini) facts))]
        ;; The reasoner doesn't support this currently and there isn't a
        ;; great way to write a query, so we'll do without
        (is (empty? errors))))))

;; Runs only the v/min-cardinality validator and checks which properties and
;; entities the three reported errors refer to.
(deftest min-cardinality
  (let [schema [{:rdf/about :arachne/Person
                 :rdfs/subClassOf [{:rdf/type :owl/Restriction
                                    :owl/onProperty :arachne/name
                                    :owl/cardinality 1}
                                   {:rdf/type :owl/Restriction
                                    :owl/onProperty :arachne/friends
                                    :owl/minCardinality 2}]}
                {:rdf/about :arachne/name
                 :rdf/type [:owl/DatatypeProperty]
                 :rdfs/domain :arachne/Person
                 :rdfs/range :xsd/string}
                {:rdf/about :arachne/friends
                 :rdf/type [:owl/ObjectProperty]
                 :rdfs/domain :arachne/Person
                 :rdfs/range :arachne/Person}]
        people [{:rdf/about :arachne/jon
                 :arachne/name "John"
                 :arachne/friends {:rdf/about :arachne/nicole}}]
        g (-> (aa/graph :jena-mini)
              (aa/add schema)
              (aa/add people))
        errors (v/validate g [v/min-cardinality])
        tally (fn [k] (frequencies (map (comp k ::v/details) errors)))]
    (is (= 3 (count errors)))
    (is (= #{::v/min-cardinality} (set (map ::v/type errors))))
    (is (= {:arachne/name 1
            :arachne/friends 2}
           (tally :property)))
    (is (= {:arachne/jon 1
            :arachne/nicole 2}
           (tally :entity)))))

--------------------------------------------------------------------------------
/src/arachne/aristotle/registry.clj:
--------------------------------------------------------------------------------
(ns arachne.aristotle.registry
  "Tools for mapping between IRIs and keywords"
  (:require [clojure.string :as str])
  (:refer-clojure :exclude [find alias])
  (:import [org.apache.jena.rdf.model.impl Util]
           [clojure.lang ExceptionInfo]))

;; Note: it would potentially be more performant to use a trie or
;; prefix tree instead of a normal map for the inverse prefix
;; tree. Punting until it becomes a problem.
;; The registry holds four maps:
;;   :prefixes   tree keyed by namespace segment, leading to an IRI prefix
;;   :prefixes'  IRI prefix -> vector of namespace segments
;;   :aliases    keyword -> IRI
;;   :aliases'   IRI -> keyword
(defonce ^:dynamic *registry* {:prefixes {}
                               :prefixes' {}
                               :aliases {}
                               :aliases' {}})

(defn- tokenize-ns
  "Split the namespace of `kw` on dots."
  [kw]
  (str/split (namespace kw) #"\."))


(defn- by-prefix
  "Walk the prefix tree along the namespace segments of `kw` and build the
  IRI for it. Returns nil when `kw` has no namespace or nothing matches."
  [registry kw]
  (when (namespace kw)
    (let [local-name (name kw)]
      (loop [node registry
             [segment & remaining] (tokenize-ns kw)]
        (when-let [match (get node segment)]
          (if (empty? remaining)
            ;; last segment: exact registration first, then a wildcard
            (when-let [base (or (::= match) (get match "*"))]
              (str base local-name))
            (cond
              (get match (first remaining))
              (recur match remaining)

              ;; wildcard: unmatched segments become path elements
              (contains? match "*")
              (str (get match "*")
                   (str/join "/" remaining)
                   "/"
                   local-name))))))))

(defn- longest-prefix
  "Return the longest entry of `prefix-list` that `s` starts with, or nil.
  Among entries of equal length the first one encountered wins."
  [prefix-list s]
  (reduce (fn [best candidate]
            (cond
              (not (str/starts-with? s candidate)) best
              (nil? best) candidate
              (< (count best) (count candidate)) candidate
              :else best))
          nil
          prefix-list))

(defn- lookup-prefix
  "Build a keyword for `iri` from the inverse prefix map (IRI prefix ->
  namespace segments). Returns nil when no prefix matches, or when a
  non-wildcard prefix leaves more than one path element."
  [registry iri]
  (when-let [prefix (longest-prefix (keys registry) iri)]
    (let [fragment (subs iri (count prefix))
          fragment-seq (str/split fragment #"/")
          registration (get registry prefix)
          wild? (= "*" (last registration))]
      (if wild?
        ;; leading path elements extend the namespace, the last is the name
        (keyword (str/join "." (concat (drop-last registration)
                                       (drop-last fragment-seq)))
                 (last fragment-seq))
        (when (= 1 (count fragment-seq))
          (keyword (str/join "." registration) fragment))))))

(defn iri
  "Return the IRI for keyword `kw`: an alias if one is registered, otherwise
  one derived from a registered namespace prefix. Throws an ex-info when
  neither is available."
  [kw]
  (or (get (:aliases *registry*) kw)
      (by-prefix (:prefixes *registry*) kw)
      (throw (ex-info (format "Could not determine IRI for %s, no namespace, namespace prefix or alias found." kw)
                      {:keyword kw}))))

(defn kw
  "Return the keyword for `iri`: an alias if one is registered, otherwise one
  derived from a registered prefix. Returns nil when neither applies."
  [iri]
  (or (get (:aliases' *registry*) iri)
      (lookup-prefix (:prefixes' *registry*) iri)))

(defn- assoc-in-uniquely
  "Like assoc-in, except that when `prevent-overrides?` is true and a
  different value is already present at `ks`, an ex-info carrying the
  existing value under ::existing is thrown instead."
  [m prevent-overrides? ks v]
  (let [existing (get-in m ks)]
    (when (and prevent-overrides? existing (not= existing v))
      (throw (ex-info "Mapping conflict" {::existing existing})))
    (assoc-in m ks v)))


;; TODO: it shouldn't be possible to conflict with a non-wildcard
;; registration. The two can coexist.

;; TODO: we need to store non-wildcard registrations as a distinct map form. Not the same as a wildcard, but indicated somehow other than a non-associable form.

(defn- throw-conflicting-prefix
  "Throw the error for a namespace that is already registered to another
  IRI prefix."
  [registry namespace prefix existing]
  (let [message (format "Could not register namespace `%s` to IRI prefix `%s`: namespace is already registered to a different prefix, `%s`."
                        namespace prefix existing)]
    (throw (ex-info message
                    {:registry registry
                     :namespace namespace
                     :prefix prefix
                     :existing existing}))))

(defn- throw-conflicting-namespace
  "Throw the error for an IRI prefix that is already registered with another
  namespace."
  [registry namespace prefix existing]
  (let [message (format "Could not register namespace `%s` to IRI prefix `%s`: IRI prefix is already registered with a different namespace (`%s`)."
                        namespace prefix existing)]
    (throw (ex-info message
                    {:registry registry
                     :namespace namespace
                     :prefix prefix
                     :existing existing}))))

(defn add-prefix
  "Return an updated registry map with the given prefix mapping."
[registry prevent-overrides? namespace prefix]
  (let [segments (vec (str/split (name namespace) #"\."))
        registry (try
                   (update registry :prefixes assoc-in-uniquely prevent-overrides?
                           (if (= "*" (last segments)) segments (conj segments ::=)) prefix)
                   (catch ExceptionInfo e
                     (if-let [existing (::existing (ex-data e))]
                       (throw-conflicting-prefix registry (name namespace) prefix existing)
                       (throw e))))
        registry (try
                   (update registry :prefixes' assoc-in-uniquely prevent-overrides? [prefix] segments)
                   (catch ExceptionInfo e
                     (if-let [existing (::existing (ex-data e))]
                       (throw-conflicting-namespace registry (name namespace) prefix
                                                    (str/join "." existing))
                       (throw e))))]
    registry))

(defn add-alias
  "Return an updated registry map in which keyword `kw` and `iri` are aliased
  to each other (both directions are recorded).

  When `prevent-overrides?` is true and either side is already mapped to
  something else, throws an ex-info whose data holds :kw, :iri and the
  conflicting :existing value."
  [registry prevent-overrides? kw iri]
  (try
    (-> registry
        (update :aliases assoc-in-uniquely prevent-overrides? [kw] iri)
        (update :aliases' assoc-in-uniquely prevent-overrides? [iri] kw))
    (catch ExceptionInfo e
      (if-let [existing (::existing (ex-data e))]
        (throw (ex-info (format "Cannot alias `%s` to `%s`, already mapped to `%s`"
                                kw iri existing)
                        {:kw kw
                         ;; report the IRI that was being registered
                         ;; (this used to be the literal keyword :iri)
                         :iri iri
                         :existing existing}))
        (throw e)))))

(defn prefix
  "Register a namespace as an RDF IRI prefix in the root binding of
  *registry*. Throws if the registration conflicts with an existing one."
  [namespace iri-prefix]
  (alter-var-root #'*registry* add-prefix true namespace iri-prefix))

(defn alias
  "Register a mapping between a keyword and a specific IRI in the root
  binding of *registry*. Throws if it conflicts with an existing mapping."
  [kw iri]
  (alter-var-root #'*registry* add-alias true kw iri))

;; Value produced by the prefix data readers: a namespace prefix and its IRI.
(defrecord Prefix [prefix iri])

(defn read-prefix
  "Constructor for a prefix, called by data reader. Takes a [prefix iri]
  pair and returns a Prefix with both parts trimmed; nothing is registered."
  [[prefix iri]]
  (->Prefix (str/trim (name prefix)) (str/trim (name iri))))

(defn read-global-prefix
  "Constructor for a global prefix, called by data reader. Like
  `read-prefix`, but also registers the prefix globally via `prefix`."
  [[prefix iri]]
  (let [prefix (str/trim (name prefix))
        iri (str/trim (name iri))]
    (arachne.aristotle.registry/prefix prefix iri)
    (->Prefix prefix iri)))

(defn install-prefix
  "Add the given Prefix to the current binding of *registry*, replacing any
  conflicting registration. Uses `set!`, so *registry* must be thread-bound
  (for example inside `with`) when this is called."
  [prefix]
  (set! *registry* (add-prefix *registry* false (:prefix prefix) (:iri prefix))))

(defmacro with
  "Evaluate `body` with *registry* bound to the current registry extended by
  every namespace -> IRI-prefix entry of `prefix-map`. Entries override
  conflicting registrations for the extent of the binding."
  [prefix-map & body]
  `(binding [*registry* (reduce (fn [r# [ns# prefix#]]
                                  (add-prefix r# false ns# prefix#))
                                *registry*
                                ~prefix-map)]
     ~@body))

;; Prefixes registered by default.
(prefix 'rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
(prefix 'rdfs "http://www.w3.org/2000/01/rdf-schema#")
(prefix 'xsd "http://www.w3.org/2001/XMLSchema#")
(prefix 'owl "http://www.w3.org/2002/07/owl#")
(prefix 'owl2 "http://www.w3.org/2006/12/owl2#")

--------------------------------------------------------------------------------
/test/arachne/aristotle/query_test.clj:
--------------------------------------------------------------------------------
(ns arachne.aristotle.query-test
  (:require [clojure.test :refer :all]
            [arachne.aristotle.registry :as reg]
            [arachne.aristotle.query :as q]
            [arachne.aristotle :as aa]
            [clojure.java.io :as io]))

(reg/prefix 'foaf "http://xmlns.com/foaf/0.1/")
(reg/prefix 'socrata "http://www.socrata.com/rdf/terms#")
(reg/prefix 'dcat "http://www.w3.org/ns/dcat#")
(reg/prefix 'ods "http://open-data-standards.github.com/2012/01/open-data-standards#")
(reg/prefix 'dcterm "http://purl.org/dc/terms/")
(reg/prefix 'geo "http://www.w3.org/2003/01/geo/wgs84_pos#")
(reg/prefix 'skos "http://www.w3.org/2004/02/skos/core#")
(reg/prefix 'dsbase "http://data.lacity.org/resource/")
(reg/prefix 'ds "https://data.lacity.org/resource/zzzz-zzzz/")
(reg/prefix (ns-name
*ns*) "http://example.com/arachne.aristotle-query-test#")


;; Graph loaded once from la_census.rdf and shared by the tests below.
(def test-graph
  (-> (aa/graph :simple)
      (aa/read (io/resource "la_census.rdf"))))

;; The same lookup written as a map pattern, as triples, and without a
;; projection (which yields maps instead of vectors).
(deftest basic-query
  (let [from-map (q/run test-graph '[?pop]
                        '[:bgp {:rdf/about ?e
                                :ds/zip_code "90001"
                                :ds/total_population ?pop}])]
    (is (= #{["57110"]} from-map)))
  (let [from-triples (q/run test-graph '[?pop]
                            '[:bgp
                              [?e :ds/zip_code "90001"]
                              [?e :ds/total_population ?pop]])]
    (is (= #{["57110"]} from-triples)))
  (let [results (q/run test-graph
                       '[:bgp
                         [?e :ds/zip_code "90001"]
                         [?e :ds/total_population ?pop]])]
    (is (= "57110" (-> results first (get '?pop))))))

(deftest functions+filters
  (let [big-zips (q/run test-graph '[?zip]
                        '[:filter (< 105000 (:xsd/integer ?pop))
                          [:bgp
                           [?e :ds/zip_code ?zip]
                           [?e :ds/total_population ?pop]]])]
    (is (= #{["90650"]} big-zips))))

(deftest aggregates
  (let [stats (q/run test-graph '[?count ?min ?max ?avg]
                     '[:extend [?avg (round ?avgn)]
                       [:group [] [?count (count)
                                   ?min (min (:xsd/integer ?pop))
                                   ?max (max (:xsd/integer ?pop))
                                   ?avgn (avg (:xsd/integer ?pop))]
                        [:bgp
                         [_ :ds/total_population ?pop]]]])]
    (is (= #{[319 0 105549 33241]} stats))))

(deftest minus
  (let [rows (q/run test-graph
                    '[:diff
                      [:bgp [?zip :ds/total_population "0"]]
                      [:bgp [?zip :ds/zip_code "90831"]]])]
    (is (= 5 (count rows)))))

(deftest unions
  (let [rows (q/run test-graph
                    '[:union
                      [:bgp [?zip :ds/zip_code "92821"]]
                      [:bgp [?zip :ds/zip_code "90831"]]])]
    (is (= 2 (count rows)))))


(reg/prefix 'foaf "http://xmlns.com/foaf/0.1/")
(reg/prefix 'test "http://example.com/test/")

;; Four people, three of whom have a title (two of them share "Dr").
(def ca-graph
  (aa/add (aa/graph :simple)
          [{:rdf/about :test/olivia
            :foaf/name "Olivia Person"
            :foaf/title "Dr"}
           {:rdf/about :test/frank
            :foaf/name "Frank Person"
            :foaf/title "Dr"}
           {:rdf/about :test/jenny
            :foaf/name "Jenny Person"}
           {:rdf/about :test/sophia
            :foaf/name "Sophie Person"
            :foaf/title "Commander"}]))

;; Exercises the four forms of `count`: plain, over a var, distinct, and
;; distinct over a var.
(deftest count-aggregates
  (let [counts (q/run ca-graph
                      '[?simple-count ?title-count ?distinct-count ?distinct-title-count]
                      '[:group [] [?simple-count (count)
                                   ?title-count (count ?title)
                                   ?distinct-count (count (distinct))
                                   ?distinct-title-count (count (distinct ?title))]
                        [:conditional
                         [:bgp [?p :foaf/name ?name]]
                         [:bgp [?p :foaf/title ?title]]]])]
    (is (= #{[4 3 4 2]} counts))))

(deftest query-parameters
  (testing "single var, single value"
    (is (= #{["90001" "57110"]}
           (q/run test-graph '[?zip ?pop]
             '[:bgp
               [?e :ds/zip_code ?zip]
               [?e :socrata/rowID ?id]
               [?e :ds/total_population ?pop]
               [?e ?a ?v]]
             {'?zip "90001"}))))

  (testing "single var, multiple values."
    (is (= #{["90001" "57110"]
             ["90005" "37681"]}
           (q/run test-graph '[?zip ?pop]
             '[:bgp
               [?e :ds/zip_code ?zip]
               [?e :socrata/rowID ?id]
               [?e :ds/total_population ?pop]
               [?e ?a ?v]]
             {'?zip ["90001" "90005"]}))))

  (testing "multiple vars, single values."
(is (= #{}
           (q/run test-graph
             '[?pop] '[:bgp
                       [?e :ds/zip_code ?zip]
                       [?e :socrata/rowID ?id]
                       [?e :ds/total_population ?pop]
                       [?e ?a ?v]]
             {'?zip "90001"
              '?id "228"}))))

  (testing "multiple vars, multiple values"
    (is (= #{["51223"]}
           (q/run test-graph
             '[?pop] '[:bgp
                       [?e :ds/zip_code ?zip]
                       [?e :socrata/rowID ?id]
                       [?e :ds/total_population ?pop]
                       [?e ?a ?v]]
             {'?zip ["90001" "90002"]
              '?id ["2" "3"]}))))

  (testing "relational values"
    (is (= #{["57110"]}
           (q/run test-graph
             '[?pop] '[:bgp
                       [?e :ds/zip_code ?zip]
                       [?e :socrata/rowID ?id]
                       [?e :ds/total_population ?pop]
                       [?e ?a ?v]]
             {'[?zip ?id] [["90001" "1"]
                           ["90002" "0"]]}))))

  (testing "relational values, some unbound"
    (is (= #{["57110"]
             ["51223"]}
           (q/run test-graph '[?pop]
             '[:bgp
               [?e :ds/zip_code ?zip]
               [?e :socrata/rowID ?id]
               [?e :ds/total_population ?pop]
               [?e ?a ?v]]
             {'[?zip ?id] [["90001" "1"]
                           ["90002" nil]]})))))


;; Fixture for the pull tests. ::Person restricts ::name to exactly one value
;; and ::spouse to at most one; ::luke has a chain of nested ::friends.
(def pull-graph
  (let [schema [[::name :rdfs/domain ::Person]
                {:rdf/about ::Person
                 :rdfs/subClassOf [{:rdf/type :owl/Restriction
                                    :owl/onProperty ::name
                                    :owl/cardinality 1}
                                   {:rdf/type :owl/Restriction
                                    :owl/onProperty ::spouse
                                    :owl/maxCardinality 1}]}]
        people [{:rdf/about ::luke
                 ; :rdf/type ::Person
                 ::age 32
                 ::eyes "blue"
                 ::hair "brown"
                 ::name "Luke"
                 ::spouse {:rdf/about ::hannah
                           ::name "Hannah"}
                 ::friends [{:rdf/about ::jim
                             ::name "Jim"
                             ::friends {:rdf/about ::sara
                                        ::name "Sara"
                                        ::friends {:rdf/about ::thom
                                                   ::name "Thom"}}}
                            {:rdf/about ::jamie
                             ::name "Jamie"}]}]]
    (-> (aa/graph :jena-mini)
        (aa/add schema)
        (aa/add people))))

(deftest pull

  (testing "pull limited fields, with cardinality semantics"
    (let [expected {:rdf/about ::luke ::name "Luke" ::spouse ::hannah ::age #{32}}]
      (is (= expected
             (q/pull pull-graph ::luke [::spouse ::name ::age])))))

  (testing "pull all fields (incl. derived.)"
    (let [pulled (q/pull pull-graph ::luke [::name '*])]
      (is (= #{:rdf/type :rdf/about
               :owl/sameAs
               ::spouse ::friends ::hair ::eyes ::age ::name}
             (set (keys pulled))))))

  (testing "pull missing"
    (is (nil? (q/pull pull-graph ::poseidon [::name]))))

  (testing "nested patterns"
    (let [expected {:rdf/about ::luke
                    ::friends
                    #{{:rdf/about ::jim
                       ::name "Jim"}
                      {:rdf/about ::jamie
                       ::name "Jamie"}},
                    ::spouse
                    {:rdf/about ::hannah
                     ::name "Hannah"}}]
      (is (= expected
             (q/pull pull-graph ::luke [{::spouse [::name]
                                         ::friends [::name]}])))))
  (testing "unlimited recursion"
    (let [jim? #(= "Jim" (::name %))]
      (is (= "Thom"
             (->> (q/pull pull-graph ::luke [::name {::friends '...}])
                  ::friends
                  (filter jim?)
                  first
                  ::friends
                  first
                  ::friends
                  first
                  ::name)))))

  (testing "limited recursion"
    (let [jim? #(= "Jim" (::name %))]
      (is (= ::thom
             (->> (q/pull pull-graph ::luke [::name {::friends '2}])
                  ::friends
                  (filter jim?)
                  first
                  ::friends
                  first
                  ::friends
                  first))))))

--------------------------------------------------------------------------------
/src/arachne/aristotle/query/spec.clj:
--------------------------------------------------------------------------------
(ns arachne.aristotle.query.spec
  (:require [clojure.spec.alpha :as s]
            [arachne.aristotle.graph :as g]))

(defmacro defd
  "Define a spec with `s/def`, accepting a docstring between the name and
  the spec body.

  The docstring is not stored anywhere; it is only documentation in the
  source."
  [name docstr & body]
  (list* `s/def name body))

(defd ::operation
  "A SPARQL algebra operation, which may be one of a variety of
  types."
15 | (s/or :bgp ::bgp 16 | :table ::table 17 | :distinct ::distinct 18 | :project ::project 19 | :filter ::filter 20 | :conditional ::conditional 21 | :dataset-names ::dataset-names 22 | :diff ::diff 23 | :disjunction ::disjunction 24 | :extend ::extend 25 | :graph ::graph 26 | :group ::group 27 | :join ::join 28 | :label ::label 29 | :left-join ::left-join 30 | :list ::list 31 | :minus ::minus 32 | :null ::null 33 | :order ::order 34 | :quad ::quad 35 | :quad-block ::quad-block 36 | :quad-pattern ::quad-pattern 37 | :reduced ::reduced 38 | :sequence ::sequence 39 | :slice ::slice 40 | :top-n ::top-n 41 | :union ::union)) 42 | 43 | ;; Data Structures 44 | 45 | (defd ::bindings 46 | "Var bindings used by :table op and as input bindings for queries." 47 | (s/coll-of 48 | (s/or :var->value (s/tuple ::g/variable (complement coll?)) 49 | :var->values (s/tuple ::g/variable 50 | (s/coll-of (complement coll?))) 51 | :vars->values (s/tuple (s/coll-of ::g/variable :kind vector?) 52 | (s/coll-of (s/coll-of (complement coll?) :kind vector?) 53 | :kind vector?))) 54 | :into #{})) 55 | 56 | (defd ::var-set 57 | "A specific set of logic variables." 58 | (s/coll-of ::g/variable :min-count 0)) 59 | 60 | (defd ::var-expr-list 61 | "A list of alternating var/expr bindings (similar to Clojure's 62 | `let`)" 63 | (s/cat :pairs (s/+ (s/cat :var ::g/variable :expr ::expr)))) 64 | 65 | (defd ::var-aggr-list 66 | "A list of alternating var/aggregate bindings (similar to Clojure's 67 | `let`)" 68 | (s/cat :pairs (s/+ (s/cat :var ::g/variable :aggr ::agg-expr)))) 69 | 70 | 71 | (defd ::sort-conditions 72 | "A list of alternating expresion/direction pairs." 73 | (s/cat :pairs (s/+ (s/cat :expr ::expr :direction #{:asc :desc})))) 74 | 75 | (defd ::quad 76 | "Quad represented as a 4-tuple" 77 | (s/tuple ::g/node ::g/node ::g/node ::g/node)) 78 | 79 | ;; Operations 80 | 81 | (defd ::bgp 82 | "A basic graph pattern. Multiple triples that will be matched 83 | against the data store." 
84 | (s/cat :op #{:bgp} :triples (s/+ ::g/triples))) 85 | 86 | (defd ::table 87 | "Introduces a tabled set of possible bindings. Corresponds to VALUES 88 | clause in SPARQL. Each binding map entry corresponds to a separate 89 | underlying TableOp, combined using a sequence." 90 | (s/cat :op #{:table} :map ::bindings-map)) 91 | 92 | (defd ::distinct 93 | "Removes duplicate solutions from the solution set. Corresponds to 94 | SPARQL's DISTINCT keyword." 95 | (s/cat :op #{:distinct} :child ::operation)) 96 | 97 | (defd ::project 98 | "Retains only some of the variables in the solution set. Corresponds 99 | to SPARQL's SELECT clause." 100 | (s/cat :op #{:project} :vars ::var-set :child ::operation)) 101 | 102 | (defd ::filter 103 | "Filters results based on an expression. Corresponds to SPARQL's 104 | FILTER." 105 | (s/cat :op #{:filter} :exprs (s/+ ::expr) :child ::operation)) 106 | 107 | (defd ::conditional 108 | "Takes two child operations; results from the first child will be 109 | returned even if vars from the second are unbound. Corresponds to 110 | SPARQL's OPTIONAL." 111 | (s/cat :op #{:conditional} :base ::operation :optional ::operation)) 112 | 113 | (defd ::dataset-names 114 | "Not sure what this form does TBH. ARQ doesn't document it." 115 | (s/cat :op #{:dataset-names} :node ::g/node)) 116 | 117 | (defd ::diff 118 | "Return solutions that are present in one child or the other, but 119 | not both." 120 | (s/cat :op #{:diff} :a ::operation :b ::operation)) 121 | 122 | (defd ::disjunction 123 | "Logical disjunction between multiple operations." 124 | (s/cat :op #{:disjunction} :children (s/+ ::operation))) 125 | 126 | (defd ::extend 127 | "Bind one or more variables to expression results, within a body 128 | operation." 129 | (s/cat :op #{:extend} :vars (s/spec ::var-expr-list) :body ::operation)) 130 | 131 | (defd ::graph 132 | "Define a graph using a name and an operation." 
133 | (s/cat :op #{:graph} :label ::g/node :body ::operation)) 134 | 135 | (defd ::group 136 | "Group results by a set of variables and expressions, optionally 137 | calling an aggregator function on the results. Corresponds to 138 | SPARQL's GROUP BY." 139 | (s/cat :op #{:group} 140 | :vars ::var-set 141 | :aggregators (s/spec ::var-aggr-list) 142 | :body ::operation)) 143 | 144 | (defd ::join 145 | "Join the result sets of two operations. Corresponds to a nested 146 | pattern in SPARQL." 147 | (s/cat :op #{:join} :left ::operation :right ::operation)) 148 | 149 | (defd ::label 150 | "Do-nothing operation to annotate the operation tree with arbitrary 151 | objects. Unlikely to be useful in Aristotle." 152 | (s/cat :op #{:label} :label any? :child ::operation)) 153 | 154 | (defd ::left-join 155 | "Outer join or logical union of two sub-operators, subject to the 156 | provided filter expressions. Equivalent to an OPTIONAL plus a filter 157 | expression in SPARQL." 158 | (s/cat :op #{:left-join} :left ::operation :right ::operation 159 | :exprs (s/+ ::expr-list))) 160 | 161 | (defd ::list 162 | "View of a result set as a list. Usually redundant in Aristotle." 163 | (s/cat :op #{:list} :child ::operation)) 164 | 165 | (defd ::minus 166 | "Return solutions in the first operation, with matching solutions in 167 | the second operation removed. Corresponds to SPARQL's MINUS." 168 | (s/cat :op #{:minus} :left ::operation :right ::operation)) 169 | 170 | (defd ::null 171 | "Operation representing the empty result set." 172 | (s/cat :op #{:null})) 173 | 174 | (defd ::order 175 | "Yield a sorted view of the result set, given some sort 176 | conditions. Corresponds to SPARQL's ORDER BY." 
177 | (s/cat :op #{:order} :sort-conditions ::sort-conditions :child ::operation)) 178 | 179 | (defd ::quad 180 | "A single RDF quad" 181 | (s/cat :op #{:quad} :quad ::quad)) 182 | 183 | (defd ::quad-block 184 | "A quad pattern formed from multiple 4-tuples" 185 | (s/cat :op #{:quad-block} :quads (s/+ ::quad))) 186 | 187 | (defd ::quad-pattern 188 | "A logical quad pattern formed by supplying a graph identifier and 189 | one or more triple forms (parsed using Aristotle's standard triple 190 | format.)" 191 | (s/cat :op #{:quad-pattern} 192 | :graph-id ::g/node 193 | :triples (s/+ ::g/triples))) 194 | 195 | (defd ::reduced 196 | "Similar to :distinct in that it removes duplicate entries, but more 197 | performant because it only removes _consecutive_ duplicate 198 | entries (meaning the result set may still contain duplicates)." 199 | (s/cat :op #{:reduced} :child ::operation)) 200 | 201 | (defd ::sequence 202 | "A join-like operation where the result set from one operation can 203 | be fed directly into the next form, without any concern for scoping 204 | issues." 205 | (s/cat :op #{:sequence} :first ::operation :second ::operation)) 206 | 207 | (defd ::slice 208 | "Return a subset of the result set using a start index and a 209 | length. Corresponds to SPARQL's LIMIT and OFFSET." 210 | (s/cat :op #{:slice} :child ::operation :start int? :length int?)) 211 | 212 | (defd ::top-n 213 | "Limit to the first N results of a result set. More efficient 214 | than :order combined with :slice because it does not need to realize 215 | the entire result set at once." 216 | (s/cat :op #{:top-n} 217 | :count int? 218 | :sort-conditions ::sort-conditions 219 | :child ::operation)) 220 | 221 | (defd ::union 222 | "Logical union of the result set from two operations. Corresponds to SPARQL's UNION." 223 | (s/cat :op #{:union} :left ::operation :right ::operation)) 224 | 225 | ;; Expressions 226 | 227 | (defd ::composite-expr 228 | "Expression form whose arguments are other expressions." 
229 | (s/cat :name symbol? :args (s/+ ::expr))) 230 | 231 | (defd ::exists-expr 232 | "Expression which takes an operation as its argument. Returns true 233 | if the operation has a non-empty result set." 234 | (s/cat :e #{'exists} :op ::operation)) 235 | 236 | (defd ::not-exists-expr 237 | "Expression which takes an operation as its argument. Returns true 238 | if the operation has an empty result set." 239 | (s/cat :e #{'not-exists} :op ::operation)) 240 | 241 | (defd ::custom-expr 242 | "User-defined expression, with an IRI in 'function position'." 243 | (s/cat :fn ::g/iri :args (s/+ ::expr))) 244 | 245 | (defd ::expr 246 | "An expression that resolves to a value." 247 | (s/or :node ::g/node 248 | :exists ::exists-expr 249 | :not-exists ::not-exists-expr 250 | :composite-expr ::composite-expr 251 | :custom-expr ::custom-expr)) 252 | 253 | (defd ::count-agg-expr 254 | "Count expression. Concrete implementation depends on whether it has 255 | no args, a single variable arg, or a nested `distinct expression, 256 | which may or may not have a var (for a total of 4 possibilities.)" 257 | (s/or :simple-count (s/cat :expr #{'count}) 258 | :count-var (s/cat :expr #{'count} :var ::g/variable) 259 | :count-distinct (s/cat :expr #{'count} 260 | :distinct (s/spec (s/cat :expr #{'distinct}))) 261 | :count-distinct-var 262 | (s/cat :expr #{'count} 263 | :distinct (s/spec (s/cat :expr #{'distinct} :var ::g/variable))))) 264 | 265 | (defd ::agg-expr 266 | "An aggregate expression." 
267 | (s/or :count ::count-agg-expr 268 | :sum (s/cat :type #{'sum} :arg ::expr) 269 | :avg (s/cat :type #{'avg} :arg ::expr) 270 | :min (s/cat :type #{'min} :arg ::expr) 271 | :max (s/cat :type #{'max} :arg ::expr) 272 | :sample (s/cat :type #{'sample} :arg ::expr) 273 | :group-concat (s/cat :type #{'group-concat} 274 | :expr ::expr :separator string?))) 275 | 276 | 277 | -------------------------------------------------------------------------------- /src/arachne/aristotle/graph.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.graph 2 | "Tools for converting Clojure data to an Jena Graph representation" 3 | (:require [arachne.aristotle.registry :as reg] 4 | [ont-app.vocabulary.lstr :refer [lang ->LangStr]] 5 | [clojure.spec.alpha :as s] 6 | [clojure.string :as str]) 7 | (:import [clojure.lang Keyword Symbol] 8 | [ont_app.vocabulary.lstr LangStr] 9 | [java.net URL URI] 10 | [java.util GregorianCalendar Calendar Date Map Collection List] 11 | [org.apache.jena.graph Node NodeFactory Triple GraphUtil Node_URI Node_Literal Node_Variable Node_Blank Graph] 12 | [org.apache.jena.datatypes.xsd XSDDatatype XSDDateTime] 13 | [javax.xml.bind DatatypeConverter] 14 | [org.apache.jena.riot RDFDataMgr] 15 | [org.apache.jena.reasoner TriplePattern]) 16 | (:refer-clojure :exclude [reify load])) 17 | 18 | (defn variable? 19 | [s] 20 | (and (symbol? s) (.startsWith (name s) "?"))) 21 | 22 | (defn uri-str? 23 | [o] 24 | (and (string? o) (re-matches #"^<.*>$" o))) 25 | 26 | (defn literal? 27 | [obj] 28 | (and (not (coll? obj)) 29 | (not (instance? java.util.Collection obj)))) 30 | 31 | (s/def ::variable variable?) 32 | 33 | (s/def ::iri (s/or :keyword keyword? 34 | :uri uri-str?)) 35 | 36 | (s/def ::literal literal?) 37 | 38 | (s/def ::named-blank #(and (symbol? 
%) (.startsWith (name %) "_"))) 39 | (s/def ::anon-blank #(= '_ %)) 40 | 41 | (s/def ::blank (s/or :anonymous ::anon-blank 42 | :named ::named-blank)) 43 | 44 | (s/def ::node (s/or :variable ::variable 45 | :blank ::blank 46 | :iri ::iri 47 | :literal ::literal)) 48 | 49 | (s/def ::triple (s/tuple ::node ::node ::node)) 50 | 51 | (defn graph? [obj] (instance? Graph obj)) 52 | 53 | (s/def ::triples (s/or :map map? 54 | :maps (s/coll-of map? :min-count 1) 55 | :triples (s/coll-of ::triple :min-count 1) 56 | :single-triple ::triple 57 | :graph graph? 58 | :empty #(and (coll? %) (empty? %)))) 59 | 60 | (defprotocol AsTriples 61 | "An object that can be converted to a collection of Jena Triples." 62 | (triples [obj] "Convert this object to a collection of Jena Triples")) 63 | 64 | (defprotocol AsNode 65 | "An object that can be interpreted as a node in an RDF graph." 66 | (node ^Node [obj] "Convert this object to a Jena RDFNode.")) 67 | 68 | (defprotocol AsClojureData 69 | "A Node that can be converted back to Clojure data" 70 | (data [node] "Convert this node to Clojure data")) 71 | 72 | (extend-protocol AsNode 73 | Keyword 74 | (node [kw] (NodeFactory/createURI (reg/iri kw))) 75 | URI 76 | (node [uri] (NodeFactory/createURI (.toString uri))) 77 | URL 78 | (node [url] (NodeFactory/createURI (.toString url))) 79 | Symbol 80 | (node [sym] 81 | (cond 82 | (= '_ sym) (NodeFactory/createBlankNode) 83 | (.startsWith (name sym) "_") (NodeFactory/createBlankNode 84 | (str (symbol (namespace sym) 85 | (subs (name sym) 1)))) 86 | (.startsWith (name sym) "?") (NodeFactory/createVariable (subs (name sym) 1)) 87 | (namespace sym) (NodeFactory/createURI (str "urn:clojure:" (namespace sym) "/" (name sym))) 88 | :else (NodeFactory/createURI (str "urn:clojure:" (name sym))))) 89 | String 90 | (node [obj] 91 | (if-let [uri (second (re-find #"^<(.*)>$" obj))] 92 | (NodeFactory/createURI uri) 93 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDstring))) 94 | LangStr 95 | (node [obj] 
96 | (NodeFactory/createLiteral (str obj) (lang obj) XSDDatatype/XSDstring)) 97 | Long 98 | (node [obj] 99 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDlong)) 100 | Integer 101 | (node [obj] 102 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDinteger)) 103 | Double 104 | (node [obj] 105 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDdouble)) 106 | Float 107 | (node [obj] 108 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDfloat)) 109 | Boolean 110 | (node [obj] 111 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDboolean)) 112 | java.math.BigDecimal 113 | (node [obj] 114 | (NodeFactory/createLiteralByValue obj XSDDatatype/XSDdecimal)) 115 | Date 116 | (node [obj] 117 | (node (doto (GregorianCalendar.) (.setTime obj)))) 118 | Calendar 119 | (node [obj] 120 | (NodeFactory/createLiteral 121 | (DatatypeConverter/printDateTime obj) 122 | XSDDatatype/XSDdateTime)) 123 | Node 124 | (node [node] node)) 125 | 126 | (defn- subject-map 127 | "Given a set of triples with the same subject, emit a Clojure map" 128 | [subject triples] 129 | (->> triples 130 | (group-by #(.getPredicate ^Triple %)) 131 | (map (fn [[pred triples]] 132 | (let [objects (map #(data (.getObject ^Triple %)) triples)] 133 | [(data pred) (if (= 1 (count objects)) 134 | (first objects) 135 | objects)]))) 136 | (into {:rdf/about (data subject)}))) 137 | 138 | (defn graph->clj 139 | "Convert a Graph to a Clojure data structure. Optionally takes a 140 | filter function to filter maps before returning." 141 | ([g] (graph->clj g (constantly true))) 142 | ([^Graph g ffn] 143 | (->> (iterator-seq (.find g)) 144 | (group-by #(.getSubject ^Triple %)) 145 | (map (fn [[subject triples]] (subject-map subject triples))) 146 | (filter ffn)))) 147 | 148 | (extend-protocol AsClojureData 149 | nil 150 | (data [n] nil) 151 | Node_URI 152 | (data [n] (let [uri (.getURI n)] 153 | (or (reg/kw uri) 154 | (when (str/starts-with? 
uri "urn:clojure:") 155 | (symbol (str/replace uri "urn:clojure:" ""))) 156 | (str "<" uri ">")))) 157 | Node_Literal 158 | (data [^Node_Literal n] 159 | (if (= XSDDatatype/XSDdateTime (.getLiteralDatatype n)) 160 | (.getTime (.asCalendar ^XSDDateTime (.getLiteralValue n))) 161 | (if-let [lang (not-empty (.getLiteralLanguage n))] 162 | (->LangStr (.getLiteralValue n) lang) 163 | (.getLiteralValue n)))) 164 | Node_Variable 165 | (data [n] (symbol (str "?" (.getName n)))) 166 | 167 | Node_Blank 168 | (data [n] (symbol (str "_" (.getBlankNodeLabel n)))) 169 | 170 | Graph 171 | (data [g] (graph->clj g))) 172 | 173 | (defn- triple? 174 | "Does an object look like a triple?" 175 | [obj] 176 | (and (instance? List obj) 177 | (= 3 (count obj)) 178 | (not-any? coll? obj))) 179 | 180 | (defn triple 181 | "Build a Triple object" 182 | ([[s p o]] (triple s p o)) 183 | ([s p o] (Triple/create (node s) (node p) (node o)))) 184 | 185 | (defn- inv-triple 186 | "Create a triple from the given subject, predicate and object, 187 | inverting the triple if appropriate for the predicate." 188 | [s p o] 189 | (if (and (keyword? 
p) (.startsWith (name p) "_")) 190 | (let [p (keyword (namespace p) (subs (name p) 1))] 191 | (triple o p s)) 192 | (triple s p o))) 193 | 194 | (defn rdf-list 195 | "Create an RDF linked list from the given sequence of values" 196 | [[item & more]] 197 | {:rdf/type :rdf/List 198 | :rdf/first item 199 | :rdf/rest (if (seq more) 200 | (rdf-list more) 201 | :rdf/nil)}) 202 | 203 | (defn- numbered 204 | "Return a map of the given items, keyed in order by the bracketed IRIs of :rdf/_0, :rdf/_1 and so on. NOTE(review): RDF container membership properties start at rdf:_1; confirm zero-based numbering is intended." 205 | [items] 206 | (zipmap 207 | (map #(str "<" (reg/iri (keyword "rdf" (str "_" %))) ">") (range (count items))) 208 | items)) 209 | 210 | (defn rdf-bag 211 | "Create an RDF Bag from the given collection of values" 212 | [items] 213 | (assoc (numbered items) :rdf/type :rdf/Bag)) 214 | 215 | (defn rdf-alt 216 | "Create an RDF Alt from the given collection of values" 217 | [items] 218 | (assoc (numbered items) :rdf/type :rdf/Alt)) 219 | 220 | (defn rdf-seq 221 | "Create an RDF Seq from the given collection of values" 222 | [items] 223 | (assoc (numbered items) :rdf/type :rdf/Seq)) 224 | 225 | (extend-protocol AsTriples 226 | 227 | arachne.aristotle.registry.Prefix 228 | (triples [prefix] 229 | (reg/install-prefix prefix) 230 | []) 231 | 232 | Triple 233 | (triples [triple] [triple]) 234 | 235 | Collection 236 | (triples [coll] 237 | (reg/with {} 238 | (if (triple? coll) 239 | [(apply triple (map node coll))] 240 | (mapcat triples coll)))) 241 | 242 | Map 243 | (triples [m] 244 | (reg/with {} 245 | (let [subject (if-let [about (:rdf/about m)] 246 | (node about) 247 | (NodeFactory/createBlankNode)) 248 | m (dissoc m :rdf/about) 249 | m (if (empty? 
m) 250 | {:rdf/type :rdfs/Resource} 251 | m) 252 | child-map-triples (fn [property child-map] 253 | (let [child-triples (triples child-map) 254 | child-subject (.getSubject ^Triple (first child-triples))] 255 | (cons 256 | (inv-triple subject property child-subject) 257 | child-triples)))] 258 | (mapcat (fn [[k v]] 259 | (cond 260 | (instance? Map v) 261 | (child-map-triples k v) 262 | 263 | (instance? Collection v) 264 | (mapcat (fn [child] 265 | (if (instance? Map child) 266 | (child-map-triples k child) 267 | [(inv-triple subject k child)])) (filter identity v)) 268 | :else 269 | [(inv-triple subject k v)])) 270 | m)))) 271 | 272 | Graph 273 | (triples [^Graph g] 274 | (.toSet (.find g (Triple/create (node '?s) (node '?p) (node '?o)))))) 275 | 276 | (defn reify 277 | "Given a graph, a property and a subject, add reification triples for 278 | every triple in the graph, and add a [statement property subject] triple 279 | on each reified statement. Returns the graph." 280 | [graph property subject] 281 | (let [new-triples (mapcat (fn [^Triple t] 282 | (triples {:rdf/type :rdf/Statement 283 | :rdf/subject (.getSubject t) 284 | :rdf/predicate (.getPredicate t) 285 | :rdf/object (.getObject t) 286 | property subject})) 287 | (triples graph))] 288 | (GraphUtil/add ^Graph graph ^java.util.List new-triples)) 289 | graph) 290 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /src/arachne/aristotle/query/compiler.clj: -------------------------------------------------------------------------------- 1 | (ns arachne.aristotle.query.compiler 2 | (:require [arachne.aristotle.registry :as reg] 3 | [arachne.aristotle.graph :as graph] 4 | [clojure.spec.alpha :as s] 5 | [arachne.aristotle.graph :as g]) 6 | (:import [org.apache.jena.graph Node NodeFactory Triple Node_Variable Node_Blank] 7 | [org.apache.jena.sparql.expr Expr NodeValue ExprVar ExprList E_GreaterThan E_Equals E_LessThan E_GreaterThanOrEqual E_LogicalNot E_LogicalAnd E_LogicalOr E_NotEquals E_LessThanOrEqual E_BNode E_Bound E_Conditional E_Datatype E_DateTimeDay E_DateTimeHours E_DateTimeMinutes E_DateTimeMonth E_DateTimeSeconds E_DateTimeTimezone E_DateTimeYear E_Divide E_Exists E_IRI E_IsIRI E_IsBlank E_IsLiteral E_IsNumeric E_IsURI E_Add E_Lang E_LangMatches E_MD5 E_Multiply E_Subtract E_Now E_NumAbs E_NumCeiling E_NumFloor E_NumRound E_Random E_Regex E_SameTerm E_Str E_SHA1 E_SHA224 E_SHA256 E_SHA384 E_SHA512 E_StrAfter E_StrBefore E_StrConcat E_StrContains 
E_StrDatatype E_StrLength E_StrEndsWith E_StrStartsWith E_StrLang E_StrSubstring E_StrUpperCase E_StrUUID E_StrLowerCase E_UnaryPlus E_UnaryMinus E_URI E_Version E_UUID E_StrEncodeForURI E_StrReplace E_Coalesce E_OneOf E_NotOneOf E_Function E_NotExists ExprAggregator] 8 | [org.apache.jena.sparql.core BasicPattern Var VarExprList QuadPattern Quad] 9 | [org.apache.commons.lang3.reflect ConstructorUtils] 10 | [org.apache.jena.sparql.algebra OpAsQuery Algebra Table Op] 11 | [org.apache.jena.sparql.algebra.table Table1 TableN] 12 | [org.apache.jena.sparql.algebra.op OpDistinct OpProject OpFilter OpBGP OpConditional OpDatasetNames OpDiff OpDisjunction OpDistinctReduced OpExtend OpGraph OpGroup OpJoin OpLabel OpLeftJoin OpList OpMinus OpNull OpOrder OpQuad OpQuadBlock OpQuadPattern OpReduced OpSequence OpSlice OpTopN OpUnion OpTable] 13 | [org.apache.jena.sparql.expr.aggregate AggCount$AccCount AggSum AggAvg AggMin AggMax AggGroupConcat$AccGroupConcat AggSample$AccSample AggGroupConcat AggCount AggCountVar AggCountDistinct AggCountVarDistinct AggSample] 14 | [org.apache.jena.query SortCondition] 15 | [org.apache.jena.sparql.engine.binding BindingBuilder] 16 | [org.apache.jena.sparql.core Var] 17 | [java.util List])) 18 | 19 | (defn- replace-vars 20 | "Given a collection of Triples, return new Triples in which Node_Variable 21 | nodes are replaced with Var objects and blank nodes with generated Vars." 22 | [triples] 23 | (let [bnodes (atom {}) 24 | update-bnodes (fn [bnodes id] 25 | (if (bnodes id) 26 | bnodes 27 | (assoc bnodes id (Var/alloc ^String (str "?" (count bnodes)))))) 28 | replace (fn [node] 29 | (cond 30 | (instance? Node_Variable node) (Var/alloc ^Node_Variable node) 31 | (instance? Node_Blank node) 32 | (let [id (.getBlankNodeLabel ^Node_Blank node) 33 | bnodes (swap! 
bnodes update-bnodes id)] 34 | (bnodes id)) 35 | :else node))] 36 | (for [^Triple triple triples] 37 | (Triple/create 38 | (replace (.getSubject triple)) 39 | (replace (.getPredicate triple)) 40 | (replace (.getObject triple)))))) 41 | 42 | (defn- triples 43 | "Convert the given Clojure data structure to a set of Jena triples" 44 | [data] 45 | (s/assert* ::graph/triples data) 46 | (replace-vars (graph/triples data))) 47 | 48 | (defn var-seq 49 | "Convert a seq of variable names to a list of Var nodes" 50 | [s] 51 | (mapv #(Var/alloc (graph/node %)) s)) 52 | 53 | (declare op) 54 | (declare expr) 55 | (declare aggregator) 56 | 57 | (defn- var-expr-list 58 | "Given a vector of var/expr bindings (reminiscent of Clojure's `let`), return a Jena VarExprList with vars and exprs." 59 | [bindings] 60 | (let [vel (VarExprList.)] 61 | (doseq [[v e] (partition 2 bindings)] 62 | (.add vel (Var/alloc (graph/node v)) (expr e))) 63 | vel)) 64 | 65 | (defn- var-aggr-list 66 | "Given a vector of var/aggregate bindings, return a vector of Jena 67 | ExprAggregators, one per var/aggregate pair." 68 | [bindings] 69 | (vec (for [[v e] (partition 2 bindings)] 70 | (ExprAggregator. (Var/alloc (graph/node v)) (aggregator e))))) 71 | 72 | (defn- sort-conditions 73 | "Given a flat seq of alternating expressions and direction keywords, return a seq of 74 | SortConditions. :asc maps to direction 1; any other value maps to -1." 75 | [conditions] 76 | (for [[e dir] (partition 2 conditions)] 77 | (SortCondition. ^Expr (expr e) (if (= :asc dir) 1 -1)))) 78 | 79 | (defn- quad-pattern 80 | "Parse the given Clojure data structure into a Jena QuadPattern object" 81 | ^QuadPattern [quads] 82 | (let [qp (QuadPattern.)] 83 | (doseq [[g s p o] quads] 84 | (let [quad (Quad. (g/node g) (g/triple s p o))] 85 | (.add qp quad))) 86 | qp)) 87 | 88 | (defn aggregator 89 | "Convert a Clojure data structure representing an aggregation expression to 90 | a Jena Aggregator object" 91 | [[op & [a1 a2 & _ :as args]]] 92 | (case op 93 | count (cond 94 | (symbol? a1) 95 | (AggCountVar. 
(expr a1)) 96 | 97 | (and (seq? a1) (= 'distinct (first a1)) (and (= 1 (count a1)))) 98 | (AggCountDistinct.) 99 | 100 | (and (seq? a1) (= 'distinct (first a1)) (and (= 2 (count a1)))) 101 | (AggCountVarDistinct. (expr (second a1))) 102 | 103 | :else (AggCount.)) 104 | sum (AggSum. (expr a1)) 105 | avg (AggAvg. (expr a1)) 106 | min (AggMin. (expr a1)) 107 | max (AggMax. (expr a1)) 108 | group-concat (AggGroupConcat. (expr a1) a2) 109 | sample (AggSample. (expr a1)))) 110 | 111 | (defn- table-bindings 112 | "Add a bindings map entry to the given table" 113 | [^TableN t [k v]] 114 | (if (coll? v) 115 | (doseq [node v] 116 | (let [bb (BindingBuilder/create)] 117 | (if (coll? k) 118 | (mapv #(when %2 (.add bb (Var/alloc (graph/node %1)) (graph/node %2))) k node) 119 | (.add bb (Var/alloc (graph/node k)) (graph/node node))) 120 | (.addBinding t (.build bb)))) 121 | (let [binding (BindingBuilder/create)] 122 | (.add binding (Var/alloc (graph/node k)) (graph/node v)) 123 | (.addBinding t (.build binding))))) 124 | 125 | (defn build-table 126 | "Given a bindings map, return an OpSequence including a nested table 127 | for each map entry." 128 | [bm] 129 | (->> bm 130 | (map #(let [t (TableN.)] 131 | (table-bindings t %) 132 | (OpTable/create t))) 133 | (reduce (fn [op t] 134 | (OpSequence/create op t))))) 135 | 136 | (defn op 137 | "Convert a Clojure data structure to an Arq Op" 138 | ^Op 139 | [[op-name & [a1 a2 & amore :as args]]] 140 | (case op-name 141 | :table (build-table a1) 142 | :distinct (OpDistinct/create (op a1)) 143 | :project (OpProject. (op a2) (var-seq a1)) 144 | :filter (OpFilter/filterBy (ExprList. ^List (map expr (butlast args))) (op (last args))) 145 | :bgp (OpBGP. (BasicPattern/wrap (mapcat triples args))) 146 | :conditional (OpConditional. (op a1) (op a2)) 147 | :dataset-names (OpDatasetNames. 
(graph/node a1)) 148 | :diff (OpDiff/create (op a1) (op a2)) 149 | :disjunction (OpDisjunction/create (op a1) (op a2)) 150 | :extend (OpExtend/create (op a2) (var-expr-list a1)) 151 | :graph (OpGraph. (graph/node a1) (op a2)) 152 | :group (OpGroup/create (op (first amore)) 153 | (reduce (fn [^VarExprList o v] 154 | (doto o 155 | (.add v))) 156 | (VarExprList.) (var-seq a1)) 157 | (var-aggr-list a2)) 158 | :join (OpJoin/create (op a1) (op a2)) 159 | :label (OpLabel/create a1 (op a2)) 160 | :left-join (OpLeftJoin/create (op a1) (op a2) (ExprList. ^List (map expr amore))) 161 | :list (OpList. (op a1)) 162 | :minus (OpMinus/create (op a1) (op a2)) 163 | :null (OpNull/create) 164 | :order (OpOrder. (op a2) (sort-conditions a1)) 165 | :quad (OpQuad. (.get (quad-pattern args) 0)) 166 | :quad-block (OpQuadBlock. (quad-pattern args)) 167 | :quad-pattern (OpQuadPattern. (graph/node a1) 168 | (BasicPattern/wrap (mapcat triples (rest args)))) 169 | :reduced (OpReduced/create (op a1)) 170 | :sequence (OpSequence/create (op a1) (op a2)) 171 | :slice (OpSlice. (op a1) (long a2) (long (first amore))) 172 | :top-n (OpTopN. (op (first amore)) (long a1) (sort-conditions a2)) 173 | :union (OpUnion. 
(op a1) (op a2)) 174 | :service (throw (ex-info "SPARQL federated queries not yet supported" {})) 175 | :path (throw (ex-info "SPARQL property paths not yet supported" {})) 176 | (throw (ex-info (str "Unknown operation " op-name) {:op-name op-name 177 | :args args})))) 178 | 179 | 180 | ;;https://github.com/apache/jena/blob/master/jena-extras/jena-querybuilder/src/main/java/org/apache/jena/arq/querybuilder/ExprFactory.java 181 | 182 | (def expr-class 183 | "Simple expressions that resolve to a class which takes Exprs in its 184 | constructor" 185 | {'* E_Multiply 186 | '/ E_Divide 187 | '< E_LessThan 188 | '<= E_LessThanOrEqual 189 | '= E_Equals 190 | '> E_GreaterThan 191 | '>= E_GreaterThanOrEqual 192 | 'abs E_NumAbs 193 | 'and E_LogicalAnd 194 | 'bnode E_BNode 195 | 'bound E_Bound 196 | 'ceil E_NumCeiling 197 | 'concat E_StrConcat 198 | 'contains E_StrContains 199 | 'datatype E_Datatype 200 | 'day E_DateTimeDay 201 | 'encode E_StrEncodeForURI 202 | 'floor E_NumFloor 203 | 'hours E_DateTimeHours 204 | 'if E_Conditional 205 | 'iri E_IRI 206 | 'uri E_URI 207 | 'isBlank E_IsBlank 208 | 'isIRI E_IsIRI 209 | 'isURI E_IsURI 210 | 'isLiteral E_IsLiteral 211 | 'isNumeric E_IsNumeric 212 | 'lang E_Lang 213 | 'langMatches E_LangMatches 214 | 'lcase E_StrLowerCase 215 | 'md5 E_MD5 216 | 'minutes E_DateTimeMinutes 217 | 'month E_DateTimeMonth 218 | 'not E_LogicalNot 219 | 'not= E_NotEquals 220 | 'now E_Now 221 | 'or E_LogicalOr 222 | 'rand E_Random 223 | 'regex E_Regex 224 | 'replace E_StrReplace 225 | 'round E_NumRound 226 | 'sameTerm E_SameTerm 227 | 'seconds E_DateTimeSeconds 228 | 'sha1 E_SHA1 229 | 'sha224 E_SHA224 230 | 'sha256 E_SHA256 231 | 'sha384 E_SHA384 232 | 'sha512 E_SHA512 233 | 'str E_Str 234 | 'strafter E_StrAfter 235 | 'strbefore E_StrBefore 236 | 'strdt E_StrDatatype 237 | 'strends E_StrEndsWith 238 | 'strlang E_StrLang 239 | 'strlen E_StrLength 240 | 'strstarts E_StrStartsWith 241 | 'struuid E_StrUUID 242 | 'substr E_StrSubstring 243 | 'timezone 
E_DateTimeTimezone 244 | 'tz E_DateTimeTimezone 245 | 'ucase E_StrUpperCase 246 | 'uuid E_UUID 247 | 'version E_Version 248 | 'year E_DateTimeYear}) 249 | 250 | 251 | (defn composite-expr 252 | "Convert a Clojure data structure representing a expression to an Arq Expr" 253 | [[f & args]] 254 | (cond 255 | (= f 'exists) (E_Exists. (op (first args))) 256 | (= f 'not-exists) (E_NotExists. (op (first args))) 257 | :else 258 | (let [args (map expr args) 259 | clazz (get expr-class f)] 260 | (cond 261 | clazz (ConstructorUtils/invokeConstructor clazz (into-array Object args)) 262 | (= f '+) (if (= 1 (count args)) 263 | (E_UnaryPlus. (first args)) 264 | (E_Add. (first args) (second args))) 265 | (= f '-) (if (= 1 (count args)) 266 | (E_UnaryMinus. (first args)) 267 | (E_Subtract. (first args) (second args))) 268 | (= f 'coalesce) (E_Coalesce. (ExprList. ^List args)) 269 | (= f 'in) (E_OneOf. (first args) (ExprList. ^List (rest args))) 270 | (= f 'not-in) (E_NotOneOf. (first args) (ExprList. ^List (rest args))) 271 | 272 | (s/valid? ::graph/iri f) (E_Function. (.getURI (graph/node f)) (ExprList. ^List args)) 273 | 274 | :else (throw (ex-info (str "Unknown expression type " f) {:expr f 275 | :args args})))))) 276 | 277 | 278 | (defn expr 279 | "Convert a Clojure data structure to an Arq Expr" 280 | [expr] 281 | (if (instance? java.util.List expr) 282 | (composite-expr expr) 283 | (let [node (graph/node expr)] 284 | (if (instance? Node_Variable node) 285 | (ExprVar. (Var/alloc ^Node_Variable node)) 286 | (NodeValue/makeNode node))))) 287 | 288 | (comment 289 | (require 'arachne.aristotle.query.spec) 290 | (s/conform :arachne.aristotle.query.spec/expr '(< 105000 (:xsd/integer ?pop))) 291 | #_.) 
292 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Aristotle 2 | 3 | An RDF/OWL library for Clojure, providing a data-oriented wrapper for 4 | Apache Jena. 5 | 6 | Key features: 7 | 8 | - Read/write RDF graphs using idiomatic Clojure data structures. 9 | - SPARQL queries expressed using Clojure data structures. 10 | - Pluggable inferencing and reasoners. 11 | - Pluggable validators. 12 | 13 | ## Rationale 14 | 15 | RDF is a powerful framework for working with highly-annotated data in very abstract ways. Although it isn't perfect, it is highly researched, well defined and understood, and the industry standard for "rich" semi-structured, open-ended information modeling. 16 | 17 | Most of the existing Clojure tools for RDF are focused mostly on creating and manipulating RDF graphs in pure Clojure at a low level. I desired a more comprehensive library with the specific objective of bridging existing idioms for working with Clojure data to RDF graphs. 18 | 19 | Apache Jena is a very capable, well-designed library for working with RDF and the RDF ecosystem. It uses the Apache software license, which unlike many other RDF tools is compatible with Clojure's EPL. However, Jena's core APIs can only be described as aggressively object-oriented. Since RDF is at its core highly data-oriented, and Clojure is also data-oriented, using an object-oriented or imperative API seems especially cumbersome. Aristotle attempts to preserve "good parts" of Jena, while replacing the cumbersome APIs with clean data-driven interfaces. 20 | 21 | Aristotle does not provide direct access to other RDF frameworks (such as RDF4j, JSONLD, Commons RDF, OWL API, etc.) However, Jena itself is highly pluggable, so if you need to interact with one of these other systems it is highly probable that a Jena adapter already exists or can be easily created. 
22 | 23 | ## Index 24 | 25 | - [Data Model](#data-model) 26 | - [Literals](#literals) 27 | - [Data Structures](#data-structures) 28 | - [API](#api) 29 | - [Query](#query) 30 | - [Validation](#validation) 31 | 32 | ## Data Model 33 | 34 | To express RDF data as Clojure, Aristotle provides two protocols. `arachne.aristotle.graph/AsNode` converts Clojure literals to RDF Nodes of the appropriate type, while `arachne.aristotle.graph/AsTriples` converts Clojure data structures to sets of RDF triples. 35 | 36 | ### Literals 37 | 38 | Clojure primitive values map to Jena Node objects of the appropriate type. 39 | 40 | |Clojure Type|RDF Node| 41 | |------------|--------| 42 | |long|XSD Long| 43 | |double|XSD Double| 44 | |boolean|XSD Boolean| 45 | |java.math.BigDecimal|XSD Decimal| 46 | |java.util.Date|XSD DateTime| 47 | |java.util.Calendar|XSD DateTime| 48 | |string enclosed by angle brackets
(e.g., `"<http://example.com>"`)| IRI| 49 | |other strings| XSD String| 50 | |keyword|IRI (see explanation of IRI/keyword registry below)| 51 | |java.net.URL|IRI| 52 | |java.net.URI|IRI| 53 | |symbols starting with `?`| variable node (for patterns or queries)| 54 | |the symbol `_`|unique blank node| 55 | |symbols starting with `_`| named blank node| 56 | |other symbols| IRI of the form `<urn:clojure:namespace/name>`.| 57 | 58 | #### IRI/Keyword Registry 59 | 60 | Since IRIs are usually long strings, and tend to be used repeatedly, using the full string expression can be cumbersome. Furthermore, Clojure tends to prefer keywords to strings, especially for property/attribute names and enumerated or constant values. 61 | 62 | Therefore, Aristotle provides a mechanism to associate a namespace with an IRI prefix. Keywords with a registered namespace will be converted to a corresponding IRI. 63 | 64 | Use the `arachne.aristotle.registry/prefix` function to declare a prefix. For example, 65 | 66 | ``` 67 | (reg/prefix 'foaf "http://xmlns.com/foaf/0.1/") 68 | ``` 69 | 70 | Then, keywords with a `:foaf` namespace will be interpreted as IRI nodes. For example, with the above declaration `:foaf/name` will be interpreted as `<http://xmlns.com/foaf/0.1/name>`. 71 | 72 | The following common namespace prefixes are defined by default: 73 | 74 | |Namespace |IRI Prefix| 75 | |----|-------| 76 | |rdf|`<http://www.w3.org/1999/02/22-rdf-syntax-ns#>`| 77 | |rdfs|`<http://www.w3.org/2000/01/rdf-schema#>`| 78 | |xsd|`<http://www.w3.org/2001/XMLSchema#>`| 79 | |owl|`<http://www.w3.org/2002/07/owl#>`| 80 | |owl2|`<http://www.w3.org/2006/12/owl2#>`| 81 | 82 | The registry is stored in the global dynamic Var `arachne.aristotle.registry/*registry*`, which can also be overridden on a thread-local basis using the `arachne.aristotle.registry/with` macro, which takes a map of namespaces (as keywords) and IRI prefixes. For example: 83 | 84 | ```clojure 85 | (reg/with {'foaf "http://xmlns.com/foaf/0.1/" 86 | 'dc "http://purl.org/dc/elements/1.1/"} 87 | ;; Code using keywords with :foaf and :dc namespaces 88 | ) 89 | ``` 90 | 91 | You can also register a prefix in RDF/EDN data, using the `#rdf/prefix` tagged literal. 
The prefix will be added to the thread-local binding and is scoped to the same triple expansion. This allows you to define a prefix alongside the data that uses it, without installing it globally or managing it in your code. For example: 92 | 93 | ```clojure 94 | [#rdf/prefix [:ex "http://example.com/"] 95 | {:rdf/about :ex/luke 96 | :foaf/name "Luke"}] 97 | ``` 98 | 99 | #### Wildcard Prefixes 100 | 101 | Aristotle now allows you to register an RDF IRI prefix for a namespace *prefix*, rather than a fully specified namespace. To do so, use an asterisk in the symbol you provide to the `prefix` function: 102 | 103 | ```clojure 104 | (reg/prefix 'arachne.* "http://arachne-framework.org/vocab/1.0/") 105 | ``` 106 | 107 | This means that keywords with a namespace that starts with an `arachne` namespace segment will use the supplied prefix. Any additional namespace segments will be appended to the prefix, separated by a forward slash (`/`). 108 | 109 | Given the registration above, for example, the keyword `:arachne.http.request/body` would be interpreted as the IRI `<http://arachne-framework.org/vocab/1.0/http/request/body>`. 110 | 111 | If multiple wildcard prefixes overlap, the system will use whichever is more specific, and will prefer non-wildcard registrations to wildcard registrations in the case of ambiguity. 112 | 113 | Using `#` or any other character as a prefix separator for wildcard prefixes, instead of `/`, is currently not supported. 114 | 115 | ### Data Structures 116 | 117 | You can use the `arachne.aristotle.graph/triples` function to convert any compatible Clojure data structure to a collection of RDF Triples (usually in practice it isn't necessary to call `triples` explicitly, as the higher-level APIs do it for you.) 118 | 119 | #### Single Triple 120 | 121 | A 3-element vector can be used to represent a single RDF Triple. 
For example: 122 | 123 | ```clojure 124 | (ns arachne.aristotle.example 125 | (:require [arachne.aristotle.registry :as reg] 126 | [arachne.aristotle.graph :as g])) 127 | 128 | (reg/prefix 'arachne.aristotle.example "http://arachne-framework.org/example#") 129 | 130 | (g/triples [::luke :foaf/firstName "Luke"]) 131 | ``` 132 | 133 | The call to `g/triples` returns a collection containing a single Jena Triple with a subject of `<http://arachne-framework.org/example#luke>`, a predicate of `<http://xmlns.com/foaf/0.1/firstName>` and the string literal `"Luke"` as the object. 134 | 135 | #### Collections of Triples 136 | 137 | A collection of multiple triples works the same way. 138 | 139 | For example, 140 | 141 | ```clojure 142 | (g/triples '[[_luke :foaf/firstName "Luke"] 143 | [_luke :foaf/knows _nola] 144 | [_nola :foaf/firstName "Nola"]]) 145 | ``` 146 | 147 | Note the use of symbols starting with `_`; in this case, the nodes for both Luke and Nola are represented as named blank nodes (without explicit IRIs.) 148 | 149 | #### Maps 150 | 151 | Maps may be used to represent multiple statements about a single subject, with each key indicating an RDF property. The subject of the map is indicated using the special `:rdf/about` key, which is *not* interpreted as a property, but rather as identifying the subject of the map. If no `:rdf/about` key is present, a blank node will be used as the subject. 152 | 153 | For example: 154 | 155 | ```clojure 156 | (g/triples {:rdf/about ::luke 157 | :foaf/firstName "Luke" 158 | :foaf/lastName "VanderHart"}) 159 | ``` 160 | 161 | This is equivalent to two triples: 162 | 163 | ``` 164 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/firstName> "Luke" 165 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/lastName> "VanderHart" 166 | ``` 167 | 168 | ##### Multiple Values 169 | 170 | If the value for a key is a single literal, it is interpreted as a single triple. If the value is a collection, it is interpreted as multiple values for the same property. 
For example: 171 | 172 | ```clojure 173 | (g/triples {:rdf/about ::luke 174 | :foaf/made [::arachne ::aristotle ::quiescent]}) 175 | ``` 176 | 177 | Expands to: 178 | 179 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/made> <http://arachne-framework.org/example#arachne> 180 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/made> <http://arachne-framework.org/example#aristotle> 181 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/made> <http://arachne-framework.org/example#quiescent> 182 | 183 | ##### Nested Maps 184 | 185 | In addition to literals, the values of keys may be additional maps (or collections of maps). The subject of the nested map will be both the object of the property under which it is specified, and the subject of statements in its own map. 186 | 187 | ```clojure 188 | (g/triples {:rdf/about ::luke 189 | :foaf/knows [{:rdf/about ::nola 190 | :foaf/name "Nola" 191 | :foaf/knows ::luke} 192 | {:rdf/about ::Jim 193 | :foaf/name "Jim"}]}) 194 | ``` 195 | 196 | Expressed in expanded triples, this is: 197 | 198 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/knows> <http://arachne-framework.org/example#nola> 199 | <http://arachne-framework.org/example#nola> <http://xmlns.com/foaf/0.1/name> "Nola" 200 | <http://arachne-framework.org/example#nola> <http://xmlns.com/foaf/0.1/knows> <http://arachne-framework.org/example#luke> 201 | <http://arachne-framework.org/example#luke> <http://xmlns.com/foaf/0.1/knows> <http://arachne-framework.org/example#Jim> 202 | <http://arachne-framework.org/example#Jim> <http://xmlns.com/foaf/0.1/name> "Jim" 203 | 204 | ## API 205 | 206 | Aristotle's primary API is exposed in its top-level namespace, `arachne.aristotle`, which defines functions to create and interact with _graphs_. 207 | 208 | A graph is a collection of RDF data, together with (optionally) logic and/or inferencing engines. Graphs may be stored in memory or be a facade to an external RDF database (although all the graph constructors shipped with Aristotle are for in-memory graphs.) 209 | 210 | Graphs are instances of `org.apache.jena.graph.Graph`, which are 211 | stateful mutable objects (mutability is too deeply ingrained in Jena 212 | to provide an immutable facade.) However, Aristotle's APIs are 213 | consistent in returning the model from any update operations, as if 214 | graphs were immutable Clojure-style collections. It is recommended to 215 | rely on the return value of update operations, as if graphs were 216 | immutable, so your code does not break if immutable graph 217 | representations are ever supported. 218 | 219 | Jena Graphs are not thread-safe by default; make sure you limit concurrent graph access. 220 | 221 | ### Creating a Graph 222 | 223 | To create a new graph, invoke the `arachne.aristotle/graph` multimethod. 
The first argument to `graph` is a keyword specifying the type of graph to construct; additional arguments vary depending on the type of graph. 224 | 225 | Graph constructors provided by Aristotle include: 226 | 227 | |type|model| 228 | |----|-----| 229 | |:simple| Basic in-memory triple store with no inferencing capability | 230 | |:jena-mini| In-memory triple store that performs OWL 1 inferencing using Jena's "Mini" inferencer (a subset of OWL Full with restrictions on some of the less useful forward entailments.) | 231 | |:jena-rules| In-memory triple store supporting custom rules, using Jena's [hybrid backward/forward rules engine](https://jena.apache.org/documentation/inference/#rules). Takes a collection of `org.apache.jena.reasoner.rulesys.Rule` objects as an additional argument (the prebuilt collection of rules for Jena Mini is provided at `arachne.aristotle.inference/mini-rules`) | 232 | 233 | Clients may wish to provide additional implementations of the `graph` multimethod to support additional underlying graph or inference types; the only requirement is that the method return an instance of `org.apache.jena.graph.Graph`. For example, for your project, you may wish to create a Graph backed by on-disk or database storage, or which uses the more powerful Pellet reasoner, which has Jena integration but is not shipped with Aristotle due to license restrictions. 234 | 235 | Example: 236 | 237 | ``` 238 | (require '[arachne.aristotle :as aa]) 239 | (def m (aa/graph :jena-mini)) 240 | ``` 241 | 242 | ### Adding data to a graph 243 | 244 | In order to do anything useful with a graph, you must add additional facts. Facts may be added either programmatically in your code, or by reading serialized data from a file or remote URL. 245 | 246 | #### Adding data programmatically 247 | 248 | To add data programmatically, use the `arachne.aristotle/add` function, which takes a graph and some data to add. 
The data is processed into RDF triples using `arachne.aristotle.graph/triples`, using the data format documented above. For example: 249 | 250 | ``` 251 | (require '[arachne.aristotle :as aa]) 252 | 253 | (def g (aa/graph :jena-mini)) 254 | 255 | (aa/add g {:rdf/about ::luke 256 | :foaf/firstName "Luke" 257 | :foaf/lastName "VanderHart"}) 258 | ``` 259 | 260 | #### Adding data from a file 261 | 262 | To add data from a file, use the `arachne.aristotle/read` function, which takes a graph and a file. The file may be specified by a: 263 | 264 | - String of the absolute or process-relative filename 265 | - java.net.URI 266 | - java.net.URL 267 | - java.io.File 268 | 269 | Jena will detect what format the file is in, which may be one of RDF/XML, Turtle, N3, or N-Triples. All of the statements it contains will be added to the graph. Example: `(aa/read g "my-data.ttl")` 270 | 271 | ## Query 272 | 273 | Aristotle provides a data-oriented interface to Jena's SPARQL query engine. Queries themselves are expressed as Clojure data, and can be programmatically generated and combined (similar to queries in Datomic.) 274 | 275 | To invoke a query, use the `arachne.aristotle.query/run` function, which takes a graph, a query data structure, and any query inputs. It returns the results of the query. 276 | 277 | SPARQL itself is string oriented, with a heavily lexical grammar that does not translate cleanly to data structures. However, SPARQL has an internal algebra that *is* very clean and composable. Aristotle's query data uses this internal SPARQL algebra (which is exposed by Jena's ARQ data graph), ignoring SPARQL syntax. All queries expressible in SPARQL syntax are also expressible in Aristotle's query data, modulo some features that are not implemented yet (e.g., query federation across remote data sources.) 278 | 279 | Unfortunately, the SPARQL algebra has no well documented syntax. 
A [rough overview](https://www.w3.org/2011/09/SparqlAlgebra/ARQalgebra) is available, and this readme will document some of the more common forms. For more details, see the [query specs](https://github.com/arachne-framework/aristotle/blob/master/src/arachne/aristotle/query/spec.clj) with their associated docstrings. 280 | 281 | Aristotle queries are expressed as compositions of algebraic operations, using the generalized form `[operation expression* sub-operation*]`. These operation vectors may be nested arbitrarily. 282 | 283 | Expressions are specified using a Clojure list form, with the expression type as a symbol. These expressions take the general form `(expr-type arg*)`. 284 | 285 | ### Running Queries 286 | 287 | To run a query, use the `arachne.aristotle.query/run` function. This function takes a graph, an (optional) binding vector, a query, and (optionally) a map of variable bindings which serve as query inputs. 288 | 289 | If a binding vector is given, results will be returned as a set of tuples, one for each unique binding of the variables in the binding vector. 290 | 291 | If no binding vector is supplied, results will be returned as a sequence of query solutions, with each solution represented as a map of the variables it binds. In this case, solutions may not be unique (unless the query specifically includes a `:distinct` operation.) 292 | 293 | Some examples follow: 294 | 295 | #### Sample: simple query 296 | 297 | ```clojure 298 | (require '[arachne.aristotle.query :as q]) 299 | 300 | (q/run my-graph '[:bgp [:example/luke :foaf/knows ?person] 301 | [?person :foaf/name ?name]]) 302 | ``` 303 | 304 | This query is a single pattern match (using a "basic graph pattern" or "bgp"), binding the `:foaf/name` property of each entity that is the subject of `:foaf/knows` for an entity identified by `:example/luke`. 
305 | 306 | An example of the results that might be returned by this query is: 307 | 308 | ```clojure 309 | ({?person <http://example.com/person#jim> ?name "Jim"}, 310 | {?person <http://example.com/person#sara> ?name "Sara"}, 311 | {?person <http://example.com/person#jules> ?name "Jules"}) 312 | ``` 313 | 314 | #### Sample: simple query with result binding 315 | 316 | This is the same query, but using a binding vector 317 | 318 | ```clojure 319 | (q/run my-graph '[?name] 320 | '[:bgp [:example/luke :foaf/knows ?person] 321 | [?person :foaf/name ?name]]) 322 | ``` 323 | In this case, results would look like: 324 | 325 | ```clojure 326 | #{["Jim"] 327 | ["Sara"] 328 | ["Jules"]} 329 | ``` 330 | 331 | #### Sample: query with filtering expression 332 | 333 | This example expands on the previous query, using a `:filter` operation with an expression to only return acquaintances above the age of 18: 334 | 335 | ```clojure 336 | (q/run my-graph '[?name] 337 | '[:filter (< 18 ?age) 338 | [:bgp [:example/luke :foaf/knows ?person] 339 | [?person :foaf/name ?name] 340 | [?person :foaf/age ?age]]]) 341 | ``` 342 | 343 | #### Sample: providing inputs 344 | 345 | This example is the same as those above, except instead of hardcoding the base individual as `:example/luke`, the starting individual is bound in a separate binding map provided to `q/run`. 346 | 347 | ```clojure 348 | (q/run my-graph '[?name] 349 | '[:bgp [?individual :foaf/knows ?person] 350 | [?person :foaf/name ?name]] 351 | '{?individual :example/luke}) 352 | ``` 353 | 354 | It is also possible to bind multiple possibilities for the value of `?individual`: 355 | 356 | ```clojure 357 | (q/run my-graph '[?name] 358 | '[:bgp [?individual :foaf/knows ?person] 359 | [?person :foaf/name ?name]] 360 | '{?individual #{:example/luke 361 | :example/carin 362 | :example/dan}}) 363 | ``` 364 | 365 | This will find the names of all persons who are known by Luke, Carin OR Dan. 
366 | 367 | ### Precompiled Queries 368 | 369 | Queries can also be precompiled into a Jena Operation object, meaning they do not need to be parsed, interpreted, and optimized again every time they are invoked. To precompile a query, use the `arachne.aristotle.query/build` function: 370 | 371 | ```clojure 372 | (def friends-q (q/build '[:bgp [?individual :foaf/knows ?person] 373 | [?person :foaf/name ?name]])) 374 | ``` 375 | 376 | You can then use the precompiled query object (bound in this case to `friends-q`) in calls to `arachne.aristotle.query/run`: 377 | 378 | ```clojure 379 | (q/run my-graph friends-q '{?individual :example/luke}) 380 | ``` 381 | 382 | The results will be exactly the same as using the inline version. 383 | 384 | ## Validation 385 | 386 | One common use case is to take a given Graph and "validate" it, ensuring its internal consistency (including whether entities in it conform to any OWL or RDFS schema that is present.) 387 | 388 | To do this, run the `arachne.aristotle.validation/validate` function. Passed only a graph, it will return any errors returned by the Jena Reasoner that was used when constructing the graph. The `:simple` reasoner will never return any errors, the `:jena-mini` reasoner will return OWL inconsistencies, etc. 389 | 390 | If validation is successful, the validator will return nil or an empty list. If there were any errors, each error will be returned as a map containing details about the specific error type. 391 | 392 | #### Closed-World validation 393 | 394 | The built-in reasoners use the standard open-world assumption of RDF and OWL. This means that many scenarios that would intuitively be "invalid" to a human (such as a missing min-cardinality attribute) will not be identified, because the reasoner always operates under the assumption that it doesn't yet know all the facts. 
395 | 396 | However, for certain use cases, it can be desirable to assert that yes, the graph actually does contain all pertinent facts, and that we want to make some assertions based on what the graph *actually* knows at a given moment, never mind what facts may be added in the future. 397 | 398 | To do this, you can pass additional validator functions to `validate`, providing a sequence of optional validators as a second argument. 399 | 400 | Each of these validator functions takes a graph as its argument, and returns a sequence of validation error maps. An empty sequence implies that the graph is valid. 401 | 402 | The "min-cardinality" situation mentioned above has a built in validator, `arachne.aristotle.validation/min-cardinality`. It works by running a SPARQL query on the provided graph that detects if any min-cardinality attributes are missing from entities known to be of an OWL class where they are supposed to be present. 403 | 404 | To use it, just provide it in the list of custom validators passed to `validate`: 405 | 406 | ```clojure 407 | (v/validate m [v/min-cardinality]) 408 | ``` 409 | 410 | This will return the set not only of built in OWL validation errors, but also any min-cardinality violations that are discovered. 411 | 412 | Of course, you can provide any additional validator functions as well. 413 | -------------------------------------------------------------------------------- /test/foaf.rdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 29 | 30 | 31 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | 42 | 43 | 44 | 45 | 46 | Label Property 47 | A foaf:LabelProperty is any RDF property with texual values that serve as labels. 
48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 172 | 173 | 174 | 175 | 176 | 177 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 
| 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | --------------------------------------------------------------------------------