├── .github ├── dependabot.yml └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── benchmark ├── finland-aid.csv ├── finland-aid.xml ├── qald6t3-train-v1.2.csv ├── qald6t3-train-v1.2.xml ├── qald6t3-train-v1.3-ner.csv ├── qald6t3-train.csv └── qald6t3-train.xml ├── eclipse-format-cubeqa.xml ├── pom.xml └── src ├── main ├── java │ ├── de │ │ └── konradhoeffner │ │ │ └── commons │ │ │ ├── ListTree.java │ │ │ ├── Pair.java │ │ │ ├── StopWatch.java │ │ │ ├── Streams.java │ │ │ ├── TSVReader.java │ │ │ └── package-info.java │ └── org │ │ ├── aksw │ │ ├── cubeqa │ │ │ ├── Algorithm.java │ │ │ ├── AnswerType.java │ │ │ ├── Config.java │ │ │ ├── Cube.java │ │ │ ├── CubeSparql.java │ │ │ ├── Files.java │ │ │ ├── Replacer.java │ │ │ ├── StanfordTrees.java │ │ │ ├── StopWatches.java │ │ │ ├── Stopwords.java │ │ │ ├── benchmark │ │ │ │ ├── Benchmark.java │ │ │ │ ├── DataType.java │ │ │ │ ├── Nodes.java │ │ │ │ ├── NormalizingStringSet.java │ │ │ │ ├── Performance.java │ │ │ │ ├── Question.java │ │ │ │ └── package-info.java │ │ │ ├── detector │ │ │ │ ├── Aggregate.java │ │ │ │ ├── AggregateDetector.java │ │ │ │ ├── AggregateMapping.java │ │ │ │ ├── Detector.java │ │ │ │ ├── HalfInfiniteIntervalDetector.java │ │ │ │ ├── InPlaceDetector.java │ │ │ │ ├── InYearDetector.java │ │ │ │ ├── IntervalType.java │ │ │ │ ├── PerTimeDetector.java │ │ │ │ ├── TopDetector.java │ │ │ │ └── package-info.java │ │ │ ├── index │ │ │ │ ├── CubeIndex.java │ │ │ │ ├── Index.java │ │ │ │ ├── LabelIndex.java │ │ │ │ ├── Similarity.java │ │ │ │ ├── StringIndex.java │ │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ ├── property │ │ │ │ ├── ComponentProperty.java │ │ │ │ ├── PropertyType.java │ │ │ │ ├── package-info.java │ │ │ │ └── scorer │ │ │ │ │ ├── DatatypePropertyScorer.java │ │ │ │ │ ├── MultiSetScorer.java │ │ │ │ │ ├── NumericScorer.java │ │ │ │ │ ├── ObjectPropertyScorer.java │ │ │ │ │ ├── ParseScorer.java │ │ │ │ │ ├── ScoreResult.java │ │ │ │ │ ├── 
Scorer.java │ │ │ │ │ ├── Scorers.java │ │ │ │ │ ├── StringScorer.java │ │ │ │ │ └── temporal │ │ │ │ │ └── TemporalScorer.java │ │ │ ├── rdf │ │ │ │ ├── DataCube.java │ │ │ │ └── Owl.java │ │ │ ├── restriction │ │ │ │ ├── IntervalRestriction.java │ │ │ │ ├── Restriction.java │ │ │ │ ├── RestrictionWithPhrase.java │ │ │ │ ├── TopRestriction.java │ │ │ │ ├── UriRestriction.java │ │ │ │ ├── ValueRestriction.java │ │ │ │ └── package-info.java │ │ │ ├── scripts │ │ │ │ ├── Csv2Qald.java │ │ │ │ ├── EvaluateCubeIndex.java │ │ │ │ ├── EvaluateQBench1.java │ │ │ │ ├── EvaluateQald6T3Test.java │ │ │ │ ├── EvaluateQald6T3Train.java │ │ │ │ ├── FillCubeIndex.java │ │ │ │ ├── QaldInQaldOut.java │ │ │ │ ├── Service.java │ │ │ │ └── package-info.java │ │ │ └── template │ │ │ │ ├── Fragment.java │ │ │ │ ├── GreedyTemplator.java │ │ │ │ ├── Match.java │ │ │ │ ├── StanfordNlp.java │ │ │ │ ├── Template.java │ │ │ │ ├── Templator.java │ │ │ │ ├── WeightedTemplator.java │ │ │ │ └── package-info.java │ │ └── openqa │ │ │ └── component │ │ │ └── answerformulation │ │ │ └── queryparser │ │ │ └── impl │ │ │ ├── CubeQaQueryParser.java │ │ │ ├── CubeQaQueryParserFactory.java │ │ │ └── package-info.java │ │ └── kohsuke │ │ └── args4j │ │ └── MapParser.java └── resources │ ├── META-INF │ ├── org.aksw.openqa.component.answerformulation.queryparser.impl.CubeQaQueryParser.inf │ └── services │ │ ├── org.aksw.openqa.component.answerformulation.IQueryParserFactory │ │ └── org.aksw.openqa.component.answerformulation.queryparser.impl.CubeQaQueryParser.ini │ ├── aggregatemapping.tsv │ ├── benchmark │ ├── finland-aid.csv │ ├── finland-aid.xml │ ├── qald6t3-train-v1.1.csv │ ├── qald6t3-train-v1.1.xml │ ├── qald6t3-train-v1.2.csv │ └── qald6t3-train-v1.2.xml │ ├── finland-aid │ └── manuallabels.tsv │ ├── logback.xml │ ├── questions-finnland-aid.txt │ └── questions.txt └── test └── java └── org ├── aksw └── cubeqa │ ├── AggregateMappingTest.java │ ├── AlgorithmTest.java │ ├── AnswerTypeTest.java │ ├── 
ComponentPropertyTest.java │ ├── CubeSparqlTest.java │ ├── CubeTest.java │ ├── JenaNanBugTest.java │ ├── LoggingTest.java │ ├── ParserTest.java │ ├── ReplacerTest.java │ ├── benchmark │ ├── BenchmarkTest.java │ └── PerformanceTest.java │ ├── detector │ ├── AggregateDetectorTest.java │ ├── HalfInfiniteIntervalDetectorTest.java │ ├── InPlaceDetectorTest.java │ ├── InYearDetectorTest.java │ ├── PerTimeDetectorTest.java │ ├── QuestionWordDetectorTest.java │ └── TopDetectorTest.java │ ├── index │ ├── LabelIndexTest.java │ ├── SimilarityTest.java │ └── StemmerTest.java │ ├── property │ └── scorer │ │ ├── DateScorerTest.java │ │ ├── NumericScorerTest.java │ │ ├── ObjectPropertyScorerTest.java │ │ ├── ScorersTest.java │ │ ├── StringScorerTest.java │ │ └── temporal │ │ └── TemporalScorerTest.java │ └── template │ ├── CubeTemplateTest.java │ ├── CubeTemplatorNewTest.java │ └── StanfordNlpTest.java └── kohsuke └── args4j └── MapParserTest.java /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 2 | version: 2 3 | updates: 4 | - package-ecosystem: "github-actions" 5 | directory: "/.github/workflows" 6 | assignees: ["KonradHoeffner"] 7 | schedule: 8 | interval: "daily" 9 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Set up JDK 11 12 | uses: actions/setup-java@v4 13 | with: 14 | java-version: '11' 15 | distribution: 'adopt' 16 | - uses: actions/cache@v4 17 | with: 18 | path: ~/.m2/repository 19 | key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} 20 | restore-keys: | 21 | ${{ runner.os }}-maven- 22 | - name: Build 
with Maven 23 | run: mvn --batch-mode verify 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /src/main/java/org/aksw/cubeqa/scripts/TestRandomStuff.java 2 | src/main/java/org/aksw/cubeqa/scripts/Csv2Qald.java 3 | benchmark 4 | *.class 5 | *~ 6 | .idea 7 | # Mobile Tools for Java (J2ME) 8 | .mtj.tmp/ 9 | write.lock 10 | # testing random things 11 | Test.java 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.ear 17 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 18 | hs_err_pid* 19 | .claspath 20 | .classpath 21 | .project 22 | .settings 23 | cache 24 | *.log 25 | target 26 | *.log.* 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CubeQA—Question Answering on Statistical Linked Data 2 | [![test](https://github.com/AskNowQA/cubeqa/actions/workflows/test.yml/badge.svg)](https://github.com/AskNowQA/cubeqa/actions/workflows/test.yml) 3 | [![License: GNU GPLv3](https://img.shields.io/badge/license-GPL-blue)](LICENSE) 4 | [![JavaDoc](https://img.shields.io/badge/javadoc-here-green)](https://konradhoeffner.github.io/cubeqa) 5 | 6 | ## Abstract 7 | As an increasing amount of statistical data is published as RDF, intuitive ways of satisfying information needs and getting new insights out of this type of data becomes increasingly important. 8 | Question answering systems provide intuitive access to data by translating natural language queries into SPARQL, which is the native query language of RDF knowledge bases. 9 | Statistical data, however, is structurally very different from other data and cannot be queried using existing approaches. 
10 | Building upon a question corpus established in previous work, we created a benchmark for evaluating questions on statistical Linked Data in order to evaluate statistical question answering algorithms and to stimulate further research. 11 | Furthermore, we designed a question answering algorithm for statistical data, which covers a wide range of question types. 12 | To our knowledge, this is the first question answering approach for statistical RDF data and could open up a new research area. 13 | Apart from providing evaluation results, we discuss future challenges in this field. 14 | 15 | ## Requirements 16 | * CubeQA 1.0 requires **Java 11**, Git and Maven 3 installed. 17 | * further versions may require higher Java versions. 18 | * Clone the project via "`git clone https://github.com/AKSW/cubeqa.git`" to get the current state. 19 | * You may checkout release 1.0 for a stable version that runs on Java 11. 20 | 21 | ### IDE Setup 22 | If you use an IDE, you also need to download and execute lombok.jar (double-click it, or run java -jar lombok.jar). Follow instructions. 23 | That is because CubeQA uses [Project Lombok](http://projectlombok.org/), which removes much boilerplate from Java. 24 | 25 | ## Benchmark 26 | CubeQA contains a benchmark ([View Benchmark](https://github.com/AKSW/cubeqa/tree/master/benchmark/)) that runs on 50 datasets of LinkedSpending ([Download](https://github.com/KonradHoeffner/linkedspending/releases/download/data-qbench2datasets/qbench2datasets.zip) | [Browse LinkedSpending](https://linkedspending.aksw.org/)). 27 | The benchmark source package is [`org.aksw.cubeqa.benchmark`](https://github.com/AKSW/cubeqa/tree/master/src/main/java/org/aksw/cubeqa/benchmark). 28 | 29 | ### Run the Evaluation yourself 30 | We believe that good science should be open and reproducible. Feel free to verify our claims by running our evaluation yourself. 
Please [contact us](mailto:konrad.hoeffner@uni-leipzig.de?subject=CubeQA%20Evaluation&body=Dear%20Konrad,) if you encounter issues. 31 | 32 | * run the evaluation main classes, e.g. for the QALD6 Task 3 training set, via `mvn compile exec:java -Dexec.mainClass="org.aksw.cubeqa.scripts.EvaluateQald6T3Train"`. 33 | * You will see the results on the console and also in the file `benchmark/qbench.csv`. 34 | 35 | The evaluation code and the JUnit tests are preconfigured to use the SPARQL endpoint, but that is not active anymore. 36 | You can install and load your own SPARQL endpoint and change the configuration to use your own endpoint as described below. 37 | 38 | #### Load the Datasets into your own Virtuoso Endpoint 39 | * install [OpenLink Virtuoso](http://virtuoso.openlinksw.com/) (a different triple store may work as well) on your machine and load the datasets (see below) 40 | * download the [datasets](https://github.com/KonradHoeffner/linkedspending/releases/download/data-qbench2datasets/qbench2datasets.zip) 41 | * upload the [LinkedSpending ontology](https://raw.githubusercontent.com/KonradHoeffner/linkedspending/master/schema/ontology.ttl) into graph and add that graph to the graph group 42 | * upload each .nt file into graph `http://linkedspending.aksw.org/` and add them to graph group 43 | * you can automate this with the `virtloadbench` script adapted to your use case 44 | * then go to the folder containing the dataset ntriples files and execute the shell command `ls | sed "s|\\.nt||" | xargs -I @ virtloadbench @.nt http://linkedspending.aksw.org/@` 45 | * alternative virtload scripts are at 46 | * in http:///conductor add prefixes qb: , ls: and lso: 47 | * set the URI, such as "localhost:8890" (default) in org.aksw.cubeqa.CubeSparql.java. 48 | * start Virtuoso 49 | 50 | 51 | ## Graphical User Interface 52 | CubeQA can be used as a plugin for [openQA](https://bitbucket.org/emarx/openqa/wiki/FAQ), which offers a graphical user interface. 
53 | 54 | ## Warning: Research Prototype 55 | While CubeQA is implemented in Java using Maven so it theoretically should run everywhere, it is under development, using snapshots and generally 56 | of the status of a research prototype so I don't give any guarantee of it successfully running on your machine but I'm happy to help with your questions (best to open a new issue). 57 | CubeQA was part of my PhD thesis and is not my current research topic, so I can perform maintenance only very rarely. 58 | While I do plan on creating a version 2 eventually, this will just be a quick move to Java 16. 59 | If you want to know more about current research, I recommend reading "R. Cocco, M. Atzori, and C. Zaniolo. Machine learning of SPARQL templates for Question Answering over LinkedSpending. In 2019 IEEE 28th International Conference on Enabling Technologies: 60 | Infrastructure for Collaborative Enterprises (WETICE), pages 156–161, 06 2019." ([IEEE page](https://ieeexplore.ieee.org/document/8795383), [PDF](http://ceur-ws.org/Vol-2400/paper-22.pdf)). 61 | 62 | ## License 63 | The source code of CubeQA is freely available under the GPLv3 license (see the LICENSE file), which requires you to publish derivative works under the same license. If this creates a licensing conflict or for commercial usage, please [contact us](mailto:konrad.hoeffner@uni-leipzig.de?subject=CubeQA%20License&body=Dear%20Konrad,). 
64 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | org.aksw.cubeqa 5 | cubeqa 6 | 1.0.0 7 | 8 | 11 9 | 11 10 | UTF-8 11 | UTF-8 12 | 5.5.5 13 | 2.0.7 14 | 2.5 15 | 16 | 17 | 18 | 19 | com.github.mpkorstanje 20 | simmetrics-core 21 | 4.1.1 22 | 23 | 24 | args4j 25 | args4j 26 | 2.33 27 | 28 | 29 | org.junit.jupiter 30 | junit-jupiter-api 31 | 5.9.2 32 | test 33 | 34 | 35 | org.apache.commons 36 | commons-csv 37 | 1.10.0 38 | 39 | 40 | joda-time 41 | joda-time 42 | 2.12.4 43 | 44 | 45 | org.apache.lucene 46 | lucene-analyzers-common 47 | ${lucene.version} 48 | 49 | 50 | org.apache.lucene 51 | lucene-suggest 52 | ${lucene.version} 53 | 54 | 55 | org.apache.lucene 56 | lucene-queryparser 57 | ${lucene.version} 58 | 59 | 60 | org.apache.lucene 61 | lucene-codecs 62 | ${lucene.version} 63 | 64 | 65 | com.google.guava 66 | guava 67 | 31.1-jre 68 | 69 | 70 | org.projectlombok 71 | lombok 72 | 1.18.26 73 | 74 | 75 | org.apache.commons 76 | commons-collections4 77 | 4.4 78 | 79 | 80 | org.apache.jena 81 | apache-jena-libs 82 | pom 83 | 4.4.0 84 | 85 | 86 | log4j 87 | log4j 88 | 89 | 90 | slf4j-log4j12 91 | org.slf4j 92 | 93 | 94 | 95 | 96 | edu.stanford.nlp 97 | stanford-corenlp 98 | 4.5.3 99 | 100 | 101 | xml-apis 102 | xml-apis 103 | 104 | 105 | 106 | 107 | edu.stanford.nlp 108 | stanford-corenlp 109 | 4.5.3 110 | models 111 | 112 | 113 | org.slf4j 114 | slf4j-api 115 | 116 | 117 | ch.qos.logback 118 | logback-classic 119 | 1.4.6 120 | 121 | 122 | org.glassfish 123 | javax.json 124 | 1.1.4 125 | 126 | 127 | org.aksw.openqa 128 | engine 129 | 0.0.7-beta 130 | 131 | 132 | 133 | 134 | 135 | 136 | org.slf4j 137 | slf4j-api 138 | ${slf4j.version} 139 | 140 | 142 | 143 | org.apache.httpcomponents 144 | httpclient 145 | 4.5.14 146 | 147 | 148 | 149 | 150 | 151 | 152 | maven.aksw.internal 153 | University Leipzig, AKSW Maven2 Repository 
/**
 * A simple tree in which every node holds one item and an ordered list of child subtrees.
 *
 * @param <T> the type of the items stored in the nodes
 */
public class ListTree<T>
{
    /** The item stored in this node. */
    public final T item;

    /** The direct child subtrees of this node, in insertion order. */
    public List<ListTree<T>> children = new ArrayList<>();

    /** @param item the item stored in the newly created (childless) node */
    public ListTree(T item) {this.item = item;}

    /**
     * Collects the nodes below this node.
     *
     * @return the direct children followed by the descendants of each child, in child order;
     *         this node itself is <b>not</b> part of the result
     */
    public List<ListTree<T>> nodes()
    {
        List<ListTree<T>> nodes = new ArrayList<>(children);
        for (ListTree<T> child : children) {nodes.addAll(child.nodes());}
        return nodes;
    }

    /** @return the items of all nodes returned by {@link #nodes()}, in the same order */
    public List<T> items()
    {
        return nodes().stream().map(n -> n.item).collect(Collectors.toList());
    }

    /**
     * Appends a new leaf holding the given item as the last child of this node.
     *
     * @param item the item of the new child node
     */
    public void add(T item) {children.add(new ListTree<>(item));}
}
/**
 * Immutable generic pair of two values. Either component may be {@code null}.
 *
 * @param <A> type of the first component
 * @param <B> type of the second component
 */
public class Pair<A, B>
{
    /** The first component. */
    public final A a;
    /** The second component. */
    public final B b;

    /**
     * @param a the first component, may be {@code null}
     * @param b the second component, may be {@code null}
     */
    public Pair(A a, B b)
    {
        this.a = a;
        this.b = b;
    }

    /** @return the first component */
    public A getA() {return a;}

    /** @return the second component */
    public B getB() {return b;}

    /** @return the pair formatted as {@code (a, b)} */
    @Override public String toString()
    {
        return "(" + a + ", " + b + ')';
    }

    /** Null-safe hash over both components; yields the same value as the classic 31-based formula. */
    @Override public int hashCode()
    {
        return java.util.Objects.hash(a, b);
    }

    /** Two pairs are equal iff both of their components are equal (null-safe). */
    @Override public boolean equals(Object obj)
    {
        if (this == obj) return true;
        if (!(obj instanceof Pair)) return false;
        Pair<?, ?> other = (Pair<?, ?>) obj;
        return java.util.Objects.equals(a, other.a) && java.util.Objects.equals(b, other.b);
    }
}
/**
 * Minimal stop watch for measuring elapsed time.
 * <p>
 * Time is taken from {@link System#nanoTime()}, which is monotonic, so measurements are not
 * distorted by adjustments of the system wall clock. A watch that was never started reports an
 * elapsed time of 0. Calling {@link #stop()} without a preceding {@link #start()} yields a
 * meaningless elapsed time. Instances are not thread-safe.
 *
 * @author Konrad Höffner, original version taken from Corey Goldberg (free license, see http://www.goldb.org/stopwatchjava.html).
 */
public class StopWatch
{
    private static final long NANOS_PER_MILLI = 1_000_000L;
    private static final long MILLIS_PER_SECOND = 1000L;

    private long startTime = 0;
    private long stopTime = 0;
    private boolean running = false;

    /** Starts (or restarts) the measurement from now. */
    public void start() {
        this.startTime = System.nanoTime();
        this.running = true;
    }

    /** Stops the measurement; the elapsed time stays fixed until the watch is started or stopped again. */
    public void stop() {
        this.stopTime = System.nanoTime();
        this.running = false;
    }

    /** @return the elapsed time in milliseconds: up to now while running, else between the last start and stop */
    public long getElapsedTime() {
        long end = running ? System.nanoTime() : stopTime;
        return (end - startTime) / NANOS_PER_MILLI;
    }

    /** @return the elapsed time in whole seconds, see {@link #getElapsedTime()} */
    public long getElapsedTimeSecs() {
        return getElapsedTime() / MILLIS_PER_SECOND;
    }

    /** @return the elapsed time in milliseconds as a decimal string */
    @Override
    public String toString()
    {
        return String.valueOf(getElapsedTime());
    }

    /** Sample usage. */
    public static void main(String[] args) {
        StopWatch s = new StopWatch();
        s.start();
        // code you want to time goes here
        s.stop();
        System.out.println("elapsed time in milliseconds: " + s.getElapsedTime());
    }
}
/**
 * Reader for the tab separated values format: a basic table format without escaping, with one
 * row per line and the columns of a row separated by tabulators.
 * <p>
 * Lines are trimmed, blank lines are skipped and consecutive tabulators count as a single
 * separator, so empty cells cannot be represented. Input is decoded as UTF-8.
 **/
public class TSVReader implements Closeable
{
    /** Charset used to decode the input, independent of the platform default. */
    private static final String CHARSET = "UTF-8";

    final Scanner in;
    /** The next non-blank line that was read ahead but not yet returned, or {@code null}. */
    String peekLine = null;

    /**
     * Constructs a new TSVReader which produces values scanned from the specified input stream.
     * The {@code throws} clause is kept only for source compatibility with existing callers.
     *
     * @param stream the stream to read from; it is closed by {@link #close()}
     */
    public TSVReader(InputStream stream) throws FileNotFoundException
    {
        in = new Scanner(stream, CHARSET);
    }

    /**
     * Constructs a new TSVReader which produces values scanned from the specified file.
     *
     * @param f the file to read from
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public TSVReader(File f) throws FileNotFoundException
    {
        in = new Scanner(f, CHARSET);
    }

    /**
     * Reads ahead to the next non-blank line, if there is one.
     * Implemented as a loop so that long runs of blank lines cannot overflow the call stack.
     *
     * @return true iff another row is available via {@link #nextTokens()}
     */
    public boolean hasNextTokens()
    {
        while (peekLine == null && in.hasNextLine())
        {
            String line = in.nextLine().trim();
            if (!line.isEmpty()) { peekLine = line; }
        }
        return peekLine != null;
    }

    /** @return the cells of the next non-blank row, or {@code null} if the input is exhausted */
    public String[] nextTokens()
    {
        if (!hasNextTokens()) return null;
        String[] tokens = peekLine.split("\t+");
        peekLine = null;
        return tokens;
    }

    /** Closes the underlying scanner and with it the input source. */
    @Override public void close() throws IOException
    {
        in.close();
    }
}
*/ 7 | public class Algorithm 8 | { 9 | 10 | public Template template(String cubeName, String question) 11 | { 12 | Template template = new WeightedTemplator(Cube.getInstance(cubeName)).buildTemplate(question); 13 | return template; 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/AnswerType.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.*; 4 | import de.konradhoeffner.commons.Pair; 5 | 6 | public enum AnswerType 7 | { 8 | UNCOUNTABLE,COUNT,COUNTABLE,TEMPORAL,AFFIRMATIVE,LOCATION,ENTITY; 9 | 10 | /** map of lower case question words to expected answer types*/ 11 | static public Map> ofQuestionWord = new HashMap<>(); 12 | 13 | static 14 | { 15 | // what and which debatable 16 | ofQuestionWord.put("what", EnumSet.of(UNCOUNTABLE,COUNTABLE,TEMPORAL,LOCATION,ENTITY)); 17 | ofQuestionWord.put("which", EnumSet.of(TEMPORAL,LOCATION,ENTITY)); 18 | ofQuestionWord.put("how many",EnumSet.of(COUNTABLE,COUNT)); 19 | ofQuestionWord.put("how much",EnumSet.of(UNCOUNTABLE)); 20 | ofQuestionWord.put("when", EnumSet.of(TEMPORAL)); 21 | Arrays.asList("is","do","are","where","was","did").stream() 22 | .forEach(w->ofQuestionWord.put(w, EnumSet.of(AFFIRMATIVE))); 23 | ofQuestionWord.put("where", EnumSet.of(LOCATION)); 24 | } 25 | 26 | static public EnumSet ofQuestion(String question) 27 | { 28 | return ofQuestionWord.get( 29 | ofQuestionWord.keySet().stream().filter(w->question.toLowerCase().startsWith(w)).findFirst().orElse("what")); // default to "what", which can be anything 30 | } 31 | 32 | static public Optional>> eatAndQuestionWord(String question) 33 | { 34 | Optional questionWord = ofQuestionWord.keySet().stream().filter(w->question.toLowerCase().startsWith(w)).findFirst(); 35 | if(!questionWord.isPresent()) {return Optional.empty();} 36 | return Optional.of(new Pair<>(questionWord.get(), 
ofQuestionWord.get(questionWord.get()))); 37 | } 38 | 39 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/Config.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.io.File; 4 | 5 | import org.kohsuke.args4j.Option; 6 | 7 | public enum Config 8 | { 9 | INSTANCE; 10 | 11 | @Option(name="-folder",metaVar="",usage="Output folder where CubeQA will put its cache and other files. The default value is local to the program if running outside a jar, otherwise $tempdir/cubeqa.") 12 | public File folder; 13 | 14 | @Option(name="-endpoint",usage="SPARQL endpoint URL") 15 | public String endpoint = "http://linkedspending.aksw.org/sparql"; 16 | 17 | @Option(name="-graphs",usage="space separated graph list") 18 | public String[] graphs = {"http://linkedgeodata.org/","http://linkedspending.aksw.org/","http://dbpedia.org"}; 19 | 20 | @Option(name="-intervalMinSimilarity",usage="Sets the minimum similarity for named entity detection in intervals") 21 | public double intervalMinSimilarity = 0.3; 22 | 23 | @Option(name="-indexNonExactMatchMinLength") 24 | public int indexNonExactMatchMinLength = 6; 25 | 26 | @Option(name="-indexMinLuceneScore") 27 | public float indexMinLuceneScore = 3; 28 | 29 | @Option(name="-indexMinScore") 30 | public double indexMinScore = 0.4; 31 | 32 | public double placeMinScore = 0.7; 33 | 34 | @Option(name="-scorerPropertyNameMinScore") 35 | public double scorerPropertyNameMinScore = 0.6; 36 | 37 | public enum IndexQueries {EXACT,FUZZY,ANALYZED,BOTH} 38 | 39 | @Option(name="-indexQueries") 40 | public IndexQueries indexQueries = IndexQueries.BOTH; 41 | 42 | @Option(name="-boostTemporal") 43 | public double boostTemporal = 0.99; 44 | @Option(name="-boostNumeric") 45 | public double boostNumeric = 0.98; 46 | @Option(name="-boostString") 47 | public double boostString = 0.95; 48 | 49 | // change may require cache 
deletion to take effect 50 | @Option(name="-useManualLabels") 51 | public boolean useManualLabels = false; 52 | 53 | @Option(name="-useCubeCache") 54 | public boolean USE_CUBE_CACHE = true; 55 | 56 | public boolean removeStopWords = true; 57 | 58 | public boolean useDefaultAnswerProperty = true; 59 | 60 | /** For values which are only referenced by value, not by property name. 61 | Happens very often in practice (e.g. most people say "in 2010" and not "in the year of 2010") so I recommend to set the config parameter to true. */ 62 | public boolean findNamelessReferences = true; 63 | 64 | /** True, iff datasets from the benchmark are predetermined (algorithm doesn't have to search for it based on the query, leading to higher performance). */ 65 | public boolean givenDataSets= true; 66 | 67 | public boolean useAnswerTypes = true; 68 | 69 | // @Option(name="-indexDoNonExactMatch") 70 | // public boolean indexDoNonExactMatch = true; 71 | // 72 | // @Option(name="-indexDoAnalyzedMatching") 73 | // public boolean indexDoAnalyzedMatching = true; 74 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/Cube.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.*; 4 | import java.io.*; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import org.apache.commons.collections4.MultiValuedMap; 7 | import org.apache.commons.collections4.multimap.HashSetValuedHashMap; 8 | import org.apache.jena.query.QuerySolution; 9 | import org.apache.jena.query.ResultSet; 10 | import org.apache.jena.rdf.model.RDFNode; 11 | import de.konradhoeffner.commons.TSVReader; 12 | import lombok.RequiredArgsConstructor; 13 | import lombok.ToString; 14 | 15 | /** Represents an RDF Data Cube with its component properties */ 16 | @RequiredArgsConstructor 17 | @ToString(of="uri") 18 | public class Cube implements Serializable 19 | { 20 | private static 
final long serialVersionUID = 1L; 21 | 22 | public final String name; 23 | public final String uri; 24 | public final String label; 25 | public final String comment; 26 | 27 | // public final Set labels = new TreeSet(); 28 | public final Map properties; 29 | 30 | public final CubeSparql sparql; 31 | 32 | static Map instances = new HashMap<>(); 33 | /** manually created additional labels in case the original labels are not good enough*/ 34 | public final MultiValuedMap manualLabels; 35 | 36 | static private File cacheFolder = Files.localFolder("cache"); 37 | static {cacheFolder.mkdir();} 38 | 39 | static public Cube finlandAid() 40 | { 41 | return Cube.getInstance("finland-aid"); 42 | } 43 | 44 | static String extractName(RDFNode node) 45 | { 46 | String uri = node.asResource().getURI(); 47 | return uri.substring(uri.lastIndexOf("/")+1); 48 | } 49 | 50 | static public String linkedSpendingUri(String name) {return "http://linkedspending.aksw.org/instance/"+name;} 51 | static public String linkedSpendingCubeName(String uri) {return uri.replaceAll("http://linkedspending.aksw.org/instance/", "");} 52 | 53 | public String probablyUniqueAsciiId() 54 | { 55 | return uri.replaceAll("[^A-Za-z0-9]", ""); 56 | } 57 | 58 | private static File cubeFile(String cubeName) {return new File(cacheFolder, cubeName+".ser");} 59 | 60 | private static synchronized Optional loadCube(String cubeName) 61 | { 62 | File f = cubeFile(cubeName); 63 | try(ObjectInputStream in = new ObjectInputStream(new FileInputStream(f))) 64 | { 65 | return Optional.of((Cube)in.readObject()); 66 | } 67 | catch(InvalidClassException e) {f.delete();return Optional.empty();} 68 | catch (FileNotFoundException e) {return Optional.empty();} 69 | catch (ClassNotFoundException | IOException e) {throw new RuntimeException("Error loading cube file "+f,e);} 70 | } 71 | 72 | private void save() 73 | { 74 | System.setProperty("sun.io.serialization.extendedDebugInfo", "true"); 75 | synchronized(Cube.class) 76 | { 77 | 
try(ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(cubeFile(this.name)))) 78 | { 79 | out.writeObject(this); 80 | } 81 | catch (IOException e) {throw new RuntimeException(e);} 82 | } 83 | } 84 | 85 | public static synchronized Cube getInstance(String cubeName) 86 | { 87 | Cube c = instances.get(cubeName); 88 | if(c==null) 89 | { 90 | if(Config.INSTANCE.USE_CUBE_CACHE) 91 | { 92 | Optional loadedCube = loadCube(cubeName); 93 | if(loadedCube.isPresent()) 94 | { 95 | c = loadedCube.get(); 96 | instances.put(cubeName, c); 97 | return c; 98 | }; 99 | } 100 | String uri = linkedSpendingUri(cubeName); 101 | 102 | Map properties = new HashMap<>(); 103 | 104 | String cubeLabel = ""; 105 | String cubeComment = ""; 106 | String labelCommentQuery = "select * {<"+uri+"> rdfs:label ?label. <"+uri+"> rdfs:comment ?comment.}"; 107 | try 108 | { 109 | QuerySolution qsLcq = CubeSparql.getLinkedSpendingInstanceForName(cubeName).select(labelCommentQuery).nextSolution(); 110 | cubeLabel = qsLcq.contains("label")?qsLcq.get("label").asLiteral().getLexicalForm():""; 111 | cubeComment = qsLcq.contains("comment")?qsLcq.get("comment").asLiteral().getLexicalForm():""; 112 | } catch(NoSuchElementException e) {throw new RuntimeException("Error with query: "+labelCommentQuery,e);} 113 | 114 | String query = "select distinct ?p "+// //?type ?label "+ 115 | "from "+ 116 | "from "+ 117 | "{"+ 118 | " ls:"+cubeName+" qb:structure ?dsd. ?dsd qb:component ?comp."+ 119 | " {?comp qb:dimension ?p.} UNION {?comp qb:attribute ?p.} UNION {?comp qb:measure ?p.} "+ 120 | // " ?p a ?type. 
FILTER (?type != <"+RDF.Property.getURI()+"> && ?type != <"+DataModel.DataCube.ComponentProperty.getURI()+">)"+ 121 | // " OPTIONAL {?p rdfs:label ?label}"+ 122 | "}"; 123 | ResultSet rs = CubeSparql.getLinkedSpendingInstanceForName(cubeName).select(query); 124 | 125 | MultiValuedMap manualLabels = new HashSetValuedHashMap<>(); 126 | 127 | if(Config.INSTANCE.useManualLabels) 128 | { 129 | try(InputStream labelStream = Cube.class.getClassLoader().getResourceAsStream(cubeName+"/manuallabels.tsv")) 130 | { 131 | // if(labelStream==null) throw new RuntimeException("manual labels not found");// for testing 132 | if(labelStream!=null) 133 | { 134 | try(TSVReader reader = new TSVReader(labelStream)) 135 | { 136 | while(reader.hasNextTokens()) 137 | { 138 | String[] tokens = reader.nextTokens(); 139 | Arrays.stream(tokens, 1, tokens.length).forEach(label->manualLabels.put(tokens[0], label)); 140 | } 141 | } 142 | } 143 | } 144 | catch (IOException e) {throw new RuntimeException("Exception reading additional labels from tsv file.",e);} 145 | } 146 | // TODO: make the multi map unmodifiable 147 | // at this point, the properties are not yet initialized. 
This is done afterwards as the cube instance is needed for some ComponentProperty.getInstance() 148 | c = new Cube(cubeName,uri,cubeLabel,cubeComment, properties,CubeSparql.getLinkedSpendingInstanceForUri(uri),manualLabels); 149 | instances.put(cubeName, c); 150 | while(rs.hasNext()) 151 | { 152 | QuerySolution qs = rs.nextSolution(); 153 | 154 | // because of ComponentProperty's multiton pattern, having the same property multiple times is not a problem and in fact necessary for multiple labels 155 | String propertyUri = qs.get("p").asResource().getURI(); 156 | 157 | ComponentProperty property = ComponentProperty.getInstance(c, propertyUri);//, qs.get("type").asResource().getURI()); 158 | // only properties with scorer are useful for us 159 | properties.put(propertyUri, property); 160 | // if(qs.contains("label")) {property.labels.add(qs.get("label").asLiteral().getLexicalForm());} 161 | } 162 | } 163 | c.save(); 164 | return c; 165 | } 166 | 167 | // static Set fromEndpoint(String endpointUrl, String cubeName) 168 | // { 169 | // Set cubes = new HashSet<>(); 170 | // String query = "select ?qb ?id {?qb a qb:DataSet. 
}"; 171 | // return cubes; 172 | // } 173 | 174 | @Override public int hashCode() {return uri.hashCode();} 175 | 176 | @Override public boolean equals(Object obj) 177 | { 178 | if(!(obj instanceof Cube)) { 179 | return false; 180 | } 181 | return this.uri.equals(((Cube)obj).uri); 182 | } 183 | 184 | public ComponentProperty getDefaultAnswerProperty() 185 | { 186 | return ComponentProperty.getInstance(this,"http://linkedspending.aksw.org/ontology/"+name+"-amount"); 187 | } 188 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/CubeSparql.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.io.ByteArrayOutputStream; 6 | import java.io.Serializable; 7 | import org.apache.jena.query.*; 8 | import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; 9 | import org.apache.jena.sparql.exec.http.QueryExecutionHTTPBuilder; 10 | import org.apache.jena.vocabulary.DCTerms; 11 | import org.apache.jena.vocabulary.RDFS; 12 | import org.apache.jena.vocabulary.XSD; 13 | 14 | import de.konradhoeffner.commons.StopWatch; 15 | import org.aksw.cubeqa.rdf.DataCube; 16 | 17 | /** Interface to SPARQL. 
*/ 18 | public class CubeSparql implements Serializable 19 | { 20 | private static final long serialVersionUID = 1L; 21 | 22 | static private CubeSparql finlandAid = null; 23 | static public synchronized CubeSparql finlandAid() 24 | { 25 | if(finlandAid==null) {finlandAid = getLinkedSpendingInstanceForName("finland-aid");} 26 | return finlandAid; 27 | } 28 | 29 | public final String cubeUri; 30 | public final String prefixInstance; 31 | public final String prefixOntology; 32 | public final String superGraph; 33 | private final String endpoint; 34 | private final String prefixes; 35 | private List defaultGraphs = new ArrayList<>(); 36 | 37 | static public CubeSparql getLinkedSpendingInstanceForName(String cubeName) 38 | { 39 | return getLinkedSpendingInstanceForUri(Cube.linkedSpendingUri(cubeName)); 40 | } 41 | 42 | static public CubeSparql getLinkedSpendingInstanceForUri(String cubeUri) 43 | { 44 | CubeSparql cs = new CubeSparql(cubeUri, 45 | "http://linkedspending.aksw.org/instance/", 46 | "http://linkedspending.aksw.org/ontology/", 47 | "http://linkedspending.aksw.org/", 48 | // local Virtuoso SPARQL endpoint has a NAN bug 49 | // "http://localhost:8890/sparql"); 50 | // "http://linkedspending.aksw.org/sparql"); 51 | "http://cubeqa.aksw.org/sparql"); 52 | cs.defaultGraphs.add("http://linkedspending.aksw.org/ontology/"); 53 | cs.defaultGraphs.add("http://linkedspending.aksw.org/"+cubeUri.substring(cubeUri.lastIndexOf('/')+1)); 54 | return cs; 55 | } 56 | 57 | public CubeSparql(String cubeUri, String prefixInstance, String prefixOntology, String superGraph, String endpoint) 58 | { 59 | this.cubeUri=cubeUri; 60 | this.prefixInstance = prefixInstance; 61 | this.prefixOntology = prefixOntology; 62 | this.superGraph = superGraph; 63 | this.endpoint = endpoint; 64 | this.prefixes = "prefix dcterms: <"+DCTerms.getURI()+">\n" 65 | // +">\n prefix lso: <"+prefixOntology 66 | +"prefix : <"+prefixInstance+">\n" 67 | +"prefix ls: \n" 68 | +"prefix qb: <"+DataCube.BASE+">\n" 
69 | +"prefix xsd: <"+XSD.NS+">\n" 70 | +"prefix rdfs: <"+RDFS.uri+">\n"; 71 | } 72 | 73 | String cubeUrl(String datasetName) {return prefixInstance+datasetName;} 74 | 75 | public boolean ask(String query) 76 | { 77 | StopWatch watch = StopWatches.INSTANCE.getWatch("sparql"); 78 | watch.start(); 79 | QueryExecutionHTTPBuilder builder = QueryExecutionHTTP.create().endpoint(endpoint).query(prefixes+query); 80 | defaultGraphs.forEach(builder::addDefaultGraphURI); 81 | try(QueryExecution qe = builder.build()) 82 | { 83 | return qe.execAsk(); 84 | } catch(Exception e) {throw new RuntimeException("Error on SPARQL ASK on endpoint "+endpoint+" with query:\n"+query,e);} 85 | finally {watch.stop();} 86 | } 87 | 88 | public ResultSetRewindable select(String query) 89 | { 90 | query = prefixes+query; 91 | StopWatch watch = StopWatches.INSTANCE.getWatch("sparql"); 92 | watch.start(); 93 | QueryExecutionHTTPBuilder builder = QueryExecutionHTTP.create().endpoint(endpoint); 94 | defaultGraphs.forEach(builder::addDefaultGraphURI); 95 | try(QueryExecution qe = builder.query(query).build()) 96 | { 97 | 98 | return ResultSetFactory.copyResults(qe.execSelect()); 99 | } catch(Exception e) {throw new RuntimeException("Error on SPARQL SELECT on endpoint "+endpoint+" with query:\n"+query,e);} 100 | finally {watch.stop();} 101 | } 102 | 103 | public static String suffix(String uri) 104 | { 105 | return uri.substring(Math.max(uri.lastIndexOf('/'),uri.lastIndexOf('#'))+1); 106 | } 107 | 108 | public static String jsonQueryResults(ResultSet rs) 109 | { 110 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 111 | ResultSetFormatter.outputAsJSON(outputStream, ResultSetFactory.copyResults(rs)); 112 | return new String(outputStream.toByteArray()); 113 | } 114 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/Files.java: -------------------------------------------------------------------------------- 1 | package 
org.aksw.cubeqa; 2 | 3 | import java.io.File; 4 | 5 | /** Determines the location of output files. Prefered location is cacheFolder config. */ 6 | public class Files 7 | { 8 | /** Returns Tests if this code runs from inside a jar file. 9 | * @return true if running inside a jar, otherwise false */ 10 | public static boolean isRunningInJar() 11 | { 12 | String className = Files.class.getName().replace('.', '/'); 13 | String classJar = Files.class.getResource("/" + className + ".class").toString(); 14 | return classJar.startsWith("jar:"); 15 | } 16 | 17 | /** 18 | * @param name the folder name 19 | * @return a folder local to the code, if running outside a jar, otherwise a folder in the temporary directory. 20 | * Warning: temporary directories may be deleted at system reboot. */ 21 | public static File localFolder(String name) 22 | { 23 | if(Config.INSTANCE.folder!=null) return new File(Config.INSTANCE.folder,"name"); 24 | return new File(new File(isRunningInJar()?System.getProperty("java.io.tmpdir"):"."),name); 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/Replacer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.regex.Matcher; 6 | import java.util.regex.Pattern; 7 | 8 | public class Replacer 9 | { 10 | static private Map numbers = new HashMap<>(); 11 | 12 | static private Pattern pattern(String numberRef) 13 | { 14 | return Pattern.compile("([0-9]([.,][0-9]*)?) 
"+numberRef); 15 | } 16 | static 17 | { 18 | numbers.put(pattern("hundred"),100); 19 | numbers.put(pattern("thousand"),1000); 20 | numbers.put(pattern("million"),1000_000); 21 | numbers.put(pattern("billion"),1000_000_000); 22 | } 23 | 24 | public static String replace(String query) 25 | { 26 | for(Pattern p: numbers.keySet()) 27 | { 28 | Matcher m = p.matcher(query); 29 | while(m.find()) 30 | { 31 | double d = Double.valueOf(m.group(1).replace(',', '.')); 32 | query = query.replace(m.group(0), String.valueOf((int)(d*numbers.get(p)))); 33 | } 34 | } 35 | return query; 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/StanfordTrees.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import edu.stanford.nlp.trees.Tree; 4 | import java.util.List; 5 | import java.util.Set; 6 | 7 | /** Utility class for stanford trees. 8 | * @author Konrad Höffner */ 9 | public class StanfordTrees 10 | { 11 | static public String phrase(Tree tree) {return tree.getLeaves().toString().replace(", ", " ").replaceAll("[\\[\\]]", "").trim();} 12 | 13 | // tree.remove is unsupported 14 | static public void removeChild(Tree tree, Tree child) 15 | { 16 | List children = tree.getChildrenAsList(); 17 | children.remove(child); 18 | tree.setChildren(children); 19 | } 20 | 21 | static public void removeChildren(Tree tree, Set children) 22 | { 23 | List allChildren = tree.getChildrenAsList(); 24 | allChildren.removeAll(children); 25 | tree.setChildren(allChildren); 26 | } 27 | 28 | static public void removeSubtree(Tree tree, Tree child) 29 | { 30 | List children = tree.getChildrenAsList(); 31 | children.remove(child); 32 | tree.setChildren(children); 33 | for(Tree subTree: tree.getChildrenAsList()) removeSubtree(subTree, child); 34 | } 35 | 36 | static public boolean isTag(Tree tree, String tag) {return tree.label().value().equals(tag);} 37 | } 
-------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/StopWatches.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import de.konradhoeffner.commons.StopWatch; 6 | import org.apache.commons.collections4.MultiValuedMap; 7 | import org.apache.commons.collections4.multimap.HashSetValuedHashMap; 8 | import lombok.ToString; 9 | 10 | @ToString 11 | public enum StopWatches 12 | { 13 | INSTANCE; 14 | 15 | MultiValuedMap watches = new HashSetValuedHashMap<>(); 16 | 17 | public StopWatch getWatch(String category) 18 | { 19 | StopWatch watch = new StopWatch(); 20 | watches.put(category, watch); 21 | return watch; 22 | } 23 | 24 | public Map elapsedTimesMs() 25 | { 26 | Map times = new HashMap<>(); 27 | watches.keySet().stream().forEachOrdered(c->times.put(c, watches.get(c).stream().mapToLong(StopWatch::getElapsedTime).sum())); 28 | return times; 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/Stopwords.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.*; 4 | 5 | public class Stopwords 6 | { 7 | static public final Set STOPWORDS = 8 | new HashSet<>(Arrays.asList( 9 | "a", "an", /*"and",*/ "are", "as", "at", "be", "but", "by", 10 | "for", "if", "in", "into", "is", "it", 11 | "no", "not", "of", "on", "or", "such", 12 | "that", "the", "their", "then", "there", "these", 13 | "they", "this", "to", "was", "will", "with" 14 | ,"does","do","did")); 15 | 16 | static public final Set QUESTION_WORDS = 17 | new HashSet<>(Arrays.asList( 18 | "what","how many","how","is","why","will","where","when")); 19 | 20 | static public final Set PROPERTY_WORDS = 21 | new HashSet<>(Arrays.asList( 22 | "reference","recipient")); 23 | 24 | static public 
/** Word lists and a helper to strip those words from a phrase. */
public class Stopwords
{
	static public final Set<String> STOPWORDS =
			new HashSet<>(Arrays.asList(
					"a", "an", /*"and",*/ "are", "as", "at", "be", "but", "by",
					"for", "if", "in", "into", "is", "it",
					"no", "not", "of", "on", "or", "such",
					"that", "the", "their", "then", "there", "these",
					"they", "this", "to", "was", "will", "with"
					,"does","do","did"));

	static public final Set<String> QUESTION_WORDS =
			new HashSet<>(Arrays.asList(
					"what","how many","how","is","why","will","where","when"));

	static public final Set<String> PROPERTY_WORDS =
			new HashSet<>(Arrays.asList(
					"reference","recipient"));

	static public final Set<String> FINLAND_AID_WORDS =
			new HashSet<>(Arrays.asList(
					"aid"));

	/** Removes all occurrences of the given words from a phrase, ignoring case.
	 * A word is only removed where it is delimited by single spaces or by the start or end of the phrase.
	 * A phrase that consists of nothing but one of the words is kept.
	 * @param s the phrase to clean
	 * @param words the words (or multi-word expressions) to remove
	 * @return the phrase without the words, with runs of spaces collapsed and without leading or trailing whitespace */
	public static String remove(String s, Set<String> words)
	{
		// longest first, so that "how many" is removed as a whole before "how" can break it up; ties are ordered alphabetically
		List<String> ordered = new ArrayList<>(words);
		ordered.sort(Comparator.<String>comparingInt(String::length).reversed().thenComparing(Comparator.naturalOrder()));
		for(String word: ordered)
		{
			// quote the word so that regex metacharacters in it are matched literally
			String quoted = java.util.regex.Pattern.quote(word);
			// the lookahead leaves the trailing space in place, so that directly repeated words are all matched
			s=s.replaceAll("(?i) "+quoted+"(?= )","");
			s=s.replaceAll("(?i)^(?:"+quoted+" )+","");
			s=s.replaceAll("(?i)(?: "+quoted+")+$","");
			s=s.replaceAll(" {2,}"," ");
		}
		return s.trim();
	}

}
/** Helper methods for navigating W3C DOM trees. */
public class Nodes
{
	/** converts a NodeList to java.util.List of Node */
	static List<Node> list(NodeList nodeList)
	{
		List<Node> list = new ArrayList<>(nodeList.getLength());
		for(int i=0;i<nodeList.getLength();i++) {list.add(nodeList.item(i));}
		return list;
	}

	/** @return all elements with the given tag name that {@link Element#getElementsByTagName(String)} reports for e,
	 * that is all descendants with that name, not just direct children */
	static List<Element> childElements(Element e, String tagName)
	{
		return list(e.getElementsByTagName(tagName)).stream().map(node->(Element)node).collect(Collectors.toList());
	}

	/** @return the direct children of e that are elements, in document order */
	static List<Element> childElements(Element e)
	{
		return list(e.getChildNodes()).stream().filter(c->c.getNodeType()==Node.ELEMENT_NODE).map(node->(Element)node).collect(Collectors.toList());
	}

	/** returns only the text directly in the tag but not in subtags.*/
	static String directTextContent(Node node)
	{
		return
				list(node.getChildNodes()).stream()
				.filter(c->c.getNodeType()==Node.TEXT_NODE)
				.map(Node::getTextContent)
				.collect(Collectors.joining());
	}
}
super.add(f.apply(e)); 16 | } 17 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/benchmark/Performance.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.benchmark; 2 | 3 | import java.util.*; 4 | import lombok.*; 5 | import lombok.extern.slf4j.Slf4j; 6 | 7 | /** Represents and calculates precision, recall and f-score. */ 8 | @RequiredArgsConstructor 9 | @EqualsAndHashCode 10 | @Slf4j 11 | @Getter 12 | public class Performance 13 | { 14 | public final double precision; 15 | public final double recall; 16 | public final boolean empty; 17 | public String jsonAnswer; 18 | public String query = null; 19 | 20 | 21 | public static final Performance performance(Set> correct, Set> found) 22 | { 23 | return performance(correct,found,false); 24 | } 25 | 26 | public static final Performance performance(Set> correct, Set> found, boolean alreadyNormalized) 27 | { 28 | if(correct.isEmpty()) { 29 | throw new IllegalArgumentException("correct answer is empty"); 30 | } 31 | if(found.isEmpty()) { 32 | return new Performance(0,0,true); 33 | } 34 | // align maps so that the keys are named the same 35 | // TODO: improve this, instanceofs are messy 36 | Map firstCorrect = correct.iterator().next(); 37 | 38 | if(!alreadyNormalized&&!firstCorrect.containsKey("")) 39 | { 40 | Map firstFound = found.iterator().next(); 41 | if(firstFound.containsKey("")) { 42 | return new Performance(0,0,true); 43 | } 44 | 45 | // TODO this may fail on optionals 46 | if(firstCorrect.size()!=firstFound.size()) {return new Performance(0,0,true);}// unequal dimension count 47 | if(firstCorrect.size()>2) { 48 | throw new RuntimeException("more than 2 answer dimensions not supported"); 49 | } 50 | // TODO this is so ugly but it's late at night and deadline in 12 hours, improve later 51 | Iterator it = firstCorrect.keySet().iterator(); 52 | String key1 = it.next(); 53 | String key2 = 
it.next(); 54 | 55 | Set> normalizedFoundMap1 = new HashSet<>(); 56 | Set> normalizedFoundMap2 = new HashSet<>(); 57 | for(Map m: correct) 58 | { 59 | Map nm1 = new HashMap<>(); 60 | Map nm2 = new HashMap<>(); 61 | // this is so ugly, fix problem at an earlier stage then bandaid here 62 | nm1.put(key1,m.get(key1)); 63 | nm1.put(key2,m.get(key2)); 64 | nm2.put(key1,m.get(key2)); 65 | nm2.put(key2,m.get(key1)); 66 | normalizedFoundMap1.add(nm1); 67 | normalizedFoundMap2.add(nm2); 68 | } 69 | Performance p1 = performance(correct, normalizedFoundMap1, true); 70 | Performance p2 = performance(correct, normalizedFoundMap2, true); 71 | return p1.fscore()>p2.fscore()?p1:p2; 72 | 73 | // throw new RuntimeException("its a map"); 74 | } 75 | Set correctFound = new HashSet<>(found); 76 | log.info("{}",correct); 77 | log.info("{}",found); 78 | correctFound.retainAll(correct); 79 | return performance(correct.size(),found.size(),correctFound.size()); 80 | } 81 | 82 | public static final Performance performance(int correct, int found, int correctFound) 83 | { 84 | if(correct==0) 85 | { 86 | // throw new IllegalArgumentException("correct==0"); 87 | log.error("no correct answer"); 88 | return new Performance(0, 0,true); 89 | } 90 | if(found==0) { 91 | return new Performance(0,0,true); 92 | } 93 | return new Performance((double)correctFound/found,(double)correctFound/correct,false); 94 | } 95 | 96 | double fscore() {return fscore(1);} 97 | 98 | double fscore(double beta) 99 | { 100 | if(precision+recall==0) { 101 | return 0; 102 | } 103 | return (1+beta*beta)*(precision*recall)/(beta*beta*precision+recall); 104 | } 105 | 106 | @Override public String toString() 107 | { 108 | return "Performance(precision="+precision+", recall="+recall+", f-score="+fscore()+")"; 109 | } 110 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/benchmark/Question.java: 
-------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.benchmark; 2 | 3 | import java.util.*; 4 | import lombok.EqualsAndHashCode; 5 | import lombok.ToString; 6 | 7 | /**A single benchmark question along with a correct SPARQL query.*/ 8 | @ToString 9 | @EqualsAndHashCode 10 | public class Question 11 | { 12 | public final String cubeUri; 13 | /** The natural language question representation.*/ 14 | public final String string; 15 | /** correct SPARQL query to answer the question*/ 16 | public final String query; 17 | /** answer set containing a map from variable name to value*/ 18 | public final Set> answers; 19 | public final Map dataTypes; 20 | public String jsonAnswer; 21 | 22 | public Question(String cubeUri, String string, String query) 23 | { 24 | this.cubeUri=cubeUri; 25 | this.string=string; 26 | this.query=query; 27 | this.answers=null; 28 | this.dataTypes=null; 29 | } 30 | 31 | public Question(String cubeUri, String string, String query, Set> answers, Map answerTypes) 32 | { 33 | this.cubeUri=cubeUri; 34 | this.string=string; 35 | this.query=query; 36 | // if(answers.isEmpty()) {throw new IllegalArgumentException("empty answer set");} 37 | // answers.stream().filter(Map::isEmpty).findFirst().ifPresent 38 | // (x->{throw new IllegalArgumentException("empty answer for question "+string+", query "+query);}); 39 | this.answers=Collections.unmodifiableSet(answers); 40 | this.dataTypes=Collections.unmodifiableMap(answerTypes); 41 | } 42 | 43 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/benchmark/package-info.java: -------------------------------------------------------------------------------- 1 | /** Sets of questions that form a benchmark to evaluate RDF Data Cube question answering. 
/** The SPARQL aggregate functions known to the detectors. */
public enum Aggregate
{
	MIN,
	MAX,
	COUNT,
	AVG,
	SUM
}
the department with the highest budget).*/ 13 | @Slf4j 14 | public enum AggregateDetector implements Detector 15 | { 16 | INSTANCE; 17 | 18 | public final Map aggregateMap; 19 | 20 | @SneakyThrows 21 | private AggregateDetector() 22 | { 23 | Map aggregateMap = new HashMap<>(); 24 | try(TSVReader in = new TSVReader(this.getClass().getClassLoader().getResourceAsStream("aggregatemapping.tsv"))) 25 | { 26 | while(in.hasNextTokens()) 27 | { 28 | String[] tokens = in.nextTokens(); 29 | // lowercase 30 | aggregateMap.put(tokens[0].toLowerCase(),Aggregate.valueOf(tokens[1].toUpperCase())); 31 | // Capitalized 32 | aggregateMap.put((tokens[0].charAt(0)+"").toUpperCase()+tokens[0].substring(1),Aggregate.valueOf(tokens[1].toUpperCase())); 33 | } 34 | } 35 | this.aggregateMap = Collections.unmodifiableMap(aggregateMap); 36 | } 37 | 38 | static public Set aggregatesContained(String phrase) 39 | { 40 | return INSTANCE.aggregateMap.keySet().stream() 41 | .filter(phrase::contains).map(INSTANCE.aggregateMap::get).collect(Collectors.toSet()); 42 | } 43 | 44 | @Override public Set detect(Cube cube, String phrase) 45 | { 46 | Set fragments = new HashSet<>(); 47 | // for now only return up to one aggregate 48 | aggregateMap.keySet().stream().filter(phrase::contains).findFirst().ifPresent(s-> 49 | { 50 | Aggregate aggregate = aggregateMap.get(s); 51 | fragments.add(new Fragment(cube, s, 52 | Collections.emptySet(), Collections.emptySet(), Collections.emptySet(), Collections.singleton(aggregate), Collections.emptySet())); 53 | log.debug("Found aggregate "+aggregate+ " in phrase '"+s+"'"); 54 | }); 55 | return fragments; 56 | } 57 | 58 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/AggregateMapping.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import java.util.Collections; 4 | import java.util.HashMap; 5 | import 
java.util.Map; 6 | import java.util.Set; 7 | import java.util.stream.Collectors; 8 | import lombok.SneakyThrows; 9 | import de.konradhoeffner.commons.TSVReader; 10 | 11 | /** Mapping from natural language phrase to SPARQL aggregate function.*/ 12 | public enum AggregateMapping 13 | { 14 | INSTANCE; 15 | 16 | public final Map aggregateMap; 17 | 18 | @SneakyThrows 19 | private AggregateMapping() 20 | { 21 | Map aggregateMap = new HashMap<>(); 22 | try(TSVReader in = new TSVReader(this.getClass().getClassLoader().getResourceAsStream("aggregatemapping.tsv"))) 23 | { 24 | while(in.hasNextTokens()) 25 | { 26 | String[] tokens = in.nextTokens(); 27 | aggregateMap.put(tokens[0],Aggregate.valueOf(tokens[1].toUpperCase())); 28 | } 29 | } 30 | this.aggregateMap = Collections.unmodifiableMap(aggregateMap); 31 | } 32 | 33 | static public Set aggregatesContained(String phrase) 34 | { 35 | return INSTANCE.aggregateMap.keySet().stream() 36 | .filter(phrase::contains).map(INSTANCE.aggregateMap::get).collect(Collectors.toSet()); 37 | } 38 | 39 | static public Set aggregatesReferenced(String phrase) 40 | { 41 | return INSTANCE.aggregateMap.keySet().stream() 42 | .filter(phrase::equalsIgnoreCase).map(INSTANCE.aggregateMap::get).collect(Collectors.toSet()); 43 | } 44 | 45 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/Detector.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import java.util.*; 4 | import org.aksw.cubeqa.Cube; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import org.aksw.cubeqa.property.scorer.ScoreResult; 7 | import org.aksw.cubeqa.property.scorer.Scorers; 8 | import org.aksw.cubeqa.template.Fragment; 9 | 10 | /**Abstract class for a Detector, which is called before scorers and transforms certain keyphrases into additional query modifiers, such as aggregates. 
11 | * A detector can find several or no matches in a phrase. 12 | * */ 13 | public interface Detector 14 | { 15 | /** Detection is supposed to not overlap in phrases.*/ 16 | public Set detect(Cube cube, String phrase); 17 | 18 | // TODO: generalize this,as per time detector always uses finland aid as of now 19 | public static final List DETECTORS = Arrays.asList(HalfInfiniteIntervalDetector.INSTANCE,TopDetector.INSTANCE,PerTimeDetector.INSTANCE,InYearDetector.INSTANCE,InPlaceDetector.INSTANCE,AggregateDetector.INSTANCE); 20 | 21 | static final String PHRASE_REGEX = "([a-zA-Züöäéèô'-]+(\\s[a-zA-Züöäéèô,'-]+)*)"; 22 | static final String WORD_REGEX = "([a-zA-Züöäéèô'-]+)"; 23 | 24 | static public Set matchPart(Cube cube, String phrase) 25 | { 26 | Set partScores = new HashSet<>(); 27 | String[] tokens = phrase.split("\\s"); 28 | StringBuilder sb = new StringBuilder(); 29 | for(int i=0;i0) sb.append(" "); 32 | sb.append(tokens[i]); 33 | String part = sb.toString(); 34 | Map nameRefs = Scorers.scorePhraseProperties(cube,part); 35 | nameRefs.entrySet().forEach(e-> 36 | { 37 | partScores.add(new ScoreResult(e.getKey(), part, e.getValue()));//part.length()/phrase.length() 38 | }); 39 | } 40 | return partScores; 41 | } 42 | 43 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/HalfInfiniteIntervalDetector.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import static org.aksw.cubeqa.detector.IntervalType.*; 4 | import java.util.*; 5 | import java.util.Map.Entry; 6 | import java.util.regex.Matcher; 7 | import java.util.regex.Pattern; 8 | import java.util.stream.Collectors; 9 | import org.aksw.cubeqa.Cube; 10 | import org.aksw.cubeqa.property.scorer.ScoreResult; 11 | import org.aksw.cubeqa.restriction.*; 12 | import org.aksw.cubeqa.template.Fragment; 13 | import lombok.*; 14 | import lombok.extern.slf4j.Slf4j; 15 | 16 
| /**Detects numerical intervals with one infinite endpoint.*/ 17 | @Slf4j 18 | public enum HalfInfiniteIntervalDetector implements Detector 19 | { 20 | INSTANCE; 21 | 22 | private static final double MIN_SIMILARITY = 0.3; 23 | final static String[][] KEYWORDS = new String[][] { { ">", "more than", "larger than" }, 24 | { ">=", "at least", "no less than" }, { "<", "less than", "smaller than" }, { "at most", "up to including" } }; 25 | final static IntervalType[] INTERVAL_TYPES = { LEFT_OPEN, LEFT_CLOSED, RIGHT_OPEN, RIGHT_CLOSED }; 26 | 27 | /** The matching group of a regular expression which contains the number (endpoint) of the interval.**/ 28 | private static final Map NUMBER_GROUP = new HashMap<>(); 29 | /** The matching group of a regular expression which contains the phrase (unit) of the interval.**/ 30 | private static final Map PHRASE_GROUP = new HashMap<>(); 31 | 32 | final static Map PATTERN_TO_TYPE; 33 | 34 | static 35 | { 36 | Map patternToType = new HashMap<>(); 37 | for (int i = 0; i < KEYWORDS.length; i++) 38 | { 39 | final int ii = i; 40 | Arrays.stream( 41 | KEYWORDS[ii]).map( 42 | HalfInfiniteIntervalDetector::patterns).flatMap( 43 | Set::stream).forEach( 44 | p -> patternToType.put( 45 | p, 46 | INTERVAL_TYPES[ii])); 47 | } 48 | PATTERN_TO_TYPE = Collections.unmodifiableMap(patternToType); 49 | } 50 | 51 | private static Set patterns(String keyword) 52 | { 53 | Set patterns = new HashSet<>(); 54 | { 55 | Pattern p = Pattern.compile( 56 | keyword + "\\s+(\\d+)\\s+" + PHRASE_REGEX, 57 | Pattern.CASE_INSENSITIVE); 58 | patterns.add(p); 59 | NUMBER_GROUP.put(p, 1); 60 | PHRASE_GROUP.put(p, 2); 61 | } 62 | { 63 | Pattern p = Pattern.compile( 64 | PHRASE_REGEX + "\\s+(of )?" 
+ keyword + "\\s+(\\d+)", 65 | Pattern.CASE_INSENSITIVE); 66 | patterns.add(p); 67 | NUMBER_GROUP.put(p, 4); 68 | PHRASE_GROUP.put(p, 1); 69 | } 70 | { 71 | Pattern p = Pattern.compile( 72 | keyword + "\\s+(\\d+)\\s+" + WORD_REGEX, 73 | Pattern.CASE_INSENSITIVE); 74 | patterns.add(p); 75 | NUMBER_GROUP.put(p, 1); 76 | PHRASE_GROUP.put(p, 2); 77 | } 78 | { 79 | Pattern p = Pattern.compile( 80 | WORD_REGEX + "\\s+(of )?" + keyword + "\\s+(\\d+)", 81 | Pattern.CASE_INSENSITIVE); 82 | patterns.add(p); 83 | NUMBER_GROUP.put(p, 3); 84 | PHRASE_GROUP.put(p, 1); 85 | } 86 | return patterns; 87 | } 88 | 89 | @RequiredArgsConstructor 90 | class ScoredRestriction 91 | { 92 | public final Restriction restriction; 93 | public final double score; 94 | public final String phrase; 95 | public final int matchBegin; 96 | public final int matchEnd; 97 | } 98 | 99 | @Override public Set detect(Cube cube, String phrase) 100 | { 101 | Set fragments = new HashSet<>(); 102 | // phrase = Stopwords.remove(phrase,Stopwords.STOPWORDS); 103 | Set srs = new HashSet<>(); 104 | 105 | for (Entry e : PATTERN_TO_TYPE.entrySet()) 106 | { 107 | Matcher matcher = e.getKey().matcher(phrase); 108 | while (matcher.find()) 109 | { 110 | Pattern pattern = e.getKey(); 111 | // TODO floats also 112 | int n = Integer.parseInt(matcher.group(NUMBER_GROUP.get(pattern))); 113 | String w = matcher.group(PHRASE_GROUP.get(pattern)); 114 | 115 | Set results = Detector.matchPart( 116 | cube, 117 | w).stream().filter(sr->sr.getScore()>=MIN_SIMILARITY).collect(Collectors.toSet()); 118 | if (!results.isEmpty()) 119 | { 120 | ScoreResult max = results.stream().max(Comparator.comparing(ScoreResult::getScore)).get(); 121 | RestrictionWithPhrase restriction = null; 122 | 123 | switch (e.getValue()) 124 | { 125 | case LEFT_CLOSED: 126 | restriction = new IntervalRestriction(max.property, max.value, n, Double.POSITIVE_INFINITY, false); 127 | break; 128 | case LEFT_OPEN: 129 | restriction = new IntervalRestriction(max.property, 
max.value, n, Double.POSITIVE_INFINITY, true); 130 | break; 131 | case RIGHT_CLOSED: 132 | restriction = new IntervalRestriction(max.property, max.value, Double.NEGATIVE_INFINITY, n, false); 133 | break; 134 | case RIGHT_OPEN: 135 | restriction = new IntervalRestriction(max.property, max.value, Double.NEGATIVE_INFINITY, n, true); 136 | break; 137 | } 138 | // TODO what are scored restrictions good for? 139 | srs.add(new ScoredRestriction(restriction, max.score, matcher.group(0),matcher.start(),matcher.end())); 140 | Fragment fragment = new Fragment(cube, matcher.group(0)); 141 | fragment.getRestrictions().add(restriction); 142 | fragments.add(fragment); 143 | phrase = phrase.replace(matcher.group(0), "").replace(" "," "); 144 | log.debug("detected restriction "+restriction+" in phrase "+matcher.group(0)); 145 | } 146 | } 147 | } 148 | // TODO get overlap and throw out low score ones 149 | 150 | return fragments; 151 | } 152 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/InPlaceDetector.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import java.util.*; 4 | import java.util.regex.Matcher; 5 | import java.util.regex.Pattern; 6 | import java.util.stream.Collectors; 7 | import org.aksw.cubeqa.Config; 8 | import org.aksw.cubeqa.Cube; 9 | import org.aksw.cubeqa.property.ComponentProperty; 10 | import org.aksw.cubeqa.restriction.UriRestriction; 11 | import org.aksw.cubeqa.restriction.ValueRestriction; 12 | import org.aksw.cubeqa.template.Fragment; 13 | import org.apache.jena.vocabulary.XSD; 14 | 15 | /** Detects phrases like "in Yemen" or "in the City of Washington". 
**/ 16 | public enum InPlaceDetector implements Detector 17 | { 18 | INSTANCE; 19 | 20 | String regex = "in(?: the)?((?: [A-Z][a-z]+)*(?: of)?(?: [A-Z][a-z]+)*)"; // proper noun sequence 21 | Pattern pattern = Pattern.compile(regex); 22 | 23 | @Override public Set detect(final Cube cube, final String phrase) 24 | { 25 | Set fragments = new HashSet<>(); 26 | String restPhrase = phrase; 27 | // can be either a string or an object property 28 | List placeProperties = cube.properties.values().stream() 29 | .filter(p->p.range==null||p.range.equals(XSD.xstring.getURI())||!p.range.startsWith(XSD.getURI())).collect(Collectors.toList()); 30 | if(!placeProperties.isEmpty()) 31 | { 32 | Matcher matcher; 33 | while((matcher = pattern.matcher(restPhrase)).find()) 34 | { 35 | Matcher finalMatcher = matcher; 36 | restPhrase = phrase.replace(matcher.group(0), " ").replaceAll("\\s+"," "); 37 | String place = matcher.group(1); 38 | for(ComponentProperty p: placeProperties) 39 | { 40 | p.scorer.score(place).ifPresent(scoreResult->{ 41 | if(scoreResult.score>=Config.INSTANCE.placeMinScore) 42 | { 43 | Fragment fragment = new Fragment(cube, finalMatcher.group(0)); 44 | if(scoreResult.value.startsWith("http")) // TODO: implement more elegantly 45 | { 46 | fragment.getRestrictions().add(new UriRestriction(p,scoreResult.value)); 47 | } else 48 | { 49 | fragment.getRestrictions().add(new ValueRestriction(p,scoreResult.value)); 50 | } 51 | fragments.add(fragment); 52 | // break; 53 | } 54 | }); 55 | } 56 | } 57 | } 58 | // fragments.add(new Fragment(cube, restPhrase)); 59 | return fragments; 60 | } 61 | 62 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/InYearDetector.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import java.util.*; 4 | import java.util.regex.Matcher; 5 | import java.util.regex.Pattern; 6 | import 
java.util.stream.Collectors; 7 | import org.aksw.cubeqa.Config; 8 | import org.aksw.cubeqa.Cube; 9 | import org.aksw.cubeqa.property.ComponentProperty; 10 | import org.aksw.cubeqa.property.scorer.ScoreResult; 11 | import org.aksw.cubeqa.restriction.ValueRestriction; 12 | import org.aksw.cubeqa.template.Fragment; 13 | import org.apache.jena.vocabulary.XSD; 14 | 15 | /** Detects phrases like "in 2009" with the year ranging from 1000 to 2999. **/ 16 | public enum InYearDetector implements Detector 17 | { 18 | INSTANCE; 19 | 20 | String regex = "in (?:(?:the )?year (?:of )?)?([1-2][0-9]{3})"; 21 | Pattern pattern = Pattern.compile(regex); 22 | 23 | @Override public Set detect(final Cube cube, final String phrase) 24 | { 25 | Set fragments = new HashSet<>(); 26 | String restPhrase = phrase; 27 | List yearProperties = cube.properties.values().stream().filter(p->XSD.gYear.getURI().equals(p.range)).collect(Collectors.toList()); 28 | if(!yearProperties.isEmpty()) 29 | { 30 | Matcher matcher; 31 | while((matcher = pattern.matcher(restPhrase)).find()) 32 | { 33 | restPhrase = phrase.replace(matcher.group(0), " ").replaceAll("\\s+"," "); 34 | String year = matcher.group(1); 35 | for(ComponentProperty p: yearProperties) 36 | { 37 | Optional res = p.scorer.score(year); 38 | if(res.isPresent()&&res.get().score>=Config.INSTANCE.boostTemporal) 39 | { 40 | Fragment fragment = new Fragment(cube, matcher.group(0)); 41 | fragment.getRestrictions().add(new ValueRestriction(p,year)); 42 | fragments.add(fragment); 43 | break; 44 | } 45 | } 46 | } 47 | } 48 | // fragments.add(new Fragment(cube, restPhrase)); 49 | return fragments; 50 | } 51 | 52 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/IntervalType.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | /**One endpoint is always infinity or negative infinity */ 4 | public enum 
IntervalType 5 | { 6 | LEFT_OPEN,LEFT_CLOSED,RIGHT_OPEN,RIGHT_CLOSED; 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/PerTimeDetector.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import java.util.*; 4 | import java.util.regex.Matcher; 5 | import java.util.regex.Pattern; 6 | import java.util.stream.Collectors; 7 | import org.aksw.cubeqa.Cube; 8 | import org.aksw.cubeqa.property.ComponentProperty; 9 | import org.aksw.cubeqa.template.Fragment; 10 | import org.apache.lucene.search.spell.NGramDistance; 11 | import org.apache.lucene.search.spell.StringDistance; 12 | import org.apache.jena.rdf.model.Resource; 13 | import org.apache.jena.vocabulary.XSD; 14 | import lombok.ToString; 15 | import lombok.extern.slf4j.Slf4j; 16 | 17 | /** manages phrases like "per month" or "per year", "a year". 18 | * Program flow needs to be adapted because time units can also be dimensions and year which should get preferential treatment. 19 | * maybe put in priority values for each detector and scorer? or detectors can be overwritten? 20 | * Or detectors should apply only once with find of regexes on whole phrase for faster runtime and easier program flow? 
21 | **/ 22 | @Slf4j 23 | public enum PerTimeDetector implements Detector 24 | { 25 | INSTANCE; 26 | 27 | protected static transient StringDistance similarity = new NGramDistance(); 28 | 29 | @Override public Set detect(Cube cube, String phrase) 30 | { 31 | List timeUnits = getTimeUnits(cube); 32 | 33 | Set fragments = new HashSet<>(); 34 | for(TimeUnit timeUnit: timeUnits) 35 | { 36 | for(Pattern pattern: timeUnit.patterns) 37 | { 38 | Matcher matcher = pattern.matcher(phrase); 39 | while(matcher.find()) 40 | { 41 | Fragment fragment = new Fragment(cube, matcher.group(0).trim()); 42 | fragment.getPerProperties().add(timeUnit.property.get()); 43 | fragments.add(fragment); 44 | phrase = phrase.replace(matcher.group(0), " ").replace(" "," "); 45 | log.debug("detected property "+timeUnit.property.get()+" with data type "+timeUnit.property.get().range); 46 | } 47 | } 48 | } 49 | return fragments; 50 | } 51 | 52 | /** unit of time, such as day, month or year */ 53 | @ToString 54 | static class TimeUnit 55 | { 56 | public final Cube cube; 57 | public final Set patterns = new HashSet<>(); 58 | public final Optional property; 59 | 60 | /** 61 | * @param cube 62 | * @param label the surface form, e.g. "year" 63 | * @param dataType the XSD datatype representing the time unit, e.g. XSD.gYear. 
64 | */ 65 | public TimeUnit(Cube cube, String label, Resource dataType) 66 | { 67 | if(label==null) throw new IllegalArgumentException("label is null" ); 68 | this.cube=cube; 69 | String[] prepositions = {"per","a","each"}; 70 | List prePatterns = Arrays.asList("per "+label,label.replaceAll("y$","i")+"ly"); 71 | for(String prePattern: prePatterns) 72 | { 73 | patterns.add(Pattern.compile("(?i)(^|[\\s,])"+prePattern+"($|[\\s.])")); 74 | // patterns.add(Pattern.compile("(?i)[^\\s,]"+prePattern+"[\\s,.$]")); 75 | } 76 | Set candidates = cube.properties.values().stream().filter(p->dataType.getURI().equals(p.range)).collect(Collectors.toSet()); 77 | if(candidates.isEmpty()) 78 | { 79 | property = Optional.empty(); 80 | return; 81 | } 82 | ComponentProperty bestCandidate; 83 | if(candidates.size()==1) 84 | { 85 | bestCandidate = candidates.iterator().next(); 86 | } else 87 | { 88 | // multiple properties with the right data type, which one has the highest string similarity? 89 | bestCandidate = candidates.stream().max(Comparator.comparing( 90 | p->p.labels.stream().max(Comparator.comparing(l->similarity.getDistance(label, l))).get())).get(); 91 | } 92 | property = Optional.of(bestCandidate); 93 | } 94 | } 95 | 96 | static Map> cubeToTimeUnits = new HashMap<>(); 97 | 98 | static private List getTimeUnits(Cube cube) 99 | { 100 | List timeUnits = cubeToTimeUnits.get(cube); 101 | if(timeUnits==null) 102 | { 103 | synchronized(cube) 104 | { 105 | if(timeUnits==null) { 106 | timeUnits = Arrays.asList( 107 | new TimeUnit(cube,"day",XSD.gDay), 108 | new TimeUnit(cube,"day",XSD.date), 109 | new TimeUnit(cube,"month",XSD.gMonth), 110 | new TimeUnit(cube,"year",XSD.gYear) 111 | ).stream().filter(tu->tu.property.isPresent()).collect(Collectors.toList()); 112 | cubeToTimeUnits.put(cube, timeUnits); 113 | } 114 | } 115 | } 116 | return timeUnits; 117 | } 118 | } -------------------------------------------------------------------------------- 
/src/main/java/org/aksw/cubeqa/detector/TopDetector.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import java.util.*; 4 | import java.util.Map.Entry; 5 | import java.util.regex.Matcher; 6 | import java.util.regex.Pattern; 7 | import org.aksw.cubeqa.AnswerType; 8 | import org.aksw.cubeqa.Cube; 9 | import org.aksw.cubeqa.property.ComponentProperty; 10 | import org.aksw.cubeqa.property.scorer.ScoreResult; 11 | import org.aksw.cubeqa.restriction.TopRestriction; 12 | import org.aksw.cubeqa.restriction.TopRestriction.OrderModifier; 13 | import org.aksw.cubeqa.template.Fragment; 14 | import lombok.Data; 15 | 16 | /** Detects "highest n" or "lowest n" type phrases. 17 | * Patterns: [keyword] [n] [measure] [dimension value] 18 | * Example: [Top] [10] [aid receiving] [geographic areas] 19 | * */ 20 | public enum TopDetector implements Detector 21 | { 22 | INSTANCE; 23 | 24 | @Data 25 | public static class TopDetectorResult 26 | { 27 | final OrderModifier modifier; 28 | final int n; 29 | final String ref; 30 | } 31 | 32 | final String[][] keywords = new String[][] {{"highest number","highest amount","top","most","highest","largest","biggest"},{"least","smallest","lowest"}}; 33 | // final String[][] keywords = new String[][] {{"highest"},{}}; 34 | 35 | final Map numberPatternModifier = new HashMap<>(); 36 | final Map noNumberPatternModifier = new HashMap<>(); 37 | 38 | private static Set numberPatterns(String keyword) 39 | { 40 | Set patterns = new HashSet<>(); 41 | final String PHRASE = "(\\w+(\\s\\w+)*)"; 42 | // patterns.add(Pattern.compile("([+-]?\\d+([\\.,]\\d+)?)\\s+"+keyword+"\\s+(\\w+)")); 43 | patterns.add(Pattern.compile("(\\d+)\\s+"+keyword+"\\s+"+PHRASE,Pattern.CASE_INSENSITIVE)); 44 | patterns.add(Pattern.compile(keyword+"\\s+(\\d+)\\s+"+PHRASE,Pattern.CASE_INSENSITIVE)); 45 | return patterns; 46 | } 47 | 48 | private static Pattern noNumberPattern(String keyword) 49 | { 50 | 
new HashSet<>(); 51 | final String PHRASE = "(\\w+(\\s\\w+)*)"; 52 | return Pattern.compile("[\\s,]+"+keyword+"[\\s,]+"+PHRASE,Pattern.CASE_INSENSITIVE); 53 | } 54 | 55 | private TopDetector() 56 | { 57 | Arrays.stream(keywords[0]).map(TopDetector::numberPatterns).flatMap(Set::stream).forEach(p->numberPatternModifier.put(p,OrderModifier.DESC)); 58 | Arrays.stream(keywords[1]).map(TopDetector::numberPatterns).flatMap(Set::stream).forEach(p->numberPatternModifier.put(p,OrderModifier.ASC)); 59 | Arrays.stream(keywords[0]).map(TopDetector::noNumberPattern).forEach(p->noNumberPatternModifier.put(p,OrderModifier.DESC)); 60 | Arrays.stream(keywords[1]).map(TopDetector::noNumberPattern).forEach(p->noNumberPatternModifier.put(p,OrderModifier.ASC)); 61 | } 62 | 63 | @Override public Set detect(final Cube cube, final String phrase) 64 | { 65 | Set fragments = new HashSet<>(); 66 | String restPhrase = phrase; 67 | // with numbers first as searching without numbers first would discard the numbers of the numbered ones 68 | for(Entry e: numberPatternModifier.entrySet()) 69 | { 70 | Matcher matcher = e.getKey().matcher(restPhrase); 71 | while(matcher.find()) 72 | { 73 | restPhrase = phrase.replace(matcher.group(0), " ").replaceAll("\\s+"," "); 74 | int n = Integer.parseInt(matcher.group(1)); 75 | String w = matcher.group(2); 76 | Set results = Detector.matchPart(cube, w); 77 | if(results.isEmpty()) // unknown property, use default answer property 78 | { 79 | Fragment fragment = new Fragment(cube, matcher.group(0).replace(w, "")); 80 | fragment.getRestrictions().add(new TopRestriction(cube.getDefaultAnswerProperty(),n,e.getValue())); 81 | fragments.add(fragment); 82 | } else 83 | { 84 | ScoreResult max = results.stream().max(Comparator.comparing(ScoreResult::getScore)).get(); 85 | 86 | Fragment fragment = new Fragment(cube, matcher.group(0)); 87 | ComponentProperty restrictionProperty = 
((max.property.answerType==AnswerType.COUNTABLE)||(max.property.answerType==AnswerType.UNCOUNTABLE))? 88 | max.property:cube.getDefaultAnswerProperty(); 89 | fragment.getRestrictions().add(new TopRestriction(restrictionProperty,n,e.getValue())); 90 | fragment.getAnswerProperties().add(max.property); 91 | fragments.add(fragment); 92 | // TODO: check this function for the top 10 aided countries case 93 | // TODO: make sure each part is matched only once 94 | } 95 | } 96 | } 97 | for(Entry e: noNumberPatternModifier.entrySet()) 98 | { 99 | Matcher matcher = e.getKey().matcher(restPhrase); 100 | while(matcher.find()) 101 | { 102 | restPhrase = phrase.replace(matcher.group(0), " ").replaceAll("\\s+"," "); 103 | String w = matcher.group(1); 104 | Set results = Detector.matchPart(cube, w); 105 | if(results.isEmpty()) // unknown property, use default answer property 106 | { 107 | Fragment fragment = new Fragment(cube, matcher.group(0).replace(w, "")); 108 | fragment.getRestrictions().add(new TopRestriction(cube.getDefaultAnswerProperty(),1,e.getValue())); 109 | fragments.add(fragment); 110 | } else 111 | { 112 | ScoreResult max = results.stream().max(Comparator.comparing(ScoreResult::getScore)).get(); 113 | Fragment fragment = new Fragment(cube, matcher.group(0)); 114 | fragment.getRestrictions().add(new TopRestriction(max.property,1,e.getValue())); 115 | fragments.add(fragment); 116 | } 117 | } 118 | } 119 | return fragments; 120 | } 121 | 122 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/detector/package-info.java: -------------------------------------------------------------------------------- 1 | /**Dectectors are called before scorers outside of the recursive process and transform certain keyphrases into additional query modifiers, such as aggregates. 
*/ 2 | package org.aksw.cubeqa.detector; -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/index/CubeIndex.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import java.util.*; 4 | import java.io.File; 5 | import org.aksw.cubeqa.Cube; 6 | import org.aksw.cubeqa.Files; 7 | import org.apache.lucene.analysis.Analyzer; 8 | import org.apache.lucene.analysis.en.EnglishAnalyzer; 9 | import org.apache.lucene.document.*; 10 | import org.apache.lucene.index.*; 11 | import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 12 | import org.apache.lucene.search.*; 13 | import org.apache.lucene.store.Directory; 14 | import org.apache.lucene.store.FSDirectory; 15 | 16 | import lombok.SneakyThrows; 17 | import lombok.extern.slf4j.Slf4j; 18 | 19 | /** Finds the right cube for a query. Implemented as Lucene index. Modified copy of {@link Index}.*/ 20 | @Slf4j 21 | public enum CubeIndex 22 | { 23 | INSTANCE; 24 | 25 | private static final Analyzer analyzer = new EnglishAnalyzer(); 26 | private static final int NUMBER_OF_HITS = 3; 27 | 28 | private IndexWriter indexWriter; 29 | private final Directory dir; 30 | private IndexReader reader; 31 | 32 | @SneakyThrows 33 | private CubeIndex() 34 | { 35 | File folder = new File(new File(Files.localFolder("cache"),"lucene"),"cubecache"); 36 | folder.mkdirs(); 37 | dir = FSDirectory.open(folder.toPath()); 38 | if(DirectoryReader.indexExists(dir)) 39 | { 40 | reader = DirectoryReader.open(dir); 41 | } 42 | } 43 | 44 | @SneakyThrows 45 | public synchronized void add(Cube cube) 46 | { 47 | if(indexWriter==null) {throw new IllegalStateException("indexWriter is null, call startWrites() first.");} 48 | Document doc = new Document(); 49 | 50 | doc.add(new StringField("uri", cube.uri, Field.Store.YES)); 51 | { 52 | TextField labelField = new TextField("label", 
Index.normalize(cube.label),Field.Store.NO); 53 | doc.add(labelField); 54 | } 55 | { 56 | TextField commentField = new TextField("comment", Index.normalize(cube.comment),Field.Store.NO); 57 | doc.add(commentField); 58 | } 59 | doc.add(new TextField("properties", 60 | Index.normalize( 61 | cube.properties.values().stream().flatMap(p->p.getLabels().stream()).reduce("", (a,b)->a+" "+b)), 62 | Field.Store.NO)); 63 | 64 | indexWriter.addDocument(doc); 65 | } 66 | 67 | /** You can fill the index only once right now.*/ 68 | @SneakyThrows public void fill(Set cubes) 69 | { 70 | if(!DirectoryReader.indexExists(dir)) 71 | { 72 | 73 | startWrites(); 74 | cubes.stream().forEach(this::add); 75 | stopWrites(); 76 | } 77 | reader = DirectoryReader.open(dir); 78 | } 79 | 80 | @SneakyThrows 81 | final protected synchronized void stopWrites() 82 | { 83 | indexWriter.close(); 84 | indexWriter=null; 85 | } 86 | 87 | @SneakyThrows protected 88 | final synchronized void startWrites() 89 | { 90 | if(indexWriter==null) 91 | { 92 | IndexWriterConfig config = new IndexWriterConfig(analyzer); 93 | indexWriter = new IndexWriter(dir, config); 94 | } 95 | } 96 | 97 | @SneakyThrows 98 | public List getCubeUris(String question) 99 | { 100 | List cubes = new ArrayList<>(NUMBER_OF_HITS); 101 | MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new String[] {"label", "comment","properties"},analyzer); 102 | IndexSearcher searcher = new IndexSearcher(reader); 103 | Query q = queryParser.parse(Index.normalize(question)); 104 | 105 | TopScoreDocCollector collector = TopScoreDocCollector.create(NUMBER_OF_HITS); 106 | searcher.search(q, collector); 107 | ScoreDoc[] hits = collector.topDocs().scoreDocs; 108 | for(ScoreDoc hit: hits) 109 | { 110 | log.debug("{}",searcher.doc(hit.doc).get("uri")); 111 | log.debug("{}",searcher.doc(hit.doc).get("label")); 112 | log.debug("{}",searcher.doc(hit.doc).get("comment")); 113 | log.debug("{}",searcher.doc(hit.doc).get("properties")); 114 | 
cubes.add(searcher.doc(hit.doc).get("uri")); 115 | } 116 | return cubes; 117 | } 118 | 119 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/index/Index.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import java.util.*; 4 | import java.util.stream.Collectors; 5 | import java.io.File; 6 | import de.konradhoeffner.commons.StopWatch; 7 | import org.aksw.cubeqa.Config; 8 | import org.aksw.cubeqa.Files; 9 | import org.aksw.cubeqa.StopWatches; 10 | import org.aksw.cubeqa.property.ComponentProperty; 11 | import org.apache.lucene.analysis.Analyzer; 12 | import org.apache.lucene.analysis.en.EnglishAnalyzer; 13 | import org.apache.lucene.document.Document; 14 | import org.apache.lucene.index.*; 15 | import org.apache.lucene.queryparser.classic.QueryParser; 16 | import org.apache.lucene.search.*; 17 | import org.apache.lucene.search.spell.NGramDistance; 18 | import org.apache.lucene.search.spell.StringDistance; 19 | import org.apache.lucene.store.Directory; 20 | import org.apache.lucene.store.FSDirectory; 21 | import lombok.SneakyThrows; 22 | import lombok.extern.slf4j.Slf4j; 23 | 24 | /** Lucene index reading and writing abstract superclass. 
*/ 25 | @Slf4j 26 | public abstract class Index 27 | { 28 | // {log.setLevel(Level.ALL);} 29 | protected static final Analyzer analyzer = new EnglishAnalyzer(); 30 | protected static final QueryParser parser = new QueryParser("textlabel", analyzer); 31 | private static final int NUMBER_OF_HITS = 5; 32 | // TODO: make sure instances for multiple cubes are not conflicting, property uris may not be unique 33 | protected final ComponentProperty property; 34 | 35 | protected IndexWriter indexWriter; 36 | protected final Directory dir; 37 | protected IndexReader reader; 38 | 39 | protected static StringDistance distance = new NGramDistance(); 40 | 41 | protected File subFolder; 42 | 43 | final public boolean isEmpty() 44 | { 45 | return !subFolder.exists(); 46 | } 47 | 48 | @SneakyThrows 49 | protected Index(ComponentProperty property) 50 | { 51 | this.property=property; 52 | File folder = new File(new File(Files.localFolder("cache"),"lucene"),property.cube.probablyUniqueAsciiId()); 53 | folder.mkdirs(); 54 | subFolder = new File(folder,property.shortName()); 55 | dir = FSDirectory.open(subFolder.toPath()); 56 | } 57 | 58 | /** Replaces "&" with "and" and removes all [^A-Za-z0-9 ] characters.*/ 59 | static protected String normalize(String s) 60 | { 61 | return s.replace("&", "and").replaceAll("[^A-Za-z0-9 ]", "").toLowerCase().trim(); 62 | } 63 | 64 | @SneakyThrows 65 | final protected synchronized void stopWrites() 66 | { 67 | indexWriter.close(); 68 | indexWriter=null; 69 | } 70 | 71 | @SneakyThrows protected 72 | final synchronized void startWrites() 73 | { 74 | if(indexWriter==null) 75 | { 76 | IndexWriterConfig config = new IndexWriterConfig(analyzer); 77 | indexWriter = new IndexWriter(dir, config); 78 | } 79 | } 80 | 81 | @SneakyThrows 82 | protected Map getIdWithScore(String s, String fieldName, double minScore) 83 | { 84 | StopWatch watch = StopWatches.INSTANCE.getWatch("indexread"); 85 | watch.start(); 86 | try 87 | { 88 | Map idWithScore = new HashMap<>(); 89 
| String ns=normalize(s); 90 | if(ns.isEmpty()) {return idWithScore;} 91 | 92 | List queries = new LinkedList<>(); 93 | if((Config.INSTANCE.indexQueries==Config.IndexQueries.FUZZY||Config.INSTANCE.indexQueries==Config.IndexQueries.BOTH) 94 | &&ns.length()>=Config.INSTANCE.indexNonExactMatchMinLength) 95 | { 96 | queries.add(new FuzzyQuery(new Term("stringlabel",ns))); 97 | } 98 | if(Config.INSTANCE.indexQueries==Config.IndexQueries.ANALYZED||Config.INSTANCE.indexQueries==Config.IndexQueries.BOTH) 99 | { 100 | queries.add(parser.parse(ns)); 101 | } 102 | 103 | IndexSearcher searcher = new IndexSearcher(reader); 104 | 105 | for(Query q: queries) 106 | { 107 | TopScoreDocCollector collector = TopScoreDocCollector.create(NUMBER_OF_HITS); 108 | searcher.search(q, collector); 109 | ScoreDoc[] hits = collector.topDocs().scoreDocs; 110 | Map idWithUnnormalizedScore = new HashMap<>(); 111 | boolean fuzzy = (q instanceof FuzzyQuery); 112 | for(ScoreDoc hit: hits) 113 | { 114 | Document doc = searcher.doc(hit.doc); 115 | log.trace("Query "+q+" results in "+Arrays.toString(doc.getValues("originallabel"))); 116 | if(fuzzy) 117 | { 118 | Arrays.stream(doc.getValues("originallabel")).filter(l->l.length()>3).forEach( 119 | l->idWithScore.put(doc.get(fieldName), (double)distance.getDistance(ns, normalize(l)))); 120 | } 121 | else 122 | { 123 | if(hit.score>=Config.INSTANCE.indexMinLuceneScore) 124 | { 125 | log.trace(searcher.explain(q, hit.doc).toString()); 126 | Arrays.stream(doc.getValues("originallabel")).filter(l->l.length()>3) 127 | // even if transposed should have some minimal string distance // update: no id doesn't, goes as low as 0.05 128 | // .filter(l->distance.getDistance(ns, normalize(l))>0.5) 129 | // original label is the document from lucene which can be much longer than our string so we make sure they are not too dissimilar in length 130 | .filter(l->ns.length()*2>normalize(l).length()) 131 | .forEach( 132 | l->idWithUnnormalizedScore.put(doc.get(fieldName), 
(double) hit.score)); 133 | } 134 | } 135 | // log.debug("label index result labels "+Arrays.toString(doc.getValues("originallabel"))+", uri "+doc.get("uri")+" score "+score); 136 | } 137 | if(hits.length>0) 138 | { 139 | if(!fuzzy) 140 | { 141 | double max = idWithUnnormalizedScore.values().stream().reduce(0.0, Double::max); 142 | idWithUnnormalizedScore.forEach((ss,l)->{idWithScore.put(ss,l/max);}); 143 | } 144 | break; // only use second index when fuzzy one doesn't work 145 | } 146 | } 147 | // only keep elements with a score of at least minScore 148 | return idWithScore.keySet().stream().filter(id->idWithScore.get(id)>=minScore).collect(Collectors.toMap(id->id, id->idWithScore.get(id))); 149 | } catch (Exception e) {throw new RuntimeException(e);} 150 | finally {watch.stop();} 151 | } 152 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/index/LabelIndex.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import java.io.IOException; 4 | import java.util.*; 5 | import java.util.function.Function; 6 | import lombok.SneakyThrows; 7 | import lombok.extern.slf4j.Slf4j; 8 | import org.aksw.cubeqa.property.ComponentProperty; 9 | import org.apache.lucene.document.*; 10 | import org.apache.lucene.index.DirectoryReader; 11 | 12 | /** Lucene index for labels, used by ObjectPropertyScorer. 
13 | */ 14 | @Slf4j 15 | public class LabelIndex extends Index 16 | { 17 | private static final Map instances = new HashMap<>(); 18 | private LabelIndex(ComponentProperty property) {super(property);} 19 | 20 | public static synchronized LabelIndex getInstance(ComponentProperty property) 21 | { 22 | LabelIndex index = instances.get(property.uri); 23 | if(index==null) 24 | { 25 | index = new LabelIndex(property); 26 | instances.put(property.uri,index); 27 | } 28 | return index; 29 | } 30 | 31 | @SneakyThrows 32 | public void fill(Set uris, Function> labelFunction) 33 | { 34 | if(!DirectoryReader.indexExists(dir)) 35 | { 36 | startWrites(); 37 | for(String uri: uris) 38 | { 39 | Set labels = labelFunction.apply(uri); 40 | add(uri, labels); 41 | } 42 | stopWrites(); 43 | } 44 | reader = DirectoryReader.open(dir); 45 | } 46 | 47 | @SneakyThrows 48 | public Map getUrisWithScore(String s, double minScore) 49 | { 50 | return getIdWithScore(s, "uri",minScore); 51 | } 52 | 53 | public void add(String uri, Set labels) throws IOException 54 | { 55 | if(indexWriter==null) throw new IllegalStateException("indexWriter is null, call startWrites() first."); 56 | Document doc = new Document(); 57 | if(uri==null) 58 | { 59 | log.error("label index add uri==null. 
the following labels will not be added: "+labels); 60 | return; 61 | } 62 | doc.add(new StringField("uri", uri, Field.Store.YES)); 63 | // doc.add(new TextField("cube", cube.name, Field.Store.YES)); 64 | // doc.add(new TextField("property", cube.name, Field.Store.YES)); 65 | 66 | labels.forEach(l-> 67 | { 68 | doc.add(new Field("stringlabel", normalize(l), StringField.TYPE_STORED)); 69 | doc.add(new Field("textlabel", normalize(l), TextField.TYPE_STORED)); 70 | doc.add(new Field("originallabel", l, StringField.TYPE_STORED)); 71 | }); 72 | 73 | indexWriter.addDocument(doc); 74 | } 75 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/index/Similarity.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import org.apache.lucene.analysis.en.EnglishAnalyzer; 4 | import org.apache.lucene.queryparser.classic.QueryParser; 5 | import org.simmetrics.StringMetric; 6 | import org.simmetrics.metrics.StringMetrics; 7 | import lombok.SneakyThrows; 8 | 9 | public class Similarity 10 | { 11 | static final QueryParser parser = new QueryParser("",new EnglishAnalyzer()); 12 | static final StringMetric metric = StringMetrics.qGramsDistance(); 13 | 14 | /** @param s word or phrase or general string 15 | /** @param t another word or phrase or general string 16 | /** @return a similarity value between 0 (totally different) and 1 (exactly equal). applies stemming and lower case. 
*/ 17 | @SneakyThrows 18 | static public float similarity(String s, String t) 19 | { 20 | return metric.compare(parser.parse(QueryParser.escape(s)).toString(), parser.parse(QueryParser.escape(t)).toString()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/index/StringIndex.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import java.util.*; 4 | import java.io.IOException; 5 | import org.aksw.cubeqa.Config; 6 | import org.aksw.cubeqa.property.ComponentProperty; 7 | import org.apache.lucene.document.*; 8 | import org.apache.lucene.index.DirectoryReader; 9 | import lombok.SneakyThrows; 10 | 11 | /** Index for String scorer. */ 12 | public class StringIndex extends Index 13 | { 14 | private static final Map instances = new HashMap<>(); 15 | private StringIndex(ComponentProperty property) {super(property);} 16 | public static synchronized StringIndex getInstance(ComponentProperty property) 17 | { 18 | StringIndex index = instances.get(property.uri); 19 | if(index==null) 20 | { 21 | index = new StringIndex(property); 22 | instances.put(property.uri,index); 23 | } 24 | return index; 25 | } 26 | 27 | @SneakyThrows 28 | public void fill(Set strings) 29 | { 30 | if(!DirectoryReader.indexExists(dir)) 31 | { 32 | startWrites(); 33 | for(String s: strings) 34 | { 35 | add(s); 36 | } 37 | stopWrites(); 38 | } 39 | reader = DirectoryReader.open(dir); 40 | } 41 | 42 | @SneakyThrows 43 | public Map getStringsWithScore(String s) 44 | { 45 | return getIdWithScore(s, "originallabel",Config.INSTANCE.indexMinScore); 46 | } 47 | 48 | public void add(String s) throws IOException 49 | { 50 | if(indexWriter==null) { 51 | throw new IllegalStateException("indexWriter is null, call startWrites() first."); 52 | } 53 | Document doc = new Document(); 54 | doc.add(new Field("stringlabel", normalize(s), StringField.TYPE_STORED)); 55 | 
doc.add(new Field("textlabel", normalize(s), TextField.TYPE_STORED)); 56 | doc.add(new Field("originallabel", s, StringField.TYPE_STORED)); 57 | indexWriter.addDocument(doc); 58 | } 59 | 60 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/index/package-info.java: -------------------------------------------------------------------------------- 1 | /**Lucene index reading and writing. */ 2 | package org.aksw.cubeqa.index; -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/package-info.java: -------------------------------------------------------------------------------- 1 | /** Natural Language Question Answering on RDF Data Cubes*/ 2 | package org.aksw.cubeqa; -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/PropertyType.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property; 2 | 3 | import org.aksw.cubeqa.rdf.DataCube; 4 | 5 | public enum PropertyType 6 | { 7 | ATTRIBUTE,DIMENSION,MEASURE; 8 | 9 | static public PropertyType ofRdfType(String type) 10 | { 11 | switch(type) 12 | { 13 | case DataCube.ATTRIBUTE_PROPERTY_URI:return ATTRIBUTE; 14 | case DataCube.DIMENSION_PROPERTY_URI:return DIMENSION; 15 | case DataCube.MEASURE_PROPERTY_URI:return MEASURE; 16 | default:throw new IllegalArgumentException(type+" is not a subtype of qb:ComponentProperty"); 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/package-info.java: -------------------------------------------------------------------------------- 1 | /** Contains Component Properties (Dimensions, Attributes and Measures) and Scorers. 
*/ 2 | package org.aksw.cubeqa.property; -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/DatatypePropertyScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.util.Collections; 4 | import org.aksw.cubeqa.property.ComponentProperty; 5 | 6 | /** Abstract superclass for data type properties, whose values have literals. */ 7 | public abstract class DatatypePropertyScorer extends MultiSetScorer 8 | { 9 | public DatatypePropertyScorer(ComponentProperty property) 10 | { 11 | super(property,node->Collections.singleton(node.asLiteral().getLexicalForm())); 12 | } 13 | 14 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/MultiSetScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.util.Optional; 4 | import java.util.Set; 5 | import java.util.function.Function; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.aksw.cubeqa.property.ComponentProperty; 8 | import com.google.common.collect.HashMultiset; 9 | import com.google.common.collect.Multiset; 10 | import org.apache.jena.query.QuerySolution; 11 | import org.apache.jena.query.ResultSet; 12 | import org.apache.jena.rdf.model.RDFNode; 13 | 14 | @Slf4j 15 | public abstract class MultiSetScorer extends Scorer 16 | { 17 | final protected Multiset values = HashMultiset.create(); 18 | // final protected HashMap valueToNode = new HashMap<>(); 19 | final protected int maxCount; 20 | 21 | private static final long serialVersionUID = 1L; 22 | 23 | public MultiSetScorer(ComponentProperty property, Function> f) 24 | { 25 | super(property); 26 | ResultSet rs = queryValues(); 27 | while(rs.hasNext()) 28 | { 29 | QuerySolution qs = rs.next(); 30 | RDFNode node = 
qs.get("value"); 31 | f.apply(node).forEach(s-> 32 | { 33 | // valueToNode.put(s, node); 34 | values.add(s, qs.get("cnt").asLiteral().getInt()); 35 | }); 36 | } 37 | 38 | Optional max = values.elementSet().stream().map(s->values.count(s)).max(Integer::compare); 39 | if(!max.isPresent()) 40 | { 41 | log.warn("no values for property "+property+": "+values); 42 | maxCount=0; 43 | } 44 | else 45 | { 46 | maxCount = max.get(); 47 | } 48 | } 49 | 50 | 51 | // protected double countScore(int count) 52 | // { 53 | // // +1 to prevent div by 0 the nearer the score to the max, the higher the value, but don't fall of too steep so use log. 54 | // if(count==0) return 0; 55 | // return Math.sqrt(Math.log(count+1)/Math.log(maxCount+1)); // ad hoc, sqrt is to have a less steep falloff 56 | // } 57 | 58 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/NumericScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.util.Optional; 4 | import org.aksw.cubeqa.Config; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import com.google.common.collect.Range; 7 | import org.apache.jena.datatypes.DatatypeFormatException; 8 | import org.apache.jena.query.QuerySolution; 9 | import lombok.extern.slf4j.Slf4j; 10 | 11 | /** tests if a number is included in the range. */ 12 | @Slf4j 13 | public class NumericScorer extends Scorer 14 | { 15 | final Range range; 16 | 17 | public NumericScorer(ComponentProperty property) 18 | { 19 | super(property); 20 | // triggers Virtuoso bug https://github.com/openlink/virtuoso-opensource/issues/354 on some versions 21 | String query = "select (min(xsd:double(?d)) as ?min) (max(xsd:double(?d)) as ?max) {?o a qb:Observation. ?o qb:dataSet <"+property.cube.uri+">." 
22 | + "?o <"+property.uri+"> ?d.}"; 23 | QuerySolution qs = property.cube.sparql.select(query).next(); 24 | log.trace(query); 25 | Range range2; 26 | try {range2 = Range.closed(qs.get("min").asLiteral().getDouble(), qs.get("max").asLiteral().getDouble());} 27 | // virtuoso bug 28 | catch(DatatypeFormatException e) 29 | { 30 | log.error("Virtuoso Bug for property "+property+", query:\n"+query,e); 31 | range2 = Range.closed(Double.MIN_VALUE, Double.MAX_VALUE); 32 | } 33 | range=range2; 34 | } 35 | 36 | @Override public Optional score(String value) 37 | { 38 | try 39 | { 40 | double d = Double.valueOf(value); 41 | return Optional.of(new ScoreResult(property, value, range.contains(d)?Config.INSTANCE.boostNumeric:0.0)); 42 | } 43 | catch(NumberFormatException e) {return Optional.empty();} 44 | } 45 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/ObjectPropertyScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.util.*; 4 | import java.util.stream.Collectors; 5 | import org.aksw.cubeqa.Config; 6 | import org.aksw.cubeqa.index.LabelIndex; 7 | import org.aksw.cubeqa.property.ComponentProperty; 8 | import de.konradhoeffner.commons.Streams; 9 | 10 | /** Scores object properties, which scores exclusively by count in relation to maxCount.*/ 11 | public class ObjectPropertyScorer extends MultiSetScorer 12 | { 13 | transient LabelIndex index; 14 | private static final double THRESHOLD = 0.4; 15 | 16 | private synchronized void loadOrCreateIndex() 17 | { 18 | if(index==null) 19 | { 20 | index = LabelIndex.getInstance(property); 21 | index.fill(values.elementSet(), this::getLabels); 22 | } 23 | } 24 | 25 | public ObjectPropertyScorer(ComponentProperty property) 26 | { 27 | super(property,node->Collections.singleton(node.asResource().getURI())); 28 | } 29 | 30 | @Override public Optional 
score(String value) 31 | { 32 | loadOrCreateIndex(); 33 | 34 | Map urisWithScore = index.getUrisWithScore(value,Config.INSTANCE.indexMinScore); 35 | 36 | return urisWithScore.keySet().stream() 37 | .filter(s->urisWithScore.get(s)>THRESHOLD) 38 | .max(Comparator.comparing(urisWithScore::get)) 39 | .map(uri->new ScoreResult(property, uri, urisWithScore.get(uri))); 40 | } 41 | 42 | public Set getLabels(String uri) 43 | { 44 | String query = "select ?l {<"+uri+"> rdfs:label ?l}"; 45 | return Streams.stream(property.cube.sparql.select(query)).map(qs->qs.get("l").asLiteral().getLexicalForm()).collect(Collectors.toSet()); 46 | } 47 | 48 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/ParseScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.time.Year; 4 | import java.util.HashMap; 5 | import java.util.LinkedList; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Optional; 9 | import java.util.function.Function; 10 | import lombok.extern.slf4j.Slf4j; 11 | import org.aksw.cubeqa.property.ComponentProperty; 12 | import com.google.common.collect.HashMultiset; 13 | import com.google.common.collect.Multiset; 14 | import com.google.common.collect.Multiset.Entry; 15 | 16 | /** Parses string value using a given function. 
17 | * @param The type the string gets parsed to.*/ 18 | @Slf4j 19 | public class ParseScorer extends DatatypePropertyScorer 20 | { 21 | final Multiset parsed = HashMultiset.create(); 22 | Map parsedToOriginal = new HashMap<>(); 23 | // final Function parseFunc; 24 | 25 | public ParseScorer(ComponentProperty property, Function parseFunc) 26 | { 27 | super(property); 28 | List unparseable = new LinkedList<>(); 29 | // this.parseFunc = parseFunc; 30 | for(Entry e: values.entrySet()) 31 | { 32 | try 33 | { 34 | T parsedValue = parseFunc.apply(e.getElement()); 35 | parsedToOriginal.put(parsedValue, e.getElement()); 36 | parsed.add(parsedValue, e.getCount()); 37 | } 38 | catch(Exception ex) {unparseable.add(e.getElement());} 39 | } 40 | if(!unparseable.isEmpty()) 41 | {log.warn(property+": could not parse "+unparseable.size()+" of "+(unparseable.size()+parsed.size())+": "+unparseable);} 42 | // values.clear(); // from now on we only touch the integer years 43 | } 44 | 45 | @Override public Optional score(String value) 46 | { 47 | Year year = Year.parse(value.replaceAll("\\+[0-9][0-9]:[0-9][0-9]","")); 48 | // double cs = countScore(parsed.count(year)); 49 | if(parsed.count(year)==0) return Optional.empty(); 50 | return Optional.of(new ScoreResult(property, parsedToOriginal.get(parsed), 1)); 51 | 52 | // TODO temporal bugfix, no serialization of function interface? 53 | //return countScore(parsed.count(parseFunc.apply(value))); 54 | } 55 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/ScoreResult.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.io.Serializable; 4 | import org.aksw.cubeqa.property.ComponentProperty; 5 | import org.aksw.cubeqa.restriction.*; 6 | import lombok.*; 7 | 8 | /** Result of a scoring operation with property, value and score. 
*/ 9 | @RequiredArgsConstructor 10 | @Getter 11 | @EqualsAndHashCode 12 | @ToString 13 | public class ScoreResult implements Serializable 14 | { 15 | public final ComponentProperty property; 16 | public final String value; 17 | 18 | /** 0 - not a match at all , 1 - perfect match */ 19 | public final double score; 20 | 21 | public Restriction toRestriction() 22 | { 23 | if(property.scorer instanceof ObjectPropertyScorer) 24 | { 25 | return new UriRestriction(property, value); 26 | } 27 | // if(property.scorer instanceof DatatypePropertyScorer) 28 | // { 29 | return new ValueRestriction(property, value); 30 | // } 31 | 32 | } 33 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/Scorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.util.Arrays; 4 | import java.util.Optional; 5 | import java.util.stream.Stream; 6 | import java.io.Serializable; 7 | import org.aksw.cubeqa.CubeSparql; 8 | import org.aksw.cubeqa.property.ComponentProperty; 9 | import org.apache.jena.query.ResultSet; 10 | import org.apache.jena.rdf.model.RDFNode; 11 | import de.konradhoeffner.commons.Streams; 12 | 13 | /** Scorers match phrases or words to component property values. 14 | * Scorers are used when a phrase does not match to a component property label. 
15 | * Scorers return a similarity value in [0,1].*/ 16 | public abstract class Scorer implements Serializable 17 | { 18 | private static final long serialVersionUID = 1L; 19 | protected final ComponentProperty property; 20 | 21 | public Scorer(ComponentProperty property) 22 | { 23 | this.property=property; 24 | } 25 | 26 | /** @param phrase a word or phrase 27 | * @return the score result of that phrase */ 28 | abstract public Optional score(String phrase); 29 | 30 | protected ResultSet queryValues() 31 | { 32 | String query = "select distinct ?value (count(?value) as ?cnt)" 33 | + "{?obs a qb:Observation. ?obs <"+property.uri+"> ?value. } group by ?value"; 34 | ResultSet rs = CubeSparql.getLinkedSpendingInstanceForName(property.cube.name).select(query); 35 | return rs; 36 | } 37 | 38 | protected Stream valueStream() 39 | { 40 | return Streams.stream(queryValues()).map(qs->qs.get("value")); 41 | } 42 | 43 | static protected float closestValue(float[] sorted, float key) 44 | { 45 | if(sorted.length==1) {return sorted[0];} // trivial case 46 | if(keysorted[sorted.length-1]) {return sorted[sorted.length-1];} // upper boundary 48 | int pos = Arrays.binarySearch(sorted, key); 49 | if(pos>=0) {return sorted[pos];} // we found an exact match 50 | // we didn't find an exact match, now we have two candidates: insertion point and insertion point-1 (we excluded the trivial case before) 51 | // pos = -ip-1 | +ip -pos => ip = -pos-1 52 | int ip = -pos-1; 53 | 54 | float closest; 55 | if(sorted[ip]-keysorted[sorted.length-1]) {return sorted[sorted.length-1];} // upper boundary 65 | int pos = Arrays.binarySearch(sorted, key); 66 | if(pos>=0) {return sorted[pos];} // we found an exact match 67 | // we didn't find an exact match, now we have two candidates: insertion point and insertion point-1 (we excluded the trivial case before) 68 | // pos = -ip-1 | +ip -pos => ip = -pos-1 69 | int ip = -pos-1; 70 | long closest; 71 | if(sorted[ip]-key as well as ScoreResult contains a 
property reference but this way it is easier to use 18 | public static Map scorePhraseValues(Cube cube, String phrase) 19 | { 20 | return 21 | cube.properties.values().stream() 22 | .map(p->p.scorer.score(phrase)) 23 | .filter(Optional::isPresent) 24 | .map(Optional::get) 25 | // .filter(s->s.score>THRESHOLD) 26 | .collect(Collectors.toMap(result->result.property, result->result)); 27 | } 28 | 29 | public static Map scorePhraseProperties(Cube cube, String phrase) 30 | { 31 | return 32 | cube.properties.values().stream() 33 | .map(p->new Pair<>(p, p.match(phrase))) 34 | .filter(p->p.b>Config.INSTANCE.scorerPropertyNameMinScore) 35 | .collect(Collectors.toMap(p->p.a, p->p.b)); 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/StringScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import java.util.Comparator; 4 | import java.util.Map; 5 | import java.util.Optional; 6 | import java.util.stream.Collectors; 7 | import org.aksw.cubeqa.Config; 8 | import org.aksw.cubeqa.index.StringIndex; 9 | import org.aksw.cubeqa.property.ComponentProperty; 10 | import org.apache.lucene.search.spell.NGramDistance; 11 | import org.apache.lucene.search.spell.StringDistance; 12 | 13 | /** Scorer for data type properties. 
*/ 14 | public class StringScorer extends DatatypePropertyScorer 15 | { 16 | private static final long serialVersionUID = 1L; 17 | private static final double THRESHOLD = 0.4; 18 | protected static transient StringDistance similarity = new NGramDistance(); 19 | 20 | transient private StringIndex index; 21 | 22 | public synchronized StringIndex loadOrCreateIndex() 23 | { 24 | if(index==null) 25 | { 26 | index = StringIndex.getInstance(property); 27 | index.fill(valueStream().map(node->node.asLiteral().getLexicalForm()).collect(Collectors.toSet())); 28 | } 29 | return index; 30 | } 31 | 32 | public StringScorer(ComponentProperty property) 33 | { 34 | super(property); 35 | } 36 | 37 | @Override public Optional score(String value) 38 | { 39 | loadOrCreateIndex(); 40 | 41 | Map stringsWithScore = index.getStringsWithScore(value); 42 | return stringsWithScore.keySet().stream() 43 | .filter(s->stringsWithScore.get(s)>THRESHOLD) 44 | .max(Comparator.comparing(stringsWithScore::get)) 45 | .map(s->new ScoreResult(property, s, Config.INSTANCE.boostString*stringsWithScore.get(s))); 46 | } 47 | 48 | // @Override public Optional unsafeScore(String value) 49 | // { 50 | // // TODO: wordnet,solr 51 | // // double cs = countScore(values.count(value)); 52 | // // if(cs!=0) {return cs;} 53 | // if(values.count(value)>0) return Optional.of(new ScoreResult(property, value, 1)); 54 | // double maxScore = 0; 55 | // 56 | // String maxValue = null; 57 | // for(String s: values.elementSet()) 58 | // { 59 | // double sim = similarity.getDistance(value, s); 60 | // if(simmaxScore) 63 | // { 64 | // maxScore = sim; 65 | // maxValue = s; 66 | // } 67 | // // score = Math.max(score, sim*countScore(values.count(s))); 68 | // } 69 | // if(maxValue==null) {return Optional.empty();} 70 | // return Optional.of(new ScoreResult(property, maxValue, maxScore)); 71 | // // values.elementSet().stream().map(s->Similarity.getSimilarity(value, s)).filter(sim->sim>THRESHOLD) 72 | // // 
.map(sim->sim*countScore(values.count(s),maxCount)); 73 | // } 74 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/property/scorer/temporal/TemporalScorer.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer.temporal; 2 | 3 | import java.time.Year; 4 | import java.time.format.DateTimeParseException; 5 | import java.util.*; 6 | import java.util.function.Function; 7 | import java.util.regex.Matcher; 8 | import java.util.regex.Pattern; 9 | import lombok.extern.slf4j.Slf4j; 10 | import org.aksw.cubeqa.Config; 11 | import org.aksw.cubeqa.property.ComponentProperty; 12 | import org.aksw.cubeqa.property.scorer.ScoreResult; 13 | import org.aksw.cubeqa.property.scorer.Scorer; 14 | import org.joda.time.Instant; 15 | import org.joda.time.Interval; 16 | import org.apache.jena.rdf.model.Literal; 17 | import org.apache.jena.rdf.model.RDFNode; 18 | 19 | /** Scorer for temporal intervals. */ 20 | @Slf4j 21 | public class TemporalScorer extends Scorer 22 | { 23 | private static final long MS_PER_DAY = 24*3600*1000; 24 | static Pattern yearPattern = Pattern.compile("^[+-]?[\\d]+"); 25 | 26 | protected Set intervals = new HashSet<>(); 27 | 28 | /** Use when the property has datatype of year, e.g. xsd:gYear. */ 29 | public static TemporalScorer yearScorer(ComponentProperty property) 30 | { 31 | return new TemporalScorer(property,TemporalScorer::parseAsYear); 32 | } 33 | 34 | /** Use when the property has datatype of date, e.g. xsd:date. 
*/ 35 | public static TemporalScorer dateScorer(ComponentProperty property) 36 | { 37 | return new TemporalScorer(property,TemporalScorer::parseAsDate); 38 | } 39 | 40 | protected TemporalScorer(ComponentProperty property,Function parse) 41 | { 42 | super(property); 43 | Set unparseable = new HashSet<>(); 44 | valueStream().map(RDFNode::asLiteral).map(Literal::getLexicalForm).forEach(s-> 45 | { 46 | try {intervals.add(parse.apply(s));} 47 | catch(Exception e ) {if(unparseable.size()<10) unparseable.add(s);} 48 | }); 49 | if(!unparseable.isEmpty()) {log.warn("could not parse years "+unparseable);} 50 | } 51 | 52 | @Override public Optional score(String value) 53 | { 54 | Interval questionInterval = null; 55 | try 56 | { 57 | questionInterval = parseAsDate(value); 58 | } 59 | catch(IllegalArgumentException | IllegalStateException | StringIndexOutOfBoundsException | DateTimeParseException e) 60 | { 61 | try 62 | { 63 | questionInterval = parseAsYear(value); 64 | } 65 | catch (IllegalArgumentException | IllegalStateException | StringIndexOutOfBoundsException | DateTimeParseException f) {return Optional.empty();} 66 | } 67 | for(Interval interval: intervals) 68 | { 69 | if(interval.equals(questionInterval)||questionInterval.contains(interval)) 70 | { 71 | double score = Config.INSTANCE.boostTemporal; 72 | //if(property.range.equals("http://www.w3.org/2001/XMLSchema#date")) 73 | return Optional.of(new ScoreResult(property, value, score)); 74 | } 75 | } 76 | return Optional.empty(); 77 | } 78 | 79 | static protected Interval parseAsYear(String s) throws IllegalArgumentException, IllegalStateException, StringIndexOutOfBoundsException, DateTimeParseException 80 | { 81 | Matcher m = yearPattern.matcher(s.trim()); 82 | m.find(); 83 | Year y = Year.parse(m.group(0)); 84 | // yoda intervals are left closed, right open 85 | return new Interval(Instant.parse(y.getValue()+"-01-01").getMillis(), Instant.parse((y.getValue()+1)+"-01-01").getMillis()); 86 | } 87 | 88 | static 
protected Interval parseAsDate(String s) throws IllegalArgumentException, IllegalStateException, StringIndexOutOfBoundsException, DateTimeParseException 89 | { 90 | s = s.trim().substring(0, "1999-01-23".length()); // only date, no time 91 | return new Interval(Instant.parse(s).getMillis(),Instant.parse(s).getMillis()+MS_PER_DAY); 92 | } 93 | 94 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/rdf/DataCube.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.rdf; 2 | 3 | import org.apache.jena.rdf.model.Property; 4 | import org.apache.jena.rdf.model.Resource; 5 | import org.apache.jena.rdf.model.*; 6 | 7 | /** RDF Data Cube vocabulary */ 8 | public class DataCube 9 | { 10 | static public final String BASE = "http://purl.org/linked-data/cube#"; 11 | // in addition to myResource.getURI() because switch statements need constants 12 | static public final String DIMENSION_PROPERTY_URI = BASE+"DimensionProperty"; 13 | static public final String MEASURE_PROPERTY_URI = BASE+"MeasureProperty"; 14 | static public final String ATTRIBUTE_PROPERTY_URI = BASE+"AttributeProperty"; 15 | 16 | static public final Resource DataStructureDefinition = ResourceFactory 17 | .createResource(BASE + "DataStructureDefinition"); 18 | static public final Resource DataSet = ResourceFactory.createResource(BASE + "DataSet"); 19 | static public final Resource ComponentProperty = ResourceFactory.createResource(BASE + "ComponentProperty"); 20 | static public final Resource DimensionProperty = ResourceFactory.createResource(BASE + "DimensionProperty"); 21 | static public final Resource MeasureProperty = ResourceFactory.createResource(BASE + "MeasureProperty"); 22 | static public final Resource AttributeProperty = ResourceFactory.createResource(BASE + "AttributeProperty"); 23 | static public final Resource SliceKey = ResourceFactory.createResource(BASE + "SliceKey"); 24 | 
/** URIs of the OWL vocabulary as string constants. */
public class Owl
{
	/** Namespace of the OWL vocabulary. */
	public static final String base = "http://www.w3.org/2002/07/owl#";

	// plain concatenation of constants, so that both URIs remain compile-time constants
	/** URI of the class owl:ObjectProperty. */
	public static final String OBJECT_PROPERTY_URI = base + "ObjectProperty";
	/** URI of the class owl:DatatypeProperty. */
	public static final String DATATYPE_PROPERTY_URI = base + "DatatypeProperty";
}
"; 27 | terms.add(leftFilter); 28 | } 29 | if(rightEndpoint ?"+property.var+"."); 36 | return terms; 37 | } 38 | 39 | public IntervalRestriction(ComponentProperty property, String phrase, double leftEndpoint, double rightEndpoint, boolean open) 40 | { 41 | super(property,phrase); 42 | this.leftEndpoint = leftEndpoint; 43 | this.rightEndpoint = rightEndpoint; 44 | this.open = open; 45 | } 46 | 47 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/restriction/Restriction.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.restriction; 2 | 3 | import java.util.Collections; 4 | import java.util.Set; 5 | import java.util.concurrent.atomic.AtomicInteger; 6 | import org.aksw.cubeqa.property.ComponentProperty; 7 | import lombok.*; 8 | 9 | /** A restriction on the values of a component property.*/ 10 | // subclasses must implement equals and hash code including those of this class (which compares the property) 11 | @RequiredArgsConstructor 12 | @Getter 13 | @EqualsAndHashCode(of={"property"}) 14 | public abstract class Restriction 15 | { 16 | final ComponentProperty property; 17 | 18 | public Set wherePatterns() {return Collections.emptySet();} 19 | public Set orderLimitPatterns() {return Collections.emptySet();} 20 | 21 | final String uniqueVar = "?v"+instanceCounter.getAndIncrement(); 22 | 23 | static AtomicInteger instanceCounter = new AtomicInteger(0); 24 | static final String OBS_VAR = " ?obs "; 25 | 26 | @Override public String toString() 27 | { 28 | return "Restriction on property "+property+" with where patterns: "+wherePatterns()+" and order limit patterns "+orderLimitPatterns(); 29 | } 30 | 31 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/restriction/RestrictionWithPhrase.java: -------------------------------------------------------------------------------- 1 | package 
org.aksw.cubeqa.restriction; 2 | 3 | import org.aksw.cubeqa.property.ComponentProperty; 4 | 5 | public class RestrictionWithPhrase extends Restriction 6 | { 7 | public final String phrase; 8 | 9 | public RestrictionWithPhrase(ComponentProperty property, String phrase) 10 | { 11 | super(property); 12 | this.phrase = phrase; 13 | } 14 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/restriction/TopRestriction.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.restriction; 2 | 3 | import java.util.Collections; 4 | import java.util.Set; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import lombok.EqualsAndHashCode; 7 | 8 | /** Restriction for top n or bottom n items. */ 9 | @EqualsAndHashCode(callSuper=true) 10 | public class TopRestriction extends Restriction 11 | { 12 | public enum OrderModifier {ASC,DESC}; 13 | final int n; 14 | final OrderModifier modifier; 15 | 16 | @Override public Set orderLimitPatterns() 17 | { 18 | return Collections.singleton("order by "+modifier.toString()+"(sum(xsd:decimal(?"+property.var+"))) limit "+n); 19 | } 20 | 21 | public TopRestriction(ComponentProperty property, int n, OrderModifier modifier) 22 | { 23 | super(property); 24 | this.n=n; 25 | this.modifier=modifier; 26 | } 27 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/restriction/UriRestriction.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.restriction; 2 | 3 | import java.util.Collections; 4 | import java.util.Set; 5 | import lombok.EqualsAndHashCode; 6 | import org.aksw.cubeqa.property.ComponentProperty; 7 | 8 | /** Restriction of an object property to a specific object URI.**/ 9 | @EqualsAndHashCode(callSuper=true) 10 | public class UriRestriction extends Restriction 11 | { 12 | String uri; 13 | 14 | 
@Override public Set wherePatterns() 15 | { 16 | return Collections.singleton("?obs <"+property.uri+"> <"+uri+">."); 17 | } 18 | 19 | public UriRestriction(ComponentProperty property, String uri) 20 | { 21 | super(property); 22 | this.uri=uri; 23 | } 24 | 25 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/restriction/ValueRestriction.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.restriction; 2 | 3 | import java.util.Collections; 4 | import java.util.Set; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import org.apache.jena.vocabulary.XSD; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.extern.slf4j.Slf4j; 9 | 10 | /** Restriction on a literal value. **/ 11 | @EqualsAndHashCode(callSuper=true) 12 | @Slf4j 13 | public class ValueRestriction extends Restriction 14 | { 15 | final String value; 16 | 17 | @Override public Set wherePatterns() 18 | { 19 | // TODO: add datatypes from range or somewhere else 20 | String pattern; 21 | String range = property.range; 22 | if(range==null||!(range.startsWith(XSD.getURI()))) 23 | { 24 | pattern = OBS_VAR+" <"+property.uri+"> "+uniqueVar+".\nfilter(str("+uniqueVar+")=\""+value+"\")."; 25 | } else 26 | if(range.equals(XSD.gYear.getURI())) 27 | { 28 | if(!value.matches("[0-9]+")) 29 | { 30 | //TODO readd exception 31 | // throw new RuntimeException("'"+value+"' is not a valid year"); 32 | log.error("'"+value+"' is not a valid year"); 33 | return Collections.emptySet(); 34 | } 35 | pattern = OBS_VAR+" <"+property.uri+"> "+uniqueVar+".\nfilter(year("+uniqueVar+")="+value+")."; 36 | } 37 | else 38 | if(range.equals(XSD.xstring.getURI())) 39 | { 40 | pattern = OBS_VAR+" <"+property.uri+"> \""+value+"\"."; 41 | } else 42 | { 43 | pattern = OBS_VAR+" <"+property.uri+"> \""+value+"\"^^<"+range+">."; 44 | } 45 | 46 | return Collections.singleton(pattern); 47 | // String literal = 
"\""+value+"\""; 48 | // return Collections.singleton("?obs <"+property+"> \""+literal+"\""); 49 | } 50 | 51 | public ValueRestriction(ComponentProperty property, String value) 52 | { 53 | super(property); 54 | this.value = value; 55 | } 56 | 57 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/restriction/package-info.java: -------------------------------------------------------------------------------- 1 | /** Restrictions are inclusion criteria on cube cells which generate parts of a sparql query. */ 2 | package org.aksw.cubeqa.restriction; -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/Csv2Qald.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import java.util.Scanner; 4 | import java.io.IOException; 5 | import org.aksw.cubeqa.benchmark.Benchmark; 6 | 7 | public class Csv2Qald 8 | { 9 | public static void main(String[] args) throws IOException 10 | { 11 | System.out.println("type csv2qald to continue"); 12 | try(Scanner in = new Scanner(System.in)) 13 | {if(!in.nextLine().equals("csv2qald")) {System.out.println("wrong phrase. terminated.");return;}} 14 | System.out.println("Converting csv to QALD xml format and generating answers from SPARQL endpoint. 
This may take a while."); 15 | Benchmark.fromCsv("qald6t3-train-v1.2").saveAsQald(); 16 | } 17 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/EvaluateCubeIndex.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import org.aksw.cubeqa.benchmark.Benchmark; 6 | import org.aksw.cubeqa.benchmark.Question; 7 | import org.aksw.cubeqa.index.CubeIndex; 8 | import lombok.extern.slf4j.Slf4j; 9 | 10 | @Slf4j 11 | public class EvaluateCubeIndex 12 | { 13 | // assumes the index is already filled with the QBench2 cubes 14 | 15 | public static void main(String[] args) 16 | { 17 | Benchmark bench = Benchmark.fromQald("qbench2"); 18 | List equal = new ArrayList<>(bench.questions.size()); 19 | for(Question q: bench.questions) 20 | { 21 | List found = CubeIndex.INSTANCE.getCubeUris(q.string); 22 | boolean correct = q.cubeUri.equals(found.get(0)); 23 | if(!correct) 24 | { 25 | log.info(q.cubeUri+" "+found.get(0)); 26 | } 27 | equal.add(correct); 28 | // log.info(q.string+"correct cube "+q.cubeUri+", found cubes: "+cubes); 29 | } 30 | System.out.println(equal.stream().filter(e->e).count()); 31 | } 32 | 33 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/EvaluateQBench1.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import org.aksw.cubeqa.Algorithm; 4 | import org.aksw.cubeqa.benchmark.Benchmark; 5 | 6 | /** Evaluates the old single-dataset benchmark on finland-aid. 
*/ 7 | public class EvaluateQBench1 8 | { 9 | public static void main(String[] args) 10 | { 11 | long start = System.currentTimeMillis(); 12 | Benchmark.fromQald("finland-aid").evaluate(new Algorithm(),6); 13 | System.out.println(System.currentTimeMillis()-start+" ms"); 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/EvaluateQald6T3Test.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import java.io.IOException; 4 | 5 | import org.aksw.cubeqa.Algorithm; 6 | import org.aksw.cubeqa.benchmark.Benchmark; 7 | 8 | /** Evaluates the QALD 6 Task 3 test benchmark with 50 questions. 9 | * @see https://qald.aksw.org/index.php?x=challenge&q=6. */ 10 | public class EvaluateQald6T3Test 11 | { 12 | public static void main(String[] args) throws IOException 13 | { 14 | long start = System.currentTimeMillis(); 15 | Benchmark.fromJson("qald6t3-test").evaluate(new Algorithm()); 16 | System.out.println(System.currentTimeMillis()-start+" ms"); 17 | } 18 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/EvaluateQald6T3Train.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import org.aksw.cubeqa.Algorithm; 4 | import org.aksw.cubeqa.benchmark.Benchmark; 5 | 6 | /** Evaluates the QALD 6 Task 3 train benchmark with 100 questions. 7 | * @see https://qald.aksw.org/index.php?x=challenge&q=6. 
*/ 8 | public class EvaluateQald6T3Train 9 | { 10 | public static void main(String[] args) 11 | { 12 | long start = System.currentTimeMillis(); 13 | Benchmark.fromQald("qald6t3-train-v1.2").evaluate(new Algorithm()); 14 | System.out.println(System.currentTimeMillis()-start+" ms"); 15 | } 16 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/FillCubeIndex.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import java.util.*; 4 | import java.util.stream.Collectors; 5 | import org.aksw.cubeqa.Cube; 6 | import org.aksw.cubeqa.index.CubeIndex; 7 | 8 | /**Fills the cube index for all 50 cubes of the qbench2 benchmark. */ 9 | public class FillCubeIndex 10 | { 11 | static String[] cubeNames = {"town_of_cary_expenditures","town_of_cary_revenues","city-of-whiteacre-spending","maldives_proposed_expenditure_2015","ministry_of_health","newcastle-city-council-payments-over-500","uk-local-walthamforest","cheshire_west_and_chester_april_2013","city-of-redacre-spending","wandsworthspending_2013","iw-council-spending-2012-13-test","city-of-springfield-budget","pscs_ca_cities","dc-city-salaries","big-lottery-fund-grants","uk-local-gloucestershirev1","ukgov-finances-cra","iati_cordaid_af","ca-local-toronto","ie_charity_exp","frontex","dcc_exp_budget2013","financial_aid","concern2012","dc-vendors-contractors","cameroon_visualisation","nominettrust_funding","618ac3ec98384f44a9ef142356ce476d","propbudg13","nyc-council-member-items","fingal_exp_budget","cm-nwr-investments","ug_budget_subcategories","faith","finland-aid","pvd2014proposed","pbw-ct","trends_in_civi_tech_open_gov","e27f4ef7601446798cfa733a06cea8d9","f0bd947d9854445987d6ece304840a3c","scottish-spending-jan13","oakland-adopted-budget-fy-2011-13-expenditures","allexp13budg","lobbying_dataset_qy","mcc_spend","alb_planned_budget_2013","ke-county-exp","al_planned_budget_2007to2013"
,"external-debts","ddc94682cc95482a8deefc60596686fc"}; 12 | 13 | public static void main(String[] args) 14 | { 15 | System.out.println("type fillcube to continue"); 16 | try(Scanner in = new Scanner(System.in)) 17 | {if(!in.nextLine().equals("fillcube")) {System.out.println("wrong phrase. terminated.");return;}} 18 | Set cubes = Arrays.stream(cubeNames).map(Cube::getInstance).collect(Collectors.toSet()); 19 | CubeIndex.INSTANCE.fill(cubes); 20 | } 21 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/QaldInQaldOut.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | 6 | import org.aksw.cubeqa.Files; 7 | import org.aksw.cubeqa.benchmark.Benchmark; 8 | 9 | /** Test script which reads in qald and saves it back so that we can check if something gets lost.*/ 10 | public class QaldInQaldOut 11 | { 12 | 13 | public static void main(String[] args) throws IOException 14 | { 15 | Benchmark.fromQald("qald6t3-train-v1.2").saveAsQald(new File(Files.localFolder("benchmark"),"test.xml")); 16 | } 17 | 18 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/Service.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.scripts; 2 | 3 | import java.util.List; 4 | import java.util.Scanner; 5 | import java.io.IOException; 6 | import org.aksw.cubeqa.Algorithm; 7 | import org.aksw.cubeqa.Cube; 8 | import org.aksw.cubeqa.index.CubeIndex; 9 | import org.apache.jena.query.ResultSetFormatter; 10 | import edu.stanford.nlp.io.StringOutputStream; 11 | 12 | /** Web service that answers questions as strings with the W3C SPARQL Query Results XML Format. 13 | * See {@link https://www.w3.org/TR/rdf-sparql-XMLres}. 
*/ 14 | public class Service 15 | { 16 | public static String answerJson(String question) throws IOException 17 | { 18 | List uris = CubeIndex.INSTANCE.getCubeUris(question); 19 | if(uris.isEmpty()) {return "";} 20 | 21 | String cubeName = Cube.linkedSpendingCubeName(uris.get(0)); 22 | try(StringOutputStream out = new StringOutputStream()) 23 | { 24 | ResultSetFormatter.outputAsJSON(out, 25 | Cube.getInstance(cubeName).sparql.select( 26 | new Algorithm().template(cubeName, question).sparqlQuery() 27 | )); 28 | return out.toString(); 29 | } 30 | } 31 | 32 | public static void main(String[] args) throws IOException 33 | { 34 | System.out.println("Command line version of CubeQA."); 35 | String question; 36 | try(Scanner in = new Scanner(System.in)) 37 | { 38 | do 39 | { 40 | System.out.println("Please enter a question (ENTER for exit)."); 41 | question = in.nextLine(); 42 | System.out.println(answerJson(question)); 43 | } while(!question.isEmpty()); 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/scripts/package-info.java: -------------------------------------------------------------------------------- 1 | /** Contains executables for a specific task. 
*/ 2 | package org.aksw.cubeqa.scripts; -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/GreedyTemplator.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import edu.stanford.nlp.trees.Tree; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.aksw.cubeqa.Cube; 6 | import static org.aksw.cubeqa.StanfordTrees.phrase; 7 | import java.util.*; 8 | import java.util.function.Predicate; 9 | import java.util.stream.Collectors; 10 | 11 | /** Used in the ISWC 2016 paper and for the qald6t3 challenge.*/ 12 | @Slf4j 13 | public class GreedyTemplator extends Templator 14 | { 15 | public GreedyTemplator(Cube cube) 16 | { 17 | super(cube); 18 | } 19 | 20 | /** The recursive algorithm. */ 21 | protected Fragment visitRecursive(Tree tree) 22 | { 23 | while (/*!tree.isPreTerminal()&&*/tree.children().length == 1) 24 | { 25 | // skipping down 26 | tree = tree.getChild(0); 27 | } 28 | String phrase = phrase(tree); 29 | if (phrase.length() < PHRASE_MIN_LENGTH) 30 | { 31 | log.trace("phrase less than " + PHRASE_MIN_LENGTH + " characters, skipped: " + phrase); 32 | return new Fragment(cube, phrase); 33 | } 34 | 35 | if (phrase.length() > PHRASE_MAX_LENGTH) 36 | { 37 | log.trace("phrase '" + phrase + "' more than " + PHRASE_MAX_LENGTH + " characters, skipping matching try"); 38 | } else 39 | { 40 | log.trace("visiting tree " + tree); 41 | log.trace("Phrase \"" + phrase + "\"..."); 42 | // either we detected nothing or only part of the phrase 43 | 44 | Match matchResult = identify(phrase); 45 | // whole phrase matched, subtrees skipped 46 | if (!matchResult.isEmpty()) 47 | { 48 | log.trace("matched to " + matchResult); 49 | return matchResult.toFragment(cube); 50 | } 51 | } 52 | // either we didn't match because the phrase is too long or matching didn't find anything, so match subtrees separately 53 | log.trace("unmatched, looking at 
subtrees"); 54 | List childFragments = fragments(tree.getChildrenAsList(), x -> true); 55 | if (childFragments.isEmpty()) 56 | { 57 | return new Fragment(cube, phrase); 58 | } 59 | List childFragmentsWithRefs = childFragments.stream().filter(f -> !f.isEmpty()).collect(Collectors.toList()); 60 | List childFragmentsWithoutRefs = new LinkedList<>(childFragments); 61 | childFragmentsWithoutRefs.removeAll(childFragmentsWithRefs); 62 | 63 | List usefulChildFragments = new ArrayList<>(childFragmentsWithRefs); 64 | // we could throw unmatched fragments away but we try to combine them into something useful first 65 | if (!childFragmentsWithoutRefs.isEmpty()) 66 | { 67 | String childFragmentsWithoutRefsPhrase = Fragment.combine(childFragmentsWithoutRefs).phrase; 68 | // too small, throw away 69 | if (childFragmentsWithoutRefsPhrase.length() < 3) 70 | { 71 | log.trace("unmatched fragment \"" + childFragmentsWithoutRefsPhrase + "\" length < 3, skipped"); 72 | } 73 | // it's not small, but is it useful? do all the unmatched fragments match something? 
74 | else 75 | { 76 | // TODO check partial combinations too 77 | Match unmatchedResult = identify(childFragmentsWithoutRefsPhrase); 78 | log.trace("unmatched fragments with phrase \"" + unmatchedResult.phrase + "\""); 79 | // unmatchedFragments.stream().map(f->f.phrase).collect(Collectors.toList()); 80 | 81 | if (unmatchedResult.isEmpty()) 82 | { 83 | log.trace("unmatched fragment combination does not match anything."); 84 | } else 85 | { 86 | log.trace("unmatched fragment combination matched to " + unmatchedResult); 87 | usefulChildFragments.add(unmatchedResult.toFragment(cube)); 88 | } 89 | } 90 | } 91 | if (usefulChildFragments.isEmpty()) 92 | { 93 | log.trace("no match found for phrase \"" + phrase + "\""); 94 | return new Fragment(cube, phrase); 95 | } else 96 | { 97 | return Fragment.combine(usefulChildFragments); 98 | } 99 | } 100 | 101 | public Template buildTemplate(String question) 102 | { 103 | preprocess(question); 104 | Fragment rootFragment = visitRecursive(root); 105 | Template finalTemplate = Fragment.combine(Arrays.asList(rootFragment, detectFragment)).toTemplate(eats).get(); 106 | return finalTemplate; 107 | } 108 | 109 | /** 110 | * Sublist of trees that satisfiy the given predicate 111 | */ 112 | protected List fragments(List trees, Predicate predicate) 113 | { 114 | return trees.stream() 115 | .map(this::visitRecursive) 116 | .filter(predicate) 117 | .collect(Collectors.toList()); 118 | } 119 | 120 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/Match.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import java.util.*; 4 | import org.aksw.cubeqa.Cube; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import org.aksw.cubeqa.property.scorer.ScoreResult; 7 | import de.konradhoeffner.commons.Pair; 8 | import lombok.*; 9 | 10 | /** Result of a match on a part of the parse tree. 
Gets combined with other match results to a template fragment. */ 11 | @EqualsAndHashCode 12 | @Getter 13 | @ToString 14 | class Match 15 | { 16 | /** A phrase contained in the question.*/ 17 | public final String phrase; 18 | // /** Character index of the start of the phrase in the original question. **/ 19 | // public final int phraseIndex; 20 | /** the estimated probability that the phrase refers to a property with a given property label */ 21 | public final Map nameRefs; 22 | /** the estimated probability that the phrase refers to a property with a given property value*/ 23 | public final Map valueRefs; 24 | 25 | public final double score; 26 | 27 | public void join(Match otherResult) 28 | { 29 | Set nameValue = this.nameRefs.keySet(); 30 | nameValue.retainAll(otherResult.valueRefs.keySet()); 31 | // nameValue.retainAll(otherResult.valueRefs.stream().map(ScoreResult::getProperty).collect(Collectors.toSet())); 32 | 33 | nameValue.stream().map(property->new Pair<>(property,nameRefs.get(property)*valueRefs.get(property).score)) 34 | .max(Comparator.comparing(Pair::getB)); 35 | } 36 | 37 | public boolean isEmpty() 38 | { 39 | return nameRefs.isEmpty()&&valueRefs.isEmpty(); 40 | } 41 | 42 | public Match(String phrase, /* int phraseIndex,*/ Map nameRefs, Map valueRefs) 43 | { 44 | this.phrase = phrase; 45 | // this.phraseIndex=phraseIndex; 46 | this.nameRefs = nameRefs; 47 | this.valueRefs = valueRefs; 48 | score = Math.max(nameRefs.values().stream().reduce(0.0,Double::max), 49 | valueRefs.values().stream().mapToDouble(ScoreResult::getScore).max().orElse(0)); 50 | } 51 | 52 | public Fragment toFragment(Cube cube) 53 | { 54 | return new Fragment(cube, phrase, new HashSet<>(), new HashSet<>(),new HashSet<>(),new HashSet<>(), Collections.singleton(this)); 55 | } 56 | 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/StanfordNlp.java: 
-------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import java.util.List; 4 | import java.util.Properties; 5 | import java.io.PrintStream; 6 | import edu.stanford.nlp.io.NullOutputStream; 7 | import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; 8 | import edu.stanford.nlp.pipeline.Annotation; 9 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 10 | import edu.stanford.nlp.trees.Tree; 11 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; 12 | import edu.stanford.nlp.util.CoreMap; 13 | import lombok.RequiredArgsConstructor; 14 | import lombok.ToString; 15 | import lombok.extern.slf4j.Slf4j; 16 | 17 | /** Stanford Core NLP utility class. */ 18 | @Slf4j 19 | public class StanfordNlp 20 | { 21 | static private final StanfordCoreNLP treeParser; 22 | // static private final StanfordCoreNLP lemmatizer; 23 | 24 | @RequiredArgsConstructor 25 | @ToString 26 | static public class ParseResult 27 | { 28 | final Tree parseTree; 29 | final String pos; 30 | } 31 | 32 | static 33 | { 34 | // disable logging 35 | // TODO do this more elegantly 36 | PrintStream err = System.err; 37 | try(PrintStream nulls = new PrintStream(new NullOutputStream())) 38 | { 39 | System.setErr(nulls); 40 | Properties props = new Properties(); 41 | props.put("annotators", "tokenize, ssplit, pos, parse"); 42 | treeParser = new StanfordCoreNLP(props); 43 | System.setErr(err); 44 | // { 45 | // Properties props = new Properties(); 46 | // props.put("annotators", "tokenize, ssplit, pos, lemma"); 47 | // lemmatizer = new StanfordCoreNLP(props); 48 | // } 49 | // enable logging 50 | } 51 | } 52 | 53 | public static Tree parse(String sentence) 54 | { 55 | log.trace("parsing sentence: '"+sentence+"' as tree"); 56 | Annotation document = new Annotation(sentence); 57 | treeParser.annotate(document); 58 | List sentences = document.get(SentencesAnnotation.class); 59 | return 
sentences.get(0).get(TreeAnnotation.class); 60 | // return new ParseResult(sentences.get(0).get(TreeAnnotation.class),document.get(PartOfSpeechAnnotation.class)); 61 | } 62 | 63 | // public static String lemmatize(String text) 64 | // { 65 | // Annotation document = lemmatizer.process(text); 66 | //StringBuilder sb = new StringBuilder(); 67 | // for(CoreMap sentence: document.get(SentencesAnnotation.class)) 68 | // { 69 | // for(CoreLabel token: sentence.get(TokensAnnotation.class)) 70 | // { 71 | // String word = token.get(TextAnnotation.class); 72 | // String lemma = token.get(LemmaAnnotation.class); 73 | // sb.append(" "+lemma); 74 | // } 75 | // } 76 | // return sb.toString().substring(1); 77 | // } 78 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/Template.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import java.util.*; 4 | import java.util.stream.Collectors; 5 | import org.aksw.cubeqa.Cube; 6 | import org.aksw.cubeqa.detector.Aggregate; 7 | import org.aksw.cubeqa.property.ComponentProperty; 8 | import org.aksw.cubeqa.restriction.Restriction; 9 | import lombok.RequiredArgsConstructor; 10 | 11 | /** Template for a data cube query. 
*/ 12 | @RequiredArgsConstructor 13 | public class Template 14 | { 15 | public final Cube cube; 16 | 17 | final Set restrictions; 18 | final Set answerProperties; 19 | final Set perProperties; 20 | final Set aggregates; 21 | 22 | /** true, iff it contains at least one answer property */ 23 | boolean isComplete() 24 | { 25 | return !answerProperties.isEmpty(); 26 | } 27 | 28 | /** Generates a SPARQL query out of the template.*/ 29 | public String sparqlQuery() 30 | { 31 | if(!isComplete()) {throw new IllegalStateException("not complete");} 32 | List wherePatterns = restrictions.stream().flatMap(r->r.wherePatterns().stream()).collect(Collectors.toList()); 33 | wherePatterns.add("?obs qb:dataSet <"+cube.uri+">."); 34 | wherePatterns.add("?obs a qb:Observation."); 35 | 36 | List orderLimitPatterns = restrictions.stream().flatMap(r->r.orderLimitPatterns().stream()).collect(Collectors.toList()); 37 | if(orderLimitPatterns.size()>1) { 38 | throw new IllegalArgumentException("more than one orderlimit pattern"); 39 | } 40 | 41 | StringBuilder sb = new StringBuilder(); 42 | String resultDef; 43 | ComponentProperty answerProperty = answerProperties.iterator().next(); 44 | switch(answerProperty.answerType) 45 | { 46 | case UNCOUNTABLE: resultDef = "xsd:decimal(?"+answerProperty.var+")";break; 47 | case COUNTABLE: resultDef = "xsd:integer(?"+answerProperty.var+")";break; 48 | case ENTITY: 49 | case TEMPORAL: 50 | // resultDef = "distinct(?"+answerProperties.iterator().next().var+")";break; 51 | default: 52 | // those aggregates can only apply to numbers 53 | aggregates.remove(Aggregate.SUM); 54 | aggregates.remove(Aggregate.AVG); 55 | aggregates.remove(Aggregate.MIN); 56 | aggregates.remove(Aggregate.MAX); 57 | resultDef = "distinct(?"+answerProperty.var+")"; 58 | } 59 | 60 | if(!aggregates.isEmpty()) 61 | { 62 | Aggregate aggregate = aggregates.iterator().next(); // only one is supported yet 63 | resultDef = aggregate+"("+resultDef+")"; 64 | } 65 | sb.append("select 
"+resultDef+" "); 66 | perProperties.removeAll(answerProperties); 67 | for(ComponentProperty p: perProperties) {sb.append(" ?"+p.var);} 68 | sb.append("\n{\n"); 69 | for(String pattern: wherePatterns) {sb.append(pattern);sb.append("\n");} 70 | // all properties that are referenced in a where pattern 71 | Set whereProperties = cube.properties.values().stream().filter(p-> 72 | wherePatterns.stream().reduce((a,b)->a+b).get() 73 | .contains(p.uri)).collect(Collectors.toSet()); 74 | for(ComponentProperty p: answerProperties) {sb.append("?obs <"+p.uri+"> ?"+p.var+".");sb.append("\n");} 75 | for(ComponentProperty p: perProperties) {sb.append("?obs <"+p.uri+"> ?"+p.var+".");sb.append("\n");} 76 | // those properties are used in order limit patterns and need to have their own triple pattern as well 77 | List otherProperties = restrictions.stream().map(Restriction::getProperty).collect(Collectors.toList()); 78 | // otherProperties.removeAll(answerProperties); 79 | otherProperties.removeAll(perProperties); 80 | otherProperties.removeAll(whereProperties); 81 | for(ComponentProperty p: otherProperties) {sb.append("?obs <"+p.uri+"> ?"+p.var+".");sb.append("\n");} 82 | sb.append("}"); 83 | if(!orderLimitPatterns.isEmpty()) 84 | { 85 | sb.append(orderLimitPatterns.iterator().next()); 86 | } 87 | return sb.toString(); 88 | } 89 | 90 | Set allProperties() 91 | { 92 | Set properties = new HashSet<>(answerProperties); 93 | properties.addAll(perProperties); 94 | // TODO restrictions should never have null properties, investiage 95 | restrictions.stream().map(Restriction::getProperty).forEach(properties::add); 96 | return properties; 97 | } 98 | 99 | // public static Pair precisionRecallProperties(CubeTemplate standard, CubeTemplate candidate) 100 | // { 101 | // log.debug("property type: "+candidate.allProperties().iterator().next().propertyType); 102 | // Set found = candidate.allProperties(); 103 | // 104 | // 
//.stream().filter(p->p.propertyType==PropertyType.DIMENSION).collect(Collectors.toSet()); 105 | // Set foundCorrect = new HashSet<>(found); 106 | // Set correct = standard.allProperties(); 107 | // //.stream().filter(p->p.propertyType==PropertyType.DIMENSION).collect(Collectors.toSet()); 108 | // foundCorrect.retainAll(correct); 109 | // if(found.size()==0||correct.size()==0) return null; 110 | // log.debug("found: "+found); 111 | // log.debug("correct: "+correct); 112 | // return new Pair((double)foundCorrect.size()/found.size(),(double)foundCorrect.size()/correct.size()); 113 | // } 114 | // 115 | // public static Pair precisionRecallDimensions(CubeTemplate standard, CubeTemplate candidate) 116 | // { 117 | // log.debug("property type: "+candidate.allProperties().iterator().next().propertyType); 118 | // Set found = candidate.allProperties() 119 | //// TODO how can p be null?? 120 | // .stream().filter(p->p!=null&&p.propertyType==PropertyType.ATTRIBUTE).collect(Collectors.toSet()); 121 | // Set foundCorrect = new HashSet<>(found); 122 | // Set correct = standard.allProperties() 123 | // .stream().filter(p->p!=null&&p.propertyType==PropertyType.ATTRIBUTE).collect(Collectors.toSet()); 124 | // foundCorrect.retainAll(correct); 125 | // if(found.size()==0||correct.size()==0) return null; 126 | // log.debug("found: "+found); 127 | // log.debug("correct: "+correct); 128 | // return new Pair((double)foundCorrect.size()/found.size(),(double)foundCorrect.size()/correct.size()); 129 | // } 130 | // 131 | // public static Pair precisionRecallRestrictions(CubeTemplate standard, CubeTemplate candidate) 132 | // { 133 | // Set found = candidate.restrictions; 134 | // 135 | // //.stream().filter(p->p.propertyType==PropertyType.DIMENSION).collect(Collectors.toSet()); 136 | // Set foundCorrect = new HashSet<>(found); 137 | // Set correct = standard.restrictions; 138 | // //.stream().filter(p->p.propertyType==PropertyType.DIMENSION).collect(Collectors.toSet()); 139 | // 
foundCorrect.retainAll(correct); 140 | // if(found.size()==0||correct.size()==0) return null; 141 | // return new Pair((double)foundCorrect.size()/found.size(),(double)foundCorrect.size()/correct.size()); 142 | // } 143 | 144 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/Templator.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import de.konradhoeffner.commons.Pair; 4 | import de.konradhoeffner.commons.StopWatch; 5 | import edu.stanford.nlp.trees.Tree; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.aksw.cubeqa.*; 8 | import org.aksw.cubeqa.detector.Detector; 9 | import org.aksw.cubeqa.property.ComponentProperty; 10 | import org.aksw.cubeqa.property.scorer.ScoreResult; 11 | import org.aksw.cubeqa.property.scorer.Scorers; 12 | import java.util.*; 13 | 14 | /** Generates the query template for a question. */ 15 | @Slf4j 16 | public abstract class Templator 17 | { 18 | // {log.setLevel(Level.ALL);} 19 | protected static final int PHRASE_MIN_LENGTH = 3; 20 | protected static final int PHRASE_MAX_LENGTH = 30; 21 | 22 | protected final Cube cube; 23 | protected Fragment detectFragment; 24 | protected Tree root; 25 | protected EnumSet eats; 26 | 27 | public Templator(Cube cube) {this.cube=cube;} 28 | 29 | abstract public Template buildTemplate(String question); 30 | 31 | protected void preprocess(String question) 32 | { 33 | String replaced = Replacer.replace(question); 34 | if(!replaced.equals(question)) 35 | { 36 | question=replaced; 37 | log.info("Replacement: "+question); 38 | } 39 | StopWatch eatWatch = StopWatches.INSTANCE.getWatch("eat"); 40 | eatWatch.start(); 41 | Optional>> oPair = AnswerType.eatAndQuestionWord(question); 42 | eatWatch.stop(); 43 | eats = EnumSet.allOf(AnswerType.class); 44 | if(!oPair.isPresent()) {log.warn("no question word found for question '"+question+"': no answer type 
restriction possible.");} 45 | else 46 | { 47 | String questionWord = oPair.get().a; 48 | eats = oPair.get().b; 49 | question = question.substring(questionWord.length()); 50 | } 51 | 52 | String noStop = question; 53 | if(Config.INSTANCE.removeStopWords) 54 | { 55 | // noStop = Stopwords.remove(noStop, Stopwords.FINLAND_AID_WORDS); 56 | //noStop = Stopwords.remove(noStop, Stopwords.PROPERTY_WORDS); 57 | noStop = Stopwords.remove(noStop, Stopwords.STOPWORDS); 58 | } 59 | if(!question.equals(noStop)) {log.info("removed stop words, result: "+noStop);} 60 | StopWatch detectWatch = StopWatches.INSTANCE.getWatch("detect"); 61 | detectWatch.start(); 62 | Pair detectResult = detect(noStop); 63 | detectWatch.stop(); 64 | StopWatch parseWatch = StopWatches.INSTANCE.getWatch("parse"); 65 | parseWatch.start(); 66 | root = StanfordNlp.parse(detectResult.b); 67 | parseWatch.stop(); 68 | detectFragment=detectResult.a; 69 | } 70 | 71 | /** @param question the full question used on all detectors 72 | * @return the combined detected fragment and the leftover phrase 73 | */ 74 | Pair detect(final String question) 75 | { 76 | Fragment allDetectorFragment = null; 77 | 78 | String reducedPhrase = question; 79 | for(Detector detector: Detector.DETECTORS) 80 | { 81 | Set detectorResults = detector.detect(cube,reducedPhrase); 82 | if(!detectorResults.isEmpty()) 83 | { 84 | for(Fragment fragment: detectorResults) 85 | { 86 | reducedPhrase = question.replace(fragment.phrase,"").replace(" ", " "); 87 | if(reducedPhrase.equals(question)) { 88 | throw new IllegalArgumentException("fragment phrase '"+fragment.phrase+"' not found in whole phrase "+question); 89 | } 90 | log.debug("Detector "+detector.getClass().getSimpleName()+" matched part: '"+fragment.phrase+"', left over phrase: "+reducedPhrase); 91 | } 92 | // keep results from earlier used detectors 93 | if(allDetectorFragment!=null) {detectorResults.add(allDetectorFragment);} 94 | allDetectorFragment = Fragment.combine(new 
ArrayList<>(detectorResults)); 95 | } 96 | } 97 | if(allDetectorFragment==null) {return new Pair<>(new Fragment(cube,""),reducedPhrase);} 98 | return new Pair<>(allDetectorFragment,reducedPhrase); 99 | } 100 | 101 | 102 | public Match identify(String phrase/*, int phraseIndex*/) 103 | { 104 | StopWatch scoreWatch = StopWatches.INSTANCE.getWatch("score"); 105 | scoreWatch.start(); 106 | Map nameRefs = Scorers.scorePhraseProperties(cube,phrase); 107 | Map valueRefs = Scorers.scorePhraseValues(cube,phrase); 108 | scoreWatch.stop(); 109 | return new Match(phrase,/* phraseIndex, */nameRefs, valueRefs); 110 | } 111 | 112 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/WeightedTemplator.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import de.konradhoeffner.commons.ListTree; 4 | import edu.stanford.nlp.trees.Tree; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.aksw.cubeqa.Cube; 7 | import static org.aksw.cubeqa.StanfordTrees.phrase; 8 | import java.util.*; 9 | 10 | /** Generates the Cube Template. */ 11 | @Slf4j 12 | public class WeightedTemplator extends Templator 13 | { 14 | public WeightedTemplator(Cube cube) {super(cube);} 15 | 16 | public Template buildTemplate(String question) 17 | { 18 | preprocess(question); 19 | ListTree fragments = visitRecursive(root); 20 | 21 | //Template finalTemplate = Fragment.combine(Arrays.asList(rootFragment, detectFragment)).toTemplate(eats).get(); 22 | //return finalTemplate; 23 | return Fragment.combine(Arrays.asList(detectFragment,Fragment.combine(fragments.items()))).toTemplate(eats).get(); 24 | } 25 | 26 | protected ListTree visitRecursive(Tree tree) 27 | { 28 | ListTree fragments = new ListTree<>(new Fragment(cube, phrase(tree))); 29 | visitRecursive(tree, fragments, 0); 30 | return fragments; 31 | } 32 | 33 | /** The recursive algorithm. 
*/ 34 | protected void visitRecursive(Tree parseTree, ListTree parent, int depth) 35 | { 36 | String phrase = phrase(parseTree); 37 | if (phrase.length() < PHRASE_MIN_LENGTH) 38 | { 39 | log.trace("phrase less than " + PHRASE_MIN_LENGTH + " characters, skipped: " + phrase); 40 | parent.children.add(new ListTree<>(new Fragment(cube, phrase))); 41 | // don't go deeper, we are too short already 42 | return; 43 | } 44 | while (parseTree.children().length == 1) 45 | { 46 | parseTree = parseTree.getChild(0); // skipping down 47 | } 48 | 49 | ListTree cursor = null; 50 | if(depth==0) {cursor=parent;} // don't match the full question (could be changed later for higher recall, lower precision?) 51 | else 52 | { 53 | if (phrase.length() > PHRASE_MAX_LENGTH) 54 | { 55 | log.trace("phrase '" + phrase + "' more than " + PHRASE_MAX_LENGTH + " characters, skipping matching try"); 56 | // as with the minimum length, we add an empty fragment, but this time we keep going down 57 | cursor = new ListTree<>(new Fragment(cube, phrase)); 58 | } else 59 | { 60 | log.trace("visiting tree " + parseTree); 61 | log.trace("Phrase \"" + phrase + "\"..."); 62 | Match matchResult = identify(phrase); 63 | if (!matchResult.isEmpty()) 64 | { 65 | log.trace("matched to " + matchResult); 66 | cursor = new ListTree<>(matchResult.toFragment(cube)); 67 | // in the greedy algorithm we return here, because we found something, but for the weighted we keep looking for better options 68 | // don't use leftovers right now because we keep recursing anyways 69 | // did we match everything or just part? 
70 | // String leftover = phrase.replaceAll(matchResult.phrase, "").trim(); 71 | // if (!leftover.isEmpty()) 72 | // { 73 | // // this leftover child will not be recursed but maybe it can be reused later in combination with something else 74 | // parent.add(new Fragment(cube, leftover)); 75 | // } 76 | } 77 | else 78 | { 79 | cursor = new ListTree<>(new Fragment(cube, phrase)); 80 | } 81 | } 82 | parent.children.add(cursor); 83 | } 84 | 85 | // Match subtrees now, regardless of whether we matched something in this step or not. 86 | log.trace("looking at subtrees"); 87 | //List childFragments = parseTree.getChildrenAsList().stream().flatMap(t->visitRecursive(t).stream()).collect(Collectors.toList()); 88 | for (Tree child : parseTree.getChildrenAsList()) 89 | { 90 | visitRecursive(child, cursor,depth+1); 91 | } 92 | } 93 | 94 | // Fragment combine(ListTree fragments) 95 | // { 96 | // List childFragmentsWithRefs = fragments.items().stream().filter(f -> !f.isEmpty()).collect(Collectors.toList()); 97 | // List childFragmentsWithoutRefs = new LinkedList<>(fragments.items()); 98 | // childFragmentsWithoutRefs.removeAll(childFragmentsWithRefs); 99 | // 100 | // List usefulChildFragments = new ArrayList<>(childFragmentsWithRefs); 101 | // // we could throw unmatched fragments away but we try to combine them into something useful first 102 | //// if (!childFragmentsWithoutRefs.isEmpty()) 103 | //// { 104 | //// String childFragmentsWithoutRefsPhrase = Fragment.combine(childFragmentsWithoutRefs).phrase; 105 | //// // too small, throw away 106 | //// if (childFragmentsWithoutRefsPhrase.length() < 3) 107 | //// { 108 | //// log.trace("unmatched fragment \"" + childFragmentsWithoutRefsPhrase + "\" length < 3, skipped"); 109 | //// } 110 | //// // it's not small, but is it useful? do all the unmatched fragments match something? 
111 | //// else 112 | //// { 113 | //// // TODO check partial combinations too 114 | //// Match unmatchedResult = identify(childFragmentsWithoutRefsPhrase); 115 | //// log.trace("unmatched fragments with phrase \"" + unmatchedResult.phrase + "\""); 116 | //// // unmatchedFragments.stream().map(f->f.phrase).collect(Collectors.toList()); 117 | //// 118 | //// if (unmatchedResult.isEmpty()) 119 | //// { 120 | //// log.trace("unmatched fragment combination does not match anything."); 121 | //// } else 122 | //// { 123 | //// log.trace("unmatched fragment combination matched to " + unmatchedResult); 124 | //// usefulChildFragments.add(unmatchedResult.toFragment(cube)); 125 | //// } 126 | //// } 127 | //// } 128 | // if (usefulChildFragments.isEmpty()) 129 | // { 130 | // log.trace("no match found for phrase \"" + phrase + "\""); 131 | // parent.add(new Fragment(cube, phrase)); 132 | // return parent; 133 | // } else 134 | // { 135 | // parent.add(Fragment.combine(usefulChildFragments)); 136 | // return parent; 137 | // } 138 | // } 139 | 140 | /* 141 | public static Fragment combine(ListTree fragments) 142 | { 143 | StopWatch fragmentCombineWatch = StopWatches.INSTANCE.getWatch("fragmentcombine"); 144 | fragmentCombineWatch.start(); 145 | // if(fragments.isEmpty()) 146 | //{throw new IllegalArgumentException("empty fragment set, can't combine");} 147 | // {log.warn("empty fragment set, combination empty");} 148 | 149 | // *** new sets are unions over all fragment sets ********************************************************** 150 | if(fragments.nodes().stream().map(f->f.item.cube.uri).collect(Collectors.toSet()).size()>1) { 151 | throw new IllegalArgumentException("different cube uris, can't combine"); 152 | } 153 | // TODO join restrictions if possible (e.g. 
intervals for numericals, detect impossibilities) 154 | Set restrictions = new HashSet<>(); 155 | Set answerProperties = new HashSet<>(); 156 | Set perProperties = new HashSet<>(); 157 | Set aggregates = new HashSet<>(); 158 | Set matchResults = new HashSet<>(); 159 | fragments.nodes().forEach(node-> 160 | { 161 | Fragment f = node.item; 162 | restrictions.addAll(f.restrictions); 163 | answerProperties.addAll(f.answerProperties); 164 | perProperties.addAll(f.perProperties); 165 | aggregates.addAll(f.aggregates); 166 | }); 167 | // *** phrases are added in list order with space in between *********************************************** 168 | String combinedPhrase = fragments.stream().map(Fragment::getPhrase).reduce("", (a,b)->a+" "+b).trim(); 169 | Fragment fragment = new Fragment(fragments.iterator().next().cube,combinedPhrase, 170 | restrictions, answerProperties, perProperties, aggregates,matchResults); 171 | 172 | // *** combining match results ***************************************************************************** 173 | // **** get all properties that are not yet assigned but somewhere referenced both as name and as value 174 | // strictly, they should be referenced in different matchresult objects but that calculation would be too complicated, sort that out later 175 | Set properties = fragment.unreferredProperties(); 176 | Set fragmentsMatchResults = fragments.stream().map(Fragment::getMatches).map(Set::stream).flatMap(id->id).collect(Collectors.toSet()); 177 | properties.retainAll(fragmentsMatchResults.stream().map(mr->mr.nameRefs.keySet()).flatMap(Set::stream).collect(Collectors.toSet())); 178 | properties.retainAll(fragmentsMatchResults .stream().map(mr->mr.valueRefs.keySet()).flatMap(Set::stream).collect(Collectors.toSet())); 179 | for(ComponentProperty property: properties) 180 | { 181 | // greedy algorithm, does not work when highestNameRef has the only value Ref TODO intelligently check more pairs 182 | // we should always get a highest name in 
the first iteration per construction of fragmentsMatchResults 183 | // but later this one can be used for another property, so use ifpresent 184 | 185 | fragmentsMatchResults.stream().max(Comparator.comparingDouble(mr->mr.nameRefs.get(property)==null?0:mr.nameRefs.get(property))) 186 | .ifPresent(highestNameRef-> 187 | { 188 | fragmentsMatchResults.stream().filter(mr->mr!=highestNameRef) 189 | .max(Comparator.comparingDouble(mr->mr.valueRefs.get(property)==null?0:mr.valueRefs.get(property).score)) 190 | .ifPresent(highestValueRef-> 191 | { 192 | if(highestNameRef.nameRefs.get(property)!=null&&highestValueRef.valueRefs.get(property)!=null) 193 | { 194 | double score = highestNameRef.nameRefs.get(property)*highestValueRef.valueRefs.get(property).score; 195 | if(score>MIN_COMBINED_SCORE) 196 | { 197 | restrictions.add(highestValueRef.valueRefs.get(property).toRestriction()); 198 | fragmentsMatchResults.remove(highestNameRef); 199 | fragmentsMatchResults.remove(highestValueRef); 200 | } 201 | } 202 | }); 203 | }); 204 | } 205 | // add back all non used match results 206 | matchResults.addAll(fragmentsMatchResults); 207 | // **** end combining match resuls ************************************************************************* 208 | 209 | // Set nameValue = this.nameRefs.keySet(); 210 | // nameValue.retainAll(otherResult.valueRefs.keySet()); 211 | 212 | fragmentCombineWatch.stop(); 213 | return fragment; 214 | } 215 | */ 216 | 217 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/cubeqa/template/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | /** 5 | * @author konrad 6 | * 7 | */ 8 | package org.aksw.cubeqa.template; -------------------------------------------------------------------------------- /src/main/java/org/aksw/openqa/component/answerformulation/queryparser/impl/CubeQaQueryParser.java: 
-------------------------------------------------------------------------------- 1 | package org.aksw.openqa.component.answerformulation.queryparser.impl; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.List; 6 | import java.util.Map; 7 | import org.aksw.cubeqa.Algorithm; 8 | import org.aksw.cubeqa.Config; 9 | import org.aksw.cubeqa.Cube; 10 | import org.aksw.cubeqa.index.CubeIndex; 11 | import org.aksw.openqa.Properties; 12 | import org.aksw.openqa.component.answerformulation.AbstractQueryParser; 13 | import org.aksw.openqa.component.context.IContext; 14 | import org.aksw.openqa.component.param.IParamMap; 15 | import org.aksw.openqa.component.param.IResultMap; 16 | import org.aksw.openqa.component.param.ResultMap; 17 | import org.aksw.openqa.component.providers.impl.ServiceProvider; 18 | import org.aksw.openqa.component.service.cache.ICacheService; 19 | import org.kohsuke.args4j.CmdLineParser; 20 | import org.kohsuke.args4j.MapParser; 21 | 22 | /** @author {@linkplain http://konradhoeffner.de} */ 23 | public class CubeQaQueryParser extends AbstractQueryParser { 24 | 25 | // private static Logger logger = Logger.getLogger(CubeQaQueryParser.class); 26 | // 27 | // // Component params 28 | public final static String END_POINT_PARAM = "END_POINT"; 29 | public final static String DEFAULT_GRAPHS_PARAM = "DEFAULT_GRAPH"; 30 | // 31 | public final static String CACHE_CONTEXT = "cubeqa"; 32 | 33 | public CubeQaQueryParser(Map params) 34 | { 35 | super(params); 36 | } 37 | 38 | Algorithm algorithm = null; 39 | 40 | @Override 41 | public boolean canProcess(IParamMap token) { 42 | String q = (String) token.getParam(Properties.Literal.TEXT); 43 | return q != null; 44 | } 45 | // 46 | // @SuppressWarnings("unchecked") 47 | @Override 48 | public List process(IParamMap paramMap, ServiceProvider serviceProvider, IContext context) throws Exception 49 | { 50 | ICacheService cacheService = serviceProvider.get(ICacheService.class); 51 | 
String question = (String) paramMap.getParam(Properties.Literal.TEXT); 52 | String sparqlQuery = cacheService.get(CACHE_CONTEXT, question, String.class); 53 | // cache miss 54 | if(sparqlQuery == null) 55 | { 56 | List uris = CubeIndex.INSTANCE.getCubeUris(question); 57 | if(uris.isEmpty()) {return Collections.emptyList();} 58 | String cubeName = Cube.linkedSpendingCubeName(uris.get(0)); 59 | sparqlQuery = algorithm.template(cubeName, question).sparqlQuery(); 60 | cacheService.put(CACHE_CONTEXT, question, sparqlQuery); 61 | } 62 | 63 | List results = new ArrayList(); 64 | 65 | ResultMap r = new ResultMap(); 66 | r.setParam(Properties.SPARQL, sparqlQuery); 67 | results.add(r); 68 | return results; 69 | } 70 | 71 | @Override 72 | public void setProperties(Map params) 73 | { 74 | // String endPoint = (String) params.get(END_POINT_PARAM); 75 | // TODO: get properties from config 76 | // TODO: super.setProperties first or last or doesn't matter? ask edgard 77 | super.setProperties(params); // saving parameters into the Interpreter 78 | // args4j is made for command line options (-this -that) but we can transform our map in such a string so we don't have to assign each parameter by hand 79 | MapParser parser = new MapParser(new CmdLineParser(Config.INSTANCE)); 80 | parser.parse(params); 81 | } 82 | 83 | @Override public void startup() 84 | { 85 | algorithm = new Algorithm(); 86 | } 87 | // 88 | // @Override 89 | // public void shutdown() { 90 | // } 91 | 92 | @Override 93 | public String getVersion() { 94 | return "0.9"; 95 | } 96 | 97 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/openqa/component/answerformulation/queryparser/impl/CubeQaQueryParserFactory.java: -------------------------------------------------------------------------------- 1 | package org.aksw.openqa.component.answerformulation.queryparser.impl; 2 | 3 | import java.util.Map; 4 | import 
org.aksw.openqa.component.answerformulation.AbstractQueryParserFactory; 5 | import org.aksw.openqa.component.answerformulation.IQueryParser; 6 | 7 | public class CubeQaQueryParserFactory extends AbstractQueryParserFactory 8 | { 9 | @Override public IQueryParser create(Map params) 10 | { 11 | return create(CubeQaQueryParser.class, params); 12 | } 13 | } -------------------------------------------------------------------------------- /src/main/java/org/aksw/openqa/component/answerformulation/queryparser/impl/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | /** 5 | * @author konrad 6 | * 7 | */ 8 | package org.aksw.openqa.component.answerformulation.queryparser.impl; -------------------------------------------------------------------------------- /src/main/java/org/kohsuke/args4j/MapParser.java: -------------------------------------------------------------------------------- 1 | package org.kohsuke.args4j; 2 | 3 | import java.util.Map; 4 | import java.util.stream.Collectors; 5 | 6 | import lombok.SneakyThrows; 7 | 8 | /** Co-opt args4j CmdLineParser to accept parameter maps instead. */ 9 | public class MapParser 10 | { 11 | private CmdLineParser parser; 12 | 13 | public MapParser(CmdLineParser parser) {this.parser=parser;} 14 | 15 | @SneakyThrows 16 | public void parse(Map args) 17 | { 18 | parser.parseArgument( 19 | args.entrySet().stream().map(e->"-"+e.getKey()+"="+e.getValue()).collect(Collectors.toList())); 20 | } 21 | } -------------------------------------------------------------------------------- /src/main/resources/META-INF/org.aksw.openqa.component.answerformulation.queryparser.impl.CubeQaQueryParser.inf: -------------------------------------------------------------------------------- 1 | label=CubeQaQueryParser 2 | author=Konrad Höffner 3 | contact=http://aksw.org/KonradHoeffner 4 | description=CubeQA plugin for OpenQA. See http://aksw.org/Projects/CubeQA.html. 
5 | license=GPLv3 -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.aksw.openqa.component.answerformulation.IQueryParserFactory: -------------------------------------------------------------------------------- 1 | org.aksw.openqa.component.answerformulation.queryparser.impl.CubeQaQueryParserFactory -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.aksw.openqa.component.answerformulation.queryparser.impl.CubeQaQueryParser.ini: -------------------------------------------------------------------------------- 1 | endpoint=http://linkedspending.aksw.org/sparql 2 | graphs=http://linkedgeodata.org/;http://linkedspending.aksw.org/;http://dbpedia.org 3 | intervalMinSimilarity=0.3 4 | indexNonExactMatchMinLength=6 5 | indexMinLuceneScore = 3 6 | indexMinScore = 0.4 7 | placeMinScore = 0.7 8 | scorerPropertyNameMinScore = 0.6 9 | indexQueries = BOTH 10 | boostTemporal = 0.99 11 | boostNumeric = 0.98 12 | boostString = 0.95 -------------------------------------------------------------------------------- /src/main/resources/aggregatemapping.tsv: -------------------------------------------------------------------------------- 1 | maximum max 2 | max max 3 | min min 4 | minimum min 5 | sum sum 6 | total sum 7 | average avg 8 | the biggest max 9 | the smallest min 10 | number of count 11 | how many count -------------------------------------------------------------------------------- /src/main/resources/finland-aid/manuallabels.tsv: -------------------------------------------------------------------------------- 1 | http://linkedspending.aksw.org/ontology/finland-aid-recipient-country country 2 | http://linkedspending.aksw.org/ontology/finland-aid-amount money aid amount of aid 3 | http://linkedspending.aksw.org/ontology/DateComponentSpecification time date 4 | 
-------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | cubeqa.log 8 | %m%n 9 | 10 | cubeqa%i.log.zip 11 | 1 12 | 3 13 | 14 | 15 | 16 | 17 | 18 | System.out 19 | %.-1level %c %m%n 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/questions-finnland-aid.txt: -------------------------------------------------------------------------------- 1 | What was the average aid to environment per month in year 2010? 2 | What is the average yearly aid over the Finnfund channel? 3 | How much money was invested to strengthen civil society in Yemen? 4 | How much is committed in Ethiopia? 5 | How much does Peru receive for drinking water supply a year? 6 | How many countries have extended amounts of > 1000000 $ per year? 7 | What was the average aid to Egypt over the last 10 years? 8 | Which were the top 10 aided countries in Europe in 2011? 9 | How much money give Finnish Red Cross on Malaria Control? 10 | What is the amount of aid and the amount of commitments per country per year? 11 | How much money did the Egyptian government receive for Disaster prevention and preparedness? 12 | What's the aid of the 10 richest and 10 poorest countries? 13 | How much did Uruguay receive? 14 | How much aid is received by Zambia on a single day? 15 | How much money goes into food crop production over time? 16 | Which type of sector is receiving the most in a particular country? 17 | In South and Central Asia, how much biodiversity aid is there? 18 | How many percent of main sector aid of a country is spent on that countries administrative costs? 19 | For what is the money, invested into Sierra Leona, used for? 20 | How much money Embassy of Finland contribute to Egyptian projects? 21 | Where goes the aid? 
22 | How much money receives each Asian country from Fida International? 23 | How much education aid African countries get per year? 24 | What is average aid amount per aid sector? 25 | How much money gets each country in 2008? 26 | How much extended amounts are given to Tajikistan for Rescheduling and refinancing? 27 | Which country has the highest amount of commitments? 28 | How much air receives a country? 29 | Top 10 aid receivers in America? 30 | What is the total biodiversity aid from all sectors for countries with populations greater than 10,000,000? 31 | Where is the biggest aid to environment? 32 | How much money Nepal receives for Environmental policy and administrative management? -------------------------------------------------------------------------------- /src/main/resources/questions.txt: -------------------------------------------------------------------------------- 1 | What was the average student grade per semester in year 2010? 2 | What is the average monthly income of a German citizen? 3 | How much money was invested to fight bicycle thefts in Leipzig? 4 | How many citizens live in a ? 5 | How much does Germany spend on research a year? 6 | How many diseases have a rate of >100 deaths per year? 7 | What was the average inflation in Germany over the last 10 years? 8 | Which were the top 10 funded research institutions in Europe in 2013? 9 | How much money spend on ? 10 | What is the number of deaths and the number of clinical trials per disease per year? 11 | How much money did the German government spend for infrastructure projects in 2013? 12 | What's the gross domestic product of the 10 richest and 10 poorest countries? 13 | How much did building cost? 14 | How many of the current drugs being sold in developing countries are making profits? 15 | How many kids are born in Berlin on a single day? 16 | How much money goes into police over time? 17 | Which type of products are costing the most to produce in a particular country? 
18 | In Germany, how many hospitals are there? 19 | How many percent of tax money of a person is spend on that person's utilities (including public means)? 20 | For what is the money, invested in the police, used for? 21 | How much money Germany contribute to European projects? 22 | Where goes my taxes? 23 | How much money sends each EU country to the EU? 24 | How much European projects German Universities get per year? 25 | What is average energy use per income level? 26 | How much money gets each country from the EU? 27 | How many foreign researchers are working in Germany by governmental fund? 28 | Which geographical region has the highest rate of population growth? 29 | How much earns a politician? 30 | Top 10 taxpayer companies in Germany? 31 | What is the total death rate for all diseases for countries with populations greater than 10,000,000? 32 | Where is the biggest per person income? 33 | How much money Germany spend to support other countries? -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/AggregateMappingTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import java.util.Collections; 5 | import org.aksw.cubeqa.detector.Aggregate; 6 | import org.aksw.cubeqa.detector.AggregateMapping; 7 | import org.junit.jupiter.api.Test; 8 | 9 | public class AggregateMappingTest 10 | { 11 | 12 | @Test public void testFind() 13 | { 14 | assertEquals(AggregateMapping.aggregatesContained("What was the average aid to environment per month in year 2010?"),Collections.singleton(Aggregate.AVG)); 15 | assertEquals(AggregateMapping.aggregatesContained("What is the total biodiversity aid from all sectors for countries with populations greater than 10,000,000?"),Collections.singleton(Aggregate.SUM)); 16 | assertEquals(AggregateMapping.aggregatesContained("How much does Peru 
receive for drinking water supply a year?"),Collections.EMPTY_SET); 17 | } 18 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/AlgorithmTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertTrue; 5 | import org.aksw.cubeqa.template.Template; 6 | import org.junit.jupiter.api.Test; 7 | import org.apache.jena.query.ResultSet; 8 | 9 | import lombok.extern.slf4j.Slf4j; 10 | 11 | @Slf4j 12 | public class AlgorithmTest 13 | { 14 | final String[] questions = 15 | { 16 | "How much did the Philippines receive in the year of 2007?", 17 | "How much money was given to strengthen civil society in Yemen?", 18 | "How much did the top 10 aided countries get in 2008?", 19 | // "What was the average aid to environment per month in year 2010?" 20 | // ,"How much wood would a wood chuck chuck?" 
21 | }; 22 | 23 | // TODO: find out why the sector is not found in AlgorithmTest even when boostString is set to 0.1 (in ObjectPropertyScorerTest it works) 24 | @Test public void testAnswer() 25 | { 26 | // for(String question: questions) 27 | String question = questions[0]; 28 | { 29 | Template t = new Algorithm().template("finland-aid",question); 30 | ResultSet rs = t.cube.sparql.select(t.sparqlQuery()); 31 | assertTrue(rs.hasNext()); 32 | log.debug(t.sparqlQuery()); 33 | log.debug(rs.getResultVars().get(0)); 34 | if(question==questions[1]) assertEquals(rs.next().get(rs.getResultVars().get(0)).asLiteral().getInt(),180000); 35 | } 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/AnswerTypeTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.aksw.cubeqa.AnswerType.*; 5 | import java.util.EnumSet; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class AnswerTypeTest 9 | { 10 | public void test(String question, EnumSet types) 11 | { 12 | assertEquals(ofQuestion(question),types); 13 | } 14 | 15 | public void test(String question, AnswerType type) 16 | { 17 | assertEquals(ofQuestion(question),EnumSet.of(type)); 18 | } 19 | 20 | @Test public void testOfQuestion() 21 | { 22 | // TODO: recheck all with the table from the paper 23 | test("How much aid receives a country?",UNCOUNTABLE); 24 | test("What is the total Dignity International aid from all sectors?",EnumSet.of(UNCOUNTABLE,COUNTABLE, TEMPORAL,LOCATION, ENTITY)); 25 | test("How much money Nepal receives for Environmental policy and administrative management?",UNCOUNTABLE); 26 | test("Top 10 aid receiving geographic areas in south east and central asia?",EnumSet.of(UNCOUNTABLE,COUNTABLE, TEMPORAL,LOCATION, ENTITY)); 27 | test("Which country has the lowest amount of 
commitments?",EnumSet.of(TEMPORAL,LOCATION, ENTITY)); 28 | test("How many countries had amounts of more than 1000000 € in 2010?",EnumSet.of(COUNTABLE,COUNT)); 29 | test("Where is the biggest aid for medical services?",LOCATION); 30 | test("When did Paraguy get money from the Finish Red Cross?",TEMPORAL); 31 | test("Did the Ukraine receive money in 2009?",AFFIRMATIVE); 32 | test("Was Ethiopia given money for primary education?",AFFIRMATIVE); 33 | } 34 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/ComponentPropertyTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import java.util.Set; 4 | import java.util.stream.Collectors; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import org.junit.jupiter.api.Disabled; 7 | import org.junit.jupiter.api.Test; 8 | import org.simmetrics.StringMetric; 9 | import org.simmetrics.metrics.StringMetrics; 10 | import lombok.extern.slf4j.Slf4j; 11 | import static org.junit.jupiter.api.Assertions.*; 12 | 13 | @Slf4j 14 | public class ComponentPropertyTest 15 | { 16 | @Test public void testVar() 17 | { 18 | Cube c = Cube.finlandAid(); 19 | Set vars = c.properties.values().stream().map(p->p.var).collect(Collectors.toSet()); 20 | for(ComponentProperty p: c.properties.values()) {log.debug(p.var+" "+p.uri);} 21 | log.debug(Cube.finlandAid().properties.get("http://linkedspending.aksw.org/ontology/finland-aid-amount").var); 22 | log.debug("{}",vars); 23 | assertEquals(vars.size(),c.properties.size()); 24 | } 25 | 26 | // does not seem to be available by default in JUnit 4 or 5 27 | static void assertGreaterOrEquals(double large, double small) {assertTrue(large>=small,large+" is not larger than or equal to "+small);} 28 | static void assertGreater(double large, double small) {assertTrue(large>small,large+" is not larger than "+small);} 29 | static void assertSmaller(double small, double 
large) {assertTrue(small0.6); 51 | } 52 | } 53 | 54 | @Disabled 55 | @Test public void testDistances() 56 | { 57 | StringMetric similarity = StringMetrics.qGramsDistance(); 58 | System.out.println(similarity.compare("amountsextended","amountsextended")); 59 | System.out.println(similarity.compare("amounts extended","extended amounts")); 60 | System.out.println(similarity.compare("amountsextended","extendedamounts")); 61 | System.out.println(similarity.compare("nestle","nestlé")); 62 | System.out.println(similarity.compare("nerf","berg")); 63 | } 64 | 65 | @Test public void testRanges() 66 | { 67 | Cube c = Cube.finlandAid(); 68 | // log.debug(c.properties.values()); 69 | Set ranges = c.properties.values().stream().map(p->p.range).collect(Collectors.toSet()); 70 | assertTrue(ranges.contains("http://www.w3.org/2001/XMLSchema#date")); 71 | assertTrue(ranges.contains("http://www.w3.org/2001/XMLSchema#string")); 72 | assertTrue(ranges.contains("http://www.w3.org/2001/XMLSchema#gYear")); 73 | // log.debug(ranges); 74 | } 75 | 76 | // @Test public void testGetInstance() 77 | // { 78 | // ComponentProperty amount = ComponentProperty.getInstance(Cube.getInstance("finland-aid"), "http://linkedspending.aksw.org/ontology/finland-aid-amounts-extended"); 79 | // log.debug(amount.range); 80 | // log.debug(amount.labels); 81 | // 82 | // } 83 | 84 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/CubeSparqlTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.junit.jupiter.api.Test; 5 | 6 | public class CubeSparqlTest 7 | { 8 | 9 | @Test public void testLinkedSpending() 10 | { 11 | CubeSparql sparql = CubeSparql.getLinkedSpendingInstanceForName("finland-aid"); 12 | String query = "SELECT (COUNT(DISTINCT(?dim)) AS ?count) {?dim a qb:DimensionProperty}"; 13 | int 
dimensions = sparql.select(query).nextSolution().get("count").asLiteral().getInt(); 14 | assertEquals(4,dimensions); 15 | } 16 | 17 | @Test public void testPageSize() 18 | { 19 | String query = "select ?s {?s ?p ?o.} limit 1000"; 20 | assertEquals(1000,Cube.finlandAid().sparql.select(query).size(),0); 21 | } 22 | 23 | 24 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/CubeTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertTrue; 4 | import java.io.File; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import lombok.extern.slf4j.Slf4j; 8 | 9 | @Slf4j 10 | public class CubeTest 11 | { 12 | 13 | @Test public void testSerialization() 14 | { 15 | Cube cube = Cube.getInstance("finland-aid"); 16 | assertTrue(new File(Files.localFolder("cache"),"finland-aid.ser").exists()); 17 | log.debug("{}",cube.properties.size()); 18 | log.debug("{}",cube.properties); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/JenaNanBugTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertTrue; 4 | import org.junit.jupiter.api.Test; 5 | import org.apache.jena.query.QuerySolution; 6 | import org.apache.jena.query.ResultSet; 7 | import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; 8 | 9 | public class JenaNanBugTest 10 | { 11 | 12 | @Test public void test() 13 | { 14 | final String PREFIXES = "PREFIX xsd: PREFIX qb: "; 15 | final String query = PREFIXES+" select (min(xsd:double(?d)) as ?min) " 16 | + "{?o a qb:Observation. 
?o qb:dataSet .?o ?d.} limit 1"; 17 | try(QueryExecutionHTTP qe = QueryExecutionHTTP.service("http://linkedspending.aksw.org/sparql", query)) 18 | { 19 | ResultSet rs = qe.execSelect(); 20 | QuerySolution qs = rs.nextSolution(); 21 | assertTrue(qs.get("min").asLiteral().getDouble()==0); 22 | } 23 | } 24 | 25 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/LoggingTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import org.junit.jupiter.api.Test; 4 | import lombok.extern.slf4j.Slf4j; 5 | 6 | @Slf4j 7 | public class LoggingTest 8 | { 9 | 10 | @Test 11 | public void test() 12 | { 13 | log.trace("testing log trace"); 14 | log.debug("testing log debug"); 15 | log.info("testing log info"); 16 | log.warn("testing log warn"); 17 | log.error("testing log error"); 18 | } 19 | 20 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/ParserTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.aksw.cubeqa.template.StanfordNlp; 5 | import org.junit.jupiter.api.Test; 6 | import edu.stanford.nlp.trees.Tree; 7 | import lombok.extern.slf4j.Slf4j; 8 | 9 | public class ParserTest 10 | { 11 | @Test 12 | public void test() 13 | { 14 | Tree parsed = StanfordNlp.parse("How much did the top 10 aided countries get in 2008?"); 15 | assertEquals( 16 | "(ROOT (SBARQ (WHADJP (WRB How) (RB much)) (SQ (VBD did) (NP (DT the) (JJ top) (CD 10)) (VP (VBN aided) (SBAR (S (NP (NNS countries)) (VP (VBP get) (PP (IN in) (NP (CD 2008)))))))) (. 
?)))", 17 | parsed.toString()); 18 | } 19 | 20 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/ReplacerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.junit.jupiter.api.Test; 5 | 6 | public class ReplacerTest 7 | { 8 | 9 | @Test public void testReplace() 10 | { 11 | String original = "Which 1.5 thousand agencies in the 3,7 million Maldives have proposed expenditure amounts of more than 1 billion Maldivian rufiyaa in 2015?"; 12 | String replaced = "Which 1500 agencies in the 3700000 Maldives have proposed expenditure amounts of more than 1000000000 Maldivian rufiyaa in 2015?"; 13 | assertEquals(replaced,Replacer.replace(original)); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/benchmark/BenchmarkTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.benchmark; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertTrue; 5 | import java.io.File; 6 | import java.io.FileInputStream; 7 | import java.io.IOException; 8 | import org.aksw.cubeqa.Algorithm; 9 | import org.aksw.cubeqa.CubeSparql; 10 | import org.aksw.cubeqa.Files; 11 | import org.junit.jupiter.api.Test; 12 | import org.apache.jena.rdf.model.ResourceFactory; 13 | import org.apache.jena.vocabulary.RDF; 14 | 15 | public class BenchmarkTest 16 | { 17 | @Test public void testCompleteQuestion() 18 | { 19 | Question q = Benchmark.completeQuestion(CubeSparql.finlandAid(), "some string","ask {?s ?p ?o.}"); 20 | assertTrue(q.answers.size()==1); 21 | assertTrue(q.dataTypes.get("")==DataType.BOOLEAN); 22 | assertTrue(q.answers.iterator().next().get("").equals("true")); 23 | } 24 
| 25 | @Test public void testEvaluate() 26 | { 27 | // TODO: choose a faster example, 10 s is too long for a test 28 | Benchmark.fromQald("qald6t3-train-v1.2").evaluate(new Algorithm(),6); 29 | } 30 | 31 | @Test public void testFromCsv() throws IOException 32 | { 33 | assertEquals(Benchmark.fromCsv("qald6t3-train-v1.2").questions.get(0).string,"How much was spent on public works and utilities by the Town of Cary in 2011?"); 34 | } 35 | 36 | @Test public void testNodeString() 37 | { 38 | assertEquals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type",Benchmark.nodeString(RDF.type)); 39 | assertEquals("hello world",Benchmark.nodeString(ResourceFactory.createLangLiteral("hello world", "en"))); 40 | } 41 | 42 | @Test public void testFromQald() 43 | { 44 | Benchmark b = Benchmark.fromQald("finland-aid"); 45 | assertTrue(b.questions.size()==100); 46 | assertEquals("What was the average aid committed per month in year 2010?",b.questions.get(0).string); 47 | assertEquals(DataType.NUMBER,b.questions.get(0).dataTypes.get("")); 48 | assertTrue(b.questions.get(0).answers.iterator().next().get("").toString().startsWith("134145226.83")); 49 | } 50 | 51 | @Test public void testSaveAndLoadQald() throws IOException 52 | { 53 | Benchmark b = Benchmark.fromQald("finland-aid"); 54 | b.saveAsQald(new File(Files.localFolder("benchmark"),"test.xml")); 55 | Benchmark c = Benchmark.fromQald("finland-aid",new FileInputStream(new File(Files.localFolder("benchmark"),"test.xml"))); 56 | for(int i=0;i<100;i++) 57 | { 58 | Question q = b.questions.get(i); 59 | Question r = c.questions.get(i); 60 | // to get more targeted debug output in case of inequalities 61 | assertEquals(q.string,r.string); 62 | assertEquals(q.query,r.query); 63 | assertEquals(q.dataTypes,r.dataTypes); 64 | assertEquals(q.answers,r.answers); 65 | assertEquals(q,r); 66 | } 67 | } 68 | 69 | } -------------------------------------------------------------------------------- 
/src/test/java/org/aksw/cubeqa/benchmark/PerformanceTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.benchmark; 2 | 3 | public class PerformanceTest 4 | { 5 | 6 | // @Test public void test() 7 | // { 8 | // Performance p1 = Performance.performance(new HashSet<>(Arrays.asList("Alice","Trudy","Bob","John")), new HashSet<>(Arrays.asList("Alice","Marvin"))); 9 | // assertTrue(p1.equals(new Performance(0.5, 0.25,false))); 10 | // } 11 | 12 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/detector/AggregateDetectorTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.aksw.cubeqa.detector.Aggregate.*; 5 | import org.junit.jupiter.api.Test; 6 | 7 | public class AggregateDetectorTest 8 | { 9 | String[] questions = { 10 | "What is the total aid to the Anti Corruption Commission in the Maldives in 2015?", 11 | "What was the average Uganda health budget over all districts in 2014?", 12 | "What is the average salary of an Engineering Technician in Washington DC?", 13 | "What was the total budget on Technical Services in City of Toronto in 2009?" 
14 | }; 15 | 16 | Aggregate[] aggregates = {SUM,AVG,AVG,SUM}; 17 | 18 | @Test public void testDetect() 19 | { 20 | for(int i=0; i fragments = HalfInfiniteIntervalDetector.INSTANCE.detect(cube,"How many countries had amounts of more than 1000000 € in 2010?"); 16 | Fragment fragment = Fragment.combine(fragments); 17 | assertTrue(fragment.getRestrictions().size()==1); 18 | Restriction restriction = fragment.getRestrictions().iterator().next(); 19 | assertEquals(restriction.getProperty().uri,"http://linkedspending.aksw.org/ontology/finland-aid-amount"); 20 | assertTrue(restriction.wherePatterns().size()==2); 21 | assertTrue(restriction.wherePatterns().toString().contains("> \"1000000")); 22 | } 23 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/detector/InPlaceDetectorTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | import java.util.Set; 5 | import org.aksw.cubeqa.Cube; 6 | import org.aksw.cubeqa.restriction.Restriction; 7 | import org.aksw.cubeqa.restriction.UriRestriction; 8 | import org.aksw.cubeqa.template.Fragment; 9 | import org.junit.jupiter.api.Test; 10 | 11 | import lombok.extern.slf4j.Slf4j; 12 | 13 | @Slf4j 14 | public class InPlaceDetectorTest 15 | { 16 | // TODO: add multi word places like "the United States of America". 
17 | String[][] q = { 18 | {"How much money was given to strengthen civil society in Yemen?","in Yemen","https://openspending.org/finland-aid/recipient-country/ye"}, 19 | 20 | }; 21 | 22 | @Test public void testDetect() 23 | { 24 | for(int i=0;i fragments = InPlaceDetector.INSTANCE.detect(Cube.finlandAid(),q[i][0]); 27 | log.debug("{}",fragments); 28 | assertEquals(1,fragments.size()); 29 | Fragment f = fragments.stream().filter(ff->ff.getRestrictions().size()==1).findFirst().get(); 30 | Restriction r = f.getRestrictions().iterator().next(); // there should be only one 31 | assertEquals(UriRestriction.class,r.getClass()); // temporal restrictions are value restrictions with a filter 32 | assertTrue(r.getProperty().uri.equals("http://linkedspending.aksw.org/ontology/finland-aid-recipient-country")); 33 | assertTrue(r.wherePatterns().iterator().next().contains("")); 34 | assertEquals(q[i][1],f.getPhrase()); 35 | } 36 | 37 | } 38 | 39 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/detector/InYearDetectorTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertTrue; 5 | import java.util.Set; 6 | import org.aksw.cubeqa.Cube; 7 | import org.aksw.cubeqa.restriction.Restriction; 8 | import org.aksw.cubeqa.restriction.ValueRestriction; 9 | import org.aksw.cubeqa.template.Fragment; 10 | import org.junit.jupiter.api.Test; 11 | 12 | public class InYearDetectorTest 13 | { 14 | String[][] q = { 15 | {"What was the average aid committed per month in year 2010?","in year 2010","2010"}, 16 | {"Which were the top 10 aided countries in 2011?","in 2011","2011"}, 17 | {"How much did the Philippines receive in the year of 2007?","in the year of 2007","2007"} 18 | }; 19 | 20 | @Test public void testDetect() 21 | { 22 | 
for(int i=0;i fragments = InYearDetector.INSTANCE.detect(Cube.finlandAid(),q[i][0]); 25 | assertEquals(1,fragments.size()); 26 | Fragment f = fragments.stream().filter(ff->ff.getRestrictions().size()==1).findFirst().get(); 27 | Restriction r = f.getRestrictions().iterator().next(); // there should be only one 28 | assertEquals(ValueRestriction.class,r.getClass()); // temporal restrictions are value restrictions with a filter 29 | assertTrue(r.getProperty().uri.equals("http://linkedspending.aksw.org/ontology/refYear")); 30 | assertTrue(r.wherePatterns().iterator().next().matches("(?s).*filter\\(year\\(\\?v[a-z0-9]+\\)="+q[i][2]+".*")); // correct year 31 | assertEquals(q[i][1],f.getPhrase()); 32 | } 33 | 34 | } 35 | 36 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/detector/PerTimeDetectorTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertTrue; 4 | import org.aksw.cubeqa.Cube; 5 | import org.aksw.cubeqa.property.ComponentProperty; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class PerTimeDetectorTest 9 | { 10 | 11 | @Test public void testDetect() 12 | { 13 | ComponentProperty refYear = Cube.finlandAid().properties.get("http://linkedspending.aksw.org/ontology/refYear"); 14 | // ComponentProperty refMonth= Cube.finlandAid().properties.get("http://linkedspending.aksw.org/ontology/refMonth"); 15 | // ComponentProperty refDay = Cube.finlandAid().properties.get("http://linkedspending.aksw.org/ontology/refDay"); 16 | ComponentProperty refDate = Cube.finlandAid().properties.get("http://linkedspending.aksw.org/ontology/refDate"); 17 | 18 | assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), "per year").iterator().next().getPerProperties().contains(refYear)); 19 | assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), " per 
year").iterator().next().getPerProperties().contains(refYear)); 20 | assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), " yearly.").iterator().next().getPerProperties().contains(refYear)); 21 | 22 | // refMonth is not used in the QALD 6 Task 3 Dataset 23 | // assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), "monthly").iterator().next().getPerProperties().contains(refMonth)); 24 | // refDay is not used in the QALD 6 Task 3 Dataset 25 | // assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), "daily").iterator().next().getPerProperties().contains(refDay)); 26 | assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), "daily").iterator().next().getPerProperties().contains(refDate)); 27 | 28 | assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), "hyper year").isEmpty()); 29 | assertTrue(PerTimeDetector.INSTANCE.detect(Cube.finlandAid(), "per yearo").isEmpty()); 30 | } 31 | 32 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/detector/QuestionWordDetectorTest.java: -------------------------------------------------------------------------------- 1 | //package org.aksw.cubeqa.detector; 2 | // 3 | //import static org.junit.jupiter.api.Assertions.assertTrue; 4 | //import org.junit.jupiter.api.Test; 5 | // 6 | //public class QuestionWordDetectorTest 7 | //{ 8 | // 9 | // @Test public void test() 10 | // { 11 | //// new AnswerTypeDetector(); 12 | // } 13 | // 14 | //} 15 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/detector/TopDetectorTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.detector; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | import java.util.regex.Pattern; 5 | import org.aksw.cubeqa.Cube; 6 | import org.aksw.cubeqa.restriction.Restriction; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import 
lombok.extern.slf4j.Slf4j; 10 | 11 | @Slf4j 12 | public class TopDetectorTest 13 | { 14 | /** uses CubeTemplateFragment.toString to test more easily, may break if CubeTemplateFragment.toString changes*/ 15 | @Test public void testDetect10HighestAmounts() 16 | { 17 | Cube cube = Cube.finlandAid(); 18 | { 19 | Restriction r = TopDetector.INSTANCE.detect(cube,"10 highest amounts").iterator().next().getRestrictions().iterator().next(); 20 | log.debug("{}",r); 21 | assertTrue(Pattern.matches("(?i).*order by DESC\\(.*\\) limit 10.*", r.orderLimitPatterns().iterator().next())); 22 | assertEquals("http://linkedspending.aksw.org/ontology/finland-aid-amount",r.getProperty().uri); 23 | } 24 | } 25 | 26 | @Test public void testDetect5LowAmounts() 27 | { 28 | Cube cube = Cube.finlandAid(); 29 | { 30 | Restriction r = TopDetector.INSTANCE.detect(cube,"5 lowest amounts").iterator().next().getRestrictions().iterator().next(); 31 | log.debug("{}",r); 32 | assertTrue(Pattern.matches("(?i).*order by ASC\\(.*\\) limit 5.*", r.orderLimitPatterns().iterator().next())); 33 | assertEquals("http://linkedspending.aksw.org/ontology/finland-aid-amount",r.getProperty().uri); 34 | } 35 | } 36 | 37 | @Test public void testDetectTop10AidedCountries() 38 | { 39 | Cube cube = Cube.finlandAid(); 40 | { 41 | Restriction r = TopDetector.INSTANCE.detect(cube,"top 10 aided countries").iterator().next().getRestrictions().iterator().next(); 42 | log.debug("{}",r); 43 | assertTrue(Pattern.matches("(?i).*order by DESC\\(.*\\) limit 10.*", r.orderLimitPatterns().iterator().next())); 44 | assertEquals("http://linkedspending.aksw.org/ontology/finland-aid-amount",r.getProperty().uri); 45 | } 46 | } 47 | // { 48 | // String ds = TopDetector.INSTANCE.detect(cube,"top 5 amounts").toString(); 49 | // assertTrue(Pattern.matches("(?i).*order by DESC\\(\\?v[0-9]+\\) limit 5.*", ds)); 50 | // assertTrue(ds.contains("http://linkedspending.aksw.org/ontology/finland-aid-amount ")); 51 | //} 52 | //{ 53 | // String ds = 
TopDetector.INSTANCE.detect(cube,"7 lowest extended amounts").toString(); 54 | // assertTrue(Pattern.matches("(?i).*order by ASC\\(\\?v[0-9]+\\) limit 7.*", ds)); 55 | // assertTrue(ds.contains("http://linkedspending.aksw.org/ontology/finland-aid-amounts-extended ")); 56 | //} 57 | 58 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/index/LabelIndexTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertTrue; 5 | import org.aksw.cubeqa.Cube; 6 | import org.aksw.cubeqa.property.ComponentProperty; 7 | import org.junit.jupiter.api.Test; 8 | import com.google.common.collect.Range; 9 | 10 | public class LabelIndexTest 11 | { 12 | 13 | @Test public void test() 14 | { 15 | Cube cube = Cube.getInstance("finland-aid"); 16 | ComponentProperty property = cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-recipient-country"); 17 | 18 | assertTrue(Range.closed(0.5, 0.95).contains(property.scorer.score("Egyppt").get().score)); 19 | assertEquals(1,property.scorer.score("Egypt").get().score,0); 20 | } 21 | 22 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/index/SimilarityTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | public class SimilarityTest 6 | { 7 | 8 | @Test public void testSimilarity() 9 | { 10 | System.out.println(Similarity.similarity("amounts extended", "amounts extended")); 11 | System.out.println(Similarity.similarity("extended amounts", "amounts extended")); 12 | System.out.println(Similarity.similarity("Extended Amounts", "amounts extended")); 13 | 
System.out.println(Similarity.similarity("extended amount", "amounts extended")); 14 | System.out.println(Similarity.similarity("Philippines", "Phillipines")); 15 | System.out.println(Similarity.similarity("Malaysia, Philippines", "Phillipines")); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/index/StemmerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.index; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.apache.lucene.analysis.en.EnglishAnalyzer; 5 | import org.apache.lucene.queryparser.classic.ParseException; 6 | import org.apache.lucene.queryparser.classic.QueryParser; 7 | import org.junit.jupiter.api.Disabled; 8 | import org.junit.jupiter.api.Test; 9 | 10 | public class StemmerTest 11 | { 12 | EnglishAnalyzer en_an = new EnglishAnalyzer(); 13 | QueryParser parser = new QueryParser("", en_an); 14 | 15 | @Test public void stemmStrengtheningTest() throws ParseException 16 | { 17 | assertEquals(parser.parse("strengthening civil society"),parser.parse("strengthen civil society")); 18 | } 19 | 20 | // expected to fail, Lucene English stemmer not aggressive enough 21 | @Disabled @Test public void stemmEgyptianTest() throws ParseException 22 | { 23 | assertEquals(parser.parse("egyptian"),parser.parse("egypt")); 24 | } 25 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/property/scorer/DateScorerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | import org.aksw.cubeqa.Cube; 5 | import org.aksw.cubeqa.property.scorer.temporal.TemporalScorer; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class DateScorerTest 9 | { 10 | private static final String START_DATE 
= "http://linkedspending.aksw.org/ontology/finland-aid-start-date"; 11 | 12 | @Test public void testScore() 13 | { 14 | Cube cube = Cube.getInstance("finland-aid"); 15 | TemporalScorer scorer = TemporalScorer.dateScorer(cube.properties.get(START_DATE)); 16 | // String[] dates = {"2006-01-01","2006-01-02","2006-02-01","2014-04-01"}; 17 | assertEquals(scorer.score("2007-03-01").get().property.uri,START_DATE); 18 | assertEquals(scorer.score("2006-01-31").get().property.uri,START_DATE); 19 | assertEquals(scorer.score("2006-02-01").get().property.uri,START_DATE); 20 | // the next day of a day that is in 21 | assertFalse(scorer.score("2006-03-02").isPresent()); 22 | // not in at all 23 | assertFalse(scorer.score("2006-01-02").isPresent()); 24 | assertFalse(scorer.score("2014-04-01").isPresent()); 25 | } 26 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/property/scorer/NumericScorerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.aksw.cubeqa.Config; 5 | import org.aksw.cubeqa.Cube; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class NumericScorerTest 9 | { 10 | 11 | @Test public void testScore() 12 | { 13 | Cube cube = Cube.getInstance("finland-aid"); 14 | NumericScorer scorer = new NumericScorer(cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-amount")); 15 | assertEquals(scorer.score("0").get().score,Config.INSTANCE.boostNumeric,0); 16 | assertEquals(scorer.score("180000").get().score,Config.INSTANCE.boostNumeric,0); 17 | assertEquals(scorer.score("4312").get().score,Config.INSTANCE.boostNumeric,0); 18 | assertEquals(scorer.score("123456789").get().score,0,0); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- 
/src/test/java/org/aksw/cubeqa/property/scorer/ObjectPropertyScorerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertTrue; 4 | import org.aksw.cubeqa.Cube; 5 | import org.junit.jupiter.api.Test; 6 | 7 | public class ObjectPropertyScorerTest 8 | { 9 | @Test public void testRecipientCountry() 10 | { 11 | Cube cube = Cube.getInstance("finland-aid"); 12 | Scorer scorer = cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-recipient-country").scorer; 13 | assertTrue(scorer.getClass().equals(ObjectPropertyScorer.class)); 14 | ScoreResult score = scorer.score("Tajikistan").get(); 15 | 16 | assertTrue(score.value.equals("https://openspending.org/finland-aid/recipient-country/tj")&&score.score==1); 17 | score=scorer.score("Tajikystan").get(); 18 | assertTrue(score.value.equals("https://openspending.org/finland-aid/recipient-country/tj")&&score.score<1&&score.score>0.6); 19 | score = scorer.score("Lao People's Democratic Republic").get(); 20 | assertTrue(score.value.equals("https://openspending.org/finland-aid/recipient-country/la")&&score.score==1); 21 | score = scorer.score("Lao Peoples Demokratic Republic").get(); 22 | assertTrue(score.value.equals("https://openspending.org/finland-aid/recipient-country/la")&&score.score<1&&score.score>0.6); 23 | } 24 | 25 | @Test public void testSector() 26 | { 27 | Cube cube = Cube.getInstance("finland-aid"); 28 | 29 | Scorer scorer = cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-sector").scorer; 30 | assertTrue(scorer.getClass().equals(ObjectPropertyScorer.class)); 31 | { 32 | ScoreResult score = scorer.score("Strengthening civil society").get(); 33 | assertTrue(score.value.equals("https://openspending.org/finland-aid/sector/15150")&&score.score==1); 34 | } 35 | { 36 | ScoreResult score = scorer.score("strengthen civil society").get(); 37 | 
assertTrue(score.value.equals("https://openspending.org/finland-aid/sector/15150")&&score.score==1); 38 | } 39 | // not possible to match with a Levenshtein automaton with a max edit distance of less than 3 40 | // score = scorer.score("Strengthen civil society").get(); 41 | // assertTrue(score.value.equals("https://openspending.org/finland-aid/sector/15150")&&score.score<1&&score.score>0.6); 42 | } 43 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/property/scorer/ScorersTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import java.util.Collection; 5 | import java.util.Comparator; 6 | import org.aksw.cubeqa.Cube; 7 | import org.junit.jupiter.api.Test; 8 | import lombok.extern.slf4j.Slf4j; 9 | 10 | @Slf4j 11 | public class ScorersTest 12 | { 13 | private void test(String s) 14 | { 15 | log.debug("{}",Scorers.scorePhraseProperties(Cube.finlandAid(),s)); 16 | } 17 | 18 | /** @param expected either a uri (objectproperty) or a label lexical form (datatypeproperty) */ 19 | void test(String s,String expected) 20 | { 21 | Collection scores = Scorers.scorePhraseValues(Cube.finlandAid(),s).values(); 22 | assertEquals(expected,scores.stream().max(Comparator.comparing(ScoreResult::getScore)).get().value); 23 | } 24 | 25 | @Test public void testScorePhraseValues() 26 | { 27 | // test("on health education","https://openspending.org/finland-aid/sector/12261"); 28 | test("Finnish Red Cross","Finnish Red Cross"); 29 | test("Malaria Control","https://openspending.org/finland-aid/sector/12262"); 30 | test("Environmental policy and administrative management"); 31 | test("Nepal","https://openspending.org/finland-aid/recipient-country/np"); 32 | test("Rescheduling and financing","https://openspending.org/finland-aid/sector/60040"); 33 | test("Finland 
Embassy","Embassy of Finland"); 34 | } 35 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/property/scorer/StringScorerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.aksw.cubeqa.Config; 5 | import org.aksw.cubeqa.Cube; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class StringScorerTest 9 | { 10 | public void test(ScoreResult score, String value) 11 | { 12 | assertEquals(value,score.value); 13 | assertEquals(Config.INSTANCE.boostString,score.score,0); 14 | } 15 | 16 | @Test public void testChannelOfDeliveryName() 17 | { 18 | Cube cube = Cube.getInstance("finland-aid"); 19 | Scorer scorer = cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-channel-of-delivery-name").scorer; 20 | assertEquals(StringScorer.class,scorer.getClass()); 21 | // test(scorer.score("Finnish Red Cross").get(),"Finnish Red Cross"); 22 | test(scorer.score("Finland Embassy").get(),"Embassy of Finland"); 23 | } 24 | 25 | @Test public void testTargetArea() 26 | { 27 | Cube cube = Cube.getInstance("finland-aid"); 28 | StringScorer scorer = (StringScorer) cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-geographical-target-area").scorer; 29 | // test(scorer.score("Finnish Red Cross").get(),"Finnish Red Cross"); 30 | test(scorer.score("Philippines").get(),"Malaysia, Philippines"); 31 | } 32 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/property/scorer/temporal/TemporalScorerTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.property.scorer.temporal; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | import org.aksw.cubeqa.Cube; 5 | import 
org.aksw.cubeqa.property.ComponentProperty; 6 | import org.joda.time.Instant; 7 | import org.joda.time.Interval; 8 | import org.junit.jupiter.api.Test; 9 | 10 | public class TemporalScorerTest 11 | { 12 | 13 | @Test public void testYearScorer() 14 | { 15 | Cube cube = Cube.getInstance("finland-aid"); 16 | ComponentProperty property = cube.properties.get("http://linkedspending.aksw.org/ontology/refYear"); 17 | assertFalse(property.scorer.score("2005").isPresent()); 18 | assertTrue(property.scorer.score("2006").isPresent()); 19 | assertTrue(property.scorer.score("2007").isPresent()); 20 | assertTrue(property.scorer.score("2011").isPresent()); 21 | assertFalse(property.scorer.score("2012").isPresent()); 22 | } 23 | 24 | @Test public void testDateScorer() 25 | { 26 | Cube cube = Cube.getInstance("finland-aid"); 27 | // modified is a time but date scorer only uses the date substring 28 | ComponentProperty property = cube.properties.get("http://linkedspending.aksw.org/ontology/refDate"); 29 | // assertTrue(property.scorer.score("2009").isPresent()); 30 | assertFalse(property.scorer.score("1999").isPresent()); 31 | assertTrue(property.scorer.score("2009-01-01").isPresent()); 32 | assertFalse(property.scorer.score("2009-06-07").isPresent()); 33 | } 34 | 35 | @Test public void testParseAsYear() 36 | { 37 | Interval y2014 = TemporalScorer.parseAsYear("2014"); 38 | assertFalse(y2014.contains(Instant.parse("2013-12-31"))); 39 | assertTrue(y2014.contains(Instant.parse("2014-01-01"))); 40 | assertTrue(y2014.contains(Instant.parse("2014-12-31"))); 41 | assertFalse(y2014.contains(Instant.parse("2015-01-01"))); 42 | } 43 | 44 | @Test public void testParseAsDate() 45 | { 46 | Interval d20140101 = TemporalScorer.parseAsDate("2014-01-01"); 47 | assertFalse(d20140101.contains(Instant.parse("2013-12-31"))); 48 | assertTrue(d20140101.contains(Instant.parse("2014-01-01"))); 49 | assertFalse(d20140101.contains(Instant.parse("2014-01-02"))); 50 | 
assertFalse(d20140101.contains(Instant.parse("2015-01-01"))); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/template/CubeTemplateTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import java.util.Collections; 4 | import java.util.HashSet; 5 | import java.util.Set; 6 | import org.aksw.cubeqa.Cube; 7 | import org.aksw.cubeqa.detector.Aggregate; 8 | import org.aksw.cubeqa.property.ComponentProperty; 9 | import org.aksw.cubeqa.restriction.Restriction; 10 | import org.aksw.cubeqa.restriction.UriRestriction; 11 | import org.junit.jupiter.api.Test; 12 | 13 | public class CubeTemplateTest 14 | { 15 | 16 | @Test public void testSparqlQuery() 17 | { 18 | Cube cube = Cube.getInstance("finland-aid"); 19 | ComponentProperty receipientCountry = cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-recipient-country"); 20 | ComponentProperty amount = cube.properties.get("http://linkedspending.aksw.org/ontology/finland-aid-amount"); 21 | Set restrictions = new HashSet<>(); 22 | 23 | // ComponentProperty receipientCountry = new ComponentProperty("finland-aid", "http://linkedspending.aksw.org/ontology/finland-aid-recipient-country-spec",null); 24 | restrictions.add(new UriRestriction(receipientCountry,"https://openspending.org/finland-aid/recipient-country/cn")); 25 | Template ct = new Template(cube,restrictions, 26 | Collections.singleton(amount), 27 | Collections.singleton(ComponentProperty.getInstance(cube, "http://linkedspending.aksw.org/ontology/refYear")), 28 | Collections.singleton(Aggregate.SUM)); 29 | // log.debug(ct.sparqlQuery()); 30 | // TODO check the query 31 | } 32 | 33 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/template/CubeTemplatorNewTest.java: 
-------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertTrue; 4 | import org.aksw.cubeqa.Cube; 5 | import org.junit.jupiter.api.Test; 6 | import lombok.extern.slf4j.Slf4j; 7 | 8 | @Slf4j 9 | public class CubeTemplatorNewTest 10 | { 11 | @Test public void templatorTest() 12 | { 13 | Templator templator = new GreedyTemplator(Cube.getInstance("finland-aid")); 14 | Match wholePhraseResult = templator.identify("How much money was invested to strengthen civil society in Yemen?"); 15 | assertTrue(wholePhraseResult.isEmpty(),wholePhraseResult.toString()); 16 | log.debug("{}",templator.identify("Yemen")); 17 | log.debug("{}",templator.identify("strengthening civil society")); 18 | } 19 | } -------------------------------------------------------------------------------- /src/test/java/org/aksw/cubeqa/template/StanfordNlpTest.java: -------------------------------------------------------------------------------- 1 | package org.aksw.cubeqa.template; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import org.junit.jupiter.api.Test; 5 | import edu.stanford.nlp.trees.Tree; 6 | 7 | public class StanfordNlpTest 8 | { 9 | @Test 10 | public void testParse() 11 | { 12 | Tree tree = StanfordNlp.parse("How much did the Philippines receive in the year of 2007?"); 13 | assertEquals("(ROOT (SBARQ (WHADJP (WRB How) (RB much)) (SQ (VBD did) (NP (DT the) (NNP Philippines)) (VP (VB receive) (PP (IN in) (NP (NP (DT the) (NN year)) (PP (IN of) (NP (CD 2007))))))) (. 
?)))", tree.toString()); 14 | } 15 | } -------------------------------------------------------------------------------- /src/test/java/org/kohsuke/args4j/MapParserTest.java: -------------------------------------------------------------------------------- 1 | package org.kohsuke.args4j; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import org.junit.jupiter.api.Test; 8 | 9 | public class MapParserTest 10 | { 11 | @Option(name="-testStringAbc") 12 | String testStringAbc; 13 | 14 | @Option(name="-testStringNull") 15 | String testStringNull; 16 | 17 | @Option(name="-testInt") 18 | int testInt; 19 | 20 | @Option(name="-testFloat") 21 | float testFloat; 22 | 23 | enum Number {ONE,TWO,THREE} 24 | @Option(name="-testEnum") 25 | Number testEnum; 26 | 27 | 28 | @Test 29 | public void test() 30 | { 31 | MapParser parser = new MapParser(new CmdLineParser(this)); 32 | Map parameters = new HashMap<>(); 33 | parameters.put("testStringAbc","abc"); 34 | parameters.put("testInt","7"); 35 | parameters.put("testFloat",0.7); 36 | parameters.put("testEnum",Number.THREE); 37 | parser.parse(parameters); 38 | assertEquals(testStringAbc,"abc"); 39 | assertEquals(testStringNull,null); 40 | assertEquals(testInt,7); 41 | assertEquals(testFloat,0.7,0.00001); 42 | assertEquals(testEnum,Number.THREE); 43 | } 44 | 45 | } --------------------------------------------------------------------------------