├── .gitignore ├── README.md ├── environment.yml ├── java ├── bootstrapping │ ├── pom.xml │ └── src │ │ └── main │ │ ├── 0_pattern │ │ ├── ExtractedPattern.java │ │ ├── PatternExtractionStep.java │ │ ├── PatternExtractor.java │ │ ├── PatternSelectionRules.java │ │ ├── PatternSelector.java │ │ └── PatternStatistic.java │ │ ├── 1_instances │ │ ├── ExtractedInstance.java │ │ ├── Instance.java │ │ ├── InstanceExtractionStep.java │ │ ├── InstanceExtractor.java │ │ ├── InstanceSelector.java │ │ └── InstanceStatistic.java │ │ ├── 2_pathPattern │ │ ├── DepNode.java │ │ ├── DiGraph.java │ │ ├── PathExtractor.java │ │ ├── PathPattern.java │ │ └── PathPatternGeneralizer.java │ │ ├── java │ │ ├── LuceneQueryFramework.java │ │ ├── Main.java │ │ ├── ParallelExtractor.java │ │ ├── PreprocessedSentence.java │ │ ├── SeedLoader.java │ │ └── WikipediaExtractor.java │ │ └── resources │ │ └── log4j2.xml └── extraction │ ├── pom.xml │ └── src │ └── main │ ├── 1_causalityExtraction │ ├── CausalityExtractor.java │ ├── DepNode.java │ ├── DiGraph.java │ ├── GeneralSentence.java │ ├── MainExtractor.java │ ├── Match.java │ ├── PathPattern.java │ └── PatternLoader.java │ ├── 2_wikipediaExtraction │ ├── ArticleHandler.java │ ├── Section.java │ ├── SwebleResult.java │ ├── SwebleVisitor.java │ ├── WikipediaExtractionThread.java │ ├── WikipediaParser.java │ └── WikipediaSentence.java │ ├── 3_clueWebExtraction │ ├── ClueWebParser.java │ └── ClueWebSentence.java │ ├── 4_potthastTools │ ├── JerichoHtmlSentenceExtractor.java │ ├── PotthastJerichoExtractor.java │ ├── README.md │ ├── StopWordFilter.java │ ├── StopWordList.java │ ├── TextFilter.java │ ├── WordFilter.java │ └── WordMatchFilter.java │ ├── java │ └── Main.java │ └── resources │ └── log4j2.xml ├── notebooks ├── 01-concept-spotting │ ├── 01-texts-training.ipynb │ ├── 02-texts-spotting-wikipedia.ipynb │ ├── 03-texts-spotting-clueweb.ipynb │ ├── 04-infoboxes-training.ipynb │ ├── 05-infoboxes-spotting.ipynb │ ├── 06-lists-training.ipynb │ └── 07-lists-spotting.ipynb ├── 02-graph-construction │ └── 01-graph-construction.ipynb ├── 03-graph-analysis │ ├── 01-knowledge-bases-overview.ipynb │ └── 02-graph-statistics.ipynb ├── 04-graph-evaluation │ ├── 01-graph-evaluation-precision.ipynb │ ├── 02-qa-corpus-construction.ipynb │ └── 03-graph-evaluation-recall.ipynb └── load-into-neo4j.ipynb └── scripts ├── bootstrapping.sh ├── extraction-clueweb12.sh └── extraction-wikipedia.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/README.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/environment.yml -------------------------------------------------------------------------------- /java/bootstrapping/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/pom.xml -------------------------------------------------------------------------------- /java/bootstrapping/src/main/0_pattern/ExtractedPattern.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/0_pattern/ExtractedPattern.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/0_pattern/PatternExtractionStep.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/0_pattern/PatternExtractionStep.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/0_pattern/PatternExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/0_pattern/PatternExtractor.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/0_pattern/PatternSelectionRules.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/0_pattern/PatternSelectionRules.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/0_pattern/PatternSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/0_pattern/PatternSelector.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/0_pattern/PatternStatistic.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/0_pattern/PatternStatistic.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/1_instances/ExtractedInstance.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/1_instances/ExtractedInstance.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/1_instances/Instance.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/1_instances/Instance.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/1_instances/InstanceExtractionStep.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/1_instances/InstanceExtractionStep.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/1_instances/InstanceExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/1_instances/InstanceExtractor.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/1_instances/InstanceSelector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/1_instances/InstanceSelector.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/1_instances/InstanceStatistic.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/1_instances/InstanceStatistic.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/2_pathPattern/DepNode.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/2_pathPattern/DepNode.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/2_pathPattern/DiGraph.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/2_pathPattern/DiGraph.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/2_pathPattern/PathExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/2_pathPattern/PathExtractor.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/2_pathPattern/PathPattern.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/2_pathPattern/PathPattern.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/2_pathPattern/PathPatternGeneralizer.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/2_pathPattern/PathPatternGeneralizer.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/java/LuceneQueryFramework.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/java/LuceneQueryFramework.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/java/Main.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/java/Main.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/java/ParallelExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/java/ParallelExtractor.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/java/PreprocessedSentence.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/java/PreprocessedSentence.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/java/SeedLoader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/java/SeedLoader.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/java/WikipediaExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/java/WikipediaExtractor.java -------------------------------------------------------------------------------- /java/bootstrapping/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/bootstrapping/src/main/resources/log4j2.xml -------------------------------------------------------------------------------- /java/extraction/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/pom.xml -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/CausalityExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/CausalityExtractor.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/DepNode.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/DepNode.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/DiGraph.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/DiGraph.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/GeneralSentence.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/GeneralSentence.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/MainExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/MainExtractor.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/Match.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/Match.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/PathPattern.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/PathPattern.java -------------------------------------------------------------------------------- /java/extraction/src/main/1_causalityExtraction/PatternLoader.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/1_causalityExtraction/PatternLoader.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/ArticleHandler.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/ArticleHandler.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/Section.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/Section.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/SwebleResult.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/SwebleResult.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/SwebleVisitor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/SwebleVisitor.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/WikipediaExtractionThread.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/WikipediaExtractionThread.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/WikipediaParser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/WikipediaParser.java -------------------------------------------------------------------------------- /java/extraction/src/main/2_wikipediaExtraction/WikipediaSentence.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/2_wikipediaExtraction/WikipediaSentence.java -------------------------------------------------------------------------------- /java/extraction/src/main/3_clueWebExtraction/ClueWebParser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/3_clueWebExtraction/ClueWebParser.java -------------------------------------------------------------------------------- /java/extraction/src/main/3_clueWebExtraction/ClueWebSentence.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/3_clueWebExtraction/ClueWebSentence.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/JerichoHtmlSentenceExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/JerichoHtmlSentenceExtractor.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/PotthastJerichoExtractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/PotthastJerichoExtractor.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/README.md -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/StopWordFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/StopWordFilter.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/StopWordList.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/StopWordList.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/TextFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/TextFilter.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/WordFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/WordFilter.java -------------------------------------------------------------------------------- /java/extraction/src/main/4_potthastTools/WordMatchFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/4_potthastTools/WordMatchFilter.java -------------------------------------------------------------------------------- /java/extraction/src/main/java/Main.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/java/Main.java -------------------------------------------------------------------------------- /java/extraction/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/java/extraction/src/main/resources/log4j2.xml -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/01-texts-training.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/01-texts-training.ipynb -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/02-texts-spotting-wikipedia.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/02-texts-spotting-wikipedia.ipynb -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/03-texts-spotting-clueweb.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/03-texts-spotting-clueweb.ipynb -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/04-infoboxes-training.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/04-infoboxes-training.ipynb -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/05-infoboxes-spotting.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/05-infoboxes-spotting.ipynb -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/06-lists-training.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/06-lists-training.ipynb -------------------------------------------------------------------------------- /notebooks/01-concept-spotting/07-lists-spotting.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/01-concept-spotting/07-lists-spotting.ipynb -------------------------------------------------------------------------------- /notebooks/02-graph-construction/01-graph-construction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/02-graph-construction/01-graph-construction.ipynb -------------------------------------------------------------------------------- /notebooks/03-graph-analysis/01-knowledge-bases-overview.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/03-graph-analysis/01-knowledge-bases-overview.ipynb -------------------------------------------------------------------------------- /notebooks/03-graph-analysis/02-graph-statistics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/03-graph-analysis/02-graph-statistics.ipynb -------------------------------------------------------------------------------- /notebooks/04-graph-evaluation/01-graph-evaluation-precision.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/04-graph-evaluation/01-graph-evaluation-precision.ipynb -------------------------------------------------------------------------------- /notebooks/04-graph-evaluation/02-qa-corpus-construction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/04-graph-evaluation/02-qa-corpus-construction.ipynb -------------------------------------------------------------------------------- /notebooks/04-graph-evaluation/03-graph-evaluation-recall.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/04-graph-evaluation/03-graph-evaluation-recall.ipynb -------------------------------------------------------------------------------- /notebooks/load-into-neo4j.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/notebooks/load-into-neo4j.ipynb -------------------------------------------------------------------------------- /scripts/bootstrapping.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/scripts/bootstrapping.sh -------------------------------------------------------------------------------- /scripts/extraction-clueweb12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/scripts/extraction-clueweb12.sh -------------------------------------------------------------------------------- /scripts/extraction-wikipedia.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/causenet-org/CIKM-20/HEAD/scripts/extraction-wikipedia.sh --------------------------------------------------------------------------------