├── gradle.properties ├── .gitignore ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .travis.yml ├── .gitattributes ├── src ├── main │ ├── groovy │ │ └── komenti │ │ │ ├── klib │ │ │ ├── Label.groovy │ │ │ ├── TermTriple.groovy │ │ │ ├── Annotation.groovy │ │ │ ├── Term.groovy │ │ │ ├── AnnotationList.groovy │ │ │ ├── PDFReader.groovy │ │ │ ├── TermTripleList.groovy │ │ │ ├── Vocabulary.groovy │ │ │ ├── ElasticSearch.groovy │ │ │ ├── Sentence.groovy │ │ │ ├── OntologyBuilder.groovy │ │ │ ├── KomentLib.groovy │ │ │ └── Komentisto.groovy │ │ │ ├── App.groovy │ │ │ └── Komenti.groovy │ └── resources │ │ ├── templates │ │ ├── roster.json │ │ ├── roster_with_abstract_download.json │ │ ├── roster_mine_relationship.json │ │ └── roster_suggest_axiom.json │ │ └── words │ │ ├── family.txt │ │ └── uncertain.txt └── test │ ├── resources │ ├── annotate_this.txt │ ├── pubmed20n0688_abstract_12336.txt │ └── go_labels_test.txt │ └── groovy │ └── komenti │ ├── VocabularyTest.groovy │ ├── KomentistoTest.groovy │ └── AppTest.groovy ├── settings.gradle ├── gradlew.bat ├── gradlew ├── README.md └── LICENSE /gradle.properties: -------------------------------------------------------------------------------- 1 | org.gradle.jvmargs=-Xmx6G -Xms6G 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Gradle project-specific cache directory 2 | .gradle 3 | build 4 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/debuos512/komenti/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 
1 | language: groovy 2 | dist: bionic 3 | sudo: required 4 | install: gradle --no-daemon assemble 5 | script: gradle --no-daemon check 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/articles/dealing-with-line-endings/ 3 | # 4 | # These are explicitly windows files and should use crlf 5 | *.bat text eol=crlf 6 | 7 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/Label.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | /**
 4 |  * One label row: a label string together with the IRI, group, ontology and
 5 |  * priority stored alongside it.
 6 |  */
 7 | class Label {
 8 |   def label
 9 |   def iri
10 |   def group
11 |   def ontology
12 |   def priority
13 |
14 |   /** Renders the five fields, in declaration order, as one tab-separated line. */
15 |   String toString() {
16 |     def columns = [label, iri, group, ontology, priority]
17 |     columns.join('\t')
18 |   }
19 | }
20 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/TermTriple.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | /** A subject / relation / object statement whose three parts are Terms. */
 4 | class TermTriple {
 5 |   Term subject
 6 |   Term relation
 7 |   Term object
 8 |
 9 |   /** Renders the triple as "subject -> relation -> object" via each Term's toString(). */
10 |   String toString() {
11 |     def parts = [subject.toString(), relation.toString(), object.toString()]
12 |     parts.join(' -> ')
13 |   }
14 | }
15 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by
the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/6.3/userguide/multi_project_builds.html 8 | */ 9 | 10 | rootProject.name = 'komenti' 11 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/Annotation.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | /**
 4 |  * A single annotation record. toString() fixes the tab-separated column
 5 |  * order: documentId, termIri, conceptLabel, matchedText, group, tags,
 6 |  * sentenceId, text.
 7 |  */
 8 | class Annotation implements Serializable {
 9 |   def documentId
10 |   def termIri
11 |   def conceptLabel
12 |   def matchedText
13 |   def group
14 |   def tags
15 |   def sentenceId
16 |   def text
17 |
18 |   /**
19 |    * Renders the annotation as one tab-separated line; the tags are
20 |    * comma-joined into a single column.
21 |    */
22 |   String toString() {
23 |     // tags is an untyped property that may never have been assigned; treat
24 |     // that as "no tags" (empty column) instead of throwing a NullPointerException
25 |     def tagColumn = (tags ?: []).join(',')
26 |     [documentId, termIri, conceptLabel, matchedText, group, tagColumn, sentenceId, text].join('\t')
27 |   }
28 | }
29 | -------------------------------------------------------------------------------- /src/main/resources/templates/roster.json: -------------------------------------------------------------------------------- 1 | { 2 | "commands": [ 3 | { 4 | "id": "class_query", 5 | "command": "query", 6 | "args": { 7 | "ontology": "HP", 8 | "query": "hypertension", 9 | "out": "./out/labels.txt", 10 | "lemmatise": true 11 | } 12 | }, 13 | { 14 | "command": "annotate", 15 | "args": { 16 | "text": "./out/abstracts/", 17 | "labels": "./out/labels.txt", 18 | "out": "./out/annotations.txt" 19 | } 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /src/main/resources/words/family.txt: -------------------------------------------------------------------------------- 1 | father 2 | sisters 3 | mother 4 | brother 5 | maternal 6 | sister 7 | son 8 | mum 9 | dad 10 | daughter 11 | grandmother 12 | grandfather 13 | grandson 14 | granddaughter 15 | uncle 16 | aunt 17 | cousin 18 | nephew 19 | niece 20 | father-in-law 21 | mother-in-law 22
| relatives 23 | brother-in-law 24 | sister-in-law 25 | great-grandfather 26 | great-grandmother 27 | step-daughter 28 | step-father 29 | step-mother 30 | step-brother 31 | step-sister 32 | step-son 33 | step-uncle 34 | step-aunt 35 | step-grandfather 36 | step-grandmother 37 | half-brother 38 | half-sister 39 | ex-husband 40 | ex-wife 41 | husband 42 | wife 43 | sibling 44 | family 45 | uncles 46 | -------------------------------------------------------------------------------- /src/main/resources/templates/roster_with_abstract_download.json: -------------------------------------------------------------------------------- 1 | { 2 | "commands": [ 3 | { 4 | "id": "class_query", 5 | "command": "query", 6 | "args": { 7 | "ontology": "HP", 8 | "query": "hypertension", 9 | "out": "./out/labels.txt", 10 | "lemmatise": true 11 | } 12 | }, 13 | { 14 | "command": "get_abstracts", 15 | "args": { 16 | "labels": "./out/labels.txt", 17 | "out": "./out/abstracts/" 18 | } 19 | }, 20 | { 21 | "command": "annotate", 22 | "args": { 23 | "text": "./out/abstracts/", 24 | "labels": "./out/labels.txt", 25 | "out": "./out/annotations.txt" 26 | } 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /src/test/resources/annotate_this.txt: -------------------------------------------------------------------------------- 1 | Apoptotic DNA fragmentation is a key feature of apoptosis, a type of programmed cell death. Apoptosis is characterized by the activation of endogenous endonucleases, particularly the caspase-3 activated DNase (CAD),[1] with subsequent cleavage of nuclear DNA into internucleosomal fragments of roughly 180 base pairs (bp) and multiples thereof (360, 540 etc.). 
The apoptotic DNA fragmentation is being used as a marker of apoptosis and for identification of apoptotic cells either via the DNA laddering assay,[2] the TUNEL assay,[3][4] or the by detection of cells with fractional DNA content ("sub G1 cells") on DNA content frequency histograms e.g. as in the Nicoletti assay.[5][6] There is no such thing as apoptotic DNA fragmentation. 2 | -------------------------------------------------------------------------------- /src/test/groovy/komenti/VocabularyTest.groovy: --------------------------------------------------------------------------------
 1 | package komenti
 2 |
 3 | import spock.lang.Specification
 4 | import spock.lang.Shared
 5 |
 6 | import klib.*
 7 |
 8 | /** Checks loading of the go_labels_test.txt vocabulary and label lookup by IRI. */
 9 | class VocabularyTest extends Specification {
10 |   @Shared vocabulary
11 |
12 |   // Load the shared vocabulary once, before any feature runs, so that every
13 |   // feature method can be executed on its own. Previously "get label" only
14 |   // worked if "load_vocabulary" had already run and filled the shared field.
15 |   def setupSpec() {
16 |     def testFile = getClass().getResource('/go_labels_test.txt').toURI()
17 |     vocabulary = Vocabulary.loadFile(testFile)
18 |   }
19 |
20 |   def "load_vocabulary"() {
21 |     expect:
22 |     vocabulary instanceof Vocabulary
23 |   }
24 |
25 |   def "get label"() {
26 |     given:
27 |     def iri = "http://purl.obolibrary.org/obo/GO_0036483"
28 |     def expectedLabel = "endoplasmic reticulum stress-induced neuron apoptosis"
29 |     def unexpectedLabel = "biscuit juice"
30 |     when:
31 |     def eLabels = vocabulary.entityLabels(iri)
32 |     then:
33 |     eLabels.size() == 5
34 |     then:
35 |     eLabels.contains(expectedLabel)
36 |     then:
37 |     !eLabels.contains(unexpectedLabel)
38 |   }
39 | }
40 | -------------------------------------------------------------------------------- /src/main/resources/templates/roster_mine_relationship.json: -------------------------------------------------------------------------------- 1 | { 2 | "commands": [ 3 | { 4 | "id": "class_query", 5 | "command": "query", 6 | "args": { 7 | "ontology": "HP", 8 | "class-list": "", 9 | "out": "./out/labels.txt", 10 | "lemmatise": true 11 | } 12 | }, 13 | { 14 | "command": "get_abstracts", 15 | "args": { 16 | "labels": "./out/labels.txt", 17 | "out": "./out/abstracts/", 18 | "group-by-query": true, 19 | "conjunction": true 20 | } 21 | }, 22 | { 23 | "command": "annotate", 24 | "args": { 25 | "text": "./out/abstracts/", 26 | "labels": "./out/labels.txt", 27 | "out": "./out/annotations.txt" 28 | } 29 | }, 30 | { 31 | "command": "summarise_entity_pair", 32 | "args": { 33 | "class-list": "", 34 | "labels": "./out/labels.txt", 35 | "annotation-file": "./out/annotations.txt" 36 | } 37 | } 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /src/test/groovy/komenti/KomentistoTest.groovy: --------------------------------------------------------------------------------
 1 | package komenti
 2 |
 3 | import spock.lang.Specification
 4 | import spock.lang.Shared
 5 |
 6 | import klib.*
 7 |
 8 | /** Runs Komentisto.extractTriples over fixture text and prints what it returns. */
 9 | class KomentistoTest extends Specification {
10 |   @Shared komentisto
11 |
12 |   def setupSpec() {
13 |     // getResource returns null for a missing file; fail with a readable message
14 |     // rather than a NullPointerException from the chained toURI() call.
15 |     // NOTE(review): go_triple_vocab.txt is not among the listed test resources - confirm it is checked in.
16 |     def vocabResource = getClass().getResource('/go_triple_vocab.txt')
17 |     assert vocabResource != null : "Test resource /go_triple_vocab.txt is not on the test classpath"
18 |     def testFile = vocabResource.toURI()
19 |     def vocabulary = Vocabulary.loadFile(testFile)
20 |
21 |     komentisto = new Komentisto(vocabulary,
22 |       false,
23 |       false,
24 |       false,
25 |       true,
26 |       false,
27 |       1)
28 |   }
29 |
30 |   def "extract_triples"() {
31 |     given:
32 |     def aFile = getClass().getResource('/pubmed20n0688_abstract_12336.txt').toURI()
33 |     def aText = new File(aFile).text
34 |     when:
35 |     def out = komentisto.extractTriples("test", aText, true)
36 |     then:
37 |     out.each {
38 |       println it
39 |     }
40 |   }
41 |
42 |   def "multilevel_specifiers"() {
43 |     given:
44 |     def input = "blue developmental process apoptosis was observed in apoptosis"
45 |     when:
46 |     def out = komentisto.extractTriples("test", input, true)
47 |     then:
48 |     out.each {
49 |       println it.subject
50 |     }
51 |   }
52 | }
53 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/Term.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | /**
 4 |  * An IRI/label pair, optionally chained to a parent Term and optionally
 5 |  * carrying the Annotation it was built from.
 6 |  */
 7 | class Term implements Serializable {
 8 |   Term parentTerm
 9 |   def iri
10 |   def label
11 |   def specificLabel
12 |   Annotation originalAnnotation
13 |
14 |   /** Creates a term with no parent and no originating annotation. */
15 |   Term(String iri, String label) {
16 |     this.iri = iri
17 |     this.label = label
18 |   }
19 |
20 |   /** Creates a term from specifiedTerm's iri and label, chained under parentTerm. */
21 |   Term(Term parentTerm, Term specifiedTerm) {
22 |     this(specifiedTerm.iri, specifiedTerm.label)
23 |     this.parentTerm = parentTerm
24 |   }
25 |
26 |   /** Creates a term chained under parentTerm. */
27 |   Term(Term parentTerm, String iri, String label) {
28 |     this(iri, label)
29 |     this.parentTerm = parentTerm
30 |   }
31 |
32 |   /** Creates a term chained under parentTerm that remembers its originating annotation. */
33 |   Term(Term parentTerm, String iri, String label, Annotation originalAnnotation) {
34 |     this(iri, label)
35 |     this.parentTerm = parentTerm
36 |     this.originalAnnotation = originalAnnotation
37 |   }
38 |
39 |   /** Creates a parentless term that remembers its originating annotation. */
40 |   Term(String iri, String label, Annotation originalAnnotation) {
41 |     this(iri, label)
42 |     this.originalAnnotation = originalAnnotation
43 |   }
44 |
45 |   /** Builds a parentless term from an annotation's termIri and conceptLabel, keeping the annotation. */
46 |   static Term fromAnnotation(Annotation a) {
47 |     new Term(a.termIri, a.conceptLabel, a)
48 |   }
49 |
50 |   /** Renders "label<iri>", prefixed by the parent's rendering and a space when a parent exists. */
51 |   String toString() {
52 |     if(parentTerm) {
53 |       parentTerm.toString() + " $label<$iri>"
54 |     } else {
55 |       "$label<$iri>"
56 |     }
57 |   }
58 |
59 |   /**
60 |    * Returns the full label: the parent's full label, a space, then this
61 |    * term's own label; or just the own label when there is no parent.
62 |    */
63 |   String getLabel() {
64 |     if(parentTerm) {
65 |       parentTerm.getLabel() + " " + label.toString()
66 |     } else {
67 |       label.toString()
68 |     }
69 |   }
70 |
71 |   /** Returns this term's own label without any parent prefix. */
72 |   String getSpecificLabel() {
73 |     label
74 |   }
75 | }
76 | -------------------------------------------------------------------------------- /src/test/resources/pubmed20n0688_abstract_12336.txt: -------------------------------------------------------------------------------- 1 | Harnessing the ability of cytotoxic T lymphocytes (CTLs) to recognize and eradicate tumor or pathogen-infected cells is a critical goal of modern immune-based therapies. Although multiple immunization strategies efficiently induce high levels of antigen-specific CTLs, the initial increase is typically followed by a rapid contraction phase resulting in a sharp decline in the frequency of functional CTLs.
We describe a novel approach to immunotherapy based on a transplantation of low numbers of antigen-expressing hematopoietic stem cells (HSCs) following nonmyeloablative or partially myeloablative conditioning. Continuous antigen presentation by a limited number of differentiated transgenic hematopoietic cells results in an induction and prolonged maintenance of fully functional effector T cell responses in a mouse model. Recipient animals display high levels of antigen-specific CTLs four months following transplantation in contrast to dendritic cell-immunized animals in which the response typically declines at 4-6 weeks post-immunization. Majority of HSC-induced antigen-specific CD8+ T cells display central memory phenotype, efficiently kill target cells in vivo, and protect recipients against tumor growth in a preventive setting. Furthermore, we confirm previously published observation that high level engraftment of antigen-expressing HSCs following myeloablative conditioning results in tolerance and an absence of specific cytotoxic activity in vivo. In conclusion, the data presented here supports potential application of immunization by limited transplantation of antigen-expressing HSCs for the prevention and treatment of cancer and therapeutic immunization of chronic infectious diseases such as HIV-1/AIDS. 
2 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/AnnotationList.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | import groovy.transform.MapConstructor
 4 |
 5 | /**
 6 |  * An iterable list of Annotation objects that can also stream every added
 7 |  * annotation to a file as one tab-separated line.
 8 |  */
 9 | @MapConstructor
10 | class AnnotationList implements Iterable {
11 |   List a = []
12 |   def writeMode
13 |   def annPath
14 |   def outWriter
15 |
16 |   /**
17 |    * Use this constructor rather than the MapConstructor when you want write
18 |    * mode. A writer on annPath is opened only when writeMode is truthy.
19 |    */
20 |   AnnotationList(annPath, writeMode) {
21 |     this.writeMode = writeMode
22 |     this.annPath = annPath
23 |     if(writeMode) {
24 |       outWriter = new BufferedWriter(new FileWriter(annPath))
25 |     }
26 |   }
27 |
28 |   /**
29 |    * Appends every annotation in ans. In write mode each one is also written
30 |    * out, and the writer is flushed whenever the list size is a multiple of 500.
31 |    */
32 |   def add(List ans) {
33 |     ans.each { an ->
34 |       a << an
35 |       if(writeMode) {
36 |         outWriter.write(an.toString() + '\n')
37 |         if((a.size() % 500) == 0) { outWriter.flush() }
38 |       }
39 |     }
40 |   }
41 |
42 |   /** Flushes and closes the writer, if one was opened, and leaves write mode. */
43 |   def finishWrite() {
44 |     // Lists built through the map constructor, or with writeMode false, never
45 |     // open a writer; closing unconditionally threw a NullPointerException.
46 |     if(outWriter != null) {
47 |       outWriter.flush()
48 |       outWriter.close()
49 |       outWriter = null
50 |     }
51 |     writeMode = false
52 |   }
53 |
54 |   /** Returns the annotations whose group equals g. */
55 |   def byGroup(g) {
56 |     a.findAll { it.group == g }
57 |   }
58 |
59 |   /** Returns the annotations whose documentId equals d. */
60 |   def byDocument(d) {
61 |     a.findAll { it.documentId == d }
62 |   }
63 |
64 |   /** Returns the annotations whose sentenceId equals s. */
65 |   def bySentence(s) {
66 |     a.findAll { it.sentenceId == s }
67 |   }
68 |
69 |   @Override
70 |   Iterator iterator() {
71 |     a.iterator()
72 |   }
73 |
74 |   /**
75 |    * Reads a file of tab-separated annotation lines (the column order that
76 |    * Annotation.toString() writes) into a new, non-writing AnnotationList.
77 |    */
78 |   static def loadFile(fileName) {
79 |     // skip blank lines so an empty file yields an empty list instead of an index error
80 |     def rows = new File(fileName).text.split('\n').findAll { it.trim() }
81 |     def ans = rows.collect { row ->
82 |       // limit -1 keeps trailing empty columns (e.g. an empty text field),
83 |       // which a plain split('\t') silently drops
84 |       def f = row.split('\t', -1)
85 |       new Annotation(
86 |         documentId: f[0],
87 |         termIri: f[1],
88 |         conceptLabel: f[2].replaceAll('\\\\',''),
89 |         matchedText: f[3],
90 |         group: f[4],
91 |         tags: f[5] != "" ? f[5].split(',') : [],
92 |         sentenceId: f[6],
93 |         text: f[7]
94 |       )
95 |     }
96 |
97 |     new AnnotationList(a: ans)
98 |   }
99 | }
100 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/PDFReader.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | import org.apache.pdfbox.pdmodel.PDDocument
 4 | import org.apache.pdfbox.text.PDFTextStripper
 5 |
 6 | /**
 7 |  * Extracts the text of a PDF page by page, lower-cased and passed through a
 8 |  * fixed sequence of regex clean-ups; the cleaned pages are kept in `pages`.
 9 |  */
10 | public class PDFReader {
11 |   def pages = []
12 |
13 |   /**
14 |    * Loads `file` and fills `pages`. A failure is printed rather than
15 |    * rethrown, leaving whatever pages were extracted before it.
16 |    */
17 |   def PDFReader(file) {
18 |     def reader
19 |     try {
20 |       reader = PDDocument.load(file)
21 |       def stripper = new PDFTextStripper()
22 |       stripper.setAddMoreFormatting(true)
23 |
24 |       // Counted loop instead of (1..n).each: for a document with no pages the
25 |       // Groovy range 1..0 runs backwards and would request pages 1 and 0.
26 |       def pageCount = reader.getNumberOfPages()
27 |       for(int pageNo = 1; pageNo <= pageCount; pageNo++) {
28 |         stripper.setStartPage(pageNo)
29 |         stripper.setEndPage(pageNo)
30 |
31 |         def text = stripper.getText(reader).toLowerCase()
32 |
33 |         // order matters: each replacement sees the output of the previous one
34 |         text = text.replaceAll('\n\n\n', '. ')
35 |         text = text.replaceAll('\u2022', '. ')
36 |         text = text.replaceAll('–', '. ')
37 |         text = text.replaceAll('\\s-', '. ')
38 |         text = text.replaceAll('–\\s', '. ')
39 |         text = text.replaceAll('\\s-', '. ')
40 |         text = text.replaceAll('–\\s', '. ')
41 |         text = text.replaceAll('\\s+', ' ')
42 |         text = text.replaceAll(', \\?', '. ?')
43 |         text = text.replaceAll('\\.', '. ')
44 |
45 |         // rejoin words that come out with a space after their first letter
46 |         text = text.replaceAll('m edication', 'medication')
47 |         text = text.replaceAll('a llergies', 'allergies')
48 |         text = text.replaceAll('p ast', 'past')
49 |
50 |         // put '. ' in front of these words - presumably to force a sentence break; confirm
51 |         text = text.replaceAll('past', '. past')
52 |         text = text.replaceAll('medication', '. medication')
53 |         text = text.replaceAll('allergies', '. allergies')
54 |
55 |         pages << text
56 |       }
57 |     } catch(e) {
58 |       println "Failed to load document!"
59 |       e.printStackTrace()
60 |     } finally {
61 |       // close even when extraction failed part-way, so the document is not leaked
62 |       try {
63 |         reader?.close()
64 |       } catch(e) {
65 |         println "Failed to close document!"
66 |         e.printStackTrace()
67 |       }
68 |     }
69 |   }
70 |
71 |   /** Iterates over the cleaned page texts in page order. */
72 |   Iterator iterator() {
73 |     return pages.iterator()
74 |   }
75 |
76 |   /** Returns all cleaned pages joined with newlines. */
77 |   def getText() {
78 |     pages.join('\n')
79 |   }
80 | }
81 | -------------------------------------------------------------------------------- /src/main/resources/templates/roster_suggest_axiom.json: -------------------------------------------------------------------------------- 1 | { 2 | "commands": [ 3 | { 4 | "id": "class_query", 5 | "command": "query", 6 | "args": { 7 | "ontology": "HP", 8 | "class-list": "", 9 | "out": "./out/labels.txt", 10 | "override-group": "class" 11 | } 12 | }, 13 | { 14 | "id": "entity_query", 15 | "command": "query", 16 | "args": { 17 | "ontology": "HP", 18 | "class-list": "", 19 | "out": "./out/labels.txt", 20 | "append": true, 21 | "query-type": "equivalent", 22 | "override-group": "entity" 23 | } 24 | }, 25 | { 26 | "id": "quality_query", 27 | "command": "query", 28 | "args": { 29 | "ontology": "HP", 30 | "class-list": "", 31 | "out": "./out/labels.txt", 32 | "append": true, 33 | "lemmatise": true, 34 | "override-group": "quality" 35 | } 36 | }, 37 | { 38 | "id": "relation_query", 39 | "command": "query", 40 | "args": { 41 | "ontology": "RO", 42 | "object-properties": "true", 43 | "out": "./out/labels.txt", 44 | "append": true, 45 | "lemmatise": true, 46 | "override-group": "relation" 47 | } 48 | }, 49 | { 50 | "command": "get_metadata", 51 | "args": { 52 | "labels": "./out/labels.txt", 53 | "out": "./out/ls/", 54 | "exclude-groups": "entity,quality,relation", 55 | "decompose-entities": true 56 | } 57 | }, 58 | { 59 | "command": "annotate", 60 | "args": { 61 | "text": "./out/ls/", 62 | "labels": "./out/labels.txt", 63 | "out": "./out/annotations.txt", 64 | "per-line": true 65 | } 66 | }, 67 | { 68 | "command": "suggest_axiom", 69 | "args": { 70 | "default-relation": "", 71 | "default-entity": "", 72 | "annotation-file": "./out/annotations.txt", 73 | "labels": "./out/labels.txt" 74 | } 75 | } 76 | ] 77 | } 78 |
-------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/TermTripleList.groovy: --------------------------------------------------------------------------------
 1 | package klib
 2 |
 3 | import groovy.json.*
 4 | import groovy.transform.MapConstructor
 5 |
 6 | /**
 7 |  * An iterable list of TermTriples that holds at most one triple per distinct
 8 |  * toString() rendering, and can be saved to and loaded from JSON.
 9 |  */
10 | @MapConstructor
11 | class TermTripleList implements Iterable {
12 |   List a = []
13 |   def annPath
14 |   def writeMode
15 |
16 |   /** Creates an empty list that write() will save to annPath. */
17 |   TermTripleList(annPath, writeMode) {
18 |     this.writeMode = writeMode
19 |     this.annPath = annPath
20 |   }
21 |
22 |   @Override
23 |   Iterator iterator() {
24 |     a.iterator()
25 |   }
26 |
27 |   /** Adds each triple in ans through add(TermTriple), so duplicates are skipped. */
28 |   def add(List ans) {
29 |     ans.each { an ->
30 |       add(an)
31 |     }
32 |   }
33 |
34 |   /** Adds an unless a triple with the same toString() rendering is already held. */
35 |   def add(TermTriple an) {
36 |     // kind of inefficient; for concurrency purposes
37 |     if(!a.asImmutable().any { it.toString() == an.toString() }) {
38 |       a << an
39 |       // Nothing is written here, even in write mode: the incremental write
40 |       // calls were disabled, so triples reach disk only via write()/finishWrite().
41 |     }
42 |   }
43 |
44 |   /**
45 |    * Saves the list as pretty-printed JSON to annPath, plus a plain-text
46 |    * companion (one toString() rendering per line) named "string_<name>.txt".
47 |    */
48 |   def write() {
49 |     def jsonFile = new File(annPath)
50 |     jsonFile.text = new JsonBuilder(a).toPrettyString()
51 |
52 |     // Derive the companion name from the file name only. Prefixing the whole
53 |     // path gave e.g. "string_./out/x.txt" for "./out/x.json", and
54 |     // replace('json','txt') rewrote "json" anywhere in the path.
55 |     def companionName = jsonFile.name
56 |     if(companionName.endsWith('.json')) {
57 |       companionName = companionName.substring(0, companionName.length() - 5) + '.txt'
58 |     }
59 |     companionName = 'string_' + companionName
60 |     def parentDir = jsonFile.parentFile
61 |     def textFile = parentDir ? new File(parentDir, companionName) : new File(companionName)
62 |     textFile.text = a.collect { it.toString() }.join('\n')
63 |   }
64 |
65 |   /** Same as write(); mirrors AnnotationList.finishWrite(). */
66 |   def finishWrite() { write() }
67 |
68 |   /**
69 |    * Loads a JSON file produced by write() into a new TermTripleList whose
70 |    * annPath is fileName.
71 |    */
72 |   static def loadFile(fileName) {
73 |     def processTerm // declared before assignment so the closure can call itself
74 |     // JsonSlurper returns plain maps, so each Term (with its parent chain and
75 |     // original annotation, when present) is rebuilt by hand here.
76 |     processTerm = { t ->
77 |       Annotation a
78 |       if(t.originalAnnotation) {
79 |         a = new Annotation(t.originalAnnotation)
80 |       }
81 |       if(t.parentTerm) {
82 |         t.parentTerm = processTerm(t.parentTerm)
83 |         new Term(t.parentTerm, t.iri, t.specificLabel, a)
84 |       } else {
85 |         new Term(t.iri, t.specificLabel, a)
86 |       }
87 |     }
88 |     def ans = new JsonSlurper().parse(new File(fileName)).collect {
89 |       new TermTriple(
90 |         subject: processTerm(it.subject),
91 |         relation: processTerm(it.relation),
92 |         object: processTerm(it.object)
93 |       )
94 |     }
95 |
96 |     new TermTripleList(a: ans, annPath: fileName)
97 |   }
98 | }
99 | -------------------------------------------------------------------------------- /src/main/groovy/komenti/klib/Vocabulary.groovy: -------------------------------------------------------------------------------- 1 | package klib 2 | 3 | import java.util.concurrent.* 4 | 5 | class Vocabulary implements Iterable