├── .gitignore ├── INTERNAL_INSTRUCTION.md ├── LICENSE.txt ├── README.md ├── input └── one-question.json ├── pom.xml └── src └── main ├── java └── edu │ └── cmu │ └── lti │ └── oaqa │ ├── baseqa │ ├── CasSerializer.java │ ├── abstract_query │ │ ├── BagOfTokenAbstractQueryGenerator.java │ │ ├── ConceptAbstractQueryGenerator.java │ │ ├── TokenConceptAbstractQueryGenerator.java │ │ └── TokenSelectionAbstractQueryGenerator.java │ ├── answer │ │ ├── CavUtil.java │ │ ├── collective_score │ │ │ └── scorers │ │ │ │ ├── DistanceCollectiveAnswerScorer.java │ │ │ │ ├── EditDistanceCollectiveAnswerScorer.java │ │ │ │ ├── OriginalCollectiveAnswerScorer.java │ │ │ │ ├── ShapeDistanceCollectiveAnswerScorer.java │ │ │ │ └── TypeCoercionCollectiveAnswerScorer.java │ │ ├── generate │ │ │ ├── CavGenerationManager.java │ │ │ └── generators │ │ │ │ ├── CavCoveringConceptCavGenerator.java │ │ │ │ ├── CavGenerator.java │ │ │ │ ├── ChoiceCavGenerator.java │ │ │ │ ├── ConceptCavGenerator.java │ │ │ │ ├── CoveringPhraseCavGenerator.java │ │ │ │ └── QuantityCavGenerator.java │ │ ├── modify │ │ │ ├── AnswerModificationManager.java │ │ │ ├── CavModificationManager.java │ │ │ └── modifiers │ │ │ │ ├── AnswerModifier.java │ │ │ │ ├── CavMerger.java │ │ │ │ ├── CavModifier.java │ │ │ │ └── ListAnswerPruner.java │ │ ├── score │ │ │ ├── AnswerCandidateProvider.java │ │ │ ├── SimpleAnswerScorer.java │ │ │ └── scorers │ │ │ │ ├── AnswerTypeAnswerScorer.java │ │ │ │ ├── AvgCoveredTokenCountAnswerScorer.java │ │ │ │ ├── CaoCountAnswerScorer.java │ │ │ │ ├── ConceptOverlappingCountAnswerScorer.java │ │ │ │ ├── ConceptProximityAnswerScorer.java │ │ │ │ ├── ConceptTypeAnswerScorer.java │ │ │ │ ├── FocusAnswerScorer.java │ │ │ │ ├── FocusOverlappingCountAnswerScorer.java │ │ │ │ ├── FocusProximityAnswerScorer.java │ │ │ │ ├── NameCountAnswerScorer.java │ │ │ │ ├── ParseAnswerScorer.java │ │ │ │ ├── ParseHeadProximityAnswerScorer.java │ │ │ │ ├── StopwordCountAnswerScorer.java │ │ │ │ ├── 
TokenOverlappingCountAnswerScorer.java │ │ │ │ ├── TokenProximityAnswerScorer.java │ │ │ │ └── TypeCoercionAnswerScorer.java │ │ └── yesno │ │ │ ├── AllYesYesNoAnswerPredictor.java │ │ │ ├── YesNoAnswerPredictor.java │ │ │ ├── YesNoAnswerTrainer.java │ │ │ └── scorers │ │ │ ├── AlternateAnswerYesNoScorer.java │ │ │ ├── ConceptOverlapYesNoScorer.java │ │ │ ├── ExpectedAnswerOverlapYesNoScorer.java │ │ │ ├── NegationYesNoScorer.java │ │ │ ├── SentimentYesNoScorer.java │ │ │ ├── TokenOverlapYesNoScorer.java │ │ │ └── YesNoScorer.java │ ├── answer_type │ │ ├── AnswerTypeCVPredictLoader.java │ │ ├── AnswerTypeClassifierPredictor.java │ │ ├── AnswerTypeClassifierTrainer.java │ │ ├── ConceptGSAnswerTypeLabeler.java │ │ ├── ConceptSearchGSAnswerTypeLabeler.java │ │ ├── GSAnswerTypeLabeler.java │ │ └── QuestionAnswerTypes.java │ ├── concept │ │ ├── rerank │ │ │ ├── ConceptSearchResultCandidateProvider.java │ │ │ ├── LuceneInMemoryConceptReranker.java │ │ │ ├── MatchingNameConceptReranker.java │ │ │ ├── ScoreSummationConceptReranker.java │ │ │ ├── WeightingSearchIdConceptReranker.java │ │ │ └── scorers │ │ │ │ ├── LuceneConceptScorer.java │ │ │ │ ├── MatchingNameConceptScorer.java │ │ │ │ └── OriginalScoreConceptScorer.java │ │ └── retrieval │ │ │ └── LuceneConceptRetrievalExecutor.java │ ├── document │ │ ├── rerank │ │ │ ├── DocumentCandidateProvider.java │ │ │ ├── LogRegDocumentReranker.java │ │ │ └── scorers │ │ │ │ ├── LuceneDocumentScorer.java │ │ │ │ └── OriginalScoreDocumentScorer.java │ │ └── retrieval │ │ │ └── LuceneDocumentRetrievalExecutor.java │ ├── evidence │ │ ├── PassageToViewCopier.java │ │ ├── concept │ │ │ ├── ConceptMerger.java │ │ │ ├── ConceptSearcher.java │ │ │ └── PassageConceptRecognizer.java │ │ └── parse │ │ │ └── PassageParser.java │ ├── learning_base │ │ ├── AbstractCandidateProvider.java │ │ ├── AbstractScorer.java │ │ ├── CVPredictLoader.java │ │ ├── CandidateProvider.java │ │ ├── ClassifierPredictor.java │ │ ├── ClassifierTrainer.java │ │ └── 
Scorer.java │ ├── passage │ │ ├── RetrievalUtil.java │ │ ├── rerank │ │ │ ├── PassageCandidateProvider.java │ │ │ └── scorers │ │ │ │ ├── LuceneInMemoryPassageScorer.java │ │ │ │ ├── MetaInfoPassageScorer.java │ │ │ │ └── OriginalScorePassageScorer.java │ │ └── retrieval │ │ │ ├── DocumentToPassageConverter.java │ │ │ ├── ImprovedLuceneInMemorySentenceRetrievalExecutor.java │ │ │ └── LuceneInMemorySentenceRetrievalExecutor.java │ ├── preprocess │ │ ├── ConceptCacheUtil.java │ │ ├── PassageConceptCache.java │ │ └── QuestionConceptCache.java │ ├── providers │ │ ├── kb │ │ │ ├── ConceptProvider.java │ │ │ ├── ConceptSearchProvider.java │ │ │ ├── FrequentPhraseConceptProvider.java │ │ │ ├── LingPipeNerConceptProvider.java │ │ │ ├── OpenNlpChunkerConceptProvider.java │ │ │ └── SynonymExpansionProvider.java │ │ ├── ml │ │ │ └── classifiers │ │ │ │ ├── ClassifierProvider.java │ │ │ │ ├── FeatureConstructorProvider.java │ │ │ │ ├── FeatureConstructorProviderImpl.java │ │ │ │ ├── LibLinearProvider.java │ │ │ │ ├── LibSvmProvider.java │ │ │ │ ├── MekaProvider.java │ │ │ │ └── WekaProvider.java │ │ ├── parser │ │ │ ├── ClearParserProvider.java │ │ │ ├── LingPipeParserProvider.java │ │ │ └── ParserProvider.java │ │ └── query │ │ │ ├── BagOfPhraseQueryStringConstructor.java │ │ │ ├── BooleanBagOfPhraseQueryStringConstructor.java │ │ │ ├── LuceneQueryStringConstructor.java │ │ │ └── QueryStringConstructor.java │ └── question │ │ ├── concept │ │ └── QuestionConceptRecognizer.java │ │ ├── focus │ │ └── QuestionFocusExtractor.java │ │ ├── parse │ │ ├── QuestionLemmaDedowncaserDenormalizer.java │ │ ├── QuestionParser.java │ │ └── QuestionRegExProperNounTagger.java │ │ └── yesno │ │ └── YesNoToFactoidQuestionConverter.java │ ├── bioasq │ ├── concept │ │ ├── rerank │ │ │ └── scorers │ │ │ │ └── GoPubMedConceptRetrievalScorer.java │ │ └── retrieval │ │ │ ├── GoPubMedConceptRetrievalExecutor.java │ │ │ └── GoPubMedSeparateConceptRetrievalExecutor.java │ ├── document │ │ └── retrieval │ 
│ │ ├── DiXuProprietaryDocumentRetrievalClient.java │ │ │ └── GoPubMedDocumentRetrievalExecutor.java │ ├── eval │ │ └── calculator │ │ │ ├── AnswerEvalCalculator.java │ │ │ └── AnswerEvalMeasure.java │ ├── passage │ │ └── PmcContentSetter.java │ ├── triple │ │ └── retrieval │ │ │ └── GoPubMedTripleRetrievalExecutor.java │ └── util │ │ └── BioASQUtil.java │ └── bioqa │ └── providers │ ├── kb │ ├── CachedMetaMapConceptProvider.java │ ├── CachedTmToolConceptProvider.java │ ├── CachedUtsConceptSearchProvider.java │ ├── CachedUtsSynonymExpansionProvider.java │ ├── MetaMapConceptConvertUtil.java │ ├── MetaMapConceptProvider.java │ ├── MetaMapObject.java │ ├── PubAnnotation.java │ ├── PubAnnotationConvertUtil.java │ ├── TmToolConceptProvider.java │ ├── TmToolConceptProviderException.java │ ├── UtsConceptSearchProvider.java │ └── UtsSynonymExpansionProvider.java │ └── query │ ├── PubMedQueryStringConstructor.java │ └── PubMedQueryStringConstructorExample.java ├── resources ├── baseqa │ ├── abstract_query │ │ ├── bag-of-token.yaml │ │ ├── concept-required.yaml │ │ ├── concept.yaml │ │ ├── token-concept.yaml │ │ ├── token-selection-pos-stoplist.yaml │ │ ├── token-selection-pos.yaml │ │ └── token-selection.yaml │ ├── answer │ │ ├── collective_score │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ └── scorers │ │ │ │ ├── distance.yaml │ │ │ │ ├── edit-distance.yaml │ │ │ │ ├── original.yaml │ │ │ │ ├── shape-distance.yaml │ │ │ │ └── type-coercion.yaml │ │ ├── generate │ │ │ ├── generate.yaml │ │ │ └── generators │ │ │ │ ├── cav-covering-concept.yaml │ │ │ │ ├── choice.yaml │ │ │ │ ├── concept.yaml │ │ │ │ ├── covering-phrase.yaml │ │ │ │ └── quantity.yaml │ │ ├── modify │ │ │ ├── modifiers │ │ │ │ ├── list-50.yaml │ │ │ │ ├── list-70.yaml │ │ │ │ ├── merge.yaml │ │ │ │ ├── ratio-72.yaml │ │ │ │ └── ratio-88.yaml │ │ │ ├── modify.yaml │ │ │ └── pruner.yaml │ │ ├── score │ │ │ ├── candidate-provider.yaml │ │ │ ├── classifier-predict.yaml │ │ │ ├── 
classifier-train.yaml │ │ │ ├── cv-load.yaml │ │ │ ├── scorers │ │ │ │ ├── answer-type.yaml │ │ │ │ ├── avg-covered-token-count.yaml │ │ │ │ ├── cao-count.yaml │ │ │ │ ├── concept-overlap-count.yaml │ │ │ │ ├── concept-proximity.yaml │ │ │ │ ├── concept-type.yaml │ │ │ │ ├── focus-overlap-count.yaml │ │ │ │ ├── focus-proximity.yaml │ │ │ │ ├── focus.yaml │ │ │ │ ├── name-count.yaml │ │ │ │ ├── parse.yaml │ │ │ │ ├── parsehead-proximity.yaml │ │ │ │ ├── stopword-count.yaml │ │ │ │ ├── token-overlap-count.yaml │ │ │ │ ├── token-proximity.yaml │ │ │ │ └── type-coercion.yaml │ │ │ └── simple.yaml │ │ └── yesno │ │ │ ├── all-yes.yaml │ │ │ ├── predict.yaml │ │ │ ├── scorers │ │ │ ├── alternate-answer.yaml │ │ │ ├── concept-overlap.yaml │ │ │ ├── expected-answer-overlap.yaml │ │ │ ├── negation.yaml │ │ │ ├── sentiment.yaml │ │ │ └── token-overlap.yaml │ │ │ └── train.yaml │ ├── answer_type │ │ ├── feature-constructor.yaml │ │ ├── gslabel-concept-search.yaml │ │ ├── gslabel-concept.yaml │ │ ├── load-cv.yaml │ │ ├── predict.yaml │ │ └── train.yaml │ ├── cas-serialize.yaml │ ├── concept │ │ ├── rerank │ │ │ ├── candidate-provider.yaml │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── lucene.yaml │ │ │ ├── name-match.yaml │ │ │ ├── score-sum.yaml │ │ │ ├── scorers │ │ │ │ ├── lucene.yaml │ │ │ │ ├── name-match.yaml │ │ │ │ └── original-score.yaml │ │ │ └── searchid-weight.yaml │ │ └── retrieval │ │ │ └── lucene.yaml │ ├── document │ │ ├── rerank │ │ │ ├── candidate-provider.yaml │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── cv-load.yaml │ │ │ ├── logreg.yaml │ │ │ └── scorers │ │ │ │ ├── lucene.yaml │ │ │ │ └── original-score.yaml │ │ └── retrieval │ │ │ └── lucene.yaml │ ├── evidence │ │ ├── concept │ │ │ ├── frequent-phrase.yaml │ │ │ ├── merge.yaml │ │ │ ├── opennlp-np.yaml │ │ │ ├── opennlp-npppnp.yaml │ │ │ └── search.yaml │ │ ├── parse │ │ │ ├── clearnlp-general.yaml │ │ │ ├── lingpipe-indoeuro-brown.yaml │ │ │ 
└── lingpipe-regex-brown.yaml │ │ └── passage-to-view.yaml │ ├── learning_base │ │ ├── classifier-predict.yaml │ │ ├── classifier-train.yaml │ │ └── cv-load.yaml │ ├── passage │ │ ├── rerank │ │ │ ├── candidate-provider.yaml │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ └── scorers │ │ │ │ ├── lucene.yaml │ │ │ │ ├── meta-info.yaml │ │ │ │ └── original-score.yaml │ │ └── retrieval │ │ │ ├── document-to-passage.yaml │ │ │ ├── lucene-sentence-improve.yaml │ │ │ └── lucene-sentence.yaml │ ├── preprocess │ │ ├── passage-concept-cache.yaml │ │ └── question-concept-cache.yaml │ ├── providers │ │ ├── kb │ │ │ ├── frequent-phrase.yaml │ │ │ ├── opennlp-np.yaml │ │ │ ├── opennlp-npppnp.yaml │ │ │ └── opennlp.yaml │ │ ├── ml │ │ │ └── classifiers │ │ │ │ ├── feature-constructor.yaml │ │ │ │ ├── liblinear.yaml │ │ │ │ ├── libsvm.yaml │ │ │ │ ├── meka-pcc.yaml │ │ │ │ ├── meka.yaml │ │ │ │ ├── weka-cvr.yaml │ │ │ │ ├── weka-knn.yaml │ │ │ │ ├── weka-logistic.yaml │ │ │ │ ├── weka-reptree.yaml │ │ │ │ └── weka.yaml │ │ └── parser │ │ │ ├── clearnlp-general.yaml │ │ │ ├── lingpipe-indoeuro-brown.yaml │ │ │ └── lingpipe-regex-brown.yaml │ └── question │ │ ├── concept │ │ ├── opennlp-np.yaml │ │ └── opennlp-npppnp.yaml │ │ ├── focus.yaml │ │ ├── parse │ │ ├── clearnlp-general.yaml │ │ ├── lingpipe-indoeuro-brown.yaml │ │ └── lingpipe-regex-brown.yaml │ │ └── yesno │ │ └── yesno-to-factoid.yaml ├── bioasq │ ├── collection │ │ └── json │ │ │ └── json-cas-consumer.yaml │ ├── concept │ │ ├── rerank │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ ├── ontology-logreg-params.txt │ │ │ ├── scorers │ │ │ │ └── gopubmed.yaml │ │ │ └── searchid-weight.yaml │ │ └── retrieval │ │ │ ├── gopubmed-separate.yaml │ │ │ └── gopubmed.yaml │ ├── document │ │ └── retrieval │ │ │ ├── dixu-proprietary.yaml │ │ │ └── gopubmed.yaml │ ├── eval │ │ └── calculator │ │ │ 
└── answer-eval-calculator.yaml │ ├── gs │ │ └── bioasq-qa-gs-decorator.yaml │ ├── passage │ │ ├── pmc-content.yaml.template │ │ └── retrieval │ │ │ └── document-to-passage.yaml │ ├── phase-a-evaluate.yaml │ ├── phase-a-test.yaml │ ├── phase-a-train-concept-document.yaml │ ├── phase-a-train-snippet.yaml │ ├── phase-b-evaluate-factoid-list.yaml │ ├── phase-b-evaluate-yesno.yaml │ ├── phase-b-test-factoid-list.yaml │ ├── phase-b-test-yesno.yaml │ ├── phase-b-train-answer-collective-score.yaml │ ├── phase-b-train-answer-score.yaml │ ├── phase-b-train-answer-type.yaml │ ├── phase-b-train-yesno.yaml │ ├── preprocess-answer-type-gslabel.yaml │ ├── preprocess-kb-cache.yaml │ ├── triple │ │ └── retrieval │ │ │ └── gopubmed.yaml │ └── yesno-evidence-alternate.yaml ├── bioqa │ ├── abstract_query │ │ └── token-concept.yaml │ ├── answer │ │ ├── cao │ │ │ ├── predict-crf.yaml │ │ │ ├── predict-hmm.yaml │ │ │ ├── train-crf.yaml │ │ │ ├── train-hmm.yaml │ │ │ ├── transducer-crf.yaml │ │ │ └── transducer-hmm.yaml │ │ ├── collective_score │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ ├── weka-cvr-predict.yaml │ │ │ ├── weka-cvr-train.yaml │ │ │ └── weka-cvr.yaml │ │ ├── generate │ │ │ ├── generate.yaml │ │ │ └── generators │ │ │ │ └── concept.yaml │ │ ├── score │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── cv-load-liblinear.yaml │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ ├── scorers │ │ │ │ ├── concept-proximity.yaml │ │ │ │ ├── parsehead-proximity.yaml │ │ │ │ ├── stopword-count.yaml │ │ │ │ └── token-proximity.yaml │ │ │ ├── weka-cvr-predict.yaml │ │ │ ├── weka-cvr-train.yaml │ │ │ └── weka-cvr.yaml │ │ └── yesno │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ ├── predict.yaml │ │ │ ├── scorers │ │ │ ├── alternate-answer.yaml │ │ │ └── token-overlap.yaml │ │ │ ├── train.yaml │ │ │ ├── 
weka-cvr-predict.yaml │ │ │ ├── weka-cvr-train.yaml │ │ │ ├── weka-cvr.yaml │ │ │ ├── weka-logistic-predict.yaml │ │ │ ├── weka-logistic-train.yaml │ │ │ └── weka-logistic.yaml │ ├── answer_type │ │ ├── gslabel-tmtool.yaml │ │ ├── gslabel-uts.yaml │ │ ├── liblinear-null-predict.yaml │ │ ├── liblinear-null-train.yaml │ │ ├── liblinear-null.yaml │ │ ├── liblinear-predict.yaml │ │ ├── liblinear-train.yaml │ │ ├── liblinear.yaml │ │ ├── load-cv-liblinear-null.yaml │ │ ├── load-cv-liblinear.yaml │ │ ├── meka-pcc-null-predict.yaml │ │ ├── meka-pcc-null-train.yaml │ │ ├── meka-pcc-null.yaml │ │ ├── meka-pcc-predict.yaml │ │ ├── meka-pcc-train.yaml │ │ ├── meka-pcc.yaml │ │ ├── weka-knn-null-predict.yaml │ │ ├── weka-knn-null-train.yaml │ │ ├── weka-knn-null.yaml │ │ ├── weka-knn-predict.yaml │ │ ├── weka-knn-train.yaml │ │ ├── weka-knn.yaml │ │ ├── weka-reptree-null-predict.yaml │ │ ├── weka-reptree-null-train.yaml │ │ ├── weka-reptree-null.yaml │ │ ├── weka-reptree-predict.yaml │ │ ├── weka-reptree-train.yaml │ │ └── weka-reptree.yaml │ ├── concept │ │ ├── .gitignore │ │ ├── rerank │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ ├── scorers │ │ │ │ └── lucene-bioconcept.yaml.template │ │ │ ├── weka-logistic-predict.yaml │ │ │ ├── weka-logistic-train.yaml │ │ │ └── weka-logistic.yaml │ │ └── retrieval │ │ │ └── lucene-bioconcept.yaml.template │ ├── document │ │ ├── .gitignore │ │ ├── rerank │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── cv-load-liblinear.yaml │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ ├── logreg-params.txt │ │ │ ├── logreg-pubmed.yaml │ │ │ ├── scorers │ │ │ │ └── lucene-medline.yaml.template │ │ │ ├── weka-logistic-predict.yaml │ │ │ ├── weka-logistic-train.yaml │ │ │ └── weka-logistic.yaml │ │ └── retrieval │ │ │ └── lucene-medline.yaml.template │ ├── evidence │ 
│ ├── concept │ │ │ ├── lingpipe-genetag.yaml │ │ │ ├── lingpipe-genia.yaml │ │ │ ├── metamap-cached.yaml │ │ │ ├── search-uts-cached.yaml │ │ │ ├── search-uts.yaml │ │ │ ├── tmtool-cached.yaml │ │ │ └── tmtool.yaml │ │ └── parse │ │ │ ├── clearnlp-bioinformatics.yaml │ │ │ ├── clearnlp-medical.yaml │ │ │ ├── lingpipe-indoeuro-genia.yaml │ │ │ ├── lingpipe-indoeuro-medpost.yaml │ │ │ ├── lingpipe-regex-genia.yaml │ │ │ └── lingpipe-regex-medpost.yaml │ ├── passage │ │ ├── rerank │ │ │ ├── classifier-predict.yaml │ │ │ ├── classifier-train.yaml │ │ │ ├── liblinear-predict.yaml │ │ │ ├── liblinear-train.yaml │ │ │ ├── liblinear.yaml │ │ │ └── scorers │ │ │ │ └── lucene.yaml │ │ └── retrieval │ │ │ ├── lucene-sentence-medline-improve.yaml │ │ │ └── lucene-sentence-medline.yaml │ ├── preprocess │ │ ├── passage-kb-cache.yaml │ │ └── question-kb-cache.yaml │ ├── providers │ │ ├── kb │ │ │ ├── .gitignore │ │ │ ├── concept-search-uts-cached.yaml.template │ │ │ ├── concept-search-uts.yaml.template │ │ │ ├── lingpipe-genetag.yaml │ │ │ ├── lingpipe-genia.yaml │ │ │ ├── metamap-cached.yaml.template │ │ │ ├── metamap.yaml.template │ │ │ ├── synonym-uts-cached.yaml.template │ │ │ ├── synonym-uts.yaml.template │ │ │ ├── tmtool-cached.yaml │ │ │ └── tmtool.yaml │ │ └── parser │ │ │ ├── clearnlp-bioinformatics.yaml │ │ │ ├── clearnlp-medical.yaml │ │ │ ├── lingpipe-indoeuro-genia.yaml │ │ │ ├── lingpipe-indoeuro-medpost.yaml │ │ │ ├── lingpipe-regex-genia.yaml │ │ │ └── lingpipe-regex-medpost.yaml │ └── question │ │ ├── concept │ │ ├── lingpipe-genetag.yaml │ │ ├── lingpipe-genia.yaml │ │ ├── metamap-cached.yaml │ │ ├── metamap.yaml │ │ ├── tmtool-cached.yaml │ │ └── tmtool.yaml │ │ └── parse │ │ ├── clearnlp-bioinformatics.yaml │ │ ├── clearnlp-medical.yaml │ │ ├── lingpipe-indoeuro-genia.yaml │ │ ├── lingpipe-indoeuro-medpost.yaml │ │ ├── lingpipe-regex-genia.yaml │ │ └── lingpipe-regex-medpost.yaml ├── dictionaries │ ├── allowed-umls-types.txt │ ├── bioconcept-uri-prefix.tsv │ 
├── negation-cues.txt │ ├── negative-words.txt │ ├── noun-tags.txt │ ├── pos-tags.txt │ ├── positive-words.txt │ ├── quantity-question-words.txt │ ├── stoplist-gene-modified.txt │ ├── stoplist-gene-top5k.txt │ ├── stoplist-gene.txt │ ├── stoplist-top5k.tsv │ ├── stoplist-top5k.txt │ └── stoplist.txt ├── log4j.properties ├── models │ ├── bioasq │ │ └── concept │ │ │ └── rerank │ │ │ ├── 4b-dev-liblinear.cv │ │ │ ├── 4b-dev-liblinear.findex │ │ │ ├── 4b-dev-liblinear.lindex │ │ │ └── 4b-dev-liblinear.model │ └── bioqa │ │ ├── answer │ │ ├── collective_score │ │ │ ├── 4b-dev-liblinear.findex │ │ │ ├── 4b-dev-liblinear.lindex │ │ │ ├── 4b-dev-liblinear.model │ │ │ ├── 4b-dev-weka-cvr.dataset-schema │ │ │ └── 4b-dev-weka-cvr.model │ │ ├── score │ │ │ ├── 4b-dev-liblinear.findex │ │ │ ├── 4b-dev-liblinear.lindex │ │ │ ├── 4b-dev-liblinear.model │ │ │ ├── 4b-dev-weka-cvr.dataset-schema │ │ │ └── 4b-dev-weka-cvr.model │ │ └── yesno │ │ │ ├── 4b-dev-liblinear.findex │ │ │ ├── 4b-dev-liblinear.lindex │ │ │ ├── 4b-dev-liblinear.model │ │ │ ├── 4b-dev-weka-cvr.dataset-schema │ │ │ ├── 4b-dev-weka-cvr.model │ │ │ ├── 4b-dev-weka-logistic.dataset-schema │ │ │ ├── 4b-dev-weka-logistic.model │ │ │ ├── 4b-dev-weka-other.dataset-schema │ │ │ ├── 4b-dev-weka-other.model │ │ │ └── 4b-dev-weka.arff │ │ ├── answer_type │ │ ├── 4b-dev-liblinear-null.findex │ │ ├── 4b-dev-liblinear-null.lindex │ │ ├── 4b-dev-liblinear-null.model │ │ ├── 4b-dev-liblinear.findex │ │ ├── 4b-dev-liblinear.lindex │ │ └── 4b-dev-liblinear.model │ │ ├── concept │ │ └── rerank │ │ │ ├── 4b-dev-liblinear.findex │ │ │ ├── 4b-dev-liblinear.lindex │ │ │ ├── 4b-dev-liblinear.model │ │ │ ├── 4b-dev-weka-logistic.dataset-schema │ │ │ ├── 4b-dev-weka-logistic.model │ │ │ └── 4b-dev-weka.arff │ │ ├── document │ │ └── rerank │ │ │ ├── 4b-dev-liblinear.findex │ │ │ ├── 4b-dev-liblinear.lindex │ │ │ ├── 4b-dev-liblinear.model │ │ │ ├── 4b-dev-weka-logistic.dataset-schema │ │ │ ├── 4b-dev-weka-logistic.model │ │ │ └── 
4b-dev-weka.arff │ │ └── passage │ │ └── rerank │ │ ├── 4b-dev-liblinear.findex │ │ ├── 4b-dev-liblinear.lindex │ │ └── 4b-dev-liblinear.model └── properties │ ├── direct-gopubmed-concept.properties │ ├── direct-gopubmed-concept.properties.old │ ├── direct-gopubmed-document.properties │ ├── direct-gopubmed-document.properties.old │ └── direct-gopubmed-triple.properties └── script └── bioasq-dev-fixer.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Package Files # 4 | *.jar 5 | *.war 6 | *.ear 7 | /target/ 8 | /persistence/ 9 | /result/ 10 | /input/ 11 | /src/main/resources/*-cache/ -------------------------------------------------------------------------------- /INTERNAL_INSTRUCTION.md: -------------------------------------------------------------------------------- 1 | Internal Resource Preparation Instruction for OAQA Biomedical Question Answering (BioASQ) System 2 | ================================================================================================ 3 | 4 | _Note: If you are not a CMU OAQA person, please refer to the general [README](README.md) for preparing the resource._ 5 | 6 | 1. You need to contact [Zi Yang](http://www.cs.cmu.edu/~ziy/) to obtain the UMLS account, if you don't have one nor plan to register one, and our local copies of the resources. Uncompress the `.tgz` file (15G). 7 | * `bioasq-internal-resources/index` directory has two Lucene indexes 8 | * `bioasq-internal-resources/index/medline16n-lucene/` is for the Medline corpus 9 | * `bioasq-internal-resources/index/bioconcept-lucene/` is for the biomedical ontology dumps 10 | * `bioasq-internal-resources/input` directory contains the test files and the original `4b-dev.json` development set 11 | * `bioasq-internal-resources/medline16n.db3` is the sqlite database that has the `pmid2abstract` table 12 | 1. You need to generate the `4b-dev.json.auto.fulltext` file using `4b-dev.json` and `medline16n.db3` 13 | 1. 
Install the Python [`editdistance`](https://pypi.python.org/pypi/editdistance) package. 14 | 1. Download the Python script [`bioasq-dev-fixer.py`](src/main/script/bioasq-dev-fixer.py) 15 | 1. Fix the formatting errors in the development file. 16 | ``` 17 | python bioasq-dev-fixer.py path_to_4b-dev.json path_to_medline16n.db3 4b-dev.json.auto.fulltext 18 | ``` 19 | 1. The resulting file should have an MD5 checksum of `db72a8fe3f1b3d605b9c39efdd21249d`. 20 | 1. Now you can continue on to the `Install` section in the [README](README.md). 21 | -------------------------------------------------------------------------------- /input/one-question.json: -------------------------------------------------------------------------------- 1 | { 2 | "questions": [ 3 | { 4 | "body": "What is the role of MMP-1 in breast cancer?", 5 | "type": "factoid", 6 | "id": "0" 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/CasSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa; 16 | 17 | import java.io.File; 18 | import java.io.IOException; 19 | 20 | import org.apache.uima.UIMAException; 21 | import org.apache.uima.UimaContext; 22 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 23 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 24 | import org.apache.uima.fit.factory.JCasFactory; 25 | import org.apache.uima.fit.util.CasIOUtil; 26 | import org.apache.uima.jcas.JCas; 27 | import org.apache.uima.resource.ResourceInitializationException; 28 | import org.apache.uima.util.CasCopier; 29 | 30 | import com.google.common.base.Strings; 31 | 32 | import edu.cmu.lti.oaqa.baseqa.util.UimaContextHelper; 33 | import edu.cmu.lti.oaqa.ecd.phase.ProcessingStepUtils; 34 | 35 | /** 36 | * This utility {@link JCasAnnotator_ImplBase} serializes the {@link JCas} into a xmi 37 | * file, whose directory can be specified in the descriptor via the parameter dir. 38 | * 39 | * @author Zi Yang created on 4/21/15 40 | */ 41 | public class CasSerializer extends JCasAnnotator_ImplBase { 42 | 43 | private String typesystem; 44 | 45 | private String dir; 46 | 47 | @Override 48 | public void initialize(UimaContext context) throws ResourceInitializationException { 49 | super.initialize(context); 50 | typesystem = UimaContextHelper.getConfigParameterStringValue(context, "typesystem"); 51 | dir = UimaContextHelper.getConfigParameterStringValue(context, "dir"); 52 | } 53 | 54 | @Override 55 | public void process(JCas jcas) throws AnalysisEngineProcessException { 56 | try { 57 | JCas copied = JCasFactory.createJCas(typesystem); 58 | CasCopier.copyCas(jcas.getCas(), copied.getCas(), true, true); 59 | String id = Strings.padStart(ProcessingStepUtils.getSequenceId(jcas), 4, '0'); 60 | CasIOUtil.writeXmi(copied, new File(dir, id + ".xmi")); 61 | } catch (IOException | UIMAException e) { 62 | e.printStackTrace(); 63 | } 64 | } 65 | 66 | } 67 | 
-------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/abstract_query/BagOfTokenAbstractQueryGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.abstract_query; 16 | 17 | import edu.cmu.lti.oaqa.type.nlp.Token; 18 | import edu.cmu.lti.oaqa.type.retrieval.AtomicQueryConcept; 19 | import edu.cmu.lti.oaqa.util.TypeConstants; 20 | import edu.cmu.lti.oaqa.util.TypeFactory; 21 | import edu.cmu.lti.oaqa.util.TypeUtil; 22 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 23 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 24 | import org.apache.uima.jcas.JCas; 25 | 26 | import java.util.List; 27 | import java.util.Objects; 28 | 29 | import static java.util.stream.Collectors.toList; 30 | 31 | /** 32 | * Create an {@link edu.cmu.lti.oaqa.type.retrieval.AbstractQuery} by grouping only the {@link Token}s. 
33 | * 34 | * @see TokenSelectionAbstractQueryGenerator 35 | * @author Zi Yang created on 11/4/14 36 | */ 37 | public class BagOfTokenAbstractQueryGenerator extends JCasAnnotator_ImplBase { 38 | 39 | private static final TypeConstants.ConceptType CONCEPT_TYPE = TypeConstants.ConceptType.KEYWORD_TYPE; 40 | 41 | @Override 42 | public void process(JCas jcas) throws AnalysisEngineProcessException { 43 | List tokens = TypeUtil.getOrderedTokens(jcas); 44 | List qconcepts = tokens.stream().map(token -> { 45 | String originalText = token.getCoveredText(); 46 | String text = Objects.toString(token.getLemmaForm(), originalText); 47 | return TypeFactory.createAtomicQueryConcept(jcas, CONCEPT_TYPE, text, originalText); 48 | } ).collect(toList()); 49 | TypeFactory.createAbstractQuery(jcas, qconcepts).addToIndexes(); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/collective_score/scorers/OriginalCollectiveAnswerScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.answer.Answer; 20 | import edu.cmu.lti.oaqa.util.TypeUtil; 21 | import org.apache.uima.jcas.JCas; 22 | 23 | import java.util.List; 24 | import java.util.Map; 25 | import java.util.stream.IntStream; 26 | 27 | import static java.util.stream.Collectors.toMap; 28 | 29 | /** 30 | * A collective answer scorer that copies the original answer score from the individual answer 31 | * scoring process. 32 | * 33 | * @author Zi Yang created on 5/15/15 34 | */ 35 | public class OriginalCollectiveAnswerScorer extends AbstractScorer { 36 | 37 | private Map answer2irank; 38 | 39 | @Override 40 | public void prepare(JCas jcas) { 41 | List answers = TypeUtil.getRankedAnswers(jcas); 42 | answer2irank = IntStream.range(0, answers.size()).boxed() 43 | .collect(toMap(answers::get, i -> 1.0 / (1.0 + i))); 44 | } 45 | 46 | @Override 47 | public Map score(JCas jcas, Answer answer) { 48 | return ImmutableMap. builder().put("orig-score", answer.getScore()) 49 | .put("orig-rank", answer2irank.getOrDefault(answer, 0.0)).build(); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/generate/generators/CavGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.generate.generators; 16 | 17 | import java.util.List; 18 | 19 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 20 | import org.apache.uima.jcas.JCas; 21 | import org.apache.uima.resource.Resource; 22 | 23 | import edu.cmu.lti.oaqa.type.answer.CandidateAnswerVariant; 24 | 25 | /** 26 | * An interface that defines a {@link CandidateAnswerVariant} provider, generating a list of 27 | * {@link CandidateAnswerVariant}s from the input {@link JCas}. Additionally, one can specify in 28 | * which cases this provider is called using the {@link #accept(JCas)} method, if it is designed 29 | * to be used only for certain types of questions. Each {@link CavGenerator} instance should be 30 | * integrated into the {@link edu.cmu.lti.oaqa.baseqa.answer.generate.CavGenerationManager}. 
31 | * 32 | * @author Zi Yang created on 4/15/15 33 | */ 34 | public interface CavGenerator extends Resource { 35 | 36 | boolean accept(JCas jcas) throws AnalysisEngineProcessException; 37 | 38 | List generate(JCas jcas) throws AnalysisEngineProcessException; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/modify/modifiers/AnswerModifier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers; 16 | 17 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 18 | import org.apache.uima.jcas.JCas; 19 | import org.apache.uima.resource.Resource; 20 | 21 | /** 22 | * An interface that modifies existing {@link edu.cmu.lti.oaqa.type.answer.Answer} for certain types 23 | * of inputs, pluggable into {@link edu.cmu.lti.oaqa.baseqa.answer.modify.AnswerModificationManager} 24 | * to get executed. 
25 | * 26 | * @see edu.cmu.lti.oaqa.baseqa.answer.modify.AnswerModificationManager 27 | * @see CavModifier 28 | * 29 | * @author Zi Yang created on 5/1/15 30 | */ 31 | public interface AnswerModifier extends Resource { 32 | 33 | boolean accept(JCas jcas) throws AnalysisEngineProcessException; 34 | 35 | void modify(JCas jcas) throws AnalysisEngineProcessException; 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/modify/modifiers/CavModifier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers; 16 | 17 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 18 | import org.apache.uima.jcas.JCas; 19 | import org.apache.uima.resource.Resource; 20 | 21 | /** 22 | * An interface that modifies existing {@link edu.cmu.lti.oaqa.type.answer.CandidateAnswerVariant} 23 | * for certain types of inputs, pluggable into 24 | * {@link edu.cmu.lti.oaqa.baseqa.answer.modify.CavModificationManager} to get executed. 
25 | * 26 | * @see edu.cmu.lti.oaqa.baseqa.answer.modify.CavModificationManager 27 | * @see AnswerModifier 28 | * 29 | * @author Zi Yang created on 4/15/15 30 | */ 31 | public interface CavModifier extends Resource { 32 | 33 | boolean accept(JCas jcas) throws AnalysisEngineProcessException; 34 | 35 | void modify(JCas jcas) throws AnalysisEngineProcessException; 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/score/scorers/AnswerTypeAnswerScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.score.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.answer.Answer; 20 | import edu.cmu.lti.oaqa.type.input.Question; 21 | import edu.cmu.lti.oaqa.type.nlp.LexicalAnswerType; 22 | import edu.cmu.lti.oaqa.util.TypeUtil; 23 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 24 | import org.apache.uima.jcas.JCas; 25 | 26 | import java.util.Collection; 27 | import java.util.Map; 28 | 29 | /** 30 | * An instance of an {@link AbstractScorer} for {@link Answer}s that uses nominal raw 31 | * {@link edu.cmu.lti.oaqa.type.answer.AnswerType} texts as features, and generates a score of 1.0 32 | * for this particular feature, and 0.0 otherwise. 33 | * 34 | * @see TypeCoercionAnswerScorer 35 | * 36 | * @author Zi Yang created on 4/25/16 37 | */ 38 | public class AnswerTypeAnswerScorer extends AbstractScorer { 39 | 40 | private Map feat2value; 41 | 42 | @Override 43 | public void prepare(JCas jcas) throws AnalysisEngineProcessException { 44 | Collection answerTypes = TypeUtil.getLexicalAnswerTypes(jcas); 45 | ImmutableMap.Builder builder = ImmutableMap.builder(); 46 | for (LexicalAnswerType answerType : answerTypes) { 47 | builder.put("at-" + answerType.getLabel(), 1.0); 48 | } 49 | Question question = TypeUtil.getQuestion(jcas); 50 | builder.put("qt-" + question.getQuestionType(), 1.0); 51 | feat2value = builder.build(); 52 | } 53 | 54 | @Override 55 | public Map score(JCas jcas, Answer answer) { 56 | return feat2value; 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/score/scorers/AvgCoveredTokenCountAnswerScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 
Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.score.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.answer.Answer; 20 | import edu.cmu.lti.oaqa.type.answer.CandidateAnswerOccurrence; 21 | import edu.cmu.lti.oaqa.type.nlp.Token; 22 | import edu.cmu.lti.oaqa.util.TypeUtil; 23 | import org.apache.uima.fit.util.JCasUtil; 24 | import org.apache.uima.jcas.JCas; 25 | 26 | import java.util.Collection; 27 | import java.util.Map; 28 | import java.util.Set; 29 | 30 | import static java.util.stream.Collectors.toSet; 31 | 32 | /** 33 | * An instance of an {@link AbstractScorer} for {@link Answer}s that calculates the average length 34 | * of {@link CandidateAnswerOccurrence}s. 
35 | * 36 | * @author Zi Yang created on 4/17/15 37 | */ 38 | public class AvgCoveredTokenCountAnswerScorer extends AbstractScorer { 39 | 40 | @Override 41 | public Map score(JCas jcas, Answer answer) { 42 | Set caos = TypeUtil.getCandidateAnswerVariants(answer).stream() 43 | .map(TypeUtil::getCandidateAnswerOccurrences).flatMap(Collection::stream) 44 | .collect(toSet()); 45 | double count = caos.stream().mapToInt(cao -> JCasUtil.selectCovered(Token.class, cao).size()) 46 | .average().orElse(0); 47 | return ImmutableMap.of("avg-covered-token-count", count); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/score/scorers/CaoCountAnswerScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.score.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.answer.Answer; 20 | import edu.cmu.lti.oaqa.type.answer.CandidateAnswerOccurrence; 21 | import edu.cmu.lti.oaqa.type.nlp.Token; 22 | import edu.cmu.lti.oaqa.util.TypeUtil; 23 | import org.apache.uima.fit.util.JCasUtil; 24 | import org.apache.uima.jcas.JCas; 25 | 26 | import java.util.Collection; 27 | import java.util.Map; 28 | import java.util.Set; 29 | 30 | import static java.util.stream.Collectors.toSet; 31 | 32 | /** 33 | * An instance of an {@link AbstractScorer} for {@link Answer}s that calculates the number of 34 | * {@link CandidateAnswerOccurrence}s and the total number of tokens of all the 35 | * {@link CandidateAnswerOccurrence}s. 36 | * 37 | * @author Zi Yang created on 4/17/15 38 | */ 39 | public class CaoCountAnswerScorer extends AbstractScorer { 40 | 41 | @Override 42 | public Map score(JCas jcas, Answer answer) { 43 | Set caos = TypeUtil.getCandidateAnswerVariants(answer).stream() 44 | .map(TypeUtil::getCandidateAnswerOccurrences).flatMap(Collection::stream) 45 | .collect(toSet()); 46 | double count = caos.size(); 47 | double tokenCount = caos.stream() 48 | .mapToInt(cao -> JCasUtil.selectCovered(Token.class, cao).size()).sum(); 49 | return ImmutableMap.of("cao-count", count, "token-count", tokenCount); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/score/scorers/FocusAnswerScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.score.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.answer.Answer; 20 | import edu.cmu.lti.oaqa.type.nlp.Focus; 21 | import edu.cmu.lti.oaqa.util.TypeUtil; 22 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 23 | import org.apache.uima.jcas.JCas; 24 | 25 | import java.util.Map; 26 | 27 | /** 28 | * An instance of an {@link AbstractScorer} for {@link Answer}s that uses raw 29 | * {@link Focus} text as features, and generates a score of 1.0 for this particular feature, and 0.0 30 | * otherwise. 
31 | * 32 | * @see FocusOverlappingCountAnswerScorer 33 | * @see FocusProximityAnswerScorer 34 | * 35 | * @author Zi Yang created on 4/25/16 36 | */ 37 | public class FocusAnswerScorer extends AbstractScorer { 38 | 39 | private Map feat2value; 40 | 41 | @Override 42 | public void prepare(JCas jcas) throws AnalysisEngineProcessException { 43 | Focus focus = TypeUtil.getFocus(jcas); 44 | if (focus == null) { 45 | feat2value = ImmutableMap.of(); 46 | return; 47 | } 48 | feat2value = ImmutableMap.of("focus-token-" + focus.getToken().getCoveredText(), 1.0, 49 | "focus-label-" + focus.getLabel(), 1.0); 50 | } 51 | 52 | @Override 53 | public Map score(JCas jcas, Answer answer) { 54 | return feat2value; 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/score/scorers/NameCountAnswerScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.score.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.answer.Answer; 20 | import edu.cmu.lti.oaqa.util.TypeUtil; 21 | import org.apache.uima.jcas.JCas; 22 | 23 | import java.util.Map; 24 | 25 | /** 26 | * An instance of an {@link AbstractScorer} for {@link Answer}s that counts the total number of 27 | * variant names. 28 | * 29 | * @author Zi Yang created on 4/17/15 30 | */ 31 | public class NameCountAnswerScorer extends AbstractScorer { 32 | 33 | @Override 34 | public Map score(JCas jcas, Answer answer) { 35 | int value = TypeUtil.getCandidateAnswerVariantNames(answer).size(); 36 | return ImmutableMap.of("name-count", (double) value); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/answer/yesno/AllYesYesNoAnswerPredictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.answer.yesno; 16 | 17 | import edu.cmu.lti.oaqa.util.TypeFactory; 18 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 19 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 20 | import org.apache.uima.jcas.JCas; 21 | 22 | import java.util.Collections; 23 | 24 | /** 25 | * This dummy YESNO answerer always creates an answer of "yes" regardless of the question. 26 | * 27 | * @author Zi Yang created on 5/5/16 28 | */ 29 | public class AllYesYesNoAnswerPredictor extends JCasAnnotator_ImplBase { 30 | 31 | @Override 32 | public void process(JCas jcas) throws AnalysisEngineProcessException { 33 | TypeFactory.createAnswer(jcas, Collections.singletonList("yes")).addToIndexes(); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/concept/rerank/scorers/OriginalScoreConceptScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.retrieval.ConceptSearchResult; 20 | import org.apache.uima.jcas.JCas; 21 | 22 | import java.util.Map; 23 | 24 | /** 25 | * An instance of an {@link AbstractScorer} for {@link ConceptSearchResult}s that simply copies 26 | * the original score from the {@link ConceptSearchResult}. 27 | * 28 | * @see edu.cmu.lti.oaqa.baseqa.concept.retrieval.LuceneConceptRetrievalExecutor 29 | * 30 | * @author Zi Yang created on 4/6/16 31 | */ 32 | public class OriginalScoreConceptScorer extends AbstractScorer { 33 | 34 | @Override 35 | public Map score(JCas jcas, ConceptSearchResult result) { 36 | return ImmutableMap.of("original/rank", 1.0 / (result.getRank() + 1.0), "original/score", 37 | result.getScore()); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/document/rerank/DocumentCandidateProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.document.rerank; 16 | 17 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractCandidateProvider; 18 | import edu.cmu.lti.oaqa.baseqa.util.ViewType; 19 | import edu.cmu.lti.oaqa.type.retrieval.Document; 20 | import edu.cmu.lti.oaqa.util.TypeUtil; 21 | import org.apache.uima.jcas.JCas; 22 | 23 | import java.util.Collection; 24 | 25 | /** 26 | * An {@link AbstractCandidateProvider} for {@link Document}, used in relevant document reranking 27 | * training (via {@link edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierTrainer}), relevant 28 | * document prediction (via {@link edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierPredictor}), and 29 | * cross-validation prediction loading (via 30 | * {@link edu.cmu.lti.oaqa.baseqa.learning_base.CVPredictLoader}). 31 | * 32 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierTrainer 33 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierPredictor 34 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.CVPredictLoader 35 | * 36 | * @author @author Zi Yang created on 4/10/16 37 | */ 38 | public class DocumentCandidateProvider extends AbstractCandidateProvider { 39 | 40 | @Override 41 | public Collection getCandidates(JCas jcas) { 42 | return TypeUtil.getRankedDocuments(jcas); 43 | } 44 | 45 | @Override 46 | public void setScoreRank(Document candidate, double score, int rank) { 47 | candidate.setScore(score); 48 | candidate.setRank(rank); 49 | } 50 | 51 | @Override 52 | public Collection getGoldStandards(JCas jcas) { 53 | return TypeUtil.getRankedDocuments(ViewType.getGsView(jcas)); 54 | } 55 | 56 | @Override 57 | public boolean match(Document candidate, Collection gs) { 58 | return gs.stream().map(Document::getUri) 59 | .anyMatch(gsUri -> candidate.getUri().equals(gsUri)); 60 | } 61 | 62 | @Override 63 | public String getUri(Document candidate) { 64 | return candidate.getUri(); 65 | } 66 | 67 | @Override 68 | public String toString(Document candidate) { 69 | return 
TypeUtil.toString(candidate); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/document/rerank/scorers/OriginalScoreDocumentScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.document.rerank.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.retrieval.Document; 20 | import org.apache.uima.jcas.JCas; 21 | 22 | import java.util.Map; 23 | 24 | /** 25 | * An instance of an {@link AbstractScorer} for {@link Document}s that simply copies 26 | * the original score from the {@link Document}. 
27 | * 28 | * @see edu.cmu.lti.oaqa.baseqa.document.retrieval.LuceneDocumentRetrievalExecutor 29 | * 30 | * @author Zi Yang created on 4/6/16 31 | */ 32 | public class OriginalScoreDocumentScorer extends AbstractScorer { 33 | 34 | @Override 35 | public Map score(JCas jcas, Document result) { 36 | return ImmutableMap.of("original/rank", 1.0 / (result.getRank() + 1.0), "original/score", 37 | result.getScore()); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/evidence/parse/PassageParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.evidence.parse; 16 | 17 | import edu.cmu.lti.oaqa.baseqa.providers.parser.ParserProvider; 18 | import edu.cmu.lti.oaqa.baseqa.util.ProviderCache; 19 | import edu.cmu.lti.oaqa.baseqa.util.UimaContextHelper; 20 | import edu.cmu.lti.oaqa.baseqa.util.ViewType; 21 | import edu.cmu.lti.oaqa.type.nlp.Token; 22 | import org.apache.uima.UimaContext; 23 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 24 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 25 | import org.apache.uima.jcas.JCas; 26 | import org.apache.uima.resource.ResourceInitializationException; 27 | 28 | import java.util.List; 29 | 30 | /** 31 | * This {@link JCasAnnotator_ImplBase} uses a {@link ParserProvider} to parse and annotate the 32 | * passages residing in the individual views. 33 | * 34 | * @see ParserProvider 35 | * 36 | * @author Zi Yang created on 4/12/15 37 | */ 38 | public class PassageParser extends JCasAnnotator_ImplBase { 39 | 40 | private ParserProvider parserProvider; 41 | 42 | private String viewNamePrefix; 43 | 44 | @Override 45 | public void initialize(UimaContext context) throws ResourceInitializationException { 46 | super.initialize(context); 47 | String parserProviderName = UimaContextHelper.getConfigParameterStringValue(context, 48 | "parser-provider"); 49 | parserProvider = ProviderCache.getProvider(parserProviderName, ParserProvider.class); 50 | viewNamePrefix = UimaContextHelper.getConfigParameterStringValue(context, "view-name-prefix"); 51 | } 52 | 53 | @Override 54 | public void process(JCas jcas) throws AnalysisEngineProcessException { 55 | ViewType.listViews(jcas, viewNamePrefix).stream().map(parserProvider::parseDependency) 56 | .flatMap(List::stream).forEach(Token::addToIndexes); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/learning_base/AbstractCandidateProvider.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.learning_base; 16 | 17 | import edu.cmu.lti.oaqa.ecd.config.ConfigurableProvider; 18 | import org.apache.uima.resource.ResourceSpecifier; 19 | 20 | import java.util.Map; 21 | 22 | /** 23 | * An abstract class for the interface {@link CandidateProvider} that can be configured using 24 | * {@link ConfigurableProvider#initialize(ResourceSpecifier, Map)}}. 25 | * 26 | * @author Zi Yang created on 5/9/16 27 | */ 28 | public abstract class AbstractCandidateProvider extends ConfigurableProvider 29 | implements CandidateProvider { 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/learning_base/AbstractScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.learning_base; 16 | 17 | import edu.cmu.lti.oaqa.ecd.config.ConfigurableProvider; 18 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 19 | import org.apache.uima.jcas.JCas; 20 | import org.apache.uima.resource.Resource; 21 | import org.apache.uima.resource.ResourceSpecifier; 22 | 23 | import java.util.Map; 24 | 25 | /** 26 | * An abstract class for the interface {@link Scorer} that can be configured using 27 | * {@link ConfigurableProvider#initialize(ResourceSpecifier, Map)}. 28 | * 29 | * @author Zi Yang created on 5/9/16 30 | */ 31 | public abstract class AbstractScorer extends ConfigurableProvider implements Scorer { 32 | } -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/learning_base/CandidateProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. 
See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.learning_base; 16 | 17 | import org.apache.uima.jcas.JCas; 18 | import org.apache.uima.resource.Resource; 19 | 20 | import java.util.Collection; 21 | import java.util.List; 22 | 23 | /** 24 | * An interface that standardizes basic operations across various basic data types. 25 | * An implemented class can be plugged into a 26 | * {@link edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierTrainer} for classification training, a 27 | * {@link edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierPredictor} for classification predication, 28 | * and cross-validation prediction loading (via 29 | * {@link edu.cmu.lti.oaqa.baseqa.learning_base.CVPredictLoader}). 30 | * 31 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierTrainer 32 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierPredictor 33 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.CVPredictLoader 34 | * 35 | * @author Zi Yang created on 5/9/16 36 | */ 37 | public interface CandidateProvider extends Resource { 38 | 39 | Collection getCandidates(JCas jcas); 40 | 41 | void setScoreRank(T candidate, double score, int rank); 42 | 43 | String getUri(T candidate); 44 | 45 | Collection getGoldStandards(JCas jcas); 46 | 47 | boolean match(T candidate, Collection gs); 48 | 49 | String toString(T candidate); 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/passage/rerank/scorers/MetaInfoPassageScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.passage.rerank.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.ecd.config.ConfigurableProvider; 20 | import edu.cmu.lti.oaqa.type.retrieval.Passage; 21 | import org.apache.uima.jcas.JCas; 22 | 23 | import java.util.Map; 24 | 25 | /** 26 | * An instance of an {@link AbstractScorer} for {@link Passage}s that produces binary features for 27 | * the meta info, such as section label, begin offset, end offset, and the length of the passage. 
28 | * 29 | * @author Zi Yang created on 4/6/16 30 | */ 31 | public class MetaInfoPassageScorer extends AbstractScorer { 32 | 33 | private static final int WIDTH = 10; 34 | 35 | @Override 36 | public Map score(JCas jcas, Passage result) { 37 | int begin = result.getOffsetInBeginSection(); 38 | int end = result.getOffsetInEndSection(); 39 | return ImmutableMap 40 | .of("section-label/" + result.getBeginSection(), 1.0, "begin-offset/" + begin / WIDTH, 41 | 1.0, "end-offset/" + end / WIDTH, 1.0, "length/" + (end - begin) / WIDTH, 1.0); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/passage/rerank/scorers/OriginalScorePassageScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.passage.rerank.scorers; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | import edu.cmu.lti.oaqa.baseqa.learning_base.AbstractScorer; 19 | import edu.cmu.lti.oaqa.type.retrieval.Passage; 20 | import org.apache.uima.jcas.JCas; 21 | 22 | import java.util.Map; 23 | 24 | /** 25 | * An instance of an {@link AbstractScorer} for {@link Passage}s that simply copies 26 | * the original score from the {@link Passage} candidate generation step, which can be just the 27 | * the score of the document that the passage is retrieved from. 28 | * 29 | * @see edu.cmu.lti.oaqa.baseqa.passage.retrieval.DocumentToPassageConverter 30 | * 31 | * @author Zi Yang created on 4/6/16 32 | */ 33 | public class OriginalScorePassageScorer extends AbstractScorer { 34 | 35 | @Override 36 | public Map score(JCas jcas, Passage result) { 37 | return ImmutableMap.of("original/rank", 1.0 / (result.getRank() + 1.0), "original/score", 38 | result.getScore()); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/preprocess/ConceptCacheUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.preprocess; 16 | 17 | import static java.util.stream.Collectors.toSet; 18 | 19 | import java.util.ArrayList; 20 | import java.util.Collection; 21 | import java.util.List; 22 | import java.util.Set; 23 | 24 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 25 | 26 | import edu.cmu.lti.oaqa.baseqa.providers.kb.ConceptProvider; 27 | import edu.cmu.lti.oaqa.baseqa.providers.kb.SynonymExpansionProvider; 28 | import edu.cmu.lti.oaqa.type.kb.Concept; 29 | import edu.cmu.lti.oaqa.util.TypeUtil; 30 | 31 | /** 32 | * A utility class for concept caching. 33 | * 34 | * TODO: a standardized interface for all caching services. 35 | * 36 | * @author Zi Yang created on 4/20/15 37 | */ 38 | class ConceptCacheUtil { 39 | 40 | static void cacheTexts(List texts, List conceptProviders, 41 | List synonymExpansionProviders) throws AnalysisEngineProcessException { 42 | List concepts = new ArrayList<>(); 43 | for (ConceptProvider conceptProvider : conceptProviders) { 44 | concepts.addAll(conceptProvider.getConcepts(texts, conceptProvider.getClass().getName())); 45 | } 46 | for (SynonymExpansionProvider synonymExpansionProvider : synonymExpansionProviders) { 47 | Set ids = concepts.stream().map(TypeUtil::getConceptIds).flatMap(Collection::stream) 48 | .filter(synonymExpansionProvider::accept).collect(toSet()); 49 | synonymExpansionProvider.getSynonyms(ids); 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/providers/kb/ConceptSearchProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.providers.kb; 16 | 17 | import edu.cmu.lti.oaqa.type.kb.Concept; 18 | import org.apache.uima.UIMAException; 19 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 20 | import org.apache.uima.fit.factory.JCasFactory; 21 | import org.apache.uima.jcas.JCas; 22 | import org.apache.uima.resource.Resource; 23 | 24 | import java.util.List; 25 | import java.util.Optional; 26 | 27 | /** 28 | *

29 | * An implementation of this interface can look up a concept (given by its name) in an ontology, and 30 | * return the most relevant {@link Concept}. 31 | *

32 | *

33 | * In comparison, {@link ConceptProvider} is often used before a {@link ConceptSearchProvider}, 34 | * which identifies the concepts from a plain text. 35 | * The concepts identified from {@link ConceptProvider} do not have to be existing entries in any 36 | * ontology, since some {@link ConceptProvider}s make "guess" based on their morphological 37 | * structures. 38 | *

39 | * 40 | * @see ConceptProvider 41 | * 42 | * @author Zi Yang created on 4/4/15 43 | */ 44 | public interface ConceptSearchProvider extends Resource { 45 | 46 | default Optional search(String string) throws AnalysisEngineProcessException { 47 | JCas jcas; 48 | try { 49 | jcas = JCasFactory.createJCas(); 50 | } catch (UIMAException e) { 51 | throw new AnalysisEngineProcessException(e); 52 | } 53 | return search(jcas, string); 54 | } 55 | 56 | Optional search(JCas jcas, String string) throws AnalysisEngineProcessException; 57 | 58 | default Optional search(JCas jcas, String string, String searchType) 59 | throws AnalysisEngineProcessException { 60 | return search(jcas, string, searchType, 1).stream().findFirst(); 61 | } 62 | 63 | List search(JCas jcas, String string, String searchType, int hits) 64 | throws AnalysisEngineProcessException; 65 | 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/providers/kb/SynonymExpansionProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.providers.kb; 16 | 17 | import java.util.Collection; 18 | import java.util.Map; 19 | import java.util.Set; 20 | 21 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 22 | import org.apache.uima.resource.Resource; 23 | 24 | /** 25 | * An implementation of this interface can identify the list of synonyms for a given concept ID. 26 | * It should also override the {@link #accept(String)} method to filter the ID that is supported 27 | * by the particular synonym expansion provider. 28 | * 29 | * @author Zi Yang created on 4/20/15 30 | */ 31 | public interface SynonymExpansionProvider extends Resource { 32 | 33 | boolean accept(String id); 34 | 35 | Set getSynonyms(String id) throws AnalysisEngineProcessException; 36 | 37 | Map> getSynonyms(Collection ids) 38 | throws AnalysisEngineProcessException; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/providers/ml/classifiers/FeatureConstructorProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers; 16 | 17 | import java.util.Map; 18 | 19 | import org.apache.uima.jcas.JCas; 20 | import org.apache.uima.resource.Resource; 21 | 22 | /** 23 | * An interface that creates a feature vector from an input {@link JCas}. 24 | * 25 | * TODO: To be migrated to {@link edu.cmu.lti.oaqa.baseqa.learning_base.Scorer}, 26 | * where a candidate is given in the method 27 | * {@link edu.cmu.lti.oaqa.baseqa.learning_base.Scorer#score(JCas, Object)}. 28 | * 29 | * @see edu.cmu.lti.oaqa.baseqa.learning_base.Scorer 30 | * 31 | * @author Zi Yang created on 4/5/15 32 | */ 33 | public interface FeatureConstructorProvider extends Resource { 34 | 35 | Map constructFeatures(JCas jcas); 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/providers/parser/ParserProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 
13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.providers.parser; 16 | 17 | import edu.cmu.lti.oaqa.type.nlp.Token; 18 | import org.apache.uima.jcas.JCas; 19 | import org.apache.uima.resource.Resource; 20 | 21 | import java.util.List; 22 | 23 | /** 24 | * An implementation of this interface should wrap a parser to {@link #tokenize(JCas)}, 25 | * {@link #tagPartOfSpeech(JCas)}, {@link #lemmatize(JCas)}, and {@link #parseDependency(JCas)} of 26 | * the document text in a input {@link JCas}. 27 | * 28 | * @author Zi Yang created on 3/12/16 29 | */ 30 | public interface ParserProvider extends Resource { 31 | 32 | List tokenize(JCas jcas); 33 | 34 | void tagPartOfSpeech(JCas jcas, List tokens); 35 | 36 | default List tagPartOfSpeech(JCas jcas) { 37 | List tokens = tokenize(jcas); 38 | tagPartOfSpeech(jcas, tokens); 39 | return tokens; 40 | } 41 | 42 | void lemmatize(JCas jcas, List tokens); 43 | 44 | default List lemmatize(JCas jcas) { 45 | List tokens = tokenize(jcas); 46 | tagPartOfSpeech(jcas, tokens); 47 | lemmatize(jcas, tokens); 48 | return tokens; 49 | } 50 | 51 | void parseDependency(JCas jcas, List tokens); 52 | 53 | default List parseDependency(JCas jcas) { 54 | List tokens = tokenize(jcas); 55 | tagPartOfSpeech(jcas, tokens); 56 | lemmatize(jcas, tokens); 57 | parseDependency(jcas, tokens); 58 | return tokens; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/baseqa/question/parse/QuestionParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.baseqa.question.parse; 16 | 17 | import edu.cmu.lti.oaqa.baseqa.providers.parser.ParserProvider; 18 | import edu.cmu.lti.oaqa.baseqa.util.ProviderCache; 19 | import edu.cmu.lti.oaqa.baseqa.util.UimaContextHelper; 20 | import edu.cmu.lti.oaqa.type.nlp.Token; 21 | import org.apache.uima.UimaContext; 22 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 23 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 24 | import org.apache.uima.jcas.JCas; 25 | import org.apache.uima.resource.ResourceInitializationException; 26 | 27 | /** 28 | * This {@link JCasAnnotator_ImplBase} uses a {@link ParserProvider} to parse and annotate the 29 | * question in the main view. 
30 | * 31 | * @see ParserProvider 32 | * 33 | * @author Zi Yang created on 4/12/15 34 | */ 35 | public class QuestionParser extends JCasAnnotator_ImplBase { 36 | 37 | private ParserProvider parserProvider; 38 | 39 | @Override 40 | public void initialize(UimaContext context) throws ResourceInitializationException { 41 | super.initialize(context); 42 | String nlpProviderName = UimaContextHelper.getConfigParameterStringValue(context, 43 | "parser-provider"); 44 | parserProvider = ProviderCache.getProvider(nlpProviderName, ParserProvider.class); 45 | } 46 | 47 | @Override 48 | public void process(JCas jcas) throws AnalysisEngineProcessException { 49 | parserProvider.parseDependency(jcas).forEach(Token::addToIndexes); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/bioasq/eval/calculator/AnswerEvalMeasure.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.bioasq.eval.calculator; 16 | 17 | import edu.cmu.lti.oaqa.baseqa.eval.Measure; 18 | 19 | /** 20 | * Definitions of BioASQ Phase B Factoid, List, and YesNo QA evaluation metrics. 
21 | * 22 | * @see AnswerEvalCalculator 23 | * 24 | * @author Zi Yang created on 4/29/15 25 | */ 26 | enum AnswerEvalMeasure implements Measure { 27 | 28 | // PER-TOPIC FACTOID QUESTION ANSWER MEASURES 29 | FACTOID_COUNT, FACTOID_STRICT_RETRIEVED, FACTOID_LENIENT_RETRIEVED, FACTOID_RECIPROCAL_RANK, 30 | 31 | // ACCUMULATED FACTOID QUESTION ANSWER MEASURES 32 | FACTOID_STRICT_ACCURACY, FACTOID_LENIENT_ACCURACY, FACTOID_MRR, 33 | 34 | // PER-TOPIC LIST QUESTION ANSWER MEASURES 35 | LIST_COUNT, LIST_PRECISION, LIST_RECALL, LIST_F1, 36 | 37 | // ACCUMULATED LIST QUESTION ANSWER MEASURES 38 | LIST_MEAN_PRECISION, LIST_MEAN_RECALL, LIST_MEAN_F1, 39 | 40 | // PER-TOPIC YESNO QUESTION ANSWER MEASURES 41 | YESNO_CORRECT, YESNO_TRUE_POS, YESNO_TRUE_NEG, 42 | 43 | // ACCUMULATED YESNO QUESTION ANSWER MEASURES 44 | YESNO_COUNT, YESNO_MEAN_ACCURACY, YESNO_MEAN_POS_ACCURACY, YESNO_MEAN_NEG_ACCURACY; 45 | 46 | static { 47 | for (AnswerEvalMeasure measure : values()) { 48 | Measure.name2measure.put(measure.getName(), measure); 49 | } 50 | } 51 | 52 | @Override 53 | public String getName() { 54 | return name(); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/bioqa/providers/kb/TmToolConceptProviderException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. 
/**
 * This {@link RuntimeException} gives detailed debugging information for
 * {@link TmToolConceptProvider} related errors.
 *
 * <p>Instances are created only through the static factory methods, one per failure kind.
 *
 * @see TmToolConceptProvider
 *
 * @author Zi Yang created on 3/25/16.
 */
class TmToolConceptProviderException extends RuntimeException {

  private TmToolConceptProviderException(String message) {
    super(message);
  }

  private TmToolConceptProviderException(String message, Throwable e) {
    super(message, e);
  }

  /**
   * Creates an exception for a mismatch between the number of texts sent and received.
   *
   * @param trigger    the trigger during which the mismatch was seen
   * @param sentVolume the number of texts sent
   * @param recvVolume the number of texts received
   */
  static TmToolConceptProviderException unequalVolume(String trigger, int sentVolume,
          int recvVolume) {
    return new TmToolConceptProviderException(
            "Unequal volume at " + trigger + ": sent " + sentVolume + " texts and received " +
                    recvVolume + ".");
  }

  /**
   * Creates an exception for a sent text and a received text of different lengths.
   *
   * @param trigger  the trigger during which the mismatch was seen
   * @param sentText the text that was sent
   * @param recvText the text that was received
   */
  static TmToolConceptProviderException unequalTextLength(String trigger, String sentText,
          String recvText) {
    return new TmToolConceptProviderException(
            "Unequal text length at " + trigger + ":\nSent: " + sentText + "\nRecv: " + recvText);
  }

  /**
   * Creates an exception for a received text that differs from the sent text.
   *
   * <p>Note the parameter order: unlike the other factories, the trigger comes last.
   *
   * @param sentText the text that was sent
   * @param recvText the text that was received
   * @param trigger  the trigger during which the change was seen
   */
  static TmToolConceptProviderException textChanged(String sentText, String recvText,
          String trigger) {
    // Report both texts; the previous message dropped them and read like an unknown error.
    return new TmToolConceptProviderException(
            "Text changed at " + trigger + ":\nSent: " + sentText + "\nRecv: " + recvText);
  }

  /**
   * Creates an exception that wraps an otherwise unclassified error.
   *
   * @param trigger the trigger during which the error occurred
   * @param e       the underlying cause
   */
  static TmToolConceptProviderException unknownException(String trigger, Throwable e) {
    return new TmToolConceptProviderException("Error at trigger " + trigger, e);
  }

  /**
   * Creates an exception for denotation offsets that fall outside the sent text.
   *
   * @param sentText    the text that was sent
   * @param denotations the denotations being processed, printed via {@code toString()}
   * @param e           the underlying cause, kept as the exception's cause
   */
  static TmToolConceptProviderException offsetOutOfBounds(String sentText,
          Collection<?> denotations, Throwable e) {
    // Pass the cause along so the original stack trace is not lost.
    return new TmToolConceptProviderException(
            "Offset out of bounds:\nSent: " + sentText + "\nSpan: " + denotations, e);
  }

}
-------------------------------------------------------------------------------- /src/main/java/edu/cmu/lti/oaqa/bioqa/providers/query/PubMedQueryStringConstructor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations 12 | * under the License. 13 | */ 14 | 15 | package edu.cmu.lti.oaqa.bioqa.providers.query; 16 | 17 | import com.google.common.base.Strings; 18 | import edu.cmu.lti.oaqa.baseqa.providers.query.BooleanBagOfPhraseQueryStringConstructor; 19 | 20 | import java.util.Collection; 21 | 22 | /** 23 | * This {@link edu.cmu.lti.oaqa.baseqa.providers.query.QueryStringConstructor} extends the 24 | * {@link BooleanBagOfPhraseQueryStringConstructor} to allow to include field information within 25 | * pairs of square brackets for PubMed 26 | * search. 
27 | * 28 | * @see BooleanBagOfPhraseQueryStringConstructor 29 | * 30 | * @author Zi Yang created on 10/8/14 31 | */ 32 | public class PubMedQueryStringConstructor extends BooleanBagOfPhraseQueryStringConstructor { 33 | 34 | public static final String FIELD_PREFIX = "["; 35 | 36 | public static final String FIELD_SUFFIX = "]"; 37 | 38 | public PubMedQueryStringConstructor() { 39 | super(); 40 | } 41 | 42 | @Override 43 | public String formatQueryConcept(String formatQueryField, String formatQueryText) { 44 | return formatQueryText + Strings.nullToEmpty(formatQueryField); 45 | } 46 | 47 | @Override 48 | public String formatQueryField(Collection namedEntityTypes, String conceptType) { 49 | return namedEntityTypes.isEmpty() ? null 50 | : (FIELD_PREFIX + namedEntityTypes.stream().findFirst().get() + FIELD_SUFFIX); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/abstract_query/bag-of-token.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.abstract_query.BagOfTokenAbstractQueryGenerator 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/abstract_query/concept-required.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.abstract_query.ConceptAbstractQueryGenerator 2 | 3 | use-weight: false 4 | use-type: false 5 | required: true -------------------------------------------------------------------------------- /src/main/resources/baseqa/abstract_query/concept.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.abstract_query.ConceptAbstractQueryGenerator 2 | 3 | use-weight: false 4 | use-type: false 5 | required: false -------------------------------------------------------------------------------- 
/src/main/resources/baseqa/abstract_query/token-concept.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.abstract_query.TokenConceptAbstractQueryGenerator 2 | 3 | stoplist-path: /dictionaries/stoplist.txt 4 | use-weight: false 5 | use-type: false 6 | #pos-tags-path: /dictionaries/pos-tags.txt 7 | #noun-tags-path: /dictionaries/noun-tags.txt -------------------------------------------------------------------------------- /src/main/resources/baseqa/abstract_query/token-selection-pos-stoplist.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.abstract_query.token-selection-pos 2 | 3 | stoplist-path: /dictionaries/stoplist.txt 4 | pos-tags-path: /dictionaries/pos-tags.txt 5 | noun-tags-path: /dictionaries/noun-tags.txt -------------------------------------------------------------------------------- /src/main/resources/baseqa/abstract_query/token-selection-pos.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.abstract_query.token-selection 2 | 3 | # explanations see https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html 4 | pos-tags-path: /dictionaries/pos-tags.txt 5 | noun-tags-path: /dictionaries/noun-tags.txt 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/abstract_query/token-selection.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.abstract_query.TokenSelectionAbstractQueryGenerator 2 | 3 | # add parameter values for pos-tags-path and stoplist-path 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: 
baseqa.learning_base.classifier-predict 2 | 3 | candidate-provider: 'inherit: baseqa.answer.score.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.answer.collective_score.scorers.original 6 | - inherit: baseqa.answer.collective_score.scorers.distance 7 | - inherit: baseqa.answer.collective_score.scorers.edit-distance 8 | - inherit: baseqa.answer.collective_score.scorers.type-coercion 9 | - inherit: baseqa.answer.collective_score.scorers.shape-distance 10 | # add "classifier" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-train 2 | 3 | candidate-provider: 'inherit: baseqa.answer.score.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.answer.collective_score.scorers.original 6 | - inherit: baseqa.answer.collective_score.scorers.distance 7 | - inherit: baseqa.answer.collective_score.scorers.edit-distance 8 | - inherit: baseqa.answer.collective_score.scorers.type-coercion 9 | - inherit: baseqa.answer.collective_score.scorers.shape-distance 10 | resample-type: DOWN 11 | # add "classifier" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/scorers/distance.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.DistanceCollectiveAnswerScorer 2 | 3 | top-limit: [1, 3, 5, 10] 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/scorers/edit-distance.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.EditDistanceCollectiveAnswerScorer 2 | 3 | top-limit: [1, 3, 5, 10] 4 | 
-------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/scorers/original.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.OriginalCollectiveAnswerScorer 2 | 3 | top-limit: [1, 3, 5, 10] 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/scorers/shape-distance.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.ShapeDistanceCollectiveAnswerScorer 2 | 3 | top-limit: [1, 3, 5, 10] 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/collective_score/scorers/type-coercion.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.TypeCoercionCollectiveAnswerScorer 2 | 3 | top-limit: [1, 3, 5, 10] 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/generate/generate.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.generate.CavGenerationManager 2 | 3 | generators: | 4 | - inherit: baseqa.answer.generate.generators.choice 5 | - inherit: baseqa.answer.generate.generators.quantity 6 | - inherit: baseqa.answer.generate.generators.concept 7 | - inherit: baseqa.answer.generate.generators.cav-covering-concept 8 | # - inherit: baseqa.answer.generate.generators.covering-phrase -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/generate/generators/cav-covering-concept.yaml: -------------------------------------------------------------------------------- 1 
| class: edu.cmu.lti.oaqa.baseqa.answer.generate.generators.CavCoveringConceptCavGenerator 2 | 3 | filter-question-concepts: false 4 | filter-question-tokens: false 5 | stoplist: null -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/generate/generators/choice.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.generate.generators.ChoiceCavGenerator 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/generate/generators/concept.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.generate.generators.ConceptCavGenerator 2 | 3 | filter-question-concepts: true 4 | filter-question-tokens: true 5 | stoplist: /dictionaries/stoplist-top5k.txt 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/generate/generators/covering-phrase.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.generate.generators.CoveringPhraseCavGenerator 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/generate/generators/quantity.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.generate.generators.QuantityCavGenerator 2 | 3 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/modifiers/list-50.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers.ListAnswerPruner 2 | 3 | threshold: 0.50 4 | 
-------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/modifiers/list-70.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers.ListAnswerPruner 2 | 3 | threshold: 0.70 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/modifiers/merge.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers.CavMerger 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/modifiers/ratio-72.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers.ListAnswerPruner 2 | 3 | ratio: 0.72 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/modifiers/ratio-88.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.modifiers.ListAnswerPruner 2 | 3 | ratio: 0.88 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/modify.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.CavModificationManager 2 | 3 | handlers: | 4 | - inherit: baseqa.answer.modify.modifiers.merge 5 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/modify/pruner.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.modify.AnswerModificationManager 2 | 3 | handlers: | 4 | - inherit: 
baseqa.answer.modify.modifiers.list-70 5 | # - inherit: baseqa.answer.modify.modifiers.ratio-72 6 | # - inherit: baseqa.answer.modify.modifiers.ratio-88 7 | # - inherit: baseqa.answer.modify.modifiers.list-50 8 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/candidate-provider.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.AnswerCandidateProvider -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-predict 2 | 3 | candidate-provider: 'inherit: baseqa.answer.score.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.answer.score.scorers.type-coercion 6 | - inherit: baseqa.answer.score.scorers.cao-count 7 | - inherit: baseqa.answer.score.scorers.name-count 8 | - inherit: baseqa.answer.score.scorers.avg-covered-token-count 9 | - inherit: baseqa.answer.score.scorers.stopword-count 10 | - inherit: baseqa.answer.score.scorers.token-overlap-count 11 | - inherit: baseqa.answer.score.scorers.concept-overlap-count 12 | - inherit: baseqa.answer.score.scorers.token-proximity 13 | - inherit: baseqa.answer.score.scorers.concept-proximity 14 | - inherit: baseqa.answer.score.scorers.focus-overlap-count 15 | - inherit: baseqa.answer.score.scorers.parsehead-proximity 16 | - inherit: baseqa.answer.score.scorers.focus 17 | - inherit: baseqa.answer.score.scorers.concept-type 18 | - inherit: baseqa.answer.score.scorers.answer-type 19 | # - inherit: baseqa.answer.score.scorers.parse 20 | # add "classifier", "feature-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/classifier-train.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-train 2 | 3 | candidate-provider: 'inherit: baseqa.answer.score.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.answer.score.scorers.type-coercion 6 | - inherit: baseqa.answer.score.scorers.cao-count 7 | - inherit: baseqa.answer.score.scorers.name-count 8 | - inherit: baseqa.answer.score.scorers.avg-covered-token-count 9 | - inherit: baseqa.answer.score.scorers.stopword-count 10 | - inherit: baseqa.answer.score.scorers.token-overlap-count 11 | - inherit: baseqa.answer.score.scorers.concept-overlap-count 12 | - inherit: baseqa.answer.score.scorers.token-proximity 13 | - inherit: baseqa.answer.score.scorers.concept-proximity 14 | - inherit: baseqa.answer.score.scorers.focus-overlap-count 15 | - inherit: baseqa.answer.score.scorers.parsehead-proximity 16 | - inherit: baseqa.answer.score.scorers.focus 17 | - inherit: baseqa.answer.score.scorers.concept-type 18 | - inherit: baseqa.answer.score.scorers.answer-type 19 | # - inherit: baseqa.answer.score.scorers.parse 20 | resample-type: DOWN 21 | # add "classifier", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/cv-load.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.cv-load 2 | 3 | candidate-provider: 'inherit: baseqa.answer.score.candidate-provider' 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/answer-type.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.AnswerTypeAnswerScorer 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/avg-covered-token-count.yaml: 
-------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.AvgCoveredTokenCountAnswerScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/cao-count.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.CaoCountAnswerScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/concept-overlap-count.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.ConceptOverlappingCountAnswerScorer 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/concept-proximity.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.ConceptProximityAnswerScorer 2 | 3 | stoplist: /dictionaries/stoplist-top5k.txt 4 | window-size: 10 5 | infinity: .inf 6 | smoothing: 5.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/concept-type.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.ConceptTypeAnswerScorer 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/focus-overlap-count.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.FocusOverlappingCountAnswerScorer 2 | -------------------------------------------------------------------------------- 
/src/main/resources/baseqa/answer/score/scorers/focus-proximity.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.FocusProximityAnswerScorer 2 | 3 | window-size: 10 4 | infinity: .inf 5 | smoothing: 5.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/focus.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.FocusAnswerScorer 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/name-count.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.NameCountAnswerScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/parse.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.ParseAnswerScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/parsehead-proximity.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.ParseHeadProximityAnswerScorer 2 | 3 | stoplist: /dictionaries/stoplist-top5k.txt 4 | window-size: 10 5 | infinity: .inf 6 | smoothing: 5.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/stopword-count.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.StopwordCountAnswerScorer 2 | 3 | stoplist: /dictionaries/stoplist-top5k.txt 
-------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/token-overlap-count.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.TokenOverlappingCountAnswerScorer 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/token-proximity.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.TokenProximityAnswerScorer 2 | 3 | stoplist: /dictionaries/stoplist-top5k.txt 4 | window-size: 10 5 | infinity: .inf 6 | smoothing: 5.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/scorers/type-coercion.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.scorers.TypeCoercionAnswerScorer 2 | 3 | at-limits: [1, 3] 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/score/simple.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.score.SimpleAnswerScorer 2 | 3 | type-coer-smoothing: 5.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/all-yes.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.AllYesYesNoAnswerPredictor -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/predict.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.YesNoAnswerPredictor 2 | 3 | 
scorers: | 4 | - inherit: baseqa.answer.yesno.scorers.concept-overlap 5 | - inherit: baseqa.answer.yesno.scorers.token-overlap 6 | - inherit: baseqa.answer.yesno.scorers.expected-answer-overlap 7 | - inherit: baseqa.answer.yesno.scorers.sentiment 8 | - inherit: baseqa.answer.yesno.scorers.negation 9 | # requires "classifier" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/scorers/alternate-answer.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.scorers.AlternateAnswerYesNoScorer 2 | 3 | # requires "pipeline" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/scorers/concept-overlap.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.scorers.ConceptOverlapYesNoScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/scorers/expected-answer-overlap.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.scorers.ExpectedAnswerOverlapYesNoScorer 2 | 3 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/scorers/negation.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.scorers.NegationYesNoScorer 2 | 3 | negation-cues-path: /dictionaries/negation-cues.txt 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/scorers/sentiment.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.answer.yesno.scorers.SentimentYesNoScorer 2 | 3 | positive-wordlist-path: /dictionaries/positive-words.txt 4 | negative-wordlist-path: /dictionaries/negative-words.txt 5 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/scorers/token-overlap.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.scorers.TokenOverlapYesNoScorer 2 | 3 | stoplist-path: /dictionaries/stoplist.txt 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer/yesno/train.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer.yesno.YesNoAnswerTrainer 2 | 3 | scorers: | 4 | - inherit: baseqa.answer.yesno.scorers.concept-overlap 5 | - inherit: baseqa.answer.yesno.scorers.token-overlap 6 | - inherit: baseqa.answer.yesno.scorers.expected-answer-overlap 7 | - inherit: baseqa.answer.yesno.scorers.sentiment 8 | - inherit: baseqa.answer.yesno.scorers.negation 9 | resample-type: DOWN 10 | # requires "classifier", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer_type/feature-constructor.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.feature-constructor 2 | 3 | quantity-question-words-path: /dictionaries/quantity-question-words.txt -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer_type/gslabel-concept-search.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer_type.ConceptSearchGSAnswerTypeLabeler 2 | 3 | quantity-question-words-path: 
/dictionaries/quantity-question-words.txt 4 | 5 | # add values for parameters "concept-search-provider" and "at-gslabel-file" 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer_type/gslabel-concept.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer_type.ConceptGSAnswerTypeLabeler 2 | 3 | quantity-question-words-path: /dictionaries/quantity-question-words.txt 4 | 5 | # add values for parameters "concept-provider" and "at-gslabel-file" 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer_type/load-cv.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer_type.AnswerTypeCVPredictLoader -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer_type/predict.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer_type.AnswerTypeClassifierPredictor 2 | 3 | feature-constructor: 'inherit: baseqa.answer_type.feature-constructor' 4 | # requires parameter "model" -------------------------------------------------------------------------------- /src/main/resources/baseqa/answer_type/train.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.answer_type.AnswerTypeClassifierTrainer 2 | 3 | feature-constructor: 'inherit: baseqa.answer_type.feature-constructor' 4 | cv-predict-limit: 5 5 | # requires parameters "classifier", "at-gslabel-files", "null-type", "type-ratio-threshold" 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/cas-serialize.yaml: -------------------------------------------------------------------------------- 1 | class:
edu.cmu.lti.oaqa.baseqa.CasSerializer 2 | 3 | dir: result/ 4 | typesystem: baseqa.type.OAQATypes -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/candidate-provider.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.ConceptSearchResultCandidateProvider -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-predict 2 | 3 | candidate-provider: 'inherit: baseqa.concept.rerank.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.concept.rerank.scorers.original-score 6 | - inherit: baseqa.concept.rerank.scorers.name-match 7 | # add "classifier", "feature-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-train 2 | 3 | candidate-provider: 'inherit: baseqa.concept.rerank.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.concept.rerank.scorers.original-score 6 | - inherit: baseqa.concept.rerank.scorers.name-match 7 | resample-type: DOWN 8 | # add "classifier", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/lucene.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.LuceneInMemoryConceptReranker 2 | 3 | query-string-contructor: edu.cmu.lti.oaqa.baseqa.providers.query.LuceneQueryStringConstructor 4 | query-analyzer: org.apache.lucene.analysis.standard.StandardAnalyzer 5 | hits: 100 6 | limit: 10 
7 | rerank-weight: 1.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/name-match.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.MatchingNameConceptReranker 2 | 3 | bonus: 100.0 -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/score-sum.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.ScoreSummationConceptReranker 2 | 3 | limit: 5 -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/scorers/lucene.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers.LuceneConceptScorer 2 | 3 | stoplist-path: /dictionaries/stoplist.txt 4 | hits: 100 5 | 6 | # requires "fields", "index", "id-field", "source-field", "uri-prefix" 7 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/scorers/name-match.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers.MatchingNameConceptScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/scorers/original-score.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers.OriginalScoreConceptScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/rerank/searchid-weight.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.concept.rerank.WeightingSearchIdConceptReranker 2 | 3 | # requires "weights-path" parameter -------------------------------------------------------------------------------- /src/main/resources/baseqa/concept/retrieval/lucene.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.concept.retrieval.LuceneConceptRetrievalExecutor 2 | 3 | query-string-constructor: edu.cmu.lti.oaqa.baseqa.providers.query.LuceneQueryStringConstructor 4 | query-analyzer: org.apache.lucene.analysis.standard.StandardAnalyzer 5 | hits: 100 6 | # requires "fields", "index", "id-field", "name-field", "source-field", "uri-prefix" -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/candidate-provider.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.document.rerank.DocumentCandidateProvider -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-predict 2 | 3 | candidate-provider: 'inherit: baseqa.document.rerank.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.document.rerank.scorers.original-score 6 | # add "classifier", "feature-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-train 2 | 3 | candidate-provider: 'inherit: baseqa.document.rerank.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.document.rerank.scorers.original-score 6 | resample-type: DOWN 7 | # add "classifier", "cv-predict-file" 
-------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/cv-load.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.cv-load 2 | 3 | candidate-provider: 'inherit: baseqa.document.rerank.candidate-provider' 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/logreg.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.document.rerank.LogRegDocumentReranker 2 | 3 | query-string-contructor: edu.cmu.lti.oaqa.baseqa.providers.query.LuceneQueryStringConstructor 4 | query-analyzer: org.apache.lucene.analysis.standard.StandardAnalyzer 5 | hits: 100 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/scorers/lucene.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.document.rerank.scorers.LuceneDocumentScorer 2 | 3 | hits: 100 4 | stoplist-path: /dictionaries/stoplist.txt 5 | # requires "fields", "index", "id-field", "uri-prefix" -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/rerank/scorers/original-score.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.document.rerank.scorers.OriginalScoreDocumentScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/document/retrieval/lucene.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.document.retrieval.LuceneDocumentRetrievalExecutor 2 | 3 | query-string-constructor: edu.cmu.lti.oaqa.baseqa.providers.query.LuceneQueryStringConstructor 4 | 
query-analyzer: org.apache.lucene.analysis.standard.StandardAnalyzer 5 | hits: 100 6 | # requires "fields", "index", "id-field", "title-field", "text-field", "uri-prefix" -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/concept/frequent-phrase.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: baseqa.providers.kb.frequent-phrase' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/concept/merge.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.ConceptMerger 2 | 3 | include-default-view: true 4 | view-name-prefix: ptv 5 | use-name: true -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/concept/opennlp-np.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: baseqa.providers.kb.opennlp-np' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/concept/opennlp-npppnp.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: baseqa.providers.kb.opennlp-npppnp' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/concept/search.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.evidence.concept.ConceptSearcher 2 | 3 | # add "concept-search-provider", "synonym-expansion-provider" -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/parse/clearnlp-general.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: baseqa.providers.parser.clearnlp-general' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/parse/lingpipe-indoeuro-brown.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: baseqa.providers.parser.lingpipe-indoeuro-brown' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/parse/lingpipe-regex-brown.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: baseqa.providers.parser.lingpipe-regex-brown' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/evidence/passage-to-view.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.PassageToViewCopier 2 | 3 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/baseqa/learning_base/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierPredictor 2 | 3 | # requires "candidate-provider", "scorers", 
"classifier", "feature-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/learning_base/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.learning_base.ClassifierTrainer 2 | 3 | # requires "candidate-provider", "scorers", "classifier", "cv-predict-file", "at-least-one-correct", "resample-type" -------------------------------------------------------------------------------- /src/main/resources/baseqa/learning_base/cv-load.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.learning_base.CVPredictLoader 2 | 3 | # requires "candidate-provider", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/rerank/candidate-provider.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.passage.rerank.PassageCandidateProvider -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-predict 2 | 3 | candidate-provider: 'inherit: baseqa.passage.rerank.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.passage.rerank.scorers.original-score 6 | - inherit: baseqa.passage.rerank.scorers.meta-info 7 | - inherit: baseqa.passage.rerank.scorers.lucene 8 | # add "classifier", "feature-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.learning_base.classifier-train 2 | 3 | candidate-provider: 'inherit: 
baseqa.passage.rerank.candidate-provider' 4 | scorers: | 5 | - inherit: baseqa.passage.rerank.scorers.original-score 6 | - inherit: baseqa.passage.rerank.scorers.meta-info 7 | - inherit: baseqa.passage.rerank.scorers.lucene 8 | resample-type: DOWN 9 | # add "classifier", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/rerank/scorers/lucene.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.passage.rerank.scorers.LuceneInMemoryPassageScorer 2 | 3 | hits: 100 4 | stoplist-path: /dictionaries/stoplist.txt -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/rerank/scorers/meta-info.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.passage.rerank.scorers.MetaInfoPassageScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/rerank/scorers/original-score.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.passage.rerank.scorers.OriginalScorePassageScorer -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/retrieval/document-to-passage.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.passage.retrieval.DocumentToPassageConverter 2 | 3 | include-title-abstract: true 4 | include-sections: true 5 | only-for-documents-missing-sections: true 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/retrieval/lucene-sentence-improve.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.passage.retrieval.ImprovedLuceneInMemorySentenceRetrievalExecutor 2 | 3 | query-string-constructor: edu.cmu.lti.oaqa.baseqa.providers.query.LuceneQueryStringConstructor 4 | query-analyzer: org.apache.lucene.analysis.standard.StandardAnalyzer 5 | hits: 200 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/passage/retrieval/lucene-sentence.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.passage.retrieval.LuceneInMemorySentenceRetrievalExecutor 2 | 3 | query-string-constructor: edu.cmu.lti.oaqa.baseqa.providers.query.LuceneQueryStringConstructor 4 | query-analyzer: org.apache.lucene.analysis.standard.StandardAnalyzer 5 | hits: 200 6 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/preprocess/passage-concept-cache.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.preprocess.PassageConceptCache 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/preprocess/question-concept-cache.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.preprocess.QuestionConceptCache 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/kb/frequent-phrase.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.kb.FrequentPhraseConceptProvider 2 | 3 | max-length: 6 4 | min-freq: 2 5 | min-freq-ratio: 0.0 6 | type: AVERAGE 7 | score-threshold: 1.0 8 | score-ratio-threshold: 0.8 9 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/kb/opennlp-np.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.kb.opennlp 2 | 3 | type: NP 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/kb/opennlp-npppnp.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.kb.opennlp 2 | 3 | type: NP,PP,NP 4 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/kb/opennlp.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.kb.OpenNlpChunkerConceptProvider 2 | 3 | chunker-model: /en-chunker.bin 4 | min-length: 2 -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/feature-constructor.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.FeatureConstructorProviderImpl 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/liblinear.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.LibLinearProvider 2 | 3 | # add parameters "feat-index-file", "label-index-file", "model-file", "balance-weight", 4 | # "solver-type" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/libsvm.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.LibSvmProvider 2 | 3 | # add parameters "feat-index-file", "label-index-file", "model-file" 
-------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/meka-pcc.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.meka 2 | 3 | classifier-name: meka.classifiers.multilabel.PCC 4 | options: [-W, weka.classifiers.meta.LogitBoost] 5 | 6 | # add parameters "model-file", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/meka.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider 2 | 3 | # add parameters "model-file", "classifier-name", "options", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/weka-cvr.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka 2 | 3 | classifier-name: weka.classifiers.meta.ClassificationViaRegression 4 | options: [] 5 | 6 | #classifier-name: weka.classifiers.meta.AttributeSelectedClassifier 7 | #options: 8 | # - '-E' 9 | # - 'weka.attributeSelection.CorrelationAttributeEval' 10 | # - '-S' 11 | # - 'weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N 100' 12 | # - '-W' 13 | # - 'weka.classifiers.functions.SimpleLogistic' 14 | 15 | #classifier-name: weka.classifiers.meta.ClassificationViaClustering 16 | #options: [] 17 | 18 | # add parameters "model-file", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/weka-knn.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka 2 | 3 | classifier-name: 
weka.classifiers.lazy.IBk 4 | options: [] 5 | 6 | # add parameters "model-file", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/weka-logistic.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka 2 | 3 | classifier-name: weka.classifiers.functions.SimpleLogistic 4 | options: [] 5 | 6 | # add parameters "model-file", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/weka-reptree.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka 2 | 3 | classifier-name: weka.classifiers.trees.REPTree 4 | options: [] 5 | 6 | # add parameters "model-file", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/ml/classifiers/weka.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider 2 | 3 | # add parameters "model-file", "classifier-name", "options", "dataset-schema-file" -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/parser/clearnlp-general.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.ClearParserProvider 2 | 3 | pos-model: /general-en-pos.xz 4 | dep-model: /general-en-dep.xz -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/parser/lingpipe-indoeuro-brown.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.providers.parser.LingPipeParserProvider 2 | 3 | token-factory: com.aliasi.tokenizer.IndoEuropeanTokenizerFactory 4 | token-factory-params: [] 5 | pos-model: /pos-en-general-brown.HiddenMarkovModel -------------------------------------------------------------------------------- /src/main/resources/baseqa/providers/parser/lingpipe-regex-brown.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.LingPipeParserProvider 2 | 3 | token-factory: com.aliasi.tokenizer.RegExTokenizerFactory 4 | token-factory-params: ['([-''\d\p{L}]|\([-''\d\p{L}]+\))*[-''\d\p{L}]+|\S'] 5 | pos-model: /pos-en-general-brown.HiddenMarkovModel -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/concept/opennlp-np.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.concept.QuestionConceptRecognizer 2 | 3 | concept-provider: 'inherit: baseqa.providers.kb.opennlp-np' -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/concept/opennlp-npppnp.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.concept.QuestionConceptRecognizer 2 | 3 | concept-provider: 'inherit: baseqa.providers.kb.opennlp-npppnp' -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/focus.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.focus.QuestionFocusExtractor 2 | -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/parse/clearnlp-general.yaml: -------------------------------------------------------------------------------- 1 | 
class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: baseqa.providers.parser.clearnlp-general' -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/parse/lingpipe-indoeuro-brown.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: baseqa.providers.parser.lingpipe-indoeuro-brown' -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/parse/lingpipe-regex-brown.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: baseqa.providers.parser.lingpipe-regex-brown' -------------------------------------------------------------------------------- /src/main/resources/baseqa/question/yesno/yesno-to-factoid.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.yesno.YesNoToFactoidQuestionConverter -------------------------------------------------------------------------------- /src/main/resources/bioasq/collection/json/json-cas-consumer.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.collection.json.json-cas-consumer 2 | 3 | document-limit: 10 4 | snippet-limit: 10 5 | concept-limit: 10 6 | triple-limit: 10 7 | factoid-answer-limit: 5 8 | list-answer-limit: 100 9 | 10 | result-dir: result/ 11 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.concept.rerank.classifier-predict 2 | 3 | scorers: | 4 | - inherit: 
baseqa.concept.rerank.scorers.original-score 5 | - inherit: baseqa.concept.rerank.scorers.name-match 6 | - inherit: bioasq.concept.rerank.scorers.gopubmed -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.concept.rerank.classifier-train 2 | 3 | scorers: | 4 | - inherit: baseqa.concept.rerank.scorers.original-score 5 | - inherit: baseqa.concept.rerank.scorers.name-match 6 | - inherit: bioasq.concept.rerank.scorers.gopubmed -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioasq.concept.rerank.classifier-predict 2 | 3 | classifier: 'inherit: bioasq.concept.rerank.liblinear' 4 | feature-file: result/concept-rerank-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioasq.concept.rerank.classifier-train 2 | 3 | classifier: 'inherit: bioasq.concept.rerank.liblinear' 4 | cv-predict-file: src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.cv -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.findex 4 | label-index-file: src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.lindex 5 | model-file: 
src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC, L2R_LR 7 | balance-weight: false 8 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/ontology-logreg-params.txt: -------------------------------------------------------------------------------- 1 | DISEASE -4.9734 12.3044 2 | JOCHEM -9.6081 34.8384 3 | MESH -4.9894 12.9642 4 | UNIPROT -7.3101 13.8336 5 | GENE -3.0187 5.8931 -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/scorers/gopubmed.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.concept.rerank.scorers.GoPubMedConceptRetrievalScorer 2 | 3 | pages: 1 4 | hits: 100 5 | conf: /properties/direct-gopubmed-concept.properties 6 | timeout: 2 7 | stoplist-path: /dictionaries/stoplist.txt 8 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/rerank/searchid-weight.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.rerank.concept.searchid-weight 2 | 3 | weights-path: /bioasq/concept/rerank/ontology-logreg-params.txt -------------------------------------------------------------------------------- /src/main/resources/bioasq/concept/retrieval/gopubmed-separate.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.concept.retrieval.GoPubMedSeparateConceptRetrievalExecutor 2 | 3 | pages: 1 4 | hits: 10 5 | conf: /properties/direct-gopubmed-concept.properties # /properties/metal-gopubmed-concept.properties 6 | timeout: 1 7 | limit: 500 8 | -------------------------------------------------------------------------------- 
/src/main/resources/bioasq/concept/retrieval/gopubmed.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.concept.retrieval.GoPubMedConceptRetrievalExecutor 2 | 3 | pages: 1 4 | hits: 100 5 | conf: /properties/direct-gopubmed-concept.properties # /properties/metal-gopubmed-concept.properties 6 | timeout: 1 # 4 7 | limit: 500 8 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/document/retrieval/dixu-proprietary.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.document.retrieval.DiXuProprietaryDocumentRetrievalClient 2 | 3 | host: 128.2.190.25 4 | port: 10008 5 | uri-prefix: http://www.ncbi.nlm.nih.gov/pubmed/ 6 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/document/retrieval/gopubmed.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.document.retrieval.GoPubMedDocumentRetrievalExecutor 2 | 3 | pages: 4 4 | hits: 100 5 | conf: /properties/direct-gopubmed-document.properties # /properties/metal-gopubmed-document.properties -------------------------------------------------------------------------------- /src/main/resources/bioasq/eval/calculator/answer-eval-calculator.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.eval.calculator.AnswerEvalCalculator 2 | 3 | # No parameters needed 4 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/gs/bioasq-qa-gs-decorator.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.collection.json.json-gs-decorator 2 | 3 | file: input/4b-dev.json.auto.fulltext 4 | #file: input/3b-dev.json.fixed 5 | #file:
input/4b-dev.json.auto.abstract -------------------------------------------------------------------------------- /src/main/resources/bioasq/passage/pmc-content.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.passage.PmcContentSetter 2 | 3 | url-format: # ADD THE FORMATTABLE URL HERE, E.G. http://localhost:8080/pmc/%s 4 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/passage/retrieval/document-to-passage.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.passage.retrieval.document-to-passage 2 | 3 | include-title-abstract: true 4 | include-sections: false 5 | only-for-documents-missing-sections: false 6 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/phase-a-test.yaml: -------------------------------------------------------------------------------- 1 | # execute 2 | # mvn exec:exec -Dconfig=bioasq.phase-a-test 3 | # to test the pipeline 4 | 5 | configuration: 6 | name: phase-a-test 7 | author: ziy 8 | 9 | persistence-provider: 10 | inherit: baseqa.persistence.local-sqlite-persistence-provider 11 | 12 | collection-reader: 13 | inherit: baseqa.collection.json.json-collection-reader 14 | dataset: BIOASQ-QA 15 | file: 16 | # - input/4b-1-a.json 17 | # - input/4b-2-a.json 18 | # - input/4b-3-a.json 19 | # - input/4b-4-a.json 20 | - input/4b-5-a.json 21 | persistence-provider: | 22 | inherit: baseqa.persistence.local-sqlite-persistence-provider 23 | 24 | pipeline: 25 | - inherit: ecd.phase 26 | name: question-parse 27 | options: | 28 | - inherit: bioqa.question.parse.clearnlp-bioinformatics 29 | 30 | - inherit: ecd.phase 31 | name: question-concept-metamap 32 | options: | 33 | - inherit: bioqa.question.concept.metamap-cached 34 | 35 | - inherit: ecd.phase 36 | name: question-concept-tmtool 37 | options: | 38 | -
inherit: bioqa.question.concept.tmtool-cached 39 | 40 | - inherit: ecd.phase 41 | name: question-concept-lingpipe-genia 42 | options: | 43 | - inherit: bioqa.question.concept.lingpipe-genia 44 | 45 | - inherit: ecd.phase 46 | name: concept-search-uts 47 | options: | 48 | - inherit: bioqa.evidence.concept.search-uts-cached 49 | 50 | - inherit: ecd.phase 51 | name: concept-merge 52 | options: | 53 | - inherit: baseqa.evidence.concept.merge 54 | 55 | - inherit: ecd.phase 56 | name: abstract-query-primary 57 | options: | 58 | - inherit: baseqa.abstract_query.token-concept 59 | 60 | # concept 61 | - inherit: ecd.phase 62 | name: concept-retrieval 63 | options: | 64 | - inherit: bioqa.concept.retrieval.lucene-bioconcept 65 | 66 | - inherit: ecd.phase 67 | name: concept-rerank 68 | options: | 69 | - inherit: bioqa.concept.rerank.liblinear-predict 70 | 71 | # document 72 | - inherit: ecd.phase 73 | name: document-retrieval 74 | options: | 75 | - inherit: bioqa.document.retrieval.lucene-medline 76 | 77 | - inherit: ecd.phase 78 | name: document-rerank 79 | options: | 80 | - inherit: bioqa.document.rerank.liblinear-predict 81 | 82 | # snippet 83 | - inherit: ecd.phase 84 | name: passage-retrieval 85 | options: | 86 | - inherit: bioasq.passage.retrieval.document-to-passage 87 | 88 | - inherit: ecd.phase 89 | name: passage-rerank 90 | options: | 91 | - inherit: bioqa.passage.rerank.liblinear-predict 92 | - inherit: base.noop 93 | 94 | post-process: 95 | # submission 96 | - inherit: bioasq.collection.json.json-cas-consumer 97 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/phase-a-train-snippet.yaml: -------------------------------------------------------------------------------- 1 | # execute 2 | # mvn exec:exec -Dconfig=bioasq.phase-a-train-snippet 3 | # to test the pipeline 4 | 5 | configuration: 6 | name: phase-a-train-snippet 7 | author: ziy 8 | 9 | persistence-provider: 10 | inherit: 
baseqa.persistence.local-sqlite-persistence-provider 11 | 12 | collection-reader: 13 | inherit: baseqa.collection.json.json-collection-reader 14 | dataset: BIOASQ-QA 15 | file: 16 | - input/dryrun-a.json 17 | - input/1b-1-a.json 18 | - input/1b-2-a.json 19 | - input/1b-3-a.json 20 | - input/2b-1-a.json 21 | - input/2b-2-a.json 22 | - input/2b-3-a.json 23 | - input/2b-4-a.json 24 | - input/2b-5-a.json 25 | - input/3b-1-a.json 26 | - input/3b-2-a.json 27 | - input/3b-3-a.json 28 | - input/3b-4-a.json 29 | # - input/3b-5-a.json 30 | decorators: | 31 | - inherit: bioasq.gs.bioasq-qa-gs-decorator 32 | persistence-provider: | 33 | inherit: baseqa.persistence.local-sqlite-persistence-provider 34 | 35 | pipeline: 36 | - inherit: ecd.phase 37 | name: question-parse 38 | options: | 39 | - inherit: bioqa.question.parse.clearnlp-bioinformatics 40 | 41 | - inherit: ecd.phase 42 | name: question-concept-metamap 43 | options: | 44 | - inherit: bioqa.question.concept.metamap-cached 45 | 46 | - inherit: ecd.phase 47 | name: question-concept-tmtool 48 | options: | 49 | - inherit: bioqa.question.concept.tmtool-cached 50 | 51 | - inherit: ecd.phase 52 | name: question-concept-lingpipe-genia 53 | options: | 54 | - inherit: bioqa.question.concept.lingpipe-genia 55 | 56 | - inherit: ecd.phase 57 | name: concept-search-uts 58 | options: | 59 | - inherit: bioqa.evidence.concept.search-uts-cached 60 | 61 | - inherit: ecd.phase 62 | name: concept-merge 63 | options: | 64 | - inherit: baseqa.evidence.concept.merge 65 | 66 | - inherit: ecd.phase 67 | name: abstract-query-primary 68 | options: | 69 | - inherit: baseqa.abstract_query.token-concept 70 | 71 | # document 72 | - inherit: ecd.phase 73 | name: document-retrieval 74 | options: | 75 | - inherit: bioqa.document.retrieval.lucene-medline 76 | 77 | - inherit: ecd.phase 78 | name: document-rerank 79 | options: | 80 | - inherit: bioqa.document.rerank.cv-load-liblinear 81 | 82 | # snippet 83 | - inherit: ecd.phase 84 | name: passage-retrieval 
85 | options: | 86 | - inherit: bioasq.passage.retrieval.document-to-passage 87 | 88 | - inherit: ecd.phase 89 | name: passage-rerank-train-liblinear 90 | options: | 91 | - inherit: bioqa.passage.rerank.liblinear-train 92 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/phase-b-test-yesno.yaml: -------------------------------------------------------------------------------- 1 | # execute 2 | # mvn exec:exec -Dconfig=bioasq.phase-b-test-yesno 3 | # to test the pipeline 4 | 5 | configuration: 6 | name: phase-b-test-yesno 7 | author: ziy 8 | 9 | persistence-provider: 10 | inherit: baseqa.persistence.local-sqlite-persistence-provider 11 | 12 | collection-reader: 13 | inherit: baseqa.collection.json.json-collection-reader 14 | dataset: BIOASQ-QA 15 | file: 16 | - input/4b-5-b.json 17 | # - input/4b-all-b-no.json 18 | type: [yesno] 19 | persistence-provider: | 20 | inherit: baseqa.persistence.local-sqlite-persistence-provider 21 | 22 | pipeline: 23 | - inherit: ecd.phase 24 | name: question-parse 25 | options: | 26 | - inherit: bioqa.question.parse.clearnlp-bioinformatics 27 | 28 | - inherit: ecd.phase 29 | name: question-concept-metamap 30 | options: | 31 | - inherit: bioqa.question.concept.metamap-cached 32 | 33 | - inherit: ecd.phase 34 | name: question-concept-tmtool 35 | options: | 36 | - inherit: bioqa.question.concept.tmtool-cached 37 | 38 | - inherit: ecd.phase 39 | name: question-concept-lingpipe-genia 40 | options: | 41 | - inherit: bioqa.question.concept.lingpipe-genia 42 | 43 | - inherit: ecd.phase 44 | name: passage-to-view 45 | options: | 46 | - inherit: baseqa.evidence.passage-to-view 47 | 48 | - inherit: ecd.phase 49 | name: evidence-parse 50 | options: | 51 | - inherit: bioqa.evidence.parse.clearnlp-bioinformatics 52 | 53 | - inherit: ecd.phase 54 | name: evidence-concept-metamap 55 | options: | 56 | - inherit: bioqa.evidence.concept.metamap-cached 57 | 58 | - inherit: ecd.phase 59 | name: 
evidence-concept-tmtool 60 | options: | 61 | - inherit: bioqa.evidence.concept.tmtool-cached 62 | 63 | - inherit: ecd.phase 64 | name: evidence-concept-lingpipe-genia 65 | options: | 66 | - inherit: bioqa.evidence.concept.lingpipe-genia 67 | 68 | - inherit: ecd.phase 69 | name: evidence-concept-frequent-phrase 70 | options: | 71 | - inherit: baseqa.evidence.concept.frequent-phrase 72 | 73 | - inherit: ecd.phase 74 | name: concept-search-uts 75 | options: | 76 | - inherit: bioqa.evidence.concept.search-uts-cached 77 | 78 | - inherit: ecd.phase 79 | name: concept-merge 80 | options: | 81 | - inherit: baseqa.evidence.concept.merge 82 | 83 | - inherit: ecd.phase 84 | name: answer-yesno 85 | options: | 86 | - inherit: bioqa.answer.yesno.weka-cvr-predict 87 | # - inherit: baseqa.answer.yesno.all-yes 88 | 89 | # - inherit: baseqa.cas-serialize 90 | 91 | post-process: 92 | # submission 93 | - inherit: bioasq.collection.json.json-cas-consumer 94 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/phase-b-train-answer-type.yaml: -------------------------------------------------------------------------------- 1 | # execute 2 | # mvn exec:exec -Dconfig=bioasq.phase-b-train-answer-type 3 | # to test the pipeline 4 | 5 | configuration: 6 | name: phase-b-train-answer-type 7 | author: ziy 8 | 9 | persistence-provider: 10 | inherit: baseqa.persistence.local-sqlite-persistence-provider 11 | 12 | collection-reader: 13 | inherit: baseqa.collection.json.json-collection-reader 14 | dataset: BIOASQ-QA 15 | file: 16 | - input/dryrun-a.json 17 | - input/1b-1-a.json 18 | - input/1b-2-a.json 19 | - input/1b-3-a.json 20 | - input/2b-1-a.json 21 | - input/2b-2-a.json 22 | - input/2b-3-a.json 23 | - input/2b-4-a.json 24 | - input/2b-5-a.json 25 | - input/3b-1-a.json 26 | - input/3b-2-a.json 27 | - input/3b-3-a.json 28 | - input/3b-4-a.json 29 | # - input/3b-5-a.json 30 | type: [factoid, list] 31 | decorators: | 32 | - inherit: 
bioasq.gs.bioasq-qa-gs-decorator 33 | persistence-provider: | 34 | inherit: baseqa.persistence.local-sqlite-persistence-provider 35 | 36 | pipeline: 37 | - inherit: ecd.phase 38 | name: question-parse 39 | options: | 40 | - inherit: bioqa.question.parse.clearnlp-bioinformatics 41 | 42 | - inherit: ecd.phase 43 | name: question-concept-metamap 44 | options: | 45 | - inherit: bioqa.question.concept.metamap-cached 46 | 47 | - inherit: ecd.phase 48 | name: question-concept-tmtool 49 | options: | 50 | - inherit: bioqa.question.concept.tmtool-cached 51 | 52 | - inherit: ecd.phase 53 | name: question-concept-lingpipe-genia 54 | options: | 55 | - inherit: bioqa.question.concept.lingpipe-genia 56 | 57 | - inherit: ecd.phase 58 | name: question-focus 59 | options: | 60 | - inherit: baseqa.question.focus 61 | 62 | - inherit: ecd.phase 63 | name: concept-search-uts 64 | options: | 65 | - inherit: bioqa.evidence.concept.search-uts-cached 66 | 67 | - inherit: ecd.phase 68 | name: concept-merge 69 | options: | 70 | - inherit: baseqa.evidence.concept.merge 71 | 72 | - inherit: ecd.phase 73 | name: answer-type 74 | options: | 75 | - inherit: bioqa.answer_type.liblinear-train 76 | - inherit: bioqa.answer_type.liblinear-null-train 77 | # - inherit: bioqa.answer_type.train-weka-reptree 78 | # - inherit: bioqa.answer_type.train-weka-reptree-null 79 | # - inherit: bioqa.answer_type.train-weka-knn 80 | # - inherit: bioqa.answer_type.train-weka-knn-null 81 | # - inherit: bioqa.answer_type.train-meka-pcc 82 | # - inherit: bioqa.answer_type.train-meka-pcc-null 83 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/preprocess-answer-type-gslabel.yaml: -------------------------------------------------------------------------------- 1 | # execute 2 | # mvn exec:exec -Dconfig=bioasq.preprocess-answer-type-gslabel 3 | # to test the pipeline 4 | 5 | configuration: 6 | name: preprocess-answer-type-gslabel 7 | author: ziy 8 | 9 | 
persistence-provider: 10 | inherit: baseqa.persistence.local-sqlite-persistence-provider 11 | 12 | collection-reader: 13 | inherit: baseqa.collection.json.json-collection-reader 14 | dataset: BIOASQ-QA 15 | file: 16 | - input/dryrun-b.json 17 | - input/1b-1-b.json 18 | - input/1b-2-b.json 19 | - input/1b-3-b.json 20 | - input/2b-1-b.json 21 | - input/2b-2-b.json 22 | - input/2b-3-b.json 23 | - input/2b-4-b.json 24 | - input/2b-5-b.json 25 | - input/3b-1-b.json 26 | - input/3b-2-b.json 27 | - input/3b-3-b.json 28 | - input/3b-4-b.json 29 | - input/3b-5-b.json 30 | type: [factoid, list] 31 | decorators: | 32 | - inherit: bioasq.gs.bioasq-qa-gs-decorator 33 | persistence-provider: | 34 | inherit: baseqa.persistence.local-sqlite-persistence-provider 35 | 36 | pipeline: 37 | - inherit: ecd.phase 38 | name: question-parse 39 | options: | 40 | - inherit: bioqa.question.parse.clearnlp-bioinformatics 41 | 42 | - inherit: ecd.phase 43 | name: answer-type-gslabel-uts 44 | options: | 45 | - inherit: bioqa.answer_type.gslabel-uts 46 | 47 | - inherit: ecd.phase 48 | name: answer-type-gslabel-tmtool 49 | option-timeout: 30 50 | options: | 51 | - inherit: bioqa.answer_type.gslabel-tmtool -------------------------------------------------------------------------------- /src/main/resources/bioasq/preprocess-kb-cache.yaml: -------------------------------------------------------------------------------- 1 | # execute 2 | # mvn exec:exec -Dconfig=bioasq.preprocess-kb-cache 3 | # to test the pipeline 4 | 5 | configuration: 6 | name: preprocess-kb-cache 7 | author: ziy 8 | 9 | persistence-provider: 10 | inherit: baseqa.persistence.local-sqlite-persistence-provider 11 | 12 | collection-reader: 13 | inherit: baseqa.collection.json.json-collection-reader 14 | dataset: BIOASQ-QA 15 | file: 16 | # - input/one-question.json 17 | - input/dryrun-b.json 18 | - input/1b-1-b.json 19 | - input/1b-2-b.json 20 | - input/1b-3-b.json 21 | - input/2b-1-b.json 22 | - input/2b-2-b.json 23 | - 
input/2b-3-b.json 24 | - input/2b-4-b.json 25 | - input/2b-5-b.json 26 | - input/3b-1-b.json 27 | - input/3b-2-b.json 28 | - input/3b-3-b.json 29 | - input/3b-4-b.json 30 | - input/3b-5-b.json 31 | - input/4b-1-b.json 32 | - input/4b-2-b.json 33 | - input/4b-3-b.json 34 | - input/4b-4-b.json 35 | - input/4b-5-b.json 36 | #- input/4b-dev.json.auto.abstract 37 | decorators: | 38 | - inherit: bioasq.gs.bioasq-qa-gs-decorator 39 | persistence-provider: | 40 | inherit: baseqa.persistence.local-sqlite-persistence-provider 41 | 42 | pipeline: 43 | - inherit: ecd.phase 44 | name: question-metamap-cache 45 | option-timeout: 30 46 | options: | 47 | - inherit: bioqa.preprocess.question-kb-cache 48 | 49 | - inherit: ecd.phase 50 | name: passage-metamap-cache 51 | option-timeout: 30 52 | options: | 53 | - inherit: bioqa.preprocess.passage-kb-cache 54 | -------------------------------------------------------------------------------- /src/main/resources/bioasq/triple/retrieval/gopubmed.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioasq.triple.retrieval.GoPubMedTripleRetrievalExecutor 2 | 3 | pages: 1 4 | hits: 10 5 | conf: /properties/direct-gopubmed-triple.properties # /properties/metal-gopubmed-triple.properties 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/abstract_query/token-concept.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.abstract_query.token-concept 2 | 3 | stoplist-path: /dictionaries/stoplist-gene-top5k.txt 4 | use-weight: false 5 | use-type: false 6 | #pos-tags-path: /dictionaries/pos-tags.txt 7 | #noun-tags-path: /dictionaries/noun-tags.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/cao/predict-crf.yaml: -------------------------------------------------------------------------------- 1 | inherit: 
baseqa.answer.cao.predict 2 | 3 | transducer: 'inherit: bioqa.answer.cao.transducer-crf' 4 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/cao/predict-hmm.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.cao.predict 2 | 3 | transducer: 'inherit: bioqa.answer.cao.transducer-hmm' 4 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/cao/train-crf.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.cao.train 2 | 3 | transducer: 'inherit: bioqa.answer.cao.transducer-crf' 4 | sample-o2o: 1.0 5 | window-size: 20 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/cao/train-hmm.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.cao.train 2 | 3 | transducer: 'inherit: bioqa.answer.cao.transducer-hmm' 4 | sample-o2o: 1.0 5 | window-size: 50 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/cao/transducer-crf.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.transducers.crf 2 | 3 | model-file: src/main/resources/models/bioqa/answer/cao/3b-dev-crf-model -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/cao/transducer-hmm.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.transducers.hmm 2 | 3 | model-file: src/main/resources/models/bioqa/answer/cao/3b-dev-hmm-model -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/collective_score/liblinear-predict.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.collective_score.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.answer.collective_score.liblinear' 4 | feature-file: result/answer-collective-score-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/collective_score/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.collective_score.classifier-train 2 | 3 | classifier: 'inherit: bioqa.answer.collective_score.liblinear' -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/collective_score/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/answer/collective_score/4b-dev-liblinear.findex 4 | label-index-file: src/main/resources/models/bioqa/answer/collective_score/4b-dev-liblinear.lindex 5 | model-file: src/main/resources/models/bioqa/answer/collective_score/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_LR, L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC 7 | balance-weight: false 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/collective_score/weka-cvr-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.collective_score.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.answer.collective_score.weka-cvr' 4 | feature-file: result/answer-collective-score-predict-weka-cvr.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/collective_score/weka-cvr-train.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.collective_score.classifier-train 2 | 3 | classifier: 'inherit: bioqa.answer.collective_score.weka-cvr' 4 | resample-type: DOWN -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/collective_score/weka-cvr.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-cvr 2 | 3 | model-file: src/main/resources/models/bioqa/answer/collective_score/4b-dev-weka-cvr.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer/collective_score/4b-dev-weka-cvr.dataset-schema 5 | balance-weight: true 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/generate/generate.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.generate.generate 2 | 3 | generators: | 4 | - inherit: baseqa.answer.generate.generators.choice 5 | - inherit: baseqa.answer.generate.generators.quantity 6 | - inherit: bioqa.answer.generate.generators.concept 7 | - inherit: baseqa.answer.generate.generators.cav-covering-concept 8 | # - inherit: baseqa.answer.generate.generators.covering-phrase 9 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/generate/generators/concept.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.generate.generators.concept 2 | 3 | filter-question-concepts: false 4 | filter-question-tokens: false 5 | stoplist: null # /dictionaries/stoplist-gene-top5k.txt # stoplist-gene-modified.txt, stoplist-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/classifier-predict.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.classifier-predict 2 | 3 | scorers: | 4 | - inherit: baseqa.answer.score.scorers.type-coercion 5 | - inherit: baseqa.answer.score.scorers.cao-count 6 | - inherit: baseqa.answer.score.scorers.name-count 7 | - inherit: baseqa.answer.score.scorers.avg-covered-token-count 8 | - inherit: bioqa.answer.score.scorers.stopword-count 9 | - inherit: baseqa.answer.score.scorers.token-overlap-count 10 | - inherit: baseqa.answer.score.scorers.concept-overlap-count 11 | - inherit: bioqa.answer.score.scorers.token-proximity 12 | - inherit: bioqa.answer.score.scorers.concept-proximity 13 | - inherit: baseqa.answer.score.scorers.focus-overlap-count 14 | - inherit: bioqa.answer.score.scorers.parsehead-proximity 15 | - inherit: baseqa.answer.score.scorers.answer-type 16 | - inherit: baseqa.answer.score.scorers.focus 17 | - inherit: baseqa.answer.score.scorers.concept-type -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.classifier-train 2 | 3 | scorers: | 4 | - inherit: baseqa.answer.score.scorers.type-coercion 5 | - inherit: baseqa.answer.score.scorers.cao-count 6 | - inherit: baseqa.answer.score.scorers.name-count 7 | - inherit: baseqa.answer.score.scorers.avg-covered-token-count 8 | - inherit: bioqa.answer.score.scorers.stopword-count 9 | - inherit: baseqa.answer.score.scorers.token-overlap-count 10 | - inherit: baseqa.answer.score.scorers.concept-overlap-count 11 | - inherit: bioqa.answer.score.scorers.token-proximity 12 | - inherit: bioqa.answer.score.scorers.concept-proximity 13 | - inherit: baseqa.answer.score.scorers.focus-overlap-count 14 | - inherit: bioqa.answer.score.scorers.parsehead-proximity 15 | - inherit: baseqa.answer.score.scorers.answer-type 16 | - 
inherit: baseqa.answer.score.scorers.focus 17 | - inherit: baseqa.answer.score.scorers.concept-type -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/cv-load-liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.cv-load 2 | 3 | cv-predict-file: /models/bioqa/answer/score/4b-dev-liblinear.cv 4 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.score.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.answer.score.liblinear' 4 | feature-file: result/answer-score-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.score.classifier-train 2 | 3 | classifier: 'inherit: bioqa.answer.score.liblinear' 4 | cv-predict-file: src/main/resources/models/bioqa/answer/score/4b-dev-liblinear.cv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/answer/score/4b-dev-liblinear.findex 4 | label-index-file: src/main/resources/models/bioqa/answer/score/4b-dev-liblinear.lindex 5 | model-file: src/main/resources/models/bioqa/answer/score/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_LR, L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC 7 | balance-weight: true 8 | 
-------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/scorers/concept-proximity.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.scorers.concept-proximity 2 | 3 | stoplist: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/scorers/parsehead-proximity.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.scorers.parsehead-proximity 2 | 3 | stoplist: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/scorers/stopword-count.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.scorers.stopword-count 2 | 3 | stoplist: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/scorers/token-proximity.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.score.scorers.token-proximity 2 | 3 | stoplist: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/weka-cvr-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.score.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.answer.score.weka-cvr' 4 | feature-file: result/answer-score-predict-weka-cvr.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/weka-cvr-train.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.score.classifier-train 2 | 3 | classifier: 'inherit: bioqa.answer.score.weka-cvr' 4 | cv-predict-file: src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.cv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/score/weka-cvr.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-cvr 2 | 3 | model-file: src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.dataset-schema 5 | balance-weight: true 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.yesno.predict 2 | 3 | classifier: 'inherit: bioqa.answer.yesno.liblinear' 4 | feature-file: result/answer-yesno-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.yesno.train 2 | 3 | classifier: 'inherit: bioqa.answer.yesno.liblinear' 4 | cv-predict-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-liblinear.cv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-liblinear.findex 4 | label-index-file: 
src/main/resources/models/bioqa/answer/yesno/4b-dev-liblinear.lindex 5 | model-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_LR, L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC 7 | balance-weight: true 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.yesno.predict 2 | 3 | scorers: | 4 | - inherit: baseqa.answer.yesno.scorers.concept-overlap 5 | - inherit: bioqa.answer.yesno.scorers.token-overlap 6 | - inherit: baseqa.answer.yesno.scorers.expected-answer-overlap 7 | - inherit: baseqa.answer.yesno.scorers.sentiment 8 | - inherit: baseqa.answer.yesno.scorers.negation 9 | - inherit: bioqa.answer.yesno.scorers.alternate-answer 10 | # requires "classifier" -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/scorers/alternate-answer.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.yesno.scorers.alternate-answer 2 | 3 | factoid-pipeline: | 4 | - inherit: baseqa.question.yesno.yesno-to-factoid 5 | - inherit: bioqa.answer.generate.generate 6 | - inherit: baseqa.answer.modify.modify 7 | - inherit: bioqa.answer.score.liblinear-predict -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/scorers/token-overlap.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.yesno.scorers.token-overlap 2 | 3 | stoplist-path: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/train.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.answer.yesno.train 2 | 3 | scorers: | 4 | - inherit: baseqa.answer.yesno.scorers.concept-overlap 5 | - inherit: bioqa.answer.yesno.scorers.token-overlap 6 | - inherit: baseqa.answer.yesno.scorers.expected-answer-overlap 7 | - inherit: baseqa.answer.yesno.scorers.sentiment 8 | - inherit: baseqa.answer.yesno.scorers.negation 9 | - inherit: bioqa.answer.yesno.scorers.alternate-answer 10 | # requires "classifier", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/weka-cvr-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.yesno.predict 2 | 3 | classifier: 'inherit: bioqa.answer.yesno.weka-cvr' 4 | feature-file: result/answer-yesno-predict-weka-cvr.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/weka-cvr-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.yesno.train 2 | 3 | classifier: 'inherit: bioqa.answer.yesno.weka-cvr' 4 | cv-predict-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.cv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/weka-cvr.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-cvr 2 | 3 | model-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.dataset-schema 5 | dataset-export: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka.arff 6 | balance-weight: true 7 | -------------------------------------------------------------------------------- 
/src/main/resources/bioqa/answer/yesno/weka-logistic-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.yesno.predict 2 | 3 | classifier: 'inherit: bioqa.answer.yesno.weka-logistic' 4 | feature-file: result/answer-yesno-predict-weka-logistic.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/weka-logistic-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.answer.yesno.train 2 | 3 | classifier: 'inherit: bioqa.answer.yesno.weka-logistic' 4 | cv-predict-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.cv 5 | resample-type: UP -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer/yesno/weka-logistic.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-logistic 2 | 3 | model-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.dataset-schema 5 | dataset-export: src/main/resources/models/bioqa/answer/yesno/4b-dev-weka.arff 6 | balance-weight: true 7 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/gslabel-tmtool.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.gslabel-concept 2 | 3 | at-gslabel-file: src/main/resources/models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 4 | concept-provider: 'inherit: bioqa.providers.kb.tmtool-cached' 5 | batch-size: 100 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/gslabel-uts.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.gslabel-concept-search 2 | 3 | at-gslabel-file: src/main/resources/models/bioqa/answer_type/4b-dev-gslabel-uts.json 4 | concept-search-provider: 'inherit: bioqa.providers.kb.concept-search-uts-cached' 5 | batch-size: 1 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/liblinear-null-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.liblinear-null' 4 | predict-file: result/answer-type-predict-liblinear-null.tsv 5 | limit: 5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/liblinear-null-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.liblinear-null' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear-null.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: true 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/liblinear-null.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear-null.findex 4 | label-index-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear-null.lindex 5 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear-null.model 6 | solver-type: L1R_LR # L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, 
L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC, L2R_LR 7 | balance-weight: false 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.liblinear' 4 | predict-file: result/answer-type-predict-liblinear.tsv 5 | 6 | cross-opts: 7 | limit: [5] -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.liblinear' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: false 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear.findex 4 | label-index-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear.lindex 5 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC, L2R_LR 7 | balance-weight: false 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/load-cv-liblinear-null.yaml: 
-------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.load-cv 2 | 3 | cv-predict-file: /models/bioqa/answer_type/4b-dev-liblinear-null.cv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/load-cv-liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.load-cv 2 | 3 | cv-predict-file: /models/bioqa/answer_type/4b-dev-liblinear.cv -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/meka-pcc-null-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.meka-pcc-null' 4 | predict-file: result/answer-type-predict-meka-pcc-null.tsv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/meka-pcc-null-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.meka-pcc-null' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-meka-pcc-null.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: true 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/meka-pcc-null.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.meka-pcc 2 | 3 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-meka-pcc-null.model 4 | dataset-schema-file: 
src/main/resources/models/bioqa/answer_type/4b-dev-meka-pcc-null.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/meka-pcc-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.meka-pcc' 4 | predict-file: result/answer-type-predict-meka-pcc.tsv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/meka-pcc-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.meka-pcc' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-meka-pcc.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: false 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/meka-pcc.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.meka-pcc 2 | 3 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-meka-pcc.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer_type/4b-dev-meka-pcc.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-knn-null-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-knn-null' 4 | predict-file: result/answer-type-predict-weka-knn-null.tsv 5 | -------------------------------------------------------------------------------- 
/src/main/resources/bioqa/answer_type/weka-knn-null-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-knn-null' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-knn-null.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: true 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-knn-null.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-knn 2 | 3 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-knn-null.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-knn-null.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-knn-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-knn' 4 | predict-file: result/answer-type-predict-weka-knn.tsv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-knn-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-knn' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-knn.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: false 9 | type-ratio-threshold: 0.5 
-------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-knn.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-knn 2 | 3 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-knn.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-knn.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-reptree-null-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-reptree-null' 4 | predict-file: result/answer-type-predict-weka-reptree-null.tsv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-reptree-null-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-reptree-null' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-reptree-null.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: true 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-reptree-null.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-reptree 2 | 3 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-reptree-null.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-reptree-null.dataset-schema 
-------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-reptree-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.predict 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-reptree' 4 | predict-file: result/answer-type-predict-weka-reptree.tsv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-reptree-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.answer_type.train 2 | 3 | classifier: 'inherit: bioqa.answer_type.weka-reptree' 4 | cv-predict-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-reptree.cv 5 | at-gslabel-files: 6 | - /models/bioqa/answer_type/4b-dev-gslabel-uts.json 7 | - /models/bioqa/answer_type/4b-dev-gslabel-tmtool.json 8 | null-type: false 9 | type-ratio-threshold: 0.5 -------------------------------------------------------------------------------- /src/main/resources/bioqa/answer_type/weka-reptree.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-reptree 2 | 3 | model-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-reptree.model 4 | dataset-schema-file: src/main/resources/models/bioqa/answer_type/4b-dev-weka-reptree.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/.gitignore: -------------------------------------------------------------------------------- 1 | rerank/scorers/lucene-bioconcept.yaml 2 | retrieval/lucene-bioconcept.yaml -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: 
baseqa.concept.rerank.classifier-predict 2 | 3 | scorers: | 4 | - inherit: baseqa.concept.rerank.scorers.original-score 5 | - inherit: baseqa.concept.rerank.scorers.name-match 6 | - inherit: bioqa.concept.rerank.scorers.lucene-bioconcept -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.concept.rerank.classifier-train 2 | 3 | scorers: | 4 | - inherit: baseqa.concept.rerank.scorers.original-score 5 | - inherit: baseqa.concept.rerank.scorers.name-match 6 | - inherit: bioqa.concept.rerank.scorers.lucene-bioconcept -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.concept.rerank.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.concept.rerank.liblinear' 4 | feature-file: result/concept-rerank-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.concept.rerank.classifier-train 2 | 3 | classifier: 'inherit: bioqa.concept.rerank.liblinear' 4 | cv-predict-file: src/main/resources/models/bioqa/concept/rerank/4b-dev-liblinear.cv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/concept/rerank/4b-dev-liblinear.findex 4 | label-index-file: 
src/main/resources/models/bioqa/concept/rerank/4b-dev-liblinear.lindex 5 | model-file: src/main/resources/models/bioqa/concept/rerank/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC, L2R_LR 7 | balance-weight: true 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/scorers/lucene-bioconcept.yaml.template: -------------------------------------------------------------------------------- 1 | inherit: baseqa.concept.rerank.scorers.lucene 2 | 3 | fields: [name, synonym, definition] 4 | index: # ADD INDEX PATH HERE, E.G. ../index/bioconcept-lucene 5 | id-field: id 6 | source-field: source 7 | uri-prefix: /dictionaries/bioconcept-uri-prefix.tsv 8 | stoplist-path: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/weka-logistic-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.concept.rerank.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.concept.rerank.weka-logistic' 4 | feature-file: result/concept-rerank-predict-weka-logistic.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/weka-logistic-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.concept.rerank.classifier-train 2 | 3 | classifier: 'inherit: bioqa.concept.rerank.weka-logistic' 4 | cv-predict-file: src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.cv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/rerank/weka-logistic.yaml: -------------------------------------------------------------------------------- 1 | inherit: 
baseqa.providers.ml.classifiers.weka-logistic 2 | 3 | model-file: src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.model 4 | dataset-schema-file: src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.dataset-schema 5 | dataset-export: src/main/resources/models/bioqa/concept/rerank/4b-dev-weka.arff 6 | balance-weight: false 7 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/concept/retrieval/lucene-bioconcept.yaml.template: -------------------------------------------------------------------------------- 1 | inherit: baseqa.concept.retrieval.lucene 2 | 3 | fields: [name, synonym, definition] 4 | index: # ADD INDEX PATH HERE, E.G. ../index/bioconcept-lucene 5 | id-field: id 6 | name-field: name 7 | source-field: source 8 | uri-prefix: /dictionaries/bioconcept-uri-prefix.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/.gitignore: -------------------------------------------------------------------------------- 1 | rerank/scorers/lucene-medline.yaml 2 | retrieval/lucene-medline.yaml -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.document.rerank.classifier-predict 2 | 3 | scorers: | 4 | - inherit: baseqa.document.rerank.scorers.original-score 5 | - inherit: bioqa.document.rerank.scorers.lucene-medline -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.document.rerank.classifier-train 2 | 3 | scorers: | 4 | - inherit: baseqa.document.rerank.scorers.original-score 5 | - inherit: 
bioqa.document.rerank.scorers.lucene-medline -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/cv-load-liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.document.rerank.cv-load 2 | 3 | cv-predict-file: /models/bioqa/document/rerank/4b-dev-liblinear.cv -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.document.rerank.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.document.rerank.liblinear' 4 | feature-file: result/document-rerank-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.document.rerank.classifier-train 2 | 3 | classifier: 'inherit: bioqa.document.rerank.liblinear' 4 | cv-predict-file: src/main/resources/models/bioqa/document/rerank/4b-dev-liblinear.cv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/document/rerank/4b-dev-liblinear.findex 4 | label-index-file: src/main/resources/models/bioqa/document/rerank/4b-dev-liblinear.lindex 5 | model-file: src/main/resources/models/bioqa/document/rerank/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC, L2R_LR 7 | balance-weight: true 8 | 
-------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/logreg-params.txt: -------------------------------------------------------------------------------- 1 | -3.5976 -0.0115 0.2500 0.2350 -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/logreg-pubmed.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.document.rerank.logreg 2 | 3 | query-string-contructor: edu.cmu.lti.oaqa.bioqa.providers.query.PubMedQueryStringConstructor 4 | doc-logreg-params: /bioqa/document/rerank/logreg-params.txt 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/scorers/lucene-medline.yaml.template: -------------------------------------------------------------------------------- 1 | inherit: baseqa.document.rerank.scorers.lucene 2 | 3 | fields: [articleTitle, abstractText] 4 | index: # ADD INDEX PATH HERE, E.G. 
../index/medline16n-lucene 5 | id-field: pmid 6 | uri-prefix: http://www.ncbi.nlm.nih.gov/pubmed/ 7 | stoplist-path: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/weka-logistic-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.document.rerank.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.document.rerank.weka-logistic' 4 | feature-file: result/document-rerank-predict-weka-logistic.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/weka-logistic-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.document.rerank.classifier-train 2 | 3 | classifier: 'inherit: bioqa.document.rerank.weka-logistic' 4 | cv-predict-file: src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.cv 5 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/rerank/weka-logistic.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.weka-logistic 2 | 3 | model-file: src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.model 4 | dataset-schema-file: src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.dataset-schema 5 | dataset-export: src/main/resources/models/bioqa/document/rerank/4b-dev-weka.arff 6 | balance-weight: false 7 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/document/retrieval/lucene-medline.yaml.template: -------------------------------------------------------------------------------- 1 | inherit: baseqa.document.retrieval.lucene 2 | 3 | fields: [articleTitle, abstractText] 4 | index: # ADD INDEX PATH HERE, E.G. 
../index/medline16n-lucene 5 | id-field: pmid 6 | title-field: articleTitle 7 | text-field: abstractText 8 | uri-prefix: http://www.ncbi.nlm.nih.gov/pubmed/ -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/concept/lingpipe-genetag.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.lingpipe-genetag' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/concept/lingpipe-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.lingpipe-genia' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/concept/metamap-cached.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | allowed-concept-types: /dictionaries/allowed-umls-types.txt 4 | concept-provider: 'inherit: bioqa.providers.kb.metamap-cached' 5 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/concept/search-uts-cached.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.evidence.concept.search 2 | 3 | concept-search-provider: 'inherit: bioqa.providers.kb.concept-search-uts-cached' 4 | synonym-expansion-provider: 'inherit: bioqa.providers.kb.synonym-uts-cached' -------------------------------------------------------------------------------- 
/src/main/resources/bioqa/evidence/concept/search-uts.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.evidence.concept.search 2 | 3 | concept-search-provider: 'inherit: bioqa.providers.kb.concept-search-uts' 4 | synonym-expansion-provider: 'inherit: bioqa.providers.kb.synonym-uts' -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/concept/tmtool-cached.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.tmtool-cached' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/concept/tmtool.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.concept.PassageConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.tmtool' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/parse/clearnlp-bioinformatics.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.clearnlp-bioinformatics' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/parse/clearnlp-medical.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.clearnlp-medical' 4 | view-name-prefix: ptv 
-------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/parse/lingpipe-indoeuro-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-indoeuro-genia' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/parse/lingpipe-indoeuro-medpost.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-indoeuro-medpost' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/parse/lingpipe-regex-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-regex-genia' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/evidence/parse/lingpipe-regex-medpost.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.evidence.parse.PassageParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-regex-medpost' 4 | view-name-prefix: ptv -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/rerank/classifier-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.passage.rerank.classifier-predict 2 | 3 | scorers: | 4 | - inherit: baseqa.passage.rerank.scorers.original-score 5 | - inherit: 
baseqa.passage.rerank.scorers.meta-info 6 | - inherit: bioqa.passage.rerank.scorers.lucene 7 | # add "classifier", "feature-file" -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/rerank/classifier-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.passage.rerank.classifier-train 2 | 3 | scorers: | 4 | - inherit: baseqa.passage.rerank.scorers.original-score 5 | - inherit: baseqa.passage.rerank.scorers.meta-info 6 | - inherit: bioqa.passage.rerank.scorers.lucene 7 | resample-type: DOWN 8 | # add "classifier", "cv-predict-file" -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/rerank/liblinear-predict.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.passage.rerank.classifier-predict 2 | 3 | classifier: 'inherit: bioqa.passage.rerank.liblinear' 4 | feature-file: result/passage-rerank-predict-liblinear.tsv -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/rerank/liblinear-train.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.passage.rerank.classifier-train 2 | 3 | classifier: 'inherit: bioqa.passage.rerank.liblinear' 4 | cv-predict-file: src/main/resources/models/bioqa/passage/rerank/4b-dev-liblinear.cv -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/rerank/liblinear.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.providers.ml.classifiers.liblinear 2 | 3 | feat-index-file: src/main/resources/models/bioqa/passage/rerank/4b-dev-liblinear.findex 4 | label-index-file: src/main/resources/models/bioqa/passage/rerank/4b-dev-liblinear.lindex 5 | model-file: 
src/main/resources/models/bioqa/passage/rerank/4b-dev-liblinear.model 6 | solver-type: L1R_LR # L2R_L1LOSS_SVC_DUAL, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL, L1R_L2LOSS_SVC, L2R_LR 7 | balance-weight: true 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/rerank/scorers/lucene.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.passage.rerank.scorers.lucene 2 | 3 | stoplist-path: /dictionaries/stoplist-gene-top5k.txt -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/retrieval/lucene-sentence-medline-improve.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.passage.retrieval.lucene-sentence-improve 2 | 3 | sentence-model: com.aliasi.sentences.MedlineSentenceModel 4 | parser-provider: 'inherit: bioqa.providers.parser.clearnlp-bioinformatics' 5 | hits: 100 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/passage/retrieval/lucene-sentence-medline.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.passage.retrieval.lucene-sentence 2 | 3 | sentence-model: com.aliasi.sentences.MedlineSentenceModel 4 | parser-provider: 'inherit: bioqa.providers.parser.clearnlp-bioinformatics' 5 | hits: 100 6 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/preprocess/passage-kb-cache.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.preprocess.passage-concept-cache 2 | 3 | batch-size: 100 4 | concept-providers: | 5 | - inherit: bioqa.providers.kb.tmtool-cached 6 | - inherit: bioqa.providers.kb.metamap-cached 7 | synonym-expansion-providers: | 8 | - inherit: bioqa.providers.kb.synonym-uts-cached 9 | 
-------------------------------------------------------------------------------- /src/main/resources/bioqa/preprocess/question-kb-cache.yaml: -------------------------------------------------------------------------------- 1 | inherit: baseqa.preprocess.question-concept-cache 2 | 3 | concept-providers: | 4 | - inherit: bioqa.providers.kb.tmtool-cached 5 | - inherit: bioqa.providers.kb.metamap-cached 6 | synonym-expansion-providers: | 7 | - inherit: bioqa.providers.kb.synonym-uts-cached 8 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/.gitignore: -------------------------------------------------------------------------------- 1 | concept-search-uts.yaml 2 | concept-search-uts-cached.yaml 3 | metamap.yaml 4 | metamap-cached.yaml 5 | synonym-uts.yaml 6 | synonym-uts-cached.yaml 7 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/concept-search-uts-cached.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.CachedUtsConceptSearchProvider 2 | 3 | service: http://umlsks.nlm.nih.gov 4 | version: # ADD VERSION HERE, E.G. 2015AB 5 | username: # ADD USERNAME HERE 6 | password: # ADD PASSWORD HERE 7 | 8 | db-file: src/main/resources/concept-search-cache/uts-cache.mapdb 9 | map-name: uts-cache -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/concept-search-uts.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.UtsConceptSearchProvider 2 | 3 | service: http://umlsks.nlm.nih.gov 4 | version: # ADD VERSION HERE, E.G. 
2015AB 5 | username: # ADD USERNAME HERE 6 | password: # ADD PASSWORD HERE -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/lingpipe-genetag.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.kb.LingPipeNerConceptProvider 2 | 3 | chunker-model: /ne-en-bio-genetag.HmmChunker 4 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/lingpipe-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.kb.LingPipeNerConceptProvider 2 | 3 | chunker-model: /ne-en-bio-genia.TokenShapeChunker 4 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/metamap-cached.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.CachedMetaMapConceptProvider 2 | 3 | version: # ADD VERSION HERE, E.G. '1516' 4 | username: # ADD USERNAME HERE 5 | password: # ADD PASSWORD HERE 6 | email: # ADD EMAIL ADDRESS HERE 7 | 8 | db-file: src/main/resources/metamap-cache/metamap-cache.mapdb 9 | map-name: metamap-cache 10 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/metamap.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.MetaMapConceptProvider 2 | 3 | version: # ADD VERSION HERE, E.G. 
'1516' 4 | username: # ADD USERNAME HERE 5 | password: # ADD PASSWORD HERE 6 | email: # ADD EMAIL ADDRESS HERE 7 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/synonym-uts-cached.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.CachedUtsSynonymExpansionProvider 2 | 3 | service: http://umlsks.nlm.nih.gov 4 | version: # ADD VERSION HERE, E.G. 2015AB 5 | username: # ADD USERNAME HERE 6 | password: # ADD PASSWORD HERE 7 | nthreads: 100 8 | timeout: 5 9 | 10 | db-file: src/main/resources/synonym-cache/uts-cache.mapdb 11 | map-name: uts-cache -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/synonym-uts.yaml.template: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.UtsSynonymExpansionProvider 2 | 3 | service: http://umlsks.nlm.nih.gov 4 | version: # ADD VERSION HERE, E.G. 
2015AB 5 | username: # ADD USERNAME HERE 6 | password: # ADD PASSWORD HERE 7 | nthreads: 100 8 | timeout: 5 9 | 10 | db-file: src/main/resources/synonym-cache/uts-cache.mapdb 11 | map-name: uts-cache -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/tmtool-cached.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.CachedTmToolConceptProvider 2 | 3 | triggers: [tmChem, DNorm, tmVar, GNormPlus] 4 | 5 | db-file: src/main/resources/tmtool-cache/tmtool-cache.mapdb 6 | map-name: tmtool-cache 7 | -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/kb/tmtool.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.bioqa.providers.kb.TmToolConceptProvider 2 | 3 | triggers: [tmChem, DNorm, tmVar, GNormPlus] -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/parser/clearnlp-bioinformatics.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.ClearParserProvider 2 | 3 | pos-model: /bioinformatics-en-pos.xz 4 | dep-model: /bioinformatics-en-dep.xz -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/parser/clearnlp-medical.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.ClearParserProvider 2 | 3 | pos-model: /medical-en-pos.xz 4 | dep-model: /medical-en-dep.xz -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/parser/lingpipe-indoeuro-genia.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.providers.parser.LingPipeParserProvider 2 | 3 | token-factory: com.aliasi.tokenizer.IndoEuropeanTokenizerFactory 4 | token-factory-params: [] 5 | pos-model: /pos-en-bio-genia.HiddenMarkovModel -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/parser/lingpipe-indoeuro-medpost.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.LingPipeParserProvider 2 | 3 | token-factory: com.aliasi.tokenizer.IndoEuropeanTokenizerFactory 4 | token-factory-params: [] 5 | pos-model: /pos-en-bio-medpost.HiddenMarkovModel -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/parser/lingpipe-regex-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.LingPipeParserProvider 2 | 3 | token-factory: com.aliasi.tokenizer.RegExTokenizerFactory 4 | token-factory-params: ['([-''\d\p{L}]|\([-''\d\p{L}]+\))*[-''\d\p{L}]+|\S'] 5 | pos-model: /pos-en-bio-genia.HiddenMarkovModel -------------------------------------------------------------------------------- /src/main/resources/bioqa/providers/parser/lingpipe-regex-medpost.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.providers.parser.LingPipeParserProvider 2 | 3 | token-factory: com.aliasi.tokenizer.RegExTokenizerFactory 4 | token-factory-params: ['([-''\d\p{L}]|\([-''\d\p{L}]+\))*[-''\d\p{L}]+|\S'] 5 | pos-model: /pos-en-bio-medpost.HiddenMarkovModel -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/concept/lingpipe-genetag.yaml: -------------------------------------------------------------------------------- 1 | class: 
edu.cmu.lti.oaqa.baseqa.question.concept.QuestionConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.lingpipe-genetag' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/concept/lingpipe-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.concept.QuestionConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.lingpipe-genia' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/concept/metamap-cached.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.question.concept.metamap 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.metamap-cached' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/concept/metamap.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.concept.QuestionConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.metamap' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/concept/tmtool-cached.yaml: -------------------------------------------------------------------------------- 1 | inherit: bioqa.question.concept.tmtool 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.tmtool-cached' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/concept/tmtool.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.concept.QuestionConceptRecognizer 2 | 3 | concept-provider: 'inherit: bioqa.providers.kb.tmtool' -------------------------------------------------------------------------------- 
/src/main/resources/bioqa/question/parse/clearnlp-bioinformatics.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.clearnlp-bioinformatics' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/parse/clearnlp-medical.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.clearnlp-medical' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/parse/lingpipe-indoeuro-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-indoeuro-genia' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/parse/lingpipe-indoeuro-medpost.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-indoeuro-medpost' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/parse/lingpipe-regex-genia.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-regex-genia' -------------------------------------------------------------------------------- /src/main/resources/bioqa/question/parse/lingpipe-regex-medpost.yaml: 
-------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.baseqa.question.parse.QuestionParser 2 | 3 | parser-provider: 'inherit: bioqa.providers.parser.lingpipe-regex-medpost' -------------------------------------------------------------------------------- /src/main/resources/dictionaries/allowed-umls-types.txt: -------------------------------------------------------------------------------- 1 | umls:aapp 2 | umls:amas 3 | umls:anab 4 | umls:antb 5 | umls:bacs 6 | umls:bact 7 | umls:bdsu 8 | umls:bird 9 | umls:blor 10 | umls:bmod 11 | umls:bpoc 12 | umls:bsoj 13 | umls:carb 14 | umls:celc 15 | umls:celf 16 | umls:cell 17 | umls:cgab 18 | umls:clas 19 | umls:clna 20 | umls:clnd 21 | umls:comd 22 | umls:diap 23 | umls:dora 24 | umls:dsyn 25 | umls:elii 26 | umls:emod 27 | umls:enzy 28 | umls:euka 29 | umls:fish 30 | umls:fndg 31 | umls:fngs 32 | umls:food 33 | umls:ftcn 34 | umls:genf 35 | umls:geoa 36 | umls:gngm 37 | umls:hlca 38 | umls:hops 39 | umls:horm 40 | umls:idcn 41 | umls:imft 42 | umls:inbe 43 | umls:inch 44 | umls:inpr 45 | umls:irda 46 | umls:lbpr 47 | umls:lipd 48 | umls:mamm 49 | umls:mbrt 50 | umls:medd 51 | umls:menp 52 | umls:mobd 53 | umls:moft 54 | umls:neop 55 | umls:nnon 56 | umls:nsba 57 | umls:opco 58 | umls:orch 59 | umls:orga 60 | umls:orgf 61 | umls:ortf 62 | umls:patf 63 | umls:phpr 64 | umls:phsf 65 | umls:phsu 66 | umls:plnt 67 | umls:popg 68 | umls:qlco 69 | umls:qnco 70 | umls:rcpt 71 | umls:resa 72 | umls:resd 73 | umls:sbst 74 | umls:sosy 75 | umls:spco 76 | umls:strd 77 | umls:tisu 78 | umls:tmco 79 | umls:topp 80 | umls:virs 81 | umls:vita -------------------------------------------------------------------------------- /src/main/resources/dictionaries/bioconcept-uri-prefix.tsv: -------------------------------------------------------------------------------- 1 | DISEASE http://www.disease-ontology.org/api/metadata/ 2 | GENE
http://amigo.geneontology.org/cgi-bin/amigo/term_details?term= 3 | JOCHEM http://www.biosemantics.org/jochem# 4 | MESH http://www.nlm.nih.gov/cgi/mesh/2016/MB_cgi?field=uid&exact=Find+Exact+Term&term= 5 | UNIPROT http://www.uniprot.org/uniprot/ -------------------------------------------------------------------------------- /src/main/resources/dictionaries/negation-cues.txt: -------------------------------------------------------------------------------- 1 | aint 2 | cannot 3 | cant 4 | darent 5 | didnt 6 | doesnt 7 | dont 8 | hadnt 9 | hardly 10 | hasnt 11 | havent 12 | havnt 13 | isnt 14 | lack 15 | lacking 16 | lacks 17 | mightnt 18 | mustnt 19 | neednt 20 | neither 21 | never 22 | no 23 | nobody 24 | none 25 | nor 26 | not 27 | nothing 28 | nowhere 29 | n’t 30 | oughtnt 31 | shant 32 | shouldnt 33 | wasnt 34 | without 35 | wouldnt -------------------------------------------------------------------------------- /src/main/resources/dictionaries/noun-tags.txt: -------------------------------------------------------------------------------- 1 | NN 2 | NNS 3 | NNP 4 | NNPS -------------------------------------------------------------------------------- /src/main/resources/dictionaries/pos-tags.txt: -------------------------------------------------------------------------------- 1 | FW 2 | JJ 3 | JJR 4 | JJS 5 | NN 6 | NNS 7 | NNP 8 | NNPS 9 | VB 10 | VBD 11 | VBG 12 | VBN 13 | VBP 14 | VBZ -------------------------------------------------------------------------------- /src/main/resources/dictionaries/quantity-question-words.txt: -------------------------------------------------------------------------------- 1 | how many 2 | how much 3 | how large 4 | how long 5 | diameter 6 | value 7 | rate 8 | percentage 9 | incidence 10 | prevalence 11 | proportion 12 | number -------------------------------------------------------------------------------- /src/main/resources/dictionaries/stoplist-gene-modified.txt: 
-------------------------------------------------------------------------------- 1 | a 2 | about 3 | again 4 | all 5 | almost 6 | also 7 | although 8 | always 9 | among 10 | an 11 | and 12 | another 13 | any 14 | are 15 | as 16 | at 17 | be 18 | because 19 | been 20 | before 21 | being 22 | between 23 | both 24 | but 25 | by 26 | can 27 | could 28 | did 29 | do 30 | does 31 | done 32 | due 33 | during 34 | each 35 | either 36 | enough 37 | especially 38 | etc 39 | for 40 | found 41 | from 42 | further 43 | gene 44 | genes 45 | genome 46 | genomes 47 | had 48 | has 49 | have 50 | having 51 | here 52 | how 53 | however 54 | i 55 | if 56 | in 57 | into 58 | is 59 | it 60 | its 61 | itself 62 | just 63 | kg 64 | km 65 | made 66 | mainly 67 | make 68 | may 69 | mg 70 | might 71 | ml 72 | mm 73 | most 74 | mostly 75 | must 76 | nearly 77 | neither 78 | no 79 | nor 80 | obtained 81 | of 82 | often 83 | on 84 | our 85 | overall 86 | perhaps 87 | protein 88 | quite 89 | rather 90 | really 91 | regarding 92 | seem 93 | seen 94 | sequence 95 | several 96 | should 97 | show 98 | showed 99 | shown 100 | shows 101 | significantly 102 | since 103 | so 104 | some 105 | such 106 | than 107 | that 108 | the 109 | their 110 | theirs 111 | them 112 | then 113 | there 114 | therefore 115 | these 116 | they 117 | this 118 | those 119 | through 120 | thus 121 | to 122 | upon 123 | use 124 | used 125 | using 126 | various 127 | very 128 | was 129 | we 130 | were 131 | what 132 | when 133 | which 134 | while 135 | with 136 | within 137 | without 138 | would -------------------------------------------------------------------------------- /src/main/resources/dictionaries/stoplist-gene.txt: -------------------------------------------------------------------------------- 1 | a 2 | about 3 | again 4 | all 5 | almost 6 | also 7 | although 8 | always 9 | among 10 | an 11 | and 12 | another 13 | any 14 | are 15 | as 16 | at 17 | be 18 | because 19 | been 20 | before 21 | being 22 | between 23 | 
both 24 | but 25 | by 26 | can 27 | could 28 | did 29 | do 30 | does 31 | done 32 | due 33 | during 34 | each 35 | either 36 | enough 37 | especially 38 | etc 39 | for 40 | found 41 | from 42 | further 43 | gene 44 | had 45 | has 46 | have 47 | having 48 | here 49 | how 50 | however 51 | i 52 | if 53 | in 54 | into 55 | is 56 | it 57 | its 58 | itself 59 | just 60 | kg 61 | km 62 | made 63 | mainly 64 | make 65 | may 66 | mg 67 | might 68 | ml 69 | mm 70 | most 71 | mostly 72 | must 73 | nearly 74 | neither 75 | no 76 | nor 77 | obtained 78 | of 79 | often 80 | on 81 | our 82 | overall 83 | perhaps 84 | protein 85 | quite 86 | rather 87 | really 88 | regarding 89 | seem 90 | seen 91 | sequence 92 | several 93 | should 94 | show 95 | showed 96 | shown 97 | shows 98 | significantly 99 | since 100 | so 101 | some 102 | such 103 | than 104 | that 105 | the 106 | their 107 | theirs 108 | them 109 | then 110 | there 111 | therefore 112 | these 113 | they 114 | this 115 | those 116 | through 117 | thus 118 | to 119 | upon 120 | use 121 | used 122 | using 123 | various 124 | very 125 | was 126 | we 127 | were 128 | what 129 | when 130 | which 131 | while 132 | with 133 | within 134 | without 135 | would -------------------------------------------------------------------------------- /src/main/resources/dictionaries/stoplist.txt: -------------------------------------------------------------------------------- 1 | a 2 | about 3 | again 4 | all 5 | almost 6 | also 7 | although 8 | always 9 | among 10 | an 11 | and 12 | another 13 | any 14 | are 15 | as 16 | at 17 | be 18 | because 19 | been 20 | before 21 | being 22 | between 23 | both 24 | but 25 | by 26 | can 27 | could 28 | did 29 | do 30 | does 31 | done 32 | due 33 | during 34 | each 35 | either 36 | enough 37 | especially 38 | etc 39 | for 40 | found 41 | from 42 | further 43 | gene 44 | had 45 | has 46 | have 47 | having 48 | here 49 | how 50 | however 51 | i 52 | if 53 | in 54 | into 55 | is 56 | it 57 | its 58 | 
itself 59 | just 60 | kg 61 | km 62 | made 63 | mainly 64 | make 65 | may 66 | mg 67 | might 68 | ml 69 | mm 70 | most 71 | mostly 72 | must 73 | nearly 74 | neither 75 | no 76 | nor 77 | obtained 78 | of 79 | often 80 | on 81 | our 82 | overall 83 | perhaps 84 | protein 85 | quite 86 | rather 87 | really 88 | regarding 89 | seem 90 | seen 91 | sequence 92 | several 93 | should 94 | show 95 | showed 96 | shown 97 | shows 98 | significantly 99 | since 100 | so 101 | some 102 | such 103 | than 104 | that 105 | the 106 | their 107 | theirs 108 | them 109 | then 110 | there 111 | therefore 112 | these 113 | they 114 | this 115 | those 116 | through 117 | thus 118 | to 119 | upon 120 | use 121 | used 122 | using 123 | various 124 | very 125 | was 126 | we 127 | were 128 | what 129 | when 130 | which 131 | while 132 | with 133 | within 134 | without 135 | would -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Open Advancement Question Answering (OAQA) Project Copyright 2017 Carnegie Mellon University 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations 12 | # under the License. 
13 | # 14 | 15 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 16 | log4j.appender.stdout.Target=System.out 17 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 18 | log4j.appender.stdout.layout.ConversionPattern=[%p] [%c{1}] %m%n 19 | log4j.logger.edu.cmu.lti.oaqa=DEBUG, stdout -------------------------------------------------------------------------------- /src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.findex: -------------------------------------------------------------------------------- 1 | 1 tokens_concatenated@MESH/rank 2 | 2 cmention_names_concatenated@UNIPROT/score 3 | 3 w_id_concept_names_individual@JOCHEM/score 4 | 4 cmention_names_individual@DISEASE/rank 5 | 5 concept_names_concatenated@UNIPROT/score 6 | 6 tokens_concatenated@DISEASE/rank 7 | 7 cmention_names_individual@JOCHEM/rank 8 | 8 w_id_concept_names_individual@DISEASE/score 9 | 9 tokens_concatenated@GENE/rank 10 | 10 concept_names_concatenated@DISEASE/rank 11 | 11 w_id_concept_names_individual@UNIPROT/rank 12 | 12 cmention_names_concatenated@MESH/rank 13 | 13 concept_names_concatenated@GENE/rank 14 | 14 cmention_names_individual@UNIPROT/score 15 | 15 cmention_names_concatenated@DISEASE/rank 16 | 16 concept_names_concatenated@MESH/score 17 | 17 w_id_concept_names_individual@UNIPROT/score 18 | 18 tokens_concatenated@JOCHEM/score 19 | 19 cmention_names_individual@UNIPROT/rank 20 | 20 cmention_names_concatenated@GENE/rank 21 | 21 w_id_concept_names_individual@MESH/score 22 | 22 concept-name-match 23 | 23 original/score 24 | 24 original/rank 25 | 25 concept_names_concatenated@JOCHEM/score 26 | 26 cmention_names_concatenated@GENE/score 27 | 27 concept_names_concatenated@DISEASE/score 28 | 28 cmention_names_individual@JOCHEM/score 29 | 29 tokens_concatenated@GENE/score 30 | 30 cmention_names_concatenated@DISEASE/score 31 | 31 w_id_concept_names_individual@MESH/rank 32 | 32 cmention_names_individual@MESH/score 33 | 33 concept_names_concatenated@JOCHEM/rank 34 | 34 
cmention_names_individual@MESH/rank 35 | 35 cmention_names_individual@DISEASE/score 36 | 36 concept_names_concatenated@UNIPROT/rank 37 | 37 tokens_concatenated@UNIPROT/rank 38 | 38 w_id_concept_names_individual@JOCHEM/rank 39 | 39 cmention_names_concatenated@MESH/score 40 | 40 w_id_concept_names_individual@GENE/rank 41 | 41 cmention_names_individual@GENE/rank 42 | 42 concept_names_concatenated@GENE/score 43 | 43 w_id_concept_names_individual@GENE/score 44 | 44 cmention_names_concatenated@UNIPROT/rank 45 | 45 cmention_names_concatenated@JOCHEM/rank 46 | 46 w_id_concept_names_individual@DISEASE/rank 47 | 47 cmention-name-match 48 | 48 tokens_concatenated@JOCHEM/rank 49 | 49 tokens_concatenated@DISEASE/score 50 | 50 cmention_names_individual@GENE/score 51 | 51 concept_names_concatenated@MESH/rank 52 | 52 tokens_concatenated@MESH/score 53 | 53 tokens_concatenated@UNIPROT/score 54 | 54 cmention_names_concatenated@JOCHEM/score -------------------------------------------------------------------------------- /src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 true 2 | 1 false -------------------------------------------------------------------------------- /src/main/resources/models/bioasq/concept/rerank/4b-dev-liblinear.model: -------------------------------------------------------------------------------- 1 | solver_type L1R_LR 2 | nr_class 2 3 | label 1 2 4 | nr_feature 53 5 | bias 0.000000000000000 6 | w 7 | -0.4623546270993043 8 | 0 9 | 0.3994119587204691 10 | -1.569117167451501 11 | 0.3114856122461142 12 | 0 13 | 0 14 | -0.02279194141608566 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | -0.1760443750805484 21 | 0 22 | 0.1732120675784405 23 | 0 24 | -1.113047689458425 25 | 0 26 | 0 27 | -0.1233853912498214 28 | -1.891144054938629 29 | 0.08844002407650575 30 | -5.226937073850687 31 | 0.2617539510046160 32 | 0 33 | 0 34 | 0.006884867429180922 35 | 0 36 | 
-0.7042847154175214 37 | 0 38 | 0.4368007810015496 39 | 0 40 | -3.075296977163223 41 | 0.3056294515482870 42 | 0 43 | 0 44 | 0 45 | -0.2564862488899015 46 | 0 47 | 0 48 | 0 49 | 4.056526829336180 50 | 0 51 | 0 52 | 0.5563161839949544 53 | -0.8041624243772714 54 | 0 55 | -0.4678671582635070 56 | 0 57 | 0 58 | -0.7151584713292598 59 | -1.376197824893477 60 | 0 61 | -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/collective_score/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 true 2 | 1 false -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/collective_score/4b-dev-weka-cvr.dataset-schema: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/collective_score/4b-dev-weka-cvr.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/collective_score/4b-dev-weka-cvr.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/collective_score/4b-dev-weka-cvr.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/score/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 false 2 | 1 true -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.dataset-schema: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/score/4b-dev-weka-cvr.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 no 2 | 1 yes -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.dataset-schema: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-cvr.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.dataset-schema: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-logistic.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-other.dataset-schema: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-other.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-other.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/answer/yesno/4b-dev-weka-other.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer_type/4b-dev-liblinear-null.lindex: -------------------------------------------------------------------------------- 1 | 17 umls:gngm 2 | 40 umls:lbpr 3 | 23 umls:virs 4 | 46 tmtool:SNP 5 | 9 umls:bpoc 6 | 15 umls:dsyn 7 | 32 tmtool:Chemical 8 | 55 umls:inpo 9 | 38 umls:clas 10 | 7 umls:aapp 11 | 13 umls:fngs 12 | 30 umls:genf 13 | 53 umls:ftcn 14 | 36 umls:sosy 15 | 59 umls:orga 16 | 5 umls:fndg 17 | 22 umls:comd 18 | 45 umls:spco 19 | 28 umls:celf 20 | 51 umls:cgab 21 | 34 tmtool:Disease 22 | 57 tmtool:Species 23 
| 3 null 24 | 20 umls:moft 25 | 43 umls:antb 26 | 26 umls:emod 27 | 49 umls:ortf 28 | 1 _CHOICE 29 | 18 umls:enzy 30 | 41 tmtool:ProteinMutation 31 | 24 umls:horm 32 | 47 umls:diap 33 | 10 umls:orch 34 | 16 umls:bact 35 | 33 tmtool:Gene 36 | 56 umls:cell 37 | 39 umls:qlco 38 | 8 umls:phsu 39 | 14 _QUANTITY 40 | 31 umls:phsf 41 | 54 umls:inpr 42 | 37 umls:rcpt 43 | 6 umls:tisu 44 | 12 umls:neop 45 | 29 umls:topp 46 | 52 umls:bsoj 47 | 35 umls:medd 48 | 58 umls:mbrt 49 | 4 umls:bacs 50 | 21 umls:euka 51 | 44 umls:elii 52 | 27 umls:mobd 53 | 50 umls:amas 54 | 2 umls:nnon 55 | 19 umls:qnco 56 | 42 umls:dora 57 | 25 umls:imft 58 | 48 umls:geoa 59 | 11 umls:celc -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/answer_type/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 23 umls:gngm 2 | 46 umls:geoa 3 | 69 umls:resd 4 | 15 umls:elii 5 | 38 umls:fish 6 | 61 tmtool:Disease 7 | 84 umls:inpo 8 | 7 umls:phsu 9 | 30 umls:patf 10 | 53 umls:inbe 11 | 76 umls:mbrt 12 | 22 umls:blor 13 | 45 umls:lbpr 14 | 34 umls:antb 15 | 57 umls:inpr 16 | 80 umls:anab 17 | 3 umls:bacs 18 | 26 umls:dsyn 19 | 49 umls:ortf 20 | 72 umls:prog 21 | 18 _QUANTITY 22 | 41 umls:comd 23 | 64 umls:resa 24 | 10 umls:celc 25 | 33 umls:rcpt 26 | 56 umls:amas 27 | 79 umls:phpr 28 | 68 umls:nusq 29 | 14 umls:diap 30 | 37 umls:cgab 31 | 60 tmtool:Gene 32 | 83 tmtool:FamilyName 33 | 6 umls:aapp 34 | 29 umls:ftcn 35 | 52 umls:clna 36 | 75 umls:orga 37 | 21 umls:qlco 38 | 44 umls:imft 39 | 67 umls:hlca 40 | 2 umls:nnon 41 | 25 umls:amph 42 | 48 umls:phsf 43 | 71 tmtool:Species 44 | 17 umls:medd 45 | 40 umls:menp 46 | 63 umls:bsoj 47 | 9 umls:orch 48 | 32 umls:bact 49 | 55 umls:irda 50 | 78 umls:bmod 51 | 13 umls:fngs 52 | 36 umls:moft 53 | 59 umls:inch 54 | 82 umls:tmco 55 | 5 umls:tisu 56 | 28 umls:mobd 57 | 51 umls:orgf 58 | 74 tmtool:SNP 59 | 20 umls:vita 60 | 43 umls:sosy 61 | 66 
umls:dora 62 | 1 _CHOICE 63 | 24 umls:food 64 | 47 umls:emod 65 | 70 tmtool:ProteinMutation 66 | 16 umls:enzy 67 | 39 umls:sbst 68 | 62 umls:bdsu 69 | 85 umls:npop 70 | 8 umls:bpoc 71 | 31 umls:euka 72 | 54 umls:genf 73 | 77 umls:idcn 74 | 12 umls:topp 75 | 35 umls:qnco 76 | 58 tmtool:Chemical 77 | 81 umls:plnt 78 | 4 umls:fndg 79 | 27 umls:clas 80 | 50 umls:celf 81 | 73 umls:cell 82 | 19 umls:virs 83 | 42 umls:horm 84 | 65 umls:spco 85 | 11 umls:neop -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/concept/rerank/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 false 2 | 1 true -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.dataset-schema: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/concept/rerank/4b-dev-weka-logistic.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/document/rerank/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 true 2 | 1 false -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.dataset-schema: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.dataset-schema -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/bioasq/e77fbbb8b9e85226e31f53b16dd2a79a77d59f1b/src/main/resources/models/bioqa/document/rerank/4b-dev-weka-logistic.model -------------------------------------------------------------------------------- /src/main/resources/models/bioqa/passage/rerank/4b-dev-liblinear.lindex: -------------------------------------------------------------------------------- 1 | 2 false 2 | 1 true -------------------------------------------------------------------------------- /src/main/resources/properties/direct-gopubmed-concept.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations 12 | # under the License. 
13 | # 14 | 15 | disease.ontology.service = http://bioasq.org:8000/do 16 | gene.ontology.service = http://bioasq.org:8000/go 17 | jochem.service = http://bioasq.org:8000/jochem 18 | mesh.service = http://bioasq.org:8000/mesh 19 | uniprot.service = http://bioasq.org:8000/uniprot 20 | require.session.url = true 21 | session.refresh.interval = 9 22 | -------------------------------------------------------------------------------- /src/main/resources/properties/direct-gopubmed-concept.properties.old: -------------------------------------------------------------------------------- 1 | disease.ontology.service = http://gopubmed.org/web/bioasq/doid/json 2 | gene.ontology.service = http://gopubmed.org/web/bioasq/go/json 3 | jochem.service = http://gopubmed.org/web/bioasq/jochem/json 4 | mesh.service = http://gopubmed.org/web/bioasq/mesh/json 5 | uniprot.service = http://gopubmed.org/web/bioasq/uniprot/json 6 | require.session.url = true 7 | session.refresh.interval = 9 8 | -------------------------------------------------------------------------------- /src/main/resources/properties/direct-gopubmed-document.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations 12 | # under the License. 
13 | # 14 | 15 | pubmed.search.service = http://bioasq.org:8000/pubmed 16 | require.session.url = true 17 | session.refresh.interval = 9 18 | -------------------------------------------------------------------------------- /src/main/resources/properties/direct-gopubmed-document.properties.old: -------------------------------------------------------------------------------- 1 | pubmed.search.service = http://gopubmed.org/web/gopubmedbeta/bioasq/pubmed 2 | require.session.url = true 3 | session.refresh.interval = 9 4 | -------------------------------------------------------------------------------- /src/main/resources/properties/direct-gopubmed-triple.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations 12 | # under the License. 13 | # 14 | 15 | linked.life.data.service = http://gopubmed.org/web/bioasq/linkedlifedata2/triples 16 | require.session.url = true 17 | session.refresh.interval = 9 18 | --------------------------------------------------------------------------------