├── .gitignore ├── ChangeLog.txt ├── README.md ├── de.tudarmstadt.ukp.dkpro.wsd-asl └── pom.xml ├── de.tudarmstadt.ukp.dkpro.wsd-gpl ├── .license-header.txt ├── LICENSE.txt ├── README.txt └── pom.xml ├── de.tudarmstadt.ukp.dkpro.wsd.alignment ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── alignment │ │ ├── WordNetVersionAlignment.java │ │ └── package-info.java │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── alignment │ │ └── WordNetVersionAlignmentTest.java │ └── resources │ └── alignment_test.txt ├── de.tudarmstadt.ukp.dkpro.wsd.core ├── pom.xml └── src │ ├── main │ ├── java │ │ └── de │ │ │ └── tudarmstadt │ │ │ └── ukp │ │ │ └── dkpro │ │ │ └── wsd │ │ │ ├── Pair.java │ │ │ ├── UnorderedPair.java │ │ │ ├── WSDException.java │ │ │ ├── WSDUtils.java │ │ │ ├── algorithm │ │ │ ├── AbstractWSDAlgorithm.java │ │ │ ├── AllSensesBaseline.java │ │ │ ├── FirstSenseBaseline.java │ │ │ ├── FrequencyWeightedAllSensesBaseline.java │ │ │ ├── MonosemousOnlyBaseline.java │ │ │ ├── MostFrequentSenseBaseline.java │ │ │ ├── RandomSenseBaseline.java │ │ │ ├── WSDAlgorithm.java │ │ │ ├── WSDAlgorithmCollectiveBasic.java │ │ │ ├── WSDAlgorithmCollectivePOS.java │ │ │ ├── WSDAlgorithmCollectiveSequentialBasic.java │ │ │ ├── WSDAlgorithmContextBasic.java │ │ │ ├── WSDAlgorithmContextPOS.java │ │ │ ├── WSDAlgorithmDocumentBasic.java │ │ │ ├── WSDAlgorithmDocumentDependentBasic.java │ │ │ ├── WSDAlgorithmDocumentTextBasic.java │ │ │ ├── WSDAlgorithmIndividualBasic.java │ │ │ ├── WSDAlgorithmIndividualPOS.java │ │ │ └── package-info.java │ │ │ ├── annotator │ │ │ ├── WSDAnnotatorBase.java │ │ │ ├── WSDAnnotatorBaseCollective.java │ │ │ ├── WSDAnnotatorBaseContext.java │ │ │ ├── WSDAnnotatorBaseDocument.java │ │ │ ├── WSDAnnotatorBaseDocumentCollective.java │ │ │ ├── WSDAnnotatorBaseDocumentDependent.java │ │ │ ├── WSDAnnotatorBaseIndividual.java │ │ │ ├── 
WSDAnnotatorCollectiveBasic.java │ │ │ ├── WSDAnnotatorCollectivePOS.java │ │ │ ├── WSDAnnotatorCollectiveSequentialBasic.java │ │ │ ├── WSDAnnotatorContextBasic.java │ │ │ ├── WSDAnnotatorContextPOS.java │ │ │ ├── WSDAnnotatorDocumentCollectiveBasic.java │ │ │ ├── WSDAnnotatorDocumentDependentBasic.java │ │ │ ├── WSDAnnotatorIndividualBasic.java │ │ │ ├── WSDAnnotatorIndividualPOS.java │ │ │ ├── WSDAnnotatorStringList.java │ │ │ ├── WSDAnnotatorTinyContextBasic.java │ │ │ └── package-info.java │ │ │ ├── candidates │ │ │ ├── SenseClusterer.java │ │ │ ├── SenseConfidenceClusterer.java │ │ │ ├── SenseConfidenceNormalizer.java │ │ │ ├── SenseConverter.java │ │ │ ├── SenseMapper.java │ │ │ ├── StringToWikipediaConverter.java │ │ │ ├── WSDItemAnnotator.java │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ ├── resource │ │ │ ├── WSDResourceBase.java │ │ │ ├── WSDResourceBasic.java │ │ │ ├── WSDResourceCollectiveBasic.java │ │ │ ├── WSDResourceCollectivePOS.java │ │ │ ├── WSDResourceCollectiveSequentialBasic.java │ │ │ ├── WSDResourceContextBasic.java │ │ │ ├── WSDResourceContextPOS.java │ │ │ ├── WSDResourceDocumentBasic.java │ │ │ ├── WSDResourceDocumentDependentBasic.java │ │ │ ├── WSDResourceDocumentTextBasic.java │ │ │ ├── WSDResourceIndividualBasic.java │ │ │ ├── WSDResourceIndividualPOS.java │ │ │ ├── WSDResourceStringList.java │ │ │ └── package-info.java │ │ │ ├── si │ │ │ ├── FixedSenseInventory.java │ │ │ ├── POS.java │ │ │ ├── SenseAlignment.java │ │ │ ├── SenseDictionary.java │ │ │ ├── SenseInventory.java │ │ │ ├── SenseInventoryBase.java │ │ │ ├── SenseInventoryException.java │ │ │ ├── SenseTaxonomy.java │ │ │ ├── SenseWeightedInventory.java │ │ │ ├── TestSenseInventory.java │ │ │ ├── package-info.java │ │ │ └── resource │ │ │ │ ├── FixedSenseInventoryResource.java │ │ │ │ ├── SenseInventoryResourceBase.java │ │ │ │ ├── TestSenseInventoryResource.java │ │ │ │ └── package-info.java │ │ │ └── type │ │ │ ├── LexicalItemConstituent.java │ │ │ ├── 
LexicalItemConstituent_Type.java │ │ │ ├── Sense.java │ │ │ ├── Sense_Type.java │ │ │ ├── WSDItem.java │ │ │ ├── WSDItem_Type.java │ │ │ ├── WSDResult.java │ │ │ ├── WSDResult_Type.java │ │ │ └── package-info.java │ └── resources │ │ ├── META-INF │ │ └── org.apache.uima.fit │ │ │ └── types.txt │ │ ├── desc │ │ └── type │ │ │ ├── Sense.xml │ │ │ ├── WSDItem.xml │ │ │ └── WSDResult.xml │ │ └── stopwords │ │ ├── stoplist_de.txt │ │ ├── stoplist_en.txt │ │ ├── stoplist_et.txt │ │ └── stoplist_shorter_en.txt │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ ├── algorithm │ │ ├── MostFrequentSenseBaselineTest.java │ │ └── WSDAnnotatorTest.java │ │ ├── candidates │ │ ├── SenseMapperTest.java │ │ ├── SenseWeightNormalizerTest.java │ │ └── WSDItemAnnotatorTest.java │ │ └── si │ │ ├── FixedSenseInventoryTest.java │ │ └── resource │ │ └── SenseInventoryResourceTest.java │ └── resources │ └── text │ └── test.txt ├── de.tudarmstadt.ukp.dkpro.wsd.evaluation ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── evaluation │ │ ├── AbstractClusterEvaluator.java │ │ ├── AbstractSingleExactMatchEvaluator.java │ │ ├── AbstractWSDEvaluator.java │ │ ├── ClusterEvaluatorText.java │ │ ├── ConfusionMatrix.java │ │ ├── CorpusStatistics.java │ │ ├── EvaluationTable.java │ │ ├── EvaluationTableHTML.java │ │ ├── MultipleExactMatchEvaluator.java │ │ ├── PrecisionRecallGraph.java │ │ ├── SingleExactMatchEvaluatorHTML.java │ │ ├── SingleExactMatchEvaluatorText.java │ │ └── package-info.java │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── evaluation │ │ ├── AbstractSingleExactMatchEvaluatorTest.java │ │ ├── ConfusionMatrixTest.java │ │ ├── ExactMatchEvaluatorTest.java │ │ └── SingleExactMatchEvaluatorTextTest.java │ └── resources │ ├── AbstractSingleExactMatchEvaluatorTest │ ├── d00_modified.xml │ └── senseval │ │ ├── senseval_backoff_1.key │ 
│ ├── senseval_backoff_2.key │ │ └── senseval_test_algorithm.key │ └── SingleExactMatchEvaluatorTextTest │ ├── semcor.eval.xml │ ├── semcor.gold.01.key │ ├── semcor.gold.02.key │ ├── semcor.test.01.key │ ├── semcor.test.02.key │ ├── semcor.test.03.key │ ├── semcor.test.04.key │ ├── semcor.test.05.key │ └── semcor.test.06.key ├── de.tudarmstadt.ukp.dkpro.wsd.examples-gpl ├── .license-header.txt ├── LICENSE.txt ├── pom.xml └── src │ └── main │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── examples │ │ ├── EnglishStopLemmatizer.java │ │ ├── GraphVisualizationExample.java │ │ ├── Semeval1EnCGAWExample.java │ │ ├── Senseval2EnAWExample.java │ │ ├── Senseval2EnLSExample.java │ │ └── package-info.java │ └── resources │ ├── extjwnl_properties.xml │ └── log4j.properties ├── de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.wikipedia ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── graphconnectivity │ │ └── iterative │ │ ├── algorithm │ │ ├── RandomSequentialDisambiguation.java │ │ └── SequentialGraphDisambiguation.java │ │ ├── util │ │ ├── DisambiguationEdge.java │ │ ├── DisambiguationEdgeTransformer.java │ │ ├── DisambiguationVertex.java │ │ └── DisambiguationVertexTransformer.java │ │ └── wikipedia │ │ ├── algorithm │ │ ├── LinkInformationSequentialDisambiguation.java │ │ └── LinkMeasureSequentialDisambiguation.java │ │ └── util │ │ ├── IncomingLinksCache.java │ │ └── SimilarityCache.java │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── graphconnectivity │ │ └── iterative │ │ └── algorithm │ │ ├── PageRankWithPriorsTest.java │ │ └── RandomSequentialDisambiguationTest.java │ └── resources │ └── dictionary │ └── SpitkovskyChang │ ├── dict_google.ser │ └── license.txt ├── de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── 
wsd │ │ └── graphconnectivity │ │ └── algorithm │ │ ├── DegreeCentralityWSD.java │ │ ├── GraphConnectivityWSD.java │ │ ├── GraphVisualizer.java │ │ ├── GraphVisualizerResource.java │ │ ├── JungGraphVisualizer.java │ │ ├── WSDResourceDegreeCentrality.java │ │ └── package-info.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── graphconnectivity │ └── algorithm │ └── DegreeCentralityWSDTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.io ├── pom.xml └── src │ ├── main │ ├── java │ │ └── de │ │ │ └── tudarmstadt │ │ │ └── ukp │ │ │ └── dkpro │ │ │ └── wsd │ │ │ └── io │ │ │ ├── reader │ │ │ ├── AidaReader.java │ │ │ ├── MASCReader.java │ │ │ ├── NullEntityResolver.java │ │ │ ├── SemCorXMLReader.java │ │ │ ├── WebCAGeXMLReader.java │ │ │ └── package-info.java │ │ │ └── writer │ │ │ ├── WSDWriter.java │ │ │ └── package-info.java │ └── resources │ │ └── tagmapping │ │ └── en-brown-pos.map │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── io │ │ ├── reader │ │ ├── AidaReaderTest.java │ │ ├── MASCReaderTest.java │ │ ├── SemCorXMLReaderTest.java │ │ └── WebCAGeXMLReaderTest.java │ │ └── writer │ │ └── WSDWriterTest.java │ └── resources │ └── aida_test.tsv ├── de.tudarmstadt.ukp.dkpro.wsd.lesk ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── lesk │ │ ├── algorithm │ │ ├── Lesk.java │ │ ├── OriginalLesk.java │ │ ├── SimplifiedAlignedExtendedLesk.java │ │ ├── SimplifiedAlignedLesk.java │ │ ├── SimplifiedExpandedLesk.java │ │ ├── SimplifiedExtendedExpandedLesk.java │ │ ├── SimplifiedExtendedLesk.java │ │ ├── SimplifiedLesk.java │ │ └── package-info.java │ │ ├── resource │ │ ├── WSDResourceSimplifiedAlignedExtendedLesk.java │ │ ├── WSDResourceSimplifiedAlignedLesk.java │ │ ├── WSDResourceSimplifiedExpandedLesk.java │ │ ├── WSDResourceSimplifiedExtendedExpandedLesk.java │ │ ├── WSDResourceSimplifiedExtendedLesk.java │ │ └── 
WSDResourceSimplifiedLesk.java │ │ └── util │ │ ├── normalization │ │ ├── FirstObjects.java │ │ ├── FirstUniqueObjects.java │ │ ├── MostObjects.java │ │ ├── MostUniqueObjects.java │ │ ├── NoNormalization.java │ │ ├── NormalizationStrategy.java │ │ ├── ProductMagnitude.java │ │ ├── SecondObjects.java │ │ ├── SecondUniqueObjects.java │ │ ├── TotalObjects.java │ │ └── TotalUniqueObjects.java │ │ ├── overlap │ │ ├── DotProduct.java │ │ ├── OverlapStrategy.java │ │ ├── PairedOverlap.java │ │ └── SetOverlap.java │ │ └── tokenization │ │ ├── AbstractLexicalExpander.java │ │ ├── StringSplit.java │ │ └── TokenizationStrategy.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── algorithms │ └── lesk │ ├── LeskTest.java │ ├── OriginalLeskTest.java │ ├── SimplifiedLeskTest.java │ └── util │ ├── normalization │ └── NormalizationTests.java │ └── overlap │ └── OverlapTests.java ├── de.tudarmstadt.ukp.dkpro.wsd.linkbased.wikipedia ├── pom.xml └── src │ └── main │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── linkbased │ │ └── algorithm │ │ ├── WikipediaCommonnessMethod.java │ │ └── WikipediaRelatednessMethod.java │ └── resources │ └── text │ └── text.txt ├── de.tudarmstadt.ukp.dkpro.wsd.senseval ├── pom.xml └── src │ ├── main │ ├── java │ │ └── de │ │ │ └── tudarmstadt │ │ │ └── ukp │ │ │ └── dkpro │ │ │ └── wsd │ │ │ └── senseval │ │ │ ├── candidates │ │ │ └── WordNetSenseKeyToSenseval.java │ │ │ ├── reader │ │ │ ├── Semeval1AWReader.java │ │ │ ├── Semeval2AWReader.java │ │ │ ├── Senseval2AWReader.java │ │ │ ├── Senseval2LSReader.java │ │ │ ├── SensevalAWReader.java │ │ │ ├── SensevalAnswerKeyReader.java │ │ │ └── SensevalReader.java │ │ │ └── writer │ │ │ └── SensevalAnswerKeyWriter.java │ └── resources │ │ ├── README.txt │ │ ├── fix-semeval1-en-cgaw-test-key.sh │ │ ├── fix_mihalcea_senseval2.sh │ │ ├── fix_mihalcea_senseval3.sh │ │ ├── senseval2-en-aw-test.patch │ │ ├── senseval2-en-ls-test-key.patch │ 
│ ├── senseval2-en-ls-train-key.patch │ │ └── wordnet_senseval.tsv │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── senseval │ ├── reader │ ├── Semeval1AWReaderTest.java │ ├── Semeval2AWReaderTest.java │ ├── Senseval2AWReaderTest.java │ ├── Senseval2LSReaderTest.java │ └── SensevalAnswerKeyReaderTest.java │ └── writer │ └── SensevalAnswerKeyWriterTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.si.dictionary ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── dictionary │ │ ├── GoogleDictionaryInventory.java │ │ ├── IDocumentDependentDictionary.java │ │ ├── IGoogleDictionary.java │ │ ├── IUkbDictionary.java │ │ ├── UkbDictionaryInventory.java │ │ ├── UkbDocumentDependentDictionaryInventory.java │ │ ├── resource │ │ ├── GoogleDictionaryInventoryResource.java │ │ └── UkbDictionaryInventoryResource.java │ │ └── util │ │ ├── AbstractDictionary.java │ │ ├── Dictionary.java │ │ ├── DictionaryWithoutFrequencies.java │ │ ├── GoogleDictionary.java │ │ └── UkbDictionary.java │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── dictionary │ │ ├── GoogleDictionaryTest.java │ │ └── SimpleDictionaryTest.java │ └── resources │ └── dictionary │ └── SpitkovskyChang │ ├── LICENSE.txt │ ├── dict_google.ser │ ├── dict_google.txt │ ├── dict_google.txt.bz2 │ └── needed_mentions.txt ├── de.tudarmstadt.ukp.dkpro.wsd.si.germanet-gpl ├── .license-header.txt ├── LICENSE.txt ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ ├── tudarmstadt │ │ └── ukp │ │ │ └── dkpro │ │ │ └── wsd │ │ │ └── si │ │ │ └── germanet │ │ │ ├── GermaNetLexUnitSenseInventory.java │ │ │ └── resource │ │ │ └── GermaNetLexUnitSenseInventoryResource.java │ │ └── tuebingen │ │ └── uni │ │ └── sfs │ │ └── germanet │ │ └── api │ │ ├── CompoundCategory.java │ │ ├── CompoundInfo.java │ │ ├── CompoundProperty.java │ │ ├── ConRel.java │ │ ├── EwnRel.java │ │ 
├── Example.java │ │ ├── Frame.java │ │ ├── GermaNet.java │ │ ├── IliLoader.java │ │ ├── IliRecord.java │ │ ├── LexRel.java │ │ ├── LexUnit.java │ │ ├── RelationLoader.java │ │ ├── StaxLoader.java │ │ ├── Synset.java │ │ ├── SynsetLoader.java │ │ ├── WiktionaryLoader.java │ │ ├── WiktionaryParaphrase.java │ │ ├── WordCategory.java │ │ └── WordClass.java │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── germanet │ │ ├── GermaNetLexUnitSenseInventoryTest.java │ │ └── resource │ │ └── GermaNetLexUnitSenseInventoryResourceTest.java │ └── resources │ └── log4j.properties ├── de.tudarmstadt.ukp.dkpro.wsd.si.linkdatabase ├── pom.xml └── src │ └── main │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── si │ └── linkdatabase │ ├── LinkDatabase.java │ ├── LinkDatabaseInventory.java │ ├── LinkDatabaseInventoryResource.java │ ├── MySqlConnection.java │ ├── PerformanceLinkInformationReader.java │ ├── PerformanceLinkInformationWriter.java │ └── util │ ├── LinkIdentification.java │ ├── LinkInformation.java │ └── MapUtil.java ├── de.tudarmstadt.ukp.dkpro.wsd.si.lsr ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── lsr │ │ ├── LsrSenseInventory.java │ │ ├── LsrToWordNetSynsetOffset.java │ │ ├── package-info.java │ │ ├── resource │ │ └── LsrSenseInventoryResource.java │ │ └── util │ │ └── LsrSenseInventoryUtil.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── si │ └── lsr │ ├── LsrSenseInventoryTest.java │ ├── LsrToWordNetSynsetOffsetTest.java │ └── resource │ └── LsrSenseInventoryResourceTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.si.twsi-gpl ├── .license-header.txt ├── LICENSE.txt ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── twsi │ │ ├── TwsiSenseInventory.java │ │ ├── TwsiSenseInventoryBase.java 
│ │ └── resource │ │ └── TwsiSenseInventoryResource.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── si │ └── twsi │ └── TwsiSenseInventoryTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.si.uby ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── uby │ │ ├── UbySenseIdToGermaNetLUID.java │ │ ├── UbySenseIdToWordNetSenseKey.java │ │ ├── UbySenseIdToWordNetSynset.java │ │ ├── UbySenseInventory.java │ │ ├── package-info.java │ │ └── resource │ │ └── UbySenseInventoryResource.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── si │ └── uby │ ├── UbySenseInventoryTest.java │ └── resource │ └── UbySenseInventoryResourceTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.si.wordnet ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── si │ │ └── wordnet │ │ ├── WordNetPlusPlusSenseInventory.java │ │ ├── WordNetSenseInventoryBase.java │ │ ├── WordNetSenseKeySenseInventory.java │ │ ├── WordNetSenseKeySenseInventoryBase.java │ │ ├── WordNetSynsetSenseInventory.java │ │ ├── WordNetSynsetSenseInventoryBase.java │ │ ├── candidates │ │ ├── WordNetSenseConfidenceClusterer.java │ │ ├── WordNetSenseKeyToSynset.java │ │ └── WordNetSynsetToSenseKey.java │ │ ├── package-info.java │ │ └── resource │ │ ├── WordNetPlusPlusSenseInventoryResource.java │ │ ├── WordNetSenseInventoryResourceBase.java │ │ ├── WordNetSenseKeySenseInventoryResource.java │ │ └── WordNetSynsetSenseInventoryResource.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── si │ └── wordnet │ ├── WordNetPlusPlusSenseInventoryTest.java │ ├── WordNetSenseKeySenseInventoryTest.java │ ├── WordNetSynsetSenseInventoryTest.java │ └── resource │ ├── WordNetPlusPlusSenseInventoryResourceTest.java │ ├── WordNetSenseKeySenseInventoryResourceTest.java │ └── 
WordNetSynsetSenseInventoryResourceTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl ├── .license-header.txt ├── LICENSE.txt ├── README.txt ├── pom.xml └── src │ ├── main │ └── java │ │ ├── de │ │ └── tudarmstadt │ │ │ └── ukp │ │ │ └── dkpro │ │ │ └── wsd │ │ │ └── supervised │ │ │ └── ims │ │ │ ├── ImsWsdDisambiguator.java │ │ │ ├── annotator │ │ │ └── ImsWSDAnnotator.java │ │ │ └── resource │ │ │ └── ImsWsdDisambiguatorResource.java │ │ └── sg │ │ └── edu │ │ └── nus │ │ └── comp │ │ └── nlp │ │ └── ims │ │ ├── classifiers │ │ ├── APreloadEvaluator.java │ │ ├── CGISEvaluator.java │ │ ├── CGISModelTrainer.java │ │ ├── CLibLinearEvaluator.java │ │ ├── CLibLinearTrainer.java │ │ ├── CLibSVMEvaluator.java │ │ ├── CLibSVMTrainer.java │ │ ├── CMultiClassesSVM.java │ │ ├── CWekaEvaluator.java │ │ ├── CWekaModelTrainer.java │ │ ├── IEvaluator.java │ │ ├── IModelTrainer.java │ │ └── package.html │ │ ├── corpus │ │ ├── ACorpus.java │ │ ├── AItem.java │ │ ├── ASentence.java │ │ ├── CAllWordsCoarseTaskCorpus.java │ │ ├── CAllWordsFineTaskCorpus.java │ │ ├── CAllWordsPlainCorpus.java │ │ ├── CItem.java │ │ ├── CLexicalCorpus.java │ │ ├── CSentence.java │ │ ├── ICorpus.java │ │ ├── IItem.java │ │ ├── ISentence.java │ │ └── package.html │ │ ├── feature │ │ ├── ABinaryFeature.java │ │ ├── AListFeature.java │ │ ├── ANumericFeature.java │ │ ├── CAllWordsFeatureExtractorCombination.java │ │ ├── CCollocation.java │ │ ├── CCollocationExtractor.java │ │ ├── CDoubleFeature.java │ │ ├── CFeatureExtractorCombination.java │ │ ├── CPOSFeature.java │ │ ├── CPOSFeatureExtractor.java │ │ ├── CSurroundingWord.java │ │ ├── CSurroundingWordExtractor.java │ │ ├── IFeature.java │ │ ├── IFeatureExtractor.java │ │ └── package.html │ │ ├── implement │ │ ├── CTestThread.java │ │ ├── CTester.java │ │ ├── CTrainModel.java │ │ ├── WSDTest.java │ │ └── package.html │ │ ├── instance │ │ ├── CInstance.java │ │ ├── CInstanceExtractor.java │ │ ├── IInstance.java │ │ ├── IInstanceExtractor.java │ 
│ └── package.html │ │ ├── io │ │ ├── CAllWordsResultWriter.java │ │ ├── CFullResultWriter.java │ │ ├── CGISLexeltWriter.java │ │ ├── CLibLinearLexeltWriter.java │ │ ├── CLibSVMLexeltWriter.java │ │ ├── CModelWriter.java │ │ ├── CPlainCorpusInlineWriter.java │ │ ├── CPlainCorpusResultWriter.java │ │ ├── CResultWriter.java │ │ ├── CWekaLexeltWriter.java │ │ ├── CWekaSparseLexeltWriter.java │ │ ├── ILexeltWriter.java │ │ ├── IModelWriter.java │ │ ├── IResultWriter.java │ │ └── package.html │ │ ├── lexelt │ │ ├── ALexelt.java │ │ ├── AListFeatureSelector.java │ │ ├── CCollocationFeatureSelector.java │ │ ├── CFeatureSelectorCombination.java │ │ ├── CLexelt.java │ │ ├── CModelInfo.java │ │ ├── CPOSFeatureSelector.java │ │ ├── CResultInfo.java │ │ ├── CStatistic.java │ │ ├── CSurroundingWordFeatureSelector.java │ │ ├── IFeatureSelector.java │ │ ├── ILexelt.java │ │ ├── IStatistic.java │ │ └── package.html │ │ └── util │ │ ├── APTBPOSTagger.java │ │ ├── CAmendLexeltCorpus.java │ │ ├── CArgumentManager.java │ │ ├── CJWNL.java │ │ ├── COpenNLPPOSTagger.java │ │ ├── COpenNLPSentenceSplitter.java │ │ ├── COpenNLPTokenizer.java │ │ ├── CPTBWNLemmatizer.java │ │ ├── CPair.java │ │ ├── CPennTreeBankTokenizer.java │ │ ├── CScorer.java │ │ ├── CSurroundingWordFilter.java │ │ ├── CWordNetLemmatizer.java │ │ ├── CWordNetSenseIndex.java │ │ ├── ILemmatizer.java │ │ ├── IPOSTagger.java │ │ ├── ISenseIndex.java │ │ ├── ISentenceSplitter.java │ │ ├── ITokenizer.java │ │ └── package.html │ └── test │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── supervised │ │ └── ims │ │ ├── ImsWsdDisambiguatorTest.java │ │ └── annotator │ │ └── ImsWSDAnnotatorTest.java │ └── resources │ └── plain │ └── test.txt ├── de.tudarmstadt.ukp.dkpro.wsd.supervised.twsi-gpl ├── .license-header.txt ├── LICENSE.txt ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── supervised │ │ └── twsi │ │ ├── 
TwsiWsdDisambiguator.java │ │ └── resource │ │ └── TwsiWsdDisambiguatorResource.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── supervised │ └── twsi │ └── TwsiWsdDisambiguatorTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.tackbp ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── tackbp │ │ ├── annotator │ │ ├── WSDAnnotatorDocumentBasic.java │ │ └── WSDAnnotatorDocumentDependentBasic.java │ │ └── reader │ │ ├── TacKbpDocument.java │ │ ├── TacKbpDocumentCreator.java │ │ └── TacKbpOfficialFormatReader.java │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── tackbp │ └── reader │ └── TacKbpOfficialFormatReaderTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.testing ├── pom.xml └── src │ └── main │ ├── java │ └── de │ │ └── tudarmstadt │ │ └── ukp │ │ └── dkpro │ │ └── wsd │ │ └── testing │ │ └── package-info.java │ └── resources │ ├── masc │ └── tell-v │ │ ├── tell-v-wn.xml │ │ └── tell-v.txt │ ├── semcor │ ├── br-a01.xml │ └── br-a02.xml │ ├── senseval │ ├── index.sense │ ├── semeval1aw.dtd │ ├── semeval1aw.xml │ ├── semeval2aw.dtd │ ├── semeval2aw.xml │ ├── senseval2aw.dtd │ ├── senseval2aw.xml │ ├── senseval2ls.dtd │ ├── senseval2ls.key │ ├── senseval2ls.xml │ ├── senseval2ls_lsr.key │ └── senseval2ls_test.key │ └── webcage │ ├── webcage.dtd │ ├── webcage0.xml │ └── webcage1.xml ├── de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl ├── .license-header.txt ├── LICENSE.txt ├── pom.xml └── src │ ├── main │ ├── java │ │ └── de │ │ │ └── tudarmstadt │ │ │ └── ukp │ │ │ └── dkpro │ │ │ └── wsd │ │ │ └── wrapper │ │ │ ├── Disambiguator.java │ │ │ ├── Disambiguator_ImplBase.java │ │ │ ├── LinkDatabaseLinkMeasureDisambiguator.java │ │ │ └── LinkDatabaseMFSDisambiguator.java │ └── resources │ │ └── stopwords │ │ ├── english_keyphrase_stopwords.txt │ │ ├── english_stopwords.txt │ │ ├── german_stopwords.txt │ │ ├── punctuation.txt │ │ └── 
stoplist_de.txt │ └── test │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── wrapper │ ├── LinkDatabaseLinkMeasureDisambiguatorTest.java │ └── LinkDatabaseMFSDisambiguatorTest.java ├── de.tudarmstadt.ukp.dkpro.wsd.wsi ├── README.txt ├── pom.xml └── src │ └── main │ └── java │ └── de │ └── tudarmstadt │ └── ukp │ └── dkpro │ └── wsd │ └── wsi │ ├── algorithm │ ├── SenseInductionAlgorithm.java │ ├── SimpleGraphClusteringInductionAlgorithm.java │ └── WSIAlgorithmBase.java │ ├── annotator │ ├── AddWSDItemToWSITopicAnnotator.java │ ├── ExternalSenseInductionResource.java │ ├── JSONSenseInventoryResource.java │ ├── JSONWSIAnnotator.java │ ├── Semeval2013Task11Evaluator.java │ ├── SenseInductionResourceBase.java │ ├── SimpleGraphBasedSenseInductionResource.java │ └── WSIAnnotator.java │ ├── io │ └── AMBIENTReader.java │ ├── si │ └── InducedSenseInventory.java │ └── type │ ├── WSITopic.java │ └── WSITopic_Type.java └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | **/target/ 3 | pom.xml.tag 4 | pom.xml.releaseBackup 5 | pom.xml.versionsBackup 6 | pom.xml.next 7 | release.properties 8 | dependency-reduced-pom.xml 9 | buildNumber.properties 10 | .mvn/timing.properties 11 | .settings 12 | .classpath 13 | .project 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DKPro WSD # 2 | 3 | DKPro WSD is a modular, extensible Java framework for word sense 4 | disambiguation. It provides UIMA components which encapsulate corpus 5 | readers, linguistic annotators, lexical semantic resources, 6 | disambiguation algorithms, and evaluation and reporting tools. 7 | 8 | Most modules of DKPro WSD are licensed under the terms of the Apache 9 | License; the others are licensed under the terms of the GNU General 10 | Public License. 
11 | 12 | For more information, visit the 13 | [DKPro WSD website](https://code.google.com/p/dkpro-wsd/) (still on 14 | Google Code) or join the 15 | [DKPro WSD mailing list](https://groups.google.com/forum/#!forum/dkpro-wsd-users). 16 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <http://www.gnu.org/licenses/>. 17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd-gpl/README.txt: -------------------------------------------------------------------------------- 1 | DKPro WSD GPL 2 | ============= 3 | 4 | DKPro WSD is a modular, extensible Java framework for word sense 5 | disambiguation. It provides UIMA components which encapsulate corpus 6 | readers, linguistic annotators, lexical semantic resources, 7 | disambiguation algorithms, and evaluation and reporting tools. 8 | 9 | This project contains DKPro WSD modules which are licensed under 10 | the terms of the GNU General Public License. There is another project, 11 | DKPro WSD, which contains further modules licensed under the 12 | terms of the Apache License. 
13 | 14 | Both projects can be obtained from their websites on Google Code: 15 | 16 | https://code.google.com/p/dkpro-wsd/ 17 | https://code.google.com/p/dkpro-wsd-gpl/ 18 | 19 | Bug reports: 20 | 21 | https://code.google.com/p/dkpro-wsd/issues/list 22 | 23 | Mailing list: 24 | 25 | https://groups.google.com/forum/#!forum/dkpro-wsd-users 26 | 27 | Please refer to the websites for installation and usage instructions. 28 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.alignment/pom.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 4.0.0 20 | 21 | de.tudarmstadt.ukp.dkpro.wsd-asl 22 | ../de.tudarmstadt.ukp.dkpro.wsd-asl 23 | de.tudarmstadt.ukp.dkpro.wsd 24 | 1.3.0-SNAPSHOT 25 | 26 | de.tudarmstadt.ukp.dkpro.wsd.alignment 27 | 28 | 29 | de.tudarmstadt.ukp.dkpro.wsd 30 | de.tudarmstadt.ukp.dkpro.wsd.core 31 | 32 | 33 | DKPro WSD - Alignment 34 | DKPro WSD modules for working with alignments between sense inventories. 35 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.alignment/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/alignment/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains classes for working with alignments between 21 | * sense inventories. 22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.alignment; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.alignment/src/test/resources/alignment_test.txt: -------------------------------------------------------------------------------- 1 | 03531808 03493207 0.966 2 | 03537241 02735863 0.501 03498113 0.808 03498437 0.101 3 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/WSDException.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd; 20 | /** Checked exception type declared in the DKPro WSD base package; it can wrap another exception or carry a detail message. */ 21 | public class WSDException extends Exception { 22 | /** Creates an exception that wraps {@code e} as its cause. */ 23 | public WSDException(Exception e) { 24 | super(e); 25 | } 26 | /** Creates an exception with the given detail message. */ 27 | public WSDException(String message) { 28 | super(message); 29 | } 30 | 31 | /** 32 | * Serialization version identifier. 33 | */ 34 | private static final long serialVersionUID = 1L; 35 | 36 | } 37 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithm.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventory; 22 | 23 | /** 24 | * A basic interface for word sense disambiguation algorithms. 
25 | * 26 | * @author Tristan Miller 27 | * 28 | */ 29 | public interface WSDAlgorithm { 30 | /** Returns a {@link String} naming or describing the disambiguation method (presumably a human-readable identifier; the format is not specified here - TODO confirm with implementations). */ 31 | public String getDisambiguationMethod(); 32 | /** Returns a {@link SenseInventory}; presumably the inventory this algorithm draws its senses from - TODO confirm. */ 33 | public SenseInventory getSenseInventory(); 34 | /** Takes a {@link SenseInventory}; presumably it replaces the one returned by {@link #getSenseInventory()} - TODO confirm. */ 35 | public void setSenseInventory(SenseInventory senseInventory); 36 | } 37 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmCollectiveBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import java.util.Collection; 22 | import java.util.Map; 23 | 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | 26 | /** 27 | * An interface for word sense disambiguation algorithms which disambiguate an 28 | * entire collection of subjects at once. The algorithms take as input a 29 | * collection of subjects of disambiguation (typically lemmas) represented by 30 | * {@link String}s. Each subject is assigned a mapping of sense IDs to 31 | * confidence values.
32 | * 33 | * @author Nicolai Erbs 34 | * 35 | */ 36 | public interface WSDAlgorithmCollectiveBasic 37 | extends WSDAlgorithm 38 | { 39 | /** Disambiguates a whole collection of subjects in a single call. NOTE(review): {@code Map>} is not valid Java and {@code Collection} is raw - the generic type arguments appear to have been lost from this copy of the file; restore them from the upstream source. @param sods the subjects of disambiguation @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map> getDisambiguation( 40 | Collection sods) 41 | throws SenseInventoryException; 42 | 43 | } 44 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmContextBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 24 | 25 | /** 26 | * An interface for word sense disambiguation algorithms which take a single 27 | * subject of disambiguation (typically a lemma) represented by a {@link String}, 28 | * along with the context in which this subject occurs (represented by another 29 | * {@link String}), and return a map of sense IDs to confidence values.
30 | * 31 | * @author Tristan Miller 32 | * 33 | */ 34 | public interface WSDAlgorithmContextBasic 35 | extends WSDAlgorithm 36 | { 37 | /** Disambiguates one subject using its context. @param sod the subject of disambiguation (typically a lemma) @param context the context in which the subject occurs @return a map of sense IDs to confidence values (declared as a raw {@code Map} in this copy of the file) @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map getDisambiguation(String sod, String context) 38 | throws SenseInventoryException; 39 | 40 | } -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmContextPOS.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 23 | 24 | import java.util.Map; 25 | 26 | import de.tudarmstadt.ukp.dkpro.wsd.si.POS; 27 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 28 | 29 | /** 30 | * An interface for word sense disambiguation algorithms which take a single 31 | * subject of disambiguation (typically a lemma) represented by a {@link String} 32 | * and an associated part of speech, along with the context in which this 33 | * subject occurs (represented by another {@link String}), and return a map of 34 | * sense IDs to confidence values. 35 | * 36 | * @author Tristan Miller 37 | * 38 | */ 39 | public interface WSDAlgorithmContextPOS 40 | extends WSDAlgorithm 41 | { 42 | /** Disambiguates one subject using its part of speech and its context. @param sod the subject of disambiguation (typically a lemma) @param pos the part of speech associated with the subject @param context the context in which the subject occurs @return a map of sense IDs to confidence values (declared as a raw {@code Map} in this copy of the file) @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map getDisambiguation(String sod, POS pos, 43 | String context) 44 | throws SenseInventoryException; 45 | } 46 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmDocumentBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 24 | 25 | 26 | /** 27 | * An interface for algorithms that return a single map of disambiguations computed from the document text. 28 | * 29 | * @author nico.erbs@gmail.com 30 | * 31 | */ 32 | public interface WSDAlgorithmDocumentBasic extends WSDAlgorithm 33 | { 34 | /** Disambiguates on the basis of a document's text. @param documentText the document text @return a single map of disambiguations (declared as a raw {@code Map} in this copy of the file; presumably sense IDs to confidence values - TODO confirm) @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map getDisambiguation(String documentText) throws SenseInventoryException; 35 | 36 | } 37 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmDocumentDependentBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 24 | 25 | 26 | /** 27 | * An interface for all algorithms that expect a document ID and the corresponding subject of disambiguation (sod). 28 | * 29 | * @author nico.erbs@gmail.com 30 | * 31 | */ 32 | public interface WSDAlgorithmDocumentDependentBasic extends WSDAlgorithm 33 | { 34 | /** Disambiguates a subject given the ID of its document. @param docId the document ID @param sod the subject of disambiguation @return a raw {@code Map} in this copy of the file; presumably sense IDs to confidence values - TODO confirm @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map getDisambiguation(String docId, String sod) throws SenseInventoryException; 35 | 36 | } 37 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmDocumentTextBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 22 | 23 | 24 | /** 25 | * An interface for algorithms returning the disambiguation as a String based on the document text. 26 | * The same disambiguation is returned for every subject of disambiguation (sod) in a document. 27 | * 28 | * @author nico.erbs@gmail.com 29 | * 30 | */ 31 | public interface WSDAlgorithmDocumentTextBasic extends WSDAlgorithm 32 | { 33 | /** Disambiguates on the basis of a document's text. @param documentText the document text @return the disambiguation as a {@link String} @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public String getDisambiguation(String documentText) throws SenseInventoryException; 34 | 35 | } 36 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmIndividualBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 24 | 25 | /** 26 | * An interface for word sense disambiguation algorithms which take a single 27 | * subject of disambiguation (typically a lemma) represented by a {@link String}, 28 | * and return a map of sense IDs to confidence values. 29 | * 30 | * @author Tristan Miller 31 | * 32 | */ 33 | public interface WSDAlgorithmIndividualBasic 34 | extends WSDAlgorithm 35 | { 36 | /** Disambiguates one subject on its own. @param sod the subject of disambiguation (typically a lemma) @return a map of sense IDs to confidence values (declared as a raw {@code Map} in this copy of the file) @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map getDisambiguation(String sod) 37 | throws SenseInventoryException; 38 | 39 | } 40 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/WSDAlgorithmIndividualPOS.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.POS; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | 26 | /** 27 | * An interface for word sense disambiguation algorithms which take a single 28 | * subject of disambiguation (typically a lemma) represented by a {@link String} 29 | * and the associated part of speech, and return a map of sense IDs to 30 | * confidence values. 31 | * 32 | * @author Tristan Miller 33 | * 34 | */ 35 | public interface WSDAlgorithmIndividualPOS 36 | extends WSDAlgorithm 37 | { 38 | /** Disambiguates one subject using its part of speech. @param sod the subject of disambiguation (typically a lemma) @param pos the part of speech associated with the subject @return a map of sense IDs to confidence values (declared as a raw {@code Map} in this copy of the file) @throws SenseInventoryException declared by this method; the triggering conditions are not visible here */ public Map getDisambiguation(String sod, POS pos) 39 | throws SenseInventoryException; 40 | 41 | } 42 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/algorithm/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains a type system for WSD algorithms, and some very 21 | * simple baselines. 22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.algorithm; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/annotator/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains UIMA annotators for the algorithm types 21 | * of {@link de.tudarmstadt.ukp.dkpro.wsd.algorithm}. 
22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.annotator; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/candidates/SenseConfidenceNormalizer.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.candidates; 20 | 21 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.fit.component.JCasAnnotator_ImplBase; 24 | import org.apache.uima.fit.util.JCasUtil; 25 | 26 | import de.tudarmstadt.ukp.dkpro.wsd.type.WSDResult; 27 | 28 | /** 29 | * SenseConfidenceNormalizer normalizes all the weights (confidence values) of 30 | * senses in the WSDResults so that they sum to 1.0 31 | */ 32 | public class SenseConfidenceNormalizer 33 | extends JCasAnnotator_ImplBase 34 | { 35 | /** Calls {@code normalize()} on every {@link WSDResult} annotation that {@code JCasUtil.select} finds in {@code aJCas}; the normalization itself is implemented by {@code WSDResult} and is not visible here. */ @Override 36 | public void process(JCas aJCas) 37 | throws AnalysisEngineProcessException 38 | { 39 | for (WSDResult r : JCasUtil.select(aJCas, WSDResult.class)) { 40 | r.normalize(); 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/candidates/StringToWikipediaConverter.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.candidates; 20 | 21 | /** 22 | * A {@link SenseConverter} which replaces spaces in sense IDs with 23 | * underscores. This is intended for sense IDs which represent Wikipedia 24 | * article titles; some Wikipedia sense inventories (such as UBY) handle 25 | * underscores only. 26 | * 27 | * @author Tristan Miller 28 | */ 29 | public class StringToWikipediaConverter 30 | extends SenseConverter 31 | { 32 | /** Returns {@code senseId} with every space character replaced by an underscore. The first argument of {@code replaceAll} is a regular expression; a single space has no special meaning there, so it is matched literally. A {@code null} argument causes a NullPointerException. */ 33 | @Override 34 | public String convert(String senseId) 35 | { 36 | return senseId.replaceAll(" ", "_"); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/candidates/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains utility annotators (e.g., for transforming existing 21 | * sense annotations).
22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.candidates; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains general-purpose types and utilities. 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceCollectiveBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 20 | 21 | import java.util.Collection; 22 | import java.util.Map; 23 | 24 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmCollectiveBasic; 25 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 26 | 27 | /** 28 | * A resource wrapping algorithms of type {@link WSDAlgorithmCollectiveBasic} 29 | * 30 | * @author Nicolai Erbs 31 | * 32 | */ 33 | public class WSDResourceCollectiveBasic 34 | extends WSDResourceBasic 35 | implements WSDAlgorithmCollectiveBasic 36 | { 37 | /** Forwards the call unchanged to {@code wsdAlgorithm} after casting it to {@link WSDAlgorithmCollectiveBasic}; the cast throws a ClassCastException if that object is of another type. {@code wsdAlgorithm} is not declared in this class - presumably inherited from WSDResourceBasic (TODO confirm). NOTE(review): {@code Map>} is not valid Java and {@code Collection} is raw - the generic type arguments appear to have been lost from this copy of the file. */ 38 | @Override 39 | public Map> getDisambiguation( 40 | Collection sods) 41 | throws SenseInventoryException 42 | { 43 | return ((WSDAlgorithmCollectiveBasic) wsdAlgorithm) 44 | .getDisambiguation(sods); 45 | } 46 | } -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceContextBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 23 | 24 | import java.util.Map; 25 | 26 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmContextBasic; 27 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 28 | 29 | /** 30 | * A resource wrapping algorithms of type {@link WSDAlgorithmContextBasic} 31 | * 32 | * @author Tristan Miller 33 | * 34 | */ 35 | public class WSDResourceContextBasic 36 | extends WSDResourceBasic 37 | implements WSDAlgorithmContextBasic 38 | { 39 | /** Forwards {@code sod} and {@code context} unchanged to {@code wsdAlgorithm} after casting it to {@link WSDAlgorithmContextBasic}; the cast throws a ClassCastException if that object is of another type. {@code wsdAlgorithm} is not declared in this class - presumably inherited from WSDResourceBasic (TODO confirm). */ 40 | @Override 41 | public Map getDisambiguation(String sod, String context) 42 | throws SenseInventoryException 43 | { 44 | return ((WSDAlgorithmContextBasic) wsdAlgorithm).getDisambiguation(sod, 45 | context); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceContextPOS.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 23 | 24 | import java.util.Map; 25 | 26 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmContextPOS; 27 | import de.tudarmstadt.ukp.dkpro.wsd.si.POS; 28 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 29 | 30 | /** 31 | * A resource wrapping algorithms of type {@link WSDAlgorithmContextPOS} 32 | * 33 | * @author Tristan Miller 34 | * 35 | */ 36 | public class WSDResourceContextPOS 37 | extends WSDResourceBasic 38 | implements WSDAlgorithmContextPOS 39 | { 40 | /** Forwards {@code sod}, {@code pos} and {@code context} unchanged to {@code wsdAlgorithm} after casting it to {@link WSDAlgorithmContextPOS}; the cast throws a ClassCastException if that object is of another type. {@code wsdAlgorithm} is not declared in this class - presumably inherited from WSDResourceBasic (TODO confirm). */ 41 | @Override 42 | public Map getDisambiguation(String sod, POS pos, 43 | String context) 44 | throws SenseInventoryException 45 | { 46 | return ((WSDAlgorithmContextPOS) wsdAlgorithm).getDisambiguation(sod, 47 | pos, context); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceDocumentBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmDocumentBasic; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | 26 | /** 27 | * A resource wrapping algorithms of type {@link WSDAlgorithmDocumentBasic} 28 | * 29 | * @author Nicolai Erbs 30 | * 31 | */ 32 | public class WSDResourceDocumentBasic 33 | extends WSDResourceBasic 34 | implements WSDAlgorithmDocumentBasic 35 | { 36 | /** Forwards the argument unchanged to {@code wsdAlgorithm} after casting it to {@link WSDAlgorithmDocumentBasic}; the cast throws a ClassCastException if that object is of another type. {@code wsdAlgorithm} is not declared in this class - presumably inherited from WSDResourceBasic (TODO confirm). NOTE(review): the parameter is named {@code documentId} here, whereas WSDAlgorithmDocumentBasic names it {@code documentText} - confirm which is meant. */ 37 | @Override 38 | public Map getDisambiguation(String documentId) 39 | throws SenseInventoryException 40 | { 41 | return ((WSDAlgorithmDocumentBasic) wsdAlgorithm) 42 | .getDisambiguation(documentId); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceDocumentDependentBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmDocumentDependentBasic; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | 26 | /** 27 | * A resource wrapping algorithms of type 28 | * {@link WSDAlgorithmDocumentDependentBasic}. Its single method casts the {@code wsdAlgorithm} reference (not declared in this class) to that interface and forwards the call. 29 | * 30 | * @author Nicolai Erbs 31 | * 32 | */ 33 | public class WSDResourceDocumentDependentBasic 34 | extends WSDResourceBasic 35 | implements WSDAlgorithmDocumentDependentBasic 36 | { 37 | /** Passes {@code documentId} and {@code sod} unchanged to {@code wsdAlgorithm}, cast to {@link WSDAlgorithmDocumentDependentBasic}, and returns that call's result as is. NOTE(review): the raw {@code Map} return type looks like it lost its type arguments; confirm against the interface. */ 38 | @Override 39 | public Map getDisambiguation(String documentId, String sod) 40 | throws SenseInventoryException 41 | { 42 | return ((WSDAlgorithmDocumentDependentBasic) wsdAlgorithm) 43 | .getDisambiguation(documentId, sod); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceDocumentTextBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 20 | 21 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmDocumentTextBasic; 22 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 23 | 24 | /** 25 | * A resource wrapping algorithms of type {@link WSDAlgorithmDocumentTextBasic}. Its single method casts the {@code wsdAlgorithm} reference (not declared in this class) to that interface and forwards the call. 26 | * 27 | * @author Nicolai Erbs 28 | * 29 | */ 30 | public class WSDResourceDocumentTextBasic 31 | extends WSDResourceBasic 32 | implements WSDAlgorithmDocumentTextBasic 33 | { 34 | /** Passes {@code someText} unchanged to {@code wsdAlgorithm}, cast to {@link WSDAlgorithmDocumentTextBasic}, and returns the {@code String} that call produces. */ 35 | @Override 36 | public String getDisambiguation(String someText) 37 | throws SenseInventoryException 38 | { 39 | return ((WSDAlgorithmDocumentTextBasic) wsdAlgorithm) 40 | .getDisambiguation(someText); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceIndividualBasic.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmIndividualBasic; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | 26 | /** 27 | * A resource wrapping algorithms of type {@link WSDAlgorithmIndividualBasic}. Its single method casts the {@code wsdAlgorithm} reference (not declared in this class) to that interface and forwards the call. 28 | * 29 | * @author Tristan Miller 30 | * 31 | */ 32 | public class WSDResourceIndividualBasic 33 | extends WSDResourceBasic 34 | implements WSDAlgorithmIndividualBasic 35 | { 36 | /** Passes {@code sod} unchanged to {@code wsdAlgorithm}, cast to {@link WSDAlgorithmIndividualBasic}, and returns that call's result as is. NOTE(review): the raw {@code Map} return type looks like it lost its type arguments; confirm against the interface. */ 37 | @Override 38 | public Map getDisambiguation(String sod) 39 | throws SenseInventoryException 40 | { 41 | return ((WSDAlgorithmIndividualBasic) wsdAlgorithm) 42 | .getDisambiguation(sod); 43 | } 44 | } -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/WSDResourceIndividualPOS.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.resource; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.algorithm.WSDAlgorithmIndividualPOS; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.POS; 25 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 26 | 27 | /** 28 | * A resource wrapping algorithms of type {@link WSDAlgorithmIndividualPOS}. Its single method casts the {@code wsdAlgorithm} reference (not declared in this class) to that interface and forwards the call. 29 | * 30 | * @author Tristan Miller 31 | * 32 | */ 33 | public class WSDResourceIndividualPOS 34 | extends WSDResourceBasic 35 | implements WSDAlgorithmIndividualPOS 36 | { 37 | /** Passes {@code sod} and {@code pos} unchanged to {@code wsdAlgorithm}, cast to {@link WSDAlgorithmIndividualPOS}, and returns that call's result as is. NOTE(review): the raw {@code Map} return type looks like it lost its type arguments; confirm against the interface. */ 38 | @Override 39 | public Map getDisambiguation(String sod, POS pos) 40 | throws SenseInventoryException 41 | { 42 | return ((WSDAlgorithmIndividualPOS) wsdAlgorithm).getDisambiguation(sod, pos); 43 | } 44 | } -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/resource/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains UIMA resources encapsulating the algorithm types 21 | * of {@link de.tudarmstadt.ukp.dkpro.wsd.algorithm}. 22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.resource; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/POS.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.si; 23 | 24 | /** 25 | * A simple enumeration for parts of speech. It declares exactly four constants: {@code NOUN}, {@code VERB}, {@code ADJ} and {@code ADV}. 26 | * 27 | * @author Tristan Miller 28 | * 29 | */ 30 | public enum POS 31 | { 32 | NOUN, VERB, ADJ, ADV 33 | } 34 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/SenseInventoryException.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si; 20 | 21 | import de.tudarmstadt.ukp.dkpro.wsd.WSDException; 22 | /** Exception type declared in the sense inventory ({@code si}) package; a subclass of {@link WSDException} whose constructors only forward their argument to the superclass. */ 23 | public class SenseInventoryException extends WSDException { 24 | /** Creates the exception from another exception {@code e}, which is handed to the {@link WSDException} constructor. */ 25 | public SenseInventoryException(Exception e) { 26 | super(e); 27 | } 28 | /** Creates the exception from {@code message}, which is handed to the {@link WSDException} constructor. */ 29 | public SenseInventoryException(String message) { 30 | super(message); 31 | } 32 | 33 | /** 34 | * Serialization version identifier of this class. 35 | */ 36 | private static final long serialVersionUID = 1L; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/SenseWeightedInventory.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si; 20 | 21 | import java.util.Map; 22 | 23 | 24 | /** 25 | * A SenseInventory with initial weights for each entry. Extends {@link SenseInventory} with one additional lookup method. 26 | * 27 | * @author nico.erbs@gmail.com 28 | * 29 | */ 30 | public interface SenseWeightedInventory extends SenseInventory { 31 | /** Returns the weighted senses for {@code sod} as a map. NOTE(review): {@code sod} is presumably the subject of disambiguation and the map presumably goes from sense ID to weight; the raw {@code Map} looks like it lost its type arguments. Confirm against an implementation. */ 32 | Map getWeightedSenses(String sod) throws SenseInventoryException; 33 | 34 | } 35 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains a type system for sense inventories.
21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.si; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/resource/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains UIMA resources for wrapping sense inventories. 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.si.resource; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/type/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains generated UIMA types. 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.type; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/resources/META-INF/org.apache.uima.fit/types.txt: -------------------------------------------------------------------------------- 1 | classpath*:desc/type/*.xml 2 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/resources/desc/type/Sense.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Sense 4 | 5 | 1.0 6 | 7 | 8 | 9 | de.tudarmstadt.ukp.dkpro.wsd.type.Sense 10 | 11 | uima.tcas.Annotation 12 | 13 | 14 | id 15 | The id of the sense 16 | uima.cas.String 17 | 18 | 19 | confidence 20 | The confidence of the sense 21 | uima.cas.Double 22 | 23 | 24 | description 25 | A textual representation of the sense 26 | uima.cas.String 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/main/resources/stopwords/stoplist_de.txt: -------------------------------------------------------------------------------- 1 | aber 2 | als 3 | am 4 | an 5 | auch 6 | auf 7 | aus 8 | bei 9 | bin 10 | bis 11 | bist 12 | da 13 | dadurch 14 | daher 15 | darum 16 | das 17 | daß 18 | dass 19 | dein 20 | deine 21 | 
dem 22 | den 23 | der 24 | des 25 | dessen 26 | deshalb 27 | die 28 | dies 29 | dieser 30 | dieses 31 | doch 32 | dort 33 | du 34 | durch 35 | ein 36 | eine 37 | einem 38 | einen 39 | einer 40 | eines 41 | er 42 | es 43 | euer 44 | eure 45 | für 46 | hatte 47 | hatten 48 | hattest 49 | hattet 50 | hier hinter 51 | ich 52 | ihr 53 | ihre 54 | im 55 | in 56 | ist 57 | ja 58 | jede 59 | jedem 60 | jeden 61 | jeder 62 | jedes 63 | jener 64 | jenes 65 | jetzt 66 | kann 67 | kannst 68 | können 69 | könnt 70 | machen 71 | mein 72 | meine 73 | mit 74 | muß 75 | mußt 76 | musst 77 | müssen 78 | müßt 79 | nach 80 | nachdem 81 | nein 82 | nicht 83 | nun 84 | oder 85 | seid 86 | sein 87 | seine 88 | sich 89 | sie 90 | sind 91 | soll 92 | sollen 93 | sollst 94 | sollt 95 | sonst 96 | soweit 97 | sowie 98 | und 99 | unser unsere 100 | unter 101 | vom 102 | von 103 | vor 104 | wann 105 | warum 106 | was 107 | weiter 108 | weitere 109 | wenn 110 | wer 111 | werde 112 | werden 113 | werdet 114 | weshalb 115 | wie 116 | wieder 117 | wieso 118 | wir 119 | wird 120 | wirst 121 | wo 122 | woher 123 | wohin 124 | zu 125 | zum 126 | zur 127 | über 128 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.core/src/test/resources/text/test.txt: -------------------------------------------------------------------------------- 1 | test bat on bank -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/evaluation/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance 
with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains classes for evaluating sense annotations against a 21 | * gold standard, and for producing various other useful statistics on data 22 | * sets. 23 | */ 24 | package de.tudarmstadt.ukp.dkpro.wsd.evaluation; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/AbstractSingleExactMatchEvaluatorTest/senseval/senseval_backoff_1.key: -------------------------------------------------------------------------------- 1 | d00 d00.s00.t01 art%1:09:00::/0.1 2 | d00 d00.s00.t03 change_ringing%1:04:00::/0.1 3 | d00 d00.s00.t04 be%2:42:03::/0.1 4 | d00 d00.s00.t05 false%5:00:00:specific:00/0.1 5 | d00 d00.s00.t08 false%1:18:00::/0.1 6 | d00 d00.s00.t13 false%3:00:02::/0.1 7 | d00 d00.s00.t20 rest%1:24:00::/0.1 8 | d00 d00.s00.t23 world%1:14:02::/0.1 9 | d00 d00.s01.t08 tailor%1:18:00::/0.1 10 | d00 d00.s02.t02 false%1:15:00::/0.1 11 | d00 d00.s03.t03 false%1:09:00::/0.1 12 | d00 d00.s03.t06 false%2:32:01::/0.1 13 | d00 d00.s03.t19 ancient%5:00:00:old:02/0.1 14 | d00 d00.s03.t20 stone%1:06:00::/0.1 15 | d00 d00.s03.t21 church%1:06:00::/0.1 16 | d00 d00.s03.t22 false%2:42:03::/0.1 17 | d00 d00.s03.t25 false%1:17:00::/0.1 18 | d00 d00.s03.t28 false%1:07:00::/0.1 -------------------------------------------------------------------------------- 
/de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/AbstractSingleExactMatchEvaluatorTest/senseval/senseval_backoff_2.key: -------------------------------------------------------------------------------- 1 | d00 d00.s00.t01 art%1:09:00::/0.01 2 | d00 d00.s00.t03 false%1:04:00::/0.01 3 | d00 d00.s00.t05 peculiar%5:00:00:specific:00/0.01 4 | d00 d00.s00.t08 false%1:18:00::/0.01 5 | d00 d00.s00.t14 english%3:01:00::/0.01 6 | d00 d00.s00.t15 false%1:09:00::/0.01 7 | d00 d00.s00.t20 rest%1:24:00::/0.01 8 | d00 d00.s00.t23 false%1:14:02::/0.01 9 | d00 d00.s02.t02 england%1:15:00::/0.01 10 | d00 d00.s03.t03 false%1:09:00::/0.01 11 | d00 d00.s03.t07 rural%3:00:00::/0.01 12 | d00 d00.s03.t08 false%1:15:00::/0.01 13 | d00 d00.s03.t19 ancient%5:00:00:old:02/0.01 14 | d00 d00.s03.t20 false%1:06:00::/0.01 15 | d00 d00.s03.t22 stand%2:42:03::/0.01 16 | d00 d00.s03.t25 false%1:17:00::/0.01 17 | d00 d00.s03.t30 bell%1:06:00::/0.01 18 | d00 d00.s03.t31 false%2:38:00::/0.01 -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/AbstractSingleExactMatchEvaluatorTest/senseval/senseval_test_algorithm.key: -------------------------------------------------------------------------------- 1 | d00 d00.s00.t01 art%1:09:00::/1.0 2 | d00 d00.s00.t03 change_ringing%1:04:00::/1.0 3 | d00 d00.s00.t04 be%2:42:03::/1.0 4 | d00 d00.s00.t05 peculiar%5:00:00:specific:00/1.0 5 | d00 d00.s00.t08 english%1:18:00::/1.0 6 | d00 d00.s00.t13 most%3:00:02::/1.0 7 | d00 d00.s00.t14 english%3:01:00::/1.0 8 | d00 d00.s00.t15 peculiarity%1:09:00::/1.0 9 | d00 d00.s00.t17 unintelligible%5:00:00:incomprehensible:00/1.0 !! 
results after this line are false 10 | d00 d00.s00.t20 false%1:24:00::/1.0 11 | d00 d00.s00.t23 false%1:14:02::/1.0 12 | d00 d00.s01.t08 false%1:18:00::/1.0 13 | d00 d00.s02.t02 false%1:15:00::/1.0 14 | d00 d00.s03.t03 false%1:09:00::/1.0 15 | d00 d00.s03.t06 false%2:32:01::/1.0 16 | d00 d00.s03.t07 false%3:00:00::/1.0 17 | d00 d00.s03.t08 false%1:15:00::/1.0 18 | d00 d00.s03.t15 false%5:00:00:beautiful:00/1.0 -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.eval.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | foo 5 | foo 6 | 7 | 8 | foo 9 | foo 10 | 11 | 12 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.gold.01.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%1 2 | br-a01 br-a01.p1.s1.w3 foo%2 3 | br-a02 br-a02.p1.s1.w1 foo%3 4 | br-a02 br-a02.p1.s1.w2 foo%4 5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.gold.02.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%1 foo%2 2 | br-a01 br-a01.p1.s1.w3 foo%2 3 | br-a02 br-a02.p1.s1.w1 foo%3 4 | br-a02 br-a02.p1.s1.w2 foo%4 5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.test.01.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%1/1 2 | br-a01 br-a01.p1.s1.w3 foo%2/1 3 | br-a02 br-a02.p1.s1.w1 foo%3/1 4 | br-a02 br-a02.p1.s1.w2 foo%4/1 
5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.test.02.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%6/1 2 | br-a01 br-a01.p1.s1.w3 foo%6/1 3 | br-a02 br-a02.p1.s1.w1 foo%6/1 4 | br-a02 br-a02.p1.s1.w2 foo%6/1 5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.test.03.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%1/1 2 | br-a01 br-a01.p1.s1.w3 foo%2/1 3 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.test.04.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%1/0.5 foo%2/0.5 2 | br-a01 br-a01.p1.s1.w3 foo%2/1 3 | br-a02 br-a02.p1.s1.w1 foo%3/1 4 | br-a02 br-a02.p1.s1.w2 foo%4/1 5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.test.05.key: -------------------------------------------------------------------------------- 1 | br-a03 br-a03.p1.s1.w2 foo%1/1 2 | br-a03 br-a03.p1.s1.w3 foo%2/1 3 | br-a03 br-a03.p1.s1.w1 foo%3/1 4 | br-a03 br-a03.p1.s1.w2 foo%4/1 5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.evaluation/src/test/resources/SingleExactMatchEvaluatorTextTest/semcor.test.06.key: -------------------------------------------------------------------------------- 1 | br-a01 br-a01.p1.s1.w2 foo%1/0.5 foo%1/0.5 2 | br-a01 br-a01.p1.s1.w3 foo%2/1 3 | br-a02 
br-a02.p1.s1.w1 foo%3/1 4 | br-a02 br-a02.p1.s1.w2 foo%4/1 5 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.examples-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <http://www.gnu.org/licenses/>. 17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.examples-gpl/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/examples/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * This program is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details.
15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 18 | */ 19 | /** 20 | * This package is for fully working examples which illustrate how to use 21 | * various features of the WSD framework. 22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.examples; 24 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.examples-gpl/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=WARN,development 2 | log4j.logger.org.hibernate = WARN 3 | log4j.logger.org.hibernate.cfg.Configuration = WARN 4 | 5 | log4j.appender.development=org.apache.log4j.ConsoleAppender 6 | log4j.appender.development.layout=org.apache.log4j.PatternLayout 7 | log4j.appender.development.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %5p [%t] (%C{1}) - %m%n 8 | 9 | log4j.logger.de.tudarmstadt.ukp = INFO 10 | log4j.logger.de.tudarmstadt.ukp.dkpro.wsd = DEBUG -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.wikipedia/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/graphconnectivity/iterative/algorithm/RandomSequentialDisambiguation.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.iterative.algorithm; 20 | 21 | import java.util.Random; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventory; 24 | 25 | /** 26 | * A class for sequential disambiguation using random weights between senses. Identical sense strings get similarity 1; every other pair gets a pseudo-random value from one shared {@link Random}. 27 | * 28 | * @author nico.erbs@gmail.com 29 | * 30 | */ 31 | public class RandomSequentialDisambiguation extends 32 | SequentialGraphDisambiguation { 33 | /** Generator shared by all calls instead of allocating a new one per similarity lookup; static so that it is initialised at class-load time, before any instance (or superclass constructor) can call {@link #getSenseSimilarity}. */ private static final Random RANDOM = new Random(); 34 | public RandomSequentialDisambiguation(SenseInventory inventory) { 35 | super(inventory); 36 | } 37 | /** Returns 1 when {@code baseSense.equals(targetSense)}, otherwise the next value of {@link Random#nextDouble()}. */ 38 | @Override 39 | protected double getSenseSimilarity(String baseSense, String targetSense) { 40 | if(baseSense.equals(targetSense)){ 41 | return 1; 42 | } 43 | return RANDOM.nextDouble(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.wikipedia/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/graphconnectivity/iterative/util/DisambiguationEdge.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License.
/**
 * A mutable graph edge that carries a single numeric weight.
 *
 * @author nico.erbs@gmail.com
 *
 */
public class DisambiguationEdge {

    /** The weight currently attached to this edge. */
    private double weight;

    /**
     * Creates an edge with the given starting weight.
     *
     * @param initialWeight the weight the edge starts out with
     */
    public DisambiguationEdge(final double initialWeight) {
        weight = initialWeight;
    }

    /**
     * Replaces the weight of this edge.
     *
     * @param newWeight the weight to store
     */
    public void setWeight(final double newWeight) {
        weight = newWeight;
    }

    /**
     * @return the weight most recently stored on this edge
     */
    public double getWeight() {
        return weight;
    }

}
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.iterative.util; 20 | 21 | import org.apache.commons.collections15.Transformer; 22 | 23 | /** 24 | * A transformer for edges to weights 25 | * 26 | * @author nico.erbs@gmail.com 27 | * 28 | */ 29 | public class DisambiguationEdgeTransformer implements 30 | Transformer { 31 | 32 | @Override 33 | public Double transform(DisambiguationEdge edge) { 34 | return edge.getWeight(); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.wikipedia/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/graphconnectivity/iterative/util/DisambiguationVertexTransformer.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.iterative.util; 20 | 21 | import org.apache.commons.collections15.Transformer; 22 | 23 | /** 24 | * A transformer from a vertex to a weight 25 | * 26 | * @author nico.erbs@gmail.com 27 | * 28 | */ 29 | public class DisambiguationVertexTransformer implements 30 | Transformer { 31 | 32 | @Override 33 | public Double transform(DisambiguationVertex vertex) { 34 | return vertex.getWeight(); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.wikipedia/src/test/resources/dictionary/SpitkovskyChang/dict_google.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkpro/dkpro-wsd/7438a952336466360fdef412446b9db5b5dc776f/de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.wikipedia/src/test/resources/dictionary/SpitkovskyChang/dict_google.ser -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/graphconnectivity/algorithm/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 
5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains algorithms for graph-based WSD. 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.graphconnectivity.algorithm; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.io/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/io/reader/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains readers for various sense-annotated corpora and 21 | * data sets. Everything here depends only on dom4j, jaxen, and some DKPro Core 22 | * modules. Readers with other dependencies should probably go in a separate 23 | * module. 24 | */ 25 | package de.tudarmstadt.ukp.dkpro.wsd.io.reader; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.io/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/io/writer/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains writers for sense-annotated corpora. 
21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.io.writer; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/pom.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 4.0.0 20 | 21 | de.tudarmstadt.ukp.dkpro.wsd 22 | de.tudarmstadt.ukp.dkpro.wsd-asl 23 | ../de.tudarmstadt.ukp.dkpro.wsd-asl 24 | 1.3.0-SNAPSHOT 25 | 26 | de.tudarmstadt.ukp.dkpro.wsd.lesk 27 | 28 | 29 | de.tudarmstadt.ukp.dkpro.wsd 30 | de.tudarmstadt.ukp.dkpro.wsd.core 31 | 32 | 33 | DKPro WSD - Lesk 34 | DKPro WSD modules for the Lesk family of disambiguation algorithms 35 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/algorithm/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains implementations of various Lesk-like disambiguation 21 | * algorithms. 
22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.algorithm; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/resource/WSDResourceSimplifiedAlignedExtendedLesk.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.resource; 23 | 24 | import de.tudarmstadt.ukp.dkpro.wsd.lesk.algorithm.SimplifiedAlignedExtendedLesk; 25 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseTaxonomy; 26 | 27 | /** 28 | * A resource for {@link SimplifiedAlignedExtendedLesk}. 
29 | * 30 | * @author Tristan Miller 31 | * 32 | */ 33 | public class WSDResourceSimplifiedAlignedExtendedLesk 34 | extends WSDResourceSimplifiedLesk 35 | { 36 | @Override 37 | protected void initializeWsdAlgorithm() 38 | { 39 | wsdAlgorithm = new SimplifiedAlignedExtendedLesk( 40 | (SenseTaxonomy) inventory, overlapStrategy, 41 | normalizationStrategy, defaultTokenizationStrategy, 42 | contextTokenizationStrategy); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/resource/WSDResourceSimplifiedAlignedLesk.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.resource; 23 | 24 | import de.tudarmstadt.ukp.dkpro.wsd.lesk.algorithm.SimplifiedAlignedLesk; 25 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseTaxonomy; 26 | 27 | /** 28 | * A resource for {@link SimplifiedAlignedLesk}. 
29 | * 30 | * @author Tristan Miller 31 | * 32 | */ 33 | public class WSDResourceSimplifiedAlignedLesk 34 | extends WSDResourceSimplifiedLesk 35 | { 36 | @Override 37 | protected void initializeWsdAlgorithm() 38 | { 39 | wsdAlgorithm = new SimplifiedAlignedLesk((SenseTaxonomy) inventory, 40 | overlapStrategy, normalizationStrategy, 41 | defaultTokenizationStrategy, contextTokenizationStrategy); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/resource/WSDResourceSimplifiedExtendedLesk.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.resource; 23 | 24 | import de.tudarmstadt.ukp.dkpro.wsd.lesk.algorithm.SimplifiedExtendedLesk; 25 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseTaxonomy; 26 | 27 | /** 28 | * A resource for {@link SimplifiedExtendedLesk}. 
29 | * 30 | * @author Tristan Miller 31 | * 32 | */ 33 | public class WSDResourceSimplifiedExtendedLesk 34 | extends WSDResourceSimplifiedLesk 35 | { 36 | @Override 37 | protected void initializeWsdAlgorithm() 38 | { 39 | wsdAlgorithm = new SimplifiedExtendedLesk((SenseTaxonomy) inventory, 40 | overlapStrategy, normalizationStrategy, 41 | defaultTokenizationStrategy, contextTokenizationStrategy); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/FirstObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.List; 22 | 23 | public class FirstObjects 24 | implements NormalizationStrategy 25 | { 26 | 27 | /** 28 | * Takes two arrays of objects and returns the length of the first 29 | * array 30 | * 31 | * @param o1 the first array of objects 32 | * @param o2 not used 33 | * @return the length of the first array 34 | */ 35 | @Override 36 | public double normalizer(List o1, List o2) 37 | { 38 | return o1.size(); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/FirstUniqueObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.HashSet; 22 | import java.util.List; 23 | 24 | public class FirstUniqueObjects 25 | implements NormalizationStrategy 26 | { 27 | 28 | /** 29 | * Takes two arrays of objects and returns the total number of unique 30 | * objects in the first array 31 | * 32 | * @param o1 the first array of objects 33 | * @param o2 not used 34 | * @return the number of unique objects in the first array 35 | */ 36 | @Override 37 | public double normalizer(List o1, List o2) 38 | { 39 | return new HashSet(o1).size(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/MostObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.List; 22 | 23 | public class MostObjects 24 | implements NormalizationStrategy 25 | { 26 | 27 | /** 28 | * Takes two arrays of objects and returns the length of the longest 29 | * array 30 | * 31 | * @param o1 the first array of objects 32 | * @param o2 the second array of objects 33 | * @return the length of the longest array 34 | */ 35 | @Override 36 | public double normalizer(List o1, List o2) 37 | { 38 | return Math.max(o1.size(), o2.size()); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/MostUniqueObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.HashSet; 22 | import java.util.List; 23 | 24 | public class MostUniqueObjects 25 | implements NormalizationStrategy 26 | { 27 | 28 | /** 29 | * Takes two arrays of objects, counts their respective numbers of unique 30 | * objects, and returns the greater of the two 31 | * 32 | * @param o1 33 | * the first array of objects 34 | * @param o2 35 | * the second array of objects 36 | * @return the number of unique objects in the first array, or the number of 37 | * unique objects in the second array, whichever is greater 38 | */ 39 | @Override 40 | public double normalizer(List o1, List o2) 41 | { 42 | return Math.max(new HashSet(o1).size(), 43 | new HashSet(o2).size()); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/NoNormalization.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.List; 22 | 23 | public class NoNormalization 24 | implements NormalizationStrategy 25 | { 26 | 27 | /** 28 | * Returns 1.0. This method can be passed to the Lesk algorithm to 29 | * avoid normalizing the overlap measure. 30 | * 31 | * @param o1 not used 32 | * @param o2 not used 33 | * @return 1.0 34 | */ 35 | @Override 36 | public double normalizer(List o1, List o2) 37 | { 38 | return 1.0; 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/NormalizationStrategy.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
/**
 * A strategy for normalizing the overlap measure.
 *
 * @author Tristan Miller
 */
public interface NormalizationStrategy
{
    /**
     * Takes two lists of objects and returns the number by which the
     * overlap measure should be divided.
     *
     * @param o1 the first list of objects
     * @param o2 the second list of objects
     * @return a divisor for normalization
     */
    double normalizer(List o1, List o2);
}
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.List; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.lesk.algorithm.Lesk; 24 | 25 | public class ProductMagnitude 26 | implements NormalizationStrategy 27 | { 28 | 29 | /** 30 | * Takes two arrays of objects returns the product of the magnitudes of 31 | * their frequency vectors 32 | * 33 | * @param o1 the first array of objects 34 | * @param o2 the second array of objects 35 | * @return the product of the magnitude of o1's and o2's frequency vectors 36 | */ 37 | @Override 38 | public double normalizer(List o1, List o2) 39 | { 40 | return Lesk.magnitude(o1) * Lesk.magnitude(o2); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/SecondObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.List; 22 | 23 | public class SecondObjects 24 | implements NormalizationStrategy 25 | { 26 | 27 | /** 28 | * Takes two arrays of objects and returns the length of the second 29 | * array 30 | * 31 | * @param o1 not used 32 | * @param o2 the second array of objects 33 | * @return the length of the second array 34 | */ 35 | @Override 36 | public double normalizer(List o1, List o2) 37 | { 38 | return o2.size(); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/SecondUniqueObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.HashSet; 22 | import java.util.List; 23 | 24 | public class SecondUniqueObjects 25 | implements NormalizationStrategy 26 | { 27 | 28 | /** 29 | * Takes two arrays of objects and returns the total number of unique 30 | * objects in the second array 31 | * 32 | * @param o1 not used 33 | * @param o2 the second array of objects 34 | * @return the number of unique objects in the second array 35 | */ 36 | @Override 37 | public double normalizer(List o1, List o2) 38 | { 39 | return new HashSet(o2).size(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/TotalObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.List; 22 | 23 | public class TotalObjects 24 | implements NormalizationStrategy 25 | { 26 | 27 | /** 28 | * Takes two arrays of objects and returns the sum of their lengths 29 | * 30 | * @param o1 the first array of objects 31 | * @param o2 the second array of objects 32 | * @return the sum of the lengths of the arrays 33 | */ 34 | @Override 35 | public double normalizer(List o1, List o2) 36 | { 37 | return o1.size() + o2.size(); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/normalization/TotalUniqueObjects.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.normalization; 20 | 21 | import java.util.HashSet; 22 | import java.util.List; 23 | import java.util.Set; 24 | 25 | public class TotalUniqueObjects 26 | implements NormalizationStrategy 27 | { 28 | 29 | /** 30 | * Takes two arrays of objects and returns the total number of unique 31 | * objects in them 32 | * 33 | * @param o1 the first array of objects 34 | * @param o2 the second array of objects 35 | * @return the number of unique objects in both arrays 36 | */ 37 | @Override 38 | public double normalizer(List o1, List o2) 39 | { 40 | Set set1 = new HashSet(o1); 41 | 42 | set1.addAll(o2); 43 | return set1.size(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/overlap/OverlapStrategy.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
/**
 * Contract for strategies that measure how much two lists of objects
 * overlap.
 *
 * @author Tristan Miller
 */
public interface OverlapStrategy
{
    /**
     * Measures the overlap between two lists of objects. How an "overlap" is
     * counted is left to the implementing strategy.
     *
     * @param o1 the first list of objects to be compared
     * @param o2 the second list of objects to be compared
     * @return the number of overlaps between the two lists
     */
    double overlap(List o1, List o2);
}
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.overlap; 20 | 21 | import java.util.HashSet; 22 | import java.util.List; 23 | import java.util.Set; 24 | 25 | public class SetOverlap 26 | implements OverlapStrategy 27 | { 28 | 29 | /** 30 | * Computes the number of unique objects two arrays have in common 31 | * 32 | * @param o1 the first array of objects to be compared 33 | * @param o2 the second array of objects to be compared 34 | * @return the number of unique objects the two arrays have in common 35 | */ 36 | @Override 37 | public double overlap(List o1, List o2) 38 | { 39 | Set set1 = new HashSet(o1); 40 | Set set2 = new HashSet(o2); 41 | 42 | set2.retainAll(set1); 43 | return set2.size(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/tokenization/StringSplit.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.lesk.util.tokenization; 20 | 21 | import java.util.Arrays; 22 | import java.util.List; 23 | 24 | public class StringSplit 25 | implements TokenizationStrategy 26 | { 27 | 28 | /** 29 | * Tokenizes a string with String.split() 30 | * 31 | * @param s the string to tokenize 32 | * @return the string tokenized into an array of objects 33 | */ 34 | @Override 35 | public List tokenize(String s) 36 | { 37 | return Arrays.asList(s.split("\\s+")); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.lesk/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/lesk/util/tokenization/TokenizationStrategy.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
/**
 * Contract for strategies that break a string up into a list of tokens.
 *
 * @author Tristan Miller
 */
public interface TokenizationStrategy
{
    /**
     * Breaks a string up into tokens. Where token boundaries fall is left to
     * the implementing strategy.
     *
     * @param s the string to tokenize
     * @return a list of strings
     */
    List tokenize(String s);
}
Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.senseval.candidates; 20 | 21 | import de.tudarmstadt.ukp.dkpro.wsd.candidates.SenseConverter; 22 | 23 | /** 24 | * Converts all sense IDs in WordNet sense key format to Senseval sense 25 | * key format by replacing apostrophes with hyphens. Note that this 26 | * has the potential to be a lossy conversion! 27 | * 28 | * @author Tristan Miller 29 | */ 30 | public class WordNetSenseKeyToSenseval 31 | extends SenseConverter 32 | { 33 | 34 | @Override 35 | public String convert(String senseId) 36 | { 37 | return senseId.replace('\'', '-'); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/README.txt: -------------------------------------------------------------------------------- 1 | This directory contains various files for reading and patching the Senseval 2 | and SemEval data sets: 3 | 4 | fix_mihalcea_senseval2.sh - a Bash script which fixes the "SemCor XML" versions 5 | of the Senseval-2 all-words data published at 6 | http://www.cse.unt.edu/~rada/downloads.html#sensevalsemcor so that they are 7 | well-formed XML and conform to the SemCor schema. 
8 | 9 | fix_mihalcea_senseval3.sh - a Bash script which fixes the "SemCor XML" versions 10 | of the Senseval-3 all-words data published at 11 | http://www.cse.unt.edu/~rada/downloads.html#sensevalsemcor so that they are 12 | well-formed XML and conform to the SemCor schema. 13 | 14 | fix-semeval1-en-cgaw-test-key.sh - a Bash script which fixes the SemEval-2007 15 | English coarse-grained all-words answer keys by lowercasing the sense keys to 16 | match the representations in WordNet 2.1. 17 | 18 | senseval2-en-aw-test.patch - a patch file which fixes numerous errors (i.e., 19 | heads referencing nonexistent satellite IDs) in the Senseval-2 English 20 | all-words test corpus. 21 | 22 | senseval2-en-ls-train-key.patch - a patch file which removes sense keys in 23 | the Senseval-2 English lexical sample training corpus answer key which do not 24 | exist in WordNet 1.7, or replaces them with senses which do exist in 25 | WordNet 1.7. 26 | 27 | senseval2-en-ls-test-key.patch - a patch file which removes sense keys in 28 | the Senseval-2 English lexical sample test corpus answer key which do not 29 | exist in WordNet 1.7. 30 | 31 | wordnet_senseval.tsv - a mapping of Senseval/SemEval sense keys to WordNet 32 | sense keys. This works for all versions of WordNet from 1.7 to 3.0. This 33 | mapping can be applied on the fly using the SenseMapper class; there is no 34 | need to use it to convert the Senseval/SemEval data sets in advance. 35 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/fix-semeval1-en-cgaw-test-key.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script fixes the SemEval-2007 English coarse-grained all-words answer 4 | # keys by lowercasing the sense keys to match their representations in 5 | # WordNet 2.1.
6 | 7 | tr '[[:upper:]]' '[[:lower:]]' < dataset21.test.key > dataset21.test.key.fixed 8 | tr '[[:upper:]]' '[[:lower:]]' < fs_baseline.key > fs_baseline.key.fixed 9 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/fix_mihalcea_senseval2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script fixes the "SemCor XML" versions of the Senseval-2 all-words data 4 | # published at http://www.cse.unt.edu/~rada/downloads.html#sensevalsemcor so 5 | # that they are well-formed XML and conform to the SemCor schema. 6 | 7 | for f in d00 d01 d02 8 | do 9 | sed '1s/^/\n/;s/&/&/g;s/=\([^> ]\+\)/="\1"/g;$s,$,\n,' $f.semcor.lexsn.key > $f.xml 10 | done 11 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/fix_mihalcea_senseval3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script fixes the "SemCor XML" versions of the Senseval-3 all-words data 4 | # published at http://www.cse.unt.edu/~rada/downloads.html#sensevalsemcor so 5 | # that they are well-formed XML and conform to the SemCor schema. 
6 | 7 | for f in d000 d001 d002 8 | do 9 | sed '1s/^/\n/;s/&/&/g;s/=\([^> ]\+\)/="\1"/g;$s,$,\n,' $f.semcor.lexsn.key > $f.xml 10 | done 11 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/senseval2-en-aw-test.patch: -------------------------------------------------------------------------------- 1 | --- eng-all-words.test.xml 2013-04-11 16:36:16.000000000 +0200 2 | +++ eng-all-words.test.xml 2013-07-31 15:24:13.606975505 +0200 3 | @@ -1,5 +1,16 @@ 4 | 5 | 6 | + 16 | + 17 | 18 | 19 | 20 | @@ -986,7 +997,7 @@ 21 | the 22 | Church 23 | of 24 | -England 25 | +England 26 | steadily 27 | dwindling 28 | , 29 | @@ -2008,7 +2019,7 @@ 30 | cell 31 | 's 32 | growth 33 | -in 34 | +in 35 | check 36 | . 37 | But 38 | @@ -2044,7 +2055,7 @@ 39 | and 40 | cells 41 | normally 42 | -under 43 | +under 44 | control 45 | turn 46 | malignant 47 | @@ -2125,7 +2136,7 @@ 48 | cancer 49 | genes 50 | work 51 | -in 52 | +in 53 | concert 54 | : 55 | An 56 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/senseval2-en-ls-test-key.patch: -------------------------------------------------------------------------------- 1 | --- key.sorted 2013-07-29 17:49:24.414426283 +0200 2 | +++ key.sorted 2013-07-29 17:49:24.420426332 +0200 3 | @@ -2245,7 +2245,7 @@ 4 | free free.40154 free%3:00:00:: 5 | free free.40159 free%3:00:01:: 6 | free free.40162 P 7 | -free free.40166 free_will%1:07:00:: free_will%1:26:00:: 8 | +free free.40166 free_will%1:07:00:: 9 | free free.40171 free%5:00:00:unpaid:00 10 | free free.40175 free%3:00:00:: 11 | free free.40179 free%5:00:00:unpaid:00 12 | @@ -2279,7 +2279,7 @@ 13 | free free.40273 free%3:00:00:: 14 | free free.40276 free%5:00:00:unpaid:00 15 | free free.40279 free%5:00:02:unoccupied:00 16 | -free free.40282 free_will%1:07:00:: free_will%1:26:00:: 17 | +free free.40282 free_will%1:07:00:: 
18 | free free.40285 free_rein%1:26:00:: 19 | free free.40288 free%3:00:00:: 20 | free free.40291 free%5:00:00:unpaid:00 21 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.senseval/src/main/resources/senseval2-en-ls-train-key.patch: -------------------------------------------------------------------------------- 1 | --- eng-lex-sample.train.key 2013-04-11 16:36:18.000000000 +0200 2 | +++ eng-lex-sample.train.key 2013-04-11 16:36:18.000000000 +0200 3 | @@ -2861,7 +2861,7 @@ 4 | free free.40268 free%5:00:01:unoccupied:00 5 | free free.40269 free%3:00:00:: 6 | free free.40271 free%3:00:00:: 7 | -free free.40272 free_will%1:07:00:: free_will%1:26:00:: 8 | +free free.40272 free_will%1:07:00:: 9 | free free.40274 free%5:00:00:unpaid:00 10 | free free.40275 free%3:00:00:: 11 | free free.40277 free%5:00:00:unpaid:00 12 | @@ -4019,7 +4019,7 @@ 13 | natural natural.40286 natural%5:00:00:normal:01 14 | natural natural.40287 natural_history%1:09:00:: P 15 | natural natural.40289 natural%3:00:01:: 16 | -natural natural.40290 natural_language_processing%1:10:00:: 17 | +natural natural.40290 natural_language_processing%1:09:00:: 18 | natural natural.40293 natural%5:00:00:normal:01 natural%5:00:00:spontaneous:00 19 | natural natural.40294 natural_science%1:09:00:: 20 | natural natural.40296 natural%3:00:01:: 21 | @@ -4296,11 +4296,11 @@ 22 | post post.40153 post%1:15:00:: U 23 | post post.40156 post%1:06:00:: 24 | post post.40157 post%1:04:00:: 25 | -post post.40161 local_post_office%1:14:01:: post_office%1:14:01:: 26 | +post post.40161 local_post_office%1:14:00:: post_office%1:14:01:: 27 | post post.40162 post%1:06:00:: 28 | post post.40165 post_office%1:14:01:: 29 | post post.40166 P 30 | -post post.40171 local_post_office%1:14:01:: post_office%1:14:01:: 31 | +post post.40171 local_post_office%1:14:00:: post_office%1:14:01:: 32 | post post.40172 post%1:06:00:: 33 | post post.40175 post%1:06:00:: 34 | post 
post.40177 post%1:10:00:: 35 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/pom.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 4.0.0 20 | 21 | de.tudarmstadt.ukp.dkpro.wsd 22 | de.tudarmstadt.ukp.dkpro.wsd-asl 23 | ../de.tudarmstadt.ukp.dkpro.wsd-asl 24 | 1.3.0-SNAPSHOT 25 | 26 | de.tudarmstadt.ukp.dkpro.wsd.si.dictionary 27 | DKPro WSD sense Inventories based on dictionaries 28 | 29 | 30 | org.apache.commons 31 | commons-compress 32 | 33 | 34 | de.tudarmstadt.ukp.dkpro.wsd 35 | de.tudarmstadt.ukp.dkpro.wsd.core 36 | 37 | 38 | DKPro WSD - Dictionary 39 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/dictionary/IDocumentDependentDictionary.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.dictionary; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventory; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | 26 | /** 27 | * A SenseInventory that is capable of returning weights for senses 28 | * 29 | * @author nico.erbs@gmail.com 30 | * 31 | */ 32 | public interface IDocumentDependentDictionary extends SenseInventory { 33 | 34 | public Map getWeightedSenses(String docId, String sod) throws SenseInventoryException; 35 | 36 | } 37 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/dictionary/IGoogleDictionary.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.dictionary; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseWeightedInventory; 25 | 26 | /** 27 | * A SenseWeightedInventory with special information from inter-language and Google links 28 | * 29 | * @author nico.erbs@gmail.com 30 | * 31 | */ 32 | public interface IGoogleDictionary extends SenseWeightedInventory { 33 | 34 | public Map getInterlanguageWeightedSenses(String sod) throws SenseInventoryException; 35 | 36 | public Map getEnglishlanguageWeightedSenses(String sod) throws SenseInventoryException; 37 | 38 | public Map getWikipediaWeightedSenses(String sod) throws SenseInventoryException; 39 | 40 | public Map getAlternativeWikipediaWeightedSenses(String sod) throws SenseInventoryException; 41 | 42 | } 43 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/dictionary/IUkbDictionary.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.dictionary; 20 | 21 | import java.util.Map; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseWeightedInventory; 25 | 26 | /** 27 | * An extension to the SenseWeightedInventory to return prior popularity of sense 28 | * 29 | * @author nico.erbs@gmail.com 30 | * 31 | */ 32 | public interface IUkbDictionary extends SenseWeightedInventory { 33 | 34 | public Map getPopularityWeightedSenses(String sod) throws SenseInventoryException; 35 | 36 | public Map getRandomlyWeightedSenses(String sod) throws SenseInventoryException; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/test/java/de/tudarmstadt/ukp/dkpro/wsd/si/dictionary/SimpleDictionaryTest.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.dictionary; 20 | 21 | 22 | import java.io.UnsupportedEncodingException; 23 | 24 | import junit.framework.Assert; 25 | 26 | import org.junit.Before; 27 | import org.junit.Test; 28 | 29 | import de.tudarmstadt.ukp.dkpro.wsd.si.dictionary.util.DictionaryWithoutFrequencies; 30 | 31 | 32 | public class SimpleDictionaryTest { 33 | 34 | @Before 35 | public void setUp() throws Exception { 36 | } 37 | 38 | @Test 39 | public void convertKey() throws UnsupportedEncodingException{ 40 | 41 | String key= "Real_Madrid_C\\u002eF\\u002e"; 42 | Assert.assertEquals("real_madrid_c.f.", DictionaryWithoutFrequencies.convertKey(key)); 43 | 44 | } 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/test/resources/dictionary/SpitkovskyChang/dict_google.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkpro/dkpro-wsd/7438a952336466360fdef412446b9db5b5dc776f/de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/test/resources/dictionary/SpitkovskyChang/dict_google.ser -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/test/resources/dictionary/SpitkovskyChang/dict_google.txt.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkpro/dkpro-wsd/7438a952336466360fdef412446b9db5b5dc776f/de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/test/resources/dictionary/SpitkovskyChang/dict_google.txt.bz2 -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.dictionary/src/test/resources/dictionary/SpitkovskyChang/needed_mentions.txt: -------------------------------------------------------------------------------- 
1 | "10 Things I Hate About You 2 | "10 Things I Hate About You" 3 | " Claude Monet 4 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.germanet-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.germanet-gpl/src/main/java/de/tuebingen/uni/sfs/germanet/api/CompoundCategory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 Department of General and Computational Linguistics, 3 | * University of Tuebingen 4 | * 5 | * This file is part of the Java API to GermaNet. 6 | * 7 | * The Java API to GermaNet is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 
/**
 * Enumeration of possible compound word categories, i.e., part-of-speech.
 * The constant names are the German terms; an English gloss is given on each
 * constant.
 *
 * @author University of Tuebingen, Department of Linguistics (germanetinfo at uni-tuebingen.de)
 * @version 8.0
 */
public enum CompoundCategory {
    /** Adjective. */
    Adjektiv,

    /** Noun. */
    Nomen,

    /** Verb. */
    Verb,

    /** Adverb. */
    Adverb,

    /** Preposition. */
    Präposition,

    /** Particle. */
    Partikel,

    /** Pronoun. */
    Pronomen
}
/**
 * Attributes that can be attached to a compound.
 * <p>
 * The constant names (German labels, two of them in lower camel case) are kept
 * exactly as declared, in the same order, so that {@code valueOf} lookups and
 * ordinals are unchanged.
 *
 * @author University of Tuebingen, Department of Linguistics (germanetinfo at uni-tuebingen.de)
 * @version 8.0
 */
public enum CompoundProperty {
    Abkürzung, Affixoid, Fremdwort, Konfix,
    Wortgruppe, Eigenname,
    opaquesMorphem, virtuelleBildung
}
/**
 * Relation types available through this API under the name {@code EwnRel}.
 * <p>
 * NOTE(review): the original comment called these "all lexical relations"; the
 * type name suggests EuroWordNet-style relations instead - confirm against the
 * GermaNet API documentation.
 *
 * @author University of Tuebingen, Department of Linguistics (germanetinfo at uni-tuebingen.de)
 * @version 8.0
 */
public enum EwnRel {
    synonym, causes, near_synonym, role, involved,
    xpos_near_synonym, is_caused_by,
    has_hyponym, has_holonym, has_hyperonym, has_meronym,
    be_in_state, has_subevent, is_subevent_of
}
/**
 * Word categories (parts of speech) a word can belong to.
 * <p>
 * The lower-case constant names are kept exactly as declared, in the same
 * order, so that {@code valueOf} lookups and ordinals are unchanged.
 *
 * @author University of Tuebingen, Department of Linguistics (germanetinfo at uni-tuebingen.de)
 * @version 8.0
 */
public enum WordCategory {
    adj, nomen, verben
}
may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.linkdatabase; 20 | 21 | import java.util.List; 22 | import java.util.Map; 23 | 24 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 25 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseWeightedInventory; 26 | 27 | /** 28 | * A link database is a specialized database containing link information 29 | * 30 | * @author nico.erbs@gmail.com 31 | * 32 | */ 33 | public interface LinkDatabase extends SenseWeightedInventory { 34 | 35 | public Map getWeightedSenses(String sod) throws SenseInventoryException; 36 | 37 | public List getIncomingLinks(String target) throws SenseInventoryException, UnsupportedOperationException; 38 | 39 | public int getNumberOfSenses(); 40 | 41 | } 42 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.lsr/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/lsr/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains classes exposing JLSR as a sense inventory. 21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.si.lsr; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.twsi-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <http://www.gnu.org/licenses/>. 
17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.twsi-gpl/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/twsi/TwsiSenseInventory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * This program is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 
18 | */ 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.twsi; 20 | 21 | import java.io.File; 22 | 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventory; 24 | 25 | /** 26 | * An interface for the Turk Bootstrap Word Sense Inventory 27 | * 28 | * @author Daniel Bär 29 | * 30 | */ 31 | public interface TwsiSenseInventory 32 | extends SenseInventory 33 | { 34 | File getConfigFile(); 35 | } 36 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.uby/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/uby/UbySenseIdToGermaNetLUID.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | package de.tudarmstadt.ukp.dkpro.wsd.si.uby; 20 | 21 | import de.tudarmstadt.ukp.dkpro.wsd.candidates.SenseConverter; 22 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventoryException; 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.uby.resource.UbySenseInventoryResource; 24 | 25 | /** 26 | * Converts all Uby sense IDs to GermaNet lexical unit IDs. 
27 | * 28 | * @author Tristan Miller 29 | */ 30 | public class UbySenseIdToGermaNetLUID 31 | extends SenseConverter 32 | { 33 | @Override 34 | public String convert(String senseId) 35 | { 36 | String senseKey; 37 | try { 38 | senseKey = ((UbySenseInventoryResource) sourceInventory) 39 | .getLexiconSenseId(senseId); 40 | } 41 | catch (SenseInventoryException e) { 42 | return null; 43 | } 44 | 45 | return senseKey; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.uby/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/uby/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains classes exposing UBY as a sense inventory. 
21 | */ 22 | package de.tudarmstadt.ukp.dkpro.wsd.si.uby; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.si.wordnet/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/si/wordnet/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This package contains classes exposing WordNet as a sense inventory 21 | * via extJWNL. 22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.si.wordnet; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <http://www.gnu.org/licenses/>. 17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/classifiers/IEvaluator.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.classifiers; 21 | 22 | import sg.edu.nus.comp.nlp.ims.util.ISenseIndex; 23 | 24 | /** 25 | * evaluator interface. 
26 | * 27 | * @author zhongzhi 28 | * 29 | */ 30 | public interface IEvaluator { 31 | /** 32 | * evaluate p_Lexelt 33 | * 34 | * @param p_Lexelt 35 | * lexelt 36 | * @return evaluation result 37 | * @throws Exception 38 | * evaluation exception 39 | */ 40 | public Object evaluate(Object p_Lexelt) throws Exception; 41 | 42 | /** 43 | * set sense index 44 | * 45 | * @param p_SenseIndex 46 | * sense index 47 | */ 48 | public void setSenseIndex(ISenseIndex p_SenseIndex); 49 | 50 | /** 51 | * set options 52 | * 53 | * @param p_Options 54 | * options 55 | */ 56 | public void setOptions(String[] p_Options); 57 | } 58 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/classifiers/IModelTrainer.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.classifiers; 21 | 22 | /** 23 | * interface to train a model for a given lexelt. 
/**
 * Contract for components that train a model for one lexelt.
 *
 * @author zhongzhi
 */
public interface IModelTrainer {

    /**
     * Passes implementation-specific options to the trainer.
     *
     * @param p_Options
     *            the options
     */
    void setOptions(String[] p_Options);

    /**
     * Trains a model from the instances contained in the given lexelt.
     *
     * @param p_Lexelt
     *            the lexelt to train on (declared as {@code Object}; the
     *            concrete type is up to the implementation)
     * @return the trained model
     * @throws Exception
     *             if training fails
     */
    Object train(Object p_Lexelt) throws Exception;
}

6 | Package related to classifier. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/corpus/CItem.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.corpus; 21 | 22 | /** 23 | * common item in a sentence. 
24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public class CItem extends AItem { 29 | 30 | /** 31 | * default constructor 32 | */ 33 | public CItem() { 34 | for (int i = Features.values().length - 1; i >= 0; i--) { 35 | this.m_Values.add(null); 36 | } 37 | } 38 | 39 | /* 40 | * (non-Javadoc) 41 | * @see sg.edu.nus.comp.nlp.ims.corpus.AItem#clone() 42 | */ 43 | public IItem clone() { 44 | CItem clone = new CItem(); 45 | for (String value : this.m_Values) { 46 | clone.m_Values.add(value); 47 | } 48 | return clone; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/corpus/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package related to the training or test corpus. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/feature/CAllWordsFeatureExtractorCombination.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.feature; 21 | 22 | import java.util.ArrayList; 23 | 24 | /** 25 | * a feature extractor combination for all-words tasks. 
26 | * 27 | * @author zhongzhi 28 | * 29 | */ 30 | public class CAllWordsFeatureExtractorCombination extends CFeatureExtractorCombination { 31 | public CAllWordsFeatureExtractorCombination() { 32 | this.m_FeatureExtractors.clear(); 33 | this.m_FeatureExtractors.add(new CPOSFeatureExtractor()); 34 | this.m_FeatureExtractors.add(new CCollocationExtractor()); 35 | this.m_FeatureExtractors.add(new CSurroundingWordExtractor(1, 1)); 36 | } 37 | 38 | public CAllWordsFeatureExtractorCombination( 39 | ArrayList p_FeatureExtractors) { 40 | super(p_FeatureExtractors); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/feature/CCollocation.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.feature; 21 | 22 | /** 23 | * collocation feature. 
24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public class CCollocation extends AListFeature { 29 | 30 | /** 31 | * 32 | */ 33 | private static final long serialVersionUID = 1L; 34 | 35 | public CCollocation() { 36 | } 37 | 38 | /* 39 | * (non-Javadoc) 40 | * @see sg.edu.nus.comp.nlp.ims.feature.AListFeature#clone() 41 | */ 42 | public Object clone() { 43 | CCollocation clone = new CCollocation(); 44 | clone.m_Key = this.m_Key; 45 | clone.m_Value = this.m_Value; 46 | return clone; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/feature/CPOSFeature.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.feature; 21 | 22 | /** 23 | * pos feature. 
24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public class CPOSFeature extends AListFeature { 29 | 30 | /** 31 | * 32 | */ 33 | private static final long serialVersionUID = 1L; 34 | 35 | /* 36 | * (non-Javadoc) 37 | * @see sg.edu.nus.comp.nlp.ims.feature.AListFeature#clone() 38 | */ 39 | public Object clone() { 40 | CPOSFeature clone = new CPOSFeature(); 41 | clone.m_Key = this.m_Key; 42 | clone.m_Value = this.m_Value; 43 | return clone; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/feature/CSurroundingWord.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.feature; 21 | 22 | /** 23 | * surrounding word feature. 
24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public class CSurroundingWord extends ABinaryFeature { 29 | 30 | /** 31 | * 32 | */ 33 | private static final long serialVersionUID = 1L; 34 | 35 | /** 36 | * constructor 37 | */ 38 | public CSurroundingWord() { 39 | this.m_Key = null; 40 | this.m_Value = true; 41 | } 42 | 43 | /* 44 | * (non-Javadoc) 45 | * @see sg.edu.nus.comp.nlp.ims.feature.ABinaryFeature#clone() 46 | */ 47 | public Object clone() { 48 | CSurroundingWord clone = new CSurroundingWord(); 49 | clone.m_Key = this.m_Key; 50 | return clone; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/feature/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package related to features used in WSD. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/implement/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package containing the main WSD API. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/instance/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package related to WSD instances. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/io/IModelWriter.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.io; 21 | 22 | import java.io.IOException; 23 | 24 | /** 25 | * model writer interface. 
26 | * 27 | * @author zhongzhi 28 | * 29 | */ 30 | public interface IModelWriter { 31 | 32 | /** 33 | * write model 34 | * 35 | * @param p_ModelInfo 36 | * model 37 | * @throws IOException 38 | * exception while saving model 39 | */ 40 | public void write(Object p_ModelInfo) throws IOException; 41 | 42 | /** 43 | * set options 44 | * 45 | * @param p_Options 46 | * options 47 | */ 48 | public void setOptions(String[] p_Options); 49 | } 50 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/io/IResultWriter.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.io; 21 | 22 | import java.io.IOException; 23 | 24 | /** 25 | * result writer interface. 
26 | * 27 | * @author zhongzhi 28 | * 29 | */ 30 | public interface IResultWriter { 31 | /** 32 | * write results 33 | * 34 | * @param p_Result 35 | * result 36 | * @throws IOException 37 | * exception while saving result 38 | */ 39 | public void write(Object p_Result) throws IOException; 40 | 41 | /** 42 | * convert result to string 43 | * 44 | * @param p_Result 45 | * classification result 46 | * @return string format 47 | */ 48 | public String toString(Object p_Result); 49 | 50 | /** 51 | * set options 52 | * 53 | * @param p_Options 54 | * options 55 | */ 56 | public void setOptions(String[] p_Options); 57 | } 58 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/io/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package related to IO. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/lexelt/CCollocationFeatureSelector.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.lexelt; 21 | 22 | import sg.edu.nus.comp.nlp.ims.feature.CCollocation; 23 | 24 | /** 25 | * collocation feature selector. 
26 | * 27 | * @author zhongzhi 28 | * 29 | */ 30 | public class CCollocationFeatureSelector extends AListFeatureSelector { 31 | /** 32 | * constructor 33 | * 34 | * @param p_M2 35 | * cut off 36 | */ 37 | public CCollocationFeatureSelector(int p_M2) { 38 | this.m_M2 = p_M2; 39 | this.m_FeatureName = CCollocation.class.getName(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/lexelt/CModelInfo.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 
17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.lexelt; 21 | 22 | /** 23 | * weka model information 24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public class CModelInfo { 29 | // model id 30 | public String lexelt; 31 | // weka model 32 | public Object model; 33 | // statistic 34 | public Object statistic; 35 | } 36 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/lexelt/CPOSFeatureSelector.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.lexelt; 21 | 22 | import sg.edu.nus.comp.nlp.ims.feature.CPOSFeature; 23 | 24 | /** 25 | * POS tag feature selector. 
26 | * 27 | * @author zhongzhi 28 | * 29 | */ 30 | public class CPOSFeatureSelector extends AListFeatureSelector { 31 | /** 32 | * constructor 33 | * 34 | * @param p_M2 35 | * threshold 36 | */ 37 | public CPOSFeatureSelector(int p_M2) { 38 | this.m_M2 = p_M2; 39 | this.m_FeatureName = CPOSFeature.class.getName(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/lexelt/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package related to a set of instances that share the same lexelt. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/CPair.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 
17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.util; 21 | 22 | /** 23 | * pair template 24 | * @author zhongzhi 25 | * 26 | */ 27 | public class CPair { 28 | // first value 29 | protected F m_First; 30 | // second value 31 | protected S m_Second; 32 | 33 | /** 34 | * constructor 35 | * @param p_First first value 36 | * @param p_Second second value 37 | */ 38 | public CPair(F p_First, S p_Second) { 39 | m_First = p_First; 40 | m_Second = p_Second; 41 | } 42 | 43 | /** 44 | * get first value 45 | * @return first 46 | */ 47 | public F getFirst() { 48 | return m_First; 49 | } 50 | 51 | /** 52 | * get second value 53 | * @return second 54 | */ 55 | public S getSecond() { 56 | return m_Second; 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/ILemmatizer.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 
17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.util; 21 | 22 | /** 23 | * lemmatizer interface. 24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public interface ILemmatizer { 29 | /** 30 | * lemmatize the input 31 | * @param p_Input input information 32 | * @return lemma 33 | */ 34 | public String lemmatize(String[] p_Input); 35 | 36 | /** 37 | * guess lexelt of input 38 | * @param p_Input input 39 | * @return lexelt 40 | */ 41 | public String guessLexelt(String[] p_Input); 42 | 43 | /** 44 | * get the lexelt of input 45 | * @param p_Input input 46 | * @return lexelt 47 | */ 48 | public String getLexelt(String[] p_Input); 49 | } 50 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/IPOSTagger.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 
17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.util; 21 | 22 | /** 23 | * POS tagger interface. 24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public interface IPOSTagger { 29 | /** 30 | * tag a tokenized input sentence 31 | * 32 | * @param input 33 | * a tokenized sentence 34 | * @return sentence with tag for each token 35 | */ 36 | public String tag(String input); 37 | 38 | /** 39 | * get the tag of a tagged token 40 | * 41 | * @param input 42 | * a tagged token 43 | * @return tag 44 | */ 45 | public String getTag(String input); 46 | 47 | /** 48 | * get the original token of a tagged token 49 | * 50 | * @param input 51 | * a tagged token 52 | * @return original token 53 | */ 54 | public String getToken(String input); 55 | 56 | } 57 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/ISenseIndex.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 
17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.util; 21 | 22 | /** 23 | * sense index interface. refer to the index.sense file in wordnet. 24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public interface ISenseIndex { 29 | /** 30 | * get the first sense of p_Lexelt 31 | * 32 | * @param p_Lexelt 33 | * lexelt id 34 | * @return first sense 35 | */ 36 | public String getFirstSense(String p_Lexelt); 37 | 38 | /** 39 | * get sense number of p_Sense 40 | * 41 | * @param p_Sense 42 | * sense 43 | * @return sense number 44 | */ 45 | public int getSenseNo(String p_Sense); 46 | 47 | } 48 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/ISentenceSplitter.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.util; 21 | 22 | /** 23 | * sentence splitter interface. 
24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public interface ISentenceSplitter { 29 | /** 30 | * split input into sentences 31 | * 32 | * @param input 33 | * input string 34 | * @return sentences 35 | */ 36 | public String[] split(String input); 37 | } 38 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/ITokenizer.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * IMS (It Makes Sense) -- NUS WSD System 3 | * Copyright (c) 2013 National University of Singapore. 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | * 18 | ******************************************************************************/ 19 | 20 | package sg.edu.nus.comp.nlp.ims.util; 21 | 22 | /** 23 | * sentence tokenizer interface. 
24 | * 25 | * @author zhongzhi 26 | * 27 | */ 28 | public interface ITokenizer { 29 | 30 | /** 31 | * tokenize an input sentence into tokens 32 | * 33 | * @param input 34 | * input sentence 35 | * @return tokens 36 | */ 37 | public String[] tokenize(String input); 38 | } 39 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/main/java/sg/edu/nus/comp/nlp/ims/util/package.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |

6 | Package containing utility data structures, algorithms, and external APIs that are used by multiple other packages. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.ims-gpl/src/test/resources/plain/test.txt: -------------------------------------------------------------------------------- 1 | IMS (It Makes Sense) is a supervised English all-words word sense disambiguation (WSD) system. The flexible framework of IMS allows users to integrate different preprocessing tools, additional features, and different classifiers. By default, we use linear support vector machines as the classifier with multiple features. This implementation of IMS achieves state-of-the-art results on several SensEval and SemEval tasks. -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.twsi-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.supervised.twsi-gpl/pom.xml: -------------------------------------------------------------------------------- 1 | 21 | 22 | 4.0.0 23 | 24 | de.tudarmstadt.ukp.dkpro.wsd 25 | de.tudarmstadt.ukp.dkpro.wsd-gpl 26 | ../de.tudarmstadt.ukp.dkpro.wsd-gpl 27 | 1.3.0-SNAPSHOT 28 | 29 | de.tudarmstadt.ukp.dkpro.wsd.supervised.twsi-gpl 30 | 31 | 32 | de.tudarmstadt.ukp.dkpro.wsd 33 | 34 | de.tudarmstadt.ukp.dkpro.wsd.si.twsi-gpl 35 | 36 | 37 | 38 | DKPro WSD GPL - TWSI disambiguation 39 | DKPro WSD GPL modules for the Turk Bootstrap Word Sense Inventory disambiguation system 40 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/pom.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 4.0.0 20 | 21 | de.tudarmstadt.ukp.dkpro.wsd-asl 22 | ../de.tudarmstadt.ukp.dkpro.wsd-asl 23 | de.tudarmstadt.ukp.dkpro.wsd 24 | 1.3.0-SNAPSHOT 25 | 26 | de.tudarmstadt.ukp.dkpro.wsd.testing 27 | 28 | 29 | 30 | maven-javadoc-plugin 31 | 32 | de.tudarmstadt.ukp.dkpro.wsd.testing 33 | 34 | 35 | 36 | 37 | DKPro WSD - Testing 38 | Data used in JUnit tests by various DKPro WSD modules 39 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/testing/package-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | ******************************************************************************/ 18 | 19 | /** 20 | * This module contains shared resources for JUnit tests. The 21 | * package has no classes. 22 | */ 23 | package de.tudarmstadt.ukp.dkpro.wsd.testing; -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/masc/tell-v/tell-v.txt: -------------------------------------------------------------------------------- 1 | I am a canon builder," he told me. 2 | But I told the third person that I am only telling you once. 
3 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/index.sense: -------------------------------------------------------------------------------- 1 | call_for%2:32:03:: 01063695 3 1 2 | call_for%2:32:04:: 00752764 1 24 3 | call_for%2:40:00:: 02305586 4 0 4 | call_for%2:42:00:: 02627934 2 21 5 | call%1:10:01:: 06272803 1 9 6 | call%1:10:02:: 07120524 3 3 7 | call%1:10:03:: 06796642 5 1 8 | call%1:10:04:: 07190693 9 0 9 | call%1:10:05:: 07192129 4 2 10 | call%1:10:06:: 07192511 8 0 11 | call%1:10:07:: 06582761 10 0 12 | call%1:10:08:: 07192661 7 0 13 | animal%1:03:00:: 00015388 1 67 14 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/semeval1aw.dtd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/semeval1aw.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Eggs 6 | looked 7 | like 8 | food 9 | . 10 | 11 | 12 | 13 | 14 | Hello 15 | . 16 | 17 | 18 | He 19 | is 20 | good 21 | . 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/semeval2aw.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Eggs 7 | looked 8 | up 9 | like 10 | food 11 | . 12 | 13 | 14 | 15 | 16 | Hello 17 | . 18 | 19 | 20 | He 21 | is 22 | good 23 | . 
24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/senseval2aw.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | He 6 | thought 7 | on 8 | the 9 | spur 10 | of 11 | the 12 | moment 13 | , 14 | as 15 | a 16 | matter 17 | of 18 | fact 19 | . 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/senseval2ls.key: -------------------------------------------------------------------------------- 1 | call.v call.1 call_for%2:32:03::/0.6 call_for%2:42:00::/0.4 !! this is a comment 2 | animal.n animal.1 animal%1:03:00:: U 3 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/senseval2ls.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Eggs call for food. 6 | 7 | 8 | 9 | 10 | 11 | Dave is an animal. 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/senseval2ls_lsr.key: -------------------------------------------------------------------------------- 1 | call.v call.1 art#933420|artistic_creation#933420|artistic_production#933420|---n/0.6 art#5638987|artistry#5638987|prowess#5638987|---n/0.4 !! this is a comment 2 | animal.n animal.1 bar#2937469|cake#2937469|---n U 3 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/senseval/senseval2ls_test.key: -------------------------------------------------------------------------------- 1 | call.v call.2 call_for%2:32:03::/0.0 call_for%2:42:00::/1.0 !! 
this is a comment 2 | animal.n animal.1 animal%1:03:00::/0.75 U/0.0 foo/0.5 3 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/webcage/webcage.dtd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/webcage/webcage0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Vor und während eines Krieges kommt es häufig zum Abbruch der diplomatischen Beziehungen. 5 | Nach dem Unwetter blieb uns nur eine Lösung: Abbruch der Zelte. 6 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.testing/src/main/resources/webcage/webcage1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Der Ladevorgang einer Internetseite kann in verschiedenen Webbrowsern mit der Escape-Taste unterbrochen werden. In den meisten Dialogfenstern von Windows-Anwendungen entspricht das Drücken der Escape-Taste einem Klick auf die Schaltfläche „Abbrechen“. Ein Druck auf die Escape-Taste bricht auch eine bereits begonnene Eingabe in einem Textfeld ab und stellt den Ursprungszustand wieder her. 6 | 7 | 8 | Im Augenblick sind vierzehn Kasinos 24 Stunden am Tag geöffnet, zu Lande oder auf dem Meer, in denen Spielergruppen - weitaus leiser, als man vermuten möchte - ohne Unterbrechung in fensterlosen Sälen unter gleißendem Neonlicht zocken. 
9 | 10 | 11 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/.license-header.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} 2 | Ubiquitous Knowledge Processing (UKP) Lab 3 | Technische Universität Darmstadt 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/wrapper/Disambiguator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * This program is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program. If not, see . 18 | */ 19 | package de.tudarmstadt.ukp.dkpro.wsd.wrapper; 20 | 21 | import java.io.IOException; 22 | import java.util.List; 23 | 24 | /** 25 | * A disambiguator that returns senses for an input text 26 | * @author nico.erbs@gmail.com 27 | * 28 | */ 29 | public interface Disambiguator { 30 | 31 | /** 32 | * @param inputText The input text. 33 | * 34 | * @return 35 | * The list of string with the senses disambiguated in the input text. 36 | */ 37 | List disambiguate(String inputText) throws IOException; 38 | 39 | /** 40 | * @return The name of the disambiguator. 41 | */ 42 | String getName(); 43 | 44 | /** 45 | * @return Returns a string with the configuration details of this word sense disambiguator. 46 | */ 47 | String getConfigurationDetails(); 48 | 49 | } 50 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/src/main/resources/stopwords/german_stopwords.txt: -------------------------------------------------------------------------------- 1 | " 2 | ' 3 | ? 4 | ! 5 | , 6 | ; 7 | : 8 | . 
9 | + 10 | ( 11 | ) 12 | [ 13 | ] 14 | { 15 | } 16 | / 17 | \ 18 | aber 19 | als 20 | am 21 | an 22 | auch 23 | auf 24 | aus 25 | bei 26 | bin 27 | bis 28 | bist 29 | da 30 | dadurch 31 | daher 32 | darum 33 | das 34 | daß 35 | dass 36 | dein 37 | deine 38 | dem 39 | den 40 | der 41 | des 42 | dessen 43 | deshalb 44 | die 45 | dies 46 | dieser 47 | dieses 48 | doch 49 | dort 50 | du 51 | durch 52 | ein 53 | eine 54 | einem 55 | einen 56 | einer 57 | eines 58 | er 59 | es 60 | euer 61 | eure 62 | für 63 | hatte 64 | hatten 65 | hattest 66 | hattet 67 | hier hinter 68 | ich 69 | ihr 70 | ihre 71 | im 72 | in 73 | ist 74 | ja 75 | jede 76 | jedem 77 | jeden 78 | jeder 79 | jedes 80 | jener 81 | jenes 82 | jetzt 83 | kann 84 | kannst 85 | können 86 | könnt 87 | machen 88 | mein 89 | meine 90 | mit 91 | muß 92 | mußt 93 | musst 94 | müssen 95 | müßt 96 | nach 97 | nachdem 98 | nein 99 | nicht 100 | nun 101 | oder 102 | seid 103 | sein 104 | seine 105 | sich 106 | sie 107 | sind 108 | soll 109 | sollen 110 | sollst 111 | sollt 112 | sonst 113 | soweit 114 | sowie 115 | und 116 | unser 117 | unsere 118 | unter 119 | vom 120 | von 121 | vor 122 | wann 123 | warum 124 | was 125 | weiter 126 | weitere 127 | wenn 128 | wer 129 | werde 130 | werden 131 | werdet 132 | weshalb 133 | wie 134 | wieder 135 | wieso 136 | wir 137 | wird 138 | wirst 139 | wo 140 | woher 141 | wohin 142 | zu 143 | zum 144 | zur 145 | über -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/src/main/resources/stopwords/punctuation.txt: -------------------------------------------------------------------------------- 1 | . 2 | , 3 | : 4 | ; 5 | ? 6 | ! 
-------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/src/main/resources/stopwords/stoplist_de.txt: -------------------------------------------------------------------------------- 1 | aber 2 | als 3 | am 4 | an 5 | auch 6 | auf 7 | aus 8 | bei 9 | bin 10 | bis 11 | bist 12 | da 13 | dadurch 14 | daher 15 | darum 16 | das 17 | daß 18 | dass 19 | dein 20 | deine 21 | dem 22 | den 23 | der 24 | des 25 | dessen 26 | deshalb 27 | die 28 | dies 29 | dieser 30 | dieses 31 | doch 32 | dort 33 | du 34 | durch 35 | ein 36 | eine 37 | einem 38 | einen 39 | einer 40 | eines 41 | er 42 | es 43 | euer 44 | eure 45 | für 46 | hatte 47 | hatten 48 | hattest 49 | hattet 50 | hier hinter 51 | ich 52 | ihr 53 | ihre 54 | im 55 | in 56 | ist 57 | ja 58 | jede 59 | jedem 60 | jeden 61 | jeder 62 | jedes 63 | jener 64 | jenes 65 | jetzt 66 | kann 67 | kannst 68 | können 69 | könnt 70 | machen 71 | mein 72 | meine 73 | mit 74 | muß 75 | mußt 76 | musst 77 | müssen 78 | müßt 79 | nach 80 | nachdem 81 | nein 82 | nicht 83 | nun 84 | oder 85 | seid 86 | sein 87 | seine 88 | sich 89 | sie 90 | sind 91 | soll 92 | sollen 93 | sollst 94 | sollt 95 | sonst 96 | soweit 97 | sowie 98 | und 99 | unser 100 | unsere 101 | unter 102 | vom 103 | von 104 | vor 105 | wann 106 | warum 107 | was 108 | weiter 109 | weitere 110 | wenn 111 | wer 112 | werde 113 | werden 114 | werdet 115 | weshalb 116 | wie 117 | wieder 118 | wieso 119 | wir 120 | wird 121 | wirst 122 | wo 123 | woher 124 | wohin 125 | zu 126 | zum 127 | zur 128 | über 129 | , 130 | . 
131 | - 132 | • 133 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/src/test/java/de/tudarmstadt/ukp/dkpro/wsd/wrapper/LinkDatabaseLinkMeasureDisambiguatorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * This program is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 | */ 19 | package de.tudarmstadt.ukp.dkpro.wsd.wrapper; 20 | 21 | import static org.junit.Assert.*; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | public class LinkDatabaseLinkMeasureDisambiguatorTest { 31 | 32 | @Test 33 | @Ignore 34 | public void disambiguateTest() throws IOException { 35 | String input = "Please disambiguate this text."; 36 | 37 | Disambiguator disambiguator = new LinkDatabaseLinkMeasureDisambiguator(); 38 | List senses = disambiguator.disambiguate(input); 39 | System.out.println(senses); 40 | 41 | List goldSenses = new ArrayList(); 42 | goldSenses.add("word_sense_disambiguation"); 43 | 44 | assertEquals(goldSenses.size(), senses.size()); 45 | for(String sense : senses){ 46 | assertTrue(goldSenses.contains(sense)); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wrapper-gpl/src/test/java/de/tudarmstadt/ukp/dkpro/wsd/wrapper/LinkDatabaseMFSDisambiguatorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * This program is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program. If not, see . 
18 | */ 19 | package de.tudarmstadt.ukp.dkpro.wsd.wrapper; 20 | 21 | import static org.junit.Assert.*; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | public class LinkDatabaseMFSDisambiguatorTest { 31 | 32 | @Test 33 | @Ignore 34 | public void disambiguateTest() throws IOException { 35 | String input = "Please disambiguate this text."; 36 | 37 | Disambiguator disambiguator = new LinkDatabaseMFSDisambiguator(); 38 | List senses = disambiguator.disambiguate(input); 39 | System.out.println(senses); 40 | 41 | List goldSenses = new ArrayList(); 42 | goldSenses.add("word_sense_disambiguation"); 43 | goldSenses.add("text_user_interface"); 44 | 45 | assertEquals(goldSenses.size(), senses.size()); 46 | for(String sense : senses){ 47 | assertTrue(goldSenses.contains(sense)); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wsi/README.txt: -------------------------------------------------------------------------------- 1 | DKPro WSD WSI 2 | 3 | This package contains parts of the UKP system participating in the SemEval 2013 Task 11 4 | "Word Sense Induction & Disambiguation within an End-User Application". 5 | 6 | In particular it contains 7 | * a reader for the dataset (AMBIENTReader) 8 | * an annotator for marking the target words (WSIAnnotator), 9 | * a loadable sense inventory (JSONSenseInventory) which may use an induced inventory 10 | * the WSD step and a writer that produces input files for the official evaluation package (Semeval2013Task11Evaluator). 11 | 12 | What is missing to run the actual system is 13 | 14 | * the actual WSI algorithm, which was written in R. An older version of this algorithm is included 15 | as SimpleGraphClusterinInductionAlgorithm and I am planning to port back the improvements from the 16 | R implementation. 
17 | 18 | * the co-occurrence database. The code to generate such a database will soon be available as part of the 19 | DKPro BigData package (https://code.google.com/p/dkpro-bigdata/). 20 | 21 | * the distributional thesaurus for lexical expansion. The code to generate the thesaurus is available 22 | from http://www.jobimtext.org. 23 | 24 | The documentation will be updated soon. If you want to make use of this module, do not hesitate to contact 25 | me (at hpzorn@gmail.com) 26 | 27 | August 2013, 28 | Hans-Peter Zorn -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wsi/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/wsi/algorithm/SenseInductionAlgorithm.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | package de.tudarmstadt.ukp.dkpro.wsd.wsi.algorithm; 19 | 20 | import java.util.Collection; 21 | 22 | import de.tudarmstadt.ukp.dkpro.wsd.WSDException; 23 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventory; 24 | 25 | public interface SenseInductionAlgorithm 26 | { 27 | /** 28 | * Starts a batch induction 29 | * 30 | * @param targetWords 31 | */ 32 | public SenseInventory induce(Collection targetWords); 33 | 34 | /** 35 | * @param term 36 | * @throws WSDException 37 | */ 38 | public void induceSenses(String term) 39 | throws WSDException; 40 | 41 | public SenseInventory getSenseInventory(); 42 | } -------------------------------------------------------------------------------- /de.tudarmstadt.ukp.dkpro.wsd.wsi/src/main/java/de/tudarmstadt/ukp/dkpro/wsd/wsi/algorithm/WSIAlgorithmBase.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2017 3 | * Ubiquitous Knowledge Processing (UKP) Lab 4 | * Technische Universität Darmstadt 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | ******************************************************************************/ 18 | package de.tudarmstadt.ukp.dkpro.wsd.wsi.algorithm; 19 | 20 | import de.tudarmstadt.ukp.dkpro.wsd.si.SenseInventory; 21 | import de.tudarmstadt.ukp.dkpro.wsd.wsi.si.InducedSenseInventory; 22 | 23 | public abstract class WSIAlgorithmBase 24 | implements SenseInductionAlgorithm 25 | { 26 | 27 | protected InducedSenseInventory senseInventory; 28 | 29 | public WSIAlgorithmBase() 30 | { 31 | super(); 32 | } 33 | 34 | @Override 35 | public SenseInventory getSenseInventory() 36 | { 37 | return senseInventory; 38 | } 39 | 40 | } --------------------------------------------------------------------------------