├── .asf.yaml ├── .gitattributes ├── .github ├── CONTRIBUTING.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── license.yml │ ├── maven.yml │ ├── publish-snapshots.yml │ └── shell-tests.yml ├── .gitignore ├── .mvn └── wrapper │ └── maven-wrapper.properties ├── LICENSE ├── NOTICE ├── README.md ├── checkstyle.xml ├── dev └── Snowball-Stemmer.md ├── mvnw ├── mvnw.cmd ├── opennlp-distr ├── README_FOOTER.html ├── README_HEADER.html ├── pom.xml └── src │ ├── main │ ├── assembly │ │ ├── bin.xml │ │ └── src.xml │ ├── bin │ │ ├── opennlp │ │ └── opennlp.bat │ ├── docker │ │ └── Dockerfile │ ├── readme │ │ ├── LICENSE │ │ └── NOTICE │ └── resources │ │ └── log4j2.xml │ └── test │ ├── ps │ └── test_opennlp.Tests.ps1 │ └── sh │ └── test_opennlp.bats ├── opennlp-dl-gpu ├── README.md └── pom.xml ├── opennlp-dl ├── README.md ├── pom.xml └── src │ ├── main │ └── java │ │ └── opennlp │ │ └── dl │ │ ├── AbstractDL.java │ │ ├── InferenceOptions.java │ │ ├── SpanEnd.java │ │ ├── Tokens.java │ │ ├── doccat │ │ ├── DocumentCategorizerConfig.java │ │ ├── DocumentCategorizerDL.java │ │ └── scoring │ │ │ ├── AverageClassificationScoringStrategy.java │ │ │ └── ClassificationScoringStrategy.java │ │ ├── namefinder │ │ └── NameFinderDL.java │ │ └── vectors │ │ └── SentenceVectorsDL.java │ └── test │ └── java │ └── opennlp │ └── dl │ ├── AbstractDLTest.java │ ├── doccat │ ├── DocumentCategorizerConfigTest.java │ ├── DocumentCategorizerDLEval.java │ └── scoring │ │ └── AverageClassificationScoringStrategyTest.java │ ├── namefinder │ └── NameFinderDLEval.java │ └── vectors │ └── SentenceVectorsDLEval.java ├── opennlp-docs ├── pom.xml └── src │ ├── docbkx │ ├── chunker.xml │ ├── cli.xml │ ├── coref.xml │ ├── corpora.xml │ ├── css │ │ └── opennlp-docs.css │ ├── doccat.xml │ ├── evaltest.xml │ ├── extension.xml │ ├── images │ │ ├── brat.png │ │ ├── opennlp-logo.png │ │ └── parsetree1.png │ ├── introduction.xml │ ├── langdetect.xml │ ├── lemmatizer.xml │ ├── machine-learning.xml 
│ ├── model-loading.xml │ ├── morfologik-addon.xml │ ├── namefinder.xml │ ├── opennlp.xml │ ├── parser.xml │ ├── postagger.xml │ ├── sentdetect.xml │ ├── tokenizer.xml │ └── uima-integration.xml │ └── main │ └── resources │ └── xsl │ └── html.xsl ├── opennlp-morfologik-addon ├── bin │ ├── morfologik-addon │ └── morfologik-addon.bat ├── pom.xml └── src │ ├── main │ ├── bin │ │ ├── morfologik-addon │ │ └── morfologik-addon.bat │ └── java │ │ └── opennlp │ │ └── morfologik │ │ ├── builder │ │ └── MorfologikDictionaryBuilder.java │ │ ├── cmdline │ │ ├── CLI.java │ │ └── builder │ │ │ ├── MorfologikDictionaryBuilderParams.java │ │ │ ├── MorfologikDictionaryBuilderTool.java │ │ │ ├── XMLDictionaryToTableParams.java │ │ │ └── XMLDictionaryToTableTool.java │ │ ├── lemmatizer │ │ └── MorfologikLemmatizer.java │ │ ├── tagdict │ │ ├── MorfologikPOSTaggerFactory.java │ │ └── MorfologikTagDictionary.java │ │ └── util │ │ └── MorfologikUtil.java │ └── test │ ├── java │ └── opennlp │ │ └── morfologik │ │ ├── AbstractMorfologikTest.java │ │ ├── builder │ │ └── MorfologikDictionaryBuilderTest.java │ │ ├── lemmatizer │ │ └── MorfologikLemmatizerTest.java │ │ └── tagdict │ │ ├── MorfologikPOSTaggerFactoryTest.java │ │ └── MorfologikTagDictionaryTest.java │ └── resources │ ├── AnnotatedSentences.txt │ ├── dictionaryWithLemma.dict │ ├── dictionaryWithLemma.info │ └── dictionaryWithLemma.txt ├── opennlp-tools-models ├── pom.xml └── src │ ├── main │ └── java │ │ └── opennlp │ │ └── tools │ │ └── models │ │ ├── AbstractClassPathModelFinder.java │ │ ├── ClassPathLoaderException.java │ │ ├── ClassPathModel.java │ │ ├── ClassPathModelEntry.java │ │ ├── ClassPathModelFinder.java │ │ ├── ClassPathModelLoader.java │ │ ├── ClassPathModelProvider.java │ │ ├── DefaultClassPathModelProvider.java │ │ ├── classgraph │ │ └── ClassgraphModelFinder.java │ │ └── simple │ │ └── SimpleClassPathModelFinder.java │ └── test │ └── java │ └── opennlp │ └── tools │ └── models │ ├── 
AbstractClassPathFinderTest.java │ ├── AbstractClassPathModelTest.java │ ├── AbstractModelLoaderTest.java │ ├── AbstractModelUsageTest.java │ ├── ClassPathModelLoaderTest.java │ ├── DefaultClassPathModelProviderTest.java │ ├── classgraph │ ├── ClassgraphModelFinderTest.java │ ├── ClassgraphModelLoaderTest.java │ └── ClassgraphModelUsageTest.java │ └── simple │ ├── SimpleClassPathModelFinderTest.java │ ├── SimpleModelLoaderTest.java │ └── SimpleModelUsageTest.java ├── opennlp-tools ├── bin │ ├── opennlp │ └── opennlp.bat ├── lang │ ├── de │ │ ├── abb_DE.xml │ │ └── namefinder │ │ │ ├── de-namefinder.xml │ │ │ └── fg-conll03-deu.xml │ ├── en │ │ ├── namefinder │ │ │ └── en-namefinder.xml │ │ ├── parser │ │ │ └── en-head_rules │ │ ├── postag │ │ │ └── en-tagdict.xml │ │ └── tokenizer │ │ │ └── en-detokenizer.xml │ ├── es │ │ ├── abb_ES.xml │ │ └── parser │ │ │ └── es-head-rules │ ├── fr │ │ ├── abb_FR.xml │ │ └── tokenizer │ │ │ └── fr-detokenizer.xml │ ├── ga │ │ ├── abb_GA.xml │ │ └── tokenizer │ │ │ └── ga-detokenizer.xml │ ├── general │ │ └── tokenizer │ │ │ └── special_char_dict.xml │ ├── it │ │ └── abb_IT.xml │ ├── ml │ │ ├── MaxentQNTrainerParams.txt │ │ ├── MaxentTrainerParams.txt │ │ ├── NaiveBayesTrainerParams.txt │ │ ├── PerceptronSequenceTrainerParams.txt │ │ └── PerceptronTrainerParams.txt │ ├── nl │ │ └── abb_NL.xml │ ├── pl │ │ └── abb_PL.xml │ └── pt │ │ ├── abb_PT.xml │ │ └── tokenizer │ │ └── pt-detokenizer.xml ├── pom.xml └── src │ ├── jmh │ └── java │ │ └── opennlp │ │ └── tools │ │ └── util │ │ └── jvm │ │ ├── BenchmarkRunner.java │ │ ├── StringDeduplicationBenchmark.java │ │ ├── StringListBenchmark.java │ │ └── jmh │ │ └── ExecutionPlan.java │ ├── main │ ├── java │ │ └── opennlp │ │ │ └── tools │ │ │ ├── chunker │ │ │ ├── ChunkSample.java │ │ │ ├── ChunkSampleSequenceStream.java │ │ │ ├── ChunkSampleStream.java │ │ │ ├── Chunker.java │ │ │ ├── ChunkerContextGenerator.java │ │ │ ├── ChunkerCrossValidator.java │ │ │ ├── 
ChunkerEvaluationMonitor.java │ │ │ ├── ChunkerEvaluator.java │ │ │ ├── ChunkerEventStream.java │ │ │ ├── ChunkerFactory.java │ │ │ ├── ChunkerME.java │ │ │ ├── ChunkerModel.java │ │ │ ├── DefaultChunkerContextGenerator.java │ │ │ ├── DefaultChunkerSequenceValidator.java │ │ │ ├── ThreadSafeChunkerME.java │ │ │ └── package-info.java │ │ │ ├── cmdline │ │ │ ├── AbstractConverterTool.java │ │ │ ├── AbstractCrossValidatorTool.java │ │ │ ├── AbstractEvaluatorTool.java │ │ │ ├── AbstractTrainerTool.java │ │ │ ├── AbstractTypedParamTool.java │ │ │ ├── ArgumentParser.java │ │ │ ├── BasicCmdLineTool.java │ │ │ ├── CLI.java │ │ │ ├── CmdLineTool.java │ │ │ ├── CmdLineUtil.java │ │ │ ├── DetailedFMeasureListener.java │ │ │ ├── EvaluationErrorPrinter.java │ │ │ ├── FineGrainedReportListener.java │ │ │ ├── GenerateManualTool.java │ │ │ ├── ModelLoader.java │ │ │ ├── ObjectStreamFactory.java │ │ │ ├── PerformanceMonitor.java │ │ │ ├── StreamFactoryRegistry.java │ │ │ ├── SystemInputStreamFactory.java │ │ │ ├── TerminateToolException.java │ │ │ ├── TypedCmdLineTool.java │ │ │ ├── chunker │ │ │ │ ├── ChunkEvaluationErrorListener.java │ │ │ │ ├── ChunkerConverterTool.java │ │ │ │ ├── ChunkerCrossValidatorTool.java │ │ │ │ ├── ChunkerDetailedFMeasureListener.java │ │ │ │ ├── ChunkerEvaluatorTool.java │ │ │ │ ├── ChunkerMETool.java │ │ │ │ ├── ChunkerModelLoader.java │ │ │ │ ├── ChunkerTrainerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── dictionary │ │ │ │ ├── DictionaryBuilderParams.java │ │ │ │ └── DictionaryBuilderTool.java │ │ │ ├── doccat │ │ │ │ ├── DoccatConverterTool.java │ │ │ │ ├── DoccatCrossValidatorTool.java │ │ │ │ ├── DoccatEvaluationErrorListener.java │ │ │ │ ├── DoccatEvaluatorTool.java │ │ │ │ ├── DoccatFineGrainedReportListener.java │ │ │ │ ├── DoccatModelLoader.java │ │ │ │ ├── DoccatTool.java │ │ │ │ ├── DoccatTrainerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── entitylinker │ │ │ │ └── EntityLinkerTool.java │ │ │ ├── langdetect │ │ │ │ ├── 
LanguageDetectorConverterTool.java │ │ │ │ ├── LanguageDetectorCrossValidatorTool.java │ │ │ │ ├── LanguageDetectorEvaluationErrorListener.java │ │ │ │ ├── LanguageDetectorEvaluatorTool.java │ │ │ │ ├── LanguageDetectorFineGrainedReportListener.java │ │ │ │ ├── LanguageDetectorModelLoader.java │ │ │ │ ├── LanguageDetectorTool.java │ │ │ │ ├── LanguageDetectorTrainerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── languagemodel │ │ │ │ └── NGramLanguageModelTool.java │ │ │ ├── lemmatizer │ │ │ │ ├── LemmaEvaluationErrorListener.java │ │ │ │ ├── LemmatizerEvaluatorTool.java │ │ │ │ ├── LemmatizerFineGrainedReportListener.java │ │ │ │ ├── LemmatizerMETool.java │ │ │ │ ├── LemmatizerModelLoader.java │ │ │ │ ├── LemmatizerTrainerTool.java │ │ │ │ ├── TrainingParams.java │ │ │ │ └── package-info.java │ │ │ ├── namefind │ │ │ │ ├── CensusDictionaryCreatorTool.java │ │ │ │ ├── NameEvaluationErrorListener.java │ │ │ │ ├── NameSampleCountersStream.java │ │ │ │ ├── TokenNameFinderConverterTool.java │ │ │ │ ├── TokenNameFinderCrossValidatorTool.java │ │ │ │ ├── TokenNameFinderDetailedFMeasureListener.java │ │ │ │ ├── TokenNameFinderEvaluatorTool.java │ │ │ │ ├── TokenNameFinderFineGrainedReportListener.java │ │ │ │ ├── TokenNameFinderModelLoader.java │ │ │ │ ├── TokenNameFinderTool.java │ │ │ │ ├── TokenNameFinderTrainerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── params │ │ │ │ ├── BasicFormatParams.java │ │ │ │ ├── BasicTrainingParams.java │ │ │ │ ├── CVParams.java │ │ │ │ ├── DetokenizerParameter.java │ │ │ │ ├── EncodingParameter.java │ │ │ │ ├── EvaluatorParams.java │ │ │ │ ├── FineGrainedEvaluatorParams.java │ │ │ │ ├── LanguageParams.java │ │ │ │ └── TrainingToolParams.java │ │ │ ├── parser │ │ │ │ ├── BuildModelUpdaterTool.java │ │ │ │ ├── CheckModelUpdaterTool.java │ │ │ │ ├── ModelUpdaterTool.java │ │ │ │ ├── ParserConverterTool.java │ │ │ │ ├── ParserEvaluatorTool.java │ │ │ │ ├── ParserModelLoader.java │ │ │ │ ├── ParserTool.java │ │ │ │ ├── 
ParserTrainerTool.java │ │ │ │ ├── TaggerModelReplacerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── postag │ │ │ │ ├── POSEvaluationErrorListener.java │ │ │ │ ├── POSModelLoader.java │ │ │ │ ├── POSTaggerConverterTool.java │ │ │ │ ├── POSTaggerCrossValidatorTool.java │ │ │ │ ├── POSTaggerEvaluatorTool.java │ │ │ │ ├── POSTaggerFineGrainedReportListener.java │ │ │ │ ├── POSTaggerTool.java │ │ │ │ ├── POSTaggerTrainerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── sentdetect │ │ │ │ ├── SentenceDetectorConverterTool.java │ │ │ │ ├── SentenceDetectorCrossValidatorTool.java │ │ │ │ ├── SentenceDetectorEvaluatorTool.java │ │ │ │ ├── SentenceDetectorTool.java │ │ │ │ ├── SentenceDetectorTrainerTool.java │ │ │ │ ├── SentenceEvaluationErrorListener.java │ │ │ │ ├── SentenceModelLoader.java │ │ │ │ └── TrainingParams.java │ │ │ └── tokenizer │ │ │ │ ├── CommandLineTokenizer.java │ │ │ │ ├── DetokenEvaluationErrorListener.java │ │ │ │ ├── DetokenizationDictionaryLoader.java │ │ │ │ ├── DictionaryDetokenizerTool.java │ │ │ │ ├── SimpleTokenizerTool.java │ │ │ │ ├── TokenEvaluationErrorListener.java │ │ │ │ ├── TokenizerConverterTool.java │ │ │ │ ├── TokenizerCrossValidatorTool.java │ │ │ │ ├── TokenizerMEEvaluatorTool.java │ │ │ │ ├── TokenizerMETool.java │ │ │ │ ├── TokenizerModelLoader.java │ │ │ │ ├── TokenizerTrainerTool.java │ │ │ │ └── TrainingParams.java │ │ │ ├── commons │ │ │ ├── Internal.java │ │ │ ├── Sample.java │ │ │ ├── ThreadSafe.java │ │ │ ├── Trainer.java │ │ │ └── package-info.java │ │ │ ├── dictionary │ │ │ ├── Dictionary.java │ │ │ ├── Index.java │ │ │ ├── package-info.java │ │ │ └── serializer │ │ │ │ ├── Attributes.java │ │ │ │ ├── DictionaryEntryPersistor.java │ │ │ │ ├── Entry.java │ │ │ │ └── EntryInserter.java │ │ │ ├── doccat │ │ │ ├── BagOfWordsFeatureGenerator.java │ │ │ ├── DoccatCrossValidator.java │ │ │ ├── DoccatEvaluationMonitor.java │ │ │ ├── DoccatFactory.java │ │ │ ├── DoccatModel.java │ │ │ ├── DocumentCategorizer.java │ │ │ ├── 
DocumentCategorizerContextGenerator.java │ │ │ ├── DocumentCategorizerEvaluator.java │ │ │ ├── DocumentCategorizerEventStream.java │ │ │ ├── DocumentCategorizerME.java │ │ │ ├── DocumentSample.java │ │ │ ├── DocumentSampleStream.java │ │ │ ├── FeatureGenerator.java │ │ │ ├── NGramFeatureGenerator.java │ │ │ └── package-info.java │ │ │ ├── entitylinker │ │ │ ├── BaseLink.java │ │ │ ├── EntityLinker.java │ │ │ ├── EntityLinkerFactory.java │ │ │ ├── EntityLinkerProperties.java │ │ │ ├── LinkedSpan.java │ │ │ └── package-info.java │ │ │ ├── formats │ │ │ ├── AbstractSampleStreamFactory.java │ │ │ ├── BioNLP2004NameSampleStream.java │ │ │ ├── BioNLP2004NameSampleStreamFactory.java │ │ │ ├── ChunkerSampleStreamFactory.java │ │ │ ├── Conll02NameSampleStream.java │ │ │ ├── Conll02NameSampleStreamFactory.java │ │ │ ├── Conll03NameSampleStream.java │ │ │ ├── Conll03NameSampleStreamFactory.java │ │ │ ├── ConllXPOSSampleStream.java │ │ │ ├── ConllXPOSSampleStreamFactory.java │ │ │ ├── ConllXSentenceSampleStreamFactory.java │ │ │ ├── ConllXTokenSampleStreamFactory.java │ │ │ ├── DetokenizerSampleStreamFactory.java │ │ │ ├── DirectorySampleStream.java │ │ │ ├── DocumentSampleStreamFactory.java │ │ │ ├── EvalitaNameSampleStream.java │ │ │ ├── EvalitaNameSampleStreamFactory.java │ │ │ ├── LanguageDetectorSampleStreamFactory.java │ │ │ ├── LanguageSampleStreamFactory.java │ │ │ ├── LemmatizerSampleStreamFactory.java │ │ │ ├── NameFinderCensus90NameStream.java │ │ │ ├── NameSampleDataStreamFactory.java │ │ │ ├── ParseSampleStreamFactory.java │ │ │ ├── SentenceSampleStreamFactory.java │ │ │ ├── TokenSampleStreamFactory.java │ │ │ ├── TwentyNewsgroupSampleStream.java │ │ │ ├── TwentyNewsgroupSampleStreamFactory.java │ │ │ ├── WordTagSampleStreamFactory.java │ │ │ ├── ad │ │ │ │ ├── ADChunkSampleStream.java │ │ │ │ ├── ADChunkSampleStreamFactory.java │ │ │ │ ├── ADNameSampleStream.java │ │ │ │ ├── ADNameSampleStreamFactory.java │ │ │ │ ├── ADPOSSampleStream.java │ │ │ │ ├── 
ADPOSSampleStreamFactory.java │ │ │ │ ├── ADSentenceSampleStream.java │ │ │ │ ├── ADSentenceSampleStreamFactory.java │ │ │ │ ├── ADSentenceStream.java │ │ │ │ ├── ADTokenSampleStreamFactory.java │ │ │ │ ├── PortugueseContractionUtility.java │ │ │ │ └── package-info.java │ │ │ ├── brat │ │ │ │ ├── AnnotationConfiguration.java │ │ │ │ ├── AnnotatorNoteAnnotation.java │ │ │ │ ├── AttributeAnnotation.java │ │ │ │ ├── BratAnnotation.java │ │ │ │ ├── BratAnnotationStream.java │ │ │ │ ├── BratDocument.java │ │ │ │ ├── BratDocumentParser.java │ │ │ │ ├── BratDocumentStream.java │ │ │ │ ├── BratNameSampleStream.java │ │ │ │ ├── BratNameSampleStreamFactory.java │ │ │ │ ├── EventAnnotation.java │ │ │ │ ├── RelationAnnotation.java │ │ │ │ ├── SegmenterObjectStream.java │ │ │ │ ├── SpanAnnotation.java │ │ │ │ └── package-info.java │ │ │ ├── conllu │ │ │ │ ├── ConlluLemmaSampleStream.java │ │ │ │ ├── ConlluLemmaSampleStreamFactory.java │ │ │ │ ├── ConlluPOSSampleStream.java │ │ │ │ ├── ConlluPOSSampleStreamFactory.java │ │ │ │ ├── ConlluSentence.java │ │ │ │ ├── ConlluSentenceSampleStream.java │ │ │ │ ├── ConlluSentenceSampleStreamFactory.java │ │ │ │ ├── ConlluStream.java │ │ │ │ ├── ConlluTagset.java │ │ │ │ ├── ConlluTokenSampleStream.java │ │ │ │ ├── ConlluTokenSampleStreamFactory.java │ │ │ │ ├── ConlluWordLine.java │ │ │ │ └── package-info.java │ │ │ ├── convert │ │ │ │ ├── AbstractToSentenceSampleStream.java │ │ │ │ ├── FileToByteArraySampleStream.java │ │ │ │ ├── FileToStringSampleStream.java │ │ │ │ ├── NameToSentenceSampleStream.java │ │ │ │ ├── NameToSentenceSampleStreamFactory.java │ │ │ │ ├── NameToTokenSampleStream.java │ │ │ │ ├── NameToTokenSampleStreamFactory.java │ │ │ │ ├── POSToSentenceSampleStream.java │ │ │ │ ├── POSToSentenceSampleStreamFactory.java │ │ │ │ ├── POSToTokenSampleStream.java │ │ │ │ ├── POSToTokenSampleStreamFactory.java │ │ │ │ ├── ParseToPOSSampleStream.java │ │ │ │ ├── ParseToPOSSampleStreamFactory.java │ │ │ │ ├── 
ParseToSentenceSampleStreamFactory.java │ │ │ │ └── ParseToTokenSampleStreamFactory.java │ │ │ ├── frenchtreebank │ │ │ │ ├── ConstitDocumentHandler.java │ │ │ │ ├── ConstitParseSampleStream.java │ │ │ │ ├── ConstitParseSampleStreamFactory.java │ │ │ │ └── package-info.java │ │ │ ├── irishsentencebank │ │ │ │ ├── IrishSentenceBankDocument.java │ │ │ │ ├── IrishSentenceBankSentenceStream.java │ │ │ │ ├── IrishSentenceBankSentenceStreamFactory.java │ │ │ │ ├── IrishSentenceBankTokenSampleStream.java │ │ │ │ ├── IrishSentenceBankTokenSampleStreamFactory.java │ │ │ │ └── package-info.java │ │ │ ├── leipzig │ │ │ │ ├── LeipzigLanguageSampleStream.java │ │ │ │ ├── LeipzigLanguageSampleStreamFactory.java │ │ │ │ ├── SampleShuffleStream.java │ │ │ │ ├── SampleSkipStream.java │ │ │ │ └── package-info.java │ │ │ ├── letsmt │ │ │ │ ├── DetokenizeSentenceSampleStream.java │ │ │ │ ├── LetsmtDocument.java │ │ │ │ ├── LetsmtSentenceStream.java │ │ │ │ ├── LetsmtSentenceStreamFactory.java │ │ │ │ └── package-info.java │ │ │ ├── masc │ │ │ │ ├── Masc.java │ │ │ │ ├── MascDocument.java │ │ │ │ ├── MascDocumentStream.java │ │ │ │ ├── MascNamedEntityParser.java │ │ │ │ ├── MascNamedEntitySampleStream.java │ │ │ │ ├── MascNamedEntitySampleStreamFactory.java │ │ │ │ ├── MascPOSSampleStream.java │ │ │ │ ├── MascPOSSampleStreamFactory.java │ │ │ │ ├── MascPennTagParser.java │ │ │ │ ├── MascSentence.java │ │ │ │ ├── MascSentenceParser.java │ │ │ │ ├── MascSentenceSampleStream.java │ │ │ │ ├── MascSentenceSampleStreamFactory.java │ │ │ │ ├── MascToken.java │ │ │ │ ├── MascTokenSampleStream.java │ │ │ │ ├── MascTokenSampleStreamFactory.java │ │ │ │ ├── MascWord.java │ │ │ │ ├── MascWordParser.java │ │ │ │ └── package-info.java │ │ │ ├── moses │ │ │ │ ├── MosesSentenceSampleStream.java │ │ │ │ └── MosesSentenceSampleStreamFactory.java │ │ │ ├── muc │ │ │ │ ├── DocumentSplitterStream.java │ │ │ │ ├── Muc6NameSampleStreamFactory.java │ │ │ │ ├── MucElementNames.java │ │ │ │ ├── 
MucNameContentHandler.java │ │ │ │ ├── MucNameSampleStream.java │ │ │ │ ├── SgmlParser.java │ │ │ │ └── package-info.java │ │ │ ├── nkjp │ │ │ │ ├── NKJPSegmentationDocument.java │ │ │ │ ├── NKJPSentenceSampleStream.java │ │ │ │ ├── NKJPSentenceSampleStreamFactory.java │ │ │ │ ├── NKJPTextDocument.java │ │ │ │ └── package-info.java │ │ │ ├── ontonotes │ │ │ │ ├── DocumentToLineStream.java │ │ │ │ ├── OntoNotesFormatParameters.java │ │ │ │ ├── OntoNotesNameSampleStream.java │ │ │ │ ├── OntoNotesNameSampleStreamFactory.java │ │ │ │ ├── OntoNotesPOSSampleStreamFactory.java │ │ │ │ ├── OntoNotesParseSampleStream.java │ │ │ │ ├── OntoNotesParseSampleStreamFactory.java │ │ │ │ └── package-info.java │ │ │ └── package-info.java │ │ │ ├── langdetect │ │ │ ├── DefaultLanguageDetectorContextGenerator.java │ │ │ ├── Language.java │ │ │ ├── LanguageDetector.java │ │ │ ├── LanguageDetectorConfig.java │ │ │ ├── LanguageDetectorContextGenerator.java │ │ │ ├── LanguageDetectorCrossValidator.java │ │ │ ├── LanguageDetectorEvaluationMonitor.java │ │ │ ├── LanguageDetectorEvaluator.java │ │ │ ├── LanguageDetectorEventStream.java │ │ │ ├── LanguageDetectorFactory.java │ │ │ ├── LanguageDetectorME.java │ │ │ ├── LanguageDetectorModel.java │ │ │ ├── LanguageDetectorSampleStream.java │ │ │ ├── LanguageSample.java │ │ │ ├── ProbingLanguageDetectionResult.java │ │ │ ├── ThreadSafeLanguageDetectorME.java │ │ │ └── package-info.java │ │ │ ├── languagemodel │ │ │ ├── LanguageModel.java │ │ │ ├── NGramLanguageModel.java │ │ │ └── package-info.java │ │ │ ├── lemmatizer │ │ │ ├── DefaultLemmatizerContextGenerator.java │ │ │ ├── DefaultLemmatizerSequenceValidator.java │ │ │ ├── DictionaryLemmatizer.java │ │ │ ├── LemmaSample.java │ │ │ ├── LemmaSampleEventStream.java │ │ │ ├── LemmaSampleSequenceStream.java │ │ │ ├── LemmaSampleStream.java │ │ │ ├── Lemmatizer.java │ │ │ ├── LemmatizerContextGenerator.java │ │ │ ├── LemmatizerEvaluationMonitor.java │ │ │ ├── LemmatizerEvaluator.java │ │ │ ├── 
LemmatizerFactory.java │ │ │ ├── LemmatizerME.java │ │ │ ├── LemmatizerModel.java │ │ │ ├── ThreadSafeLemmatizerME.java │ │ │ └── package-info.java │ │ │ ├── log │ │ │ ├── LogPrintStream.java │ │ │ └── package-info.java │ │ │ ├── ml │ │ │ ├── AbstractEventModelSequenceTrainer.java │ │ │ ├── AbstractEventTrainer.java │ │ │ ├── AbstractMLModelWriter.java │ │ │ ├── AbstractTrainer.java │ │ │ ├── ArrayMath.java │ │ │ ├── BeamSearch.java │ │ │ ├── EventModelSequenceTrainer.java │ │ │ ├── EventTrainer.java │ │ │ ├── SequenceTrainer.java │ │ │ ├── TrainerFactory.java │ │ │ ├── maxent │ │ │ │ ├── AllEnglishAffixes.txt │ │ │ │ ├── BasicContextGenerator.java │ │ │ │ ├── ContextGenerator.java │ │ │ │ ├── DataStream.java │ │ │ │ ├── GISFormat │ │ │ │ ├── GISModel.java │ │ │ │ ├── GISTrainer.java │ │ │ │ ├── RealBasicEventStream.java │ │ │ │ ├── io │ │ │ │ │ ├── BinaryGISModelReader.java │ │ │ │ │ ├── BinaryGISModelWriter.java │ │ │ │ │ ├── BinaryQNModelReader.java │ │ │ │ │ ├── BinaryQNModelWriter.java │ │ │ │ │ ├── GISModelReader.java │ │ │ │ │ ├── GISModelWriter.java │ │ │ │ │ ├── QNModelReader.java │ │ │ │ │ ├── QNModelWriter.java │ │ │ │ │ ├── package-info.java │ │ │ │ │ └── package.html │ │ │ │ ├── package-info.java │ │ │ │ ├── package.html │ │ │ │ └── quasinewton │ │ │ │ │ ├── Function.java │ │ │ │ │ ├── LineSearch.java │ │ │ │ │ ├── NegLogLikelihood.java │ │ │ │ │ ├── ParallelNegLogLikelihood.java │ │ │ │ │ ├── QNMinimizer.java │ │ │ │ │ ├── QNModel.java │ │ │ │ │ ├── QNTrainer.java │ │ │ │ │ └── package-info.java │ │ │ ├── model │ │ │ │ ├── AbstractDataIndexer.java │ │ │ │ ├── AbstractModel.java │ │ │ │ ├── AbstractModelReader.java │ │ │ │ ├── AbstractModelWriter.java │ │ │ │ ├── BinaryFileDataReader.java │ │ │ │ ├── ChecksumEventStream.java │ │ │ │ ├── ComparableEvent.java │ │ │ │ ├── ComparablePredicate.java │ │ │ │ ├── Context.java │ │ │ │ ├── DataIndexer.java │ │ │ │ ├── DataIndexerFactory.java │ │ │ │ ├── DataReader.java │ │ │ │ ├── DynamicEvalParameters.java │ │ 
│ │ ├── EvalParameters.java │ │ │ │ ├── Event.java │ │ │ │ ├── FileEventStream.java │ │ │ │ ├── GenericModelReader.java │ │ │ │ ├── GenericModelWriter.java │ │ │ │ ├── MaxentModel.java │ │ │ │ ├── ModelParameterChunker.java │ │ │ │ ├── MutableContext.java │ │ │ │ ├── ObjectDataReader.java │ │ │ │ ├── OnePassDataIndexer.java │ │ │ │ ├── OnePassRealValueDataIndexer.java │ │ │ │ ├── PlainTextFileDataReader.java │ │ │ │ ├── Prior.java │ │ │ │ ├── RealValueFileEventStream.java │ │ │ │ ├── Sequence.java │ │ │ │ ├── SequenceClassificationModel.java │ │ │ │ ├── SequenceStream.java │ │ │ │ ├── SequenceStreamEventStream.java │ │ │ │ ├── TwoPassDataIndexer.java │ │ │ │ ├── UniformPrior.java │ │ │ │ └── package-info.java │ │ │ ├── naivebayes │ │ │ │ ├── BinaryNaiveBayesModelReader.java │ │ │ │ ├── BinaryNaiveBayesModelWriter.java │ │ │ │ ├── LogProbabilities.java │ │ │ │ ├── LogProbability.java │ │ │ │ ├── NaiveBayesEvalParameters.java │ │ │ │ ├── NaiveBayesModel.java │ │ │ │ ├── NaiveBayesModelReader.java │ │ │ │ ├── NaiveBayesModelWriter.java │ │ │ │ ├── NaiveBayesTrainer.java │ │ │ │ ├── PlainTextNaiveBayesModelReader.java │ │ │ │ ├── PlainTextNaiveBayesModelWriter.java │ │ │ │ ├── Probabilities.java │ │ │ │ ├── Probability.java │ │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ └── perceptron │ │ │ │ ├── BinaryPerceptronModelReader.java │ │ │ │ ├── BinaryPerceptronModelWriter.java │ │ │ │ ├── PerceptronModel.java │ │ │ │ ├── PerceptronModelReader.java │ │ │ │ ├── PerceptronModelWriter.java │ │ │ │ ├── PerceptronTrainer.java │ │ │ │ ├── SimplePerceptronSequenceTrainer.java │ │ │ │ └── package-info.java │ │ │ ├── models │ │ │ └── ModelType.java │ │ │ ├── monitoring │ │ │ ├── DefaultTrainingProgressMonitor.java │ │ │ ├── IterDeltaAccuracyUnderTolerance.java │ │ │ ├── LogLikelihoodThresholdBreached.java │ │ │ ├── StopCriteria.java │ │ │ ├── TrainingMeasure.java │ │ │ └── TrainingProgressMonitor.java │ │ │ ├── namefind │ │ │ ├── BilouCodec.java │ │ │ ├── 
BilouNameFinderSequenceValidator.java │ │ │ ├── BioCodec.java │ │ │ ├── DefaultNameContextGenerator.java │ │ │ ├── DictionaryNameFinder.java │ │ │ ├── DocumentNameFinder.java │ │ │ ├── NameContextGenerator.java │ │ │ ├── NameFinderEventStream.java │ │ │ ├── NameFinderME.java │ │ │ ├── NameFinderSequenceValidator.java │ │ │ ├── NameSample.java │ │ │ ├── NameSampleDataStream.java │ │ │ ├── NameSampleSequenceStream.java │ │ │ ├── NameSampleTypeFilter.java │ │ │ ├── RegexNameFinder.java │ │ │ ├── RegexNameFinderFactory.java │ │ │ ├── ThreadSafeNameFinderME.java │ │ │ ├── TokenNameFinder.java │ │ │ ├── TokenNameFinderCrossValidator.java │ │ │ ├── TokenNameFinderEvaluationMonitor.java │ │ │ ├── TokenNameFinderEvaluator.java │ │ │ ├── TokenNameFinderFactory.java │ │ │ ├── TokenNameFinderModel.java │ │ │ └── package-info.java │ │ │ ├── ngram │ │ │ ├── NGramCharModel.java │ │ │ ├── NGramGenerator.java │ │ │ ├── NGramModel.java │ │ │ ├── NGramUtils.java │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ ├── parser │ │ │ ├── AbstractBottomUpParser.java │ │ │ ├── AbstractContextGenerator.java │ │ │ ├── AbstractParserEventStream.java │ │ │ ├── ChunkContextGenerator.java │ │ │ ├── ChunkSampleStream.java │ │ │ ├── Cons.java │ │ │ ├── Constituent.java │ │ │ ├── GapLabeler.java │ │ │ ├── HeadRules.java │ │ │ ├── Parse.java │ │ │ ├── ParseSampleStream.java │ │ │ ├── Parser.java │ │ │ ├── ParserChunkerFactory.java │ │ │ ├── ParserChunkerSequenceValidator.java │ │ │ ├── ParserCrossValidator.java │ │ │ ├── ParserEvaluationMonitor.java │ │ │ ├── ParserEvaluator.java │ │ │ ├── ParserEventTypeEnum.java │ │ │ ├── ParserFactory.java │ │ │ ├── ParserModel.java │ │ │ ├── ParserType.java │ │ │ ├── PosSampleStream.java │ │ │ ├── chunking │ │ │ │ ├── BuildContextGenerator.java │ │ │ │ ├── CheckContextGenerator.java │ │ │ │ ├── Parser.java │ │ │ │ ├── ParserEventStream.java │ │ │ │ └── package-info.java │ │ │ ├── lang │ │ │ │ ├── en │ │ │ │ │ └── HeadRules.java │ │ │ │ └── es │ │ │ │ │ 
└── AncoraSpanishHeadRules.java │ │ │ ├── package-info.java │ │ │ └── treeinsert │ │ │ │ ├── AttachContextGenerator.java │ │ │ │ ├── BuildContextGenerator.java │ │ │ │ ├── CheckContextGenerator.java │ │ │ │ ├── Parser.java │ │ │ │ ├── ParserEventStream.java │ │ │ │ └── package-info.java │ │ │ ├── postag │ │ │ ├── ConfigurablePOSContextGenerator.java │ │ │ ├── DefaultPOSContextGenerator.java │ │ │ ├── DefaultPOSSequenceValidator.java │ │ │ ├── MutableTagDictionary.java │ │ │ ├── POSContextGenerator.java │ │ │ ├── POSDictionary.java │ │ │ ├── POSEvaluator.java │ │ │ ├── POSModel.java │ │ │ ├── POSSample.java │ │ │ ├── POSSampleEventStream.java │ │ │ ├── POSSampleSequenceStream.java │ │ │ ├── POSTagFormat.java │ │ │ ├── POSTagFormatMapper.java │ │ │ ├── POSTagger.java │ │ │ ├── POSTaggerCrossValidator.java │ │ │ ├── POSTaggerEvaluationMonitor.java │ │ │ ├── POSTaggerFactory.java │ │ │ ├── POSTaggerME.java │ │ │ ├── TagDictionary.java │ │ │ ├── ThreadSafePOSTaggerME.java │ │ │ ├── WordTagSampleStream.java │ │ │ └── package-info.java │ │ │ ├── sentdetect │ │ │ ├── DefaultEndOfSentenceScanner.java │ │ │ ├── DefaultSDContextGenerator.java │ │ │ ├── EmptyLinePreprocessorStream.java │ │ │ ├── EndOfSentenceScanner.java │ │ │ ├── NewlineSentenceDetector.java │ │ │ ├── SDContextGenerator.java │ │ │ ├── SDCrossValidator.java │ │ │ ├── SDEventStream.java │ │ │ ├── SentenceDetector.java │ │ │ ├── SentenceDetectorEvaluationMonitor.java │ │ │ ├── SentenceDetectorEvaluator.java │ │ │ ├── SentenceDetectorFactory.java │ │ │ ├── SentenceDetectorME.java │ │ │ ├── SentenceModel.java │ │ │ ├── SentenceSample.java │ │ │ ├── SentenceSampleStream.java │ │ │ ├── ThreadSafeSentenceDetectorME.java │ │ │ ├── lang │ │ │ │ ├── Factory.java │ │ │ │ └── th │ │ │ │ │ ├── SentenceContextGenerator.java │ │ │ │ │ └── package.html │ │ │ └── package-info.java │ │ │ ├── stemmer │ │ │ ├── PorterStemmer.java │ │ │ ├── Stemmer.java │ │ │ └── snowball │ │ │ │ ├── AbstractSnowballStemmer.java │ │ │ │ ├── 
Among.java │ │ │ │ ├── SnowballProgram.java │ │ │ │ ├── SnowballStemmer.java │ │ │ │ ├── arabicStemmer.java │ │ │ │ ├── catalanStemmer.java │ │ │ │ ├── danishStemmer.java │ │ │ │ ├── dutchStemmer.java │ │ │ │ ├── englishStemmer.java │ │ │ │ ├── finnishStemmer.java │ │ │ │ ├── frenchStemmer.java │ │ │ │ ├── germanStemmer.java │ │ │ │ ├── greekStemmer.java │ │ │ │ ├── hungarianStemmer.java │ │ │ │ ├── indonesianStemmer.java │ │ │ │ ├── irishStemmer.java │ │ │ │ ├── italianStemmer.java │ │ │ │ ├── norwegianStemmer.java │ │ │ │ ├── porterStemmer.java │ │ │ │ ├── portugueseStemmer.java │ │ │ │ ├── romanianStemmer.java │ │ │ │ ├── russianStemmer.java │ │ │ │ ├── spanishStemmer.java │ │ │ │ ├── swedishStemmer.java │ │ │ │ └── turkishStemmer.java │ │ │ ├── tokenize │ │ │ ├── AbstractTokenizer.java │ │ │ ├── DefaultTokenContextGenerator.java │ │ │ ├── DetokenizationDictionary.java │ │ │ ├── Detokenizer.java │ │ │ ├── DetokenizerEvaluator.java │ │ │ ├── DictionaryDetokenizer.java │ │ │ ├── SimpleTokenizer.java │ │ │ ├── ThreadSafeTokenizerME.java │ │ │ ├── TokSpanEventStream.java │ │ │ ├── TokenContextGenerator.java │ │ │ ├── TokenSample.java │ │ │ ├── TokenSampleStream.java │ │ │ ├── Tokenizer.java │ │ │ ├── TokenizerCrossValidator.java │ │ │ ├── TokenizerEvaluationMonitor.java │ │ │ ├── TokenizerEvaluator.java │ │ │ ├── TokenizerFactory.java │ │ │ ├── TokenizerME.java │ │ │ ├── TokenizerModel.java │ │ │ ├── TokenizerStream.java │ │ │ ├── WhitespaceTokenStream.java │ │ │ ├── WhitespaceTokenizer.java │ │ │ ├── WordpieceTokenizer.java │ │ │ ├── lang │ │ │ │ ├── Factory.java │ │ │ │ └── en │ │ │ │ │ └── TokenSampleStream.java │ │ │ └── package-info.java │ │ │ └── util │ │ │ ├── AbstractEventStream.java │ │ │ ├── AbstractObjectStream.java │ │ │ ├── BaseToolFactory.java │ │ │ ├── BeamSearchContextGenerator.java │ │ │ ├── Cache.java │ │ │ ├── CollectionObjectStream.java │ │ │ ├── DownloadUtil.java │ │ │ ├── EventTraceStream.java │ │ │ ├── FilterObjectStream.java │ │ │ ├── 
InputStreamFactory.java │ │ │ ├── InsufficientTrainingDataException.java │ │ │ ├── InvalidFormatException.java │ │ │ ├── MarkableFileInputStream.java │ │ │ ├── MarkableFileInputStreamFactory.java │ │ │ ├── MutableInt.java │ │ │ ├── ObjectStream.java │ │ │ ├── ObjectStreamUtils.java │ │ │ ├── ParagraphStream.java │ │ │ ├── PlainTextByLineStream.java │ │ │ ├── ResetableIterator.java │ │ │ ├── ReverseListIterator.java │ │ │ ├── Sequence.java │ │ │ ├── SequenceCodec.java │ │ │ ├── SequenceValidator.java │ │ │ ├── Span.java │ │ │ ├── StringList.java │ │ │ ├── StringUtil.java │ │ │ ├── TokenTag.java │ │ │ ├── TrainingConfiguration.java │ │ │ ├── TrainingParameters.java │ │ │ ├── Version.java │ │ │ ├── XmlUtil.java │ │ │ ├── eval │ │ │ ├── CrossValidationPartitioner.java │ │ │ ├── EvaluationMonitor.java │ │ │ ├── Evaluator.java │ │ │ ├── FMeasure.java │ │ │ └── Mean.java │ │ │ ├── ext │ │ │ ├── ExtensionLoader.java │ │ │ ├── ExtensionNotLoadedException.java │ │ │ ├── ExtensionServiceKeys.java │ │ │ └── package-info.java │ │ │ ├── featuregen │ │ │ ├── AdaptiveFeatureGenerator.java │ │ │ ├── AdditionalContextFeatureGenerator.java │ │ │ ├── AggregatedFeatureGenerator.java │ │ │ ├── AggregatedFeatureGeneratorFactory.java │ │ │ ├── BigramNameFeatureGenerator.java │ │ │ ├── BigramNameFeatureGeneratorFactory.java │ │ │ ├── BrownBigramFeatureGenerator.java │ │ │ ├── BrownCluster.java │ │ │ ├── BrownClusterBigramFeatureGeneratorFactory.java │ │ │ ├── BrownClusterTokenClassFeatureGeneratorFactory.java │ │ │ ├── BrownClusterTokenFeatureGeneratorFactory.java │ │ │ ├── BrownTokenClassFeatureGenerator.java │ │ │ ├── BrownTokenClasses.java │ │ │ ├── BrownTokenFeatureGenerator.java │ │ │ ├── CachedFeatureGenerator.java │ │ │ ├── CachedFeatureGeneratorFactory.java │ │ │ ├── CharacterNgramFeatureGenerator.java │ │ │ ├── CharacterNgramFeatureGeneratorFactory.java │ │ │ ├── DefinitionFeatureGeneratorFactory.java │ │ │ ├── DictionaryFeatureGenerator.java │ │ │ ├── 
DictionaryFeatureGeneratorFactory.java │ │ │ ├── DocumentBeginFeatureGenerator.java │ │ │ ├── DocumentBeginFeatureGeneratorFactory.java │ │ │ ├── FeatureGeneratorResourceProvider.java │ │ │ ├── FeatureGeneratorUtil.java │ │ │ ├── GeneratorFactory.java │ │ │ ├── InSpanGenerator.java │ │ │ ├── OutcomePriorFeatureGenerator.java │ │ │ ├── POSTaggerNameFeatureGenerator.java │ │ │ ├── POSTaggerNameFeatureGeneratorFactory.java │ │ │ ├── PosTaggerFeatureGenerator.java │ │ │ ├── PosTaggerFeatureGeneratorFactory.java │ │ │ ├── PrefixFeatureGenerator.java │ │ │ ├── PrefixFeatureGeneratorFactory.java │ │ │ ├── PreviousMapFeatureGenerator.java │ │ │ ├── PreviousMapFeatureGeneratorFactory.java │ │ │ ├── PreviousTwoMapFeatureGenerator.java │ │ │ ├── SentenceFeatureGenerator.java │ │ │ ├── SentenceFeatureGeneratorFactory.java │ │ │ ├── StringPattern.java │ │ │ ├── SuffixFeatureGenerator.java │ │ │ ├── SuffixFeatureGeneratorFactory.java │ │ │ ├── TokenClassFeatureGenerator.java │ │ │ ├── TokenClassFeatureGeneratorFactory.java │ │ │ ├── TokenFeatureGenerator.java │ │ │ ├── TokenFeatureGeneratorFactory.java │ │ │ ├── TokenPatternFeatureGenerator.java │ │ │ ├── TokenPatternFeatureGeneratorFactory.java │ │ │ ├── TrigramNameFeatureGenerator.java │ │ │ ├── TrigramNameFeatureGeneratorFactory.java │ │ │ ├── WindowFeatureGenerator.java │ │ │ ├── WindowFeatureGeneratorFactory.java │ │ │ ├── WordClusterDictionary.java │ │ │ ├── WordClusterFeatureGenerator.java │ │ │ ├── WordClusterFeatureGeneratorFactory.java │ │ │ └── package-info.java │ │ │ ├── java │ │ │ └── Experimental.java │ │ │ ├── jvm │ │ │ ├── CHMStringDeduplicator.java │ │ │ ├── CHMStringInterner.java │ │ │ ├── HMStringInterner.java │ │ │ ├── JvmStringInterner.java │ │ │ ├── NoOpStringInterner.java │ │ │ ├── StringInterner.java │ │ │ └── StringInterners.java │ │ │ ├── model │ │ │ ├── ArtifactProvider.java │ │ │ ├── ArtifactSerializer.java │ │ │ ├── BaseModel.java │ │ │ ├── ByteArraySerializer.java │ │ │ ├── 
ChunkerModelSerializer.java │ │ │ ├── DictionarySerializer.java │ │ │ ├── GenericModelSerializer.java │ │ │ ├── ModelType.java │ │ │ ├── ModelUtil.java │ │ │ ├── POSModelSerializer.java │ │ │ ├── PropertiesSerializer.java │ │ │ ├── SerializableArtifact.java │ │ │ └── UncloseableInputStream.java │ │ │ ├── normalizer │ │ │ ├── AggregateCharSequenceNormalizer.java │ │ │ ├── CharSequenceNormalizer.java │ │ │ ├── EmojiCharSequenceNormalizer.java │ │ │ ├── NumberCharSequenceNormalizer.java │ │ │ ├── ShrinkCharSequenceNormalizer.java │ │ │ ├── TwitterCharSequenceNormalizer.java │ │ │ └── UrlCharSequenceNormalizer.java │ │ │ ├── package-info.java │ │ │ └── wordvector │ │ │ ├── DoubleArrayVector.java │ │ │ ├── FloatArrayVector.java │ │ │ ├── Glove.java │ │ │ ├── MapWordVectorTable.java │ │ │ ├── WordVector.java │ │ │ ├── WordVectorTable.java │ │ │ └── WordVectorType.java │ └── resources │ │ └── opennlp │ │ └── tools │ │ ├── namefind │ │ └── ner-default-features.xml │ │ ├── postag │ │ └── pos-default-features.xml │ │ └── util │ │ └── opennlp.version │ └── test │ ├── java │ └── opennlp │ │ └── tools │ │ ├── AbstractLoggerTest.java │ │ ├── AbstractModelLoaderTest.java │ │ ├── AbstractTempDirTest.java │ │ ├── EnabledWhenCDNAvailable.java │ │ ├── HighMemoryUsage.java │ │ ├── chunker │ │ ├── ChunkSampleStreamTest.java │ │ ├── ChunkSampleTest.java │ │ ├── ChunkerDetailedFMeasureListenerTest.java │ │ ├── ChunkerEvaluatorTest.java │ │ ├── ChunkerFactoryTest.java │ │ ├── ChunkerMEIT.java │ │ ├── ChunkerMETest.java │ │ ├── ChunkerModelTest.java │ │ ├── DummyChunkSampleStream.java │ │ ├── DummyChunker.java │ │ └── DummyChunkerFactory.java │ │ ├── cmdline │ │ ├── ArgumentParserTest.java │ │ ├── CLITest.java │ │ ├── TerminateToolExceptionTest.java │ │ ├── TokenNameFinderToolTest.java │ │ ├── chunker │ │ │ └── ChunkerModelLoaderTest.java │ │ ├── langdetect │ │ │ └── LanguageDetectorModelLoaderTest.java │ │ ├── languagemodel │ │ │ └── NGramLanguageModelToolTest.java │ │ ├── lemmatizer │ │ 
│ └── LemmatizerModelLoaderIT.java │ │ ├── namefind │ │ │ ├── TokenNameFinderModelLoaderTest.java │ │ │ └── generator │ │ │ │ ├── AbstractNewsGenerator.java │ │ │ │ ├── RandomEnglishNewsGenerator.java │ │ │ │ └── RandomGermanNewsGenerator.java │ │ ├── postag │ │ │ └── POSModelLoaderIT.java │ │ ├── sentdetect │ │ │ └── SentenceModelLoaderIT.java │ │ └── tokenizer │ │ │ ├── TokenizerModelLoaderIT.java │ │ │ └── TokenizerTrainerToolTest.java │ │ ├── dictionary │ │ ├── DictionaryAsSetCaseInsensitiveTest.java │ │ ├── DictionaryAsSetCaseSensitiveTest.java │ │ └── DictionaryTest.java │ │ ├── doccat │ │ ├── BagOfWordsFeatureGeneratorTest.java │ │ ├── DoccatFactoryTest.java │ │ ├── DocumentCategorizerMETest.java │ │ ├── DocumentCategorizerNBTest.java │ │ ├── DocumentSampleTest.java │ │ └── NGramFeatureGeneratorTest.java │ │ ├── eval │ │ ├── AbstractEvalTest.java │ │ ├── ArvoresDeitadasEval.java │ │ ├── Conll00ChunkerEval.java │ │ ├── Conll02NameFinderEval.java │ │ ├── ConllXPosTaggerEval.java │ │ ├── MultiThreadedToolsEval.java │ │ ├── OntoNotes4NameFinderEval.java │ │ ├── OntoNotes4ParserEval.java │ │ ├── OntoNotes4PosTaggerEval.java │ │ ├── SnowballTokenizerEval.java │ │ ├── SourceForgeModelEval.java │ │ └── UniversalDependency20Eval.java │ │ ├── formats │ │ ├── AbstractFormatTest.java │ │ ├── AbstractSampleStreamFactoryTest.java │ │ ├── AbstractSampleStreamTest.java │ │ ├── BioNLP2004NameSampleStreamFactoryTest.java │ │ ├── ChunkerSampleStreamFactoryTest.java │ │ ├── Conll02NameSampleStreamFactoryTest.java │ │ ├── Conll02NameSampleStreamTest.java │ │ ├── Conll03NameSampleStreamFactoryTest.java │ │ ├── Conll03NameSampleStreamTest.java │ │ ├── ConllXPOSSampleStreamFactoryTest.java │ │ ├── ConllXPOSSampleStreamTest.java │ │ ├── ConllXSentenceSampleStreamFactoryTest.java │ │ ├── ConllXTokenSampleStreamFactoryTest.java │ │ ├── DirectorySampleStreamTest.java │ │ ├── EvalitaNameSampleStreamFactoryTest.java │ │ ├── EvalitaNameSampleStreamTest.java │ │ ├── 
LanguageDetectorSampleStreamFactoryTest.java │ │ ├── LemmatizerSampleStreamFactoryTest.java │ │ ├── NameFinderCensus90NameStreamTest.java │ │ ├── NameSampleDataStreamFactoryTest.java │ │ ├── ParseSampleStreamFactoryTest.java │ │ ├── ResourceAsStreamFactory.java │ │ ├── SentenceSampleStreamFactoryTest.java │ │ ├── TokenSampleStreamFactoryTest.java │ │ ├── TwentyNewsgroupSampleStreamFactoryTest.java │ │ ├── WordTagSampleStreamFactoryTest.java │ │ ├── ad │ │ │ ├── ADChunkSampleStreamFactoryTest.java │ │ │ ├── ADChunkSampleStreamTest.java │ │ │ ├── ADNameSampleStreamTest.java │ │ │ ├── ADPOSSampleStreamFactoryTest.java │ │ │ ├── ADPOSSampleStreamTest.java │ │ │ ├── ADParagraphStreamTest.java │ │ │ ├── ADSentenceSampleStreamFactoryTest.java │ │ │ ├── ADSentenceSampleStreamTest.java │ │ │ ├── ADTokenSampleStreamFactoryTest.java │ │ │ ├── ADTokenSampleStreamTest.java │ │ │ └── AbstractADSampleStreamTest.java │ │ ├── brat │ │ │ ├── AbstractBratTest.java │ │ │ ├── BratAnnotationStreamTest.java │ │ │ ├── BratDocumentParserTest.java │ │ │ ├── BratDocumentTest.java │ │ │ ├── BratNameSampleStreamFactoryTest.java │ │ │ └── BratNameSampleStreamTest.java │ │ ├── conllu │ │ │ ├── AbstractConlluSampleStreamTest.java │ │ │ ├── ConlluLemmaSampleStreamFactoryTest.java │ │ │ ├── ConlluLemmaSampleStreamTest.java │ │ │ ├── ConlluPOSSampleStreamFactoryTest.java │ │ │ ├── ConlluPOSSampleStreamTest.java │ │ │ ├── ConlluSentenceSampleStreamFactoryTest.java │ │ │ ├── ConlluSentenceSampleStreamTest.java │ │ │ ├── ConlluStreamTest.java │ │ │ ├── ConlluTokenSampleStreamFactoryTest.java │ │ │ ├── ConlluTokenSampleStreamTest.java │ │ │ └── ConlluWordLineTest.java │ │ ├── convert │ │ │ ├── AbstractConvertTest.java │ │ │ ├── FileToByteArraySampleStreamTest.java │ │ │ ├── FileToStringSampleStreamTest.java │ │ │ ├── NameToSentenceSampleStreamFactoryTest.java │ │ │ ├── NameToTokenSampleStreamFactoryTest.java │ │ │ ├── POSToSentenceSampleStreamFactoryTest.java │ │ │ ├── 
POSToTokenSampleStreamFactoryTest.java │ │ │ ├── ParseToPOSSampleStreamFactoryTest.java │ │ │ ├── ParseToSentenceSampleStreamFactoryTest.java │ │ │ └── ParseToTokenSampleStreamFactoryTest.java │ │ ├── frenchtreebank │ │ │ ├── ConstitParseSampleStreamFactoryTest.java │ │ │ └── ConstitParseSampleStreamTest.java │ │ ├── irishsentencebank │ │ │ ├── IrishSentenceBankDocumentTest.java │ │ │ ├── IrishSentenceBankSentenceStreamFactoryTest.java │ │ │ └── IrishSentenceBankTokenSampleStreamFactoryTest.java │ │ ├── leipzig │ │ │ ├── LeipzigLanguageSampleStreamFactoryTest.java │ │ │ └── LeipzigLanguageSampleStreamTest.java │ │ ├── letsmt │ │ │ ├── LetsmtDocumentTest.java │ │ │ └── LetsmtSentenceStreamFactoryTest.java │ │ ├── masc │ │ │ ├── AbstractMascSampleStreamTest.java │ │ │ ├── MascNamedEntitySampleStreamFactoryTest.java │ │ │ ├── MascNamedEntitySampleStreamTest.java │ │ │ ├── MascPOSSampleStreamFactoryTest.java │ │ │ ├── MascPOSSampleStreamTest.java │ │ │ ├── MascSentenceSampleStreamFactoryTest.java │ │ │ ├── MascSentenceSampleStreamTest.java │ │ │ ├── MascTokenSampleStreamFactoryTest.java │ │ │ └── MascTokenSampleStreamTest.java │ │ ├── moses │ │ │ └── MosesSentenceSampleStreamFactoryTest.java │ │ ├── muc │ │ │ ├── DocumentSplitterStreamTest.java │ │ │ ├── Muc6NameSampleStreamFactoryTest.java │ │ │ └── SgmlParserTest.java │ │ ├── nkjp │ │ │ ├── NKJPSegmentationDocumentTest.java │ │ │ ├── NKJPSentenceSampleStreamFactoryTest.java │ │ │ └── NKJPTextDocumentTest.java │ │ └── ontonotes │ │ │ ├── OntoNotesNameSampleStreamFactoryTest.java │ │ │ ├── OntoNotesPOSSampleStreamFactoryTest.java │ │ │ └── OntoNotesParseSampleStreamFactoryTest.java │ │ ├── langdetect │ │ ├── DefaultLanguageDetectorContextGeneratorTest.java │ │ ├── DummyFactory.java │ │ ├── LanguageDetectorCrossValidatorTest.java │ │ ├── LanguageDetectorEvaluatorTest.java │ │ ├── LanguageDetectorFactoryTest.java │ │ ├── LanguageDetectorMETest.java │ │ ├── LanguageSampleTest.java │ │ └── LanguageTest.java │ │ ├── 
languagemodel │ │ ├── LanguageModelEvaluationTest.java │ │ ├── LanguageModelTestUtils.java │ │ └── NgramLanguageModelTest.java │ │ ├── lemmatizer │ │ ├── DictionaryLemmatizerMultiTest.java │ │ ├── DictionaryLemmatizerTest.java │ │ ├── DummyLemmaSampleStream.java │ │ ├── DummyLemmatizer.java │ │ ├── LemmaSampleTest.java │ │ ├── LemmatizerEvaluatorTest.java │ │ └── LemmatizerMETest.java │ │ ├── ml │ │ ├── AbstractEventStreamTest.java │ │ ├── ArrayMathTest.java │ │ ├── BeamSearchTest.java │ │ ├── MockEventTrainer.java │ │ ├── MockSequenceTrainer.java │ │ ├── PrepAttachDataUtil.java │ │ ├── TrainerFactoryTest.java │ │ ├── maxent │ │ │ ├── FootballEventStream.java │ │ │ ├── GISIndexingTest.java │ │ │ ├── GISTrainerTest.java │ │ │ ├── MaxentPrepAttachTest.java │ │ │ ├── MockDataIndexer.java │ │ │ ├── RealBasicEventStreamTest.java │ │ │ ├── RealValueModelTest.java │ │ │ ├── ScaleDoesntMatterTest.java │ │ │ ├── URLInputStreamFactory.java │ │ │ ├── io │ │ │ │ └── RealValueFileEventStreamTest.java │ │ │ └── quasinewton │ │ │ │ ├── LineSearchTest.java │ │ │ │ ├── NegLogLikelihoodTest.java │ │ │ │ ├── QNMinimizerTest.java │ │ │ │ ├── QNPrepAttachTest.java │ │ │ │ └── QNTrainerTest.java │ │ ├── model │ │ │ ├── ChecksumEventStreamTest.java │ │ │ ├── EventTest.java │ │ │ ├── FileEventStreamTest.java │ │ │ ├── ModelParameterChunkerTest.java │ │ │ ├── OnePassDataIndexerTest.java │ │ │ ├── OnePassRealValueDataIndexerTest.java │ │ │ ├── RealValueFileEventStreamTest.java │ │ │ ├── SimpleEventStreamBuilder.java │ │ │ └── TwoPassDataIndexerTest.java │ │ ├── naivebayes │ │ │ ├── AbstractNaiveBayesTest.java │ │ │ ├── NaiveBayesCorrectnessTest.java │ │ │ ├── NaiveBayesModelReadWriteTest.java │ │ │ ├── NaiveBayesPrepAttachTest.java │ │ │ └── NaiveBayesSerializedCorrectnessTest.java │ │ └── perceptron │ │ │ └── PerceptronPrepAttachTest.java │ │ ├── monitoring │ │ ├── DefaultTrainingProgressMonitorTest.java │ │ ├── IterDeltaAccuracyUnderToleranceTest.java │ │ └── 
LogLikelihoodThresholdBreachedTest.java │ │ ├── namefind │ │ ├── AbstractNameFinderTest.java │ │ ├── BilouCodecTest.java │ │ ├── BilouNameFinderSequenceValidatorTest.java │ │ ├── BioCodecTest.java │ │ ├── DictionaryNameFinderEvaluatorTest.java │ │ ├── DictionaryNameFinderTest.java │ │ ├── NameFinderEventStreamTest.java │ │ ├── NameFinderMETest.java │ │ ├── NameFinderMEWithDatesTest.java │ │ ├── NameFinderSequenceValidatorTest.java │ │ ├── NameSampleDataStreamTest.java │ │ ├── NameSampleTest.java │ │ ├── NameSampleTypeFilterTest.java │ │ ├── RegexNameFinderFactoryTest.java │ │ ├── RegexNameFinderTest.java │ │ ├── TokenNameFinderCrossValidatorTest.java │ │ ├── TokenNameFinderEvaluatorTest.java │ │ └── TokenNameFinderModelTest.java │ │ ├── ngram │ │ ├── NGramCharModelTest.java │ │ ├── NGramGeneratorTest.java │ │ ├── NGramModelTest.java │ │ └── NGramUtilsTest.java │ │ ├── parser │ │ ├── AbstractParserModelTest.java │ │ ├── ChunkSampleStreamTest.java │ │ ├── ParseSampleStreamTest.java │ │ ├── ParseTest.java │ │ ├── ParserEvaluatorTest.java │ │ ├── ParserTestUtil.java │ │ ├── PosSampleStreamTest.java │ │ ├── chunking │ │ │ └── ParserTest.java │ │ ├── lang │ │ │ └── en │ │ │ │ └── HeadRulesTest.java │ │ └── treeinsert │ │ │ └── ParserTest.java │ │ ├── postag │ │ ├── ConfigurablePOSContextGeneratorTest.java │ │ ├── DefaultPOSContextGeneratorTest.java │ │ ├── DummyPOSTaggerFactory.java │ │ ├── POSDictionaryTest.java │ │ ├── POSEvaluatorTest.java │ │ ├── POSModelTest.java │ │ ├── POSSampleEventStreamTest.java │ │ ├── POSSampleTest.java │ │ ├── POSTaggerFactoryTest.java │ │ ├── POSTaggerMEIT.java │ │ ├── POSTaggerMETest.java │ │ └── WordTagSampleStreamTest.java │ │ ├── sentdetect │ │ ├── AbstractSentenceDetectorTest.java │ │ ├── DefaultEndOfSentenceScannerTest.java │ │ ├── DefaultSDContextGeneratorTest.java │ │ ├── DummySentenceDetectorFactory.java │ │ ├── NewlineSentenceDetectorTest.java │ │ ├── SDEventStreamTest.java │ │ ├── SentenceDetectorEvaluatorTest.java │ │ ├── 
SentenceDetectorFactoryTest.java │ │ ├── SentenceDetectorMEDutchTest.java │ │ ├── SentenceDetectorMEFrenchTest.java │ │ ├── SentenceDetectorMEGermanTest.java │ │ ├── SentenceDetectorMEIT.java │ │ ├── SentenceDetectorMEItalianTest.java │ │ ├── SentenceDetectorMEPolishTest.java │ │ ├── SentenceDetectorMEPortugueseTest.java │ │ ├── SentenceDetectorMESpanishTest.java │ │ ├── SentenceDetectorMETest.java │ │ └── SentenceSampleTest.java │ │ ├── stemmer │ │ ├── PorterStemmerTest.java │ │ └── SnowballStemmerTest.java │ │ ├── tokenize │ │ ├── DetokenizationDictionaryTest.java │ │ ├── DetokenizerEvaluatorTest.java │ │ ├── DictionaryDetokenizerTest.java │ │ ├── DummyTokenizerFactory.java │ │ ├── SimpleTokenizerTest.java │ │ ├── TokSpanEventStreamTest.java │ │ ├── TokenSampleStreamTest.java │ │ ├── TokenSampleTest.java │ │ ├── TokenizerEvaluatorTest.java │ │ ├── TokenizerFactoryTest.java │ │ ├── TokenizerMEIT.java │ │ ├── TokenizerMETest.java │ │ ├── TokenizerModelTest.java │ │ ├── TokenizerTestUtil.java │ │ ├── WhitespaceTokenStreamTest.java │ │ ├── WhitespaceTokenizerTest.java │ │ └── WordpieceTokenizerTest.java │ │ └── util │ │ ├── AbstractEventStreamTest.java │ │ ├── DownloadParserTest.java │ │ ├── DownloadUtilDownloadTwiceTest.java │ │ ├── DownloadUtilTest.java │ │ ├── FileUtil.java │ │ ├── MockInputStreamFactory.java │ │ ├── ObjectStreamUtilsTest.java │ │ ├── ParagraphStreamTest.java │ │ ├── PlainTextByLineStreamTest.java │ │ ├── SequenceTest.java │ │ ├── SpanTest.java │ │ ├── StringListTest.java │ │ ├── StringUtilTest.java │ │ ├── TrainingParametersTest.java │ │ ├── VersionTest.java │ │ ├── eval │ │ ├── CrossValidationPartitionerTest.java │ │ ├── FMeasureTest.java │ │ └── MeanTest.java │ │ ├── ext │ │ └── ExtensionLoaderTest.java │ │ ├── featuregen │ │ ├── BigramNameFeatureGeneratorTest.java │ │ ├── BrownBigramFeatureGeneratorTest.java │ │ ├── CachedFeatureGeneratorTest.java │ │ ├── CharacterNgramFeatureGeneratorTest.java │ │ ├── FeatureGeneratorUtilTest.java │ │ ├── 
GeneratorFactoryTest.java │ │ ├── IdentityFeatureGenerator.java │ │ ├── InSpanGeneratorTest.java │ │ ├── POSTaggerNameFeatureGeneratorTest.java │ │ ├── PosTaggerFeatureGeneratorTest.java │ │ ├── PrefixFeatureGeneratorTest.java │ │ ├── PreviousMapFeatureGeneratorTest.java │ │ ├── PreviousTwoMapFeatureGeneratorTest.java │ │ ├── SentenceFeatureGeneratorTest.java │ │ ├── StringPatternTest.java │ │ ├── SuffixFeatureGeneratorTest.java │ │ ├── TokenClassFeatureGeneratorTest.java │ │ ├── TokenFeatureGeneratorTest.java │ │ ├── TokenPatternFeatureGeneratorTest.java │ │ ├── TrigramNameFeatureGeneratorTest.java │ │ └── WindowFeatureGeneratorTest.java │ │ ├── model │ │ └── ByteArraySerializerTest.java │ │ ├── normalizer │ │ ├── EmojiCharSequenceNormalizerTest.java │ │ ├── NumberCharSequenceNormalizerTest.java │ │ ├── ShrinkCharSequenceNormalizerTest.java │ │ ├── TwitterCharSequenceNormalizerTest.java │ │ └── UrlCharSequenceNormalizerTest.java │ │ └── wordvector │ │ ├── AbstractWordVectorTest.java │ │ ├── DoubleArrayVectorTest.java │ │ ├── FloatArrayVectorTest.java │ │ ├── GloveTest.java │ │ └── MapWordVectorTableTest.java │ └── resources │ ├── data │ ├── opennlp │ │ └── maxent │ │ │ ├── io │ │ │ ├── rvfes-bug-data-broken.txt │ │ │ └── rvfes-bug-data-ok.txt │ │ │ ├── real-valued-weights-training-data.txt │ │ │ └── repeat-weighting-training-data.txt │ └── ppa │ │ ├── NOTICE │ │ ├── bitstrings │ │ ├── devset │ │ ├── test │ │ └── training │ ├── logback-test.xml │ └── opennlp │ └── tools │ ├── chunker │ ├── chunker170custom.bin │ ├── chunker170default.bin │ ├── chunker180custom.bin │ ├── detailedOutput.txt │ ├── output.txt │ ├── test-insufficient.txt │ └── test.txt │ ├── cmdline │ └── languagemodel │ │ ├── origin_of_text_samples.txt │ │ ├── sentences_set_1.txt │ │ └── sentences_set_2.txt │ ├── doccat │ └── DoccatSample.txt │ ├── eval │ └── ner-en_pos-features.xml │ ├── formats │ ├── 20newsgroup │ │ └── sci.electronics │ │ │ └── 52794.sample │ ├── ad │ │ └── ad.sample │ ├── 
bionlp2004-01.sample │ ├── brat │ │ ├── brat-ann.conf │ │ ├── opennlp-1193.ann │ │ ├── opennlp-1193.txt │ │ ├── voa-with-entities-overlapping.ann │ │ ├── voa-with-entities-overlapping.txt │ │ ├── voa-with-entities.ann │ │ ├── voa-with-entities.txt │ │ ├── voa-with-relations.ann │ │ └── voa-with-relations.txt │ ├── brown-cluster.txt │ ├── census90.sample │ ├── chunker-01.sample │ ├── conll2002-es.sample │ ├── conll2002-nl.sample │ ├── conll2003-de.sample │ ├── conll2003-en.sample │ ├── conllu │ │ ├── de-ud-train-sample.conllu │ │ ├── es-ud-sample.conllu │ │ ├── full-sample.conllu │ │ └── pt_br-ud-sample.conllu │ ├── conllx.sample │ ├── evalita-ner-it-01.sample │ ├── evalita-ner-it-02.sample │ ├── evalita-ner-it-03.sample │ ├── evalita-ner-it-broken.sample │ ├── evalita-ner-it-incorrect.sample │ ├── frenchtreebank │ │ └── sample1.xml │ ├── irishsentencebank │ │ └── irishsentencebank-sample.xml │ ├── lang-detect-01.sample │ ├── leipzig-en.sample │ ├── leipzig │ │ └── samples │ │ │ ├── .hidden │ │ │ ├── 123-skipped.txt │ │ │ ├── dan-sentences.txt │ │ │ ├── dontread │ │ │ └── xxx-sentences.txt │ │ │ └── eng-sentences.txt │ ├── lemma-01.sample │ ├── letsmt │ │ └── letsmt-with-words.xml │ ├── masc │ │ ├── fakeMASC-ne.xml │ │ ├── fakeMASC-penn.xml │ │ ├── fakeMASC-s.xml │ │ ├── fakeMASC-seg.xml │ │ ├── fakeMASC.hdr │ │ └── fakeMASC.txt │ ├── moses │ │ └── moses-tiny.sample │ ├── muc │ │ ├── LDC2003T13.sgm │ │ └── parsertest1.sgml │ ├── name-data-01.sample │ ├── nkjp │ │ ├── ann_segmentation.xml │ │ └── text_structure.xml │ ├── ontonotes │ │ ├── ontonotes-sample-01.name │ │ └── ontonotes-sample-02.parse │ ├── parse-01.sample │ ├── sentences-01.sample │ ├── tokens-01.sample │ └── word-tags-01.sample │ ├── lang │ ├── abb_DE.xml │ ├── abb_EN.xml │ ├── abb_ES.xml │ ├── abb_FR.xml │ ├── abb_IT.xml │ ├── abb_NL.xml │ ├── abb_PL.xml │ └── abb_PT.xml │ ├── languagemodel │ └── sentences.txt │ ├── lemmatizer │ ├── output.txt │ ├── smalldictionary.dict │ ├── smalldictionarymulti.dict 
│ ├── trial.old-insufficient.tsv │ └── trial.old.tsv │ ├── ml │ └── maxent │ │ └── football.dat │ ├── namefind │ ├── AnnotatedSentences.txt │ ├── AnnotatedSentencesInsufficient.txt │ ├── AnnotatedSentencesWithTypes.txt │ ├── OnlyWithEntitiesWithTypes.train │ ├── OnlyWithNames.train │ ├── OnlyWithNamesWithTypes.train │ ├── RandomNewsWithGeneratedDates_DE.train │ ├── RandomNewsWithGeneratedDates_EN.train │ ├── html1.train │ ├── ner-pos-features-v15.xml │ ├── ner-pos-features.xml │ ├── origin-training-data.txt │ ├── voa1.train │ └── voa2.train │ ├── ngram │ ├── ngram-model-no-count.xml │ ├── ngram-model-not-a-number.xml │ └── ngram-model.xml │ ├── parser │ ├── en_head_rules │ ├── parser.train │ └── test.parse │ ├── postag │ ├── AnnotatedSentences.txt │ ├── AnnotatedSentencesInsufficient.txt │ ├── TagDictionaryCaseInsensitive.xml │ ├── TagDictionaryCaseSensitive.xml │ └── TagDictionaryWithoutCaseAttribute.xml │ ├── sentdetect │ ├── Sentences.txt │ ├── SentencesInsufficient.txt │ ├── Sentences_DE.txt │ ├── Sentences_ES.txt │ ├── Sentences_FR.txt │ ├── Sentences_IT.txt │ ├── Sentences_NL.txt │ ├── Sentences_PL.txt │ ├── Sentences_PT.txt │ ├── Test-Sample_OPENNLP-1163.txt │ └── origin-training-data.txt │ ├── tokenize │ ├── latin-detokenizer.xml │ ├── token-insufficient.train │ └── token.train │ └── util │ ├── featuregen │ ├── DictionaryTest.xml │ ├── FeatureGeneratorConfigWithUnkownElement.xml │ ├── TestAutomaticallyInsertAggregatedFeatureGenerator.xml │ ├── TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml │ ├── TestDictionarySerializerMappingExtraction.xml │ ├── TestFeatureGeneratorConfig.xml │ ├── TestInsertCachedFeatureGenerator.xml │ ├── TestNotAutomaticallyInsertAggregatedFeatureGenerator.xml │ ├── TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml │ ├── TestParametersConfig.xml │ └── TestTokenClassFeatureGeneratorConfig.xml │ ├── index.html │ └── wordvector │ ├── glove-example-broken-dimensions.txt │ ├── glove-example-empty.txt │ └── 
glove-example-short.txt ├── opennlp-uima ├── createPear.xml ├── descriptors │ ├── Chunker.xml │ ├── DateNameFinder.xml │ ├── LanguageDetector.xml │ ├── LocationNameFinder.xml │ ├── MoneyNameFinder.xml │ ├── OpenNlpTextAnalyzer.xml │ ├── OrganizationNameFinder.xml │ ├── Parser.xml │ ├── PercentageNameFinder.xml │ ├── PersonNameFinder.xml │ ├── PosTagger.xml │ ├── SentenceDetector.xml │ ├── SimpleTokenizer.xml │ ├── TimeNameFinder.xml │ ├── Tokenizer.xml │ └── TypeSystem.xml ├── metadata │ └── install.xml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── opennlp │ │ │ └── uima │ │ │ ├── chunker │ │ │ ├── Chunker.java │ │ │ ├── ChunkerModelResource.java │ │ │ ├── ChunkerModelResourceImpl.java │ │ │ └── package.html │ │ │ ├── dictionary │ │ │ ├── DictionaryResource.java │ │ │ └── DictionaryResourceImpl.java │ │ │ ├── doccat │ │ │ ├── AbstractDocumentCategorizer.java │ │ │ ├── DoccatModelResource.java │ │ │ ├── DoccatModelResourceImpl.java │ │ │ ├── DocumentCategorizer.java │ │ │ └── LanguageDetector.java │ │ │ ├── namefind │ │ │ ├── AbstractNameFinder.java │ │ │ ├── DictionaryNameFinder.java │ │ │ ├── NameFinder.java │ │ │ ├── TokenNameFinderModelResource.java │ │ │ ├── TokenNameFinderModelResourceImpl.java │ │ │ └── package.html │ │ │ ├── normalizer │ │ │ ├── Normalizer.java │ │ │ ├── NumberUtil.java │ │ │ └── StringDictionary.java │ │ │ ├── parser │ │ │ ├── Parser.java │ │ │ ├── ParserModelResource.java │ │ │ └── ParserModelResourceImpl.java │ │ │ ├── postag │ │ │ ├── POSModelResource.java │ │ │ ├── POSModelResourceImpl.java │ │ │ ├── POSTagger.java │ │ │ └── package.html │ │ │ ├── sentdetect │ │ │ ├── AbstractSentenceDetector.java │ │ │ ├── SentenceDetector.java │ │ │ ├── SentenceModelResource.java │ │ │ ├── SentenceModelResourceImpl.java │ │ │ └── package.html │ │ │ ├── tokenize │ │ │ ├── AbstractTokenizer.java │ │ │ ├── SimpleTokenizer.java │ │ │ ├── Tokenizer.java │ │ │ ├── TokenizerModelResource.java │ │ │ ├── TokenizerModelResourceImpl.java │ │ │ ├── 
WhitespaceTokenizer.java │ │ │ └── package.html │ │ │ └── util │ │ │ ├── AbstractModelResource.java │ │ │ ├── AnnotationComboIterator.java │ │ │ ├── AnnotationComparator.java │ │ │ ├── AnnotationIteratorPair.java │ │ │ ├── AnnotatorUtil.java │ │ │ ├── ContainingConstraint.java │ │ │ ├── ExceptionMessages.java │ │ │ ├── OpenNlpAnnotatorProcessException.java │ │ │ ├── OpennlpUtil.java │ │ │ └── UimaUtil.java │ └── resources │ │ └── opennlp │ │ └── uima │ │ └── util │ │ └── ExceptionMessages_en.properties │ └── test │ ├── java │ └── opennlp │ │ └── uima │ │ ├── AbstractIT.java │ │ ├── AbstractTest.java │ │ ├── AbstractUimaTest.java │ │ ├── FullAnnotatorsFlowIT.java │ │ ├── SingleAnnotatorIT.java │ │ ├── dictionary │ │ └── DictionaryResourceTest.java │ │ ├── normalizer │ │ ├── NumberUtilTest.java │ │ └── StringDictionaryTest.java │ │ └── util │ │ ├── AnnotationComboIteratorTest.java │ │ ├── AnnotationComparatorTest.java │ │ ├── AnnotatorUtilTest.java │ │ ├── CasUtil.java │ │ ├── OpennlpUtilTest.java │ │ └── UimaUtilTest.java │ └── resources │ ├── cas │ ├── OPENNLP-676.xmi │ └── dictionary-test.xmi │ ├── dictionary.dic │ ├── simplelogger.properties │ ├── test-descriptors │ ├── Chunker.xml │ ├── DateNameFinder.xml │ ├── DictionaryNameFinder.xml │ ├── LocationNameFinder.xml │ ├── MoneyNameFinder.xml │ ├── OpenNlpTextAnalyzer.xml │ ├── OrganizationNameFinder.xml │ ├── Parser.xml │ ├── PercentageNameFinder.xml │ ├── PersonNameFinder.xml │ ├── PosTagger.xml │ ├── SentenceDetector.xml │ ├── SimpleTokenizer.xml │ ├── TimeNameFinder.xml │ ├── Tokenizer.xml │ ├── TypeSystem.xml │ └── WhitespaceTokenizer.xml │ ├── training-params-invalid.conf │ └── training-params-test.conf ├── pom.xml ├── rat-excludes └── src └── license ├── NOTICE.template └── THIRD-PARTY.properties /.gitattributes: -------------------------------------------------------------------------------- 1 | # Handle line endings automatically for files detected as text 2 | # and leave all files detected as binary 
untouched. 3 | * text=auto 4 | 5 | # 6 | # The above will handle all files NOT found below 7 | # 8 | # These files are text and should be normalized (Convert crlf => lf) 9 | *.adoc text eol=lf 10 | *.html text eol=lf 11 | *.java text eol=lf 12 | *.jspf text eol=lf 13 | *.md text eol=lf 14 | *.properties text eol=lf 15 | *.sh text eol=lf 16 | *.txt text eol=lf 17 | *.xml text eol=lf 18 | *.xsd text eol=lf 19 | *.xsl text eol=lf 20 | *.yml text eol=lf 21 | 22 | LICENSE text eol=lf 23 | NOTICE text eol=lf 24 | 25 | # These files are binary and should be left untouched 26 | # (binary is a macro for -text -diff) 27 | *.class binary 28 | *.dll binary 29 | *.ear binary 30 | *.gif binary 31 | *.ico binary 32 | *.jar binary 33 | *.jpg binary 34 | *.jpeg binary 35 | *.png binary 36 | *.ser binary 37 | *.so binary 38 | *.war binary 39 | *.zip binary 40 | *.exe binary 41 | *.gz binary 42 | 43 | #Windows 44 | *.bat text eol=crlf 45 | *.cmd text eol=crlf 46 | 47 | #Unix/Linux 48 | *.sh text eol=lf -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute to Apache OpenNLP 2 | 3 | Thank you for your intention to contribute to the Apache OpenNLP project. As an open-source community, we highly appreciate external contributions to our project. 4 | 5 | To make the process smooth for the project *committers* (those who review and accept changes) and *contributors* (those who propose new changes via pull requests), there are a few rules to follow. 6 | 7 | ## Contribution Guidelines 8 | 9 | Please check out the [How to get involved](http://opennlp.apache.org/get-involved.html) to understand how contributions are made. 10 | A detailed list of coding standards can be found at [Apache OpenNLP Code Conventions](http://opennlp.apache.org/code-conventions.html) which also contains a list of coding guidelines that you should follow. 
11 | For pull requests, there is a [checklist](PULL_REQUEST_TEMPLATE.md) with criteria for acceptable contributions. 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .idea 3 | target 4 | .classpath 5 | .project 6 | .settings 7 | nbactions.xml 8 | nb-configuration.xml 9 | *.DS_Store 10 | .checkstyle 11 | *.onnx 12 | vocab.txt 13 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | wrapperVersion=3.3.2 18 | distributionType=only-script 19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.6/apache-maven-3.9.6-bin.zip 20 | -------------------------------------------------------------------------------- /opennlp-distr/README_FOOTER.html: -------------------------------------------------------------------------------- 1 |

How to Report Issues

2 |

3 | The Apache OpenNLP project uses JIRA for issue tracking. Please report any 4 | issues you find at 5 | http://issues.apache.org/jira/browse/opennlp 6 |

7 | 8 |

List of JIRA Issues Fixed in this Release

9 |

10 | Click issuesFixed/jira-report.html for the list of 11 | issues fixed in this release. 12 |

13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /opennlp-distr/src/main/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:17-alpine 2 | MAINTAINER Apache OpenNLP (dev@opennlp.apache.org) 3 | 4 | ARG OPENNLP_BINARY 5 | 6 | ENV OPENNLP_BASE_DIR /opt/opennlp 7 | 8 | ADD $OPENNLP_BINARY $OPENNLP_BASE_DIR 9 | 10 | CMD ["sh"] 11 | -------------------------------------------------------------------------------- /opennlp-distr/src/main/readme/NOTICE: -------------------------------------------------------------------------------- 1 | Apache OpenNLP 2 | Copyright 2017-2024 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | The snowball stemmers in 8 | opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball 9 | were developed by Martin Porter and Richard Boulton. 10 | The full snowball package is available from 11 | http://snowball.tartarus.org/ 12 | -------------------------------------------------------------------------------- /opennlp-dl-gpu/README.md: -------------------------------------------------------------------------------- 1 | # OpenNLP DL (GPU) 2 | 3 | This module brings in `onnxruntime_gpu` bindings to the existing `opennlp-dl` module. If you are planning to run with GPU acceleration, please use this BOM. 4 | 5 | You can use it in your code by adding the following as a dependency: 6 | 7 | ```xml 8 | 9 | org.apache.opennlp 10 | opennlp-dl-gpu 11 | ${opennlp.version} 12 | 13 | ``` -------------------------------------------------------------------------------- /opennlp-dl/src/main/java/opennlp/dl/SpanEnd.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.dl; 19 | 20 | public record SpanEnd(int index, int characterEnd) { 21 | 22 | @Override 23 | public String toString() { 24 | return "index: " + index + "; character end: " + characterEnd; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-dl/src/main/java/opennlp/dl/Tokens.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.dl; 19 | 20 | /** 21 | * Holds the tokens for input to an ONNX model. 22 | * 23 | * @param tokens The tokens themselves. 24 | * @param ids The token IDs as retrieved from the vocabulary. 25 | * @param mask The token mask. (Typically all 1.) 26 | * @param types The token types. (Typically all 1.) 27 | */ 28 | public record Tokens(String[] tokens, long[] ids, long[] mask, long[] types) { 29 | 30 | } 31 | -------------------------------------------------------------------------------- /opennlp-docs/src/docbkx/images/brat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-docs/src/docbkx/images/brat.png -------------------------------------------------------------------------------- /opennlp-docs/src/docbkx/images/opennlp-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-docs/src/docbkx/images/opennlp-logo.png -------------------------------------------------------------------------------- /opennlp-docs/src/docbkx/images/parsetree1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-docs/src/docbkx/images/parsetree1.png -------------------------------------------------------------------------------- /opennlp-morfologik-addon/bin/morfologik-addon: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. 
See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=$*" 21 | -------------------------------------------------------------------------------- /opennlp-morfologik-addon/bin/morfologik-addon.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM # Licensed to the Apache Software Foundation (ASF) under one 4 | REM # or more contributor license agreements. See the NOTICE file 5 | REM # distributed with this work for additional information 6 | REM # regarding copyright ownership. The ASF licenses this file 7 | REM # to you under the Apache License, Version 2.0 (the 8 | REM # "License"); you may not use this file except in compliance 9 | REM # with the License. You may obtain a copy of the License at 10 | REM # 11 | REM # http://www.apache.org/licenses/LICENSE-2.0 12 | REM # 13 | REM # Unless required by applicable law or agreed to in writing, 14 | REM # software distributed under the License is distributed on an 15 | REM # 16 | REM # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | REM # KIND, either express or implied. 
See the License for the 18 | REM # specific language governing permissions and limitations 19 | REM # under the License. 20 | 21 | mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=%*" 22 | -------------------------------------------------------------------------------- /opennlp-morfologik-addon/src/test/resources/AnnotatedSentences.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-morfologik-addon/src/test/resources/AnnotatedSentences.txt -------------------------------------------------------------------------------- /opennlp-morfologik-addon/src/test/resources/dictionaryWithLemma.dict: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-morfologik-addon/src/test/resources/dictionaryWithLemma.dict -------------------------------------------------------------------------------- /opennlp-morfologik-addon/src/test/resources/dictionaryWithLemma.info: -------------------------------------------------------------------------------- 1 | # 2 | # REQUIRED PROPERTIES 3 | # 4 | 5 | # Column (lemma, inflected, tag) separator. This must be a single byte in the target encoding. 6 | fsa.dict.separator=, 7 | 8 | # The charset in which the input is encoded. UTF-8 is strongly recommended. 9 | fsa.dict.encoding=UTF-8 10 | 11 | # The type of lemma-inflected form encoding compression that precedes automaton 12 | # construction. Allowed values: [suffix, infix, prefix, none]. 13 | # Details are in Daciuk's paper and in the code. 14 | # Leave at 'prefix' if not sure. 
15 | fsa.dict.encoder=prefix -------------------------------------------------------------------------------- /opennlp-morfologik-addon/src/test/resources/dictionaryWithLemma.txt: -------------------------------------------------------------------------------- 1 | carro,carro,NOUN 2 | casa,casa,NOUN 3 | Casa,Casa,PROP 4 | casa,casinha,NOUN 5 | casa,casona,NOUN 6 | casar,casa,V 7 | casar,casar,V-INF 8 | ir,foi,V 9 | menino,menina,NOUN 10 | menino,menininho,NOUN 11 | menino,menino,NOUN 12 | menino,meninão,NOUN 13 | ser,foi,V 14 | -------------------------------------------------------------------------------- /opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package opennlp.tools.models; 18 | 19 | import java.net.URI; 20 | import java.util.Optional; 21 | 22 | /** 23 | * Encapsulates a classpath entry that is associated with a {@link URI model URI} 24 | * and optional {@code properties}. 25 | * 26 | * @param model A valid {@link URI} associated with the model's location. 
27 | * @param properties Optional properties to use. 28 | */ 29 | public record ClassPathModelEntry(URI model, Optional properties) { 30 | 31 | } 32 | -------------------------------------------------------------------------------- /opennlp-tools/bin/opennlp: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | mvn -e -q exec:java "-Dexec.mainClass=opennlp.tools.cmdline.CLI" "-Dexec.args=$*" 21 | -------------------------------------------------------------------------------- /opennlp-tools/bin/opennlp.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM # Licensed to the Apache Software Foundation (ASF) under one 4 | REM # or more contributor license agreements. See the NOTICE file 5 | REM # distributed with this work for additional information 6 | REM # regarding copyright ownership. The ASF licenses this file 7 | REM # to you under the Apache License, Version 2.0 (the 8 | REM # "License"); you may not use this file except in compliance 9 | REM # with the License. 
You may obtain a copy of the License at 10 | REM # 11 | REM # http://www.apache.org/licenses/LICENSE-2.0 12 | REM # 13 | REM # Unless required by applicable law or agreed to in writing, 14 | REM # software distributed under the License is distributed on an 15 | REM # 16 | REM # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | REM # KIND, either express or implied. See the License for the 18 | REM # specific language governing permissions and limitations 19 | REM # under the License. 20 | 21 | mvn -e -q exec:java "-Dexec.mainClass=opennlp.tools.cmdline.CLI" "-Dexec.args=%*" 22 | -------------------------------------------------------------------------------- /opennlp-tools/lang/en/namefinder/en-namefinder.xml: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /opennlp-tools/lang/en/parser/en-head_rules: -------------------------------------------------------------------------------- 1 | 20 ADJP 0 NNS QP NN $ ADVP JJ VBN VBG ADJP JJR NP JJS DT FW RBR RBS SBAR RB 2 | 15 ADVP 1 RB RBR RBS FW ADVP TO CD JJR JJ IN NP JJS NN 3 | 5 CONJP 1 CC RB IN 4 | 2 FRAG 1 5 | 2 INTJ 0 6 | 4 LST 1 LS : 7 | 19 NAC 0 NN NNS NNP NNPS NP NAC EX $ CD QP PRP VBG JJ JJS JJR ADJP FW 8 | 8 PP 1 IN TO VBG VBN RP FW 9 | 2 PRN 1 10 | 3 PRT 1 RP 11 | 14 QP 0 $ IN NNS NN JJ RB DT CD NCD QP JJR JJS 12 | 7 RRC 1 VP NP ADVP ADJP PP 13 | 10 S 0 TO IN VP S SBAR ADJP UCP NP 14 | 13 SBAR 0 WHNP WHPP WHADVP WHADJP IN DT S SQ SINV SBAR FRAG 15 | 7 SBARQ 0 SQ S SINV SBARQ FRAG 16 | 12 SINV 0 VBZ VBD VBP VB MD VP S SINV ADJP NP 17 | 9 SQ 0 VBZ VBD VBP VB MD VP SQ 18 | 2 UCP 1 19 | 15 VP 1 TO VBD VBN MD VBZ VB VBG VBP VP ADJP NN NNS NP 20 | 6 WHADJP 0 CC WRB JJ ADJP 21 | 4 WHADVP 1 CC WRB 22 | 8 WHNP 0 WDT WP WP$ WHADJP WHPP WHNP 23 | 5 WHPP 1 IN TO FW 24 | 2 X 1 25 | 
-------------------------------------------------------------------------------- /opennlp-tools/lang/ml/MaxentTrainerParams.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Sample machine learning properties file 17 | 18 | Algorithm=MAXENT 19 | Iterations=100 20 | Cutoff=5 21 | -------------------------------------------------------------------------------- /opennlp-tools/lang/ml/NaiveBayesTrainerParams.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Sample machine learning properties file 17 | 18 | Algorithm=NAIVEBAYES 19 | Cutoff=5 20 | -------------------------------------------------------------------------------- /opennlp-tools/lang/ml/PerceptronSequenceTrainerParams.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | # Sample machine learning properties file 17 | 18 | Algorithm=PERCEPTRON_SEQUENCE 19 | Iterations=300 20 | Cutoff=0 21 | -------------------------------------------------------------------------------- /opennlp-tools/lang/ml/PerceptronTrainerParams.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Sample machine learning properties file 17 | 18 | Algorithm=PERCEPTRON 19 | Iterations=300 20 | Cutoff=0 21 | -------------------------------------------------------------------------------- /opennlp-tools/src/jmh/java/opennlp/tools/util/jvm/BenchmarkRunner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package opennlp.tools.util.jvm; 18 | /** Command-line entry point that hands control to the JMH launcher, {@code org.openjdk.jmh.Main}. */ 19 | public class BenchmarkRunner { 20 | /** Forwards {@code args} unchanged to {@code org.openjdk.jmh.Main.main(String[])}; any exception it throws propagates to the caller. */ 21 | public static void main(String[] args) throws Exception { 22 | org.openjdk.jmh.Main.main(args); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.chunker; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link Chunker chunkers}. 
24 | */ 25 | public interface ChunkerEvaluationMonitor extends EvaluationMonitor { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/chunker/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to finding non-recursive syntactic annotation such as noun phrase chunks. 20 | */ 21 | package opennlp.tools.chunker; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Classes giving access to the opennlp.tools.lemmatizer functionalities. 20 | */ 21 | package opennlp.tools.cmdline.lemmatizer; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/cmdline/params/BasicFormatParams.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.cmdline.params; 19 | 20 | import java.io.File; 21 | 22 | import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; 23 | 24 | /** 25 | * Common format parameters. 
26 | */ 27 | public interface BasicFormatParams extends EncodingParameter { 28 | 29 | @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.") 30 | File getData(); 31 | } 32 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetokenizerParameter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.cmdline.params; 19 | 20 | import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; 21 | /** Command-line parameter interface exposing the {@code dictionary} value, i.e. the file holding the detokenizer dictionary. */ 22 | public interface DetokenizerParameter { 23 | @ParameterDescription(valueName = "dictionary", 24 | description = "specifies the file with detokenizer dictionary.") 25 | String getDetokenizer(); 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/cmdline/params/LanguageParams.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.cmdline.params; 19 | 20 | import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; 21 | /** Command-line parameter interface exposing the {@code language} value, i.e. the language which is being processed. */ 22 | public interface LanguageParams { 23 | 24 | @ParameterDescription(valueName = "language", description = "language which is being processed.") 25 | String getLang(); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/commons/Sample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.commons; 19 | 20 | import java.io.Serializable; 21 | 22 | /** 23 | * Represents a generic type of processable elements. 24 | */ 25 | public interface Sample extends Serializable { 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/commons/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to common interfaces used in different contexts. 20 | */ 21 | package opennlp.tools.commons; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/dictionary/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | /** 19 | * Package related to parsing and storing dictionaries. 20 | */ 21 | package opennlp.tools.dictionary; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Entry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package opennlp.tools.dictionary.serializer; 20 | 21 | import opennlp.tools.util.StringList; 22 | 23 | /** 24 | * An {@link Entry} is a {@link StringList} which can 25 | * optionally be mapped to attributes. 26 | *

27 | * {@link Entry entries} are read and written by the {@link DictionaryEntryPersistor}. 28 | * 29 | * @see DictionaryEntryPersistor 30 | * @see Attributes 31 | */ 32 | public record Entry(StringList tokens, Attributes attributes) { 33 | 34 | } 35 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package opennlp.tools.dictionary.serializer; 20 | 21 | import opennlp.tools.util.InvalidFormatException; 22 | /** Declares a single operation, {@link #insert(Entry)}, which accepts an {@link Entry} and may reject it with an {@link InvalidFormatException}. */ 23 | public interface EntryInserter { 24 | 25 | /** 26 | * @param entry The {@link Entry} to insert. 27 | * 28 | * @throws InvalidFormatException Thrown if the {@code entry} has an invalid format. 
29 | */ 30 | void insert(Entry entry) throws InvalidFormatException; 31 | } 32 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.doccat; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link DocumentCategorizer doccat}. 24 | */ 25 | public interface DoccatEvaluationMonitor extends 26 | EvaluationMonitor { 27 | 28 | } 29 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/doccat/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package for classifying a document into a category. 20 | */ 21 | package opennlp.tools.doccat; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/entitylinker/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to linking entities to external data sources. 
20 | */ 21 | package opennlp.tools.entitylinker; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.formats; 19 | 20 | /** 21 | * Stream factory for those streams which carry language. 22 | */ 23 | public abstract class LanguageSampleStreamFactory<T, P> extends AbstractSampleStreamFactory<T, P> { 24 | 25 | protected String language; 26 | 27 | protected LanguageSampleStreamFactory(Class<P> params) { 28 | super(params); 29 | } 30 | 31 | @Override 32 | public String getLang() { 33 | return language; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/ad/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.formats.brat; 19 | 20 | 21 | public class AnnotatorNoteAnnotation extends BratAnnotation { 22 | 23 | private final String attachedId; 24 | 25 | protected AnnotatorNoteAnnotation(String id, String attachedId, String note) { 26 | super(id, "#AnnotationNote"); 27 | this.attachedId = attachedId; 28 | this.setNote(note); 29 | } 30 | 31 | public String getAttachedId() { 32 | return attachedId; 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/brat/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the corpus format used by the "brat rapid annotation tool" (brat). 20 | */ 21 | package opennlp.tools.formats.brat; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluTagset.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.formats.conllu; 19 | 20 | public enum ConlluTagset { 21 | U, 22 | X 23 | } 24 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/conllu/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the CoNLL-U format. 20 | */ 21 | package opennlp.tools.formats.conllu; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the French Treebank format. 20 | */ 21 | package opennlp.tools.formats.frenchtreebank; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/irishsentencebank/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the Irish Sentence Bank format. 
20 | */ 21 | package opennlp.tools.formats.irishsentencebank; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the {@code Leipzig} corpus format. 20 | */ 21 | package opennlp.tools.formats.leipzig; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the {@code letsmt} corpus format. 20 | */ 21 | package opennlp.tools.formats.letsmt; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/masc/Masc.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.formats.masc; 19 | 20 | /** 21 | * A simple marker interface for classes that support or refer to 22 | * the {@link #MASC_FORMAT}. 
23 | */ 24 | public interface Masc { 25 | 26 | String MASC_FORMAT = "masc"; 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/masc/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the {@code MASC} corpus format. 20 | */ 21 | package opennlp.tools.formats.masc; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/muc/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the {@code MUC} corpus format. 20 | */ 21 | package opennlp.tools.formats.muc; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the National corpus of Polish {@code NKJP} format. 
20 | */ 21 | package opennlp.tools.formats.nkjp; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesFormatParameters.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.formats.ontonotes; 19 | 20 | import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; 21 | 22 | public interface OntoNotesFormatParameters { 23 | @ParameterDescription(valueName = "OntoNotes 4.0 corpus directory") 24 | String getOntoNotesDir(); 25 | } 26 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to the OntoNotes 4.0 format. 20 | */ 21 | package opennlp.tools.formats.ontonotes; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/formats/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Experimental package related to converting various corpora to OpenNLP Format. 
20 | */ 21 | package opennlp.tools.formats; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.langdetect; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link LanguageDetector language detectors}. 24 | */ 25 | public interface LanguageDetectorEvaluationMonitor extends EvaluationMonitor { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/langdetect/ProbingLanguageDetectionResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.langdetect; 19 | 20 | /** 21 | * A data container encapsulating language detection results. 22 | * 23 | * @param languages The {@link Language languages} detected. 24 | * @param length The length in codepoints of text processed. 25 | */ 26 | public record ProbingLanguageDetectionResult(Language[] languages, int length) { 27 | 28 | } 29 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/langdetect/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to predicting languages from samples of text. 20 | */ 21 | package opennlp.tools.langdetect; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/languagemodel/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to language models. 20 | */ 21 | package opennlp.tools.languagemodel; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.lemmatizer; 19 | 20 | import opennlp.tools.util.SequenceValidator; 21 | 22 | /** 23 | * The default lemmatizer {@link SequenceValidator} implementation. 24 | */ 25 | public class DefaultLemmatizerSequenceValidator implements SequenceValidator { 26 | 27 | //TODO implement this 28 | @Override 29 | public boolean validSequence(int i, String[] sequence, String[] s, String outcome) { 30 | return true; 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.lemmatizer; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link Lemmatizer lemmatizers}. 24 | */ 25 | public interface LemmatizerEvaluationMonitor extends EvaluationMonitor { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to the lemmatizer functionality. 
20 | */ 21 | package opennlp.tools.lemmatizer; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/log/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package contains a {@link java.io.PrintStream} adapter for internal use only. 20 | */ 21 | package opennlp.tools.log; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/maxent/ContextGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
/**
 * Represents a generator of contexts for maxent decisions.
 *
 * @param <T> The type of the object the contextual predicates are derived from.
 */
public interface ContextGenerator<T> {

  /**
   * Builds up the list of contextual predicates for the given input object.
   *
   * @param o The object of type {@code T} used as input.
   *
   * @return The contextual predicates generated for {@code o}.
   */
  String[] getContext(T o);

}
16 | */ 17 | 18 | /** 19 | * Package related to the I/O functionality of the maxent package including reading 20 | * and writing models in several formats. 21 | */ 22 | package opennlp.tools.ml.maxent.io; 23 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Provides the I/O functionality of the maxent package including reading 31 | and writing models in several formats. 32 | 33 | 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/maxent/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to ML by means of the Maximum Entropy (ME) algorithm. 
20 | */ 21 | package opennlp.tools.ml.maxent; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/maxent/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Provides main functionality of the maxent package including data structures and 31 | algorithms for parameter estimation. 32 | 33 | 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to ML by means of the Quasi Newton (QN) algorithm. 
20 | */ 21 | package opennlp.tools.ml.maxent.quasinewton; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/model/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to ML models and feature selection techniques. 20 | */ 21 | package opennlp.tools.ml.model; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to ML by means of the Naive Bayes algorithm. 20 | */ 21 | package opennlp.tools.ml.naivebayes; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to Machine Learning (ML) features of OpenNLP, the related ML models, and trainers. 
20 | */ 21 | package opennlp.tools.ml; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to ML by means of the perceptron algorithm. 20 | */ 21 | package opennlp.tools.ml.perceptron; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/monitoring/TrainingMeasure.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
/**
 * Enumeration of training measures, each carrying a human-readable name.
 */
public enum TrainingMeasure {
  ACCURACY("Training Accuracy"),
  LOG_LIKELIHOOD("Log Likelihood");

  // Enum constants are shared singletons, so their state is kept immutable.
  private final String measureName;

  /**
   * @param measureName The display name associated with this measure.
   */
  TrainingMeasure(String measureName) {
    this.measureName = measureName;
  }

  /**
   * @return The display name of this measure, e.g. {@code "Training Accuracy"}.
   */
  public String getMeasureName() {
    return measureName;
  }
}
16 | */ 17 | 18 | package opennlp.tools.namefind; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link TokenNameFinder name finders}. 24 | */ 25 | public interface TokenNameFinderEvaluationMonitor extends EvaluationMonitor { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/namefind/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to finding proper names and numeric amounts. 20 | */ 21 | package opennlp.tools.namefind; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/ngram/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to computing and storing n-gram frequencies. 20 | */ 21 | package opennlp.tools.ngram; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Contains packages which solve common NLP tasks. 
/**
 * Immutable holder of feature information about a specific {@link Parse} node.
 * <p>
 * All fields are assigned once in the constructor and are visible
 * within the package only.
 *
 * @see Parse
 */
public class Cons {

  // NOTE(review): the exact meaning of 'cons' versus 'consbo' is not visible here;
  // presumably a constituent feature and its back-off variant. Confirm against callers.
  final String cons;
  final String consbo;
  final int index;
  final boolean unigram;

  /**
   * Creates a holder with the given values; each argument is stored unchanged
   * in the field of the same name.
   *
   * @param cons    The value for {@code cons}.
   * @param consbo  The value for {@code consbo}.
   * @param index   The value for {@code index}.
   * @param unigram The value for {@code unigram}.
   */
  public Cons(String cons, String consbo, int index, boolean unigram) {
    this.unigram = unigram;
    this.index = index;
    this.consbo = consbo;
    this.cons = cons;
  }
}
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.parser; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link Parser parsers}. 24 | */ 25 | public interface ParserEvaluationMonitor extends EvaluationMonitor { 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * The kinds of events a {@link Parser} distinguishes.
 * <p>
 * The declaration order of the constants is kept stable so that
 * {@link #ordinal()} values do not change.
 */
public enum ParserEventTypeEnum {

  BUILD,
  CHECK,

  // TODO Add reason why CHUNK and TAG are deprecated
  @Deprecated CHUNK,
  @Deprecated TAG,

  ATTACH;
}
20 | */ 21 | package opennlp.tools.parser.chunking; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/parser/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package containing common code for performing full syntactic parsing. 20 | */ 21 | package opennlp.tools.parser; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package containing experimental code for performing full syntactic 20 | * parsing using attachment decisions. 21 | */ 22 | package opennlp.tools.parser.treeinsert; 23 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/postag/POSTagFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package opennlp.tools.postag; 18 | 19 | /** 20 | * Defines the format for part-of-speech tagging, i.e. 21 | * PENN 22 | * or UD format. 
/**
 * Enumerates the formats a part-of-speech tag set can follow,
 * for instance the PENN or the UD format.
 */
public enum POSTagFormat {

  UD,
  PENN,
  CUSTOM,
  UNKNOWN;
}
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to part-of-speech tagging. 20 | */ 21 | package opennlp.tools.postag; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.sentdetect; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | public interface SentenceDetectorEvaluationMonitor extends 23 | EvaluationMonitor { 24 | 25 | } 26 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/th/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | Package related to the processing of Thai data. 28 | 29 | 30 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/sentdetect/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package related to identifying sentence boundaries. 
20 | */ 21 | package opennlp.tools.sentdetect; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/stemmer/Stemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.stemmer; 19 | 20 | /** 21 | * A stemmer reduces a word to its stem. 22 | */ 23 | public interface Stemmer { 24 | 25 | CharSequence stem(CharSequence word); 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenContextGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.tokenize; 19 | 20 | /** 21 | * Interface for context generators required for {@link TokenizerME}. 22 | */ 23 | public interface TokenContextGenerator { 24 | 25 | /** 26 | * @param sentence The string that represents a sentence. 27 | * @param index The index to consider splitting tokens. 28 | * 29 | * @return An array of features for a {@code sentence} at the specified {@code index}. 30 | */ 31 | String[] getContext(String sentence, int index); 32 | } 33 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerEvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.tokenize; 19 | 20 | import opennlp.tools.util.eval.EvaluationMonitor; 21 | 22 | /** 23 | * A marker interface for evaluating {@link Tokenizer tokenizers}. 24 | */ 25 | public interface TokenizerEvaluationMonitor extends EvaluationMonitor { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/tokenize/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Contains classes related to finding tokens or words in a string. All 20 | * tokenizers implement the Tokenizer interface. Currently, there is the 21 | * learnable {@code TokenizerME}, the {@code WhitespaceTokenizer} and 22 | * the {@code SimpleTokenizer} which is a character class tokenizer.
23 | */ 24 | package opennlp.tools.tokenize; 25 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util; 19 | 20 | import java.io.IOException; 21 | import java.io.InputStream; 22 | 23 | /** 24 | * Allows repeated reads through a stream for certain model building types. 25 | */ 26 | public interface InputStreamFactory { 27 | 28 | /** 29 | * @return A valid, open {@link InputStream} instance. 30 | * @throws IOException Thrown if IO errors occurred. 31 | */ 32 | InputStream createInputStream() throws IOException; 33 | } 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/ResetableIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package opennlp.tools.util; 20 | 21 | import java.util.Iterator; 22 | 23 | /** 24 | * This interface makes an {@link Iterator} resettable. 25 | */ 26 | public interface ResetableIterator extends Iterator { 27 | 28 | /** 29 | * Sets the {@link Iterator} back to the first retrieved element. 30 | * The already processed sequence of elements must be repeated. 31 | */ 32 | void reset(); 33 | } 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/eval/EvaluationMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util.eval; 19 | 20 | public interface EvaluationMonitor { 21 | 22 | void correctlyClassified(T reference, T prediction); 23 | 24 | void misclassified(T reference, T prediction); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionNotLoadedException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util.ext; 19 | 20 | /** 21 | * Exception indicates that an OpenNLP extension could not be loaded. 
22 | */ 23 | @SuppressWarnings("serial") 24 | public class ExtensionNotLoadedException extends RuntimeException { 25 | 26 | public ExtensionNotLoadedException(String message) { 27 | super(message); 28 | } 29 | 30 | public ExtensionNotLoadedException(Throwable t) { 31 | super(t); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionServiceKeys.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util.ext; 19 | 20 | public final class ExtensionServiceKeys { 21 | 22 | /** 23 | * Property key for the unique {@code id} which identifies an 24 | * OpenNLP extension service. 
25 | */ 26 | public static final String ID = "OPENLP_EXTENSION_ID"; 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/ext/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package containing extension loading code. 20 | */ 21 | package opennlp.tools.util.ext; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/featuregen/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * This package contains classes for generating sequence features. 20 | */ 21 | package opennlp.tools.util.featuregen; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/java/Experimental.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util.java; 19 | 20 | import java.lang.annotation.Documented; 21 | 22 | /** 23 | * Indicates that a certain API feature is not stable 24 | * and might change with a new release. 
25 | */ 26 | @Documented 27 | public @interface Experimental { 28 | } 29 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/jvm/StringInterner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util.jvm; 19 | 20 | /** 21 | * An interface for a String interner implementation. 22 | */ 23 | public interface StringInterner { 24 | 25 | /** 26 | * Interns and returns a reference to the representative instance 27 | * for any collection of string instances that are equal to each other.
28 | * 29 | * @param sample string instance to be interned 30 | * @return reference to the interned string instance 31 | */ 32 | String intern(String sample); 33 | } 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/model/ModelType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.util.model; 19 | 20 | /** 21 | * Enumeration of supported model types. 22 | */ 23 | public enum ModelType { 24 | MAXENT, 25 | PERCEPTRON, 26 | PERCEPTRON_SEQUENCE 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Package containing utility data structures and algorithms used by multiple other packages. 20 | */ 21 | package opennlp.tools.util; 22 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/java/opennlp/tools/util/wordvector/WordVectorType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.util.wordvector; 19 | 20 | import opennlp.tools.util.java.Experimental; 21 | 22 | @Experimental 23 | public enum WordVectorType { 24 | FLOAT, 25 | DOUBLE 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/main/resources/opennlp/tools/util/opennlp.version: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Version is injected by the maven build, fall back version is 0.0.0-SNAPSHOT 17 | OpenNLP-Version: ${project.version} -------------------------------------------------------------------------------- /opennlp-tools/src/test/java/opennlp/tools/cmdline/TerminateToolExceptionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.cmdline; 19 | 20 | import org.junit.jupiter.api.Assertions; 21 | import org.junit.jupiter.api.Test; 22 | 23 | /** 24 | * Tests for the {@link TerminateToolException} class. 25 | */ 26 | public class TerminateToolExceptionTest { 27 | 28 | @Test 29 | void testCreation() { 30 | TerminateToolException e = new TerminateToolException(-500); 31 | Assertions.assertEquals(-500, e.getCode()); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/java/opennlp/tools/formats/AbstractSampleStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.tools.formats; 19 | 20 | import opennlp.tools.util.InputStreamFactory; 21 | 22 | abstract class AbstractSampleStreamTest extends AbstractFormatTest { 23 | 24 | protected InputStreamFactory getFactory(String resource) { 25 | return new ResourceAsStreamFactory(AbstractSampleStreamTest.class, FORMATS_BASE_DIR + resource); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/java/opennlp/tools/util/featuregen/IdentityFeatureGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.util.featuregen; 19 | 20 | import java.util.List; 21 | 22 | public class IdentityFeatureGenerator implements AdaptiveFeatureGenerator { 23 | 24 | public void createFeatures(List features, String[] tokens, int index, 25 | String[] previousOutcomes) { 26 | features.add(tokens[index]); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/java/opennlp/tools/util/normalizer/NumberCharSequenceNormalizerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package opennlp.tools.util.normalizer; 18 | 19 | import org.junit.jupiter.api.Assertions; 20 | import org.junit.jupiter.api.Test; 21 | 22 | public class NumberCharSequenceNormalizerTest { 23 | 24 | @Test 25 | void normalize() { 26 | Assertions.assertEquals("absc , abcd", 27 | NumberCharSequenceNormalizer.getInstance().normalize("absc 123,0123 abcd")); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/java/opennlp/tools/util/wordvector/AbstractWordVectorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.tools.util.wordvector; 19 | 20 | import java.io.InputStream; 21 | 22 | import opennlp.tools.formats.AbstractFormatTest; 23 | 24 | public class AbstractWordVectorTest { 25 | 26 | protected static final String FORMATS_BASE_DIR = "/opennlp/tools/util/wordvector/"; 27 | 28 | protected InputStream getResourceStream(String resource) { 29 | return AbstractFormatTest.class.getResourceAsStream(FORMATS_BASE_DIR + resource); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-broken.txt: -------------------------------------------------------------------------------- 1 | C goodbye=1.0 2 | C goodbye 3 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/data/opennlp/maxent/io/rvfes-bug-data-ok.txt: -------------------------------------------------------------------------------- 1 | C goodbye 2 | C goodbye=1.0 3 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt: -------------------------------------------------------------------------------- 1 | A feature1=4.0 feature3=10.0 feature4=2.0 2 | A feature1=2.0 feature2=4.0 feature4=3.0 3 | A feature2=5.0 feature3=12.0 feature4=4.0 4 | A feature1=1.0 feature3=11.0 feature4=3.0 5 | A feature1=4.0 feature2=5.0 feature4=2.0 6 | A feature1=3.0 feature2=4.0 feature3=9.0 7 | A feature2=3.0 feature3=11.0 feature4=2.0 8 | A feature1=1.0 feature3=12.0 9 | A feature2=6.0 feature3=12.0 feature4=3.0 10 | A feature1=3.0 feature2=7.0 feature3=11.0 11 | B feature5=4.0 feature2=1.0 feature4=10.0 12 | B feature2=1.0 feature3=11.0 13 | B feature5=3.0 feature4=12.0 14 | B feature2=1.0 feature3=11.0 15 | B feature5=4.0 feature4=10.0 16 | B feature2=1.0 feature3=9.0 17 | B feature5=2.0 feature4=11.0 18 | B 
feature2=1.0 feature3=12.0 19 | B feature5=4.0 feature4=12.0 20 | B feature2=1.0 feature3=11.0 feature4=4.0 21 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/data/ppa/NOTICE: -------------------------------------------------------------------------------- 1 | This folder contains Prepositional Phrase Attachment Dataset 2 | from Ratnaparkhi, Reynar, & Roukos, 3 | "A Maximum Entropy Model for Prepositional Phrase Attachment". ARPA HLT 1994. 4 | 5 | The data is licensed under the AL 2.0. Please cite the above paper when the 6 | data is redistributed. -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170custom.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170custom.bin -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170default.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170default.bin -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker180custom.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker180custom.bin -------------------------------------------------------------------------------- 
/opennlp-tools/src/test/resources/opennlp/tools/chunker/detailedOutput.txt: -------------------------------------------------------------------------------- 1 | Evaluated 3 samples with 32 entities; found: 35 entities; correct: 28. 2 | TOTAL: precision: 80.00%; recall: 87.50%; F1: 83.58%. 3 | NP: precision: 90.00%; recall: 94.74%; F1: 92.31%. [target: 19; tp: 18; fp: 2] 4 | VP: precision: 75.00%; recall: 75.00%; F1: 75.00%. [target: 8; tp: 6; fp: 2] 5 | PP: precision: 57.14%; recall: 100.00%; F1: 72.73%. [target: 4; tp: 4; fp: 3] 6 | SBAR: precision: 0.00%; recall: 0.00%; F1: 0.00%. [target: 1; tp: 0; fp: 0] 7 | 8 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/chunker/test-insufficient.txt: -------------------------------------------------------------------------------- 1 | Rockwell NNP B-NP -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/cmdline/languagemodel/origin_of_text_samples.txt: -------------------------------------------------------------------------------- 1 | | Test data file name | Source | 2 | | --------------------- | ----------------------------------------------------------------- | 3 | | sentences_set_1.txt | https://emerj.com/what-is-machine-learning/ | 4 | | sentences_set_2.txt | https://www.nasa.gov/history/50-years-ago-apollo-11-preparations/ | 5 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/cmdline/languagemodel/sentences_set_1.txt: -------------------------------------------------------------------------------- 1 | There are different approaches to getting machines to learn 2 | These include using basic decision trees to clustering to layers of artificial neural networks 3 | It depends upon what task you are trying to accomplish and the type and amount of data that you have available 4 | This 
dynamic sees itself played out in applications as varying as medical diagnostics or self-driving cars 5 | One of the most common mistakes among machine learning beginners is testing training data successfully and having the illusion of success 6 | Domingo and others emphasize the importance of keeping some of the data set separate when testing models 7 | And only using that reserved data to test a chosen model followed by learning on the whole data set 8 | When a learning algorithm is not working often the quicker path to success is to feed the machine more data 9 | The availability of which is by now well-known as a primary driver of progress in machine and deep learning algorithms in recent years 10 | However, this can lead to issues with scalability in which we have more data but time to learn that data remains an issue -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/cmdline/languagemodel/sentences_set_2.txt: -------------------------------------------------------------------------------- 1 | In January 1969 only the most optimistic could have predicted that in just six months time humans would be walking on the surface of the Moon 2 | NASA was making preparations for that historic mission 3 | Early in the month NASA announced the crew for Apollo 11 4 | It was the first mission that would attempt a lunar landing 5 | Commander Neil Arm Armstrong and Lunar Module Pilot Edwin Aldrin and Command Module Pilot Michael Collins were the crew members Components of their spacecraft and rocket arrived at the Kennedy Space Center in January and February 6 | At the Manned Spacecraft Center now the Johnson Space Center in Houston facilities were being prepared to receive the first humans to return from the Moon 7 | North American Rockwell of Downey shipped the Apollo 11 spacecraft to Kennedy Space Center on Jan 23 1969 8 | Workers transferred the modules to the Manned Spacecraft Operations 
Building where they removed them from their shipping containers 9 | On January 29 they mated the two modules and installed the assembly in an altitude chamber for testing 10 | Earlier in the month the Grumman Aircraft and Engineering Corporation had delivered the Apollo 11 Lunar Module to Kennedy Space Center 11 | The three stages of the Saturn V rocket arrived in January and February for stacking in the Vehicle Assembly Building -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/bionlp2004-01.sample: -------------------------------------------------------------------------------- 1 | High-dose O 2 | growth O 3 | hormone O 4 | does O 5 | not O 6 | affect O 7 | proinflammatory B-protein 8 | cytokine I-protein 9 | ( O 10 | tumor B-protein 11 | necrosis I-protein 12 | factor-alpha I-protein 13 | , O 14 | interleukin-6 B-protein 15 | , O 16 | and O 17 | interferon-gamma B-protein 18 | ) O 19 | release O 20 | from O 21 | activated O 22 | peripheral B-cell_type 23 | blood I-cell_type 24 | mononuclear I-cell_type 25 | cells I-cell_type 26 | or O 27 | after O 28 | minimal O 29 | to O 30 | moderate O 31 | surgical O 32 | stress O 33 | . 
O -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/brat-ann.conf: -------------------------------------------------------------------------------- 1 | [entities] 2 | Person 3 | Location 4 | Date 5 | [relations] 6 | Related Arg1:Person, Arg2:Person 7 | Related Arg1:Person, Arg2:Location 8 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/opennlp-1193.ann: -------------------------------------------------------------------------------- 1 | T1 Name 0 7;17 24;8 15 spanA_1 spanA_2 spanA_3 2 | T2 Name 26 33;40 47 spanB_1 spanB_2 3 | T3 Event 34 39 spanC 4 | E1 Event:T3 5 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/opennlp-1193.txt: -------------------------------------------------------------------------------- 1 | spanA_1 spanA_2 2 | spanA_3 3 | 4 | spanB_1 spanC spanB_2 5 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities-overlapping.ann: -------------------------------------------------------------------------------- 1 | T1 Person 281 286 Obama 2 | T2 Person 21 33 Barack Obama 3 | T3 Location 51 62 South Korea 4 | T4 Location 151 162 North Korea 5 | T5 Location 231 236 China 6 | T6 Location 243 254 South Korea 7 | T7 Location 322 333 North Korea 8 | T8 Date 257 266 Wednesday 9 | T9 Location 386 397 North Korea 10 | T10 Person 586 591 Obama 11 | T11 Date 843 860 Wednesday evening 12 | T12 Location 889 901 South Korean 13 | T13 Person 913 928 Lee Myung - bak 14 | T14 Date 931 939 Thursday 15 | T15 Location 978 989 South Korea 16 | T16 Location 1000 1013 United States 17 | T17 Person 1121 1126 Obama 18 | T18 Location 1168 1177 Pyongyang 19 | T19 Person 1168 1177 Pyongyang 20 | #1 
AnnotatorNotes T2 President Obama was the 44th U.S. president 21 | #2 AnnotatorNotes T3 The capital of South Korea is Seoul -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities-overlapping.txt: -------------------------------------------------------------------------------- 1 | U . S . President Barack Obama has arrived in South Korea , where he is expected to show solidarity with the country ' s president in demanding North Korea move toward ending its nuclear weapons programs . 2 | As he departed China for South Korea Wednesday , President Obama took another opportunity to urge North Korea to reach an agreement on its nuclear weapons . 3 | " North Korea has a choice . 4 | It can continue down the path of confrontation and provocation that has led to less security , less prosperity and more isolation from the global community , " President Obama said . 5 | " Or it can choose to become a full member of the international community , which will give a better life to its people by living up to international obligations and foregoing nuclear weapons . " 6 | The president landed at a U . S . air base Wednesday evening , and is to hold talks with South Korean President Lee Myung - bak Thursday here in the South Korean capital . 7 | South Korea and the United States are trying to coax the North back to six - nation talks aimed at ending its nuclear weapons . 8 | President Obama has indicated he will send an envoy to Pyongyang before the end of the year for one - on - one discussions , but only in the context of restarting the multinational process . 
9 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.ann: -------------------------------------------------------------------------------- 1 | T1 Person 281 286 Obama 2 | T2 Person 21 33 Barack Obama 3 | T3 Location 51 62 South Korea 4 | T4 Location 151 162 North Korea 5 | T5 Location 231 236 China 6 | T6 Location 243 254 South Korea 7 | T7 Location 322 333 North Korea 8 | T8 Date 257 266 Wednesday 9 | T9 Location 386 397 North Korea 10 | T10 Person 586 591 Obama 11 | T11 Date 843 860 Wednesday evening 12 | T12 Location 889 901 South Korean 13 | T13 Person 913 928 Lee Myung - bak 14 | T14 Date 931 939 Thursday 15 | T15 Location 978 989 South Korea 16 | T16 Location 1000 1013 United States 17 | T17 Person 1121 1126 Obama 18 | T18 Location 1168 1177 Pyongyang 19 | #1 AnnotatorNotes T2 President Obama was the 44th U.S. president 20 | #2 AnnotatorNotes T3 The capital of South Korea is Seoul -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt: -------------------------------------------------------------------------------- 1 | U . S . President Barack Obama has arrived in South Korea , where he is expected to show solidarity with the country ' s president in demanding North Korea move toward ending its nuclear weapons programs . 2 | As he departed China for South Korea Wednesday , President Obama took another opportunity to urge North Korea to reach an agreement on its nuclear weapons . 3 | " North Korea has a choice . 4 | It can continue down the path of confrontation and provocation that has led to less security , less prosperity and more isolation from the global community , " President Obama said . 
5 | " Or it can choose to become a full member of the international community , which will give a better life to its people by living up to international obligations and foregoing nuclear weapons . " 6 | The president landed at a U . S . air base Wednesday evening , and is to hold talks with South Korean President Lee Myung - bak Thursday here in the South Korean capital . 7 | South Korea and the United States are trying to coax the North back to six - nation talks aimed at ending its nuclear weapons . 8 | President Obama has indicated he will send an envoy to Pyongyang before the end of the year for one - on - one discussions , but only in the context of restarting the multinational process . 9 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.ann: -------------------------------------------------------------------------------- 1 | T1 Person 281 286 Obama 2 | T2 Person 21 33 Barack Obama 3 | T3 Location 51 62 South Korea 4 | T4 Location 151 162 North Korea 5 | T5 Location 231 236 China 6 | T6 Location 243 254 South Korea 7 | T7 Location 322 333 North Korea 8 | T8 Date 257 266 Wednesday 9 | T9 Location 386 397 North Korea 10 | T10 Person 586 591 Obama 11 | T11 Date 843 860 Wednesday evening 12 | T12 Location 889 901 South Korean 13 | T13 Person 913 928 Lee Myung - bak 14 | T14 Date 931 939 Thursday 15 | T15 Location 978 989 South Korea 16 | T16 Location 1000 1013 United States 17 | T17 Person 1121 1126 Obama 18 | T18 Location 1168 1177 Pyongyang 19 | R1 Related Arg1:T2 Arg2:T3 20 | R2 Related Arg1:T1 Arg2:T7 21 | R3 Related Arg1:T13 Arg2:T12 22 | R4 Related Arg1:T17 Arg2:T18 23 | R5 Related Arg1:T2 Arg2:T4 24 | R6 Related Arg1:T2 Arg2:T5 25 | R7 Related Arg1:T2 Arg2:T6 26 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt: 
-------------------------------------------------------------------------------- 1 | U . S . President Barack Obama has arrived in South Korea , where he is expected to show solidarity with the country ' s president in demanding North Korea move toward ending its nuclear weapons programs . 2 | As he departed China for South Korea Wednesday , President Obama took another opportunity to urge North Korea to reach an agreement on its nuclear weapons . 3 | " North Korea has a choice . 4 | It can continue down the path of confrontation and provocation that has led to less security , less prosperity and more isolation from the global community , " President Obama said . 5 | " Or it can choose to become a full member of the international community , which will give a better life to its people by living up to international obligations and foregoing nuclear weapons . " 6 | The president landed at a U . S . air base Wednesday evening , and is to hold talks with South Korean President Lee Myung - bak Thursday here in the South Korean capital . 7 | South Korea and the United States are trying to coax the North back to six - nation talks aimed at ending its nuclear weapons . 8 | President Obama has indicated he will send an envoy to Pyongyang before the end of the year for one - on - one discussions , but only in the context of restarting the multinational process . 
9 | 10 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/census90.sample: -------------------------------------------------------------------------------- 1 | SMITH 1.006 1.006 1 2 | JOHNSON 0.810 1.816 2 3 | WILLIAMS 0.699 2.515 3 4 | JONES 0.621 3.136 4 5 | BROWN 0.621 3.757 5 6 | MARY 2.629 2.629 1 7 | PATRICIA 1.073 3.702 2 8 | LINDA 1.035 4.736 3 9 | BARBARA 0.980 5.716 4 10 | ELIZABETH 0.937 6.653 5 11 | JAMES 3.318 3.318 1 12 | JOHN 3.271 6.589 2 13 | ROBERT 3.143 9.732 3 14 | MICHAEL 2.629 12.361 4 15 | WILLIAM 2.451 14.812 5 16 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/chunker-01.sample: -------------------------------------------------------------------------------- 1 | He PRP B-NP 2 | reckons VBZ B-VP 3 | the DT B-NP 4 | current JJ I-NP 5 | account NN I-NP 6 | deficit NN I-NP 7 | will MD B-VP 8 | narrow VB I-VP 9 | to TO B-PP 10 | only RB B-NP 11 | # # I-NP 12 | 1.8 CD I-NP 13 | billion CD I-NP 14 | in IN B-PP 15 | September NNP B-NP 16 | . . O -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/conll2002-es.sample: -------------------------------------------------------------------------------- 1 | Abogado NC B-PER 2 | General AQ I-PER 3 | del SP I-PER 4 | Estado NC I-PER 5 | . Fp O 6 | 7 | Melbourne NP B-LOC 8 | ( Fpa O 9 | Australia NP B-LOC 10 | ) Fpt O 11 | , Fc O 12 | 25 Z O 13 | may NC O 14 | ( Fpa O 15 | EFE NC B-ORG 16 | ) Fpt O 17 | . 
Fp O -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/conll2002-nl.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- -DOCSTART- O 2 | De Art O 3 | tekst N O 4 | van Prep O 5 | het Art O 6 | arrest N O 7 | is V O 8 | nog Adv O 9 | niet Adv O 10 | schriftelijk Adj O 11 | beschikbaar Adj O 12 | maar Conj O 13 | het Art O 14 | bericht N O 15 | werd V O 16 | alvast Adv O 17 | bekendgemaakt V O 18 | door Prep O 19 | een Art O 20 | communicatiebureau N O 21 | dat Conj O 22 | Floralux N B-ORG 23 | inhuurde V O 24 | . Punc O 25 | 26 | In Prep O 27 | '81 Num O 28 | regulariseert V O 29 | de Art O 30 | toenmalige Adj O 31 | Vlaamse Adj B-MISC 32 | regering N O 33 | de Art O 34 | toestand N O 35 | met Prep O 36 | een Art O 37 | BPA N B-MISC 38 | dat Pron O 39 | het Art O 40 | bedrijf N O 41 | op Prep O 42 | eigen Pron O 43 | kosten N O 44 | heeft V O 45 | laten V O 46 | opstellen V O 47 | . Punc O -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/conll2003-de.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- -X- -X- O 2 | 3 | Ereignis Ereignis NN I-NC O 4 | und und KON O O 5 | Erzählung Erzählung NN I-NC O 6 | oder oder KON I-NC O 7 | : : $. 
O O 8 | 9 | Albrecht Albrecht NE I-NC I-PER 10 | Lehmann Lehmann NE I-NC I-PER 11 | versucht versuchen VVFIN I-VC O 12 | in in APPR I-PC O 13 | seinem sein PPOSAT I-NC O 14 | Buch Buch NN I-NC I-MISC 15 | Im im APPRART I-PC I-MISC 16 | Fremden Fremde NN I-NC I-MISC 17 | ungewollt ungewollt ADJD O O 18 | zuhaus ADV O O 19 | 20 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/conll2003-en.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- O O 2 | 3 | EU NNP I-NP I-ORG 4 | rejects VBZ I-VP O 5 | German JJ I-NP I-MISC 6 | call NN I-NP O 7 | to TO I-VP O 8 | boycott VB I-VP O 9 | British JJ I-NP I-MISC 10 | lamb NN I-NP O 11 | . . O O 12 | 13 | Peter NNP I-NP I-PER 14 | Blackburn NNP I-NP I-PER 15 | 16 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/evalita-ner-it-01.sample: -------------------------------------------------------------------------------- 1 | A E adige20041007_id413942 O 2 | parlare VF adige20041007_id413942 O 3 | di E adige20041007_id413942 O 4 | questi DP adige20041007_id413942 O 5 | problemi SP adige20041007_id413942 O 6 | sar VI adige20041007_id413942 O 7 | il RS adige20041007_id413942 O 8 | neonatologo SS adige20041007_id413942 O 9 | Dino SPN adige20041007_id413942 B-PER 10 | Pedrotti SPN adige20041007_id413942 I-PER 11 | . 
XPS adige20041007_id413942 O 12 | 13 | Sono VIY adige20041008_id414214 O 14 | assicurate VPP adige20041008_id414214 O 15 | a E adige20041008_id414214 O 16 | tutta DS adige20041008_id414214 O 17 | la RS adige20041008_id414214 O 18 | popolazione SS adige20041008_id414214 O 19 | a E adige20041008_id414214 O 20 | titolo SS adige20041008_id414214 O 21 | gratuito AS adige20041008_id414214 O 22 | e C adige20041008_id414214 O 23 | con E adige20041008_id414214 O 24 | accesso SS adige20041008_id414214 O 25 | diretto AS adige20041008_id414214 O 26 | . XPS adige20041008_id414214 O 27 | 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/evalita-ner-it-02.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- 2 | 3 | A E adige20041008_id414157 O 4 | circa B adige20041008_id414157 O 5 | 90 N adige20041008_id414157 O 6 | chilometri SP adige20041008_id414157 O 7 | dall' ES adige20041008_id414157 O 8 | arrivo SS adige20041008_id414157 O 9 | , XPW adige20041008_id414157 O 10 | il RS adige20041008_id414157 O 11 | capitano SS adige20041008_id414157 O 12 | della ES adige20041008_id414157 O 13 | Gerolsteiner SPN adige20041008_id414157 B-ORG 14 | Davide SPN adige20041008_id414157 B-PER 15 | Rebellin SPN adige20041008_id414157 I-PER 16 | ha VIY adige20041008_id414157 O 17 | allungato VSP adige20041008_id414157 O 18 | su E adige20041008_id414157 O 19 | uno RS adige20041008_id414157 O 20 | dei EP adige20041008_id414157 O 21 | pochi DP adige20041008_id414157 O 22 | tratti SP adige20041008_id414157 O 23 | in E adige20041008_id414157 O 24 | salita SS adige20041008_id414157 O 25 | , XPW adige20041008_id414157 O 26 | frazionando VG adige20041008_id414157 O 27 | il RS adige20041008_id414157 O 28 | gruppo SS adige20041008_id414157 O 29 | . 
XPS adige20041008_id414157 O 30 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/evalita-ner-it-03.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- 2 | 3 | Alla ES adige20041008_id414157 O 4 | sua DS adige20041008_id414157 O 5 | ruota SS adige20041008_id414157 O 6 | si PN adige20041008_id414157 O 7 | sono VIY adige20041008_id414157 O 8 | portati VPP adige20041008_id414157 O 9 | altri DP adige20041008_id414157 O 10 | sei N adige20041008_id414157 O 11 | corridori SP adige20041008_id414157 O 12 | che CCHE adige20041008_id414157 O 13 | hanno VIY adige20041008_id414157 O 14 | poi B adige20041008_id414157 O 15 | disputato VSP adige20041008_id414157 O 16 | lo RS adige20041008_id414157 O 17 | sprint SN adige20041008_id414157 O 18 | sul ES adige20041008_id414157 O 19 | traguardo SS adige20041008_id414157 O 20 | di E adige20041008_id414157 O 21 | Bourges SPN adige20041008_id414157 B-GPE 22 | . XPS adige20041008_id414157 O 23 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/evalita-ner-it-broken.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- 2 | xyz 3 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/evalita-ner-it-incorrect.sample: -------------------------------------------------------------------------------- 1 | -DOCSTART- 2 | 3 | Alla ES adige20041008_id414157 -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/irishsentencebank/irishsentencebank-sample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | A Dhia, ag iompar clainne! 4 | Oh my God, I'm pregnant! 
5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | Gabh mo leithscéal, an ón chlochaois thú? 15 | Excuse me, are you from the stone age? 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/lang-detect-01.sample: -------------------------------------------------------------------------------- 1 | en This is just a test. 2 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/leipzig-en.sample: -------------------------------------------------------------------------------- 1 | 1 A rebel statement sent to Lisbon from Jamba said 86 government soldiers and 13 guerrillas were killed in the fighting that ended Jan. 3. It said the rebel forces sill held Mavinga. 2 | 2 Authorities last week issued a vacate order for a club in Manhattan and closed another in the Bronx. 3 | 3 At the first Pan Am bankruptcy hearing, for example, at least five airlines were represented. 4 | 4 Mr. Neigum, poker-faced during the difficult task, manages a 46-second showing. 5 | 5 This, combined with the container division talks, suggests the group's bankers might be considering an orderly disposal of all assets. 6 | 6 She told the Post in an interview published Sunday that some of the money may have become "mingled" into improvements on her home that included a swimming pool, a $2,500 wide-screen television and renovations to her basement. 7 | 7 According to a study by the Marshall Institute, the average NASA employee's age in 1963 was 30; now most of its senior and middle-managers will be eligible to retire in five years. 
8 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/leipzig/samples/.hidden: -------------------------------------------------------------------------------- 1 | Nothing in here -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/leipzig/samples/123-skipped.txt: -------------------------------------------------------------------------------- 1 | skip this file -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/leipzig/samples/dan-sentences.txt: -------------------------------------------------------------------------------- 1 | 1 Der var engang en mand. 2 | 2 Der boede i en spand. 3 | 3 Spanden var af ler. -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/leipzig/samples/dontread/xxx-sentences.txt: -------------------------------------------------------------------------------- 1 | 1 This sentence should not be read. 2 | 2 The same goes for this sentence. 3 | 3 If we got this far then something went wrong! -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/leipzig/samples/eng-sentences.txt: -------------------------------------------------------------------------------- 1 | 1 This is a sentence. 2 | 2 This is another sentences. 
-------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/lemma-01.sample: -------------------------------------------------------------------------------- 1 | suns NOUN sun -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/masc/fakeMASC-ne.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/masc/fakeMASC-s.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/masc/fakeMASC-seg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/masc/fakeMASC.txt: -------------------------------------------------------------------------------- 1 | This is a test Sentence. This is 'nother test sentence. 
-------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/moses/moses-tiny.sample: -------------------------------------------------------------------------------- 1 | je|PRO vous|PRO achète|VB un|ART aardvark|NN 2 | je|PRO vous|PRO achète|VB un|ART chat|NN 3 | je|PRO vous|PRO achète|VB un|ART grand|ADJ chat|NN blanc|ADJ 4 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/muc/parsertest1.sgml: -------------------------------------------------------------------------------- 1 | 19 | para1

para2

para2 -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/name-data-01.sample: -------------------------------------------------------------------------------- 1 | This is a test from Germany . -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/nkjp/ann_segmentation.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |

17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/nkjp/text_structure.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Krótki tekst 12 | 13 | 14 | 15 |
16 |

To krótki tekst w formacie NKJP. Zawiera dwa zdania.

17 | To krótkie zdanie w drugim akapicie. 18 |
19 | 20 |
21 |
22 |
23 |
24 |
25 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/ontonotes/ontonotes-sample-02.parse: -------------------------------------------------------------------------------- 1 | ((S (S-ADV (NP-SBJ (-NONE- *PRO*)) 2 | (VP (VBG Judging) 3 | (PP-CLR (IN from) 4 | (NP (NP (DT the) (NNS Americana)) 5 | (PP-LOC (IN in) 6 | (NP (NP (NNP Haruki) (NNP Murakami) (POS 's)) 7 | (`` ``) 8 | (NX-TTL (NP (DT A) (NNP Wild) (NNP Sheep) (NNP Chase))) 9 | ('' '') 10 | (NP (-LRB- -LRB-) 11 | (NP (NNP Kodansha)) 12 | (, ,) 13 | (NP (CD 320) (NNS pages)) 14 | (, ,) 15 | (NP ($ $) 16 | (CD 18.95) 17 | (-NONE- *U*)) 18 | (-RRB- -RRB-)))))))) 19 | (, ,) 20 | (NP-SBJ (NP (NN baby) (NNS boomers)) 21 | (PP-LOC (IN on) 22 | (NP (NP (DT both) (NNS sides)) 23 | (PP (IN of) 24 | (NP (DT the) (NNP Pacific)))))) 25 | (VP (VBP have) 26 | (NP (NP (DT a) (NN lot)) 27 | (PP (IN in) 28 | (NP (NN common))))) 29 | (. .))) -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/parse-01.sample: -------------------------------------------------------------------------------- 1 | (TOP (S (NP-SBJ (DT The) (NN test) )(VP (MD shall) (VP (VB come) (NP-TMP (NN today) )))(. .) )) -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/sentences-01.sample: -------------------------------------------------------------------------------- 1 | This is a test. 2 | Is it a test? 
-------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/tokens-01.sample: -------------------------------------------------------------------------------- 1 | token1 token2 token3<SPLIT>token4 -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/formats/word-tags-01.sample: -------------------------------------------------------------------------------- 1 | The_DT day_NN has_VBZ just_RB started_VBN ._. -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lang/abb_DE.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 21 | 22 | 23 | 24 | S. 25 | 26 | 27 | f. 28 | 29 | 30 | ff. 31 | 32 | 33 | z. B. 34 | 35 | 36 | z.B. 37 | 38 | 39 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lang/abb_EN.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | tel. 25 | 26 | 27 | Mr. 28 | 29 | 30 | Ms. 31 | 32 | 33 | Mrs. 34 | 35 | 36 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lang/abb_FR.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | c.-à-d. 25 | 26 | 27 | cf. 28 | 29 | 30 | p. 31 | 32 | 33 | p.ex. 34 | 35 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lang/abb_IT.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | art. 25 | 26 | 27 | ca. 28 | 29 | 30 | p. 31 | 32 | 33 | S. 
34 | 35 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/smalldictionary.dict: -------------------------------------------------------------------------------- 1 | barking VBG bark 2 | dogs NNS dog 3 | running VBG run 4 | down RP down 5 | street NN street -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/smalldictionarymulti.dict: -------------------------------------------------------------------------------- 1 | barking VBG bark#bark 2 | dogs NNS dog 3 | running VBG run#run 4 | down RP down 5 | street NN street -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/lemmatizer/trial.old-insufficient.tsv: -------------------------------------------------------------------------------- 1 | The DT the -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/AnnotatedSentences.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/namefind/AnnotatedSentences.txt -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/AnnotatedSentencesInsufficient.txt: -------------------------------------------------------------------------------- 1 | Last September, I tried to find out the address of an old school friend whom I hadnt't seen for 15 years. 2 | I just knew his name , Alan McKennedy , and I'd heard the rumour that he'd moved to Scotland, the country of his ancestors. 3 | So I called Julie , a friend who's still in contact with him. 
4 | She told me that he lived in 23213 Edinburgh, Worcesterstreet 12. 5 | I wrote him a letter right away and he answered soon, sounding very happy and delighted. -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train: -------------------------------------------------------------------------------- 1 | NATO 2 | United States 3 | NATO Parliamentary Assembly 4 | Edinburgh 5 | Britain 6 | Anders Fogh Rasmussen 7 | U . S . 8 | Barack Obama 9 | Afghanistan 10 | Rasmussen 11 | Afghanistan 12 | 2010 -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/OnlyWithNames.train: -------------------------------------------------------------------------------- 1 | Neil Abercrombie 2 | Anibal Acevedo-Vila 3 | Gary Ackerman 4 | Robert Aderholt 5 | Daniel Akaka 6 | Todd Akin 7 | Lamar Alexander 8 | Rodney Alexander -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/OnlyWithNamesWithTypes.train: -------------------------------------------------------------------------------- 1 | Neil Abercrombie 2 | Anibal Acevedo-Vila 3 | Gary Ackerman 4 | Robert Aderholt 5 | Daniel Akaka 6 | Todd Akin 7 | Lamar Alexander 8 | Rodney Alexander -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/html1.train: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
    5 |
  • Advanced Integrated Pest Management
  • 6 |
  • Bay Cities Produce Co., Inc.
  • 7 |
8 | 9 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/namefind/origin-training-data.txt: -------------------------------------------------------------------------------- 1 | ################ 2 | RandomNewsWithGeneratedDates_DE.train: 3 | - Randomly generated by: 4 | opennlp.tools.cmdline.namefind.generator.RandomGermanNewsGenerator 5 | - Topics: 6 | Important, famous books and movies, recent sport events, music highlights, and news distributed across the last 100 years 7 | ################ 8 | RandomNewsWithGeneratedDates_EN.train: 9 | - Randomly generated by: 10 | opennlp.tools.cmdline.namefind.generator.RandomEnglishNewsGenerator 11 | - Topics: 12 | Important, famous books and movies, recent sport events, music highlights, and news distributed across the last 100 years 13 | ################ -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model-no-count.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | brown 25 | fox 26 | 27 | 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model-not-a-number.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | brown 25 | fox 26 | 27 | 28 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/parser/en_head_rules: -------------------------------------------------------------------------------- 1 | 20 ADJP 0 NNS QP NN $ ADVP JJ VBN VBG ADJP JJR NP JJS DT FW RBR RBS SBAR RB 2 | 15 ADVP 1 RB RBR RBS FW ADVP TO CD JJR JJ IN NP JJS NN 3 | 5 CONJP 1 CC RB IN 4 | 2 FRAG 1 5 | 2 INTJ 0 6 | 4 LST 1 LS : 7 | 19 NAC 0 NN NNS NNP NNPS NP NAC EX $ CD QP PRP VBG JJ JJS 
JJR ADJP FW 8 | 8 PP 1 IN TO VBG VBN RP FW 9 | 2 PRN 1 10 | 3 PRT 1 RP 11 | 14 QP 0 $ IN NNS NN JJ RB DT CD NCD QP JJR JJS 12 | 7 RRC 1 VP NP ADVP ADJP PP 13 | 10 S 0 TO IN VP S SBAR ADJP UCP NP 14 | 13 SBAR 0 WHNP WHPP WHADVP WHADJP IN DT S SQ SINV SBAR FRAG 15 | 7 SBARQ 0 SQ S SINV SBARQ FRAG 16 | 12 SINV 0 VBZ VBD VBP VB MD VP S SINV ADJP NP 17 | 9 SQ 0 VBZ VBD VBP VB MD VP SQ 18 | 2 UCP 1 19 | 15 VP 1 TO VBD VBN MD VBZ VB VBG VBP VP ADJP NN NNS NP 20 | 6 WHADJP 0 CC WRB JJ ADJP 21 | 4 WHADVP 1 CC WRB 22 | 8 WHNP 0 WDT WP WP$ WHADJP WHPP WHNP 23 | 5 WHPP 1 IN TO FW 24 | 2 X 1 25 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/postag/AnnotatedSentences.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/postag/AnnotatedSentences.txt -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt: -------------------------------------------------------------------------------- 1 | Find_VB out_RP. 
-------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/postag/TagDictionaryCaseInsensitive.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | McKinsey 25 | 26 | 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/postag/TagDictionaryCaseSensitive.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | McKinsey 25 | 26 | 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/postag/TagDictionaryWithoutCaseAttribute.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | McKinsey 25 | 26 | 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/sentdetect/Sentences.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/sentdetect/Sentences.txt -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/sentdetect/SentencesInsufficient.txt: -------------------------------------------------------------------------------- 1 | Last September, I tried to find out the address of an old school friend whom I hadnt't seen for 15 years. 2 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/tokenize/token-insufficient.train: -------------------------------------------------------------------------------- 1 | I tried to find out the address of an old school . 
-------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/DictionaryTest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | S. 25 | 26 | 27 | z. B. 28 | 29 | 30 | z.B. 31 | 32 | 33 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGenerator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction.xml: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | 22 | opennlp/tools/util/featuregen/DictionaryTest.xml 23 | 24 | 25 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestFeatureGeneratorConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 
-------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGenerator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | true 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/wordvector/glove-example-broken-dimensions.txt: -------------------------------------------------------------------------------- 1 | the 0.418 0.24968 -0.41242 2 | of 0.70853 -------------------------------------------------------------------------------- /opennlp-tools/src/test/resources/opennlp/tools/util/wordvector/glove-example-empty.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/opennlp/05f69a426f80fb029d85a5d4301408e7199c4811/opennlp-tools/src/test/resources/opennlp/tools/util/wordvector/glove-example-empty.txt -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerModelResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.uima.chunker; 19 | 20 | import opennlp.tools.chunker.ChunkerModel; 21 | 22 | public interface ChunkerModelResource { 23 | 24 | ChunkerModel getModel(); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/chunker/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Package related to finding non-recursive syntactic annotation such as noun phrase chunks. 
31 | 32 | 33 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.uima.dictionary; 19 | 20 | import opennlp.tools.dictionary.Dictionary; 21 | 22 | public interface DictionaryResource { 23 | Dictionary getDictionary(); 24 | } 25 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/doccat/DoccatModelResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.uima.doccat; 19 | 20 | import opennlp.tools.doccat.DoccatModel; 21 | 22 | public interface DoccatModelResource { 23 | DoccatModel getModel(); 24 | } 25 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.uima.namefind; 19 | 20 | import opennlp.tools.namefind.TokenNameFinderModel; 21 | 22 | public interface TokenNameFinderModelResource { 23 | TokenNameFinderModel getModel(); 24 | } 25 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/namefind/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Package related to finding proper names and numeric amounts. 31 | 32 | 33 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/parser/ParserModelResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package opennlp.uima.parser; 19 | 20 | import opennlp.tools.parser.ParserModel; 21 | 22 | public interface ParserModelResource { 23 | ParserModel getModel(); 24 | } 25 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.uima.postag; 19 | 20 | import opennlp.tools.postag.POSModel; 21 | 22 | public interface POSModelResource { 23 | POSModel getModel(); 24 | } 25 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/postag/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Package related to part-of-speech tagging. 
31 | 32 | 33 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package opennlp.uima.sentdetect; 19 | 20 | import opennlp.tools.sentdetect.SentenceModel; 21 | 22 | public interface SentenceModelResource { 23 | 24 | SentenceModel getModel(); 25 | } 26 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/sentdetect/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Package related to identifying sentence boundaries. 31 | 32 | 33 | -------------------------------------------------------------------------------- /opennlp-uima/src/main/java/opennlp/uima/tokenize/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | Package related to finding tokens or word segments. 
31 | 32 | 33 | -------------------------------------------------------------------------------- /opennlp-uima/src/test/resources/simplelogger.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed 4 | # with this work for additional information regarding copyright 5 | # ownership. The ASF licenses this file to you under the Apache 6 | # License, Version 2.0 (the "License"); you may not use this file 7 | # except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | org.slf4j.simpleLogger.defaultLogLevel=warn 20 | -------------------------------------------------------------------------------- /opennlp-uima/src/test/resources/training-params-invalid.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed 4 | # with this work for additional information regarding copyright 5 | # ownership. The ASF licenses this file to you under the Apache 6 | # License, Version 2.0 (the "License"); you may not use this file 7 | # except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | Algorithm=XYZ 20 | Iterations=100 21 | Cutoff=5 22 | Threads=1 -------------------------------------------------------------------------------- /opennlp-uima/src/test/resources/training-params-test.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed 4 | # with this work for additional information regarding copyright 5 | # ownership. The ASF licenses this file to you under the Apache 6 | # License, Version 2.0 (the "License"); you may not use this file 7 | # except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | Algorithm=MAXENT 20 | Iterations=150 21 | Cutoff=5 22 | Threads=4 -------------------------------------------------------------------------------- /src/license/THIRD-PARTY.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # --------------------------------------------------------------------------------