├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── custom.md │ ├── feature_request.md │ └── new_feature.md ├── .gitignore ├── CHANGELOG.md ├── CHANGELOG.md.meta ├── LICENSE ├── Pictures~ ├── MultinomialNaiveBayesClassifierExample.png ├── NamedEntityRecognitionExample.png ├── POSTaggerExample.png ├── SentenceSplitterExample.png └── TokenizerExample.png ├── README.md ├── README.md.meta ├── Runtime.meta ├── Runtime ├── Converter.cs ├── Converter.cs.meta ├── SharpEntropy.meta ├── SharpEntropy │ ├── AbstractDataIndexer.cs │ ├── AbstractDataIndexer.cs.meta │ ├── BasicContextGenerator.cs │ ├── BasicContextGenerator.cs.meta │ ├── BasicEventReader.cs │ ├── BasicEventReader.cs.meta │ ├── ComparableEvent.cs │ ├── ComparableEvent.cs.meta │ ├── GisModel.cs │ ├── GisModel.cs.meta │ ├── GisTrainer.cs │ ├── GisTrainer.cs.meta │ ├── IContextGenerator.cs │ ├── IContextGenerator.cs.meta │ ├── IMaximumEntropyModel.cs │ ├── IMaximumEntropyModel.cs.meta │ ├── IO.meta │ ├── IO │ │ ├── BinaryGisModelReader.cs │ │ ├── BinaryGisModelReader.cs.meta │ │ ├── BinaryGisModelWriter.cs │ │ ├── BinaryGisModelWriter.cs.meta │ │ ├── GisModelReader.cs │ │ ├── GisModelReader.cs.meta │ │ ├── GisModelWriter.cs │ │ ├── GisModelWriter.cs.meta │ │ ├── IGisModelReader.cs │ │ ├── IGisModelReader.cs.meta │ │ ├── JavaBinaryGisModelReader.cs │ │ ├── JavaBinaryGisModelReader.cs.meta │ │ ├── JavaBinaryGisModelWriter.cs │ │ ├── JavaBinaryGisModelWriter.cs.meta │ │ ├── PlainTextGisModelReader.cs │ │ ├── PlainTextGisModelReader.cs.meta │ │ ├── PlainTextGisModelWriter.cs │ │ └── PlainTextGisModelWriter.cs.meta │ ├── ITrainingDataIndexer.cs │ ├── ITrainingDataIndexer.cs.meta │ ├── ITrainingDataReader.cs │ ├── ITrainingDataReader.cs.meta │ ├── ITrainingEventReader.cs │ ├── ITrainingEventReader.cs.meta │ ├── OnePassDataIndexer.cs │ ├── OnePassDataIndexer.cs.meta │ ├── PatternedPredicate.cs │ ├── PatternedPredicate.cs.meta │ ├── PlainTextByLineDataReader.cs │ ├── PlainTextByLineDataReader.cs.meta │ 
├── TrainingEvent.cs │ ├── TrainingEvent.cs.meta │ ├── TwoPassDataIndexer.cs │ ├── TwoPassDataIndexer.cs.meta │ ├── VX.NLP.Runtime.SharpEntropy.asmdef │ └── VX.NLP.Runtime.SharpEntropy.asmdef.meta ├── SharpWordNet.meta ├── SharpWordNet │ ├── DataFileEngine.cs │ ├── DataFileEngine.cs.meta │ ├── IndexWord.cs │ ├── IndexWord.cs.meta │ ├── Morph.meta │ ├── Morph │ │ ├── AbstractDelegatingOperation.cs │ │ ├── AbstractDelegatingOperation.cs.meta │ │ ├── DetachSuffixesOperation.cs │ │ ├── DetachSuffixesOperation.cs.meta │ │ ├── IOperation.cs │ │ ├── IOperation.cs.meta │ │ ├── LookupExceptionsOperation.cs │ │ ├── LookupExceptionsOperation.cs.meta │ │ ├── LookupIndexWordOperation.cs │ │ ├── LookupIndexWordOperation.cs.meta │ │ ├── TokenizerOperation.cs │ │ ├── TokenizerOperation.cs.meta │ │ ├── Util.cs │ │ └── Util.cs.meta │ ├── Relation.cs │ ├── Relation.cs.meta │ ├── RelationType.cs │ ├── RelationType.cs.meta │ ├── Synset.cs │ ├── Synset.cs.meta │ ├── Tokenizer.cs │ ├── Tokenizer.cs.meta │ ├── VX.NLP.Runtime.SharpWordNet.asmdef │ ├── VX.NLP.Runtime.SharpWordNet.asmdef.meta │ ├── WordNetEngine.cs │ └── WordNetEngine.cs.meta ├── VX.NLP.Runtime.asmdef ├── VX.NLP.Runtime.asmdef.meta ├── VoxellNLP.meta └── VoxellNLP │ ├── Chunker.meta │ ├── Chunker │ ├── ChunkerEventReader.cs │ ├── ChunkerEventReader.cs.meta │ ├── DefaultChunkerContextGenerator.cs │ ├── DefaultChunkerContextGenerator.cs.meta │ ├── EnglishTreebankChunker.cs │ ├── EnglishTreebankChunker.cs.meta │ ├── IChunker.cs │ ├── IChunker.cs.meta │ ├── IChunkerContextGenerator.cs │ ├── IChunkerContextGenerator.cs.meta │ ├── MaximumEntropyChunker.cs │ └── MaximumEntropyChunker.cs.meta │ ├── Classifier.meta │ ├── Classifier │ ├── ClassifyOptions.cs │ ├── ClassifyOptions.cs.meta │ ├── IClassifier.cs │ ├── IClassifier.cs.meta │ ├── NaiveBayesClassifier.cs │ └── NaiveBayesClassifier.cs.meta │ ├── Coreference.meta │ ├── Coreference │ ├── AbstractLinker.cs │ ├── AbstractLinker.cs.meta │ ├── DefaultLinker.cs │ ├── 
DefaultLinker.cs.meta │ ├── DiscourseElement.cs │ ├── DiscourseElement.cs.meta │ ├── DiscourseEntity.cs │ ├── DiscourseEntity.cs.meta │ ├── DiscourseModel.cs │ ├── DiscourseModel.cs.meta │ ├── ILinker.cs │ ├── ILinker.cs.meta │ ├── LinkerMode.cs │ ├── LinkerMode.cs.meta │ ├── Mention.meta │ ├── Mention │ │ ├── AbstractMentionFinder.cs │ │ ├── AbstractMentionFinder.cs.meta │ │ ├── AbstractParse.cs │ │ ├── AbstractParse.cs.meta │ │ ├── DefaultParse.cs │ │ ├── DefaultParse.cs.meta │ │ ├── DictionaryFactory.cs │ │ ├── DictionaryFactory.cs.meta │ │ ├── IDictionary.cs │ │ ├── IDictionary.cs.meta │ │ ├── IHeadFinder.cs │ │ ├── IHeadFinder.cs.meta │ │ ├── IMentionFinder.cs │ │ ├── IMentionFinder.cs.meta │ │ ├── IParse.cs │ │ ├── IParse.cs.meta │ │ ├── JWNLDictionary.cs │ │ ├── JWNLDictionary.cs.meta │ │ ├── Mention.cs │ │ ├── Mention.cs.meta │ │ ├── MentionContext.cs │ │ ├── MentionContext.cs.meta │ │ ├── PennTreebankHeadFinder.cs │ │ ├── PennTreebankHeadFinder.cs.meta │ │ ├── PennTreebankMentionFinder.cs │ │ ├── PennTreebankMentionFinder.cs.meta │ │ ├── ShallowParseMentionFinder.cs │ │ ├── ShallowParseMentionFinder.cs.meta │ │ ├── WordnetDictionary.cs │ │ └── WordnetDictionary.cs.meta │ ├── Resolver.meta │ ├── Resolver │ │ ├── AbstractResolver.cs │ │ ├── AbstractResolver.cs.meta │ │ ├── CommonNounResolver.cs │ │ ├── CommonNounResolver.cs.meta │ │ ├── DefaultNonReferentialResolver.cs │ │ ├── DefaultNonReferentialResolver.cs.meta │ │ ├── DefiniteNounResolver.cs │ │ ├── DefiniteNounResolver.cs.meta │ │ ├── FixedNonReferentialResolver.cs │ │ ├── FixedNonReferentialResolver.cs.meta │ │ ├── INonReferentialResolver.cs │ │ ├── INonReferentialResolver.cs.meta │ │ ├── IResolver.cs │ │ ├── IResolver.cs.meta │ │ ├── IsAResolver.cs │ │ ├── IsAResolver.cs.meta │ │ ├── MaximumEntropyResolver.cs │ │ ├── MaximumEntropyResolver.cs.meta │ │ ├── PerfectResolver.cs │ │ ├── PerfectResolver.cs.meta │ │ ├── PluralNounResolver.cs │ │ ├── PluralNounResolver.cs.meta │ │ ├── PluralPronounResolver.cs 
│ │ ├── PluralPronounResolver.cs.meta │ │ ├── ProperNounResolver.cs │ │ ├── ProperNounResolver.cs.meta │ │ ├── ResolverMode.cs │ │ ├── ResolverMode.cs.meta │ │ ├── SingletonNonReferentialResolver.cs │ │ ├── SingletonNonReferentialResolver.cs.meta │ │ ├── SingularPronounResolver.cs │ │ ├── SingularPronounResolver.cs.meta │ │ ├── SpeechPronounResolver.cs │ │ └── SpeechPronounResolver.cs.meta │ ├── Similarity.meta │ ├── Similarity │ │ ├── Context.cs │ │ ├── Context.cs.meta │ │ ├── Gender.cs │ │ ├── Gender.cs.meta │ │ ├── GenderEnum.cs │ │ ├── GenderEnum.cs.meta │ │ ├── GenderModel.cs │ │ ├── GenderModel.cs.meta │ │ ├── ITestGenderModel.cs │ │ ├── ITestGenderModel.cs.meta │ │ ├── ITestNumberModel.cs │ │ ├── ITestNumberModel.cs.meta │ │ ├── ITestSimilarityModel.cs │ │ ├── ITestSimilarityModel.cs.meta │ │ ├── ITrainSimilarityModel.cs │ │ ├── ITrainSimilarityModel.cs.meta │ │ ├── MaximumEntropyCompatibilityModel.cs │ │ ├── MaximumEntropyCompatibilityModel.cs.meta │ │ ├── Number.cs │ │ ├── Number.cs.meta │ │ ├── NumberEnum.cs │ │ ├── NumberEnum.cs.meta │ │ ├── NumberModel.cs │ │ ├── NumberModel.cs.meta │ │ ├── SemanticCompatibility.cs │ │ ├── SemanticCompatibility.cs.meta │ │ ├── SemanticEnum.cs │ │ ├── SemanticEnum.cs.meta │ │ ├── SimilarityModel.cs │ │ └── SimilarityModel.cs.meta │ ├── TreebankLinker.cs │ └── TreebankLinker.cs.meta │ ├── Featuring.meta │ ├── Featuring │ ├── IFeatureExtractor.cs │ ├── IFeatureExtractor.cs.meta │ ├── TfIdfFeatureExtractor.cs │ ├── TfIdfFeatureExtractor.cs.meta │ ├── Word2VecFeatureExtractor.cs │ └── Word2VecFeatureExtractor.cs.meta │ ├── NameFind.meta │ ├── NameFind │ ├── DefaultNameContextGenerator.cs │ ├── DefaultNameContextGenerator.cs.meta │ ├── EnglishNameFinder.cs │ ├── EnglishNameFinder.cs.meta │ ├── INameContextGenerator.cs │ ├── INameContextGenerator.cs.meta │ ├── INameFinder.cs │ ├── INameFinder.cs.meta │ ├── MaximumEntropyNameFinder.cs │ ├── MaximumEntropyNameFinder.cs.meta │ ├── NameFinderEventReader.cs │ └── 
NameFinderEventReader.cs.meta │ ├── Parser.meta │ ├── Parser │ ├── BuildContextGenerator.cs │ ├── BuildContextGenerator.cs.meta │ ├── CheckContextGenerator.cs │ ├── CheckContextGenerator.cs.meta │ ├── ChunkContextGenerator.cs │ ├── ChunkContextGenerator.cs.meta │ ├── EnglishHeadRules.cs │ ├── EnglishHeadRules.cs.meta │ ├── EnglishTreebankParser.cs │ ├── EnglishTreebankParser.cs.meta │ ├── IHeadRules.cs │ ├── IHeadRules.cs.meta │ ├── IParserChunker.cs │ ├── IParserChunker.cs.meta │ ├── IParserTagger.cs │ ├── IParserTagger.cs.meta │ ├── MaximumEntropyParser.cs │ ├── MaximumEntropyParser.cs.meta │ ├── Parse.cs │ ├── Parse.cs.meta │ ├── ParserEventReader.cs │ └── ParserEventReader.cs.meta │ ├── PosTagger.meta │ ├── PosTagger │ ├── DefaultPosContextGenerator.cs │ ├── DefaultPosContextGenerator.cs.meta │ ├── EnglishMaximumEntropyPosTagger.cs │ ├── EnglishMaximumEntropyPosTagger.cs.meta │ ├── IPosContextGenerator.cs │ ├── IPosContextGenerator.cs.meta │ ├── IPosTagger.cs │ ├── IPosTagger.cs.meta │ ├── MaximumEntropyPosTagger.cs │ ├── MaximumEntropyPosTagger.cs.meta │ ├── PosEventReader.cs │ ├── PosEventReader.cs.meta │ ├── PosLookupList.cs │ ├── PosLookupList.cs.meta │ ├── PosLookupListWriter.cs │ └── PosLookupListWriter.cs.meta │ ├── Sentence.cs │ ├── Sentence.cs.meta │ ├── SentenceDetect.meta │ ├── SentenceDetect │ ├── DefaultEndOfSentenceScanner.cs │ ├── DefaultEndOfSentenceScanner.cs.meta │ ├── EnglishMaximumEntropySentenceDetector.cs │ ├── EnglishMaximumEntropySentenceDetector.cs.meta │ ├── IEndOfSentenceScanner.cs │ ├── IEndOfSentenceScanner.cs.meta │ ├── ISentenceDectector.cs │ ├── ISentenceDectector.cs.meta │ ├── MaximumEntropySentenceDetector.cs │ ├── MaximumEntropySentenceDetector.cs.meta │ ├── SentenceDetectionContextGenerator.cs │ ├── SentenceDetectionContextGenerator.cs.meta │ ├── SentenceDetectionEvent.cs │ ├── SentenceDetectionEvent.cs.meta │ ├── SentenceDetectionEventReader.cs │ └── SentenceDetectionEventReader.cs.meta │ ├── Stem.meta │ ├── Stem │ ├── 
IStemmer.cs │ ├── IStemmer.cs.meta │ ├── RegexStemmer.cs │ └── RegexStemmer.cs.meta │ ├── Token.cs │ ├── Token.cs.meta │ ├── Tokenize.meta │ ├── Tokenize │ ├── EnglishMaximumEntropyTokenizer.cs │ ├── EnglishMaximumEntropyTokenizer.cs.meta │ ├── ITokenizer.cs │ ├── ITokenizer.cs.meta │ ├── MaximumEntropyTokenizer.cs │ ├── MaximumEntropyTokenizer.cs.meta │ ├── TokenContextGenerator.cs │ ├── TokenContextGenerator.cs.meta │ ├── TokenEventReader.cs │ ├── TokenEventReader.cs.meta │ ├── TokenSpanEventReader.cs │ └── TokenSpanEventReader.cs.meta │ ├── Txt2Vec.meta │ ├── Txt2Vec │ ├── Decoder.cs │ ├── Decoder.cs.meta │ ├── Encoder.cs │ ├── Encoder.cs.meta │ ├── Model.cs │ ├── Model.cs.meta │ ├── OneHotEncoder.cs │ ├── OneHotEncoder.cs.meta │ ├── Shrink.cs │ ├── Shrink.cs.meta │ ├── VectorGenerator.cs │ └── VectorGenerator.cs.meta │ ├── Util.meta │ ├── Util │ ├── BeamSearch.cs │ ├── BeamSearch.cs.meta │ ├── Cache.cs │ ├── Cache.cs.meta │ ├── CollectionEventStream.cs │ ├── CollectionEventStream.cs.meta │ ├── CountedSet.cs │ ├── CountedSet.cs.meta │ ├── HashList.cs │ ├── HashList.cs.meta │ ├── HashSet.cs │ ├── HashSet.cs.meta │ ├── IBeamSearchContextGenerator.cs │ ├── IBeamSearchContextGenerator.cs.meta │ ├── IHeap.cs │ ├── IHeap.cs.meta │ ├── ListHeap.cs │ ├── ListHeap.cs.meta │ ├── Pair.cs │ ├── Pair.cs.meta │ ├── ReverseListIterator.cs │ ├── ReverseListIterator.cs.meta │ ├── Sequence.cs │ ├── Sequence.cs.meta │ ├── Set.cs │ ├── Set.cs.meta │ ├── SortedSet.cs │ ├── SortedSet.cs.meta │ ├── Span.cs │ ├── Span.cs.meta │ ├── StringTokenizer.cs │ ├── StringTokenizer.cs.meta │ ├── TreeHeap.cs │ ├── TreeHeap.cs.meta │ ├── TreeSet.cs │ └── TreeSet.cs.meta │ ├── VX.NLP.Runtime.VoxellNLP.asmdef │ ├── VX.NLP.Runtime.VoxellNLP.asmdef.meta │ ├── csc.rsp │ └── csc.rsp.meta ├── Samples~ └── NLPBasicExamples │ ├── Scenes.meta │ ├── Scenes │ ├── NLPBasicExamples.unity │ └── NLPBasicExamples.unity.meta │ ├── Scripts.meta │ └── Scripts │ ├── NLPNaiveBayesClassifier.cs │ ├── 
NLPNaiveBayesClassifier.cs.meta │ ├── NLPNamedEntityRecognition.cs │ ├── NLPNamedEntityRecognition.cs.meta │ ├── NLPPOSTagger.cs │ ├── NLPPOSTagger.cs.meta │ ├── NLPRegexStemmer.cs │ ├── NLPRegexStemmer.cs.meta │ ├── NLPSentenceSplitter.cs │ ├── NLPSentenceSplitter.cs.meta │ ├── NLPTokenizer.cs │ └── NLPTokenizer.cs.meta ├── docs~ ├── Makefile ├── make.bat └── source │ ├── classifier.rst │ ├── conf.py │ ├── index.rst │ ├── named_entity_recognition.rst │ ├── pos_tagger.rst │ ├── sentence_splitter.rst │ └── tokenizer.rst ├── package.json └── package.json.meta /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: voxelltech 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: voxelltech 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: ["paypal.me/voxelltechnologies"] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. 
See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New Feature 3 | about: Creating a new feature for this project. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Summary** 11 | 12 | A quick intro or summary on the feature you want to create. 13 | 14 | **Intended Outcome** 15 | 16 | - What is the use case of this? 17 | - How will it affect/benefit the project? 18 | 19 | **How will it work?** 20 | 21 | - How to use this feature? 22 | 23 | *finally, please assign yourself to this issue if you intend to work on this new feature thanks!* 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | __pycache__/ 3 | checkpoints/ 4 | inference/ 5 | env/ 6 | temp/ 7 | build/ 8 | 9 | # This .gitignore file should be placed at the root of your Unity project directory 10 | # 11 | # Get latest from https://github.com/github/gitignore/blob/master/Unity.gitignore 12 | # 13 | /[Ll]ibrary/ 14 | /[Tt]emp/ 15 | /[Oo]bj/ 16 | /[Bb]uild/ 17 | /[Bb]uilds/ 18 | /[Ll]ogs/ 19 | /[Uu]ser[Ss]ettings/ 20 | /[Rr]ecordings/ 21 | /[Rr]eleases/ 22 | /[Rr]elease/ 23 | 24 | 25 | # MemoryCaptures can get excessive in size. 
26 | # They also could contain extremely sensitive data 27 | /[Mm]emoryCaptures/ 28 | 29 | # Asset meta data should only be ignored when the corresponding asset is also ignored 30 | !/[Aa]ssets/**/*.meta 31 | 32 | # Uncomment this line if you wish to ignore the asset store tools plugin 33 | # /[Aa]ssets/AssetStoreTools* 34 | 35 | # Autogenerated Jetbrains Rider plugin 36 | /[Aa]ssets/Plugins/Editor/JetBrains* 37 | 38 | # Visual Studio cache directory 39 | .vs/ 40 | 41 | # Gradle cache directory 42 | .gradle/ 43 | 44 | # Autogenerated VS/MD/Consulo solution and project files 45 | ExportedObj/ 46 | .consulo/ 47 | *.csproj 48 | *.unityproj 49 | *.sln 50 | *.suo 51 | *.tmp 52 | *.user 53 | *.userprefs 54 | *.pidb 55 | *.booproj 56 | *.svd 57 | *.pdb 58 | *.mdb 59 | *.opendb 60 | *.VC.db 61 | 62 | # Unity3D generated meta files 63 | *.pidb.meta 64 | *.pdb.meta 65 | *.mdb.meta 66 | 67 | # Unity3D generated file on crash reports 68 | sysinfo.txt 69 | 70 | # Builds 71 | *.apk 72 | *.aab 73 | *.unitypackage 74 | *.unitypackage.meta 75 | *.exe 76 | 77 | # Crashlytics generated file 78 | crashlytics-build.properties 79 | 80 | # Packed Addressables 81 | /[Aa]ssets/[Aa]ddressable[Aa]ssets[Dd]ata/*/*.bin* 82 | 83 | # Temporary auto-generated Android Assets 84 | /[Aa]ssets/[Ss]treamingAssets/aa.meta 85 | /[Aa]ssets/[Ss]treamingAssets/aa/* 86 | 87 | # Tensorflow trained checkpoints and weights 88 | *.h5 89 | *.h5.meta 90 | *.pbmm 91 | *.pbmm.meta 92 | *.tflite 93 | *.tflite.meta 94 | 95 | # API keys 96 | *.apikey 97 | 98 | # pycaches 99 | *.pyc 100 | 101 | # audio files 102 | AudioClips/ 103 | *.mp3 104 | *.wav 105 | *.audio 106 | 107 | # license 108 | LICENSE.meta 109 | 110 | # large libraries 111 | native/ 112 | tensorflow.dll 113 | tensorflow.dll.meta -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.2.0] 2 | 3 | ## [1.1.0] 4 | 5 | ### New 
Features 6 | 7 | - Multinomial Naive Bayes topical classifier. 8 | - Regex stemmer. 9 | - One hot encoder. 10 | - Text to vector. 11 | - Tokens and Sentence class to store necessary features. 12 | 13 | ### New Samples 14 | 15 | - Multinomial Naive Bayes intent classifier with 22 classes. 16 | 17 | ## [1.0.0] 18 | 19 | - Initial release. 20 | - Implemented OpenNLP as the backend of this package. -------------------------------------------------------------------------------- /CHANGELOG.md.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4a131fe6669dcaf4fa85c7077f4b47af 3 | TextScriptImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Pictures~/MultinomialNaiveBayesClassifierExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nixon-voxell/UnityNLP/4d9bfd13e7b63495ff007df70b4aa20fa94d92d4/Pictures~/MultinomialNaiveBayesClassifierExample.png -------------------------------------------------------------------------------- /Pictures~/NamedEntityRecognitionExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nixon-voxell/UnityNLP/4d9bfd13e7b63495ff007df70b4aa20fa94d92d4/Pictures~/NamedEntityRecognitionExample.png -------------------------------------------------------------------------------- /Pictures~/POSTaggerExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nixon-voxell/UnityNLP/4d9bfd13e7b63495ff007df70b4aa20fa94d92d4/Pictures~/POSTaggerExample.png -------------------------------------------------------------------------------- /Pictures~/SentenceSplitterExample.png: -------------------------------------------------------------------------------- 
using UnityEngine;
using System;
using System.IO;
using SharpEntropy;
using SharpEntropy.IO;

namespace ModelConverter
{
    /// <summary>
    /// Converts GIS model files with a ".bin" extension into ".nbin" files
    /// written by <see cref="BinaryGisModelWriter"/>.
    /// </summary>
    /// <remarks>
    /// Each source file is loaded through <see cref="JavaBinaryGisModelReader"/>
    /// (presumably the Java MaxEnt/OpenNLP binary layout, judging by the reader's
    /// name - confirm against the reader implementation).
    /// NOTE(review): <c>ConvertFolder</c> is private and no caller is visible in
    /// this file, so the class currently exposes no public entry point.
    /// </remarks>
    public static class Converter
    {
        // Extension of the files picked up for conversion (matched case-sensitively).
        private const string SourceExtension = ".bin";

        // Extension given to the converted copy written next to the source file.
        private const string TargetExtension = ".nbin";

        /// <summary>
        /// Recursively converts every ".bin" file found in <paramref name="folder"/>
        /// and all of its sub-folders, writing a sibling ".nbin" file for each one.
        /// </summary>
        /// <param name="folder">Path of the directory to scan.</param>
        /// <returns>
        /// <c>true</c> once the folder and all sub-folders have been processed.
        /// No code path currently returns <c>false</c>; failures surface as
        /// exceptions thrown by the file system or the model reader/writer.
        /// </returns>
        private static bool ConvertFolder(string folder)
        {
            BinaryGisModelWriter writer = new BinaryGisModelWriter();

            foreach (string file in Directory.GetFiles(folder))
            {
                // EndsWith is safe for paths shorter than the extension, where the
                // previous Substring(Length - 4, 4) call threw ArgumentOutOfRangeException.
                if (!file.EndsWith(SourceExtension, StringComparison.Ordinal))
                {
                    continue;
                }

                // Only swap the trailing extension. string.Replace would also rewrite
                // ".bin" occurring in a directory name or earlier in the file name.
                string target = Path.ChangeExtension(file, TargetExtension);

                Debug.Log("converting " + file + " ...");
                writer.Persist(new GisModel(new JavaBinaryGisModelReader(file)), target);
                Debug.Log("done");
            }

            foreach (string directory in Directory.GetDirectories(folder))
            {
                // Kept so that a future failure signal from a nested call still
                // short-circuits the traversal.
                if (!ConvertFolder(directory))
                {
                    return false;
                }
            }

            return true;
        }
    }
}
{instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/BasicContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: eff35c6542928924a90ab77dcce237b5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/BasicEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d13b57745003ad6419167c536b952278 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/ComparableEvent.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 237f5c390bdde6b42b99e54d12f15379 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/GisModel.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e8e8b1af17348df42a357a6ad0f10bf5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | 
assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/GisTrainer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 356a8d7b6e532d54d800909a93c0e87e 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IContextGenerator.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the ContextGenerator.java source file found in the 18 | //original java implementation of MaxEnt. 
// That source file contains the following header:

// Copyright (C) 2001 Jason Baldridge and Gann Bierner
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;

namespace SharpEntropy
{
    /// <summary>
    /// Generate contexts for maximum entropy decisions.
    /// </summary>
    /// <remarks>
    /// Authors: Jason Baldridge, Richard J. Northedge.
    /// Based on ContextGenerator.java, $Revision: 1.1.1.1 $, $Date: 2001/10/23 14:06:53 $
    /// </remarks>
    public interface IContextGenerator
    {
        /// <summary>
        /// Builds up the list of contextual predicates given an object.
        /// </summary>
        /// <param name="input">The object to build the context from.</param>
        /// <returns>The contextual predicates, as an array of strings.</returns>
        string[] GetContext(object input);
    }

    /// <summary>
    /// Generate contexts for maximum entropy decisions.
    /// </summary>
    /// <typeparam name="T">Type of the object the context is built from.</typeparam>
    public interface IContextGenerator<T>
    {
        /// <summary>
        /// Builds up the list of contextual predicates given an object of type T.
        /// </summary>
        /// <param name="input">The object to build the context from.</param>
        /// <returns>The contextual predicates, as an array of strings.</returns>
        string[] GetContext(T input);
    }
}
-------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/BinaryGisModelWriter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c9010091bf4e4e64e8afd446d75d0e61 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/GisModelReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8d3b2cbc22382424f8dc75709eab3ece 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/GisModelWriter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f2e2fcf4e947df54593e7111dd019c8b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/IGisModelReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3c50e5c109edb994286a8d49e0d4b500 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/JavaBinaryGisModelReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6d3d6cb244a28724c9c1cf16e1d2051e 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/JavaBinaryGisModelWriter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6cd6b6a5de26c724a8a7f39d889b0fd2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/PlainTextGisModelReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 15737d241f4f6f849a8464b6d1ec2e79 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/IO/PlainTextGisModelWriter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 0da2d7f8890f5bf42989981dc92bbd55 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/SharpEntropy/ITrainingDataIndexer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f5e1ca8c550a77140a3697f8efe80064 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/ITrainingDataReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 48b0b5093a7c1db49aa785c23eec6030 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/ITrainingEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c2366b9898c2ce6478f9b17761849d32 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/OnePassDataIndexer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: fb1b8b1e4b0ee7345a6f6600274b06da 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/SharpEntropy/PatternedPredicate.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3f249474f98f6fa44a284d41576692f1 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/PlainTextByLineDataReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7d1da4d6eff4f764496ec9b88c112ef1 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/TrainingEvent.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7edd16cc548f0b542ba2e9295d1c90c7 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpEntropy/TwoPassDataIndexer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: fe9032f5757bd0e45a4a702321910765 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/SharpEntropy/VX.NLP.Runtime.SharpEntropy.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "VX.NLP.Runtime.SharpEntropy", 3 | "rootNamespace": "", 4 | "references": [], 5 | "includePlatforms": [], 6 | "excludePlatforms": [], 7 | "allowUnsafeCode": false, 8 | "overrideReferences": false, 9 | "precompiledReferences": [], 10 | "autoReferenced": true, 11 | "defineConstraints": [], 12 | "versionDefines": [], 13 | "noEngineReferences": false 14 | } -------------------------------------------------------------------------------- /Runtime/SharpEntropy/VX.NLP.Runtime.SharpEntropy.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 094afe385ce3df645871cfee687d8406 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 612c1ce74046723499ef41c27d144045 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/DataFileEngine.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 41f608d9422a8854d934b52dba49c75f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/IndexWord.cs: 
-------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | using System; 18 | using System.Linq; 19 | 20 | namespace SharpWordNet 21 | { 22 | /// 23 | /// Summary description for IndexWord. 24 | /// 25 | public class IndexWord 26 | { 27 | // Properties ------------------------ 28 | 29 | public string PartOfSpeech { get; private set; } 30 | 31 | public int[] SynsetOffsets { get; private set; } 32 | 33 | public string Lemma { get; private set; } 34 | 35 | public int SenseCount 36 | { 37 | get { return this.SynsetOffsets != null ? 
this.SynsetOffsets.Count() : 0; } 38 | } 39 | 40 | public int TagSenseCount { get; private set; } 41 | 42 | public string[] RelationTypes { get; private set; } 43 | 44 | 45 | // Constructors -------------------- 46 | 47 | public IndexWord(string lemma, string partOfSpeech, string[] relationTypes, int[] synsetOffsets, int tagSenseCount) 48 | { 49 | this.Lemma = lemma; 50 | this.PartOfSpeech = partOfSpeech; 51 | this.RelationTypes = relationTypes; 52 | this.SynsetOffsets = synsetOffsets; 53 | this.TagSenseCount = tagSenseCount; 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/IndexWord.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4323eceb3dce6da46a54f9d9da74a30a 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2e085165fb4dd0f418bbe8c658058db9 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/AbstractDelegatingOperation.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c690100b385a6014dbe29cb999736b40 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/DetachSuffixesOperation.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the DetachSuffixesOperation.java source file found in 18 | //the Java WordNet Library (JWNL). That source file is licensed under BSD. 19 | 20 | using System; 21 | using System.Collections.Generic; 22 | using System.Text; 23 | 24 | namespace SharpWordNet.Morph 25 | { 26 | /// 27 | /// Remove all applicable suffixes from the word(s) and do a look-up. 
28 | /// 29 | public class DetachSuffixesOperation : AbstractDelegatingOperation 30 | { 31 | public const string Operations = "operations"; 32 | 33 | private Dictionary mSuffixMap; 34 | 35 | public DetachSuffixesOperation(Dictionary suffixMap) 36 | { 37 | mSuffixMap = suffixMap; 38 | } 39 | 40 | #region IOperation Members 41 | 42 | public override bool Execute(string lemma, string partOfSpeech, List baseForms) 43 | { 44 | if (!mSuffixMap.ContainsKey(partOfSpeech)) 45 | { 46 | return false; 47 | } 48 | string[][] suffixArray = mSuffixMap[partOfSpeech]; 49 | 50 | bool addedBaseForm = false; 51 | for (int currentSuffix = 0; currentSuffix < suffixArray.Length; currentSuffix++) 52 | { 53 | if (lemma.EndsWith(suffixArray[currentSuffix][0])) 54 | { 55 | string stem = lemma.Substring(0, (lemma.Length - suffixArray[currentSuffix][0].Length) - (0)) + suffixArray[currentSuffix][1]; 56 | if (ExecuteDelegate(stem, partOfSpeech, baseForms, Operations)) 57 | { 58 | addedBaseForm = true; 59 | } 60 | } 61 | } 62 | return addedBaseForm; 63 | } 64 | 65 | #endregion 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/DetachSuffixesOperation.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: cf08c3978b603b54895b7bc04d2da44a 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/IOperation.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. 
Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the Operation.java source file found in 18 | //the Java WordNet Library (JWNL). That source file is licensed under BSD. 19 | 20 | using System; 21 | using System.Collections.Generic; 22 | using System.Text; 23 | 24 | namespace SharpWordNet.Morph 25 | { 26 | public interface IOperation 27 | { 28 | /// 29 | /// Execute the operation. 30 | /// 31 | /// 32 | /// input lemma to look up 33 | /// 34 | /// 35 | /// part of speech of the lemma to look up 36 | /// 37 | /// 38 | /// List to which all discovered base forms should be added. 39 | /// 40 | /// 41 | /// True if at least one base form was discovered by the operation and 42 | /// added to baseForms. 
43 | /// 44 | bool Execute(string lemma, string partOfSpeech, List baseForms); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/IOperation.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2b1656ee21a7be44d977b5c3d4a9129f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/LookupExceptionsOperation.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the LookupExceptionsOperation.java source file found in 18 | //the Java WordNet Library (JWNL). That source file is licensed under BSD. 
19 | 20 | using System; 21 | using System.Collections.Generic; 22 | using System.Text; 23 | 24 | namespace SharpWordNet.Morph 25 | { 26 | /// Lookup the word in the exceptions file of the given part-of-speech. 27 | public class LookupExceptionsOperation : IOperation 28 | { 29 | private WordNetEngine mEngine; 30 | 31 | public LookupExceptionsOperation(WordNetEngine engine) 32 | { 33 | mEngine = engine; 34 | } 35 | 36 | #region IOperation Members 37 | 38 | public bool Execute(string lemma, string partOfSpeech, List baseForms) 39 | { 40 | bool addedBaseForm = false; 41 | string[] exceptionForms = mEngine.GetExceptionForms(lemma, partOfSpeech); 42 | 43 | foreach (string exceptionForm in exceptionForms) 44 | { 45 | if (!baseForms.Contains(exceptionForm)) 46 | { 47 | baseForms.Add(exceptionForm); 48 | addedBaseForm = true; 49 | } 50 | } 51 | 52 | return addedBaseForm; 53 | } 54 | 55 | #endregion 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/LookupExceptionsOperation.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 39fe6c81198b29046aac7be11e809127 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/LookupIndexWordOperation.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 
7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the LookupIndexWordOperation.java source file found in 18 | //the Java WordNet Library (JWNL). That source file is licensed under BSD. 19 | 20 | using System; 21 | using System.Collections.Generic; 22 | using System.Text; 23 | 24 | namespace SharpWordNet.Morph 25 | { 26 | public class LookupIndexWordOperation : IOperation 27 | { 28 | private WordNetEngine mEngine; 29 | 30 | public LookupIndexWordOperation(WordNetEngine engine) 31 | { 32 | mEngine = engine; 33 | } 34 | 35 | #region IOperation Members 36 | 37 | public bool Execute(string lemma, string partOfSpeech, List baseForms) 38 | { 39 | if (!baseForms.Contains(lemma) && mEngine.GetIndexWord(lemma, partOfSpeech) != null) 40 | { 41 | baseForms.Add(lemma); 42 | return true; 43 | } 44 | return false; 45 | } 46 | 47 | #endregion 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/LookupIndexWordOperation.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ae1a5f3fb744a424d8e82e9783d02aa5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/TokenizerOperation.cs.meta: 
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 88fdb7335c1286844a8ca82687af7198 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Morph/Util.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e4380b2b0c690994085b1a62dc8b1f87 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Relation.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | using System; 18 | 19 | namespace SharpWordNet 20 | { 21 | /// 22 | /// Summary description for Relation. 
23 | /// 24 | public class Relation 25 | { 26 | private WordNetEngine mWordNetEngine; 27 | 28 | private RelationType mRelationType; 29 | 30 | private int mTargetSynsetOffset; 31 | private string mTargetSynsetPartOfSpeech; 32 | 33 | private Synset mTargetSynset; 34 | 35 | private int miSourceWord; 36 | private int miTargetWord; 37 | 38 | public RelationType SynsetRelationType 39 | { 40 | get 41 | { 42 | return mRelationType; 43 | } 44 | } 45 | 46 | public int TargetSynsetOffset 47 | { 48 | get 49 | { 50 | return mTargetSynsetOffset; 51 | } 52 | } 53 | 54 | public Synset TargetSynset 55 | { 56 | get 57 | { 58 | if (mTargetSynset == null) 59 | { 60 | mTargetSynset = mWordNetEngine.CreateSynset(mTargetSynsetPartOfSpeech, mTargetSynsetOffset); 61 | } 62 | return mTargetSynset; 63 | } 64 | } 65 | 66 | private Relation() 67 | { 68 | } 69 | 70 | protected internal Relation(WordNetEngine wordNetEngine, RelationType relationType, int targetSynsetOffset, string targetSynsetPartOfSpeech) 71 | { 72 | mWordNetEngine = wordNetEngine; 73 | mRelationType = relationType; 74 | 75 | mTargetSynsetOffset = targetSynsetOffset; 76 | mTargetSynsetPartOfSpeech = targetSynsetPartOfSpeech; 77 | } 78 | 79 | protected internal Relation(WordNetEngine wordNetEngine, RelationType relationType, int targetSynsetOffset, string targetSynsetPartOfSpeech, int sourceWord, int targetWord) : this(wordNetEngine, relationType, targetSynsetOffset, targetSynsetPartOfSpeech) 80 | { 81 | miSourceWord = sourceWord; 82 | miTargetWord = targetWord; 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Relation.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a8d08c9f093434f4faa30a9c7f7c73b4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 
| assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/RelationType.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | using System; 18 | 19 | namespace SharpWordNet 20 | { 21 | /// 22 | /// Summary description for RelationType. 
23 | /// 24 | public class RelationType 25 | { 26 | private string mName; 27 | private RelationType mOpposite; 28 | private string[] mPartsOfSpeech; 29 | 30 | public string Name 31 | { 32 | get 33 | { 34 | return mName; 35 | } 36 | } 37 | 38 | public RelationType Opposite 39 | { 40 | get 41 | { 42 | return mOpposite; 43 | } 44 | } 45 | 46 | public string GetPartOfSpeech(int index) 47 | { 48 | return mPartsOfSpeech[index]; 49 | } 50 | 51 | public int PartsOfSpeechCount 52 | { 53 | get 54 | { 55 | return mPartsOfSpeech.Length; 56 | } 57 | } 58 | 59 | protected internal RelationType(string name, string[] partsOfSpeech) 60 | { 61 | mName = name; 62 | mPartsOfSpeech = partsOfSpeech; 63 | } 64 | 65 | protected internal RelationType(string name, RelationType opposite, string[] partsOfSpeech) 66 | { 67 | mName = name; 68 | mOpposite = opposite; 69 | mPartsOfSpeech = partsOfSpeech; 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/RelationType.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4dae6547fbbfeb64db4da871e42f2521 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Synset.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 0ca793f7c7f544848877d8bcbfa246b8 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Tokenizer.cs: 
-------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | using System; 18 | 19 | namespace SharpWordNet 20 | { 21 | /// 22 | /// Summary description for Tokenizer. 
23 | /// 24 | public class Tokenizer 25 | { 26 | private readonly string[] _tokens; 27 | int _position; 28 | 29 | public Tokenizer(string input, params char[] separators) 30 | { 31 | _tokens = input.Split(separators); 32 | _position = 0; 33 | } 34 | 35 | public string NextToken() 36 | { 37 | while (_position < _tokens.Length) 38 | { 39 | if ((_tokens[_position].Length > 0)) 40 | { 41 | return _tokens[_position++]; 42 | } 43 | _position++; 44 | } 45 | return null; 46 | } 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/Tokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 89fbf9b581646c34a98892ddeeb563e9 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/SharpWordNet/VX.NLP.Runtime.SharpWordNet.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "VX.NLP.Runtime.SharpWordNet", 3 | "rootNamespace": "", 4 | "references": [], 5 | "includePlatforms": [], 6 | "excludePlatforms": [], 7 | "allowUnsafeCode": false, 8 | "overrideReferences": false, 9 | "precompiledReferences": [], 10 | "autoReferenced": true, 11 | "defineConstraints": [], 12 | "versionDefines": [], 13 | "noEngineReferences": false 14 | } -------------------------------------------------------------------------------- /Runtime/SharpWordNet/VX.NLP.Runtime.SharpWordNet.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4cb1ae9a716e8e447bc1a8026800340d 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | 
-------------------------------------------------------------------------------- /Runtime/SharpWordNet/WordNetEngine.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 53ed488ee2e59fb40b55cb7169ff70a2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VX.NLP.Runtime.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "VX.NLP.Runtime", 3 | "rootNamespace": "", 4 | "references": [ 5 | "GUID:4cb1ae9a716e8e447bc1a8026800340d", 6 | "GUID:094afe385ce3df645871cfee687d8406", 7 | "GUID:5d052c2f1dea4e64ab062f48c70faf0e" 8 | ], 9 | "includePlatforms": [], 10 | "excludePlatforms": [], 11 | "allowUnsafeCode": false, 12 | "overrideReferences": false, 13 | "precompiledReferences": [], 14 | "autoReferenced": true, 15 | "defineConstraints": [], 16 | "versionDefines": [], 17 | "noEngineReferences": false 18 | } -------------------------------------------------------------------------------- /Runtime/VX.NLP.Runtime.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: aa41269285d05ea47a1bf4986430d9e3 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8baf881a1229ede42a17fc1d89b23824 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Chunker.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e85db816c5dffa54b8ae4dba2c4b5afc 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Chunker/ChunkerEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 86d011282dc2d3f4bb65df8f7e5d8548 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Chunker/DefaultChunkerContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4d678a9d4b1f2f64184cbfaa6f808b82 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Chunker/EnglishTreebankChunker.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a499ec2b60f72634d9a2445b1ddd973a 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

//This file is based on the ChunkerContextGenerator.java source file found in the
//original java implementation of OpenNLP.

using System;
using System.Collections;

namespace Voxell.NLP.Chunker
{
  /// <summary>
  /// Contract for objects that produce the predictive contexts used by a chunker.
  /// </summary>
  public interface IChunkerContextGenerator : Util.IBeamSearchContextGenerator
  {
    /// <summary>
    /// Builds the contexts used to chunk the token at the given index.
    /// </summary>
    /// <param name="tokenIndex">
    /// Index, within <paramref name="tokens"/>, of the token for which the context is constructed.
    /// </param>
    /// <param name="tokens">
    /// The tokens of the sentence. Each object's ToString method should return the token text.
    /// </param>
    /// <param name="tags">
    /// The part-of-speech tags for the specified tokens.
    /// </param>
    /// <param name="previousDecisions">
    /// The decisions already made while tagging this sequence. Only indices
    /// less than <paramref name="tokenIndex"/> will be examined.
    /// </param>
    /// <returns>
    /// An array of predictive contexts on which a model bases its decisions.
    /// </returns>
    string[] GetContext(int tokenIndex, object[] tokens, string[] tags, string[] previousDecisions);
  }
}
using System;
using System.Collections.Generic;
using UnityEngine;
using Voxell.Inspector;

namespace Voxell.NLP.Classifier
{
  /// <summary>
  /// Serializable settings shared by classifier operations: the model file
  /// location, the vocabulary size and the set of known labels.
  /// </summary>
  [System.Serializable]
  public struct ClassifyOptions
  {
    [StreamingAssetFilePath, SerializeField] private string modelFilePath;
    [Tooltip("Vocabulary size")] public int dimension;
    [Tooltip("Types of labels"), InspectOnly] public List<string> labels;

    /// <summary>
    /// Adds <paramref name="label"/> to <see cref="labels"/> unless it is already present.
    /// </summary>
    /// <param name="label">The label to register.</param>
    public void AddLabel(string label)
    {
      // A default-constructed struct leaves the list null; create it on first
      // use instead of failing with a NullReferenceException.
      if (labels == null)
      {
        labels = new List<string>();
      }

      if (!labels.Contains(label))
      {
        labels.Add(label);
      }
    }

    /// <summary>
    /// Resolves the configured model path through
    /// <c>FileUtilx.GetStreamingAssetFilePath</c>.
    /// </summary>
    /// <returns>The path returned by <c>FileUtilx.GetStreamingAssetFilePath</c>.</returns>
    public string GetModelFilePath() => FileUtilx.GetStreamingAssetFilePath(modelFilePath);
  }

  /// <summary>
  /// Operations a classifier must provide: training, classification, and
  /// saving/loading a model.
  /// </summary>
  public interface IClassifier
  {
    /// <summary>
    /// Training by feature vector.
    /// </summary>
    /// <param name="sentences">The sentences to train on.</param>
    /// <param name="options">The classification options to use.</param>
    void Train(List<Sentence> sentences, ClassifyOptions options);

    /// <summary>
    /// Predict by feature vector.
    /// </summary>
    /// <param name="sentence">The sentence to classify.</param>
    /// <param name="options">The classification options to use.</param>
    /// <returns>The classification results for <paramref name="sentence"/>.</returns>
    // NOTE(review): the generic arguments of the return type are not visible in
    // this copy of the file; reconstructed as label/score pairs — confirm
    // against the implementing classifier.
    List<Tuple<string, double>> Classify(Sentence sentence, ClassifyOptions options);

    /// <summary>
    /// Saves the model using the supplied options.
    /// </summary>
    /// <param name="options">The classification options to use.</param>
    void SaveModel(ClassifyOptions options);

    /// <summary>
    /// Loads the model using the supplied options.
    /// </summary>
    /// <param name="options">The classification options to use.</param>
    void LoadModel(ClassifyOptions options);
  }
}
| assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/DefaultLinker.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 790d35db05b726146a8b3523852fb8b0 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/DiscourseElement.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: fed9bf697dfebe44497e5c100ab6abbc 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/DiscourseEntity.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 63d53ad0b78d7fb4b8f0554566d8f50e 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/DiscourseModel.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: b5f7c30f3f671c74fba863dd05919499 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | 
assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/ILinker.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: deba19e6acb395542a3ec642a9cc60aa 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/LinkerMode.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the LinkerMode.java source file found in the 18 | //original java implementation of OpenNLP. 
//That source file contains the following header:

//Copyright (C) 2003 Thomas Morton
//
//This library is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This library is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

using System;
namespace Voxell.NLP.Coreference
{
  /// <summary>
  /// The modes in which a linker can run.
  /// </summary>
  public enum LinkerMode
  {
    /// <summary>
    /// Testing mode: identifies coreference relationships in un-annotated text.
    /// </summary>
    Test = 0,

    /// <summary>
    /// Training mode: learns coreference relationships from annotated text.
    /// </summary>
    Train = 1,

    /// <summary>
    /// Evaluation mode: evaluates identified coreference relationships against annotated text.
    /// </summary>
    Eval = 2,

    /// <summary>
    /// Training mode, used to learn coreference relationships in annotated text.
    /// </summary>
    // NOTE(review): this description is identical to Train's; how Sim differs
    // is not stated here — confirm against the linker implementations.
    Sim = 3
  }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/DefaultParse.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ae1652bf47b62d545ae025d40cb51f58 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/DictionaryFactory.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the DictionaryFactory.java source file found in the 18 | //original java implementation of OpenNLP. 
//That source file contains the following header:

//Copyright (C) 2003 Thomas Morton
//
//This library is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This library is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

using System.Configuration;

namespace Voxell.NLP.Coreference.Mention
{
  /// <summary>
  /// Factory that hands out a single shared dictionary instance.
  /// </summary>
  public class DictionaryFactory
  {
    // The one dictionary created by the first call to GetDictionary.
    private static IDictionary mDictionary;

    // Private so the class cannot be instantiated; all members are static.
    private DictionaryFactory()
    {
    }

    /// <summary>
    /// Returns the shared dictionary, creating a <c>WordnetDictionary</c> on the first call.
    /// </summary>
    /// <param name="searchDirectory">
    /// Passed to the <c>WordnetDictionary</c> constructor when the shared instance
    /// is first created. Ignored on later calls, which return the cached instance.
    /// </param>
    /// <returns>The shared dictionary instance.</returns>
    public static IDictionary GetDictionary(string searchDirectory)
    {
      return mDictionary ?? (mDictionary = new WordnetDictionary(searchDirectory));
    }

    /// <summary>
    /// Returns the shared dictionary, taking the search directory from the
    /// "WordnetSearchDirectory" application setting.
    /// </summary>
    /// <returns>The shared dictionary instance.</returns>
    public static IDictionary GetDictionary()
    {
      string configuredDirectory = ConfigurationManager.AppSettings["WordnetSearchDirectory"];
      return GetDictionary(configuredDirectory);
    }
  }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/IMentionFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 489fa6035e5dbee4b81d6513f71b53e2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/IParse.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e1efa30afd0ce9f44b71cf68c04cef16 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/JWNLDictionary.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7719efc6fab8a51468374957afcb682e 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/Mention.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: fc91082d0332f3f40991ca3b505ab947 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/MentionContext.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c81b16e74cdd49f498b6ef3525a4a604 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/PennTreebankHeadFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 90fd50c2769c36442a6883d13bbd16c6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/PennTreebankMentionFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 0699d83fb8dda084089e2f76e68b5fb6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/ShallowParseMentionFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e3fdf773ef682b84b9dcf446b5b59152 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 
11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Mention/WordnetDictionary.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 43bb46b2e772d584e853cd653a2c57d2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c1b53d610a4238d4ca4af800d3f4acbe 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/AbstractResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 871b6a846d0847f478dae3600ec3c79f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/CommonNounResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e773d1ac9d23c81449328c889783afca 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/DefaultNonReferentialResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6dcf1208c97602c4cad4b555c4b6461c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/DefiniteNounResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 321e98836a0995b4bbd1f0bba50935e8 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/FixedNonReferentialResolver.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 
using System;
using MentionContext = Voxell.NLP.Coreference.Mention.MentionContext;

namespace Voxell.NLP.Coreference.Resolver
{
    /// <summary>
    /// Implementation of non-referential classifier which uses a fixed-value threshold.
    /// </summary>
    public class FixedNonReferentialResolver : INonReferentialResolver
    {
        // Assigned exactly once, in the constructor; readonly makes that explicit
        // and lets the compiler reject accidental reassignment.
        private readonly double mNonReferentialProbability;

        /// <summary>
        /// Creates a resolver that reports the same value for every mention.
        /// </summary>
        /// <param name="nonReferentialProbability">
        /// Value later returned by <see cref="GetNonReferentialProbability"/>.
        /// It is stored as given; no range validation is performed.
        /// </param>
        public FixedNonReferentialResolver(double nonReferentialProbability)
        {
            mNonReferentialProbability = nonReferentialProbability;
        }

        /// <summary>
        /// Returns the value supplied at construction time.
        /// </summary>
        /// <param name="mention">Ignored; the result does not depend on the mention.</param>
        /// <returns>The fixed value passed to the constructor.</returns>
        public virtual double GetNonReferentialProbability(MentionContext mention)
        {
            return mNonReferentialProbability;
        }

        /// <summary>
        /// Does nothing: this implementation has an empty body and keeps no events.
        /// </summary>
        /// <param name="mention">Ignored.</param>
        public virtual void AddEvent(MentionContext mention)
        {
        }

        /// <summary>
        /// Does nothing: this implementation has an empty body.
        /// </summary>
        public virtual void Train()
        {
        }
    }
}
icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/IsAResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2fe08a45cdbf7074ca2ba5cf1328786c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/MaximumEntropyResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: cfbca4617f994d64fa688d6de074f6d7 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/PerfectResolver.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 
using System;
using DiscourseEntity = Voxell.NLP.Coreference.DiscourseEntity;
using DiscourseModel = Voxell.NLP.Coreference.DiscourseModel;
using MentionContext = Voxell.NLP.Coreference.Mention.MentionContext;

namespace Voxell.NLP.Coreference.Resolver
{
    /// <summary>
    /// Resolver used in training to update the discourse model based on the coreference annotation.
    /// </summary>
    public class PerfectResolver : AbstractResolver
    {
        /// <summary>
        /// Passes 0 to the <see cref="AbstractResolver"/> constructor.
        /// NOTE(review): the meaning of that argument is defined by the base class, not visible here.
        /// </summary>
        public PerfectResolver() : base(0)
        {
        }

        /// <summary>
        /// Always reports that the mention can be resolved.
        /// </summary>
        /// <param name="ec">Ignored.</param>
        /// <returns>Always <c>true</c>.</returns>
        public override bool CanResolve(MentionContext ec)
        {
            return true;
        }

        /// <summary>
        /// Always reports that the entity is in range.
        /// </summary>
        /// <param name="ec">Ignored.</param>
        /// <param name="de">Ignored.</param>
        /// <returns>Always <c>false</c>.</returns>
        protected internal override bool IsOutOfRange(MentionContext ec, DiscourseEntity de)
        {
            return false;
        }

        /// <summary>
        /// Performs no resolution of its own.
        /// </summary>
        /// <param name="ec">Ignored.</param>
        /// <param name="dm">Ignored.</param>
        /// <returns>Always <c>null</c>.</returns>
        public override DiscourseEntity Resolve(MentionContext ec, DiscourseModel dm)
        {
            return null;
        }
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/ProperNounResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 58ab4bca16f7c1f4c94eb9d953eeb64d 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/ResolverMode.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: da3de1298a742624bab14dc557ecd035 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/SingletonNonReferentialResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f7ccf420c322a2e46ab3e58abdc27717 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/SingularPronounResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 77c9b863ad1932d4f9046ae4007be7da 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | 
assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Resolver/SpeechPronounResolver.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bca6b5c16eddfef4784fb2419880bbef 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 69ab0a5e30123a24ca1b65ff64a228ad 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/Context.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e19d481c1d7c7a9449537f3fd98a4511 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/Gender.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. 
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Immutable pairing of a <see cref="GenderEnum"/> value with a confidence score.
    /// </summary>
    public class Gender
    {
        // Both fields are written only by the constructor; readonly enforces that.
        private readonly GenderEnum mType;
        private readonly double mConfidence;

        /// <summary>
        /// The gender type supplied at construction.
        /// </summary>
        public virtual GenderEnum Type
        {
            get
            {
                return mType;
            }
        }

        /// <summary>
        /// The confidence value supplied at construction, returned unchanged.
        /// </summary>
        public virtual double Confidence
        {
            get
            {
                return mConfidence;
            }
        }

        /// <summary>
        /// Stores the given type and confidence; neither value is validated.
        /// </summary>
        /// <param name="type">Gender type exposed through <see cref="Type"/>.</param>
        /// <param name="confidence">Score exposed through <see cref="Confidence"/>.</param>
        public Gender(GenderEnum type, double confidence)
        {
            mType = type;
            mConfidence = confidence;
        }
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/GenderEnum.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 311105159393aa049886e189f323d328 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/GenderModel.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8eb464d15f0a2994c94896e4d92e5ae7 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/ITestGenderModel.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Interface for testing a gender model.
    /// </summary>
    public interface ITestGenderModel
    {
        /// <summary>
        /// Index associated with the male outcome.
        /// NOTE(review): presumably a position in the array returned by
        /// <see cref="GenderDistribution"/> - confirm against implementations.
        /// </summary>
        int MaleIndex { get; }

        /// <summary>
        /// Index associated with the female outcome (same caveat as <see cref="MaleIndex"/>).
        /// </summary>
        int FemaleIndex { get; }

        /// <summary>
        /// Index associated with the neuter outcome (same caveat as <see cref="MaleIndex"/>).
        /// </summary>
        int NeuterIndex { get; }

        /// <summary>
        /// Produces an array of values describing the gender of the given noun phrase.
        /// </summary>
        /// <param name="nounPhrase">The noun phrase context to evaluate.</param>
        /// <returns>An array of doubles; its layout is defined by the implementation.</returns>
        double[] GenderDistribution(Context nounPhrase);
    }
}
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Interface for testing a number model.
    /// </summary>
    public interface ITestNumberModel
    {
        /// <summary>
        /// Index associated with the singular outcome.
        /// NOTE(review): presumably a position in the array returned by
        /// <see cref="NumberDistribution"/> - confirm against implementations.
        /// </summary>
        int SingularIndex { get; }

        /// <summary>
        /// Index associated with the plural outcome (same caveat as <see cref="SingularIndex"/>).
        /// </summary>
        int PluralIndex { get; }

        /// <summary>
        /// Produces an array of values describing the grammatical number of the given noun phrase.
        /// </summary>
        /// <param name="nounPhrase">The noun phrase context to evaluate.</param>
        /// <returns>An array of doubles; its layout is defined by the implementation.</returns>
        double[] NumberDistribution(Context nounPhrase);
    }
}
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Interface for testing a similarity model.
    /// </summary>
    public interface ITestSimilarityModel
    {
        /// <summary>
        /// Scores how compatible two noun phrases are with each other.
        /// </summary>
        /// <param name="firstNounPhrase">Context of the first noun phrase.</param>
        /// <param name="secondNounPhrase">Context of the second noun phrase.</param>
        /// <returns>
        /// A compatibility score as a double.
        /// NOTE(review): the scale and direction of the score are set by the implementation - confirm there.
        /// </returns>
        double AreCompatible(Context firstNounPhrase, Context secondNounPhrase);
    }
}
Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the TrainSimilarityModel.java source file found in the 18 | //original java implementation of OpenNLP. That source file contains the following header: 19 | 20 | //Copyright (C) 2003 Thomas Morton 21 | // 22 | //This library is free software; you can redistribute it and/or 23 | //modify it under the terms of the GNU Lesser General Public 24 | //License as published by the Free Software Foundation; either 25 | //version 2.1 of the License, or (at your option) any later version. 26 | // 27 | //This library is distributed in the hope that it will be useful, 28 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | //GNU Lesser General Public License for more details. 31 | // 32 | //You should have received a copy of the GNU Lesser General Public 33 | //License along with this program; if not, write to the Free Software 34 | //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Interface for training a similarity, gender, or number model.
    /// </summary>
    public interface ITrainSimilarityModel
    {
        /// <summary>
        /// Creates similarity training pairs based on the specified extents.
        /// Extents are considered compatible if they are in the same coreference chain,
        /// have the same named-entity tag, or share a common head word. Incompatible
        /// extents are chosen at random from the set of extents which don't meet
        /// these criteria.
        /// </summary>
        /// <param name="extents">The extents from which training pairs are built.</param>
        void SetExtents(Context[] extents);

        /// <summary>
        /// Trains the model.
        /// NOTE(review): presumably uses the pairs prepared by <see cref="SetExtents"/> -
        /// confirm against implementations.
        /// </summary>
        void TrainModel();
    }
}
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Immutable pairing of a <see cref="NumberEnum"/> value with a confidence score.
    /// </summary>
    public class Number
    {
        // Both fields are written only by the constructor; readonly enforces that.
        private readonly NumberEnum mType;
        private readonly double mConfidence;

        /// <summary>
        /// The number type supplied at construction.
        /// </summary>
        public virtual NumberEnum Type
        {
            get
            {
                return mType;
            }
        }

        /// <summary>
        /// The confidence value supplied at construction, returned unchanged.
        /// </summary>
        public virtual double Confidence
        {
            get
            {
                return mConfidence;
            }
        }

        /// <summary>
        /// Stores the given type and confidence; neither value is validated.
        /// </summary>
        /// <param name="type">Number type exposed through <see cref="Type"/>.</param>
        /// <param name="confidence">Score exposed through <see cref="Confidence"/>.</param>
        public Number(NumberEnum type, double confidence)
        {
            mType = type;
            mConfidence = confidence;
        }
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/NumberEnum.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the NumberEnum.java source file found in the 18 | //original java implementation of OpenNLP. That source file contains the following header: 19 | 20 | //Copyright (C) 2003 Thomas Morton 21 | // 22 | //This library is free software; you can redistribute it and/or 23 | //modify it under the terms of the GNU Lesser General Public 24 | //License as published by the Free Software Foundation; either 25 | //version 2.1 of the License, or (at your option) any later version. 26 | // 27 | //This library is distributed in the hope that it will be useful, 28 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | //GNU Lesser General Public License for more details. 
using System;

namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Enumeration of number types.
    /// </summary>
    /// <remarks>
    /// Implemented as a class with a private constructor, so the only instances are
    /// the static members declared below.
    /// </remarks>
    public class NumberEnum
    {
        // Assigned once in the private constructor; readonly keeps each instance immutable.
        private readonly string mName;

        // Private so that no instances exist beyond the static members of this class.
        private NumberEnum(string name)
        {
            mName = name;
        }

        /// <summary>
        /// Returns the name given to this instance at construction
        /// ("singular", "plural" or "unknown" for the predefined members).
        /// </summary>
        public override string ToString()
        {
            return mName;
        }

        /// <summary>
        /// Singular number type.
        /// </summary>
        public static readonly NumberEnum Singular = new NumberEnum("singular");

        /// <summary>
        /// Plural number type.
        /// </summary>
        public static readonly NumberEnum Plural = new NumberEnum("plural");

        /// <summary>
        /// Unknown number type.
        /// </summary>
        public static readonly NumberEnum Unknown = new NumberEnum("unknown");
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/SemanticCompatibility.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the SemanticCompatibility.java source file found in the 18 | //original java implementation of OpenNLP. 
using System;
namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Pairs a <see cref="SemanticEnum"/> value with a confidence value.
    /// </summary>
    public class SemanticCompatibility
    {
        private SemanticEnum mType;
        private double mConfidence;

        /// <summary>
        /// Stores the supplied type and confidence unchanged.
        /// </summary>
        /// <param name="type">Value later returned by <see cref="Type"/>.</param>
        /// <param name="confidence">Value later returned by <see cref="Confidence"/>.</param>
        public SemanticCompatibility(SemanticEnum type, double confidence)
        {
            mConfidence = confidence;
            mType = type;
        }

        /// <summary>
        /// The type supplied at construction.
        /// </summary>
        public virtual SemanticEnum Type => mType;

        /// <summary>
        /// The confidence supplied at construction.
        /// </summary>
        public virtual double Confidence => mConfidence;
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/SemanticCompatibility.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c4ced8c0e814ae64a9bc318b37d546b2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/SemanticEnum.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details.
12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the SemanticEnum.java source file found in the 18 | //original java implementation of OpenNLP. 19 |
using System;
namespace Voxell.NLP.Coreference.Similarity
{
    /// <summary>
    /// Enumeration of semantic compatibility outcomes.
    /// </summary>
    /// <remarks>
    /// The constructor is private, so the static members declared below are the
    /// only instances created by this class.
    /// </remarks>
    public class SemanticEnum
    {
        /// <summary>
        /// Semantically compatible.
        /// </summary>
        public static readonly SemanticEnum Compatible = new SemanticEnum("compatible");

        /// <summary>
        /// Semantically incompatible.
        /// </summary>
        public static readonly SemanticEnum Incompatible = new SemanticEnum("incompatible");

        /// <summary>
        /// Semantic compatibility Unknown.
        /// </summary>
        public static readonly SemanticEnum Unknown = new SemanticEnum("unknown");

        // Text returned by ToString() for this value.
        private string mLabel;

        private SemanticEnum(string compatibility)
        {
            mLabel = compatibility;
        }

        /// <summary>
        /// Returns the name this value was created with.
        /// </summary>
        public override string ToString() => mLabel;
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/SemanticEnum.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e5f3bd0bc9e2aae4dbd98dd15c6882d2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/Similarity/SimilarityModel.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7815d0a1e618fed479bef7aadaf8f985 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Coreference/TreebankLinker.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2b3fc782d54e789419f3ec1d156a1a77 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Featuring.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d492d5e436713044fa69d34ee73774e5 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant:
9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Featuring/IFeatureExtractor.cs: --------------------------------------------------------------------------------
using System;
using System.Collections.Generic;

namespace Voxell.NLP.Featuring
{
    /// <summary>
    /// Contract for feature extractors that vectorize the sentences of a corpus.
    /// </summary>
    public interface IFeatureExtractor
    {
        /// <summary>
        /// Feature dimension size
        /// </summary>
        int Dimension { get; set; }

        /// <summary>
        /// The whole corpus
        /// </summary>
        List<Sentence> Sentences { get; set; }

        /// <summary>
        /// Feature names
        /// </summary>
        List<string> Features { get; set; }

        /// <summary>
        /// All words and frequency
        /// </summary>
        // NOTE(review): the element type was lost in this listing (only "List>" survived);
        // Tuple<string, int> (word, frequency) is assumed from the summary above - confirm.
        List<Tuple<string, int>> Dictionary { get; set; }

        /// <summary>
        /// Vectorize sentence
        /// </summary>
        /// <param name="features">The feature names to vectorize against.</param>
        void Vectorize(List<string> features);

        // Array shape (member disabled; kept as a plain comment so its text does not
        // become a second summary on ModelFile).
        //Shape Shape { get; set; }

        /// <summary>
        /// Pre-trained model file path
        /// </summary>
        string ModelFile { get; set; }
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Featuring/IFeatureExtractor.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e011d3c8c04cd4c459098a76948d5290 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Featuring/TfIdfFeatureExtractor.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bacec3257f7b6554096b44bd6c604827 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 |
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Featuring/Word2VecFeatureExtractor.cs: --------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;
//using Bigtree.Algorithm.Matrix;
using Txt2Vec;

namespace Voxell.NLP.Featuring
{
    /// <summary>
    /// Feature extractor that assigns each sentence the vector produced by a
    /// Txt2Vec <see cref="VectorGenerator"/> loaded from <see cref="ModelFile"/>.
    /// </summary>
    public class Word2VecFeatureExtractor : IFeatureExtractor
    {
        public int Dimension { get; set; }
        public List<Sentence> Sentences { get; set; }
        // NOTE(review): the element type was lost in this listing (only "List>" survived);
        // Tuple<string, int> is assumed - confirm against IFeatureExtractor.
        public List<Tuple<string, int>> Dictionary { get; set; }
        public List<string> Features { get; set; }
        //public Shape Shape { get; set; }
        public VectorGenerator Vg { get; set; }
        public int SentenceVectorSize { get; set; }
        public string ModelFile { get; set; }

        /// <summary>
        /// Computes a vector for every sentence in <see cref="Sentences"/> from the lemmas
        /// of its words that appear in <paramref name="features"/>, and stores it in the
        /// sentence's <c>vector</c> field.
        /// </summary>
        /// <param name="features">Lemmas allowed to contribute to a sentence vector.</param>
        /// <exception cref="ArgumentNullException"><paramref name="features"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><see cref="Sentences"/> has not been set.</exception>
        public void Vectorize(List<string> features)
        {
            if (features == null)
                throw new ArgumentNullException(nameof(features));
            if (Sentences == null)
                throw new InvalidOperationException("Sentences must be set before calling Vectorize.");

            Init();

            // A set gives constant-time membership tests; List.Contains inside the word loop
            // was linear per word. The default comparer matches List.Contains semantics.
            HashSet<string> allowedLemmas = new HashSet<string>(features);

            foreach (Sentence sentence in Sentences)
            {
                List<string> wordLemmas = new List<string>();
                foreach (var word in sentence.words)
                {
                    if (allowedLemmas.Contains(word.lemma))
                        wordLemmas.Add(word.lemma);
                }

                Vec sentenceVec = Vg.Sent2Vec(wordLemmas);
                sentence.vector = sentenceVec.VecNodes.ToArray();
            }
        }

        /// <summary>
        /// Creates the vector generator from <see cref="ModelFile"/> on first use and
        /// fills <see cref="Features"/> with one name ("f-0", "f-1", ...) per vector component.
        /// Does nothing when <see cref="Vg"/> is already set.
        /// </summary>
        private void Init()
        {
            if (Vg != null)
                return;

            Args args = new Args();
            args.ModelFile = ModelFile;
            Vg = new VectorGenerator(args);
            SentenceVectorSize = this.Vg.Model.VectorSize;

            Features = new List<string>(SentenceVectorSize);
            for (int i = 0; i < SentenceVectorSize; i++)
                Features.Add($"f-{i}");
        }
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Featuring/Word2VecFeatureExtractor.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 75c6a31dde7725d4aa81b907df3fce04 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 |
assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: dfe560d3365469b4da72cc8feb98a5ea 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind/DefaultNameContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2b2247cd0f3ed134ca6ddbd31d4c35d5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind/EnglishNameFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9109597d74dfef64b9461e5d392c2173 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind/INameContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 936dc3ea0d8bc944db9a6aa253c2d3de 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind/INameFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6c69c020dc9264940b3ce04c5c06a3d3 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind/MaximumEntropyNameFinder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9131ee5bcbda7e64fb2aeb6a0202fa06 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/NameFind/NameFinderEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c72322ec5e1a17d4a9ec611d5e5627e4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 0cc2dfb7294c1664d9de15e746747548 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- 
/Runtime/VoxellNLP/Parser/BuildContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: abce6c2605e44be46a7b8405c7f1927d 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/CheckContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 18667b41671527d46b605f6a63afe5c5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/ChunkContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 1a70c92aadbf8f341888e66530ff19da 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/EnglishHeadRules.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e5d5a57142a2ec1439de5ec22e27652c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
/Runtime/VoxellNLP/Parser/EnglishTreebankParser.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 24ab222b63c7179479b6667bfadcda18 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/IHeadRules.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the HeadRules.java source file found in the 18 | //original java implementation of OpenNLP. That source file contains the following header: 19 | 20 | //Copyright (C) 2004 Thomas Morton 21 | // 22 | //This library is free software; you can redistribute it and/or 23 | //modify it under the terms of the GNU Lesser General Public 24 | //License as published by the Free Software Foundation; either 25 | //version 2.1 of the License, or (at your option) any later version. 
26 | // 27 | //This library is distributed in the hope that it will be useful, 28 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | //GNU Lesser General Public License for more details. 31 | // 32 | //You should have received a copy of the GNU Lesser General Public 33 | //License along with this program; if not, write to the Free Software 34 | //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 35 |
using System;

namespace Voxell.NLP.Parser
{
    /// <summary>
    /// Interface for encoding the head rules associated with parsing.
    /// </summary>
    public interface IHeadRules
    {
        /// <summary>
        /// Returns the head constituent for the specified constituents of the specified type.
        /// </summary>
        /// <param name="constituents">
        /// The constituents which make up a constituent of the specified type.
        /// </param>
        /// <param name="type">
        /// The type of a constituent which is made up of the specified constituents.
        /// </param>
        /// <returns>
        /// The constituent which is the head.
        /// </returns>
        Parse GetHead(Parse[] constituents, string type);
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/IHeadRules.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4e6fc0930d01dcb49b172ba6c985370b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/IParserChunker.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 88dbaaec4be065a4883058b3e16d8eba 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/IParserTagger.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details.
12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the ParserTagger.java source file found in the 18 | //original java implementation of OpenNLP. That source file contains the following header: 19 | 20 | //Copyright (C) 2003 Thomas Morton 21 | // 22 | //This library is free software; you can redistribute it and/or 23 | //modify it under the terms of the GNU Lesser General Public 24 | //License as published by the Free Software Foundation; either 25 | //version 2.1 of the License, or (at your option) any later version. 26 | // 27 | //This library is distributed in the hope that it will be useful, 28 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | //GNU Lesser General Public License for more details. 31 | // 32 | //You should have received a copy of the GNU Lesser General Public 33 | //License along with this program; if not, write to the Free Software 34 | //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 35 |
using System;
using System.Collections;

namespace Voxell.NLP.Parser
{
    /// <summary>
    /// Interface that a pos-tagger used by the parser must implement.
    /// </summary>
    public interface IParserTagger : PosTagger.IPosTagger
    {
        /// <summary>
        /// Returns candidate tag sequences for a sentence supplied as an <see cref="ArrayList"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): how many sequences are returned and how they are ordered is decided
        /// by the implementation and is not visible from this interface - confirm there.
        /// </remarks>
        /// <param name="sentence">The sentence to tag; presumably a list of tokens - confirm.</param>
        Util.Sequence[] TopKSequences(ArrayList sentence);

        /// <summary>
        /// Returns candidate tag sequences for a sentence supplied as a string array.
        /// </summary>
        /// <remarks>
        /// NOTE(review): how many sequences are returned and how they are ordered is decided
        /// by the implementation and is not visible from this interface - confirm there.
        /// </remarks>
        /// <param name="sentence">The sentence to tag; presumably one token per element - confirm.</param>
        Util.Sequence[] TopKSequences(string[] sentence);
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/IParserTagger.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d3ba9308583e8184f945904a4eb96364 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/MaximumEntropyParser.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 31d406b093248ea48aa6e0c9cb691547 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/Parse.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bb5bda6e2d99f1a48a409f219609a020 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Parser/ParserEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 68de4da893048c949b6b5577dcdc453c 3 | MonoImporter: 4 | externalObjects: {} 5 |
serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ff93a774677a4ab4e934c2535b5d88dd 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/DefaultPosContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2ff109cdba3ad0440a97e07235f47770 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/EnglishMaximumEntropyPosTagger.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6b104094da4a2ec4e84d0d2ea755348c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/IPosContextGenerator.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. 
Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the DefaultPOSContextGenerator.java source file found in the 18 | //original java implementation of OpenNLP. That source file contains the following header: 19 | 20 | // Copyright (C) 2002 Jason Baldridge and Gann Bierner 21 | // 22 | // This library is free software; you can redistribute it and/or 23 | // modify it under the terms of the GNU Lesser General Public 24 | // License as published by the Free Software Foundation; either 25 | // version 2.1 of the License, or (at your option) any later version. 26 | // 27 | // This library is distributed in the hope that it will be useful, 28 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | // GNU Lesser General Public License for more details. 31 | // 32 | // You should have received a copy of the GNU Lesser General Public 33 | // License along with this program; if not, write to the Free Software 34 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
using System;
using System.Collections;

namespace Voxell.NLP.PosTagger
{
    /// <summary>
    /// The interface for a context generator for the POS Tagger.
    /// </summary>
    public interface IPosContextGenerator : Util.IBeamSearchContextGenerator
    {
        /// <summary>
        /// Builds the context strings for one position in a token sequence.
        /// Declared with <c>new</c>, so it hides a member of the same signature inherited
        /// from <see cref="Util.IBeamSearchContextGenerator"/>.
        /// </summary>
        /// <param name="position">Presumably the index into <paramref name="tokens"/> being tagged - confirm.</param>
        /// <param name="tokens">The tokens of the sequence.</param>
        /// <param name="previousTags">Presumably the tags already assigned to earlier tokens - confirm.</param>
        /// <param name="additionalContext">Extra, implementation-defined context objects.</param>
        /// <returns>The generated context as an array of strings.</returns>
        new string[] GetContext(int position, object[] tokens, string[] previousTags, object[] additionalContext);
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/IPosContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2b0663ebab2dcb147bea560a53c4f11c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/IPosTagger.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 | 17 | //This file is based on the POSTagger.java source file found in the 18 | //original java implementation of OpenNLP. 19 |
using System;
using System.Collections;

namespace Voxell.NLP.PosTagger
{
    /// <summary>
    /// The interface for part of speech taggers.
    /// </summary>
    public interface IPosTagger
    {
        /// <summary>
        /// Assigns the sentence of tokens pos tags.
        /// </summary>
        /// <param name="tokens">
        /// The sentence of tokens to be tagged.
        /// </param>
        /// <returns>
        /// a list of pos tags for each token provided in sentence.
        /// </returns>
        ArrayList Tag(ArrayList tokens);

        /// <summary>
        /// Assigns the sentence of tokens pos tags.
        /// </summary>
        /// <param name="tokens">
        /// The sentence of tokens to be tagged.
        /// </param>
        /// <returns>
        /// an array of pos tags for each token provided in sentence.
        /// </returns>
        string[] Tag(string[] tokens);

        /// <summary>
        /// Assigns pos tags to the sentence of space-delimited tokens.
        /// </summary>
        /// <param name="sentence">
        /// The sentence of space-delimited tokens to be tagged.
        /// </param>
        /// <returns>
        /// a string of space-delimited pos tags for each token provided in sentence.
        /// </returns>
        string TagSentence(string sentence);
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/IPosTagger.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: b5a347589b4b7f84bb0e592b0bb4e90b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/MaximumEntropyPosTagger.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 10a02a56aa2153b4e9b47e22d4407607 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/PosEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 68bce0d5c92312748bd89e07fd5a0af4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/PosTagger/PosLookupList.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: b3fcbf19abe69f84883752739bf800d0 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 |
using System.Collections.Generic;
using Voxell.NLP.Tokenize;
using Voxell.NLP.PosTagger;
using Voxell.NLP.Stem;

namespace Voxell.NLP
{
    /// <summary>
    /// A piece of text together with its label and its analysed tokens.
    /// </summary>
    [System.Serializable]
    public class Sentence
    {
        /// <summary>
        /// Tokenizes <paramref name="text"/>, tags every token and wraps each
        /// token/tag pair in a <see cref="Token"/> (which computes the lemma
        /// through <paramref name="stemmer"/>).
        /// </summary>
        /// <param name="text">The raw sentence text; handed to the tokenizer as is.</param>
        /// <param name="label">The label stored alongside the sentence.</param>
        /// <param name="tokenizer">Splits <paramref name="text"/> into tokens.</param>
        /// <param name="posTagger">Produces one tag per token.</param>
        /// <param name="stemmer">Used by each <see cref="Token"/> to derive its lemma.</param>
        /// <exception cref="System.ArgumentNullException">
        /// A collaborator (<paramref name="tokenizer"/>, <paramref name="posTagger"/>
        /// or <paramref name="stemmer"/>) is null.
        /// </exception>
        /// <exception cref="System.InvalidOperationException">
        /// The tagger returned fewer tags than there are tokens.
        /// </exception>
        public Sentence(
            string text,
            string label,
            ITokenizer tokenizer,
            IPosTagger posTagger,
            IStemmer stemmer
        )
        {
            if (tokenizer == null) throw new System.ArgumentNullException(nameof(tokenizer));
            if (posTagger == null) throw new System.ArgumentNullException(nameof(posTagger));
            if (stemmer == null) throw new System.ArgumentNullException(nameof(stemmer));

            this.text = text;
            this.label = label;

            string[] tokens = tokenizer.Tokenize(text);
            string[] tags = posTagger.Tag(tokens);

            // Fail with a clear message instead of an IndexOutOfRangeException
            // half-way through building the word list.
            if (tags.Length < tokens.Length)
            {
                throw new System.InvalidOperationException(
                    "The POS tagger returned " + tags.Length + " tags for " + tokens.Length + " tokens.");
            }

            words = new List<Token>(tokens.Length);
            for (int t = 0; t < tokens.Length; t++)
            {
                // Token's constructor takes the stemmer by ref, so it has to be passed that way.
                words.Add(new Token(tokens[t], tags[t], ref stemmer));
            }
        }

        /// <summary>The original sentence text.</summary>
        public string text;

        /// <summary>The label given to the constructor.</summary>
        public string label;

        /// <summary>One <see cref="Token"/> per token produced by the tokenizer, in order.</summary>
        public List<Token> words;

        /// <summary>
        /// Numeric representation of the sentence; not set by the constructor
        /// (OneHotEncoder.Encode assigns it).
        /// </summary>
        public double[] vector;
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/SentenceDetect.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2a708b4971b35c140ad28daeb5a84105 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/SentenceDetect/DefaultEndOfSentenceScanner.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 952db1aa9b8515d41bce7afa0508c746 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/SentenceDetect/EnglishMaximumEntropySentenceDetector.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
//This file is based on the EnglishSentenceDetectorME.java source file found in the
//original java implementation of OpenNLP. That source file contains the following header:

// Copyright (C) 2004 Jason Baldridge, Gann Bierner and Tom Morton
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

using System;

namespace Voxell.NLP.SentenceDetect
{
    /// <summary>
    /// A sentence detector which uses a model trained on English data (Wall Street
    /// Journal text).
    /// </summary>
    public class EnglishMaximumEntropySentenceDetector : MaximumEntropySentenceDetector
    {
        /// <summary>
        /// Constructor which loads the English sentence detection model
        /// transparently: a GisModel is built from a BinaryGisModelReader and
        /// handed to the base class.
        /// </summary>
        /// <param name="name">
        /// Forwarded unchanged to BinaryGisModelReader; presumably the path of
        /// the binary model file -- TODO confirm against BinaryGisModelReader.
        /// </param>
        public EnglishMaximumEntropySentenceDetector(string name) : base(new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(name)))
        {
        }
    }
}
6176b30e93be5354c91c0f43902c2772 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/SentenceDetect/SentenceDetectionContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c820b044d88dd7340995f80610cec42c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/SentenceDetect/SentenceDetectionEvent.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the SDEvent.java source file found in the 18 | //original java implementation of OpenNLP. 
// That source file contains the following header:

// Copyright (c) 2001, Eric D. Friedman All Rights Reserved.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

using System;

namespace Voxell.NLP.SentenceDetect
{
    /// <summary>
    /// A training event that also carries a reference to the event that
    /// follows it, so that events can be chained into a linked list.
    /// </summary>
    public class SentenceDetectionEvent : SharpEntropy.TrainingEvent
    {
        /// <summary>
        /// The next event in the chain; null until a successor is assigned.
        /// </summary>
        internal SentenceDetectionEvent NextEvent { get; set; }

        /// <summary>
        /// Creates an event for the given outcome and context. Only code in
        /// this assembly may construct instances.
        /// </summary>
        /// <param name="outcome">The outcome, passed through to the base event.</param>
        /// <param name="context">The context predicates, passed through to the base event.</param>
        internal SentenceDetectionEvent(string outcome, string[] context)
            : base(outcome, context)
        {
        }
    }
}
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Voxell.NLP.Stem;

namespace Voxell.NLP.Stem
{
    /// <summary>
    /// Stemmer is used to remove morphological affixes from words, leaving only the word stem.
    /// Stemming algorithms aim to remove those affixes leaving only the stem of the word.
    /// IStemmer defines a standard interface for stemmers.
    /// </summary>
    public interface IStemmer
    {
        /// <summary>
        /// Strip affixes from the token and return the stem.
        /// </summary>
        /// <param name="word">The token to stem.</param>
        /// <returns>The stem of <paramref name="word"/>.</returns>
        string Stem(string word);
    }

    /*
     * CherubNLP Library
     * Copyright (C) 2018 Haiping Chen
     *
     * This program is free software: you can redistribute it and/or modify
     * it under the terms of the GNU General Public License as published by
     * the Free Software Foundation, either version 3 of the License, or
     * (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program. If not, see <http://www.gnu.org/licenses/>.
     */

    /// <summary>
    /// A stemmer that uses regular expressions to identify morphological affixes.
    /// Any substrings that match the regular expressions will be removed,
    /// unless a replacement rule exists for the matched text, in which case
    /// the replacement is substituted.
    /// </summary>
    public class RegexStemmer : IStemmer
    {
        private string _pattern;
        private Regex _regex;

        // Matched text -> replacement. Only the irregular forms of "to be" are
        // active; suffix rules such as "nning" -> "n" or "ing" -> "" were left
        // disabled by the original author.
        private readonly Dictionary<string, string> _replacements = new Dictionary<string, string>
        {
            { "am", "be" },
            { "is", "be" },
            { "are", "be" },
            { "was", "be" },
            { "were", "be" },
        };

        /// <summary>
        /// Compiles the regular expression used by <see cref="Stem"/>.
        /// </summary>
        /// <param name="pattern">
        /// A custom pattern. When null or empty, the previously configured
        /// pattern is kept, or -- on first use -- a default pattern matching the
        /// built-in replacement words is created.
        /// </param>
        public void CreatePattern(string pattern = null)
        {
            if (!string.IsNullOrEmpty(pattern))
            {
                // The argument used to be ignored entirely.
                _pattern = pattern;
            }
            else if (string.IsNullOrEmpty(_pattern))
            {
                // Anchor on both sides: the rules describe whole words, so a mere
                // suffix match must not rewrite "this" to "thbe" or "team" to "tebe".
                // The keys are plain lowercase words, so no escaping is required.
                _pattern = "^(?:" + string.Join("|", _replacements.Keys) + ")$";
            }

            _regex = new Regex(_pattern);
        }

        /// <summary>
        /// Returns the stem of <paramref name="word"/>: everything before the
        /// first pattern match, followed by the replacement registered for the
        /// matched text (or nothing when there is no such rule).
        /// </summary>
        /// <param name="word">The token to stem.</param>
        /// <returns>
        /// The stem, or <paramref name="word"/> itself when it is null, empty
        /// or does not match the pattern.
        /// </returns>
        public string Stem(string word)
        {
            if (string.IsNullOrEmpty(word))
            {
                return word;
            }

            // Allow use without an explicit CreatePattern() call.
            if (_regex == null)
            {
                CreatePattern();
            }

            Match match = _regex.Match(word);
            if (!match.Success)
            {
                return word;
            }

            // A custom pattern may match text that has no rule: remove it, as the
            // class contract states, instead of throwing KeyNotFoundException.
            string replacement;
            if (!_replacements.TryGetValue(match.Value, out replacement))
            {
                replacement = string.Empty;
            }

            return word.Substring(0, match.Index) + replacement;
        }
    }
}

namespace Voxell.NLP
{
    /// <summary>
    /// A single token of a sentence with its part-of-speech tag and lemma.
    /// </summary>
    [System.Serializable]
    public class Token
    {
        // The alternation is grouped so that both anchors apply to both
        // alternatives; the ungrouped form "^A|B$" accepted e.g. "abc123".
        private static readonly Regex s_alphaPattern =
            new Regex(@"^(?:[a-zA-Z]+|[\u4e00-\u9fa5]+)$");

        /// <summary>
        /// Creates a token and computes its lemma with <paramref name="stemmer"/>.
        /// </summary>
        /// <param name="text">The original word text.</param>
        /// <param name="tag">The part-of-speech tag.</param>
        /// <param name="stemmer">The stemmer that produces the lemma; it is only read, never reassigned.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="stemmer"/> is null.</exception>
        public Token(string text, string tag, ref IStemmer stemmer)
        {
            if (stemmer == null)
            {
                throw new System.ArgumentNullException(nameof(stemmer));
            }

            this.text = text;
            this.tag = tag;
            this.lemma = stemmer.Stem(text);
        }

        /// <summary>
        /// The original word text.
        /// </summary>
        public string text;

        /// <summary>
        /// Part-of-speech tag.
        /// https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
        /// </summary>
        public string tag;

        /// <summary>
        /// The base form of the word.
        /// </summary>
        public string lemma;

        /// <summary>
        /// Is the token purely alphabetic, i.e. made up entirely of ASCII
        /// letters or entirely of characters in the range U+4E00..U+9FA5?
        /// False for a null text.
        /// </summary>
        public bool IsAlpha
        {
            get
            {
                return text != null && s_alphaPattern.IsMatch(text);
            }
        }

        /// <summary>Numeric value attached to the token; not set by this class.</summary>
        public double vector;
    }
}
//This file is based on the EnglishTokenizerME.java source file found in the
//original java implementation of OpenNLP. That source file contains the following header:

// Copyright (C) 2004 Jason Baldridge, Gann Bierner, and Tom Morton
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

using System;

namespace Voxell.NLP.Tokenize
{
    /// <summary>
    /// A tokenizer which uses default English data for the maximum entropy model.
    /// </summary>
    public class EnglishMaximumEntropyTokenizer : MaximumEntropyTokenizer
    {
        /// <summary>
        /// Builds the tokenizer on a GisModel read through a BinaryGisModelReader
        /// and switches AlphaNumericOptimization on.
        /// </summary>
        /// <param name="name">
        /// Forwarded unchanged to BinaryGisModelReader; presumably the path of
        /// the binary model file -- TODO confirm against BinaryGisModelReader.
        /// </param>
        public EnglishMaximumEntropyTokenizer(string name) : base(new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(name)))
        {
            AlphaNumericOptimization = true;
        }
    }
}
// That source file contains the following header:

// Copyright (C) 2002 Jason Baldridge and Gann Bierner
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

using System;

namespace Voxell.NLP.Tokenize
{
    /// <summary>
    /// The interface for tokenizers, which turn messy text into nicely segmented
    /// text tokens.
    /// </summary>
    public interface ITokenizer
    {
        /// <summary>
        /// Tokenize a string.
        /// </summary>
        /// <param name="input">
        /// The string to be tokenized.
        /// </param>
        /// <returns>
        /// The string[] with the individual tokens as the array
        /// elements.
        /// </returns>
        string[] Tokenize(string input);

        /// <summary>
        /// Tokenize a string.
        /// </summary>
        /// <param name="input">
        /// The string to be tokenized.
        /// </param>
        /// <returns>
        /// The Span[] with the spans (offsets into input) for each
        /// token as the individuals array elements.
        /// </returns>
        Util.Span[] TokenizePositions(string input);
    }
}
| assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Tokenize/TokenSpanEventReader.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 01582425076703940961e67f623d26f4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Txt2Vec.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 242dfbf36a1e4e347bc673d5a140221d 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Txt2Vec/Decoder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 26f98c2bdf4c9374e8e7faa68ee8a025 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Txt2Vec/Encoder.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: baa97969c2c1115408d12f4d6a1a4b8f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
using System.Collections.Generic;
using System.Linq;

namespace Voxell.NLP.Txt2Vec
{
    /// <summary>
    /// A one hot encoding is a representation of categorical variables as binary vectors.
    /// Each integer value is represented as a binary vector that is all zero values except the index of the integer, which is marked with a 1.
    /// As implemented here, a sentence vector has one slot per vocabulary word
    /// and a 1 in every slot whose word occurs (as a lemma) in the sentence.
    /// </summary>
    public class OneHotEncoder
    {
        /// <summary>The sentences the vocabulary is built from and that <see cref="EncodeAll"/> encodes.</summary>
        public List<Sentence> Sentences { get; set; }

        /// <summary>
        /// The vocabulary; a word's position is its index in the vectors.
        /// Built from <see cref="Sentences"/> on first use when left null.
        /// </summary>
        public List<string> words { get; set; }

        /// <summary>
        /// Computes the vector of <paramref name="sentence"/> and stores it in
        /// its <c>vector</c> field.
        /// </summary>
        /// <param name="sentence">The sentence to encode; lemmas missing from the vocabulary are ignored.</param>
        public void Encode(Sentence sentence)
        {
            InitDictionary();

            double[] vector = new double[words.Count];

            foreach (Token token in sentence.words)
            {
                // IndexOf reports "not found" as -1, so 0 is a valid position;
                // testing "> 0" silently dropped the first vocabulary word.
                int index = words.IndexOf(token.lemma);
                if (index >= 0)
                {
                    vector[index] = 1;
                }
            }

            sentence.vector = vector;
        }

        /// <summary>
        /// Encodes every sentence in <see cref="Sentences"/>.
        /// </summary>
        /// <returns>The vocabulary used for the encoding.</returns>
        public List<string> EncodeAll()
        {
            InitDictionary();

            foreach (Sentence sentence in Sentences)
            {
                Encode(sentence);
            }

            return words;
        }

        /// <summary>
        /// Builds the vocabulary on first use: the distinct lemmas of all
        /// alphabetic tokens in <see cref="Sentences"/>, sorted.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// Neither <see cref="words"/> nor <see cref="Sentences"/> has been set.
        /// </exception>
        private List<string> InitDictionary()
        {
            if (words == null)
            {
                if (Sentences == null)
                {
                    throw new System.InvalidOperationException(
                        "Set Sentences (or words) before encoding.");
                }

                words = Sentences
                    .SelectMany(s => s.words.Where(w => w.IsAlpha).Select(w => w.lemma))
                    .Distinct()
                    .OrderBy(x => x)
                    .ToList();
            }

            return words;
        }
    }
}
using System.Collections.Generic;
using System.IO;
using UnityEngine;

namespace Txt2Vec
{
    /// <summary>
    /// Reduces a binary vector model to the terms listed in a dictionary file.
    /// </summary>
    public class Shrink
    {
        /// <summary>
        /// Copies the model in <paramref name="strModelFileName"/> to
        /// <paramref name="strNewModelFileName"/>, keeping only the terms that
        /// appear in <paramref name="strDictFileName"/>.
        /// </summary>
        /// <remarks>
        /// Model layout as read and written here: three Int32 values (word
        /// count, vector size, vector-quantization size) followed, per word,
        /// by a BinaryWriter-style string and <c>size</c> Single values.
        /// Models with a non-zero vector-quantization size are not supported;
        /// a warning is logged and no output file is created.
        /// </remarks>
        /// <param name="strModelFileName">Path of the model to shrink.</param>
        /// <param name="strNewModelFileName">Path of the model to create.</param>
        /// <param name="strDictFileName">
        /// Path of a text file with one entry per line; the term is the text
        /// before the first tab character.
        /// </param>
        public void Run(string strModelFileName, string strNewModelFileName, string strDictFileName)
        {
            HashSet<string> setTerm = LoadDictionary(strDictFileName);

            List<string> termList = new List<string>();
            List<float[]> vectors = new List<float[]>();
            int size;
            int vqSize;

            // The model is binary data, so it is opened as a plain stream; the
            // using blocks release the handle on every exit path, including
            // the early return below.
            using (FileStream input = File.OpenRead(strModelFileName))
            using (BinaryReader br = new BinaryReader(input))
            {
                int words = br.ReadInt32();
                size = br.ReadInt32();
                vqSize = br.ReadInt32();

                if (vqSize != 0)
                {
                    Debug.LogWarning("Currently, we don't support to shrink vector quantization model.");
                    return;
                }

                for (int b = 0; b < words; b++)
                {
                    string strTerm = br.ReadString();
                    if (setTerm.Contains(strTerm))
                    {
                        // Only the kept vectors are held in memory.
                        float[] vector = new float[size];
                        for (int a = 0; a < size; a++)
                        {
                            vector[a] = br.ReadSingle();
                        }

                        termList.Add(strTerm);
                        vectors.Add(vector);
                    }
                    else
                    {
                        // Skip the vector of this word.
                        for (int a = 0; a < size; a++)
                        {
                            br.ReadSingle();
                        }
                    }
                }
            }

            using (FileStream output = File.Create(strNewModelFileName))
            using (BinaryWriter bw = new BinaryWriter(output))
            {
                bw.Write(termList.Count);
                bw.Write(size);
                bw.Write(vqSize);

                for (int i = 0; i < termList.Count; i++)
                {
                    bw.Write(termList[i]);

                    float[] vector = vectors[i];
                    for (int j = 0; j < size; j++)
                    {
                        bw.Write(vector[j]);
                    }
                }
            }
        }

        /// <summary>
        /// Reads the lexical dictionary: the first tab-separated field of every line.
        /// </summary>
        private static HashSet<string> LoadDictionary(string strDictFileName)
        {
            HashSet<string> setTerm = new HashSet<string>();

            using (StreamReader sr = new StreamReader(strDictFileName))
            {
                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    string[] items = strLine.Split('\t');
                    setTerm.Add(items[0]);
                }
            }

            return setTerm;
        }
    }
}
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a20804290a2019c4c926f595053358cf 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/BeamSearch.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9560637d79f40774998ed1fb6ebf5e02 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/Cache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3dc63fcd64b99b749b11bee31f595f4a 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/CollectionEventStream.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2006 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the CollectionEventStream.java source file found in the 18 | //original java implementation of OpenNLP. That source file contains the following header: 19 | 20 | //Copyright (C) 2004 Thomas Morton 21 | // 22 | //This library is free software; you can redistribute it and/or 23 | //modify it under the terms of the GNU Lesser General Public 24 | //License as published by the Free Software Foundation; either 25 | //version 2.1 of the License, or (at your option) any later version. 26 | // 27 | //This library is distributed in the hope that it will be useful, 28 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | //GNU General Public License for more details. 31 | // 32 | //You should have received a copy of the GNU Lesser General Public 33 | //License along with this program; if not, write to the Free Software 34 | //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 35 | 
using System;
using System.Collections;

namespace Voxell.NLP.Util
{
    /// <summary>
    /// Creates an event stream out of a collection of events.
    /// </summary>
    public class CollectionEventReader : SharpEntropy.ITrainingEventReader
    {
        // Enumerator over the wrapped collection; HasNext advances it.
        private IEnumerator mEvents;

        /// <summary>
        /// Wraps the given collection; its elements are cast to
        /// SharpEntropy.TrainingEvent when they are read.
        /// </summary>
        /// <param name="c">The collection of events to read from.</param>
        public CollectionEventReader(ICollection c)
        {
            mEvents = c.GetEnumerator();
        }

        #region ITrainingEventReader Members

        /// <summary>
        /// Moves to the next event. Each call advances the underlying enumerator,
        /// so calling it twice in a row skips an event.
        /// </summary>
        /// <returns>True if there is an event to read with <see cref="ReadNextEvent"/>.</returns>
        public virtual bool HasNext() => mEvents.MoveNext();

        /// <summary>
        /// Returns the event the enumerator is currently positioned on.
        /// </summary>
        public SharpEntropy.TrainingEvent ReadNextEvent()
        {
            object current = mEvents.Current;
            return (SharpEntropy.TrainingEvent)current;
        }

        #endregion
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/CollectionEventStream.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3f5d6351452e74d4d88449f81492d30f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/CountedSet.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ad635ca8bbeb7c44d8dd14d7d6ffaa70 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/HashList.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 49e99aa86675fde4d8efbe5027997443 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/HashSet.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d0ba914ae7b910d428d7937f80317459 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/IBeamSearchContextGenerator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d3ec184f7f5f167478c2bd66c2aa68a4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/IHeap.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a056339bfa8738449a2bb970c5341e4b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/ListHeap.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f5d49b251f1f41c43b7a2ec623cca064 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/Pair.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f3d897966c37ffe418f74a3e543bdfdb 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/ReverseListIterator.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2d361964ad7b63249b2e541d6b86a795 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/Sequence.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 343a917a20e0aa849931c5e327520b43 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/Set.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: b056ea408df09ad4f809aeabb29174f9 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
/Runtime/VoxellNLP/Util/SortedSet.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9d116955c57de5a4ab529500662487a5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/Span.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9cfe96f5a066b9249bd9b8d04f1a23e5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/StringTokenizer.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 

using System;

namespace Voxell.NLP.Util
{
    /// <summary>
    /// Class providing simple tokenization of a string, for manipulation.
    /// For NLP tokenizing, see the Voxell.NLP.Tokenize namespace.
    /// </summary>
    public class StringTokenizer
    {
        // Default delimiter set: space, tab, newline and carriage return.
        private const string mDelimiters = " \t\n\r";
        private string[] mTokens;
        int mPosition;

        /// <summary>
        /// Initializes a new class instance with a specified string to process,
        /// split on the default whitespace delimiters.
        /// </summary>
        /// <param name="input">
        /// String to tokenize
        /// </param>
        public StringTokenizer(string input) : this(input, mDelimiters.ToCharArray())
        {
        }

        /// <summary>
        /// Initializes a new class instance that splits on every character of
        /// <paramref name="separators"/>.
        /// </summary>
        /// <param name="input">String to tokenize</param>
        /// <param name="separators">Characters that each act as a delimiter.</param>
        public StringTokenizer(string input, string separators) : this(input, separators.ToCharArray())
        {
        }

        /// <summary>
        /// Initializes a new class instance that splits on the given characters.
        /// </summary>
        /// <param name="input">String to tokenize</param>
        /// <param name="separators">Delimiter characters.</param>
        public StringTokenizer(string input, params char[] separators)
        {
            mPosition = 0;
            mTokens = input.Split(separators);
        }

        /// <summary>
        /// Returns the next non-empty token, or null when the input is exhausted.
        /// </summary>
        public string NextToken()
        {
            while (mPosition < mTokens.Length)
            {
                string candidate = mTokens[mPosition];
                mPosition++;

                // Adjacent delimiters produce empty entries in the split result; skip them.
                if (candidate.Length != 0)
                {
                    return candidate;
                }
            }

            return null;
        }
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/StringTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 369c806178c109244821ae7651ec0824 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/TreeHeap.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard 
J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 17 | //This file is based on the TreeHeap.java source file found in the 18 | //original java implementation of OpenNLP. 19 | 
using System;

namespace Voxell.NLP.Util
{
    /// <summary>
    /// An implementation of the heap interface based on SortedSet.
    /// This implementation will not allow multiple objects which are equal to be added to the heap.
    /// Only use this implementation when objects in the heap can be totally ordered (no duplicates).
    /// </summary>
    public class TreeHeap<T> : IHeap<T>
    {
        // Backing sorted set; its first element is the top of the heap.
        private SortedSet<T> mTree;

        /// <summary>Creates a new tree heap.</summary>
        public TreeHeap()
        {
            mTree = new TreeSet<T>();
        }

        /// <summary>
        /// Creates a new tree heap of the specified size.
        /// </summary>
        /// <param name="size">
        /// The size of the new tree heap. The value is not used by this implementation.
        /// </param>
        public TreeHeap(int size) : this()
        {
        }

        /// <summary>
        /// Removes the first element of the backing set and returns it.
        /// </summary>
        public virtual T Extract()
        {
            T head = mTree.First();
            mTree.Remove(head);
            return head;
        }

        /// <summary>
        /// The first element of the backing set, without removing it.
        /// </summary>
        public virtual T Top => mTree.First();

        /// <summary>
        /// Adds an element to the backing set.
        /// </summary>
        public virtual void Add(T input) => mTree.Add(input);

        /// <summary>
        /// Delegates to the backing set's Sort method.
        /// </summary>
        public void Sort() => mTree.Sort();

        /// <summary>
        /// The number of elements currently in the heap.
        /// </summary>
        public virtual int Size => mTree.Count;

        /// <summary>
        /// Removes all elements from the heap.
        /// </summary>
        public virtual void Clear() => mTree.Clear();

        /// <summary>
        /// Whether the backing set reports itself as empty.
        /// </summary>
        public virtual bool IsEmpty => mTree.IsEmpty();
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/TreeHeap.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4d2e2ee21a1da0c4bb81d501938d6eee 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/TreeSet.cs: -------------------------------------------------------------------------------- 1 | //Copyright (C) 2005 Richard J. Northedge 2 | // 3 | // This library is free software; you can redistribute it and/or 4 | // modify it under the terms of the GNU Lesser General Public 5 | // License as published by the Free Software Foundation; either 6 | // version 2.1 of the License, or (at your option) any later version. 7 | // 8 | // This library is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public 14 | // License along with this program; if not, write to the Free Software 15 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 | 
using System;
using System.Collections.Generic;

namespace Voxell.NLP.Util
{
    /// <summary>
    /// This class manages a tree set collection of sorted elements.
    /// </summary>
    public class TreeSet<T> : SortedSet<T>
    {
        /// <summary>
        /// Creates a new, empty TreeSet.
        /// </summary>
        public TreeSet()
        {
        }

        /// <summary>
        /// Creates a new TreeSet from an existing collection.
        /// </summary>
        /// <param name="collection">
        /// The collection used to initialize the TreeSet
        /// </param>
        public TreeSet(ICollection<T> collection) : base(collection)
        {
        }

        /// <summary>
        /// Creates a copy of the TreeSet.
        /// </summary>
        /// <returns>A new TreeSet to which all elements of this set have been added.</returns>
        public virtual object TreeSetClone()
        {
            var copy = new TreeSet<T>();
            copy.AddAll(this);
            return copy;
        }

        /// <summary>
        /// Retrieves the number of elements contained in the set.
        /// </summary>
        /// <returns>
        /// The number of elements in the set (same value as Count).
        /// </returns>
        public virtual int Size() => Count;
    }
}
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/Util/TreeSet.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 97523c81e3a5533418a93543a250145b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/VX.NLP.Runtime.VoxellNLP.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "VX.NLP.Runtime.VoxellNLP", 3 | "rootNamespace": "", 4 | "references": [ 5 | "GUID:4cb1ae9a716e8e447bc1a8026800340d", 6 | "GUID:094afe385ce3df645871cfee687d8406", 7 | "GUID:72163bce151732d47ae6c627dcb5e477", 8 | "GUID:b23efe0d0bd83184681bd63517e3c327" 9 | ], 10 | "includePlatforms": [], 11 | "excludePlatforms": [], 12 | "allowUnsafeCode": false, 13 | "overrideReferences": false, 14 | "precompiledReferences": [], 15 | "autoReferenced": true, 16 | "defineConstraints": [], 17 | "versionDefines": [], 18 | "noEngineReferences": false 19 | } -------------------------------------------------------------------------------- /Runtime/VoxellNLP/VX.NLP.Runtime.VoxellNLP.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 5d052c2f1dea4e64ab062f48c70faf0e 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Runtime/VoxellNLP/csc.rsp: -------------------------------------------------------------------------------- 1 | -r:System.Configuration.dll 
-------------------------------------------------------------------------------- /Runtime/VoxellNLP/csc.rsp.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 41a96fba3f8d4674884d8376bc711958 3 | DefaultImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scenes.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 229f10eb2b694cc46b246a54ba69b215 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scenes/NLPBasicExamples.unity.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 951b0bdccb09fba49843e2e8e8033d08 3 | DefaultImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f0eec459df8e0e44da02b4aaa9f6fab5 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPNaiveBayesClassifier.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 603ac9302b602ab47bfdff54edb8cb33 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: 
{instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPNamedEntityRecognition.cs: --------------------------------------------------------------------------------
using UnityEngine;
using Voxell;
using Voxell.NLP.NameFind;
using Voxell.Inspector;

/// <summary>
/// Sample component: runs the English name finder over <see cref="sentence"/>
/// and stores the result in <see cref="ner"/>.
/// </summary>
public class NLPNamedEntityRecognition : MonoBehaviour
{
    [StreamingAssetFolderPath]
    public string nameFinderModel;

    [TextArea(1, 5)]
    public string sentence;

    // Names of the entity models passed to the name finder.
    public string[] models =
    {
        "date", "location", "money", "organization", "percentage", "person", "time"
    };

    [TextArea(1, 5), InspectOnly]
    public string ner;

    private EnglishNameFinder nameFinder;

    /// <summary>
    /// Creates the name finder from the configured model path and runs it on the sentence.
    /// </summary>
    [Button]
    public void Recognize()
    {
        var modelPath = FileUtilx.GetStreamingAssetFilePath(nameFinderModel);
        nameFinder = new EnglishNameFinder(modelPath);
        ner = nameFinder.GetNames(models, sentence);
    }
}
-------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPNamedEntityRecognition.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3c0f953702583594abe30862e5e953c2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPPOSTagger.cs: --------------------------------------------------------------------------------
using UnityEngine;
using Voxell;
using Voxell.NLP.PosTagger;
using Voxell.NLP.Tokenize;
using Voxell.Inspector;

/// <summary>
/// Sample component: tokenizes <see cref="sentence"/> and tags each token
/// with its part of speech.
/// </summary>
public class NLPPOSTagger : MonoBehaviour
{
    [StreamingAssetFilePath]
    public string tokenizerModel;

    [StreamingAssetFilePath]
    public string posTaggerModel;

    [StreamingAssetFilePath]
    public string tagDict;

    [TextArea(1, 5)]
    public string sentence;

    public string[] tokens;
    public string[] posTags;

    private EnglishMaximumEntropyTokenizer tokenizer;
    private EnglishMaximumEntropyPosTagger posTagger;

    /// <summary>
    /// Creates the tokenizer and tagger from the configured paths, then fills
    /// <see cref="tokens"/> and <see cref="posTags"/>.
    /// </summary>
    [Button]
    public void Tag()
    {
        // link to POS tags meanings: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
        var tokenizerPath = FileUtilx.GetStreamingAssetFilePath(tokenizerModel);
        tokenizer = new EnglishMaximumEntropyTokenizer(tokenizerPath);

        var taggerPath = FileUtilx.GetStreamingAssetFilePath(posTaggerModel);
        var tagDictPath = FileUtilx.GetStreamingAssetFilePath(tagDict);
        posTagger = new EnglishMaximumEntropyPosTagger(taggerPath, tagDictPath);

        tokens = tokenizer.Tokenize(sentence);
        posTags = posTagger.Tag(tokens);
    }
}
-------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPPOSTagger.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bd6cc2e7a6e781447a13ceb477deb0d3 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPRegexStemmer.cs: --------------------------------------------------------------------------------
using UnityEngine;
using Voxell;
using Voxell.NLP.Stem;
using Voxell.NLP.Tokenize;
using Voxell.Inspector;

/// <summary>
/// Sample component: tokenizes <see cref="sentence"/> and stems every token
/// with the regex stemmer.
/// </summary>
public class NLPRegexStemmer : MonoBehaviour
{
    [StreamingAssetFilePath]
    public string tokenizerModel;

    [TextArea(1, 5)]
    public string sentence;

    public string[] tokens;
    public string[] stemmedTokens;

    private EnglishMaximumEntropyTokenizer tokenizer;
    private RegexStemmer regexStemmer;

    /// <summary>
    /// Creates the tokenizer and stemmer, then fills <see cref="tokens"/> and
    /// <see cref="stemmedTokens"/> (one stem per token, same order).
    /// </summary>
    [Button]
    public void Stem()
    {
        var tokenizerPath = FileUtilx.GetStreamingAssetFilePath(tokenizerModel);
        tokenizer = new EnglishMaximumEntropyTokenizer(tokenizerPath);
        regexStemmer = new RegexStemmer();
        regexStemmer.CreatePattern();

        // tokenize
        tokens = tokenizer.Tokenize(sentence);
        stemmedTokens = new string[tokens.Length];

        // stem
        for (int i = 0; i < tokens.Length; i++)
        {
            stemmedTokens[i] = regexStemmer.Stem(tokens[i]);
        }
    }
}
-------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPRegexStemmer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a4176bb1cfbee084a9b233cb7b3d141f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPSentenceSplitter.cs: --------------------------------------------------------------------------------
using UnityEngine;
using Voxell;
using Voxell.NLP.SentenceDetect;
using Voxell.Inspector;

/// <summary>
/// Sample component: splits <see cref="paragraph"/> into sentences.
/// </summary>
public class NLPSentenceSplitter : MonoBehaviour
{
    [StreamingAssetFilePath]
    public string splitterModel;

    [TextArea(1, 5)]
    public string paragraph;

    [TextArea(1, 3)]
    public string[] sentences;

    private EnglishMaximumEntropySentenceDetector sentenceDetector;

    /// <summary>
    /// Creates the sentence detector from the configured model path and fills
    /// <see cref="sentences"/>.
    /// </summary>
    [Button]
    private void SplitSentence()
    {
        var modelPath = FileUtilx.GetStreamingAssetFilePath(splitterModel);
        sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
        sentences = sentenceDetector.SentenceDetect(paragraph);
    }
}
--------------------------------------------------------------------------------
/Samples~/NLPBasicExamples/Scripts/NLPSentenceSplitter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c8670187ce37de8469dad64ccff13a70 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPTokenizer.cs: --------------------------------------------------------------------------------
using UnityEngine;
using Voxell;
using Voxell.NLP.Tokenize;
using Voxell.Inspector;

/// <summary>
/// Sample component: tokenizes <see cref="sentence"/> with the English
/// maximum-entropy tokenizer.
/// </summary>
public class NLPTokenizer : MonoBehaviour
{
    [StreamingAssetFilePath]
    public string tokenizerModel;

    [TextArea(1, 5)]
    public string sentence;

    public string[] tokens;

    private EnglishMaximumEntropyTokenizer tokenizer;

    /// <summary>
    /// Creates the tokenizer from the configured model path and fills <see cref="tokens"/>.
    /// </summary>
    [Button]
    public void Tokenize()
    {
        var modelPath = FileUtilx.GetStreamingAssetFilePath(tokenizerModel);
        tokenizer = new EnglishMaximumEntropyTokenizer(modelPath);
        tokens = tokenizer.Tokenize(sentence);
    }
}
-------------------------------------------------------------------------------- /Samples~/NLPBasicExamples/Scripts/NLPTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: abd901a4e916695479450c1794fd95eb 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /docs~/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and 
also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs~/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs~/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import sphinx_rtd_theme 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'UnityNLP' 21 | copyright = '2021, Nixon' 22 | author = 'Nixon' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '1.1.0' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = 'sphinx_rtd_theme' 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. 
They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 55 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs~/source/index.rst: -------------------------------------------------------------------------------- 1 | UnityNLP 2 | ~~~~~~~~ 3 | 4 | Introduction 5 | ------------ 6 | 7 | This package references 2 repositories, `OpenNLP `_ and `CherubNLP `_. UnityNLP is a collection of natural language processing tools written in C# that is targeted towards the Unity Engine. Currently it provides the following NLP tools: 8 | 9 | - tokenizer 10 | - sentence splitter 11 | - part-of-speech tagger 12 | - chunker (used to "find non-recursive syntactic annotations such as noun phrase chunks") 13 | - parser 14 | - name finder 15 | - coreference tool 16 | - interface to the WordNet lexical database 17 | - topical classifier 18 | 19 | Installation 20 | ------------ 21 | 22 | This package depends on the `UnityUtil `_ package and the `UnityAI `_ package. 23 | 24 | 1. Clone the `UnityUtil `_ repository into your ``Packages`` folder. 25 | 2. Clone the `UnityAI `_ repository into your ``Packages`` folder. 26 | 3. Clone this repository into your Packages folder. 27 | 4. Download all `essential models `_ and import them into the project. 28 | 5. Place the models in the StreamingAssets folder. 29 | 6. And you are ready to go! 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: Contents: 34 | 35 | tokenizer 36 | sentence_splitter 37 | pos_tagger 38 | named_entity_recognition 39 | classifier 40 | 41 | 42 | .. Indices and tables 43 | .. ================== 44 | 45 | .. * :ref:`genindex` 46 | .. * :ref:`modindex` 47 | .. 
* :ref:`search` 48 | -------------------------------------------------------------------------------- /docs~/source/named_entity_recognition.rst: -------------------------------------------------------------------------------- 1 | Named Entity Recognition 2 | ~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | A Named Entity Recognition (NER) system identifies key information in a sentence. These detected entities will also be classified into a category. 5 | 6 | English Name Finder 7 | ------------------- 8 | 9 | Example 10 | ======= 11 | 12 | In this example, a pretrained model that is capable of identifying 7 categories is used to *"recognize"* a sentence. 13 | 14 | .. code-block:: csharp 15 | 16 | using UnityEngine; 17 | using Voxell; 18 | using Voxell.NLP.NameFind; 19 | using Voxell.Inspector; 20 | 21 | public class NLPNamedEntityRecognition : MonoBehaviour 22 | { 23 | [StreamingAssetFolderPath] public string nameFinderModel; 24 | [TextArea(1, 5)] public string sentence; 25 | public string[] models = new string[] 26 | { "date", "location", "money", "organization", "percentage", "person", "time" }; 27 | [TextArea(1, 5), InspectOnly] public string ner; 28 | 29 | private EnglishNameFinder nameFinder; 30 | 31 | [Button] 32 | public void Recognize() 33 | { 34 | nameFinder = new EnglishNameFinder(FileUtil.GetStreamingAssetFilePath(nameFinderModel)); 35 | ner = nameFinder.GetNames(models, sentence); 36 | } 37 | } 38 | 39 | .. image:: ../../Pictures~/NamedEntityRecognitionExample.png 40 | :alt: NamedEntityRecognitionExample 41 | -------------------------------------------------------------------------------- /docs~/source/pos_tagger.rst: -------------------------------------------------------------------------------- 1 | POS Tagger 2 | ~~~~~~~~~~ 3 | 4 | A part-of-speech tagger takes in a word and determines what part-of-speech it is.
5 | 6 | English Maximum Entropy Pos Tagger 7 | ---------------------------------- 8 | 9 | Example 10 | ======= 11 | 12 | In this example, the sentence is first tokenized using the ``EnglishMaximumEntropyTokenizer`` into individual tokens (words). Each of these tokens is then tagged by a pretrained model. 13 | 14 | You can find the full list of part-of-speech tags along with their meanings `here <https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html>`_. 15 | 16 | .. code-block:: csharp 17 | 18 | using UnityEngine; 19 | using Voxell; 20 | using Voxell.NLP.PosTagger; 21 | using Voxell.NLP.Tokenize; 22 | using Voxell.Inspector; 23 | 24 | public class NLPPOSTagger : MonoBehaviour 25 | { 26 | [StreamingAssetFilePath] public string tokenizerModel; 27 | [StreamingAssetFilePath] public string posTaggerModel; 28 | [StreamingAssetFilePath] public string tagDict; 29 | [TextArea(1, 5)] public string sentence; 30 | public string[] tokens; 31 | public string[] posTags; 32 | 33 | private EnglishMaximumEntropyTokenizer tokenizer; 34 | private EnglishMaximumEntropyPosTagger posTagger; 35 | 36 | [Button] 37 | public void Tag() 38 | { 39 | // link to POS tags meanings: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html 40 | tokenizer = new EnglishMaximumEntropyTokenizer(FileUtil.GetStreamingAssetFilePath(tokenizerModel)); 41 | posTagger = new EnglishMaximumEntropyPosTagger( 42 | FileUtil.GetStreamingAssetFilePath(posTaggerModel), 43 | FileUtil.GetStreamingAssetFilePath(tagDict)); 44 | 45 | tokens = tokenizer.Tokenize(sentence); 46 | posTags = posTagger.Tag(tokens); 47 | } 48 | } 49 | 50 | .. image:: ../../Pictures~/POSTaggerExample.png 51 | :alt: POSTaggerExample 52 | -------------------------------------------------------------------------------- /docs~/source/sentence_splitter.rst: -------------------------------------------------------------------------------- 1 | Sentence Splitter 2 | ~~~~~~~~~~~~~~~~~ 3 | 4 | A sentence splitter splits a paragraph into individual sentences.
5 | 6 | English Maximum Entropy Sentence Detector 7 | ----------------------------------------- 8 | 9 | Example 10 | ======= 11 | 12 | In this example, a pretrained model is used to split the paragraph into 3 individual sentences. 13 | 14 | **Note:** 15 | One might think that it is easy to split sentences. For example, we could just detect full stops and split the text based on the location of the full stop in the sentence. This however will not always work. If you look at the sentences below closely, you will notice that there are multiple occurrences of the word **"Mr."**, which itself contains a full stop. An accurate sentence splitter must be able to identify it. 16 | 17 | .. code-block:: csharp 18 | 19 | using UnityEngine; 20 | using Voxell; 21 | using Voxell.NLP.SentenceDetect; 22 | using Voxell.Inspector; 23 | 24 | 25 | public class NLPSentenceSplitter : MonoBehaviour 26 | { 27 | [StreamingAssetFilePath] public string splitterModel; 28 | [TextArea(1, 5)] public string paragraph; 29 | [TextArea(1, 3)] public string[] sentences; 30 | 31 | private EnglishMaximumEntropySentenceDetector sentenceDetector; 32 | 33 | [Button] 34 | void SplitSentence() 35 | { 36 | sentenceDetector = new EnglishMaximumEntropySentenceDetector(FileUtil.GetStreamingAssetFilePath(splitterModel)); 37 | sentences = sentenceDetector.SentenceDetect(paragraph); 38 | } 39 | } 40 | 41 | .. image:: ../../Pictures~/SentenceSplitterExample.png 42 | :alt: SentenceSplitterExample 43 | -------------------------------------------------------------------------------- /docs~/source/tokenizer.rst: -------------------------------------------------------------------------------- 1 | Tokenizer 2 | ~~~~~~~~~ 3 | 4 | A tokenizer's goal is to detect individual words in a sentence and split them up into individual tokens. This functionality is the core of almost all NLP tasks.
5 | 6 | English Maximum Entropy Tokenizer 7 | --------------------------------- 8 | 9 | Example 10 | ======= 11 | 12 | In this example, a pretrained model is used to tokenize a fairly complex sentence which consists of symbols and punctuation that we rarely see in normal sentences. 13 | 14 | .. code-block:: csharp 15 | 16 | using UnityEngine; 17 | using Voxell; 18 | using Voxell.NLP.Tokenize; 19 | using Voxell.Inspector; 20 | 21 | public class NLPTokenizer : MonoBehaviour 22 | { 23 | [StreamingAssetFilePath] public string tokenizerModel; 24 | [TextArea(1, 5)] public string sentence; 25 | public string[] tokens; 26 | 27 | private EnglishMaximumEntropyTokenizer tokenizer; 28 | 29 | [Button] 30 | public void Tokenize() 31 | { 32 | tokenizer = new EnglishMaximumEntropyTokenizer(FileUtil.GetStreamingAssetFilePath(tokenizerModel)); 33 | tokens = tokenizer.Tokenize(sentence); 34 | } 35 | } 36 | 37 | .. image:: ../../Pictures~/TokenizerExample.png 38 | :alt: TokenizerExample 39 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "voxell.nlp", 3 | "displayName": "VX NLP", 4 | "author": "Nixon", 5 | "description": "Natural Language Processing in Unity.", 6 | "keywords": [ 7 | "NLP", 8 | "Natural Language Processing", 9 | "voxell" 10 | ], 11 | "license": "Apache 2.0", 12 | "unity": "2020.4", 13 | "unityRelease": "0f1", 14 | "version": "1.2.0", 15 | "samples": [ 16 | { 17 | "displayName": "NLP Basic Examples", 18 | "description": "Some example scripts to go through the basics of this package.", 19 | "path": "Samples~/NLPBasicExamples" 20 | } 21 | ], 22 | "dependencies": { 23 | "voxell.util": "1.1.0", 24 | "voxell.ai": "1.0.0", 25 | "com.unity.nuget.newtonsoft-json": "2.0.0" 26 | } 27 | } -------------------------------------------------------------------------------- /package.json.meta:
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 69706cf31a0a00e45bcc45dc18cf66a5 3 | PackageManifestImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | --------------------------------------------------------------------------------