├── sample-android ├── .gitignore ├── src │ ├── main │ │ ├── res │ │ │ ├── values │ │ │ │ ├── strings.xml │ │ │ │ ├── colors.xml │ │ │ │ ├── dimens.xml │ │ │ │ └── styles.xml │ │ │ ├── mipmap-hdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-mdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-xhdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-xxhdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-xxxhdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── values-w820dp │ │ │ │ └── dimens.xml │ │ │ └── layout │ │ │ │ └── activity_main.xml │ │ ├── java │ │ │ └── com │ │ │ │ └── sixthsolution │ │ │ │ └── apex │ │ │ │ └── sample │ │ │ │ └── android │ │ │ │ ├── ApexApplication.java │ │ │ │ └── MainActivity.java │ │ └── AndroidManifest.xml │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── sixthsolution │ │ │ └── apex │ │ │ └── sample │ │ │ └── android │ │ │ └── ExampleUnitTest.java │ └── androidTest │ │ └── java │ │ └── com │ │ └── sixthsolution │ │ └── apex │ │ └── sample │ │ └── android │ │ └── ExampleInstrumentedTest.java ├── proguard-rules.pro └── build.gradle ├── apex ├── gradle.properties ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── sixthsolution │ │ │ └── apex │ │ │ ├── nlp │ │ │ ├── event │ │ │ │ ├── SeekBy.java │ │ │ │ ├── Extractor.java │ │ │ │ ├── EventDetector.java │ │ │ │ ├── StandardLocationExtractor.java │ │ │ │ ├── EventBuilder.java │ │ │ │ └── StandardEventDetector.java │ │ │ ├── ner │ │ │ │ ├── Entity.java │ │ │ │ ├── Chunker.java │ │ │ │ ├── regex │ │ │ │ │ ├── ChunkDetectionFilter.java │ │ │ │ │ ├── RegExChunker.java │ │ │ │ │ └── ChunkDetector.java │ │ │ │ ├── Label.java │ │ │ │ └── ChunkedPart.java │ │ │ ├── tagger │ │ │ │ ├── Tagger.java │ │ │ │ ├── TaggedWord.java │ │ │ │ ├── TaggedWords.java │ │ │ │ └── StandardTagger.java │ │ │ ├── tokenization │ │ │ │ ├── Tokenizer.java │ │ │ │ └── StandardTokenizer.java │ │ │ ├── parser │ │ │ │ ├── Parser.java │ │ │ │ └── StandardParserBase.java │ │ │ ├── util │ │ │ │ ├── Pair.java │ │ │ │ ├── NumericUtils.java │ │ │ │ 
└── Triple.java │ │ │ └── dict │ │ │ │ ├── TagValue.java │ │ │ │ ├── DictionaryBuilder.java │ │ │ │ ├── Dictionary.java │ │ │ │ ├── Tag.java │ │ │ │ └── Tags.java │ │ │ ├── model │ │ │ ├── Frequency.java │ │ │ ├── WeekDay.java │ │ │ ├── Recurrence.java │ │ │ └── Event.java │ │ │ └── Apex.java │ └── test │ │ └── java │ │ └── com │ │ └── sixthsolution │ │ └── apex │ │ └── nlp │ │ └── test │ │ ├── TagAssertion.java │ │ ├── TaggerAssertion.java │ │ ├── ChunkerAssertion.java │ │ ├── TokenizerAssertion.java │ │ ├── ChunkAssertion.java │ │ └── ChunkDetectorAssertion.java └── build.gradle ├── dfalex ├── gradle.properties ├── build.gradle ├── src │ ├── main │ │ └── java │ │ │ ├── backport │ │ │ └── java │ │ │ │ └── util │ │ │ │ └── function │ │ │ │ ├── Objects.java │ │ │ │ ├── Consumer.java │ │ │ │ ├── IntConsumer.java │ │ │ │ ├── BiConsumer.java │ │ │ │ ├── ObjIntConsumer.java │ │ │ │ └── Function.java │ │ │ └── com │ │ │ └── nobigsoftware │ │ │ ├── dfalex │ │ │ ├── SafeAppendable.java │ │ │ ├── ReplacementSelector.java │ │ │ ├── DfaTransitionConsumer.java │ │ │ ├── RawDfa.java │ │ │ ├── DfaStateImpl.java │ │ │ ├── DfaAmbiguityResolver.java │ │ │ ├── DfaStateInfo.java │ │ │ ├── StringReplacement.java │ │ │ ├── PrimeSizeFinder.java │ │ │ ├── BitUtils.java │ │ │ ├── SerializableDfa.java │ │ │ ├── NfaTransition.java │ │ │ ├── IntRangeClosureQueue.java │ │ │ ├── Matchable.java │ │ │ ├── DfaState.java │ │ │ ├── DfaStatePlaceholder.java │ │ │ ├── IntListKey.java │ │ │ ├── DfaAmbiguityException.java │ │ │ ├── StringMatchIterator.java │ │ │ ├── StringReplacements.java │ │ │ └── StringReplaceAppendable.java │ │ │ └── util │ │ │ ├── BuilderCache.java │ │ │ └── SHAOutputStream.java │ └── test │ │ ├── resources │ │ └── By3Test.out.txt │ │ └── java │ │ └── com │ │ └── nobigsoftware │ │ └── dfalex │ │ ├── BitUtilTest.java │ │ ├── JavaTest.java │ │ ├── StringMatcherTest.java │ │ ├── By3Test.java │ │ ├── ReverseFinderTest.java │ │ ├── TestBase.java │ │ ├── BuilderCacheTest.java │ │ ├── 
RegexTest.java │ │ ├── RegexSpeedTest.java │ │ └── IntTest.java └── README.md ├── english-nlp ├── gradle.properties ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── sixthsolution │ │ │ └── apex │ │ │ └── nlp │ │ │ └── english │ │ │ ├── filter │ │ │ ├── LocationDetectionFilter.java │ │ │ ├── DateDetectionFilter.java │ │ │ └── TimeDetectionFilter.java │ │ │ ├── EnglishTokenizer.java │ │ │ ├── EnglishParser.java │ │ │ ├── LocationDetector.java │ │ │ └── TimeDetector.java │ └── test │ │ └── java │ │ └── com │ │ └── sixthsolution │ │ └── apex │ │ └── nlp │ │ └── english │ │ └── test │ │ └── tokenization │ │ ├── EnglishDetectorTest.java │ │ ├── EnglishTaggerTest.java │ │ ├── EnglishTokenizationTest.java │ │ ├── LocationDetectorTest.java │ │ └── TimeDetectorTest.java └── build.gradle ├── logo.png ├── persian-nlp ├── gradle.properties ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── sixthsolution │ │ │ └── apex │ │ │ └── nlp │ │ │ └── persian │ │ │ ├── PersianTagger.java │ │ │ ├── model │ │ │ ├── PersianExtractor.java │ │ │ └── PersianEvent.java │ │ │ ├── PersianTokenizer.java │ │ │ ├── filter │ │ │ ├── LocationDetectionFilter.java │ │ │ ├── DateDetectionFilter.java │ │ │ └── TimeDetectionFilter.java │ │ │ ├── event │ │ │ ├── PersianEventBuilder.java │ │ │ └── PersianRecurrence.java │ │ │ ├── PersianLocationDetector.java │ │ │ ├── PersianTimeDetector.java │ │ │ └── calendar │ │ │ └── tools │ │ │ └── JalaliCalendar.java │ └── test │ │ └── java │ │ └── com │ │ └── sixthsolution │ │ └── apex │ │ └── nlp │ │ └── persian │ │ └── test │ │ ├── PersianDetectorTest.java │ │ ├── PersianDLTest.java │ │ ├── PersianDTTest.java │ │ └── PersianTokenizationTest.java └── build.gradle ├── .gitignore ├── gradle.properties ├── pipeline-arch.png ├── gradle ├── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties └── jacoco.gradle ├── settings.gradle ├── .travis.yml ├── TODO ├── .script ├── deploy_artifacts.sh └── deploy_javadocs.sh └── gradlew.bat 
/sample-android/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /apex/gradle.properties: -------------------------------------------------------------------------------- 1 | DESC=Apex NLP 2 | -------------------------------------------------------------------------------- /dfalex/gradle.properties: -------------------------------------------------------------------------------- 1 | DESC=dfalex backport 2 | -------------------------------------------------------------------------------- /english-nlp/gradle.properties: -------------------------------------------------------------------------------- 1 | DESC=English Event Parser for ApexNLP 2 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/logo.png -------------------------------------------------------------------------------- /persian-nlp/gradle.properties: -------------------------------------------------------------------------------- 1 | DESC=Persian Event Parser for ApexNLP 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .gradle 3 | /build 4 | */build 5 | *.iml 6 | local.properties -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | GROUP=com.sixthsolution.apex 2 | VERSION_NAME=0.1.0-SNAPSHOT 3 | -------------------------------------------------------------------------------- /pipeline-arch.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/pipeline-arch.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /sample-android/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | ApexSample 3 | 4 | -------------------------------------------------------------------------------- /sample-android/src/main/res/mipmap-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/sample-android/src/main/res/mipmap-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /sample-android/src/main/res/mipmap-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/sample-android/src/main/res/mipmap-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /sample-android/src/main/res/mipmap-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/sample-android/src/main/res/mipmap-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /sample-android/src/main/res/mipmap-xxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/sample-android/src/main/res/mipmap-xxhdpi/ic_launcher.png 
-------------------------------------------------------------------------------- /sample-android/src/main/res/mipmap-xxxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6thsolution/ApexNLP/HEAD/sample-android/src/main/res/mipmap-xxxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | //include ':sample-android' 2 | rootProject.name = 'ApexNLP' 3 | include 'apex' 4 | include 'english-nlp' 5 | include 'persian-nlp' 6 | include 'dfalex' 7 | 8 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/event/SeekBy.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.event; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public enum SeekBy { 8 | MIN, HOUR 9 | } 10 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/Entity.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.ner; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public enum Entity { 8 | TIME, DATE, LOCATION, NONE 9 | } 10 | -------------------------------------------------------------------------------- /sample-android/src/main/res/values/colors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | #3F51B5 4 | #303F9F 5 | #FF4081 6 | 7 | -------------------------------------------------------------------------------- /sample-android/src/main/res/values/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16dp 4 | 16dp 5 | 6 | 
-------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/tagger/Tagger.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.tagger; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public interface Tagger { 8 | TaggedWords tag(String[] tokenizedSentence); 9 | } 10 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Sun Jan 22 18:58:30 IRST 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-all.zip 7 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/tokenization/Tokenizer.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.tokenization; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public interface Tokenizer { 8 | 9 | String[] tokenize(String sentence); 10 | 11 | } 12 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/Chunker.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.ner; 2 | 3 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public interface Chunker { 12 | 13 | List chunk(TaggedWords taggedWords); 14 | } 15 | -------------------------------------------------------------------------------- /dfalex/build.gradle: 
-------------------------------------------------------------------------------- 1 | apply plugin: 'java' 2 | apply from: rootProject.file('gradle/jfrog-uploader.gradle') 3 | apply plugin: 'me.tatarka.retrolambda' 4 | 5 | sourceCompatibility = 1.8 6 | targetCompatibility = 1.8 7 | 8 | 9 | dependencies { 10 | testCompile group: 'junit', name: 'junit', version: '4.11' 11 | } 12 | 13 | test { 14 | exclude 'com/nobigsoftware/**' 15 | } 16 | 17 | -------------------------------------------------------------------------------- /dfalex/src/main/java/backport/java/util/function/Objects.java: -------------------------------------------------------------------------------- 1 | package backport.java.util.function; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public class Objects { 8 | 9 | public static T requireNonNull(T obj) { 10 | if (obj == null) { 11 | throw new NullPointerException(); 12 | } 13 | return obj; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/parser/Parser.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.parser; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | 5 | import org.threeten.bp.LocalDateTime; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public interface Parser { 12 | 13 | void initialize(); 14 | 15 | Event parse(LocalDateTime source, String sentence); 16 | } 17 | -------------------------------------------------------------------------------- /sample-android/src/main/res/values-w820dp/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 64dp 6 | 7 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/event/Extractor.java: 
-------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.event; 2 | 3 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 4 | 5 | import org.threeten.bp.LocalDateTime; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public interface Extractor { 12 | void extract(EventBuilder builder, LocalDateTime source, ChunkedPart chunkedPart); 13 | } 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | jdk: 4 | - oraclejdk8 5 | 6 | cache: 7 | directories: 8 | - $HOME/.m2 9 | - $HOME/.gradle 10 | 11 | before_script: 12 | - chmod +x gradlew 13 | - chmod +x .script/deploy_artifacts.sh 14 | 15 | script: 16 | - ./gradlew clean build --info 17 | 18 | after_success: 19 | - bash <(curl -s https://codecov.io/bash) 20 | - .script/deploy_artifacts.sh 21 | 22 | notifications: 23 | email: false -------------------------------------------------------------------------------- /sample-android/src/main/res/values/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/regex/ChunkDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.ner.regex; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 5 | 6 | /** 7 | * @author Saeed Masoumi (s-masoumi@live.com) 8 | */ 9 | 10 | public abstract class ChunkDetectionFilter { 11 | 12 | public abstract boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex); 13 | } 14 | 
-------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/event/EventDetector.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.event; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 5 | 6 | import org.threeten.bp.LocalDateTime; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * @author Saeed Masoumi (s-masoumi@live.com) 12 | */ 13 | 14 | public interface EventDetector { 15 | 16 | Event detect(LocalDateTime source, List chunkedParts); 17 | } 18 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/SafeAppendable.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | 4 | /** 5 | * A refinement of the {@link Appendable} interface that doesn't throw exceptions 6 | */ 7 | public interface SafeAppendable extends Appendable 8 | { 9 | @Override 10 | SafeAppendable append(char c); 11 | 12 | @Override 13 | SafeAppendable append(CharSequence csq, int start, int end); 14 | 15 | @Override 16 | SafeAppendable append(CharSequence csq); 17 | } 18 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/util/Pair.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.util; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public class Pair { 8 | public final F first; 9 | public final S second; 10 | 11 | public Pair(F first, S second) { 12 | this.first = first; 13 | this.second = second; 14 | } 15 | 16 | @Override 17 | public String toString() { 18 | return "[" + first + "," + second + "]"; 19 | } 20 | } 21 | 
-------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | # English NLP 2 | 1. Support Go to [place name] 3 | 2. Support time like( half past ten, next hour) 4 | 3. Support 930am 5 | 6 | http://www.nltk.org/book/ch07.html 7 | http://www.slideshare.net/gagan1667/opennlp-demo 8 | https://github.com/stanfordnlp/CoreNLP/blob/master/src/edu/stanford/nlp/time/SUTime.java 9 | https://helpspot.readdle.com/calendars/index.php?pg=kb.page&id=429 10 | https://github.com/dstl/baleen/blob/master/baleen/baleen-annotators/src/main/java/uk/gov/dstl/baleen/annotators/regex/Date.java 11 | https://github.com/ahmetaa/zemberek-nlp -------------------------------------------------------------------------------- /apex/build.gradle: -------------------------------------------------------------------------------- 1 | apply from: rootProject.file('gradle/jfrog-uploader.gradle') 2 | apply from: rootProject.file('gradle/jacoco.gradle') 3 | 4 | configurations { 5 | testOutput 6 | } 7 | 8 | dependencies { 9 | testOutput sourceSets.test.output 10 | } 11 | 12 | dependencies { 13 | compile project(':dfalex') 14 | compileOnly group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 15 | 16 | testCompile group: 'junit', name: 'junit', version: '4.11' 17 | testCompile group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 18 | } 19 | -------------------------------------------------------------------------------- /sample-android/src/test/java/com/sixthsolution/apex/sample/android/ExampleUnitTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.sample.android; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.*; 6 | 7 | /** 8 | * Example local unit test, which will execute on the development machine (host). 
9 | * 10 | * @see Testing documentation 11 | */ 12 | public class ExampleUnitTest { 13 | @Test 14 | public void addition_isCorrect() throws Exception { 15 | assertEquals(4, 2 + 2); 16 | } 17 | } -------------------------------------------------------------------------------- /dfalex/src/test/resources/By3Test.out.txt: -------------------------------------------------------------------------------- 1 | S0:true 2 | 0 -> S0:true 3 | 1 -> S1 4 | 2 -> S2 5 | 3 -> S0:true 6 | 4 -> S1 7 | 5 -> S2 8 | 6 -> S0:true 9 | 7 -> S1 10 | 8 -> S2 11 | 9 -> S0:true 12 | S1 13 | 0 -> S1 14 | 1 -> S2 15 | 2 -> S0:true 16 | 3 -> S1 17 | 4 -> S2 18 | 5 -> S0:true 19 | 6 -> S1 20 | 7 -> S2 21 | 8 -> S0:true 22 | 9 -> S1 23 | S2 24 | 0 -> S2 25 | 1 -> S0:true 26 | 2 -> S1 27 | 3 -> S2 28 | 4 -> S0:true 29 | 5 -> S1 30 | 6 -> S2 31 | 7 -> S0:true 32 | 8 -> S1 33 | 9 -> S2 34 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/PersianTagger.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian; 2 | 3 | import com.sixthsolution.apex.nlp.dict.Dictionary; 4 | import com.sixthsolution.apex.nlp.tagger.StandardTagger; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWord; 6 | 7 | import java.util.ListIterator; 8 | 9 | /** 10 | * @author Saeed Masoumi (s-masoumi@live.com) 11 | */ 12 | //its test totally wont work 13 | class PersianTagger extends StandardTagger { 14 | public PersianTagger(Dictionary dictionary) { 15 | super(dictionary); 16 | } 17 | 18 | 19 | } 20 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/model/PersianExtractor.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.model; 2 | 3 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 4 
| import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar; 5 | import com.sixthsolution.apex.nlp.persian.event.PersianEventBuilder; 6 | import org.threeten.bp.LocalDateTime; 7 | 8 | /** 9 | * @author Saeed Masoumi (s-masoumi@live.com) 10 | */ 11 | 12 | public interface PersianExtractor { 13 | void extract(PersianEventBuilder builder, JalaliCalendar source, ChunkedPart chunkedPart); 14 | } 15 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/util/NumericUtils.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.util; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | */ 6 | 7 | public final class NumericUtils { 8 | //TODO support arabic digits 9 | public static boolean isNumeric(String str) { 10 | try { 11 | double d = Double.parseDouble(str); 12 | } catch (NumberFormatException nfe) { 13 | return false; 14 | } 15 | return true; 16 | } 17 | 18 | public static int toInt(String str) { 19 | return Integer.parseInt(str); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/BitUtilTest.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class BitUtilTest extends TestBase 7 | { 8 | @Test 9 | public void testBitIndex() throws Exception 10 | { 11 | Assert.assertEquals(-1, BitUtils.lowBitIndex(0)); 12 | for (int i=0;i<32;++i) 13 | { 14 | Assert.assertEquals(i, BitUtils.lowBitIndex(1< builder = new DfaBuilder<>(); 13 | for (JavaToken tok : JavaToken.values()) 14 | { 15 | builder.addPattern(tok.m_pattern, tok); 16 | } 17 | EnumSet lang = EnumSet.allOf(JavaToken.class); 18 | DfaState start = builder.build(lang, null); 19 | _checkDfa(start, "JavaTest.out.txt", 
false); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /persian-nlp/build.gradle: -------------------------------------------------------------------------------- 1 | apply from: rootProject.file('gradle/jfrog-uploader.gradle') 2 | apply from: rootProject.file('gradle/jacoco.gradle') 3 | 4 | dependencies { 5 | compile project(':apex') 6 | 7 | testCompile group: 'junit', name: 'junit', version: '4.11' 8 | testCompile project(path: ':apex', configuration: 'testOutput') 9 | testCompile group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 10 | } 11 | dependencies { 12 | compile project(':dfalex') 13 | compileOnly group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 14 | 15 | testCompile group: 'junit', name: 'junit', version: '4.11' 16 | testCompile group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 17 | } 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/PersianTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian; 2 | 3 | import com.sixthsolution.apex.nlp.tokenization.StandardTokenizer; 4 | 5 | /** 6 | * Created by rozhin on 2/15/2017. 
7 | */ 8 | public class PersianTokenizer extends StandardTokenizer { 9 | 10 | 11 | @Override 12 | protected String normalize(String sentence) { 13 | return 14 | super.normalize(sentence) 15 | .replaceAll("(چار|پنج|چهار|سه|دو|یک)(\\s)(شنبه)","$1\\_$3") 16 | .replaceAll("(قبل|بعد)(\\s)(از)(\\s)(ظهر)","$1\\_$3\\_$5") 17 | .replaceAll("(روز)(\\s)(ها)","$1\\_$3"); 18 | 19 | 20 | } 21 | } -------------------------------------------------------------------------------- /dfalex/src/main/java/backport/java/util/function/Consumer.java: -------------------------------------------------------------------------------- 1 | package backport.java.util.function; 2 | 3 | /** 4 | * Represents an operation that accepts a single input argument and returns no 5 | * result. Unlike most other functional interfaces, {@code Consumer} is expected 6 | * to operate via side-effects. 7 | *

8 | *

This is a functional interface 9 | * whose functional method is {@link #accept(Object)}. 10 | * 11 | * @param the type of the input to the operation 12 | */ 13 | public interface Consumer { 14 | 15 | /** 16 | * Performs this operation on the given argument. 17 | * 18 | * @param t the input argument 19 | */ 20 | void accept(T t); 21 | } 22 | -------------------------------------------------------------------------------- /english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/filter/LocationDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.filter; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public class LocationDetectionFilter extends ChunkDetectionFilter { 12 | @Override 13 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) { 14 | switch (label) { 15 | case LOCATION: 16 | return true; 17 | } 18 | return false; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/filter/LocationDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.filter; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public class LocationDetectionFilter extends ChunkDetectionFilter { 12 | @Override 13 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) { 14 | switch (label) { 15 | case LOCATION: 
16 | return true; 17 | } 18 | return false; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /english-nlp/build.gradle: -------------------------------------------------------------------------------- 1 | apply from: rootProject.file('gradle/jfrog-uploader.gradle') 2 | apply from: rootProject.file('gradle/jacoco.gradle') 3 | 4 | dependencies { 5 | compile project(':apex') 6 | 7 | testCompile group: 'junit', name: 'junit', version: '4.11' 8 | testCompile project(path: ':apex', configuration: 'testOutput') 9 | testCompile group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 10 | 11 | } 12 | dependencies { 13 | compile project(':dfalex') 14 | compileOnly group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 15 | 16 | testCompile group: 'junit', name: 'junit', version: '4.11' 17 | testCompile group: 'org.threeten', name: 'threetenbp', version: '1.3.3' 18 | } 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /sample-android/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # By default, the flags in this file are appended to flags specified 3 | # in /Users/Saeed/Library/Android/sdk/tools/proguard/proguard-android.txt 4 | # You can edit the include path and order by changing the proguardFiles 5 | # directive in build.gradle. 
6 | # 7 | # For more details, see 8 | # http://developer.android.com/guide/developing/tools/proguard.html 9 | 10 | # Add any project specific keep options here: 11 | 12 | # If your project uses WebView with JS, uncomment the following 13 | # and specify the fully qualified class name to the JavaScript interface 14 | # class: 15 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 16 | # public *; 17 | #} 18 | -------------------------------------------------------------------------------- /dfalex/src/main/java/backport/java/util/function/IntConsumer.java: -------------------------------------------------------------------------------- 1 | package backport.java.util.function; 2 | 3 | /** 4 | * Represents an operation that accepts a single {@code int}-valued argument and 5 | * returns no result. This is the primitive type specialization of 6 | * {@link Consumer} for {@code int}. Unlike most other functional interfaces, 7 | * {@code IntConsumer} is expected to operate via side-effects. 8 | *
<p>
9 | *
<p>
This is a functional interface 10 | * whose functional method is {@link #accept(int)}. 11 | * 12 | * @see Consumer 13 | */ 14 | public interface IntConsumer { 15 | 16 | /** 17 | * Performs this operation on the given argument. 18 | * 19 | * @param value the input argument 20 | */ 21 | void accept(int value); 22 | } -------------------------------------------------------------------------------- /sample-android/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/EnglishTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english; 2 | 3 | import com.sixthsolution.apex.nlp.tokenization.StandardTokenizer; 4 | 5 | /** 6 | * @author Saeed Masoumi (s-masoumi@live.com) 7 | * @author Rozhin Bayati 8 | */ 9 | 10 | public class EnglishTokenizer extends StandardTokenizer { 11 | @Override 12 | protected String normalize(String sentence) { 13 | return super.normalize(sentence) 14 | .replaceAll("(\\d+)(\\s+)(\\d+)", "$1 , $3") 15 | .replaceAll("(,)"," $1 ") 16 | .replaceAll("(in)(\\s+)(the)(\\s+)(evening|afternoon)", "$1_$3_$5") 17 | .replaceAll("(\\d+)(th)","") 18 | // .replaceAll("(\\s+)(from|starts)(\\s+)", "$1, $2$3") 19 | ; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /sample-android/src/main/res/layout/activity_main.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 18 | 19 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/Label.java: -------------------------------------------------------------------------------- 1 | package 
com.sixthsolution.apex.nlp.ner; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | * @author Rozhin Bayati 6 | */ 7 | 8 | public enum Label { 9 | NONE, DATE, TIME, LOCATION, TITLE, 10 | FIXED_TIME, 11 | RELATIVE_TIME, 12 | RANGE_TIME, 13 | /** 14 | * Formal dates are those in which the month, day, and year are represented as integers 15 | * separated by a common separator character. The year is optional and may proceed the month or 16 | * succeed the day of month. If a two-digit year is given, it must succeed the day of month. 17 | */ 18 | FORMAL_DATE, 19 | RELAX_DATE, 20 | RELATIVE_DATE, 21 | EXPLICIT_RELATIVE_DATE, 22 | GLOBAL_DATE, 23 | FOREVER_DATE, 24 | LIMITED_DATE, 25 | DATE_RULES, 26 | RECURRENCE, 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/tokenization/StandardTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.tokenization; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | * @author Rozhin Bayati 6 | */ 7 | 8 | public class StandardTokenizer implements Tokenizer { 9 | 10 | private static final String REGEX = "([\\s]+)"; 11 | 12 | @Override 13 | public String[] tokenize(String sentence) { 14 | return normalize(sentence).split(splitRegexRule()); 15 | } 16 | 17 | protected String normalize(String sentence) { 18 | return sentence 19 | //useful for num/num/num or numPM num.numPM ... 
20 | .replaceAll("(\\d+)", " $1 ") 21 | .replaceAll("(-|/)", " $1 ") 22 | .trim(); 23 | } 24 | 25 | protected String splitRegexRule() { 26 | return REGEX; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /persian-nlp/src/test/java/com/sixthsolution/apex/nlp/persian/test/PersianDetectorTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.test; 2 | 3 | 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 5 | import com.sixthsolution.apex.nlp.persian.PersianTokenizer; 6 | import com.sixthsolution.apex.nlp.persian.PersianVocabulary; 7 | import com.sixthsolution.apex.nlp.tagger.StandardTagger; 8 | import com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion; 9 | 10 | import org.junit.Before; 11 | 12 | /** 13 | * Created by rozhin on 7/30/2017. 14 | */ 15 | 16 | 17 | 18 | public abstract class PersianDetectorTest { 19 | @Before 20 | public void setUp() { 21 | ChunkDetectorAssertion.init(new PersianTokenizer(), 22 | new StandardTagger(PersianVocabulary.build()), provideDetector()); 23 | } 24 | 25 | protected abstract ChunkDetector provideDetector(); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /sample-android/src/main/java/com/sixthsolution/apex/sample/android/MainActivity.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.sample.android; 2 | 3 | import android.os.Bundle; 4 | import android.support.v7.app.AppCompatActivity; 5 | 6 | import com.sixthsolution.apex.Apex; 7 | import com.sixthsolution.apex.model.Event; 8 | 9 | public class MainActivity extends AppCompatActivity { 10 | 11 | @Override 12 | protected void onCreate(Bundle savedInstanceState) { 13 | super.onCreate(savedInstanceState); 14 | setContentView(R.layout.activity_main); 15 | long t = System.currentTimeMillis(); 16 | Event event = 
Apex.nlp("en", "Wash the Car at Mall on march 19 at 8.45pm 12/12/12"); 17 | System.out.println(event.start().toString()); 18 | System.out.println(event.end().toString()); 19 | System.out.println("Takes " + (System.currentTimeMillis() - t) + " millis"); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/EnglishDetectorTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.test.tokenization; 2 | 3 | import com.sixthsolution.apex.nlp.english.EnglishTokenizer; 4 | import com.sixthsolution.apex.nlp.english.EnglishVocabulary; 5 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 6 | import com.sixthsolution.apex.nlp.tagger.StandardTagger; 7 | import com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion; 8 | 9 | import org.junit.Before; 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | */ 14 | 15 | public abstract class EnglishDetectorTest { 16 | @Before 17 | public void setUp() { 18 | ChunkDetectorAssertion.init(new EnglishTokenizer(), 19 | new StandardTagger(EnglishVocabulary.build()), provideDetector()); 20 | } 21 | 22 | protected abstract ChunkDetector provideDetector(); 23 | 24 | } 25 | -------------------------------------------------------------------------------- /gradle/jacoco.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: "jacoco" 2 | 3 | jacocoTestReport { 4 | dependsOn = [test] 5 | group = "Jacoco Reporting" 6 | description = "Aggregate Jacoco coverage reports." 
7 | additionalSourceDirs = files(sourceSets.main.allSource.srcDirs) 8 | sourceDirectories = files(sourceSets.main.allSource.srcDirs) 9 | classDirectories = files(sourceSets.main.output) 10 | executionData = fileTree(dir: 'build/jacoco', include: '**/*.exec') 11 | reports { 12 | xml.enabled = true 13 | html.enabled = true 14 | csv.enabled = false 15 | html.destination = "${buildDir}/reports/jacoco/test/html" 16 | xml.destination = "${buildDir}/reports/jacoco/test/jacocoTestReport.xml" 17 | } 18 | onlyIf = { 19 | true 20 | } 21 | doFirst { 22 | executionData = files(executionData.findAll { 23 | it.exists() 24 | }) 25 | } 26 | } 27 | 28 | check.dependsOn jacocoTestReport -------------------------------------------------------------------------------- /sample-android/src/androidTest/java/com/sixthsolution/apex/sample/android/ExampleInstrumentedTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.sample.android; 2 | 3 | import android.content.Context; 4 | import android.support.test.InstrumentationRegistry; 5 | import android.support.test.runner.AndroidJUnit4; 6 | 7 | import org.junit.Test; 8 | import org.junit.runner.RunWith; 9 | 10 | import static org.junit.Assert.*; 11 | 12 | /** 13 | * Instrumentation test, which will execute on an Android device. 14 | * 15 | * @see Testing documentation 16 | */ 17 | @RunWith(AndroidJUnit4.class) 18 | public class ExampleInstrumentedTest { 19 | @Test 20 | public void useAppContext() throws Exception { 21 | // Context of the app under test. 
22 | Context appContext = InstrumentationRegistry.getTargetContext(); 23 | 24 | assertEquals("com.sixthsolution.apex.sample.android", appContext.getPackageName()); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /dfalex/src/main/java/backport/java/util/function/BiConsumer.java: -------------------------------------------------------------------------------- 1 | package backport.java.util.function; 2 | 3 | 4 | /** 5 | * Represents an operation that accepts two input arguments and returns no 6 | * result. This is the two-arity specialization of {@link Consumer}. 7 | * Unlike most other functional interfaces, {@code BiConsumer} is expected 8 | * to operate via side-effects. 9 | *
<p>
10 | *
<p>
This is a functional interface 11 | * whose functional method is {@link #accept(Object, Object)}. 12 | * 13 | * @param the type of the first argument to the operation 14 | * @param the type of the second argument to the operation 15 | * @see Consumer 16 | */ 17 | public interface BiConsumer { 18 | 19 | /** 20 | * Performs this operation on the given arguments. 21 | * 22 | * @param t the first input argument 23 | * @param u the second input argument 24 | */ 25 | void accept(T t, U u); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/filter/DateDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.filter; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | * @author Rozhin Bayati 10 | */ 11 | 12 | public class DateDetectionFilter extends ChunkDetectionFilter { 13 | @Override 14 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) { 15 | switch (label) { 16 | case FORMAL_DATE: 17 | case RELAX_DATE: 18 | case FOREVER_DATE: 19 | case GLOBAL_DATE: 20 | case RELATIVE_DATE: 21 | case LIMITED_DATE: 22 | case EXPLICIT_RELATIVE_DATE: 23 | return true; 24 | } 25 | return false; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/filter/DateDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.filter; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 5 | import 
com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | * @author Rozhin Bayati 10 | */ 11 | 12 | public class DateDetectionFilter extends ChunkDetectionFilter { 13 | @Override 14 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) { 15 | switch (label) { 16 | case FORMAL_DATE: 17 | case RELAX_DATE: 18 | case FOREVER_DATE: 19 | case GLOBAL_DATE: 20 | case RELATIVE_DATE: 21 | case LIMITED_DATE: 22 | case EXPLICIT_RELATIVE_DATE: 23 | return true; 24 | } 25 | return false; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /dfalex/src/main/java/backport/java/util/function/ObjIntConsumer.java: -------------------------------------------------------------------------------- 1 | package backport.java.util.function; 2 | 3 | 4 | 5 | /** 6 | * Represents an operation that accepts an object-valued and a 7 | * {@code int}-valued argument, and returns no result. This is the 8 | * {@code (reference, int)} specialization of {@link BiConsumer}. 9 | * Unlike most other functional interfaces, {@code ObjIntConsumer} is 10 | * expected to operate via side-effects. 11 | * 12 | *
<p>
This is a functional interface 13 | * whose functional method is {@link #accept(Object, int)}. 14 | * 15 | * @param the type of the object argument to the operation 16 | * 17 | * @since 1.8 18 | */ 19 | public interface ObjIntConsumer { 20 | 21 | /** 22 | * Performs this operation on the given arguments. 23 | * 24 | * @param t the first input argument 25 | * @param value the second input argument 26 | */ 27 | void accept(T t, int value); 28 | } 29 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/model/Frequency.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 6thSolution 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.sixthsolution.apex.model; 18 | 19 | /** 20 | * The frequency that the {@link Event} should be repeated (such as "DAILY"). 
21 | * 22 | * @author Saeed Masoumi (saeed@6thsolution.com) 23 | */ 24 | public enum Frequency { 25 | DAILY, WEEKLY, MONTHLY, YEARLY 26 | } 27 | -------------------------------------------------------------------------------- /english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/EnglishTaggerTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.test.tokenization; 2 | 3 | import com.sixthsolution.apex.nlp.english.EnglishTokenizer; 4 | import com.sixthsolution.apex.nlp.english.EnglishVocabulary; 5 | import com.sixthsolution.apex.nlp.tagger.StandardTagger; 6 | 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import static com.sixthsolution.apex.nlp.test.TaggerAssertion.assertSentence; 11 | import static com.sixthsolution.apex.nlp.test.TaggerAssertion.init; 12 | 13 | /** 14 | * @author Saeed Masoumi (s-masoumi@live.com) 15 | */ 16 | 17 | public class EnglishTaggerTest { 18 | 19 | @Before 20 | public void setUp() throws Exception { 21 | init(new EnglishTokenizer(), new StandardTagger(EnglishVocabulary.build())); 22 | } 23 | 24 | @Test 25 | public void test() { 26 | assertSentence("party on monday 10").hasTags("N|PP|D_WD|NM"); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /.script/deploy_artifacts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Deploy a jar, source jar, and javadoc jar to bintray repo. 4 | # 5 | # Adapted from https://coderwall.com/p/9b_lfq and 6 | # http://benlimmer.com/2013/12/26/automatically-publish-javadoc-to-gh-pages-with-travis-ci/ 7 | 8 | SLUG="6thsolution/ApexNLP" 9 | BRANCH="master" 10 | JDK="oraclejdk8" 11 | 12 | set -e 13 | 14 | if [ "$TRAVIS_REPO_SLUG" != "$SLUG" ]; then 15 | echo "Skipping deployment: wrong repository. Expected '$SLUG' but was '$TRAVIS_REPO_SLUG'." 
16 | elif [ "$TRAVIS_JDK_VERSION" != "$JDK" ]; then 17 | echo "Skipping deployment: wrong JDK. Expected '$JDK' but was '$TRAVIS_JDK_VERSION'." 18 | elif [ "$TRAVIS_PULL_REQUEST" != "false" ]; then 19 | echo "Skipping deployment: was pull request." 20 | elif [ "$TRAVIS_BRANCH" != "$BRANCH" ]; then 21 | echo "Skipping deployment: wrong branch. Expected '$BRANCH' but was '$TRAVIS_BRANCH'." 22 | else 23 | echo "Deploying artifacts..." 24 | ./gradlew publishFromCI --info 25 | echo "Artifacts deployed!" 26 | fi -------------------------------------------------------------------------------- /dfalex/src/main/java/backport/java/util/function/Function.java: -------------------------------------------------------------------------------- 1 | package backport.java.util.function; 2 | 3 | /** 4 | * Represents a function that accepts one argument and produces a result. 5 | *
<p>
6 | *
<p>
This is a functional interface 7 | * whose functional method is {@link #apply(Object)}. 8 | * 9 | * @param the type of the input to the function 10 | * @param the type of the result of the function 11 | */ 12 | public interface Function { 13 | 14 | /** 15 | * Returns a function that always returns its input argument. 16 | * 17 | * @param the type of the input and output objects to the function 18 | * @return a function that always returns its input argument 19 | */ 20 | static Function identity() { 21 | return t -> t; 22 | } 23 | 24 | /** 25 | * Applies this function to the given argument. 26 | * 27 | * @param t the function argument 28 | * @return the function result 29 | */ 30 | R apply(T t); 31 | } 32 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/model/WeekDay.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 6thSolution 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.sixthsolution.apex.model; 18 | 19 | /** 20 | * @author Saeed Masoumi (saeed@6thsolution.com) 21 | */ 22 | public enum WeekDay { 23 | MON(1), TUE(2), WED(3), THU(4), FRI(5), SAT(6), SUN(7); 24 | 25 | private final int dayofWeek; 26 | 27 | WeekDay(int dayOfWeek) { 28 | this.dayofWeek = dayOfWeek; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/dict/TagValue.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.dict; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Entity; 4 | import com.sixthsolution.apex.nlp.util.Triple; 5 | 6 | /** 7 | * @author Saeed Masoumi (s-masoumi@live.com) 8 | */ 9 | 10 | public class TagValue extends Triple { 11 | 12 | public Tag tag; 13 | public Object value; 14 | public Entity entity; 15 | 16 | private volatile String toStringResult; 17 | 18 | public TagValue(Tag tag, Object value, Entity entity) { 19 | super(tag, value, entity); 20 | this.tag = tag; 21 | this.value = value; 22 | this.entity = entity; 23 | } 24 | 25 | @Override 26 | public String toString() { 27 | if (toStringResult == null) { 28 | toStringResult = "Triple{" + 29 | "tag=" + tag.name() + 30 | ", value=" + value + 31 | ", entity=" + entity + 32 | '}'; 33 | } 34 | 35 | return toStringResult; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/tagger/TaggedWord.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.tagger; 2 | 3 | import com.sixthsolution.apex.nlp.dict.Tag; 4 | import com.sixthsolution.apex.nlp.dict.Tags; 5 | 6 | import java.util.Collection; 7 | import java.util.Set; 8 | 9 | /** 10 | * @author Saeed Masoumi (s-masoumi@live.com) 11 | */ 12 | 13 | public class TaggedWord { 14 | 15 | private Tags tags; 16 
| private final String word; 17 | 18 | public TaggedWord(String word) { 19 | this.word = word; 20 | this.tags = new Tags(); 21 | } 22 | 23 | public TaggedWord(String word, Tags tags) { 24 | this.word = word; 25 | this.tags = tags; 26 | } 27 | 28 | public String getWord() { 29 | return word; 30 | } 31 | 32 | public Tags getTags() { 33 | return tags; 34 | } 35 | 36 | public boolean hasTag(Tag... tags) { 37 | return getTags().containsTag(tags); 38 | } 39 | 40 | @Override 41 | public String toString() { 42 | return "TaggedWord{" + 43 | "word='" + word + '\'' + 44 | ", tags=" + tags + 45 | '}'; 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/regex/RegExChunker.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.ner.regex; 2 | 3 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 4 | import com.sixthsolution.apex.nlp.ner.Chunker; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | /** 11 | * @author Saeed Masoumi (s-masoumi@live.com) 12 | */ 13 | 14 | public class RegExChunker implements Chunker { 15 | 16 | private final List chunkDetectors; 17 | 18 | public RegExChunker(List chunkDetectors) { 19 | this.chunkDetectors = chunkDetectors; 20 | } 21 | 22 | @Override 23 | public List chunk(TaggedWords taggedWords) { 24 | TaggedWords clonedTaggedWords = (TaggedWords) taggedWords.clone(); 25 | List chunkedParts = new ArrayList<>(); 26 | for (ChunkDetector detector : chunkDetectors) { 27 | ChunkedPart result = detector.detect(clonedTaggedWords); 28 | if (result != null) { 29 | chunkedParts.add(result); 30 | } 31 | } 32 | return chunkedParts; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /apex/src/test/java/com/sixthsolution/apex/nlp/test/TaggerAssertion.java: 
-------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.test; 2 | 3 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 4 | import com.sixthsolution.apex.nlp.tagger.Tagger; 5 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public final class TaggerAssertion { 12 | 13 | private static TaggerAssertion instance = null; 14 | private Tokenizer tokenizer; 15 | private Tagger tagger; 16 | 17 | private TaggerAssertion(Tokenizer tokenizer, Tagger tagger) { 18 | this.tokenizer = tokenizer; 19 | this.tagger = tagger; 20 | } 21 | 22 | private static TaggerAssertion getInstance() { 23 | return instance; 24 | } 25 | 26 | public static void init(Tokenizer tokenizer, Tagger tagger) { 27 | instance = new TaggerAssertion(tokenizer, tagger); 28 | } 29 | 30 | public static TagAssertion assertSentence(String word) { 31 | return getInstance().makeTagAssertion(word); 32 | } 33 | 34 | private TagAssertion makeTagAssertion(String word) { 35 | TaggedWords taggedWords = tagger.tag(tokenizer.tokenize(word)); 36 | return new TagAssertion(taggedWords); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/dict/DictionaryBuilder.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.dict; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Entity; 4 | 5 | /** 6 | * @author Saeed Masoumi (s-masoumi@live.com) 7 | */ 8 | public class DictionaryBuilder { 9 | 10 | private Dictionary dictionary = new Dictionary(); 11 | 12 | public Dictionary build() { 13 | return dictionary; 14 | } 15 | 16 | public TagEntryBuilder tag(Tag tag, Entity entity) { 17 | return new TagEntryBuilder(dictionary, tag, entity); 18 | } 19 | 20 | public static class TagEntryBuilder { 21 | private final Dictionary 
dictionary; 22 | private final Tag tag; 23 | private final Entity entity; 24 | 25 | TagEntryBuilder(Dictionary dictionary, Tag tag, Entity entity) { 26 | this.dictionary = dictionary; 27 | this.tag = tag; 28 | this.entity = entity; 29 | } 30 | 31 | public TagEntryBuilder e(Object value, String... words) { 32 | dictionary.addAll(words, tag, value, entity); 33 | return this; 34 | } 35 | 36 | public TagEntryBuilder e(String... words) { 37 | dictionary.addAll(words, tag, "", entity); 38 | return this; 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/EnglishTokenizationTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.test.tokenization; 2 | 3 | import com.sixthsolution.apex.nlp.english.EnglishTokenizer; 4 | 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | 8 | import static com.sixthsolution.apex.nlp.test.TokenizerAssertion.assertTokens; 9 | import static com.sixthsolution.apex.nlp.test.TokenizerAssertion.init; 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | * @author Rozhin Bayati 14 | */ 15 | 16 | public class EnglishTokenizationTest { 17 | 18 | @Before 19 | public void setUp() { 20 | init(new EnglishTokenizer()); 21 | } 22 | 23 | @Test 24 | public void test_sentences() { 25 | assertTokens( 26 | "Pizza party on the 2nd Friday of every month at 1pm\n", 27 | "Pizza", "party", "on", "the", "2", "nd", "Friday", "of", "every", "month", "at", 28 | "1", "pm"); 29 | 30 | assertTokens( 31 | "Mission Trip at Jakarta on Nov 13-17 calendar Church\n", 32 | "Mission", "Trip", "at", "Jakarta", "on", "Nov", "13", "-", "17", "calendar", 33 | "Church" 34 | ); 35 | assertTokens("Go GYM 2.05.2013 19:00", 36 | "Go", "GYM", "2", ".", "05", ".", "2013", ",", "19", ":", "00"); 37 | } 38 | } 39 | 
-------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/tagger/TaggedWords.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.tagger; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Iterator; 5 | import java.util.List; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public class TaggedWords extends ArrayList { 12 | 13 | public TaggedWords (){ 14 | } 15 | 16 | public TaggedWords (List in){ 17 | this.addAll(in); 18 | } 19 | 20 | public void removeRange(int fromIndex, int toIndex) { 21 | super.removeRange(fromIndex, toIndex); 22 | } 23 | 24 | @Override 25 | public String toString() { 26 | StringBuilder sb = new StringBuilder(); 27 | Iterator itr = iterator(); 28 | while (itr.hasNext()) { 29 | TaggedWord next = itr.next(); 30 | sb.append(next.getWord()).append(" [").append(next.getTags().toString()).append("]"); 31 | if (itr.hasNext()) { 32 | sb.append(", "); 33 | } 34 | } 35 | return sb.toString(); 36 | } 37 | 38 | public List newSubList(int startIndex, int endIndex) { 39 | List taggedWords = new ArrayList<>(); 40 | for (int i = startIndex; i < endIndex; i++) { 41 | taggedWords.add(get(i)); 42 | } 43 | return taggedWords; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/ReplacementSelector.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | /** 4 | * For search and replace operations, a functional interface that is called to select replacement text for 5 | * matches, based on the MATCHRESULT. 6 | *
<p>
7 | * This is called by a {@link StringSearcher#findAndReplace(String, ReplacementSelector)} to replace instances 8 | * of patterns found in a string. 9 | */ 10 | public interface ReplacementSelector 11 | { 12 | /** 13 | * This will be called for each instance of each pattern found 14 | * 15 | * @param dest The replacement text for the matching substring should be written here 16 | * @param mr The MATCHRESULT produced by the match 17 | * @param src The string being searched, or a part of the stream being searched that contains the current match 18 | * @param startPos the start index of the current match in src 19 | * @param endPos the end index of the current match in src 20 | * @return if this is >0, then it is the position in the source string at which to continue processing after 21 | * replacement. If you set this <= startPos, a runtime exception will be thrown to 22 | * abort the infinite loop that would result. Almost always return 0. 23 | */ 24 | int apply(SafeAppendable dest, MATCHRESULT mr, CharSequence src, int startPos, int endPos); 25 | } -------------------------------------------------------------------------------- /.script/deploy_javadocs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | REPO="git@github.com:6thsolution/ApexNLP.git" 6 | 7 | DIR=temp-clone 8 | 9 | # Delete any existing temporary website clone 10 | rm -rf $DIR 11 | 12 | # Clone the current repo into temp folder 13 | git clone $REPO $DIR 14 | 15 | # Move working directory into temp folder 16 | cd $DIR 17 | # Checkout and track the gh-pages branch 18 | git checkout -t origin/gh-pages 19 | 20 | # Artifactory location 21 | server=https://oss.jfrog.org 22 | repo=oss-snapshot-local 23 | 24 | # jfrog artifacts location 25 | for name in apex dfalex english-nlp persian-nlp 26 | do 27 | artifact=com/sixthsolution/easymvp/$name 28 | path=$server/$repo/$artifact 29 | version=`curl -s $path/maven-metadata.xml | grep latest 
| sed "s/.*\([^<]*\)<\/latest>.*/\1/"` 30 | build=`curl -s $path/$version/maven-metadata.xml | grep '' | head -1 | sed "s/.*\([^<]*\)<\/value>.*/\1/"` 31 | jar=$name-$build-javadoc.jar 32 | url=$path/$version/$jar 33 | 34 | # Download 35 | echo $url 36 | curl -L $url > "$name".zip 37 | javadoc="${name:8}-javadoc" 38 | mkdir -p "$javadoc" 39 | unzip "$name".zip -d "$javadoc" 40 | rm "$name".zip 41 | done 42 | 43 | # Stage all files in git and create a commit 44 | git add . 45 | git add -u 46 | git commit -m "java docs updated at $(date)" 47 | 48 | # Push the new files up to GitHub 49 | git push origin gh-pages 50 | 51 | cd .. 52 | rm -rf $DIR -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/event/StandardLocationExtractor.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.event; 2 | 3 | import com.sixthsolution.apex.nlp.dict.Tag; 4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWord; 6 | 7 | import org.threeten.bp.LocalDateTime; 8 | 9 | import java.util.Iterator; 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | * 14 | */ 15 | 16 | public class StandardLocationExtractor implements Extractor { 17 | 18 | @Override 19 | public void extract(EventBuilder builder, LocalDateTime source, ChunkedPart chunkedPart) { 20 | switch (chunkedPart.getLabel()) { 21 | case LOCATION: 22 | String location = getLocation(chunkedPart); 23 | builder.setLocation(location); 24 | break; 25 | } 26 | } 27 | 28 | private String getLocation(ChunkedPart chunkedPart) { 29 | StringBuilder sb = new StringBuilder(); 30 | Iterator itr = chunkedPart.getTaggedWords().iterator(); 31 | while (itr.hasNext()) { 32 | TaggedWord next = itr.next(); 33 | if (!next.hasTag(Tag.LOCATION_PREFIX)) { 34 | sb.append(next.getWord()); 35 | if (itr.hasNext()) { 36 | sb.append(" "); 37 | } 38 | } 
39 | 40 | } 41 | return sb.toString(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/ChunkedPart.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.ner; 2 | 3 | import com.sixthsolution.apex.nlp.tagger.TaggedWord; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | 11 | public class ChunkedPart { 12 | 13 | private final Entity entity; 14 | private final Label label; 15 | private final List taggedWords; 16 | 17 | public ChunkedPart(Entity entity,Label label, 18 | List taggedWords) { 19 | this.entity =entity; 20 | this.label = label; 21 | this.taggedWords = taggedWords; 22 | } 23 | 24 | public Label getLabel() { 25 | return label; 26 | } 27 | 28 | public Entity getEntity() { 29 | return entity; 30 | } 31 | 32 | public List getTaggedWords(int start,int end) { 33 | return taggedWords.subList(start,end); 34 | } 35 | public List getTaggedWords() { 36 | return taggedWords; 37 | } 38 | public String toStringTaggedWords() { 39 | StringBuilder sb = new StringBuilder(); 40 | for (TaggedWord taggedWord : taggedWords) { 41 | sb.append(taggedWord.getWord()).append(" "); 42 | } 43 | return sb.toString().trim(); 44 | } 45 | 46 | @Override 47 | public String toString() { 48 | return label.name() + " -> " + toStringTaggedWords() ; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaTransitionConsumer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | /** 19 | * A functional interface that can accept transitions 20 | *

21 | * This is used with {@link DfaState#enumerateTransitions(DfaTransitionConsumer)} 22 | */ 23 | public interface DfaTransitionConsumer 24 | { 25 | /** 26 | * Accept a DFA transition 27 | *

28 | * This call indicates that the current state has a transition to target on 29 | * every character with code point >=firstChar and <=lastChar 30 | * @param firstChar First character that triggers this transition 31 | * @param lastChar Last character that triggers this transition 32 | * @param target Target state of this transition 33 | */ 34 | void acceptTransition(char firstChar, char lastChar, DfaState target); 35 | } 36 | -------------------------------------------------------------------------------- /sample-android/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | jcenter() 4 | } 5 | dependencies { 6 | classpath 'com.android.tools.build:gradle:2.2.3' 7 | 8 | // NOTE: Do not place your application dependencies here; they belong 9 | // in the individual module build.gradle files 10 | } 11 | } 12 | apply plugin: 'com.android.application' 13 | 14 | android { 15 | compileSdkVersion 25 16 | buildToolsVersion "25.0.0" 17 | 18 | defaultConfig { 19 | applicationId "com.sixthsolution.apex.sample.android" 20 | minSdkVersion 15 21 | targetSdkVersion 25 22 | versionCode 1 23 | versionName "1.0" 24 | 25 | testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" 26 | 27 | } 28 | buildTypes { 29 | release { 30 | minifyEnabled false 31 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' 32 | } 33 | } 34 | } 35 | 36 | dependencies { 37 | compile project(':apex') 38 | compile project(':dfalex') 39 | compile project(':english-nlp') 40 | compile fileTree(dir: 'libs', include: ['*.jar']) 41 | compile group: 'org.threeten', name: 'threetenbp', version: '1.3.2' 42 | 43 | androidTestCompile('com.android.support.test.espresso:espresso-core:2.2.2', { 44 | exclude group: 'com.android.support', module: 'support-annotations' 45 | }) 46 | compile 'com.android.support:appcompat-v7:25.0.1' 47 | testCompile 'junit:junit:4.12' 48 | } 49 | 
-------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/RawDfa.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.util.List; 19 | 20 | /** 21 | * A DFA in uncompressed form 22 | */ 23 | class RawDfa 24 | { 25 | private final List m_dfaStates; 26 | private final List m_acceptSets; 27 | private final int[] m_startStates; 28 | 29 | /** 30 | * Create a new RawDfa. 
31 | */ 32 | public RawDfa(List dfaStates, 33 | List acceptSets, 34 | int[] startStates) 35 | { 36 | m_dfaStates = dfaStates; 37 | m_acceptSets = acceptSets; 38 | m_startStates = startStates; 39 | } 40 | 41 | public List getStates() 42 | { 43 | return m_dfaStates; 44 | } 45 | 46 | public List getAcceptSets() 47 | { 48 | return m_acceptSets; 49 | } 50 | 51 | public int[] getStartStates() 52 | { 53 | return m_startStates; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/tagger/StandardTagger.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.tagger; 2 | 3 | import com.sixthsolution.apex.nlp.dict.Dictionary; 4 | import com.sixthsolution.apex.nlp.dict.Tag; 5 | import com.sixthsolution.apex.nlp.dict.TagValue; 6 | import com.sixthsolution.apex.nlp.dict.Tags; 7 | import com.sixthsolution.apex.nlp.ner.Entity; 8 | 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static com.sixthsolution.apex.nlp.util.NumericUtils.isNumeric; 13 | import static com.sixthsolution.apex.nlp.util.NumericUtils.toInt; 14 | 15 | /** 16 | * @author Saeed Masoumi (s-masoumi@live.com) 17 | */ 18 | 19 | public class StandardTagger implements Tagger { 20 | 21 | protected final Dictionary dictionary; 22 | 23 | public StandardTagger(Dictionary dictionary) { 24 | this.dictionary = dictionary; 25 | } 26 | 27 | @Override 28 | public TaggedWords tag(String[] tokenizedSentence) { 29 | TaggedWords taggedWords = new TaggedWords(); 30 | List tokens = Arrays.asList(tokenizedSentence); 31 | for (String token : tokens) { 32 | Tags tags = null; 33 | if (isNumeric(token)) { 34 | tags = new Tags(); 35 | tags.add(new TagValue(Tag.NUMBER, toInt(token), Entity.NONE)); 36 | } else { 37 | tags = dictionary.getRelatedTags(token, true); 38 | } 39 | taggedWords.add(new TaggedWord(token, tags)); 40 | } 41 | return taggedWords; 42 | } 
43 | 44 | 45 | } 46 | -------------------------------------------------------------------------------- /english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/EnglishParser.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english; 2 | 3 | import com.sixthsolution.apex.nlp.event.EventDetector; 4 | import com.sixthsolution.apex.nlp.event.StandardEventDetector; 5 | import com.sixthsolution.apex.nlp.ner.Chunker; 6 | import com.sixthsolution.apex.nlp.ner.regex.RegExChunker; 7 | import com.sixthsolution.apex.nlp.parser.StandardParserBase; 8 | import com.sixthsolution.apex.nlp.tagger.StandardTagger; 9 | import com.sixthsolution.apex.nlp.tagger.Tagger; 10 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer; 11 | 12 | import java.util.Arrays; 13 | 14 | /** 15 | * @author Saeed Masoumi (s-masoumi@live.com) 16 | */ 17 | 18 | public class EnglishParser extends StandardParserBase { 19 | 20 | @Override 21 | public void initialize() { 22 | super.initialize(); 23 | //TODO need some training sentences 24 | } 25 | 26 | @Override 27 | protected Tagger provideTagger() { 28 | return new StandardTagger(EnglishVocabulary.build()); 29 | } 30 | 31 | @Override 32 | protected Tokenizer provideTokenizer() { 33 | return new EnglishTokenizer(); 34 | } 35 | 36 | @Override 37 | protected Chunker provideChunker() { 38 | return new RegExChunker( 39 | Arrays.asList(new TimeDetector(), new LocationDetector(), new DateDetector())); 40 | } 41 | 42 | @Override 43 | protected EventDetector provideEventDetector() { 44 | return new StandardEventDetector(new StandardExtractor()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/dict/Dictionary.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.dict; 2 | 3 | import 
com.sixthsolution.apex.nlp.ner.Entity; 4 | 5 | import java.util.HashMap; 6 | 7 | /** 8 | * @author Saeed Masoumi (s-masoumi@live.com) 9 | */ 10 | public class Dictionary extends HashMap { 11 | private static final Tags NONE_TAG; 12 | 13 | static { 14 | NONE_TAG = new Tags(); 15 | NONE_TAG.add(new TagValue(Tag.NONE, "", Entity.NONE)); 16 | } 17 | 18 | public void addAll(String[] words, Tag tag, Object value, Entity entity) { 19 | TagValue tagValue = new TagValue(tag, value, entity); 20 | for (String word : words) { 21 | update(word, tagValue); 22 | } 23 | } 24 | 25 | public void update(String word, TagValue tagValue) { 26 | Tags posting = getOrEmpty(word); 27 | posting.add(tagValue); 28 | put(word, posting); 29 | } 30 | 31 | public Tags getOrEmpty(String word) { 32 | if (!containsKey(word)) { 33 | return new Tags(); 34 | } 35 | return get(word); 36 | } 37 | 38 | public Tags getRelatedTags(String word, boolean caseInsensitive) { 39 | if (caseInsensitive) { 40 | return getTags(word.toLowerCase()); 41 | } 42 | return getTags(word); 43 | } 44 | 45 | private Tags getTags(String word) { 46 | Tags tags = getOrEmpty(word); 47 | if (!tags.isEmpty()) { 48 | return tags; 49 | } 50 | return NONE_TAG; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/StringMatcherTest.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | 4 | import org.junit.Assert; 5 | import org.junit.Test; 6 | 7 | public class StringMatcherTest extends TestBase 8 | { 9 | @Test 10 | public void testStringMatcher() 11 | { 12 | DfaState dfa; 13 | { 14 | DfaBuilder builder = new DfaBuilder<>(); 15 | builder.addPattern(Pattern.regex("a[ab]*b"), 1); 16 | builder.addPattern(Pattern.regex("a[ab]*c"), 2); 17 | dfa = builder.build(null); 18 | } 19 | StringMatcher matcher = new 
StringMatcher("bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa"); 20 | Integer result = matcher.findNext(dfa); 21 | Assert.assertEquals((Integer)2, result); 22 | Assert.assertEquals("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", matcher.getLastMatch()); 23 | Assert.assertEquals(5, matcher.getLastMatchStart()); 24 | Assert.assertEquals(34, matcher.getLastMatchEnd()); 25 | result = matcher.findNext(dfa); 26 | Assert.assertEquals((Integer)1, result); 27 | Assert.assertEquals("aaaaaaabbb", matcher.getLastMatch()); 28 | result = matcher.findNext(dfa); 29 | Assert.assertEquals(null, result); 30 | 31 | matcher.setPositions(15, 20, 33); 32 | Assert.assertEquals("aaaaa", matcher.getLastMatch()); 33 | result = matcher.findNext(dfa); 34 | Assert.assertEquals("aaaaaaaaabbbb", matcher.getLastMatch()); 35 | result = matcher.findNext(dfa); 36 | Assert.assertEquals(null, result); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/Apex.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | import com.sixthsolution.apex.nlp.parser.Parser; 5 | 6 | import org.threeten.bp.LocalDateTime; 7 | 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | * @author Rozhin Bayati 14 | */ 15 | 16 | public class Apex { 17 | 18 | private static Apex inst = null; 19 | private Map parsers = new HashMap<>(); 20 | 21 | private Apex(ApexConfig config) { 22 | this.parsers = config.parsers; 23 | } 24 | 25 | public static void init(ApexConfig config) { 26 | for (Parser parser : config.parsers.values()) { 27 | parser.initialize(); 28 | } 29 | inst = new Apex(config); 30 | } 31 | 32 | public static Event nlp(String name, String sentence) { 33 | return inst.parsers.get(name).parse(LocalDateTime.now(), sentence); 34 | } 35 | 36 | public 
static class ApexBuilder { 37 | 38 | private Map parsers = new HashMap<>(); 39 | 40 | public ApexBuilder addParser(String name, Parser parser) { 41 | parsers.put(name, parser); 42 | return this; 43 | } 44 | 45 | public ApexConfig build() { 46 | return new ApexConfig(parsers); 47 | } 48 | } 49 | 50 | private static class ApexConfig { 51 | Map parsers = new HashMap<>(); 52 | 53 | ApexConfig(Map parsers) { 54 | this.parsers = parsers; 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaStateImpl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | /** 19 | * Implementation of a Dfa State. 20 | *

21 | * This can either be a "placeholder" state that delegates to another DFA state, or 22 | * a DFA state in final form. As the last step in DFA construction, references to placeholders are replaced with references to their delegates. 23 | */ 24 | abstract class DfaStateImpl extends DfaState 25 | { 26 | /** 27 | * Replace any internal placeholder references with references to 28 | * their delegates. 29 | *

30 | * Every reference to a state X is replaced with X.resolvePlaceholder(); 31 | */ 32 | abstract void fixPlaceholderReferences(); 33 | 34 | /** 35 | * If this is a placeholder that delegates to another state, 36 | * return that other state. Otherwise return this. 37 | *

38 | * This method will follow a chain of placeholders to the end 39 | * 40 | * @return the final delegate of this state 41 | */ 42 | abstract DfaStateImpl resolvePlaceholder(); 43 | } 44 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/By3Test.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import java.util.Collections; 4 | 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | import com.nobigsoftware.dfalex.CharRange; 9 | import com.nobigsoftware.dfalex.DfaBuilder; 10 | import com.nobigsoftware.dfalex.DfaState; 11 | import com.nobigsoftware.dfalex.Pattern; 12 | 13 | public class By3Test extends TestBase 14 | { 15 | @Test 16 | public void test() throws Exception 17 | { 18 | //make pattern for whole numbers divisible by 3 19 | 20 | //digits mod 3 21 | Matchable d0=CharRange.anyOf("0369"); 22 | Pattern d1=Pattern.match(CharRange.anyOf("147")).thenMaybeRepeat(d0); 23 | Pattern d2=Pattern.match(CharRange.anyOf("258")).thenMaybeRepeat(d0); 24 | 25 | Pattern Plus2 = Pattern.maybeRepeat(d1.then(d2)).then(Pattern.anyOf( 26 | d1.then(d1), 27 | d2 28 | )); 29 | Pattern Minus2 = Pattern.maybeRepeat(d2.then(d1)).then(Pattern.anyOf( 30 | d2.then(d2), 31 | d1 32 | )); 33 | 34 | Pattern By3 = Pattern.maybeRepeat(Pattern.anyOf( 35 | d0, 36 | d1.then(d2), 37 | Plus2.then(Minus2) 38 | )); 39 | DfaBuilder builder = new DfaBuilder<>(); 40 | builder.addPattern(By3, true); 41 | DfaState start = builder.build(Collections.singleton(Boolean.TRUE), null); 42 | Assert.assertEquals(3, _countStates(start)); 43 | _checkDfa(start, "By3Test.out.txt", false); 44 | } 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaAmbiguityResolver.java: -------------------------------------------------------------------------------- 1 | 
/* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import com.nobigsoftware.util.BuilderCache; 19 | 20 | import java.io.Serializable; 21 | import java.util.Set; 22 | import backport.java.util.function.Function; 23 | 24 | /** 25 | * Implementations of this interface are used to resolve ambiguities in {@link DfaBuilder}. 26 | *

27 | * When it's possible for a single string to match patterns that produce different results, the 28 | * ambiguity resolver is called to determine what the result should be. 29 | *

30 | * The implementation can throw a {@link DfaAmbiguityException} in this case, or can combine the 31 | * multiple result objects into a single object if its type (e.g., EnumSet) permits. 32 | *

33 | * This interface extends Serializable so that it can be written into the key signature for 34 | * {@link BuilderCache}. 35 | */ 36 | public interface DfaAmbiguityResolver 37 | extends Function, MATCHRESULT>, Serializable { 38 | } 39 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaStateInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.util.List; 19 | import backport.java.util.function.Consumer; 20 | 21 | class DfaStateInfo 22 | { 23 | private int m_acceptSetIndex; 24 | private int m_transitionCount; 25 | private NfaTransition[] m_transitionBuf; 26 | 27 | DfaStateInfo(List transitions, int acceptSetIndex) 28 | { 29 | 30 | m_acceptSetIndex = acceptSetIndex; 31 | m_transitionCount = transitions.size(); 32 | m_transitionBuf = transitions.toArray(new NfaTransition[m_transitionCount]); 33 | } 34 | 35 | public int getAcceptSetIndex() 36 | { 37 | return m_acceptSetIndex; 38 | } 39 | 40 | public int getTransitionCount() 41 | { 42 | return m_transitionCount; 43 | } 44 | 45 | public NfaTransition getTransition(int index) 46 | { 47 | return m_transitionBuf[index]; 48 | } 49 | 50 | public void forEachTransition(Consumer consumer) 51 | { 52 | for (int i=0; i< m_transitionCount; ++i) 53 | { 54 | consumer.accept(m_transitionBuf[i]); 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/LocationDetectorTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.test.tokenization; 2 | 3 | import com.sixthsolution.apex.nlp.english.LocationDetector; 4 | import com.sixthsolution.apex.nlp.ner.Entity; 5 | import com.sixthsolution.apex.nlp.ner.Label; 6 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 7 | 8 | import org.junit.Test; 9 | 10 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart; 11 | 12 | /** 13 | * @author Saeed Masoumi (s-masoumi@live.com) 14 | */ 15 | 16 | public class LocationDetectorTest extends EnglishDetectorTest { 17 | 18 | @Override 19 | protected ChunkDetector provideDetector() { 20 | return new LocationDetector(); 21 | } 22 | 23 | @Test 24 | public void test_location() { 25 | 
assertChunkedPart("at home").text("at home") 26 | .label(Label.LOCATION).entity(Entity.LOCATION); 27 | assertChunkedPart("at Starbucks").text("at Starbucks") 28 | .label(Label.LOCATION).entity(Entity.LOCATION); 29 | assertChunkedPart("at 123 st.").text("at 123 st.") 30 | .label(Label.LOCATION).entity(Entity.LOCATION); 31 | } 32 | 33 | @Test 34 | public void test_int_full_sentence() { 35 | assertChunkedPart("Grocery shopping at Wegman's Thursday at 5pm").text("at Wegman's") 36 | .label(Label.LOCATION).entity(Entity.LOCATION); 37 | assertChunkedPart("Meet John at Mall from 9:30 to 12:00").text("at Mall") 38 | .label(Label.LOCATION).entity(Entity.LOCATION); 39 | assertChunkedPart("Bring Negin lunch at 123 st.").text("at 123 st.") 40 | .label(Label.LOCATION).entity(Entity.LOCATION); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /persian-nlp/src/test/java/com/sixthsolution/apex/nlp/persian/test/PersianDLTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.test; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Entity; 4 | import com.sixthsolution.apex.nlp.ner.Label; 5 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 6 | import com.sixthsolution.apex.nlp.persian.PersianLocationDetector; 7 | import org.junit.Test; 8 | 9 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart; 10 | 11 | public class PersianDLTest extends PersianDetectorTest { 12 | 13 | @Override 14 | protected ChunkDetector provideDetector() { 15 | return new PersianLocationDetector(); 16 | } 17 | 18 | @Test 19 | public void test_location() { 20 | assertChunkedPart("در کوچه شقایق").text("در کوچه شقایق") 21 | .label(Label.LOCATION).entity(Entity.LOCATION); 22 | assertChunkedPart("در خیابان دهم").text("در خیابان دهم") 23 | .label(Label.LOCATION).entity(Entity.LOCATION); 24 | assertChunkedPart("در بزرگراه چمران").text("در بزرگراه 
چمران") 25 | .label(Label.LOCATION).entity(Entity.LOCATION); 26 | assertChunkedPart("در بازار").text("در بازار") 27 | .label(Label.LOCATION).entity(Entity.LOCATION); 28 | 29 | } 30 | 31 | @Test 32 | public void test_int_full_sentence() { 33 | assertChunkedPart("خرید در بازار در روز چهارشنبه").text("در بازار") 34 | .label(Label.LOCATION).entity(Entity.LOCATION); 35 | assertChunkedPart("امشب ملاقات با دوستم در رستوران").text("در رستوران") 36 | .label(Label.LOCATION).entity(Entity.LOCATION); 37 | assertChunkedPart("خرید لباس در خیابان دهم").text("در خیابان دهم") 38 | .label(Label.LOCATION).entity(Entity.LOCATION); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/parser/StandardParserBase.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.parser; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | import com.sixthsolution.apex.nlp.event.EventDetector; 5 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 6 | import com.sixthsolution.apex.nlp.ner.Chunker; 7 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 8 | import com.sixthsolution.apex.nlp.tagger.Tagger; 9 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer; 10 | 11 | import org.threeten.bp.LocalDateTime; 12 | 13 | import java.util.List; 14 | 15 | /** 16 | * @author Saeed Masoumi (s-masoumi@live.com) 17 | */ 18 | 19 | public abstract class StandardParserBase implements Parser { 20 | 21 | private Tokenizer tokenizer = null; 22 | private Tagger tagger = null; 23 | private Chunker chunker = null; 24 | private EventDetector eventDetector = null; 25 | 26 | @Override 27 | public void initialize() { 28 | tokenizer = provideTokenizer(); 29 | tagger = provideTagger(); 30 | chunker = provideChunker(); 31 | eventDetector = provideEventDetector(); 32 | } 33 | 34 | protected abstract Tagger provideTagger(); 35 | 36 | protected abstract 
Tokenizer provideTokenizer(); 37 | 38 | protected abstract Chunker provideChunker(); 39 | 40 | protected abstract EventDetector provideEventDetector(); 41 | 42 | @Override 43 | public Event parse(LocalDateTime source, String sentence) { 44 | //#1 45 | String[] tokens = tokenizer.tokenize(sentence); 46 | //#2 47 | TaggedWords taggedWords = tagger.tag(tokens); 48 | //#3 49 | List chunkedParts = chunker.chunk(taggedWords); 50 | //#4 51 | return eventDetector.detect(source, chunkedParts); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /apex/src/test/java/com/sixthsolution/apex/nlp/test/ChunkerAssertion.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.test; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Chunker; 4 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 5 | import com.sixthsolution.apex.nlp.tagger.Tagger; 6 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer; 7 | 8 | /** 9 | * @author Saeed Masoumi (s-masoumi@live.com) 10 | */ 11 | 12 | public class ChunkerAssertion { 13 | 14 | private static ChunkerAssertion instance = null; 15 | private final Chunker chunker; 16 | private Tokenizer tokenizer; 17 | private Tagger tagger; 18 | 19 | private ChunkerAssertion(Tokenizer tokenizer, Tagger tagger, Chunker chunker) { 20 | this.tokenizer = tokenizer; 21 | this.tagger = tagger; 22 | this.chunker = chunker; 23 | } 24 | 25 | private static ChunkerAssertion getInstance() { 26 | return instance; 27 | } 28 | 29 | public static void init(Tokenizer tokenizer, Tagger tagger, Chunker chunker) { 30 | instance = new ChunkerAssertion(tokenizer, tagger, chunker); 31 | } 32 | 33 | public static ChunkAssertion assertSentence(String word) { 34 | return getInstance().makeChunkAssertion(word); 35 | } 36 | 37 | private ChunkAssertion makeChunkAssertion(String word) { 38 | System.out.println("Label assertion for: " + word); 39 | long startTime 
= System.currentTimeMillis(); 40 | TaggedWords taggedWords = tagger.tag(tokenizer.tokenize(word)); 41 | ChunkAssertion result = new ChunkAssertion(chunker.chunk(taggedWords)); 42 | System.out.println( 43 | "Chunking takes " + (System.currentTimeMillis() - startTime) + " millis."); 44 | System.out.println("------------------------------"); 45 | return result; 46 | } 47 | 48 | 49 | } 50 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/StringReplacement.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | 19 | /** 20 | * Functional interface that provides the replacement values for strings in a search+replace operation 21 | * on patterns found in a string. 
22 | */ 23 | public interface StringReplacement 24 | { 25 | /** 26 | * This will be called for each instance of each pattern found 27 | * 28 | * @param dest The replacement text for the matching substring should be written here 29 | * @param src The string being searched, or the part of the stream being searched that contains the current match 30 | * @param startPos the start index of the current match in src 31 | * @param endPos the end index of the current match in src 32 | * @return if this is >0, then it is the position in the source string at which to continue processing after 33 | * replacement. If you set this <= startPos, an IndexOutOfBoundsException will be thrown to 34 | * abort the infinite loop that would result. Almost always return 0. 35 | */ 36 | int apply(SafeAppendable dest, CharSequence src, int startPos, int endPos); 37 | } 38 | 39 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/dict/Tag.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.dict; 2 | 3 | /** 4 | * @author Saeed Masoumi (s-masoumi@live.com) 5 | * @author Rozhin Bayati 6 | */ 7 | public enum Tag { 8 | NONE(97), 9 | NUMBER(98), 10 | PREPOSITION(99), 11 | RELATIVE_PREPOSITION(100), 12 | RELATIVE_SUFFIX(101), 13 | //LOCATION 14 | LOCATION_PREFIX(102), 15 | LOCATION_SUFFIX(103), 16 | LOCATION_NAME(137), 17 | //TIME 18 | TIME_PREFIX(104), //e.g. at, in ,the 19 | TIME_START_RANGE(105), //e.g. from 20 | TIME_RANGE(106), 21 | TIME_RELATIVE_PREFIX(107), //e.g. for 22 | TIME_RELATIVE(108), //e.g. morning 23 | TIME_RELATIVE_INDICATOR(109), //e.g. before,after 24 | TIME_HOUR(110), //e.g. hour 25 | TIME_MIN(111), //e.g. minutes 26 | TIME_SEC(112), //e.g. seconds 27 | TIME_MERIDIEM(113), //e.g am, pm 28 | TIME_SEPARATOR(114), //e.g :, . 
29 | //DATE 30 | DATE_PREPOSITION(115), 31 | DATE_SEEKBY(116), 32 | DATE_START_RANGE(117), 33 | DATE_SUFFIX(118), 34 | DATE_DURATION_SUFFIX(119), 35 | DATE_SEPARATOR(120), 36 | WEEK_DAY(121), 37 | MONTH_NAME(122), 38 | SEASON(123), 39 | DATE_PREFIX(124), 40 | //RECURRENCE 41 | REC_WEEK_DAYS(125), 42 | NAMED_DATE(126), 43 | GLOBAL_PREPOSITION(127), 44 | DATE_RECURRENCE(128), 45 | DATE_RANGE(129), 46 | DATE_FOREVER_KEY(130), 47 | THE_PREFIX(131), 48 | DATE_BAND(132), 49 | YEAR_SEEK(133), 50 | MONTH_SEEK(134), 51 | WEEK_SEEK(135), 52 | DAY_SEEK(136), 53 | CURRENT(138); 54 | 55 | public int id; 56 | 57 | Tag(int id) { 58 | this.id = id; 59 | } 60 | 61 | @Override 62 | public String toString() { 63 | return String.valueOf((char) id); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/filter/TimeDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.filter; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | import static com.sixthsolution.apex.nlp.dict.Tag.*; 8 | 9 | /** 10 | * @author Saeed Masoumi (s-masoumi@live.com) 11 | * @author Rozhin Bayati 12 | */ 13 | 14 | public class TimeDetectionFilter extends ChunkDetectionFilter { 15 | 16 | @Override 17 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) { 18 | switch (label) { 19 | case FIXED_TIME: 20 | //ignore single number 21 | if (startIndex == endIndex - 1 && 22 | taggedWords.get(startIndex).getTags().containsTag(NUMBER)) { 23 | return false; 24 | } 25 | //ignore date formats like 12.02.2012 26 | if (taggedWords.size() > endIndex && 27 | taggedWords.get(endIndex - 1).getTags().containsTag(NUMBER) && 28 | 
taggedWords.get(endIndex).getTags().containsTag(DATE_SEPARATOR)) { 29 | return false; 30 | } 31 | return true; 32 | case RANGE_TIME: 33 | //ignore like فروردین 13-17 34 | if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(MONTH_NAME)) { 35 | return false; 36 | } 37 | else if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(DATE_SEPARATOR)) { // edited by scrc 38 | return false; 39 | } 40 | return true; 41 | case RELATIVE_TIME: 42 | return true; 43 | } 44 | return false; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/event/PersianEventBuilder.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.event; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | import com.sixthsolution.apex.nlp.event.EventBuilder; 5 | import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar; 6 | import com.sixthsolution.apex.nlp.persian.model.PersianEvent; 7 | import org.threeten.bp.LocalDateTime; 8 | import org.threeten.bp.LocalTime; 9 | 10 | 11 | /** 12 | * Created by rozhin on 8/1/2017. 
13 | */ 14 | public class PersianEventBuilder extends EventBuilder { 15 | private LocalTime startTime = null; 16 | private LocalTime endTime = null; 17 | private JalaliCalendar startDate; 18 | private JalaliCalendar endDate; 19 | private String location = ""; 20 | private PersianRecurrence recurrence =null; 21 | 22 | public void setStartDate(JalaliCalendar startDate) { 23 | this.startDate = startDate; 24 | } 25 | 26 | public void setEndDate(JalaliCalendar endDate) { 27 | this.endDate = endDate; 28 | } 29 | 30 | public void setRecurrence(PersianRecurrence recurrence){this.recurrence=recurrence;} 31 | 32 | @Override 33 | public Event build(LocalDateTime source) { 34 | JalaliCalendar jalaliCalendar=new JalaliCalendar(); 35 | jalaliCalendar=jalaliCalendar.convertor(source.toLocalDate()); 36 | 37 | if (startTime == null) { 38 | startTime = source.toLocalTime(); 39 | } 40 | if (endTime == null) { 41 | endTime = startTime.plusHours(1); 42 | } 43 | if (startDate == null) { 44 | startDate = jalaliCalendar; 45 | } 46 | if (endDate == null) { 47 | endDate = jalaliCalendar; 48 | } 49 | 50 | 51 | 52 | return new PersianEvent("", location, startDate, endDate,endTime,startTime, false, recurrence); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/ReverseFinderTest.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import java.io.PrintWriter; 4 | import java.io.StringWriter; 5 | 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | public class ReverseFinderTest extends TestBase 10 | { 11 | @Test 12 | public void test() throws Exception 13 | { 14 | DfaBuilder revbuilder = new DfaBuilder<>(); 15 | for (JavaToken tok : JavaToken.values()) 16 | { 17 | revbuilder.addPattern(Pattern.ALL_STRINGS.then(tok.m_pattern.getReversed()), true); 18 | } 19 | DfaState wantstart = revbuilder.build(null); 20 | 
String want = _toString(wantstart); 21 | 22 | DfaBuilder builder = new DfaBuilder<>(); 23 | for (JavaToken tok : JavaToken.values()) 24 | { 25 | builder.addPattern(tok.m_pattern, tok); 26 | } 27 | DfaState havestart = builder.buildReverseFinder(); 28 | String have = _toString(havestart); 29 | Assert.assertEquals(want, have); 30 | 31 | //make sure we properly exclude the empty string from the reverse finder DFA 32 | builder.clear(); 33 | for (JavaToken tok : JavaToken.values()) 34 | { 35 | if ((tok.ordinal()&1)==0) 36 | { 37 | builder.addPattern(tok.m_pattern, tok); 38 | } 39 | else 40 | { 41 | builder.addPattern(Pattern.maybe(tok.m_pattern), tok); 42 | } 43 | } 44 | havestart = builder.buildReverseFinder(); 45 | have = _toString(havestart); 46 | Assert.assertEquals(want, have); 47 | } 48 | 49 | private String _toString(DfaState dfa) 50 | { 51 | StringWriter w = new StringWriter(); 52 | m_printer.print(new PrintWriter(w), dfa); 53 | return w.toString(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/PrimeSizeFinder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | class PrimeSizeFinder 19 | { 20 | private final static int[] PRIME_SIZES = new int[] { 5, 7, 9, 11, 17, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 421, 21 | 529, 661, 827, 1039, 1301, 1627, 2039, 2549, 3187, 3989, 4987, 6241, 7817, 9781, 12227, 15287, 19121, 23909, 29917, 37397, 22 | 46747, 58439, 73061, 91331, 114167, 142711, 178393, 222991, 278741, 348431, 435541, 544429, 680539, 850679, 23 | 1063351, 1329197, 1661503, 2076881, 2596123, 3245171, 4056467, 5070599, 6338257, 7922821, 9903557, 12379453, 24 | 15474317, 19342907, 24178639, 30223313, 37779149, 47223941, 59029963, 73787459, 92234327, 115292923, 144116201, 180145283, 25 | 225181637, 281477047, 351846337, 439807933, 549759953, 687199949, 858999971, 1073749979, 1342187489, 1677734381 26 | }; 27 | 28 | public static int findPrimeSize(int minval) 29 | { 30 | //Linear search is fine here, since returning a size generally implies we're going 31 | //to do work proportional to that size anyway 32 | for (int i=0;i=minval) 35 | { 36 | return PRIME_SIZES[i]; 37 | } 38 | } 39 | return Integer.MAX_VALUE; //Very handy that this is a Mersenne prime 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /apex/src/test/java/com/sixthsolution/apex/nlp/test/TokenizerAssertion.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.test; 2 | 3 | import com.sixthsolution.apex.nlp.tokenization.StandardTokenizer; 4 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer; 5 | 6 | import java.util.Arrays; 7 | import java.util.Iterator; 8 | 9 | import static org.junit.Assert.assertArrayEquals; 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | */ 14 | 15 | public final class TokenizerAssertion { 16 | 17 | private static TokenizerAssertion instance = null; 18 | private Tokenizer tokenizer; 19 | 20 | 21 | 22 | private 
TokenizerAssertion(Tokenizer tokenizer) { 23 | this.tokenizer = tokenizer; 24 | } 25 | 26 | private static TokenizerAssertion getInstance() { 27 | if (instance == null) { 28 | instance = new TokenizerAssertion(new StandardTokenizer()); 29 | } 30 | return instance; 31 | } 32 | 33 | public static void init(Tokenizer tokenizer) { 34 | getInstance().setTokenizer(tokenizer); 35 | } 36 | 37 | public static void assertTokens(String sentence, String... tokens) { 38 | System.out.println("Actual sentence: " + sentence); 39 | String[] tokenized = getInstance().tokenizer.tokenize(sentence); 40 | System.out.println("Tokenized sentence: " + toStringTokens(tokenized)); 41 | assertArrayEquals(tokenized, tokens); 42 | } 43 | private void setTokenizer(Tokenizer tokenizer) { 44 | this.tokenizer = tokenizer; 45 | } 46 | 47 | 48 | private static String toStringTokens(String[] e) { 49 | Iterator itr = Arrays.asList(e).iterator(); 50 | StringBuilder sb = new StringBuilder(); 51 | while (itr.hasNext()) { 52 | sb.append(itr.next()); 53 | if (itr.hasNext()) { 54 | sb.append(" "); 55 | } 56 | } 57 | return sb.toString(); 58 | } 59 | 60 | 61 | } 62 | -------------------------------------------------------------------------------- /english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/filter/TimeDetectionFilter.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.filter; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 6 | 7 | import static com.sixthsolution.apex.nlp.dict.Tag.DATE_SEPARATOR; 8 | import static com.sixthsolution.apex.nlp.dict.Tag.MONTH_NAME; 9 | import static com.sixthsolution.apex.nlp.dict.Tag.NUMBER; 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | * @author Rozhin Bayati 14 | */ 15 | 16 | public class TimeDetectionFilter extends 
ChunkDetectionFilter { 17 | 18 | @Override 19 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) { 20 | switch (label) { 21 | case FIXED_TIME: 22 | //ignore single number 23 | if (startIndex == endIndex - 1 && 24 | taggedWords.get(startIndex).getTags().containsTag(NUMBER)) { 25 | return false; 26 | } 27 | //ignore date formats like 12.02.2012 28 | if (taggedWords.size() > endIndex && 29 | taggedWords.get(endIndex - 1).getTags().containsTag(NUMBER) && 30 | taggedWords.get(endIndex).getTags().containsTag(DATE_SEPARATOR)) { 31 | return false; 32 | } 33 | return true; 34 | case RANGE_TIME: 35 | //ignore like Nov 13-17 36 | if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(MONTH_NAME)) { 37 | return false; 38 | } 39 | else if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(DATE_SEPARATOR)) { // edited by scrc 40 | return false; 41 | } 42 | return true; 43 | case RELATIVE_TIME: 44 | return true; 45 | } 46 | return false; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/event/EventBuilder.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.event; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | 5 | import com.sixthsolution.apex.model.Recurrence; 6 | import org.threeten.bp.LocalDate; 7 | import org.threeten.bp.LocalDateTime; 8 | import org.threeten.bp.LocalTime; 9 | 10 | 11 | /** 12 | * @author Saeed Masoumi (s-masoumi@live.com) 13 | * @author Rozhin Bayati 14 | */ 15 | 16 | public class EventBuilder { 17 | 18 | private LocalTime startTime = null; 19 | private LocalTime endTime = null; 20 | private LocalDate startDate; 21 | private LocalDate endDate; 22 | private String location = ""; 23 | private Recurrence recurrence =null; 24 | 25 | public void setStartTime(LocalTime startTime) { 26 | this.startTime = startTime; 27 | } 28 | 29 | 
public void setEndTime(LocalTime endTime) { 30 | this.endTime = endTime; 31 | } 32 | 33 | public void setStartDate(LocalDate startDate) { 34 | this.startDate = startDate; 35 | } 36 | 37 | public void setEndDate(LocalDate endDate) { 38 | this.endDate = endDate; 39 | } 40 | 41 | public void setLocation(String location) { 42 | this.location = location; 43 | } 44 | 45 | public void setReccurence(Recurrence reccurence ){this.recurrence=reccurence;} 46 | 47 | public Event build(LocalDateTime source) { 48 | if (startTime == null) { 49 | startTime = source.toLocalTime(); 50 | } 51 | if (endTime == null) { 52 | endTime = startTime.plusHours(1); 53 | } 54 | if (startDate == null) { 55 | startDate = source.toLocalDate(); 56 | } 57 | if (endDate == null) { 58 | endDate = startDate; 59 | } 60 | 61 | 62 | LocalDateTime startDateTime = LocalDateTime.of(startDate, startTime); 63 | LocalDateTime endDateTime = LocalDateTime.of(endDate, endTime); 64 | 65 | return new Event("", location, startDateTime, endDateTime, false, recurrence); 66 | } 67 | 68 | } -------------------------------------------------------------------------------- /english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/LocationDetector.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english; 2 | 3 | import com.nobigsoftware.dfalex.Pattern; 4 | import com.sixthsolution.apex.nlp.dict.Tag; 5 | import com.sixthsolution.apex.nlp.english.filter.LocationDetectionFilter; 6 | import com.sixthsolution.apex.nlp.ner.Entity; 7 | import com.sixthsolution.apex.nlp.ner.Label; 8 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 9 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 10 | import com.sixthsolution.apex.nlp.util.Pair; 11 | 12 | import java.util.Arrays; 13 | import java.util.List; 14 | 15 | import static com.nobigsoftware.dfalex.Pattern.match; 16 | import static com.nobigsoftware.dfalex.Pattern.repeat; 17 | 
import static com.sixthsolution.apex.nlp.ner.Entity.LOCATION; 18 | 19 | /** 20 | * @author Saeed Masoumi (s-masoumi@live.com) 21 | */ 22 | 23 | public class LocationDetector extends ChunkDetector { 24 | 25 | /** 26 | * @return at Mall, at home , ... 27 | */ 28 | private static Pattern location() { 29 | return match(Tag.LOCATION_PREFIX.toString()).thenRepeat(Tag.NONE.toString()) 30 | .thenMaybe(Tag.LOCATION_SUFFIX.toString()); 31 | } 32 | 33 | /** 34 | * @return at 123 st 35 | */ 36 | private static Pattern address_location() { 37 | return match(Tag.LOCATION_PREFIX.toString()).thenMaybe(repeat(Tag.NONE.toString())) 38 | .then(Tag.NUMBER.toString()).then(Tag.LOCATION_SUFFIX.toString()); 39 | } 40 | 41 | @Override 42 | protected List> getPatterns() { 43 | return Arrays.asList( 44 | newPattern(Label.LOCATION, location()), 45 | newPattern(Label.LOCATION, address_location()) 46 | ); 47 | } 48 | 49 | @Override 50 | protected List getFilters() { 51 | return Arrays.asList(new LocationDetectionFilter()); 52 | } 53 | 54 | @Override 55 | protected Entity getEntity() { 56 | return LOCATION; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/BitUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | class BitUtils 19 | { 20 | private static final int[] DEBRUIJN_WINDOW_TO_BIT_POSITION= 21 | { 22 | -1, 0, 1, 1, 28, 28, 2, 2, 29, 29, 14, 14, 24, 24, 3, 3, 23 | 30, 30, 22, 22, 20, 20, 15, 15, 25, 25, 17, 17, 4, 4, 8, 8, 24 | 31, 31, 27, 27, 13, 13, 23, 23, 21, 21, 19, 19, 16, 16, 7, 7, 25 | 26, 26, 12, 12, 18, 18, 6, 6, 11, 11, 5, 5, 10, 10, 9, 9 26 | }; 27 | 28 | /** 29 | * Get the lowest bit set in X. 30 | * 31 | * @param x integer to test 32 | * @return smallest bit=1<>> 26)&63]; 48 | } 49 | 50 | /** 51 | * Turn off the lowest bit in in integer. 52 | * 53 | * @param x an integer 54 | * @return x - lowBit(x); 55 | */ 56 | public static int turnOffLowBit(int x) 57 | { 58 | return x & (x-1); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/event/StandardEventDetector.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.event; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 5 | 6 | import org.threeten.bp.LocalDateTime; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * @author Saeed Masoumi (s-masoumi@live.com) 12 | * @author Rozhin Bayati 13 | */ 14 | 15 | public class StandardEventDetector implements EventDetector { 16 | 17 | protected Extractor timeExtractor; 18 | protected Extractor dateExtractor; 19 | protected Extractor locationExtractor; 20 | 21 | public StandardEventDetector() { 22 | timeExtractor = provideTimeExtractor(); 23 | dateExtractor = provideDateExtractor(); 24 | locationExtractor = provideLocationExtractor(); 25 | } 26 | 27 | @Override 28 | public Event detect(LocalDateTime source, List chunkedParts) { 29 | EventBuilder builder = new EventBuilder(); 30 | for (ChunkedPart part : chunkedParts) { 31 | switch (part.getEntity()) { 32 | case TIME: 33 | 
timeExtractor.extract(builder, source, part); 34 | break; 35 | case DATE: 36 | dateExtractor.extract(builder, source, part); 37 | break; 38 | case LOCATION: 39 | locationExtractor.extract(builder, source, part); 40 | break; 41 | } 42 | } 43 | return builder.build(source); 44 | } 45 | 46 | public StandardEventDetector(Extractor DateExtractor) { 47 | timeExtractor = provideTimeExtractor(); 48 | dateExtractor = DateExtractor; 49 | locationExtractor = provideLocationExtractor(); 50 | } 51 | 52 | protected Extractor provideTimeExtractor() { 53 | return new StandardTimeExtractor(); 54 | } 55 | 56 | protected Extractor provideDateExtractor() { 57 | return new StandardDateExtractor(); 58 | } 59 | 60 | protected Extractor provideLocationExtractor() { 61 | return new StandardLocationExtractor(); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/PersianLocationDetector.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian; 2 | import com.nobigsoftware.dfalex.Pattern; 3 | import com.sixthsolution.apex.nlp.dict.Tag; 4 | import com.sixthsolution.apex.nlp.ner.Entity; 5 | import com.sixthsolution.apex.nlp.ner.Label; 6 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 7 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 8 | import com.sixthsolution.apex.nlp.persian.filter.LocationDetectionFilter; 9 | import com.sixthsolution.apex.nlp.util.Pair; 10 | 11 | import java.util.Arrays; 12 | import java.util.List; 13 | 14 | import static com.nobigsoftware.dfalex.Pattern.anyOf; 15 | import static com.nobigsoftware.dfalex.Pattern.match; 16 | import static com.nobigsoftware.dfalex.Pattern.repeat; 17 | import static com.sixthsolution.apex.nlp.ner.Entity.LOCATION; 18 | 19 | /** 20 | * Created by rozhin on 7/30/2017. 
21 | */ 22 | 23 | public class PersianLocationDetector extends ChunkDetector { 24 | 25 | /** 26 | * @return در_بازار 27 | */ 28 | private static Pattern location() { 29 | return match(Tag.LOCATION_PREFIX.toString()).then(anyOf(address_location(),location_name())); 30 | } 31 | 32 | private static Pattern location_name(){ 33 | return match(Tag.LOCATION_NAME.toString()); 34 | } 35 | /** 36 | * @return at 123 st 37 | */ 38 | private static Pattern address_location() { 39 | return match(Tag.LOCATION_SUFFIX.toString()).thenMaybe(repeat(Tag.NONE.toString())) 40 | .then(anyOf(Tag.NUMBER.toString(),Tag.NONE.toString())); 41 | } 42 | 43 | @Override 44 | protected List> getPatterns() { 45 | return Arrays.asList( 46 | newPattern(Label.LOCATION, location()), 47 | newPattern(Label.LOCATION, address_location()) 48 | ); 49 | } 50 | 51 | @Override 52 | protected List getFilters() { 53 | return Arrays.asList(new LocationDetectionFilter()); 54 | } 55 | 56 | @Override 57 | protected Entity getEntity() { 58 | return LOCATION; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/event/PersianRecurrence.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.event; 2 | 3 | /** 4 | * Created by rozhin on 8/1/2017. 5 | */ 6 | import com.sixthsolution.apex.model.Frequency; 7 | import com.sixthsolution.apex.model.WeekDay; 8 | import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar; 9 | 10 | import java.util.List; 11 | 12 | /** 13 | * Represents a recurring event. 14 | * 15 | * @author Saeed Masoumi (saeed@6thsolution.com) 16 | * @author Rozhin Bayati 17 | */ 18 | public class PersianRecurrence { 19 | 20 | private Frequency frequency = Frequency.DAILY; 21 | 22 | /** 23 | * Specifies how often the event should be repeated. 
24 | */ 25 | private int interval = 1; 26 | 27 | /** 28 | * The date or date-time until which the event should be repeated. 29 | */ 30 | private JalaliCalendar until = null; 31 | 32 | private boolean forever = false; 33 | /** 34 | * Days of the week on which the event should be repeated 35 | */ 36 | private List byDays; 37 | 38 | public PersianRecurrence(Frequency frequency, int interval,JalaliCalendar until, boolean forever, 39 | List byDays) { 40 | this.frequency = frequency; 41 | this.interval = interval; 42 | this.until = until; 43 | this.forever = forever; 44 | this.byDays = byDays; 45 | } 46 | 47 | public Frequency frequency() { 48 | return frequency; 49 | } 50 | 51 | public int interval() { 52 | return interval; 53 | } 54 | 55 | //TODO @nullable 56 | public JalaliCalendar until() { 57 | return until; 58 | } 59 | 60 | public boolean isForever() { 61 | return forever; 62 | } 63 | 64 | public List byDays() { 65 | return byDays; 66 | } 67 | 68 | @Override 69 | public String toString() { 70 | return "Recurrence{" + 71 | "frequency=" + frequency + 72 | ", interval=" + interval + 73 | ", until=" + until + 74 | ", forever=" + forever + 75 | ", byDays=" + byDays + 76 | '}'; 77 | } 78 | } 79 | 80 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/model/PersianEvent.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.model; 2 | 3 | import com.sixthsolution.apex.model.Event; 4 | import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar; 5 | import com.sixthsolution.apex.nlp.persian.event.PersianRecurrence; 6 | import org.threeten.bp.LocalTime; 7 | 8 | /** 9 | * Created by rozhin on 8/1/2017. 
10 | */ 11 | public class PersianEvent extends Event{ 12 | 13 | private String title = ""; 14 | private String location = ""; 15 | private JalaliCalendar startDate = null; 16 | private JalaliCalendar endDate= null; 17 | private LocalTime startTime=null; 18 | private LocalTime endTime=null; 19 | private boolean isAllDay = false; 20 | private PersianRecurrence recurrence = null; 21 | 22 | /** Creates an event from Jalali start/end dates and separate start/end times; note that {@code endtime} precedes {@code starttime} in the parameter list. */ 23 | public PersianEvent(String title, String location, JalaliCalendar startDateTime, 24 | JalaliCalendar endDateTime,LocalTime endtime,LocalTime starttime, 25 | boolean isAllDay, PersianRecurrence recurrence) { 26 | this.title = title; 27 | this.location = location; 28 | this.startDate = startDateTime; 29 | this.endDate = endDateTime; 30 | this.endTime=endtime; 31 | this.startTime=starttime; 32 | this.isAllDay = isAllDay; 33 | this.recurrence = recurrence; 34 | } 35 | 36 | /** Returns the Jalali start date given to the constructor. */ 37 | public JalaliCalendar jalaliStart() { 38 | return startDate; 39 | } 40 | /** Returns the Jalali end date given to the constructor. */ 41 | public JalaliCalendar jalaliEnd() { 42 | return endDate; 43 | } 44 | /** Returns the start time given to the constructor. */ public LocalTime jalaliTimeStart(){return startTime;} 45 | /** Returns the end time given to the constructor. */ public LocalTime jalaliTimeEnd(){return endTime;} 46 | /** Returns the recurrence given to the constructor (may be null). Reads the field directly; calling this method from itself would recurse without end. */ public PersianRecurrence persianRecurrence(){return recurrence;} 47 | 48 | @Override 49 | public String toString() { 50 | return "Event{" + 51 | "title='" + title + '\'' + 52 | ", startDate=" + startDate + 53 | ", endDateTime=" + endDate + 54 | ",startTime="+startTime+ 55 | ",endTime="+endTime+ 56 | ", isAllDay=" + isAllDay + 57 | ", recurrence=" + recurrence + 58 | '}'; 59 | } 60 | 61 | 62 | } 63 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/dict/Tags.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.dict; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Entity; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Arrays; 7 | import java.util.Collection; 8 | import java.util.Iterator; 
9 | 10 | /** 11 | * @author Saeed Masoumi (s-masoumi@live.com) 12 | * @author Rozhin Bayati 13 | */ 14 | 15 | public class Tags extends ArrayList { 16 | 17 | public boolean containsTag(Tag tag) { 18 | Iterator iterator = iterator(); 19 | while (iterator.hasNext()) { 20 | TagValue next = iterator.next(); 21 | if (next.tag.equals(tag)) { 22 | return true; 23 | } 24 | } 25 | return false; 26 | } 27 | 28 | public TagValue containsTagByValue(Tag tag) { 29 | Iterator iterator = iterator(); 30 | while (iterator.hasNext()) { 31 | TagValue next = iterator.next(); 32 | if (next.tag.equals(tag)) { 33 | return next; 34 | } 35 | } 36 | return null; 37 | } 38 | 39 | public boolean containsTag(Tag... tags) { 40 | return containsTag(Arrays.asList(tags)); 41 | } 42 | 43 | public boolean containsTag(Collection tags) { 44 | Iterator iterator = iterator(); 45 | while (iterator.hasNext()) { 46 | TagValue next = iterator.next(); 47 | for (Tag tag : tags) 48 | if (next.tag.equals(tag)) { 49 | return true; 50 | } 51 | } 52 | return false; 53 | } 54 | 55 | public boolean containsTagName(int tag) { 56 | for (TagValue tagValue : this) { 57 | if (tagValue.tag.id == tag) { 58 | return true; 59 | } 60 | } 61 | return false; 62 | } 63 | 64 | public TagValue getTagByEntity(Entity entity) { 65 | Iterator iterator = iterator(); 66 | while (iterator.hasNext()) { 67 | TagValue next = iterator.next(); 68 | if (next.entity == entity) { 69 | return next; 70 | } 71 | } 72 | return null; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /dfalex/README.md: -------------------------------------------------------------------------------- 1 | # dfalex backport 2 | 3 | Scanning / Lexical Analysis Without All The Fuss 4 | ================================================ 5 | 6 | Sometimes you need faster and more robust matching than you can get out of Java regular expressions. 
Maybe they're too slow for you, or you get stack overflows when you match things that are too long, or maybe you want to search for many patterns simultaneously. There are plenty of lexical analysis tools you can use, but they involve a lot of fuss. They make you write specifications in a domain-specific language, often mixed with code, and then generate new Java code for a scanner that you have to incorporate into your build and use in pretty specific ways. 7 | 8 | DFALex provides that powerful matching capability without all the fuss. It will build you a deterministic finite automaton (DFA, googlable) for matching/finding multiple patterns in strings simultaneously, which you can then use with various matcher classes to perform searching or scanning operations. 9 | 10 | Unlike other tools which use DFAs internally, but only build scanners with them, DFALex provides you with the actual DFA in an easy-to-use form. Yes, you can use it in standard scanners, but you can also use it in other ways that don't fit that mold. 11 | 12 | Start Here: 13 | ----------- 14 | 15 | * **DfaBuilder** for building DFAs 16 | 17 | * **Pattern** and **CharRange** for specifying patterns to match 18 | 19 | * **StringMatcher** for using your DFAs to find patterns in strings 20 | 21 | Requirements 22 | ------------ 23 | 24 | DFALex needs Java 7 or better. No special libraries are required. 25 | If you want to run the tests, you'll need JUnit 4. 26 | 27 | About 28 | ----- 29 | 30 | DFALex is written by Matt Timmermans, and is all new code. It's written in Java first, with too much attention paid to performance. 31 | 32 | DFAs are generated from NFAs with a standard powerset construction, and minimized using a fast hash-based variant of Hopcroft's algorithm. 33 | 34 | This project was started because lexical analysis is no big deal. You should be able to just do it, without having to convince your team to add a new build step to generate code from a domain-specific language. 
This way you can use it for lots of little jobs, instead of just big, important ones. 35 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/SerializableDfa.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.io.Serializable; 19 | import java.util.ArrayList; 20 | import java.util.List; 21 | 22 | class SerializableDfa implements Serializable 23 | { 24 | private static final long serialVersionUID = 1L; 25 | 26 | private final ArrayList> m_dfaStates; 27 | private final int[] m_startStateNumbers; 28 | 29 | private transient List> m_startStatesMemo; 30 | 31 | public SerializableDfa(RawDfa rawDfa) 32 | { 33 | final List origStates = rawDfa.getStates(); 34 | final int len = origStates.size(); 35 | m_dfaStates = new ArrayList<>(len); 36 | m_startStateNumbers = rawDfa.getStartStates(); 37 | while(m_dfaStates.size() < len) 38 | { 39 | m_dfaStates.add(new PackedTreeDfaPlaceholder<>(rawDfa, m_dfaStates.size())); 40 | } 41 | } 42 | 43 | public synchronized List> getStartStates() 44 | { 45 | if (m_startStatesMemo == null) 46 | { 47 | final int len = m_dfaStates.size(); 48 | for (int i=0;i(m_startStateNumbers.length); 57 | for (int startState : m_startStateNumbers) 58 | { 
59 | m_startStatesMemo.add(m_dfaStates.get(startState).resolvePlaceholder()); 60 | } 61 | } 62 | return m_startStatesMemo; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/util/BuilderCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.util; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * Implementations of this interface can cache serializable objects that 22 | * can be used to bypass expensive building operations by providing 23 | * pre-built objects 24 | */ 25 | public interface BuilderCache 26 | { 27 | /** 28 | * Get a cached item. 29 | * 30 | * @param key The key used to identify the item. The key uniquely identifies all 31 | * of the source information that will go into building the item if this call fails 32 | * to retrieve a cached version. Typically this will be a cryptographic hash of 33 | * the serialized form of that information. 34 | * 35 | * @return the item that was previously cached under the key, or null if no such item 36 | * can be retrieved. 37 | */ 38 | Serializable getCachedItem(String key); 39 | 40 | /** 41 | * This method may be called when an item is built, providing an opportunity to 42 | * cache it. 
43 | * 44 | * @param key The key that will be used to identify the item in future calls to {@link #getCachedItem(String)}. 45 | * Only letters, digits, and underscores are valid in keys, and key length is limited to 32 characters. 46 | * The behaviour of this method for invalid keys is undefined. 47 | *

48 | * Keys that differ only by case may or may not be considered equal by this class. 49 | * @param item The item to cache, if desired 50 | */ 51 | void maybeCacheItem(String key, Serializable item); 52 | 53 | 54 | } 55 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/util/Triple.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.util; 2 | 3 | /** 4 | * Adopted from http://facebook.github.io/jcommon/collections/jacoco/com.facebook.collections/Triple.java.html 5 | * 6 | * @author Saeed Masoumi (s-masoumi@live.com) 7 | */ 8 | 9 | public class Triple { 10 | private final First first; 11 | private final Second second; 12 | private final Third third; 13 | 14 | private volatile String toStringResult; 15 | 16 | public Triple(First first, Second second, Third third) { 17 | this.first = first; 18 | this.second = second; 19 | this.third = third; 20 | } 21 | 22 | public First getFirst() { 23 | return first; 24 | } 25 | 26 | public Second getSecond() { 27 | return second; 28 | } 29 | 30 | public Third getThird() { 31 | return third; 32 | } 33 | 34 | @Override 35 | public boolean equals(Object o) { 36 | if (this == o) { 37 | return true; 38 | } 39 | 40 | if (o == null || getClass() != o.getClass()) { 41 | return false; 42 | } 43 | 44 | final Triple triple = (Triple) o; 45 | 46 | if (first != null ? !first.equals(triple.first) : triple.first != null) { 47 | return false; 48 | } 49 | 50 | if (second != null ? !second.equals(triple.second) : triple.second != null) { 51 | return false; 52 | } 53 | 54 | if (third != null ? !third.equals(triple.third) : triple.third != null) { 55 | return false; 56 | } 57 | 58 | return true; 59 | } 60 | 61 | @Override 62 | public int hashCode() { 63 | int result = first != null ? first.hashCode() : 0; 64 | 65 | result = 31 * result + (second != null ? 
second.hashCode() : 0); 66 | result = 31 * result + (third != null ? third.hashCode() : 0); 67 | 68 | return result; 69 | } 70 | 71 | @Override 72 | public String toString() { 73 | if (toStringResult == null) { 74 | toStringResult = "Triple{" + 75 | "first=" + first + 76 | ", second=" + second + 77 | ", third=" + third + 78 | '}'; 79 | } 80 | 81 | return toStringResult; 82 | } 83 | } -------------------------------------------------------------------------------- /apex/src/test/java/com/sixthsolution/apex/nlp/test/ChunkAssertion.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.test; 2 | 3 | import com.sixthsolution.apex.nlp.ner.Label; 4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 5 | 6 | import java.util.List; 7 | 8 | import static com.sixthsolution.apex.nlp.ner.Label.DATE; 9 | import static com.sixthsolution.apex.nlp.ner.Label.LOCATION; 10 | import static com.sixthsolution.apex.nlp.ner.Label.TIME; 11 | import static org.junit.Assert.assertEquals; 12 | import static org.junit.Assert.assertNotNull; 13 | import static org.junit.Assert.assertNull; 14 | 15 | /** 16 | * @author Saeed Masoumi (s-masoumi@live.com) 17 | */ 18 | 19 | public class ChunkAssertion { 20 | 21 | private final List chunkedParts; 22 | 23 | public ChunkAssertion(List chunks) { 24 | this.chunkedParts = chunks; 25 | } 26 | 27 | public ChunkAssertion hasLocationChunk(String loc) { 28 | assertChunk(loc, LOCATION); 29 | return this; 30 | } 31 | 32 | public ChunkAssertion hasNoLocationChunk() { 33 | assertEmpty(LOCATION); 34 | return this; 35 | } 36 | 37 | public ChunkAssertion hasTimeChunk(String time) { 38 | assertChunk(time, TIME); 39 | return this; 40 | } 41 | 42 | public ChunkAssertion hasNoTimeChunk() { 43 | assertEmpty(TIME); 44 | return this; 45 | } 46 | 47 | public ChunkAssertion hasDateChunk(String date) { 48 | assertChunk(date, DATE); 49 | return this; 50 | } 51 | private ChunkedPart getChunkedPartByType(Label 
type) { 52 | for (ChunkedPart part : chunkedParts) { 53 | if (part.getLabel().equals(type)) { 54 | return part; 55 | } 56 | } 57 | return null; 58 | } 59 | 60 | private void assertEmpty(Label type) { 61 | assertNull(getChunkedPartByType(type)); 62 | } 63 | 64 | private void assertChunk(String phrase, Label type) { 65 | ChunkedPart chunk = getChunkedPartByType(type); 66 | assertNotNull(chunk); 67 | assertEquals(phrase, chunk.toStringTaggedWords()); 68 | } 69 | 70 | @Override 71 | public String toString() { 72 | return chunkedParts.toString(); 73 | } 74 | 75 | public void print() { 76 | System.out.println(toString()); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /persian-nlp/src/test/java/com/sixthsolution/apex/nlp/persian/test/PersianDTTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.test; 2 | 3 | /** 4 | * Created by rozhin on 7/30/2017. 5 | */ 6 | 7 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 8 | 9 | import com.sixthsolution.apex.nlp.persian.PersianTimeDetector; 10 | import org.junit.Test; 11 | 12 | import static com.sixthsolution.apex.nlp.ner.Entity.TIME; 13 | import static com.sixthsolution.apex.nlp.ner.Label.FIXED_TIME; 14 | import static com.sixthsolution.apex.nlp.ner.Label.RANGE_TIME; 15 | import static com.sixthsolution.apex.nlp.ner.Label.RELATIVE_TIME; 16 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart; 17 | 18 | /** 19 | * @author Saeed Masoumi (s-masoumi@live.com) 20 | */ 21 | 22 | public class PersianDTTest extends PersianDetectorTest { 23 | 24 | @Test 25 | public void test_fixed_time() { 26 | assertChunkedPart("ساعت 10").text("ساعت 10").label(FIXED_TIME).entity(TIME); 27 | assertChunkedPart("ساعت 10 ق.ظ").text("ساعت 10 ق.ظ").label(FIXED_TIME).entity(TIME); 28 | assertChunkedPart("ساعت 10 صبح").text("ساعت 10 صبح").label(FIXED_TIME).entity(TIME); 29 | 
assertChunkedPart("ساعت 10 قبل_از_ظهر").text("ساعت 10 قبل_از_ظهر").label(FIXED_TIME).entity(TIME); 30 | assertChunkedPart("عصر").text("عصر").label(FIXED_TIME).entity(TIME); 31 | assertChunkedPart("7 ب.ظ").text("7 ب.ظ").label(FIXED_TIME).entity(TIME); 32 | assertChunkedPart("در 23:20").text("در 23 : 20").label(FIXED_TIME).entity(TIME); 33 | assertChunkedPart("در 23:20 ب.ظ").text("در 23 : 20 ب.ظ").label(FIXED_TIME).entity(TIME); 34 | assertChunkedPart("8.20").text("8 . 20").label(FIXED_TIME).entity(TIME); 35 | assertChunkedPart("ساعت چهار").text("ساعت چهار").label(FIXED_TIME).entity(TIME); 36 | } 37 | 38 | @Test 39 | public void test_invalid_fixed_time(){ 40 | assertChunkedPart("7").noDetection(); 41 | assertChunkedPart("12.2.2016").noDetection(); 42 | } 43 | 44 | @Test 45 | public void test_range_time() { 46 | assertChunkedPart("از صبح تا عصر").text("از صبح تا عصر") 47 | .label(RANGE_TIME).entity(TIME); 48 | 49 | } 50 | 51 | 52 | 53 | @Override 54 | protected ChunkDetector provideDetector() { 55 | return new PersianTimeDetector(); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/TestBase.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import org.junit.Assert; 4 | 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.io.PrintWriter; 8 | import java.io.StringWriter; 9 | import java.nio.charset.Charset; 10 | import java.util.ArrayDeque; 11 | import java.util.HashSet; 12 | 13 | public class TestBase { 14 | final PrettyPrinter m_printer = new PrettyPrinter(); 15 | 16 | int _countStates(DfaState... 
starts) { 17 | ArrayDeque> togo = new ArrayDeque<>(); 18 | HashSet> checkSet = new HashSet<>(); 19 | for (DfaState start : starts) { 20 | if (checkSet.add(start)) { 21 | togo.add(start); 22 | } 23 | } 24 | while (!togo.isEmpty()) { 25 | DfaState scanst = togo.removeFirst(); 26 | scanst.enumerateTransitions((c1, c2, newstate) -> { 27 | if (checkSet.add(newstate)) { 28 | togo.add(newstate); 29 | } 30 | }); 31 | } 32 | return checkSet.size(); 33 | } 34 | 35 | void _checkDfa(DfaState start, String resource, boolean doStdout) throws Exception { 36 | String have; 37 | { 38 | StringWriter w = new StringWriter(); 39 | m_printer.print(new PrintWriter(w), start); 40 | have = w.toString(); 41 | } 42 | if (doStdout) { 43 | System.out.print(have); 44 | System.out.flush(); 45 | } 46 | String want = _readResource(resource); 47 | Assert.assertEquals(want, have); 48 | } 49 | 50 | String _readResource(String resource) throws Exception { 51 | InputStream instream = getClass().getClassLoader().getResourceAsStream(resource); 52 | try { 53 | InputStreamReader inreader = new InputStreamReader(instream, Charset.forName("UTF-8")); 54 | StringBuilder sb = new StringBuilder(); 55 | char[] buf = new char[1024]; 56 | for (; ; ) { 57 | int rlen = inreader.read(buf); 58 | if (rlen <= 0) { 59 | break; 60 | } 61 | sb.append(buf, 0, rlen); 62 | } 63 | return sb.toString(); 64 | } finally { 65 | instream.close(); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/NfaTransition.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * A transition in a {@link Nfa} 22 | *

23 | * Instances of this class are immutable 24 | */ 25 | public final class NfaTransition implements Serializable 26 | { 27 | private static final long serialVersionUID = 1L; 28 | 29 | /** 30 | * The first character that triggers this transition 31 | */ 32 | public final char m_firstChar; 33 | /** 34 | * The last character that triggers this transition 35 | */ 36 | public final char m_lastChar; 37 | /** 38 | * The number of the target state of this transition 39 | */ 40 | public final int m_stateNum; 41 | 42 | /** 43 | * Create a new immutable NFA Transition. 44 | * 45 | * @param firstChar value for {@link #m_firstChar} 46 | * @param lastChar value for {@link #m_lastChar} 47 | * @param stateNum value for {@link #m_stateNum} 48 | */ 49 | public NfaTransition(char firstChar, char lastChar, int stateNum) 50 | { 51 | super(); 52 | m_firstChar = firstChar; 53 | m_lastChar = lastChar; 54 | m_stateNum = stateNum; 55 | } 56 | 57 | @Override 58 | public boolean equals(Object arg) 59 | { 60 | if (arg instanceof NfaTransition) 61 | { 62 | NfaTransition r = (NfaTransition)arg; 63 | return (r.m_firstChar == m_firstChar && r.m_lastChar==m_lastChar && r.m_stateNum == m_stateNum); 64 | } 65 | return false; 66 | } 67 | 68 | @Override 69 | public int hashCode() 70 | { 71 | int hash = (int)2166136261L; 72 | hash = (hash ^ (int)m_firstChar)*16777619; 73 | hash = (hash ^ (int)m_lastChar)*16777619; 74 | hash = (hash ^ (int)m_stateNum)*16777619; 75 | return hash ^ (hash>>16); 76 | } 77 | } -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/model/Recurrence.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 6thSolution 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.sixthsolution.apex.model; 18 | 19 | import org.threeten.bp.LocalDateTime; 20 | 21 | import java.util.List; 22 | 23 | /** 24 | * Represents a recurring event. 25 | * 26 | * @author Saeed Masoumi (saeed@6thsolution.com) 27 | * @author Rozhin Bayati 28 | */ 29 | public class Recurrence { 30 | 31 | private Frequency frequency = Frequency.DAILY; 32 | 33 | /** 34 | * Specifies how often the event should be repeated. 35 | */ 36 | private int interval = 1; 37 | 38 | /** 39 | * The date or date-time until which the event should be repeated. 
40 | */ 41 | private LocalDateTime until = null; 42 | 43 | private boolean forever = false; 44 | /** 45 | * Days of the week on which the event should be repeated 46 | */ 47 | private List byDays; 48 | 49 | public Recurrence(Frequency frequency, int interval, LocalDateTime until, boolean forever, 50 | List byDays) { 51 | this.frequency = frequency; 52 | this.interval = interval; 53 | this.until = until; 54 | this.forever = forever; 55 | this.byDays = byDays; 56 | } 57 | 58 | public Frequency frequency() { 59 | return frequency; 60 | } 61 | 62 | public int interval() { 63 | return interval; 64 | } 65 | 66 | //TODO @nullable 67 | public LocalDateTime until() { 68 | return until; 69 | } 70 | 71 | public boolean isForever() { 72 | return forever; 73 | } 74 | 75 | public List byDays() { 76 | return byDays; 77 | } 78 | 79 | @Override 80 | public String toString() { 81 | return "Recurrence{" + 82 | "frequency=" + frequency + 83 | ", interval=" + interval + 84 | ", until=" + until + 85 | ", forever=" + forever + 86 | ", byDays=" + byDays + 87 | '}'; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/IntRangeClosureQueue.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | /** 19 | * Closure queue containing integers in a limited range. 20 | */ 21 | class IntRangeClosureQueue 22 | { 23 | final int[] m_bitmask; 24 | final int[] m_queue; 25 | int m_readpos, m_writepos; 26 | 27 | /** 28 | * Create a new IntRangeClosureQueue. 29 | *

30 | * The queue can contain integers in [0,range) 31 | * 32 | * @param range 33 | */ 34 | public IntRangeClosureQueue(int range) 35 | { 36 | m_bitmask = new int[(range+31)>>5]; 37 | m_queue = new int[m_bitmask.length*32 + 1]; 38 | } 39 | 40 | /** 41 | * Add an integer to the tail of the queue if it's not already present 42 | * 43 | * @param val integer to add 44 | * @return true if the integer was added to the queue, or false 45 | * if it was not added, because it was already in the queue 46 | */ 47 | public boolean add(int val) 48 | { 49 | int i = val>>5; 50 | int bit = 1<<(val&31); 51 | int oldbits = m_bitmask[i]; 52 | if ((oldbits & bit)==0) 53 | { 54 | m_bitmask[i] = oldbits|bit; 55 | m_queue[m_writepos] = val; 56 | if (++m_writepos >= m_queue.length) 57 | { 58 | m_writepos = 0; 59 | } 60 | assert(m_writepos != m_readpos); 61 | return true; 62 | } 63 | else 64 | { 65 | return false; 66 | } 67 | } 68 | 69 | /** 70 | * Remove an integer from the head of the queue, if it's non-empty 71 | * 72 | * @return the integer removed from the head of the queue, or -1 if the 73 | * queue was empty. 74 | */ 75 | public int poll() 76 | { 77 | if (m_readpos == m_writepos) 78 | { 79 | return -1; 80 | } 81 | int val = m_queue[m_readpos]; 82 | if (++m_readpos >= m_queue.length) 83 | { 84 | m_readpos = 0; 85 | } 86 | int i = val>>5; 87 | int bit = 1<<(val&31); 88 | assert((m_bitmask[i]&bit) != 0); 89 | m_bitmask[i]&=~bit; 90 | assert((m_bitmask[i]&bit) == 0); 91 | return val; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/Matchable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * Base interface for the types of patterns that can be used with {@link DfaBuilder} to specify a set of strings to match. 22 | *

23 | * The primary implementation classes are {@link Pattern} and {@link CharRange}. 24 | */ 25 | public interface Matchable extends Serializable 26 | { 27 | /** 28 | * Add states to an NFA to match the desired pattern 29 | *

30 | * New states will be created in the NFA to match the pattern and transition to 31 | * the given targetState. 32 | *

33 | * NO NEW TRANSITIONS will be added to the target state or any other pre-existing state 34 | * 35 | * @param nfa nfa to add to 36 | * @param targetState target state after the pattern is matched 37 | * @return a state that transitions to targetState after matching the pattern, and 38 | * only after matching the pattern. This may be targetState if the pattern is an 39 | * empty string. 40 | */ 41 | public int addToNFA(Nfa nfa, int targetState); 42 | 43 | /** 44 | * @return true if this pattern matches the empty string 45 | */ 46 | public boolean matchesEmpty(); 47 | 48 | /** 49 | * @return true if this pattern matches any non-empty strings 50 | */ 51 | public boolean matchesNonEmpty(); 52 | 53 | /** 54 | * @return true if this pattern can match anything at all 55 | */ 56 | public boolean matchesSomething(); 57 | 58 | /** 59 | * @return true if this pattern matches an infinite number of strings 60 | */ 61 | public boolean isUnbounded(); 62 | 63 | /** 64 | * Get the reverse of this pattern 65 | *

66 | * The reverse of a pattern matches the reverse of all the strings that this pattern matches 67 | * 68 | * @return the reverse of this pattern 69 | */ 70 | public Matchable getReversed(); 71 | } 72 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 
45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | /** 19 | * A state in a char-matching deterministic finite automaton (that's the google phrase) or DFA 20 | * 21 | * @param MATCHRESULT the type of result produced by matching patterns with this DFA 22 | */ 23 | public abstract class DfaState 24 | { 25 | /** 26 | * Process a character and get the next state 27 | * 28 | * @param c input character 29 | * @return The DfaState that c transitions to from this one, or null if there is no such state 30 | */ 31 | public abstract DfaState getNextState(char c); 32 | 33 | /** 34 | * Get the result that has been matched if we've transitioned into this state 35 | * 36 | * @return If the sequence of characters that led to this state match a pattern in the 37 | * language being processed, the match result for that pattern is returned. Otherwise 38 | * null. 39 | */ 40 | public abstract MATCHRESULT getMatch(); 41 | 42 | 43 | /** 44 | * Get the state number. All states reachable from the output of a single call to 45 | * a {@link DfaBuilder} build method will be compactly numbered starting at 0. 46 | *

47 | * These state numbers can be used to maintain auxiliary information about a DFA. 48 | *

49 | * See {@link DfaAuxiliaryInformation} 50 | * 51 | * @return this state's state number 52 | */ 53 | public abstract int getStateNumber(); 54 | 55 | /** 56 | * Enumerate all the transitions out of this state 57 | * 58 | * @param consumer each DFA transition will be sent here 59 | */ 60 | public abstract void enumerateTransitions(DfaTransitionConsumer consumer); 61 | 62 | /** 63 | * Get an {@link Iterable} of all the successor states of this state. 64 | *

65 | * Note that the same successor state may appear more than once in the iteration 66 | * 67 | * @return an iterable of successor states. 68 | */ 69 | public abstract Iterable> getSuccessorStates(); 70 | } 71 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaStatePlaceholder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.util.List; 19 | 20 | /** 21 | * Base class for serializable placeholders that construct final-form DFA states and 22 | * temporarily assume their places in the DFA. 23 | *

24 | * In serialized placeholders, target states are identified by their state number in a 25 | * SerializableDfa. 26 | */ 27 | abstract class DfaStatePlaceholder extends DfaStateImpl implements java.io.Serializable 28 | { 29 | private static final long serialVersionUID = 1L; 30 | 31 | protected transient DfaStateImpl m_delegate = null; 32 | 33 | /** 34 | * Create a new DfaStatePlaceholder 35 | *

36 | * The initially constructed state will accept no strings 37 | */ 38 | public DfaStatePlaceholder() 39 | { 40 | } 41 | 42 | /** 43 | * Creates the final form delegate state, implementing all the required 44 | * transitions and matches. 45 | *

46 | * This is called on all DFA state placeholders after they are constructed 47 | */ 48 | abstract void createDelegate(int statenum, List> allStates); 49 | 50 | @Override 51 | final void fixPlaceholderReferences() 52 | { 53 | m_delegate.fixPlaceholderReferences(); 54 | } 55 | 56 | @Override 57 | final DfaStateImpl resolvePlaceholder() 58 | { 59 | return m_delegate.resolvePlaceholder(); 60 | } 61 | 62 | @Override 63 | final public DfaState getNextState(char c) 64 | { 65 | return m_delegate.getNextState(c); 66 | } 67 | @Override 68 | final public MATCH getMatch() 69 | { 70 | return m_delegate.getMatch(); 71 | } 72 | @Override 73 | final public void enumerateTransitions(DfaTransitionConsumer consumer) 74 | { 75 | m_delegate.enumerateTransitions(consumer); 76 | } 77 | 78 | @Override 79 | final public int getStateNumber() 80 | { 81 | return m_delegate.getStateNumber(); 82 | } 83 | 84 | @Override 85 | public Iterable> getSuccessorStates() 86 | { 87 | return m_delegate.getSuccessorStates(); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/PersianTimeDetector.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian; 2 | 3 | /** 4 | * Created by rozhin on 7/26/2017. 
5 | */ 6 | 7 | import com.nobigsoftware.dfalex.Pattern; 8 | import com.sixthsolution.apex.nlp.ner.Entity; 9 | import com.sixthsolution.apex.nlp.ner.Label; 10 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 11 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 12 | import com.sixthsolution.apex.nlp.util.Pair; 13 | 14 | import java.util.Arrays; 15 | import java.util.List; 16 | 17 | import static com.nobigsoftware.dfalex.Pattern.anyOf; 18 | import static com.nobigsoftware.dfalex.Pattern.match; 19 | import static com.nobigsoftware.dfalex.Pattern.maybe; 20 | import static com.sixthsolution.apex.nlp.dict.Tag.*; 21 | import static com.sixthsolution.apex.nlp.ner.Entity.TIME; 22 | import static com.sixthsolution.apex.nlp.ner.Label.FIXED_TIME; 23 | import static com.sixthsolution.apex.nlp.ner.Label.RANGE_TIME; 24 | import static com.sixthsolution.apex.nlp.ner.Label.RELATIVE_TIME; 25 | 26 | import com.sixthsolution.apex.nlp.persian.filter.TimeDetectionFilter; 27 | 28 | 29 | public class PersianTimeDetector extends ChunkDetector { 30 | 31 | /** 32 | * @return returns شب،ظهر،عصر... 
33 | */ 34 | private static Pattern time_relative() { 35 | return match(TIME_RELATIVE.toString()); 36 | } 37 | 38 | /** 39 | * @return returns hh:mm ق.ظ/ب.ظ 40 | */ 41 | private static Pattern time_hour_min() { 42 | return match(NUMBER.toString()).thenMaybe( 43 | match(TIME_SEPARATOR.toString()).then(NUMBER.toString())) 44 | .thenMaybe(anyOf(TIME_MERIDIEM.toString(),TIME_RELATIVE.toString())); 45 | } 46 | 47 | /** 48 | * @return like time_hour_min but starts with در/ساعت 49 | */ 50 | private static Pattern fixed_time() { 51 | return maybe(TIME_PREFIX.toString()).then( 52 | anyOf(time_relative(), time_hour_min())) 53 | .thenMaybe( 54 | TIME_MERIDIEM.toString()); 55 | } 56 | 57 | /** 58 | * @return از (time) تا (time) 59 | */ 60 | private static Pattern range_time() { 61 | return match(match(maybe(TIME_START_RANGE.toString()).then(maybe(fixed_time())).then(TIME_RELATIVE_PREFIX.toString()).then(fixed_time()))); 62 | } 63 | 64 | 65 | 66 | @Override 67 | protected List> getPatterns() { 68 | return Arrays.asList( 69 | newPattern(FIXED_TIME, fixed_time()), 70 | newPattern(RANGE_TIME, range_time()) 71 | ); 72 | } 73 | 74 | @Override 75 | protected List getFilters() { 76 | return Arrays.asList(new TimeDetectionFilter()); 77 | } 78 | 79 | @Override 80 | protected Entity getEntity() { 81 | return TIME; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/IntListKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.util.Arrays; 19 | import backport.java.util.function.ObjIntConsumer; 20 | 21 | /** 22 | * A simple list of integers that can be used as a hash map key and cloned 23 | */ 24 | class IntListKey implements Cloneable 25 | { 26 | private static int[] NO_INTS = new int[0]; 27 | 28 | private int[] m_buf = NO_INTS; 29 | private int m_size = 0; 30 | private int m_hash = 0; 31 | private boolean m_hashValid = false; 32 | 33 | public IntListKey() 34 | {} 35 | public IntListKey(IntListKey src) 36 | { 37 | if (src != null && src.m_size > 0) 38 | { 39 | m_buf = Arrays.copyOf(src.m_buf, src.m_size); 40 | m_size = src.m_size; 41 | if (src.m_hashValid) 42 | { 43 | m_hash = src.m_hash; 44 | m_hashValid = true; 45 | } 46 | } 47 | } 48 | 49 | public void clear() 50 | { 51 | m_size = 0; 52 | m_hashValid = false; 53 | } 54 | 55 | public void add(int v) 56 | { 57 | if (m_size >= m_buf.length) 58 | { 59 | m_buf = Arrays.copyOf(m_buf, m_size + (m_size>>1) + 16); 60 | } 61 | m_buf[m_size++] = v; 62 | m_hashValid = false; 63 | } 64 | 65 | public void forData(ObjIntConsumer target) 66 | { 67 | target.accept(m_buf, m_size); 68 | } 69 | 70 | 71 | @Override 72 | protected IntListKey clone() 73 | { 74 | return new IntListKey(this); 75 | } 76 | 77 | 78 | @Override 79 | public boolean equals(Object obj) 80 | { 81 | if (!(obj instanceof IntListKey)) 82 | { 83 | return false; 84 | } 85 | IntListKey r = (IntListKey)obj; 86 | if (m_size != r.m_size || hashCode() != r.hashCode()) 87 | { 88 | 
return false; 89 | } 90 | for (int i = m_size-1; i>=0; --i) 91 | { 92 | if (m_buf[i] != r.m_buf[i]) 93 | { 94 | return false; 95 | } 96 | } 97 | return true; 98 | } 99 | 100 | 101 | @Override 102 | public int hashCode() 103 | { 104 | if (!m_hashValid) 105 | { 106 | int h = 0; 107 | for (int i=0;i>>17); 113 | h ^= (h>>>11); 114 | h ^= (h>>>5); 115 | m_hash = h; 116 | m_hashValid = true; 117 | } 118 | return m_hash; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/DfaAmbiguityException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.util.ArrayList; 19 | import java.util.List; 20 | 21 | /** 22 | * Exception thrown by default when patterns for multiple results match the same string in a DFA, 23 | * and no way has been provided to combine result 24 | */ 25 | public class DfaAmbiguityException extends RuntimeException 26 | { 27 | private static final long serialVersionUID = 1L; 28 | private final List m_results; 29 | 30 | /** 31 | * Create a new AmbiguityException. 
32 | * @param results the multiple results for patterns that match the same string 33 | */ 34 | public DfaAmbiguityException(Iterable results) 35 | { 36 | this(new _Initializer(null, results)); 37 | } 38 | 39 | /** 40 | * Create a new AmbiguityException. 41 | * 42 | * @param message The exception detail message 43 | * @param results the multiple results for patterns that match the same string 44 | */ 45 | public DfaAmbiguityException(String message, Iterable results) 46 | { 47 | this(new _Initializer(message, results)); 48 | } 49 | 50 | private DfaAmbiguityException(_Initializer inivals) 51 | { 52 | super(inivals.m_message); 53 | m_results = inivals.m_results; 54 | } 55 | 56 | /** 57 | * Get the set of results that can match the same string 58 | * 59 | * @return set of conflicting results 60 | */ 61 | public List getResults() 62 | { 63 | return m_results; 64 | } 65 | 66 | 67 | private static class _Initializer 68 | { 69 | String m_message; 70 | List m_results; 71 | 72 | _Initializer(String message, Iterable results) 73 | { 74 | m_results = new ArrayList(); 75 | for (Object obj : results) 76 | { 77 | m_results.add(obj); 78 | } 79 | if (message == null) 80 | { 81 | StringBuilder sb = new StringBuilder(); 82 | sb.append("The same string can match multiple patterns for: "); 83 | String sep=""; 84 | for (Object result : results) 85 | { 86 | sb.append(sep).append(result.toString()); 87 | sep=", "; 88 | } 89 | message = sb.toString(); 90 | } 91 | m_message = message; 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/util/SHAOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.util; 17 | 18 | import java.io.IOException; 19 | import java.io.OutputStream; 20 | import java.security.DigestOutputStream; 21 | import java.security.MessageDigest; 22 | import java.security.NoSuchAlgorithmException; 23 | 24 | /** 25 | * An output stream that computes the SHA hash of whatever you write to it 26 | */ 27 | public class SHAOutputStream extends DigestOutputStream 28 | { 29 | private static NullOutputStream NULL_OUTPUT_STREAM = new NullOutputStream(); 30 | private static char[] DIGITS_36 = "0123456789abcdefghijklmnopqrstuvwxyz".toCharArray(); 31 | 32 | public SHAOutputStream() 33 | { 34 | super(NULL_OUTPUT_STREAM, _initDigest()); 35 | } 36 | 37 | /** 38 | * @return a base-32 version of the digest, consisting of 32 letters and digits 39 | */ 40 | public String getBase32Digest() 41 | { 42 | StringBuilder sb = new StringBuilder(); 43 | int bits = 0, nbits = 0; 44 | for (byte b : getMessageDigest().digest()) 45 | { 46 | bits |= (((int)b)&255)<= 5) 49 | { 50 | sb.append(DIGITS_36[bits&31]); 51 | bits>>>=5; 52 | nbits-=5; 53 | } 54 | } 55 | return sb.toString(); 56 | } 57 | 58 | private static class NullOutputStream extends OutputStream 59 | { 60 | @Override 61 | public void close() throws IOException 62 | { 63 | } 64 | @Override 65 | public void flush() throws IOException 66 | { 67 | } 68 | @Override 69 | public void write(byte[] arg0, int arg1, int arg2) throws IOException 70 | { 71 | } 72 | @Override 73 | public void write(byte[] arg0) throws IOException 74 | { 75 | } 76 | 
@Override 77 | public void write(int arg0) throws IOException 78 | { 79 | } 80 | } 81 | private static MessageDigest _initDigest() 82 | { 83 | try 84 | { 85 | return MessageDigest.getInstance("SHA-1"); 86 | } 87 | catch(NoSuchAlgorithmException e) 88 | { 89 | throw new RuntimeException("JRE is broken - it's supposed to support SHA-1, but does not"); 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /persian-nlp/src/test/java/com/sixthsolution/apex/nlp/persian/test/PersianTokenizationTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.persian.test; 2 | 3 | import com.sixthsolution.apex.nlp.persian.PersianTokenizer; 4 | import com.sixthsolution.apex.nlp.test.TokenizerAssertion; 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | 8 | import static com.sixthsolution.apex.nlp.test.TokenizerAssertion.assertTokens; 9 | 10 | /** 11 | * @author Rozhin Bayati 12 | */ 13 | 14 | public class PersianTokenizationTest { 15 | 16 | @Before 17 | public void before_test(){ 18 | PersianTokenizer persianTokenizer=new PersianTokenizer(); 19 | TokenizerAssertion.init(persianTokenizer); 20 | } 21 | 22 | @Test 23 | public void test_sentences() { 24 | assertTokens( 25 | "22 بهمن روز پیروزی انقلاب اسلامی است", 26 | "22","بهمن","روز","پیروزی","انقلاب","اسلامی","است"); 27 | 28 | assertTokens( 29 | "فروردین 1-13 مسافرت به شمال", 30 | "فروردین","1","-","13","مسافرت","به","شمال" 31 | ); 32 | assertTokens( 33 | "روز های یکشنبه و سه شنبه قرار ملاقات با رئیس از ساعت 2 تا 5 بعد از ظهر ", 34 | "روز_های","یکشنبه","و","سه_شنبه","قرار","ملاقات","با","رئیس","از","ساعت","2","تا","5","بعد_از_ظهر" 35 | ); 36 | assertTokens( 37 | "6امین روز هفته ساعت 4 چندشنبه با سینا", 38 | "6","امین","روز","هفته","ساعت","4","چندشنبه","با","سینا" 39 | ); 40 | assertTokens( 41 | "5 روز در هفته در ساعت 9 تو میانجاده تاکسی بگیرم", 42 | 
"5","روز","در","هفته","در","ساعت","9","تو","میانجاده","تاکسی","بگیرم" 43 | ); 44 | assertTokens( 45 | "سال 1399 هجری شمسی", 46 | "سال","1399","هجری","شمسی" 47 | ); 48 | assertTokens( 49 | "روز دویست و دوم سال", 50 | "روز","دویست","و","دوم","سال" 51 | ); 52 | assertTokens( 53 | "ساعت بیست و دو و بیست دقیقه", 54 | "ساعت","بیست","و","دو","و","بیست","دقیقه" 55 | ); 56 | assertTokens( 57 | "هر 5 روز تا عید دوره ی درسی", 58 | "هر","5","روز","تا","عید","دوره","ی","درسی" 59 | ); 60 | assertTokens( 61 | "ماه پنجم سال تا سال بعد", 62 | "ماه","پنجم","سال","تا","سال","بعد" 63 | ); 64 | assertTokens( 65 | "4 روز بعد از عید", 66 | "4","روز","بعد","از","عید" 67 | ); 68 | assertTokens( 69 | "شنبه قبل رمضان", 70 | "شنبه","قبل","رمضان" 71 | ); 72 | assertTokens( 73 | "11 ماه بعد از اولین روز سال", 74 | "11","ماه","بعد","از","اولین","روز","سال" 75 | ); 76 | assertTokens( 77 | "دهم هر ماه خوشالم", 78 | "دهم","هر","ماه","خوشالم" 79 | ); 80 | assertTokens( 81 | "2 روز مانده به عید", 82 | "2","روز","مانده","به","عید" 83 | ); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/BuilderCacheTest.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.ObjectInputStream; 6 | import java.io.ObjectOutputStream; 7 | import java.io.Serializable; 8 | import java.util.EnumSet; 9 | import java.util.HashMap; 10 | import java.util.Map; 11 | 12 | import org.junit.Assert; 13 | import org.junit.Test; 14 | 15 | import com.nobigsoftware.util.BuilderCache; 16 | 17 | public class BuilderCacheTest extends TestBase 18 | { 19 | @Test 20 | public void test() throws Exception 21 | { 22 | InMemoryBuilderCache cache = new InMemoryBuilderCache(); 23 | 24 | DfaBuilder builder = new DfaBuilder<>(cache); 25 | _build(builder); 26 | Assert.assertEquals(1, 
cache.m_cache.size()); 27 | Assert.assertEquals(0, cache.m_hits); 28 | 29 | builder.clear(); 30 | _build(builder); 31 | Assert.assertEquals(2, cache.m_cache.size()); 32 | Assert.assertEquals(0, cache.m_hits); 33 | 34 | builder = new DfaBuilder<>(cache); 35 | _build(builder); 36 | Assert.assertEquals(2, cache.m_cache.size()); 37 | Assert.assertEquals(1, cache.m_hits); 38 | } 39 | 40 | private void _build(DfaBuilder builder) throws Exception 41 | { 42 | for (JavaToken tok : JavaToken.values()) 43 | { 44 | builder.addPattern(tok.m_pattern, tok); 45 | } 46 | EnumSet lang = EnumSet.allOf(JavaToken.class); 47 | DfaState start = builder.build(lang, null); 48 | _checkDfa(start, "JavaTest.out.txt", false); 49 | } 50 | 51 | private static class InMemoryBuilderCache implements BuilderCache 52 | { 53 | Map m_cache = new HashMap<>(); 54 | int m_hits = 0; 55 | 56 | @Override 57 | public Serializable getCachedItem(String key) 58 | { 59 | Serializable ret=null; 60 | byte[] bytes = m_cache.get(key); 61 | if (bytes != null) 62 | { 63 | try 64 | { 65 | ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(bytes)); 66 | ret = (Serializable)is.readObject(); 67 | } 68 | catch(Exception e) 69 | {} 70 | } 71 | if (ret != null) 72 | { 73 | ++m_hits; 74 | } 75 | return ret; 76 | } 77 | 78 | @Override 79 | public void maybeCacheItem(String key, Serializable item) 80 | { 81 | try 82 | { 83 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 84 | ObjectOutputStream oos = new ObjectOutputStream(bos); 85 | oos.writeObject(item); 86 | oos.close(); 87 | m_cache.put(key, bos.toByteArray()); 88 | } 89 | catch(Exception e) 90 | {} 91 | } 92 | 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/StringMatchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache 
License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.util.Iterator; 19 | 20 | /** 21 | * An {@link Iterator} that provides access to the pattern matches in a string 22 | *

23 | * {@link StringSearcher#searchString(String)} produces these. 24 | */ 25 | public interface StringMatchIterator extends Iterator 26 | { 27 | /** 28 | * Get the position of the start of the last match in the string. 29 | * 30 | * @return the index of the first character in the last match 31 | * @throws IllegalStateException unless called after a valid call to {@link #next()} 32 | */ 33 | int matchStartPosition(); 34 | 35 | /** 36 | * Get the position of the end of the last match in the string. 37 | * 38 | * @return the index after the last character in the last match 39 | * @throws IllegalStateException unless called after a valid call to {@link #next()} 40 | */ 41 | int matchEndPosition(); 42 | 43 | /** 44 | * Get the string value of the last match 45 | *

46 | * Note that a new string is allocated by the first call to this method for each match. 47 | * 48 | * @return the source portion of the source string corresponding to the last match 49 | * @throws IllegalStateException unless called after a valid call to {@link #next()} 50 | */ 51 | String matchValue(); 52 | 53 | /** 54 | * Get the result of the last match. 55 | * @return the MATCHRESULT returned by the last call to {@link #next()} 56 | * @throws IllegalStateException unless called after a valid call to {@link #next()} 57 | */ 58 | MATCHRESULT matchResult(); 59 | 60 | /** 61 | * rewind (or jump forward) to a given position in the source string 62 | *

63 | * The next match returned will be the one (if any) that starts at a position >= pos 64 | *

65 | * IMPORTANT: If this method returns true, you must call {@link #next()} to get the result 66 | * of the next match. Until then calls to the match accessor methods will continue to 67 | * return information from the previous call to {@link #next()}. 68 | * 69 | * @param pos new position in the source string to search from 70 | * @return true if there is a match after the given position. The same value will be returned from {@link #hasNext()} 71 | */ 72 | boolean reposition(int pos); 73 | } 74 | 75 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/model/Event.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 6thSolution 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.sixthsolution.apex.model; 18 | 19 | import org.threeten.bp.LocalDateTime; 20 | 21 | /** 22 | * Represents a single calendar event. 
23 | * 24 | * @author Saeed Masoumi (saeed@6thsolution.com) 25 | * @author Rozhin Bayati 26 | */ 27 | public class Event { 28 | 29 | private String title = ""; 30 | private String location = ""; 31 | private LocalDateTime startDateTime = null; 32 | private LocalDateTime endDateTime = null; 33 | private boolean isAllDay = false; 34 | private Recurrence recurrence = null; 35 | 36 | public Event() { 37 | 38 | } 39 | 40 | public Event(String title, String location, LocalDateTime startDateTime, 41 | LocalDateTime endDateTime, 42 | boolean isAllDay, Recurrence recurrence) { 43 | this.title = title; 44 | this.location = location; 45 | this.startDateTime = startDateTime; 46 | this.endDateTime = endDateTime; 47 | this.isAllDay = isAllDay; 48 | this.recurrence = recurrence; 49 | } 50 | 51 | 52 | public void setTitle(String title) { 53 | this.title = title; 54 | } 55 | 56 | public void setStartDateTime(LocalDateTime startDateTime) { 57 | this.startDateTime = startDateTime; 58 | } 59 | 60 | public void setEndDateTime(LocalDateTime endDateTime) { 61 | this.endDateTime = endDateTime; 62 | } 63 | 64 | public String title() { 65 | return title; 66 | } 67 | 68 | public String location() { 69 | return location; 70 | } 71 | 72 | public LocalDateTime start() { 73 | return startDateTime; 74 | } 75 | 76 | public LocalDateTime end() { 77 | return endDateTime; 78 | } 79 | 80 | public boolean isAllDay() { 81 | return isAllDay; 82 | } 83 | 84 | public void setAllDay(boolean allDay) { 85 | isAllDay = allDay; 86 | } 87 | 88 | public boolean isRecurrence() { 89 | return recurrence != null; 90 | } 91 | 92 | public void setRecurrence(Recurrence recurrence) { 93 | this.recurrence = recurrence; 94 | } 95 | 96 | public Recurrence recurrence() { 97 | return recurrence; 98 | } 99 | 100 | 101 | @Override 102 | public String toString() { 103 | return "Event{" + 104 | "title='" + title + '\'' + 105 | ", startDateTime=" + startDateTime + 106 | ", endDateTime=" + endDateTime + 107 | ", isAllDay=" + 
isAllDay + 108 | ", recurrence=" + recurrence + 109 | '}'; 110 | } 111 | 112 | } 113 | 114 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/RegexTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | import java.io.PrintWriter; 19 | import java.io.StringWriter; 20 | 21 | import org.junit.Assert; 22 | import org.junit.Test; 23 | 24 | public class RegexTest extends TestBase 25 | { 26 | @Test 27 | public void testRegexParser() throws Exception 28 | { 29 | Matchable p1,p2; 30 | 31 | p1 = Pattern.anyOf("A","B"); 32 | p2 = Pattern.regex("A|B"); 33 | _check(p1,p2); 34 | 35 | p1 = Pattern.match("A").then(Pattern.anyOf("C","D")); 36 | p2 = Pattern.regex("A(C|D)"); 37 | _check(p1,p2); 38 | 39 | p1 = Pattern.match("A").then(Pattern.anyOf("C","D")).then("B"); 40 | p2 = Pattern.regex("A(C|D)B"); 41 | _check(p1,p2); 42 | 43 | p1 = Pattern.match("A").thenMaybe(Pattern.anyOf("C","D")).then("B"); 44 | p2 = Pattern.regex("A(C|D)?B"); 45 | _check(p1,p2); 46 | 47 | p1 = Pattern.match("A").thenRepeat(Pattern.anyOf("C","D")).then("B"); 48 | p2 = Pattern.regex("A(C|D)+B"); 49 | _check(p1,p2); 50 | 51 | p1 = Pattern.match("A").thenMaybeRepeat(Pattern.anyOf("C","D")).then("B"); 52 | p2 = 
Pattern.regex("A(C|D)*B"); 53 | _check(p1,p2); 54 | 55 | p1 = Pattern.match("A").thenMaybeRepeat(Pattern.anyOf("C","D")).then("B"); 56 | p2 = Pattern.regex("A(C|D)+?B"); 57 | _check(p1,p2); 58 | 59 | p1 = Pattern.anyOf(Pattern.match("A").thenMaybeRepeat("B"), Pattern.match("C")); 60 | p2 = Pattern.regex("AB*|C"); 61 | _check(p1,p2); 62 | 63 | p1 = Pattern.regex("\\s\\S\\d\\D\\w\\W"); 64 | p2 = Pattern.regex("[ \\t\\n\\x0B\\f\\r][^ \\t\\n\\x0B\\f\\r][0-9][^0-9][a-zA-Z_0-9][^a-zA-Z_0-9]"); 65 | _check(p1,p2); 66 | 67 | p1 = Pattern.regex("[^\\d][\\d]"); 68 | p2 = Pattern.regex("[\\D][^\\D]"); 69 | _check(p1,p2); 70 | 71 | p1 = Pattern.regex("[Cc][Aa][Tt][^0-9a-fA-F][^0-9a-f@-F]"); 72 | p2 = Pattern.regexI("cAt[^\\da-f][^\\d@-F]"); 73 | _check(p1,p2); 74 | } 75 | 76 | private void _check(Matchable pWant, Matchable pHave) throws Exception 77 | { 78 | String want = _pToString(pWant); 79 | String have = _pToString(pHave); 80 | if (!want.equals(have)) 81 | { 82 | Assert.assertEquals(want, have); 83 | } 84 | } 85 | 86 | private String _pToString(Matchable p) 87 | { 88 | DfaBuilder builder = new DfaBuilder<>(); 89 | builder.addPattern(p, true); 90 | DfaState dfa = builder.build(null); 91 | StringWriter w = new StringWriter(); 92 | m_printer.print(new PrintWriter(w), dfa); 93 | return w.toString(); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/RegexSpeedTest.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | import backport.java.util.function.Function; 7 | 8 | public class RegexSpeedTest extends TestBase 9 | { 10 | private static final int SPINUP=1000; 11 | @Test 12 | public void notFoundReplaceTest() throws Exception 13 | { 14 | String patString = ("01235|/|456*1|abc|_|\\..*|013|0?1?2?3?4?57"); 15 | String src; 16 | { 17 | 
StringBuilder sb = new StringBuilder(); 18 | for (int i=0;i<10000;i++) 19 | { 20 | sb.append("0123456789"); 21 | } 22 | src = sb.toString(); 23 | } 24 | 25 | int javaCount = timeJava(src, patString); 26 | int srCount = timeSearchAndReplaceBuilder(src, patString); 27 | int matcherCount = timeMatcher(src, patString); 28 | System.out.println("Search+Replace per second in 100K string, patterns not found:"); 29 | System.out.format("Java Regex: %d SearchAndReplaceBuilder: %d StringMatcher: %d\n", javaCount, srCount, matcherCount); 30 | } 31 | 32 | int timeJava(String src, String patString) 33 | { 34 | int count=0; 35 | java.util.regex.Pattern javapat = java.util.regex.Pattern.compile(patString); 36 | long start = System.currentTimeMillis(); 37 | String str = src; 38 | for (long t = System.currentTimeMillis()-start;t < SPINUP+1000; t=System.currentTimeMillis()-start) 39 | { 40 | str = javapat.matcher(str).replaceAll(""); 41 | if (t>=SPINUP) 42 | { 43 | ++count; 44 | } 45 | } 46 | Assert.assertEquals(src, str); 47 | return count; 48 | } 49 | int timeSearchAndReplaceBuilder(String src, String patString) 50 | { 51 | Function replacer; 52 | { 53 | SearchAndReplaceBuilder builder=new SearchAndReplaceBuilder(); 54 | builder.addReplacement(Pattern.regex(patString), (dest, srcStr, s, e) -> 0); 55 | replacer = builder.buildStringReplacer(); 56 | } 57 | 58 | int count=0; 59 | long start = System.currentTimeMillis(); 60 | String str = src; 61 | for (long t = System.currentTimeMillis()-start;t < SPINUP+1000; t=System.currentTimeMillis()-start) 62 | { 63 | str = replacer.apply(str); 64 | if (t>=SPINUP) 65 | { 66 | ++count; 67 | } 68 | } 69 | Assert.assertEquals(src, str); 70 | return count; 71 | } 72 | 73 | int timeMatcher(String src, String patString) 74 | { 75 | DfaState startState; 76 | { 77 | DfaBuilder builder=new DfaBuilder<>(); 78 | builder.addPattern(Pattern.regex(patString), true); 79 | startState = builder.build(null); 80 | } 81 | 82 | int count=0; 83 | long start = 
System.currentTimeMillis(); 84 | for (long t = System.currentTimeMillis()-start;t < SPINUP+1000; t=System.currentTimeMillis()-start) 85 | { 86 | StringMatcher m = new StringMatcher(src); 87 | if (m.findNext(startState)!=null) 88 | { 89 | throw new RuntimeException("not supposed to find a match"); 90 | } 91 | if (t>=SPINUP) 92 | { 93 | ++count; 94 | } 95 | } 96 | return count; 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /apex/src/test/java/com/sixthsolution/apex/nlp/test/ChunkDetectorAssertion.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.test; 2 | 3 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 4 | import com.sixthsolution.apex.nlp.ner.Entity; 5 | import com.sixthsolution.apex.nlp.ner.Label; 6 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 7 | import com.sixthsolution.apex.nlp.tagger.TaggedWord; 8 | import com.sixthsolution.apex.nlp.tagger.Tagger; 9 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer; 10 | 11 | import java.util.Iterator; 12 | 13 | import static junit.framework.TestCase.assertEquals; 14 | import static junit.framework.TestCase.assertNull; 15 | import static junit.framework.TestCase.assertTrue; 16 | 17 | /** 18 | * @author Saeed Masoumi (s-masoumi@live.com) 19 | */ 20 | 21 | public class ChunkDetectorAssertion { 22 | private static ChunkDetectorAssertion instance = null; 23 | private final ChunkDetector detector; 24 | private Tokenizer tokenizer; 25 | private Tagger tagger; 26 | 27 | private ChunkDetectorAssertion(Tokenizer tokenizer, Tagger tagger, ChunkDetector detector) { 28 | this.tokenizer = tokenizer; 29 | this.tagger = tagger; 30 | this.detector = detector; 31 | } 32 | 33 | private static ChunkDetectorAssertion getInstance() { 34 | return instance; 35 | } 36 | 37 | public static void init(Tokenizer tokenizer, Tagger tagger, ChunkDetector detector) { 38 | instance = new 
ChunkDetectorAssertion(tokenizer, tagger, detector); 39 | } 40 | 41 | public static ChunkedPartAssertion assertChunkedPart(String sentence) { 42 | System.out.println("chunk detector for: " + sentence); 43 | long startTime = System.currentTimeMillis(); 44 | ChunkedPartAssertion result = new ChunkedPartAssertion(instance.detector.detect( 45 | instance.tagger.tag(instance.tokenizer.tokenize(sentence)))); 46 | System.out.println( 47 | "Detecting takes: " + (System.currentTimeMillis() - startTime) + " millis"); 48 | return result; 49 | } 50 | 51 | public static class ChunkedPartAssertion { 52 | 53 | private final ChunkedPart chunkedPart; 54 | 55 | public ChunkedPartAssertion(ChunkedPart chunkedPart) { 56 | System.out.println("chunkedpart:"+chunkedPart); 57 | this.chunkedPart = chunkedPart; 58 | } 59 | 60 | public ChunkedPartAssertion entity(Entity entity) { 61 | assertTrue(chunkedPart.getEntity().equals(entity)); 62 | return this; 63 | } 64 | 65 | 66 | public ChunkedPartAssertion label(Label label) { 67 | assertEquals(label, chunkedPart.getLabel()); 68 | return this; 69 | } 70 | 71 | public ChunkedPartAssertion text(String text) { 72 | StringBuilder sb = new StringBuilder(); 73 | // System.out.println("tagwords:"+chunkedPart.getTaggedWords()); 74 | Iterator itr = chunkedPart.getTaggedWords().iterator(); 75 | // System.out.println("String:"+text); 76 | 77 | Iterator itr2 = chunkedPart.getTaggedWords().iterator(); 78 | 79 | System.out.println(itr2.next().toString()+"**"); 80 | 81 | while (itr.hasNext()) { 82 | // System.out.println(itr2.next().toString()+"**"); 83 | sb.append(itr.next().getWord()); 84 | if (itr.hasNext()) { 85 | sb.append(" "); 86 | } 87 | } 88 | assertEquals(text, sb.toString()); 89 | return this; 90 | } 91 | 92 | public ChunkedPartAssertion noDetection() { 93 | assertNull(chunkedPart); 94 | return this; 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- 
/english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/TimeDetector.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english; 2 | 3 | import com.nobigsoftware.dfalex.Pattern; 4 | import com.sixthsolution.apex.nlp.english.filter.TimeDetectionFilter; 5 | import com.sixthsolution.apex.nlp.ner.Entity; 6 | import com.sixthsolution.apex.nlp.ner.Label; 7 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter; 8 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 9 | import com.sixthsolution.apex.nlp.util.Pair; 10 | 11 | import java.util.Arrays; 12 | import java.util.List; 13 | 14 | import static com.nobigsoftware.dfalex.Pattern.anyOf; 15 | import static com.nobigsoftware.dfalex.Pattern.match; 16 | import static com.nobigsoftware.dfalex.Pattern.maybe; 17 | import static com.sixthsolution.apex.nlp.dict.Tag.NUMBER; 18 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_HOUR; 19 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_MERIDIEM; 20 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_MIN; 21 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_PREFIX; 22 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_RANGE; 23 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_RELATIVE; 24 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_RELATIVE_INDICATOR; 25 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_RELATIVE_PREFIX; 26 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_SEPARATOR; 27 | import static com.sixthsolution.apex.nlp.dict.Tag.TIME_START_RANGE; 28 | import static com.sixthsolution.apex.nlp.ner.Entity.TIME; 29 | import static com.sixthsolution.apex.nlp.ner.Label.FIXED_TIME; 30 | import static com.sixthsolution.apex.nlp.ner.Label.RANGE_TIME; 31 | import static com.sixthsolution.apex.nlp.ner.Label.RELATIVE_TIME; 32 | 33 | /** 34 | * @author Saeed Masoumi (s-masoumi@live.com) 35 | * @author Rozhin Bayati 36 | */ 
37 | 38 | public class TimeDetector extends ChunkDetector { 39 | 40 | /** 41 | * @return returns noon, afternoon, etc. 42 | */ 43 | private static Pattern time_relative() { 44 | return match(TIME_RELATIVE.toString()); 45 | } 46 | 47 | /** 48 | * @return returns hh:mm am/pm 49 | */ 50 | private static Pattern time_hour_min() { 51 | return match(NUMBER.toString()).thenMaybe( 52 | match(TIME_SEPARATOR.toString()).then(NUMBER.toString())) 53 | .thenMaybe(TIME_MERIDIEM.toString()); 54 | } 55 | 56 | /** 57 | * @return at hh:mm am/pm, at noon 58 | */ 59 | private static Pattern fixed_time() { 60 | return maybe(TIME_PREFIX.toString()).then( 61 | anyOf(time_relative(), time_hour_min())) 62 | .thenMaybe( 63 | TIME_MERIDIEM.toString()); 64 | } 65 | 66 | /** 67 | * @return from (time) till (time) 68 | */ 69 | private static Pattern range_time() { 70 | //TODO add From-until 71 | return fixed_time().then(TIME_RANGE.toString()).then(anyOf(time_relative(), time_hour_min())); 72 | } 73 | 74 | private static Pattern relative_time() { 75 | return maybe(TIME_RELATIVE_PREFIX.toString()).then(match(NUMBER.toString())) 76 | .then(anyOf(TIME_HOUR.toString(), TIME_MIN.toString())) 77 | .thenMaybe(match(TIME_RELATIVE_INDICATOR.toString()).then(fixed_time())); 78 | } 79 | 80 | @Override 81 | protected List> getPatterns() { 82 | return Arrays.asList( 83 | newPattern(FIXED_TIME, fixed_time()), 84 | newPattern(RANGE_TIME, range_time()), 85 | newPattern(RELATIVE_TIME, relative_time()) 86 | ); 87 | } 88 | 89 | @Override 90 | protected List getFilters() { 91 | return Arrays.asList(new TimeDetectionFilter()); 92 | } 93 | 94 | @Override 95 | protected Entity getEntity() { 96 | return TIME; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /dfalex/src/test/java/com/nobigsoftware/dfalex/IntTest.java: -------------------------------------------------------------------------------- 1 | package com.nobigsoftware.dfalex; 2 | 3 | import java.util.ArrayList; 
4 | import java.util.HashSet; 5 | import java.util.List; 6 | import java.util.Set; 7 | 8 | import org.junit.Assert; 9 | import org.junit.Test; 10 | 11 | public class IntTest extends TestBase 12 | { 13 | final PrettyPrinter m_printer = new PrettyPrinter(); 14 | @Test 15 | public void testTo100K() throws Exception 16 | { 17 | DfaBuilder builder = new DfaBuilder<>(); 18 | for (int i=0;i<100000;++i) 19 | { 20 | builder.addPattern(Pattern.match(Integer.toString(i)), i%7); 21 | } 22 | long tstart = System.currentTimeMillis(); 23 | DfaState start = builder.build(null); 24 | int numstates = _countStates(start); 25 | long telapsed = System.currentTimeMillis() - tstart; 26 | System.out.printf("Mininmized 100000 numbers -> value mod 7 (down to %d states) in %1.3f seconds", 27 | numstates,telapsed*.001).println(); 28 | Assert.assertEquals(null, StringMatcher.matchWholeString(start, "")); 29 | Assert.assertEquals(null, StringMatcher.matchWholeString(start, "100001")); 30 | for (int i=0;i<100000;++i) 31 | { 32 | Assert.assertEquals(Integer.valueOf(i%7), StringMatcher.matchWholeString(start, Integer.toString(i))); 33 | } 34 | Assert.assertEquals(36, numstates); 35 | } 36 | 37 | @Test 38 | public void testSimultaneousLanguages() 39 | { 40 | DfaBuilder builder = new DfaBuilder<>(); 41 | for (int i=0;i<100000;++i) 42 | { 43 | if ((i%21)==0) 44 | { 45 | builder.addPattern(Pattern.match(Integer.toString(i)), 3); 46 | } 47 | else if ((i%3)==0) 48 | { 49 | builder.addPattern(Pattern.match(Integer.toString(i)), 1); 50 | } 51 | else if ((i%7)==0) 52 | { 53 | builder.addPattern(Pattern.match(Integer.toString(i)), 2); 54 | } 55 | } 56 | List> langs = new ArrayList<>(); 57 | { 58 | HashSet s1 = new HashSet<>(); 59 | s1.add(1);s1.add(3); 60 | HashSet s2 = new HashSet<>(); 61 | s2.add(2);s2.add(3); 62 | langs.add(s1); 63 | langs.add(s2); 64 | } 65 | long tstart = System.currentTimeMillis(); 66 | List> starts = builder.build(langs, null); 67 | DfaState start3 = starts.get(0); 68 | DfaState 
start7 = starts.get(1); 69 | int numstates = _countStates(start3,start7); 70 | long telapsed = System.currentTimeMillis() - tstart; 71 | System.out.printf("Mininmized 1000000 numbers -> divisible by 7 and 3 (down to %d states) in %1.3f seconds", 72 | numstates,telapsed*.001).println(); 73 | for (int i=0;i<100000;++i) 74 | { 75 | if ((i%21)==0) 76 | { 77 | Assert.assertEquals((Integer)3, StringMatcher.matchWholeString(start3, Integer.toString(i))); 78 | Assert.assertEquals((Integer)3, StringMatcher.matchWholeString(start7, Integer.toString(i))); 79 | } 80 | else if ((i%3)==0) 81 | { 82 | Assert.assertEquals((Integer)1, StringMatcher.matchWholeString(start3, Integer.toString(i))); 83 | Assert.assertEquals(null, StringMatcher.matchWholeString(start7, Integer.toString(i))); 84 | } 85 | else if ((i%7)==0) 86 | { 87 | Assert.assertEquals(null, StringMatcher.matchWholeString(start3, Integer.toString(i))); 88 | Assert.assertEquals((Integer)2, StringMatcher.matchWholeString(start7, Integer.toString(i))); 89 | } 90 | } 91 | Assert.assertEquals(137, numstates); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/TimeDetectorTest.java: -------------------------------------------------------------------------------- 1 | package com.sixthsolution.apex.nlp.english.test.tokenization; 2 | 3 | import com.sixthsolution.apex.nlp.english.TimeDetector; 4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector; 5 | 6 | import org.junit.Test; 7 | 8 | import static com.sixthsolution.apex.nlp.ner.Entity.TIME; 9 | import static com.sixthsolution.apex.nlp.ner.Label.FIXED_TIME; 10 | import static com.sixthsolution.apex.nlp.ner.Label.RANGE_TIME; 11 | import static com.sixthsolution.apex.nlp.ner.Label.RELATIVE_TIME; 12 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart; 13 | 14 | /** 15 | * @author Saeed Masoumi 
(s-masoumi@live.com) 16 | */ 17 | 18 | public class TimeDetectorTest extends EnglishDetectorTest { 19 | 20 | //TODO add every x hours 21 | 22 | @Test 23 | public void test_fixed_time() { 24 | assertChunkedPart("at 10 am").text("at 10 am").label(FIXED_TIME).entity(TIME); 25 | assertChunkedPart("at 10").text("at 10").label(FIXED_TIME).entity(TIME); 26 | assertChunkedPart("at noon").text("at noon").label(FIXED_TIME).entity(TIME); 27 | assertChunkedPart("in the evening").text("in_the_evening").label(FIXED_TIME).entity(TIME); 28 | assertChunkedPart("6p_m").text("6 p_m").label(FIXED_TIME).entity(TIME); 29 | assertChunkedPart("6p").text("6 p").label(FIXED_TIME).entity(TIME); 30 | assertChunkedPart("at 23:10").text("at 23 : 10").label(FIXED_TIME).entity(TIME); 31 | assertChunkedPart("at 8.20 pm").text("at 8 . 20 pm").label(FIXED_TIME).entity(TIME); 32 | assertChunkedPart("23:10").text("23 : 10").label(FIXED_TIME).entity(TIME); 33 | assertChunkedPart("at four").text("at four").label(FIXED_TIME).entity(TIME); 34 | } 35 | 36 | @Test 37 | public void test_invalid_fixed_time(){ 38 | assertChunkedPart("7").noDetection(); 39 | assertChunkedPart("12.2.2016").noDetection(); 40 | } 41 | 42 | @Test 43 | public void test_range_time() { 44 | assertChunkedPart("at 5pm till 6pm").text("at 5 pm till 6 pm") 45 | .label(RANGE_TIME).entity(TIME); 46 | assertChunkedPart("at 5-6pm").text("at 5 - 6 pm") 47 | .label(RANGE_TIME).entity(TIME); 48 | assertChunkedPart("at nine till eleven").text("at nine till eleven") 49 | .label(RANGE_TIME).entity(TIME); 50 | assertChunkedPart("at 5pm to 6pm").text("at 5 pm to 6 pm") 51 | .label(RANGE_TIME).entity(TIME); 52 | assertChunkedPart("at 9:30 to 10:30").text("at 9 : 30 to 10 : 30") 53 | .label(RANGE_TIME).entity(TIME); 54 | assertChunkedPart("at morning - 9pm").text("at morning - 9 pm") 55 | .label(RANGE_TIME).entity(TIME); 56 | assertChunkedPart("at 11.5 - 12.5 ").text("at 11 . 5 - 12 . 
5") 57 | .label(RANGE_TIME).entity(TIME); 58 | 59 | } 60 | 61 | @Test 62 | public void test_relative_time() { 63 | assertChunkedPart("2 hours").text("2 hours") 64 | .label(RELATIVE_TIME).entity(TIME); 65 | assertChunkedPart("2 hours before noon").text("2 hours before noon") 66 | .label(RELATIVE_TIME).entity(TIME); 67 | assertChunkedPart("3 hours after midnight").text("3 hours after midnight") 68 | .label(RELATIVE_TIME).entity(TIME); 69 | assertChunkedPart("2 minutes from now").text("2 minutes") 70 | .label(RELATIVE_TIME).entity(TIME); 71 | assertChunkedPart("for two hours").text("for two hours") 72 | .label(RELATIVE_TIME).entity(TIME); 73 | assertChunkedPart("an hour after noon").text("an hour after noon") 74 | .label(RELATIVE_TIME).entity(TIME); 75 | assertChunkedPart("for 31 minutes").text("for 31 minutes") 76 | .label(RELATIVE_TIME).entity(TIME); 77 | 78 | } 79 | 80 | @Override 81 | protected ChunkDetector provideDetector() { 82 | return new TimeDetector(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /dfalex/src/main/java/com/nobigsoftware/dfalex/StringReplacements.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Matthew Timmermans 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nobigsoftware.dfalex; 17 | 18 | /** 19 | * A utility class containing common StringReplacements that can be used with 20 | * {@link SearchAndReplaceBuilder} 21 | */ 22 | public class StringReplacements 23 | { 24 | /** 25 | * Replacement that leaves the matching substring unmodified 26 | */ 27 | public static final StringReplacement IGNORE = (dest, src, startPos, endPos) -> 28 | { 29 | dest.append(src, startPos, endPos); 30 | return 0; 31 | }; 32 | 33 | /** 34 | * Replacement that deletes the matching substring 35 | */ 36 | public static final StringReplacement DELETE = (dest, src, startPos, endPos) -> 37 | { 38 | return 0; 39 | }; 40 | 41 | /** 42 | * Replacement that converts the matching substring to upper case 43 | */ 44 | public static final StringReplacement TOUPPER = (dest, src, startPos, endPos) -> 45 | { 46 | for (int i=startPos; i 57 | { 58 | for (int i=startPos; i 70 | { 71 | for (int i=startPos; i 92 | { 93 | dest.append(str); 94 | return 0; 95 | }; 96 | } 97 | 98 | /** 99 | * Make a replacement that surrounds matches with a given prefix and suffix, and applies the given replacer 100 | * to the match itself 101 | * 102 | * @param prefix to put before matches 103 | * @param replacement for the match itself 104 | * @param suffix suffix to put after matches 105 | * @return new StringReplacement 106 | */ 107 | public static final StringReplacement surround(CharSequence prefix, StringReplacement replacement, CharSequence suffix) 108 | { 109 | return (dest, src, startPos, endPos) -> 110 | { 111 | dest.append(prefix); 112 | int ret = replacement.apply(dest, src, startPos, endPos); 113 | dest.append(suffix); 114 | return ret; 115 | }; 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /apex/src/main/java/com/sixthsolution/apex/nlp/ner/regex/ChunkDetector.java: -------------------------------------------------------------------------------- 1 | package 
com.sixthsolution.apex.nlp.ner.regex; 2 | 3 | import com.nobigsoftware.dfalex.DfaBuilder; 4 | import com.nobigsoftware.dfalex.DfaState; 5 | import com.nobigsoftware.dfalex.Pattern; 6 | import com.nobigsoftware.dfalex.StringMatcher; 7 | import com.sixthsolution.apex.nlp.dict.Tag; 8 | import com.sixthsolution.apex.nlp.dict.TagValue; 9 | import com.sixthsolution.apex.nlp.dict.Tags; 10 | import com.sixthsolution.apex.nlp.ner.ChunkedPart; 11 | import com.sixthsolution.apex.nlp.ner.Entity; 12 | import com.sixthsolution.apex.nlp.ner.Label; 13 | import com.sixthsolution.apex.nlp.tagger.TaggedWord; 14 | import com.sixthsolution.apex.nlp.tagger.TaggedWords; 15 | import com.sixthsolution.apex.nlp.util.Pair; 16 | 17 | import java.util.List; 18 | 19 | import static com.sixthsolution.apex.nlp.dict.Tag.NONE; 20 | import static com.sixthsolution.apex.nlp.dict.Tag.NUMBER; 21 | 22 | /** 23 | * @author Saeed Masoumi (s-masoumi@live.com) 24 | * @author Rozhin Bayati 25 | */ 26 | 27 | public abstract class ChunkDetector { 28 | 29 | protected final DfaState