This is a functional interface
11 | * whose functional method is {@link #accept(Object, Object)}.
12 | *
13 | * @param <T> the type of the first argument to the operation
14 | * @param <U> the type of the second argument to the operation
15 | * @see Consumer
16 | */
public interface BiConsumer<T, U> {

    /**
     * Performs this operation on the given arguments.
     *
     * @param t the first input argument
     * @param u the second input argument
     */
    void accept(T t, U u);

}
28 |
--------------------------------------------------------------------------------
/english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/filter/DateDetectionFilter.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english.filter;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Label;
4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
6 |
7 | /**
8 | * @author Saeed Masoumi (s-masoumi@live.com)
9 | * @author Rozhin Bayati
10 | */
11 |
12 | public class DateDetectionFilter extends ChunkDetectionFilter {
13 | @Override
14 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) {
15 | switch (label) {
16 | case FORMAL_DATE:
17 | case RELAX_DATE:
18 | case FOREVER_DATE:
19 | case GLOBAL_DATE:
20 | case RELATIVE_DATE:
21 | case LIMITED_DATE:
22 | case EXPLICIT_RELATIVE_DATE:
23 | return true;
24 | }
25 | return false;
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/filter/DateDetectionFilter.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.filter;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Label;
4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
6 |
7 | /**
8 | * @author Saeed Masoumi (s-masoumi@live.com)
9 | * @author Rozhin Bayati
10 | */
11 |
12 | public class DateDetectionFilter extends ChunkDetectionFilter {
13 | @Override
14 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) {
15 | switch (label) {
16 | case FORMAL_DATE:
17 | case RELAX_DATE:
18 | case FOREVER_DATE:
19 | case GLOBAL_DATE:
20 | case RELATIVE_DATE:
21 | case LIMITED_DATE:
22 | case EXPLICIT_RELATIVE_DATE:
23 | return true;
24 | }
25 | return false;
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/backport/java/util/function/ObjIntConsumer.java:
--------------------------------------------------------------------------------
1 | package backport.java.util.function;
2 |
3 |
4 |
/**
 * Represents an operation that accepts an object-valued and a
 * {@code int}-valued argument, and returns no result.  This is the
 * {@code (reference, int)} specialization of {@link BiConsumer}.
 * Unlike most other functional interfaces, {@code ObjIntConsumer} is
 * expected to operate via side-effects.
 *
 * <p>This is a functional interface
 * whose functional method is {@link #accept(Object, int)}.
 *
 * @param <T> the type of the object argument to the operation
 *
 * @since 1.8
 */
public interface ObjIntConsumer<T> {

    /**
     * Performs this operation on the given arguments.
     *
     * @param t the first input argument
     * @param value the second input argument
     */
    void accept(T t, int value);
}
29 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/model/Frequency.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 6thSolution
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.sixthsolution.apex.model;
18 |
/**
 * How often a recurring {@link Event} repeats (for example {@code DAILY}).
 *
 * @author Saeed Masoumi (saeed@6thsolution.com)
 */
public enum Frequency {
    DAILY,
    WEEKLY,
    MONTHLY,
    YEARLY
}
27 |
--------------------------------------------------------------------------------
/english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/EnglishTaggerTest.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english.test.tokenization;
2 |
3 | import com.sixthsolution.apex.nlp.english.EnglishTokenizer;
4 | import com.sixthsolution.apex.nlp.english.EnglishVocabulary;
5 | import com.sixthsolution.apex.nlp.tagger.StandardTagger;
6 |
7 | import org.junit.Before;
8 | import org.junit.Test;
9 |
10 | import static com.sixthsolution.apex.nlp.test.TaggerAssertion.assertSentence;
11 | import static com.sixthsolution.apex.nlp.test.TaggerAssertion.init;
12 |
13 | /**
14 | * @author Saeed Masoumi (s-masoumi@live.com)
15 | */
16 |
17 | public class EnglishTaggerTest {
18 |
19 | @Before
20 | public void setUp() throws Exception {
21 | init(new EnglishTokenizer(), new StandardTagger(EnglishVocabulary.build()));
22 | }
23 |
24 | @Test
25 | public void test() {
26 | assertSentence("party on monday 10").hasTags("N|PP|D_WD|NM");
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/.script/deploy_artifacts.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Deploy a jar, source jar, and javadoc jar to bintray repo.
#
# Adapted from https://coderwall.com/p/9b_lfq and
# http://benlimmer.com/2013/12/26/automatically-publish-javadoc-to-gh-pages-with-travis-ci/

SLUG="6thsolution/ApexNLP"
BRANCH="master"
JDK="oraclejdk8"

set -e

# Work out why (if at all) this build must not deploy. The checks run in a
# fixed order and the first one that fails decides the message.
skip_reason=""
if [ "$TRAVIS_REPO_SLUG" != "$SLUG" ]; then
  skip_reason="wrong repository. Expected '$SLUG' but was '$TRAVIS_REPO_SLUG'."
elif [ "$TRAVIS_JDK_VERSION" != "$JDK" ]; then
  skip_reason="wrong JDK. Expected '$JDK' but was '$TRAVIS_JDK_VERSION'."
elif [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
  skip_reason="was pull request."
elif [ "$TRAVIS_BRANCH" != "$BRANCH" ]; then
  skip_reason="wrong branch. Expected '$BRANCH' but was '$TRAVIS_BRANCH'."
fi

if [ -n "$skip_reason" ]; then
  echo "Skipping deployment: $skip_reason"
else
  echo "Deploying artifacts..."
  ./gradlew publishFromCI --info
  echo "Artifacts deployed!"
fi
--------------------------------------------------------------------------------
/dfalex/src/main/java/backport/java/util/function/Function.java:
--------------------------------------------------------------------------------
1 | package backport.java.util.function;
2 |
/**
 * Represents a function that accepts one argument and produces a result.
 *
 * <p>This is a functional interface
 * whose functional method is {@link #apply(Object)}.
 *
 * @param <T> the type of the input to the function
 * @param <R> the type of the result of the function
 */
public interface Function<T, R> {

    /**
     * Returns a function that always returns its input argument.
     *
     * @param <T> the type of the input and output objects to the function
     * @return a function that always returns its input argument
     */
    static <T> Function<T, T> identity() {
        return t -> t;
    }

    /**
     * Applies this function to the given argument.
     *
     * @param t the function argument
     * @return the function result
     */
    R apply(T t);
}
32 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/model/WeekDay.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 6thSolution
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.sixthsolution.apex.model;
18 |
/**
 * Days of the week, each carrying a number from 1 ({@link #MON}) to
 * 7 ({@link #SUN}) -- the same numbering ISO-8601 uses.
 *
 * @author Saeed Masoumi (saeed@6thsolution.com)
 */
public enum WeekDay {
    MON(1), TUE(2), WED(3), THU(4), FRI(5), SAT(6), SUN(7);

    private final int dayOfWeek;

    WeekDay(int dayOfWeek) {
        this.dayOfWeek = dayOfWeek;
    }

    /**
     * Returns the number assigned to this day.
     *
     * @return a value from 1 (Monday) to 7 (Sunday)
     */
    public int getDayOfWeek() {
        return dayOfWeek;
    }
}
31 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/dict/TagValue.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.dict;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Entity;
4 | import com.sixthsolution.apex.nlp.util.Triple;
5 |
6 | /**
7 | * @author Saeed Masoumi (s-masoumi@live.com)
8 | */
9 |
10 | public class TagValue extends Triple {
11 |
12 | public Tag tag;
13 | public Object value;
14 | public Entity entity;
15 |
16 | private volatile String toStringResult;
17 |
18 | public TagValue(Tag tag, Object value, Entity entity) {
19 | super(tag, value, entity);
20 | this.tag = tag;
21 | this.value = value;
22 | this.entity = entity;
23 | }
24 |
25 | @Override
26 | public String toString() {
27 | if (toStringResult == null) {
28 | toStringResult = "Triple{" +
29 | "tag=" + tag.name() +
30 | ", value=" + value +
31 | ", entity=" + entity +
32 | '}';
33 | }
34 |
35 | return toStringResult;
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/tagger/TaggedWord.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.tagger;
2 |
3 | import com.sixthsolution.apex.nlp.dict.Tag;
4 | import com.sixthsolution.apex.nlp.dict.Tags;
5 |
6 | import java.util.Collection;
7 | import java.util.Set;
8 |
9 | /**
10 | * @author Saeed Masoumi (s-masoumi@live.com)
11 | */
12 |
13 | public class TaggedWord {
14 |
15 | private Tags tags;
16 | private final String word;
17 |
18 | public TaggedWord(String word) {
19 | this.word = word;
20 | this.tags = new Tags();
21 | }
22 |
23 | public TaggedWord(String word, Tags tags) {
24 | this.word = word;
25 | this.tags = tags;
26 | }
27 |
28 | public String getWord() {
29 | return word;
30 | }
31 |
32 | public Tags getTags() {
33 | return tags;
34 | }
35 |
36 | public boolean hasTag(Tag... tags) {
37 | return getTags().containsTag(tags);
38 | }
39 |
40 | @Override
41 | public String toString() {
42 | return "TaggedWord{" +
43 | "word='" + word + '\'' +
44 | ", tags=" + tags +
45 | '}';
46 | }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/ner/regex/RegExChunker.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.ner.regex;
2 |
3 | import com.sixthsolution.apex.nlp.ner.ChunkedPart;
4 | import com.sixthsolution.apex.nlp.ner.Chunker;
5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
6 |
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | /**
11 | * @author Saeed Masoumi (s-masoumi@live.com)
12 | */
13 |
14 | public class RegExChunker implements Chunker {
15 |
16 | private final List extends ChunkDetector> chunkDetectors;
17 |
18 | public RegExChunker(List extends ChunkDetector> chunkDetectors) {
19 | this.chunkDetectors = chunkDetectors;
20 | }
21 |
22 | @Override
23 | public List chunk(TaggedWords taggedWords) {
24 | TaggedWords clonedTaggedWords = (TaggedWords) taggedWords.clone();
25 | List chunkedParts = new ArrayList<>();
26 | for (ChunkDetector detector : chunkDetectors) {
27 | ChunkedPart result = detector.detect(clonedTaggedWords);
28 | if (result != null) {
29 | chunkedParts.add(result);
30 | }
31 | }
32 | return chunkedParts;
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/apex/src/test/java/com/sixthsolution/apex/nlp/test/TaggerAssertion.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.test;
2 |
3 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
4 | import com.sixthsolution.apex.nlp.tagger.Tagger;
5 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer;
6 |
7 | /**
8 | * @author Saeed Masoumi (s-masoumi@live.com)
9 | */
10 |
11 | public final class TaggerAssertion {
12 |
13 | private static TaggerAssertion instance = null;
14 | private Tokenizer tokenizer;
15 | private Tagger tagger;
16 |
17 | private TaggerAssertion(Tokenizer tokenizer, Tagger tagger) {
18 | this.tokenizer = tokenizer;
19 | this.tagger = tagger;
20 | }
21 |
22 | private static TaggerAssertion getInstance() {
23 | return instance;
24 | }
25 |
26 | public static void init(Tokenizer tokenizer, Tagger tagger) {
27 | instance = new TaggerAssertion(tokenizer, tagger);
28 | }
29 |
30 | public static TagAssertion assertSentence(String word) {
31 | return getInstance().makeTagAssertion(word);
32 | }
33 |
34 | private TagAssertion makeTagAssertion(String word) {
35 | TaggedWords taggedWords = tagger.tag(tokenizer.tokenize(word));
36 | return new TagAssertion(taggedWords);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/dict/DictionaryBuilder.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.dict;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Entity;
4 |
5 | /**
6 | * @author Saeed Masoumi (s-masoumi@live.com)
7 | */
8 | public class DictionaryBuilder {
9 |
10 | private Dictionary dictionary = new Dictionary();
11 |
12 | public Dictionary build() {
13 | return dictionary;
14 | }
15 |
16 | public TagEntryBuilder tag(Tag tag, Entity entity) {
17 | return new TagEntryBuilder(dictionary, tag, entity);
18 | }
19 |
20 | public static class TagEntryBuilder {
21 | private final Dictionary dictionary;
22 | private final Tag tag;
23 | private final Entity entity;
24 |
25 | TagEntryBuilder(Dictionary dictionary, Tag tag, Entity entity) {
26 | this.dictionary = dictionary;
27 | this.tag = tag;
28 | this.entity = entity;
29 | }
30 |
31 | public TagEntryBuilder e(Object value, String... words) {
32 | dictionary.addAll(words, tag, value, entity);
33 | return this;
34 | }
35 |
36 | public TagEntryBuilder e(String... words) {
37 | dictionary.addAll(words, tag, "", entity);
38 | return this;
39 | }
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/EnglishTokenizationTest.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english.test.tokenization;
2 |
3 | import com.sixthsolution.apex.nlp.english.EnglishTokenizer;
4 |
5 | import org.junit.Before;
6 | import org.junit.Test;
7 |
8 | import static com.sixthsolution.apex.nlp.test.TokenizerAssertion.assertTokens;
9 | import static com.sixthsolution.apex.nlp.test.TokenizerAssertion.init;
10 |
11 | /**
12 | * @author Saeed Masoumi (s-masoumi@live.com)
13 | * @author Rozhin Bayati
14 | */
15 |
16 | public class EnglishTokenizationTest {
17 |
18 | @Before
19 | public void setUp() {
20 | init(new EnglishTokenizer());
21 | }
22 |
23 | @Test
24 | public void test_sentences() {
25 | assertTokens(
26 | "Pizza party on the 2nd Friday of every month at 1pm\n",
27 | "Pizza", "party", "on", "the", "2", "nd", "Friday", "of", "every", "month", "at",
28 | "1", "pm");
29 |
30 | assertTokens(
31 | "Mission Trip at Jakarta on Nov 13-17 calendar Church\n",
32 | "Mission", "Trip", "at", "Jakarta", "on", "Nov", "13", "-", "17", "calendar",
33 | "Church"
34 | );
35 | assertTokens("Go GYM 2.05.2013 19:00",
36 | "Go", "GYM", "2", ".", "05", ".", "2013", ",", "19", ":", "00");
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/tagger/TaggedWords.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.tagger;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Iterator;
5 | import java.util.List;
6 |
7 | /**
8 | * @author Saeed Masoumi (s-masoumi@live.com)
9 | */
10 |
11 | public class TaggedWords extends ArrayList {
12 |
13 | public TaggedWords (){
14 | }
15 |
16 | public TaggedWords (List in){
17 | this.addAll(in);
18 | }
19 |
20 | public void removeRange(int fromIndex, int toIndex) {
21 | super.removeRange(fromIndex, toIndex);
22 | }
23 |
24 | @Override
25 | public String toString() {
26 | StringBuilder sb = new StringBuilder();
27 | Iterator itr = iterator();
28 | while (itr.hasNext()) {
29 | TaggedWord next = itr.next();
30 | sb.append(next.getWord()).append(" [").append(next.getTags().toString()).append("]");
31 | if (itr.hasNext()) {
32 | sb.append(", ");
33 | }
34 | }
35 | return sb.toString();
36 | }
37 |
38 | public List newSubList(int startIndex, int endIndex) {
39 | List taggedWords = new ArrayList<>();
40 | for (int i = startIndex; i < endIndex; i++) {
41 | taggedWords.add(get(i));
42 | }
43 | return taggedWords;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/ReplacementSelector.java:
--------------------------------------------------------------------------------
1 | package com.nobigsoftware.dfalex;
2 |
3 | /**
4 | * For search and replace operations, a functional interface that is called to select replacement text for
5 | * matches, based on the MATCHRESULT.
6 | *
7 | * This is called by a {@link StringSearcher#findAndReplace(String, ReplacementSelector)} to replace instances
8 | * of patterns found in a string.
9 | */
10 | public interface ReplacementSelector
11 | {
12 | /**
13 | * This will be called for each instance of each pattern found
14 | *
15 | * @param dest The replacement text for the matching substring should be written here
16 | * @param mr The MATCHRESULT produced by the match
17 | * @param src The string being searched, or a part of the stream being searched that contains the current match
18 | * @param startPos the start index of the current match in src
19 | * @param endPos the end index of the current match in src
20 | * @return if this is >0, then it is the position in the source string at which to continue processing after
21 | * replacement. If you set this <= startPos, a runtime exception will be thrown to
22 | * abort the infinite loop that would result. Almost always return 0.
23 | */
24 | int apply(SafeAppendable dest, MATCHRESULT mr, CharSequence src, int startPos, int endPos);
25 | }
--------------------------------------------------------------------------------
/.script/deploy_javadocs.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Downloads the latest snapshot javadoc jar of every module from the jfrog
# snapshot repository, unpacks each into "<module>-javadoc" on the gh-pages
# branch, then commits and pushes the result.

set -ex

REPO="git@github.com:6thsolution/ApexNLP.git"

DIR=temp-clone

# Delete any existing temporary website clone
rm -rf "$DIR"

# Clone the current repo into temp folder
git clone "$REPO" "$DIR"

# Move working directory into temp folder
cd "$DIR"
# Checkout and track the gh-pages branch
git checkout -t origin/gh-pages

# Artifactory location
server=https://oss.jfrog.org
repo=oss-snapshot-local

# jfrog artifacts location
for name in apex dfalex english-nlp persian-nlp
do
  # NOTE(review): the group path still says "easymvp"; confirm it matches the
  # group these modules are actually published under.
  artifact=com/sixthsolution/easymvp/$name
  path=$server/$repo/$artifact
  # <latest>X</latest> in the module metadata names the newest version.
  version=$(curl -s "$path/maven-metadata.xml" | grep latest | sed "s/.*<latest>\([^<]*\)<\/latest>.*/\1/")
  # The first <value>X</value> in the version metadata is the timestamped build id.
  build=$(curl -s "$path/$version/maven-metadata.xml" | grep '<value>' | head -1 | sed "s/.*<value>\([^<]*\)<\/value>.*/\1/")
  jar=$name-$build-javadoc.jar
  url=$path/$version/$jar

  # Download
  echo "$url"
  curl -L "$url" > "$name".zip
  # One directory per module. Cutting a fixed 8-character prefix off the name
  # produced "-javadoc" for short names and the same "nlp-javadoc" for both
  # *-nlp modules.
  javadoc="$name-javadoc"
  mkdir -p "$javadoc"
  # -o: overwrite docs from a previous run without prompting.
  unzip -o "$name".zip -d "$javadoc"
  rm "$name".zip
done

# Stage all files in git and create a commit
git add .
git add -u
git commit -m "java docs updated at $(date)"

# Push the new files up to GitHub
git push origin gh-pages

cd ..
rm -rf "$DIR"
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/event/StandardLocationExtractor.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.event;
2 |
3 | import com.sixthsolution.apex.nlp.dict.Tag;
4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart;
5 | import com.sixthsolution.apex.nlp.tagger.TaggedWord;
6 |
7 | import org.threeten.bp.LocalDateTime;
8 |
9 | import java.util.Iterator;
10 |
11 | /**
12 | * @author Saeed Masoumi (s-masoumi@live.com)
13 | *
14 | */
15 |
16 | public class StandardLocationExtractor implements Extractor {
17 |
18 | @Override
19 | public void extract(EventBuilder builder, LocalDateTime source, ChunkedPart chunkedPart) {
20 | switch (chunkedPart.getLabel()) {
21 | case LOCATION:
22 | String location = getLocation(chunkedPart);
23 | builder.setLocation(location);
24 | break;
25 | }
26 | }
27 |
28 | private String getLocation(ChunkedPart chunkedPart) {
29 | StringBuilder sb = new StringBuilder();
30 | Iterator itr = chunkedPart.getTaggedWords().iterator();
31 | while (itr.hasNext()) {
32 | TaggedWord next = itr.next();
33 | if (!next.hasTag(Tag.LOCATION_PREFIX)) {
34 | sb.append(next.getWord());
35 | if (itr.hasNext()) {
36 | sb.append(" ");
37 | }
38 | }
39 |
40 | }
41 | return sb.toString();
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/ner/ChunkedPart.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.ner;
2 |
3 | import com.sixthsolution.apex.nlp.tagger.TaggedWord;
4 |
5 | import java.util.List;
6 |
7 | /**
8 | * @author Saeed Masoumi (s-masoumi@live.com)
9 | */
10 |
11 | public class ChunkedPart {
12 |
13 | private final Entity entity;
14 | private final Label label;
15 | private final List taggedWords;
16 |
17 | public ChunkedPart(Entity entity,Label label,
18 | List taggedWords) {
19 | this.entity =entity;
20 | this.label = label;
21 | this.taggedWords = taggedWords;
22 | }
23 |
24 | public Label getLabel() {
25 | return label;
26 | }
27 |
28 | public Entity getEntity() {
29 | return entity;
30 | }
31 |
32 | public List getTaggedWords(int start,int end) {
33 | return taggedWords.subList(start,end);
34 | }
35 | public List getTaggedWords() {
36 | return taggedWords;
37 | }
38 | public String toStringTaggedWords() {
39 | StringBuilder sb = new StringBuilder();
40 | for (TaggedWord taggedWord : taggedWords) {
41 | sb.append(taggedWord.getWord()).append(" ");
42 | }
43 | return sb.toString().trim();
44 | }
45 |
46 | @Override
47 | public String toString() {
48 | return label.name() + " -> " + toStringTaggedWords() ;
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaTransitionConsumer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | /**
19 | * A functional interface that can accept transitions
20 | *
21 | * This is used with {@link DfaState#enumerateTransitions(DfaTransitionConsumer)}
22 | */
23 | public interface DfaTransitionConsumer
24 | {
25 | /**
26 | * Accept a DFA transition
27 | *
28 | * This call indicates that the current state has a transition to target on
29 | * every character with code point >=firstChar and <=lastChar
30 | * @param firstChar First character that triggers this transition
31 | * @param lastChar Last character that triggers this transition
32 | * @param target Target state of this transition
33 | */
34 | void acceptTransition(char firstChar, char lastChar, DfaState target);
35 | }
36 |
--------------------------------------------------------------------------------
/sample-android/build.gradle:
--------------------------------------------------------------------------------
// Build script of the Android sample application.
buildscript {
    repositories {
        jcenter()
    }
    dependencies {
        classpath "com.android.tools.build:gradle:2.2.3"

        // NOTE: Do not place your application dependencies here; they belong
        // in the individual module build.gradle files
    }
}
apply plugin: "com.android.application"

android {
    compileSdkVersion 25
    buildToolsVersion '25.0.0'

    defaultConfig {
        applicationId 'com.sixthsolution.apex.sample.android'
        minSdkVersion 15
        targetSdkVersion 25
        versionCode 1
        versionName '1.0'

        testInstrumentationRunner 'android.support.test.runner.AndroidJUnitRunner'
    }

    buildTypes {
        release {
            // Release builds are neither shrunk nor obfuscated.
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
        }
    }
}

dependencies {
    // Library modules of this repository.
    compile project(':apex')
    compile project(':dfalex')
    compile project(':english-nlp')
    // Any jar dropped into libs/.
    compile fileTree(dir: 'libs', include: ['*.jar'])
    compile 'org.threeten:threetenbp:1.3.2'

    androidTestCompile('com.android.support.test.espresso:espresso-core:2.2.2') {
        exclude group: 'com.android.support', module: 'support-annotations'
    }
    compile 'com.android.support:appcompat-v7:25.0.1'
    testCompile 'junit:junit:4.12'
}
49 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/RawDfa.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.util.List;
19 |
/**
 * A DFA in uncompressed form: a list of states, a list of accept sets and an
 * array of start states.
 *
 * NOTE(review): both lists are declared as raw {@code List} here; their
 * element types may have been lost -- confirm against the callers.
 */
class RawDfa
{
    private final List m_dfaStates;
    private final List m_acceptSets;
    private final int[] m_startStates;

    /**
     * Create a new RawDfa.  The arguments are stored as given, without copying.
     *
     * @param dfaStates    the states of the DFA
     * @param acceptSets   the accept sets of the DFA
     * @param startStates  the start states of the DFA
     */
    public RawDfa(List dfaStates,
            List acceptSets,
            int[] startStates)
    {
        m_startStates = startStates;
        m_acceptSets = acceptSets;
        m_dfaStates = dfaStates;
    }

    /** @return the state list passed to the constructor (not a copy) */
    public List getStates()
    {
        return m_dfaStates;
    }

    /** @return the accept-set list passed to the constructor (not a copy) */
    public List getAcceptSets()
    {
        return m_acceptSets;
    }

    /** @return the start-state array passed to the constructor (not a copy) */
    public int[] getStartStates()
    {
        return m_startStates;
    }
}
56 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/tagger/StandardTagger.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.tagger;
2 |
3 | import com.sixthsolution.apex.nlp.dict.Dictionary;
4 | import com.sixthsolution.apex.nlp.dict.Tag;
5 | import com.sixthsolution.apex.nlp.dict.TagValue;
6 | import com.sixthsolution.apex.nlp.dict.Tags;
7 | import com.sixthsolution.apex.nlp.ner.Entity;
8 |
9 | import java.util.Arrays;
10 | import java.util.List;
11 |
12 | import static com.sixthsolution.apex.nlp.util.NumericUtils.isNumeric;
13 | import static com.sixthsolution.apex.nlp.util.NumericUtils.toInt;
14 |
15 | /**
16 | * @author Saeed Masoumi (s-masoumi@live.com)
17 | */
18 |
19 | public class StandardTagger implements Tagger {
20 |
21 | protected final Dictionary dictionary;
22 |
23 | public StandardTagger(Dictionary dictionary) {
24 | this.dictionary = dictionary;
25 | }
26 |
27 | @Override
28 | public TaggedWords tag(String[] tokenizedSentence) {
29 | TaggedWords taggedWords = new TaggedWords();
30 | List tokens = Arrays.asList(tokenizedSentence);
31 | for (String token : tokens) {
32 | Tags tags = null;
33 | if (isNumeric(token)) {
34 | tags = new Tags();
35 | tags.add(new TagValue(Tag.NUMBER, toInt(token), Entity.NONE));
36 | } else {
37 | tags = dictionary.getRelatedTags(token, true);
38 | }
39 | taggedWords.add(new TaggedWord(token, tags));
40 | }
41 | return taggedWords;
42 | }
43 |
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/EnglishParser.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english;
2 |
3 | import com.sixthsolution.apex.nlp.event.EventDetector;
4 | import com.sixthsolution.apex.nlp.event.StandardEventDetector;
5 | import com.sixthsolution.apex.nlp.ner.Chunker;
6 | import com.sixthsolution.apex.nlp.ner.regex.RegExChunker;
7 | import com.sixthsolution.apex.nlp.parser.StandardParserBase;
8 | import com.sixthsolution.apex.nlp.tagger.StandardTagger;
9 | import com.sixthsolution.apex.nlp.tagger.Tagger;
10 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer;
11 |
12 | import java.util.Arrays;
13 |
14 | /**
15 | * @author Saeed Masoumi (s-masoumi@live.com)
16 | */
17 |
18 | public class EnglishParser extends StandardParserBase {
19 |
20 | @Override
21 | public void initialize() {
22 | super.initialize();
23 | //TODO need some training sentences
24 | }
25 |
26 | @Override
27 | protected Tagger provideTagger() {
28 | return new StandardTagger(EnglishVocabulary.build());
29 | }
30 |
31 | @Override
32 | protected Tokenizer provideTokenizer() {
33 | return new EnglishTokenizer();
34 | }
35 |
36 | @Override
37 | protected Chunker provideChunker() {
38 | return new RegExChunker(
39 | Arrays.asList(new TimeDetector(), new LocationDetector(), new DateDetector()));
40 | }
41 |
42 | @Override
43 | protected EventDetector provideEventDetector() {
44 | return new StandardEventDetector(new StandardExtractor());
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/dict/Dictionary.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.dict;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Entity;
4 |
5 | import java.util.HashMap;
6 |
7 | /**
8 | * @author Saeed Masoumi (s-masoumi@live.com)
9 | */
10 | public class Dictionary extends HashMap {
11 | private static final Tags NONE_TAG;
12 |
13 | static {
14 | NONE_TAG = new Tags();
15 | NONE_TAG.add(new TagValue(Tag.NONE, "", Entity.NONE));
16 | }
17 |
18 | public void addAll(String[] words, Tag tag, Object value, Entity entity) {
19 | TagValue tagValue = new TagValue(tag, value, entity);
20 | for (String word : words) {
21 | update(word, tagValue);
22 | }
23 | }
24 |
25 | public void update(String word, TagValue tagValue) {
26 | Tags posting = getOrEmpty(word);
27 | posting.add(tagValue);
28 | put(word, posting);
29 | }
30 |
31 | public Tags getOrEmpty(String word) {
32 | if (!containsKey(word)) {
33 | return new Tags();
34 | }
35 | return get(word);
36 | }
37 |
38 | public Tags getRelatedTags(String word, boolean caseInsensitive) {
39 | if (caseInsensitive) {
40 | return getTags(word.toLowerCase());
41 | }
42 | return getTags(word);
43 | }
44 |
45 | private Tags getTags(String word) {
46 | Tags tags = getOrEmpty(word);
47 | if (!tags.isEmpty()) {
48 | return tags;
49 | }
50 | return NONE_TAG;
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/dfalex/src/test/java/com/nobigsoftware/dfalex/StringMatcherTest.java:
--------------------------------------------------------------------------------
1 | package com.nobigsoftware.dfalex;
2 |
3 |
4 | import org.junit.Assert;
5 | import org.junit.Test;
6 |
7 | public class StringMatcherTest extends TestBase
8 | {
9 | @Test
10 | public void testStringMatcher()
11 | {
12 | DfaState dfa;
13 | {
14 | DfaBuilder builder = new DfaBuilder<>();
15 | builder.addPattern(Pattern.regex("a[ab]*b"), 1);
16 | builder.addPattern(Pattern.regex("a[ab]*c"), 2);
17 | dfa = builder.build(null);
18 | }
19 | StringMatcher matcher = new StringMatcher("bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa");
20 | Integer result = matcher.findNext(dfa);
21 | Assert.assertEquals((Integer)2, result);
22 | Assert.assertEquals("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", matcher.getLastMatch());
23 | Assert.assertEquals(5, matcher.getLastMatchStart());
24 | Assert.assertEquals(34, matcher.getLastMatchEnd());
25 | result = matcher.findNext(dfa);
26 | Assert.assertEquals((Integer)1, result);
27 | Assert.assertEquals("aaaaaaabbb", matcher.getLastMatch());
28 | result = matcher.findNext(dfa);
29 | Assert.assertEquals(null, result);
30 |
31 | matcher.setPositions(15, 20, 33);
32 | Assert.assertEquals("aaaaa", matcher.getLastMatch());
33 | result = matcher.findNext(dfa);
34 | Assert.assertEquals("aaaaaaaaabbbb", matcher.getLastMatch());
35 | result = matcher.findNext(dfa);
36 | Assert.assertEquals(null, result);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/Apex.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex;
2 |
3 | import com.sixthsolution.apex.model.Event;
4 | import com.sixthsolution.apex.nlp.parser.Parser;
5 |
6 | import org.threeten.bp.LocalDateTime;
7 |
8 | import java.util.HashMap;
9 | import java.util.Map;
10 |
11 | /**
12 | * @author Saeed Masoumi (s-masoumi@live.com)
13 | * @author Rozhin Bayati
14 | */
15 |
16 | public class Apex {
17 |
18 | private static Apex inst = null;
19 | private Map parsers = new HashMap<>();
20 |
21 | private Apex(ApexConfig config) {
22 | this.parsers = config.parsers;
23 | }
24 |
25 | public static void init(ApexConfig config) {
26 | for (Parser parser : config.parsers.values()) {
27 | parser.initialize();
28 | }
29 | inst = new Apex(config);
30 | }
31 |
32 | public static Event nlp(String name, String sentence) {
33 | return inst.parsers.get(name).parse(LocalDateTime.now(), sentence);
34 | }
35 |
36 | public static class ApexBuilder {
37 |
38 | private Map parsers = new HashMap<>();
39 |
40 | public ApexBuilder addParser(String name, Parser parser) {
41 | parsers.put(name, parser);
42 | return this;
43 | }
44 |
45 | public ApexConfig build() {
46 | return new ApexConfig(parsers);
47 | }
48 | }
49 |
50 | private static class ApexConfig {
51 | Map parsers = new HashMap<>();
52 |
53 | ApexConfig(Map parsers) {
54 | this.parsers = parsers;
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaStateImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | /**
19 | * Implementation of a Dfa State.
20 | *
21 | * This can either be a "placeholder" state that delegates to another DFA state, or
22 | * a DFA state in final form. As the last step in DFA construction,
23 | */
24 | abstract class DfaStateImpl extends DfaState
25 | {
26 | /**
27 | * Replace any internal placeholder references with references to
28 | * their delegates.
29 | *
30 | * Every reference to a state X is replaces with x.resolvePlaceholder();
31 | */
32 | abstract void fixPlaceholderReferences();
33 |
34 | /**
35 | * If this is a placeholder that delegates to another state,
36 | * return that other state. Otherwise return this.
37 | *
38 | * This method will follow a chain of placeholders to the end
39 | *
40 | * @return the final delegate of this state
41 | */
42 | abstract DfaStateImpl resolvePlaceholder();
43 | }
44 |
--------------------------------------------------------------------------------
/dfalex/src/test/java/com/nobigsoftware/dfalex/By3Test.java:
--------------------------------------------------------------------------------
1 | package com.nobigsoftware.dfalex;
2 |
3 | import java.util.Collections;
4 |
5 | import org.junit.Assert;
6 | import org.junit.Test;
7 |
8 | import com.nobigsoftware.dfalex.CharRange;
9 | import com.nobigsoftware.dfalex.DfaBuilder;
10 | import com.nobigsoftware.dfalex.DfaState;
11 | import com.nobigsoftware.dfalex.Pattern;
12 |
13 | public class By3Test extends TestBase
14 | {
15 | @Test
16 | public void test() throws Exception
17 | {
18 | //make pattern for whole numbers divisible by 3
19 |
20 | //digits mod 3
21 | Matchable d0=CharRange.anyOf("0369");
22 | Pattern d1=Pattern.match(CharRange.anyOf("147")).thenMaybeRepeat(d0);
23 | Pattern d2=Pattern.match(CharRange.anyOf("258")).thenMaybeRepeat(d0);
24 |
25 | Pattern Plus2 = Pattern.maybeRepeat(d1.then(d2)).then(Pattern.anyOf(
26 | d1.then(d1),
27 | d2
28 | ));
29 | Pattern Minus2 = Pattern.maybeRepeat(d2.then(d1)).then(Pattern.anyOf(
30 | d2.then(d2),
31 | d1
32 | ));
33 |
34 | Pattern By3 = Pattern.maybeRepeat(Pattern.anyOf(
35 | d0,
36 | d1.then(d2),
37 | Plus2.then(Minus2)
38 | ));
39 | DfaBuilder builder = new DfaBuilder<>();
40 | builder.addPattern(By3, true);
41 | DfaState> start = builder.build(Collections.singleton(Boolean.TRUE), null);
42 | Assert.assertEquals(3, _countStates(start));
43 | _checkDfa(start, "By3Test.out.txt", false);
44 | }
45 |
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaAmbiguityResolver.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import com.nobigsoftware.util.BuilderCache;
19 |
20 | import java.io.Serializable;
21 | import java.util.Set;
22 | import backport.java.util.function.Function;
23 |
24 | /**
25 | * Implementations of this interface are used to resolve ambiguities in {@link DfaBuilder}.
26 | *
27 | * When it's possible for a single string to match patterns that produce different results, the
28 | * ambiguity resolver is called to determine what the result should be.
29 | *
30 | * The implementation can throw a {@link DfaAmbiguityException} in this case, or can combine the
31 | * multiple result objects into a single object if its type (e.g., EnumSet) permits.
32 | *
33 | * This interface implements Serializable so that it can be written into the key signature for
34 | * {@link BuilderCache}.
35 | */
36 | public interface DfaAmbiguityResolver
37 | extends Function, MATCHRESULT>, Serializable {
38 | }
39 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaStateInfo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.util.List;
19 | import backport.java.util.function.Consumer;
20 |
21 | class DfaStateInfo
22 | {
23 | private int m_acceptSetIndex;
24 | private int m_transitionCount;
25 | private NfaTransition[] m_transitionBuf;
26 |
27 | DfaStateInfo(List transitions, int acceptSetIndex)
28 | {
29 |
30 | m_acceptSetIndex = acceptSetIndex;
31 | m_transitionCount = transitions.size();
32 | m_transitionBuf = transitions.toArray(new NfaTransition[m_transitionCount]);
33 | }
34 |
35 | public int getAcceptSetIndex()
36 | {
37 | return m_acceptSetIndex;
38 | }
39 |
40 | public int getTransitionCount()
41 | {
42 | return m_transitionCount;
43 | }
44 |
45 | public NfaTransition getTransition(int index)
46 | {
47 | return m_transitionBuf[index];
48 | }
49 |
50 | public void forEachTransition(Consumer consumer)
51 | {
52 | for (int i=0; i< m_transitionCount; ++i)
53 | {
54 | consumer.accept(m_transitionBuf[i]);
55 | }
56 | }
57 | }
--------------------------------------------------------------------------------
/english-nlp/src/test/java/com/sixthsolution/apex/nlp/english/test/tokenization/LocationDetectorTest.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english.test.tokenization;
2 |
3 | import com.sixthsolution.apex.nlp.english.LocationDetector;
4 | import com.sixthsolution.apex.nlp.ner.Entity;
5 | import com.sixthsolution.apex.nlp.ner.Label;
6 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector;
7 |
8 | import org.junit.Test;
9 |
10 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart;
11 |
12 | /**
13 | * @author Saeed Masoumi (s-masoumi@live.com)
14 | */
15 |
16 | public class LocationDetectorTest extends EnglishDetectorTest {
17 |
18 | @Override
19 | protected ChunkDetector provideDetector() {
20 | return new LocationDetector();
21 | }
22 |
23 | @Test
24 | public void test_location() {
25 | assertChunkedPart("at home").text("at home")
26 | .label(Label.LOCATION).entity(Entity.LOCATION);
27 | assertChunkedPart("at Starbucks").text("at Starbucks")
28 | .label(Label.LOCATION).entity(Entity.LOCATION);
29 | assertChunkedPart("at 123 st.").text("at 123 st.")
30 | .label(Label.LOCATION).entity(Entity.LOCATION);
31 | }
32 |
33 | @Test
34 | public void test_int_full_sentence() {
35 | assertChunkedPart("Grocery shopping at Wegman's Thursday at 5pm").text("at Wegman's")
36 | .label(Label.LOCATION).entity(Entity.LOCATION);
37 | assertChunkedPart("Meet John at Mall from 9:30 to 12:00").text("at Mall")
38 | .label(Label.LOCATION).entity(Entity.LOCATION);
39 | assertChunkedPart("Bring Negin lunch at 123 st.").text("at 123 st.")
40 | .label(Label.LOCATION).entity(Entity.LOCATION);
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/persian-nlp/src/test/java/com/sixthsolution/apex/nlp/persian/test/PersianDLTest.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.test;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Entity;
4 | import com.sixthsolution.apex.nlp.ner.Label;
5 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector;
6 | import com.sixthsolution.apex.nlp.persian.PersianLocationDetector;
7 | import org.junit.Test;
8 |
9 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart;
10 |
11 | public class PersianDLTest extends PersianDetectorTest {
12 |
13 | @Override
14 | protected ChunkDetector provideDetector() {
15 | return new PersianLocationDetector();
16 | }
17 |
18 | @Test
19 | public void test_location() {
20 | assertChunkedPart("در کوچه شقایق").text("در کوچه شقایق")
21 | .label(Label.LOCATION).entity(Entity.LOCATION);
22 | assertChunkedPart("در خیابان دهم").text("در خیابان دهم")
23 | .label(Label.LOCATION).entity(Entity.LOCATION);
24 | assertChunkedPart("در بزرگراه چمران").text("در بزرگراه چمران")
25 | .label(Label.LOCATION).entity(Entity.LOCATION);
26 | assertChunkedPart("در بازار").text("در بازار")
27 | .label(Label.LOCATION).entity(Entity.LOCATION);
28 |
29 | }
30 |
31 | @Test
32 | public void test_int_full_sentence() {
33 | assertChunkedPart("خرید در بازار در روز چهارشنبه").text("در بازار")
34 | .label(Label.LOCATION).entity(Entity.LOCATION);
35 | assertChunkedPart("امشب ملاقات با دوستم در رستوران").text("در رستوران")
36 | .label(Label.LOCATION).entity(Entity.LOCATION);
37 | assertChunkedPart("خرید لباس در خیابان دهم").text("در خیابان دهم")
38 | .label(Label.LOCATION).entity(Entity.LOCATION);
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/parser/StandardParserBase.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.parser;
2 |
3 | import com.sixthsolution.apex.model.Event;
4 | import com.sixthsolution.apex.nlp.event.EventDetector;
5 | import com.sixthsolution.apex.nlp.ner.ChunkedPart;
6 | import com.sixthsolution.apex.nlp.ner.Chunker;
7 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
8 | import com.sixthsolution.apex.nlp.tagger.Tagger;
9 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer;
10 |
11 | import org.threeten.bp.LocalDateTime;
12 |
13 | import java.util.List;
14 |
15 | /**
16 | * @author Saeed Masoumi (s-masoumi@live.com)
17 | */
18 |
19 | public abstract class StandardParserBase implements Parser {
20 |
21 | private Tokenizer tokenizer = null;
22 | private Tagger tagger = null;
23 | private Chunker chunker = null;
24 | private EventDetector eventDetector = null;
25 |
26 | @Override
27 | public void initialize() {
28 | tokenizer = provideTokenizer();
29 | tagger = provideTagger();
30 | chunker = provideChunker();
31 | eventDetector = provideEventDetector();
32 | }
33 |
34 | protected abstract Tagger provideTagger();
35 |
36 | protected abstract Tokenizer provideTokenizer();
37 |
38 | protected abstract Chunker provideChunker();
39 |
40 | protected abstract EventDetector provideEventDetector();
41 |
42 | @Override
43 | public Event parse(LocalDateTime source, String sentence) {
44 | //#1
45 | String[] tokens = tokenizer.tokenize(sentence);
46 | //#2
47 | TaggedWords taggedWords = tagger.tag(tokens);
48 | //#3
49 | List chunkedParts = chunker.chunk(taggedWords);
50 | //#4
51 | return eventDetector.detect(source, chunkedParts);
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/apex/src/test/java/com/sixthsolution/apex/nlp/test/ChunkerAssertion.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.test;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Chunker;
4 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
5 | import com.sixthsolution.apex.nlp.tagger.Tagger;
6 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer;
7 |
8 | /**
9 | * @author Saeed Masoumi (s-masoumi@live.com)
10 | */
11 |
12 | public class ChunkerAssertion {
13 |
14 | private static ChunkerAssertion instance = null;
15 | private final Chunker chunker;
16 | private Tokenizer tokenizer;
17 | private Tagger tagger;
18 |
19 | private ChunkerAssertion(Tokenizer tokenizer, Tagger tagger, Chunker chunker) {
20 | this.tokenizer = tokenizer;
21 | this.tagger = tagger;
22 | this.chunker = chunker;
23 | }
24 |
25 | private static ChunkerAssertion getInstance() {
26 | return instance;
27 | }
28 |
29 | public static void init(Tokenizer tokenizer, Tagger tagger, Chunker chunker) {
30 | instance = new ChunkerAssertion(tokenizer, tagger, chunker);
31 | }
32 |
33 | public static ChunkAssertion assertSentence(String word) {
34 | return getInstance().makeChunkAssertion(word);
35 | }
36 |
37 | private ChunkAssertion makeChunkAssertion(String word) {
38 | System.out.println("Label assertion for: " + word);
39 | long startTime = System.currentTimeMillis();
40 | TaggedWords taggedWords = tagger.tag(tokenizer.tokenize(word));
41 | ChunkAssertion result = new ChunkAssertion(chunker.chunk(taggedWords));
42 | System.out.println(
43 | "Chunking takes " + (System.currentTimeMillis() - startTime) + " millis.");
44 | System.out.println("------------------------------");
45 | return result;
46 | }
47 |
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/StringReplacement.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 |
19 | /**
20 | * BackportFuncs interface that provides the replacement values for strings in a search+replace operation
21 | * of patterns found in a string.
22 | */
23 | public interface StringReplacement
24 | {
25 | /**
26 | * This will be called for each instance of each pattern found
27 | *
28 | * @param dest The replacement text for the matching substring should be written here
29 | * @param src The string being searched, or the part of the stream being searched that contains the current match
30 | * @param startPos the start index of the current match in src
31 | * @param endPos the end index of the current match in src
32 | * @return if this is >0, then it is the position in the source string at which to continue processing after
33 | * replacement. If you set this <= startPos, an IndexOutOfBoundsException will be thrown to
34 | * abort the infinite loop that would result. Almost always return 0.
35 | */
36 | int apply(SafeAppendable dest, CharSequence src, int startPos, int endPos);
37 | }
38 |
39 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/dict/Tag.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.dict;
2 |
/**
 * Tags that can be attached to a word. Every tag has a numeric {@link #id}; its string
 * form is the single character whose code is that id.
 *
 * @author Saeed Masoumi (s-masoumi@live.com)
 * @author Rozhin Bayati
 */
public enum Tag {
    NONE(97),
    NUMBER(98),
    PREPOSITION(99),
    RELATIVE_PREPOSITION(100),
    RELATIVE_SUFFIX(101),
    //LOCATION
    LOCATION_PREFIX(102),
    LOCATION_SUFFIX(103),
    LOCATION_NAME(137),
    //TIME
    TIME_PREFIX(104), //e.g. at, in ,the
    TIME_START_RANGE(105), //e.g. from
    TIME_RANGE(106),
    TIME_RELATIVE_PREFIX(107), //e.g. for
    TIME_RELATIVE(108), //e.g. morning
    TIME_RELATIVE_INDICATOR(109), //e.g. before,after
    TIME_HOUR(110), //e.g. hour
    TIME_MIN(111), //e.g. minutes
    TIME_SEC(112), //e.g. seconds
    TIME_MERIDIEM(113), //e.g am, pm
    TIME_SEPARATOR(114), //e.g :, .
    //DATE
    DATE_PREPOSITION(115),
    DATE_SEEKBY(116),
    DATE_START_RANGE(117),
    DATE_SUFFIX(118),
    DATE_DURATION_SUFFIX(119),
    DATE_SEPARATOR(120),
    WEEK_DAY(121),
    MONTH_NAME(122),
    SEASON(123),
    DATE_PREFIX(124),
    //RECURRENCE
    REC_WEEK_DAYS(125),
    NAMED_DATE(126),
    GLOBAL_PREPOSITION(127),
    DATE_RECURRENCE(128),
    DATE_RANGE(129),
    DATE_FOREVER_KEY(130),
    THE_PREFIX(131),
    DATE_BAND(132),
    YEAR_SEEK(133),
    MONTH_SEEK(134),
    WEEK_SEEK(135),
    DAY_SEEK(136),
    CURRENT(138);

    /** Character code of this tag; fixed at construction so constants cannot be altered. */
    public final int id;

    Tag(int id) {
        this.id = id;
    }

    /**
     * @return a one-character string whose character code is {@link #id}
     */
    @Override
    public String toString() {
        return String.valueOf((char) id);
    }
}
66 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/filter/TimeDetectionFilter.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.filter;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Label;
4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
6 |
7 | import static com.sixthsolution.apex.nlp.dict.Tag.*;
8 |
9 | /**
10 | * @author Saeed Masoumi (s-masoumi@live.com)
11 | * @author Rozhin Bayati
12 | */
13 |
14 | public class TimeDetectionFilter extends ChunkDetectionFilter {
15 |
16 | @Override
17 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) {
18 | switch (label) {
19 | case FIXED_TIME:
20 | //ignore single number
21 | if (startIndex == endIndex - 1 &&
22 | taggedWords.get(startIndex).getTags().containsTag(NUMBER)) {
23 | return false;
24 | }
25 | //ignore date formats like 12.02.2012
26 | if (taggedWords.size() > endIndex &&
27 | taggedWords.get(endIndex - 1).getTags().containsTag(NUMBER) &&
28 | taggedWords.get(endIndex).getTags().containsTag(DATE_SEPARATOR)) {
29 | return false;
30 | }
31 | return true;
32 | case RANGE_TIME:
33 | //ignore like فروردین 13-17
34 | if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(MONTH_NAME)) {
35 | return false;
36 | }
37 | else if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(DATE_SEPARATOR)) { // edited by scrc
38 | return false;
39 | }
40 | return true;
41 | case RELATIVE_TIME:
42 | return true;
43 | }
44 | return false;
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/event/PersianEventBuilder.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.event;
2 |
3 | import com.sixthsolution.apex.model.Event;
4 | import com.sixthsolution.apex.nlp.event.EventBuilder;
5 | import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar;
6 | import com.sixthsolution.apex.nlp.persian.model.PersianEvent;
7 | import org.threeten.bp.LocalDateTime;
8 | import org.threeten.bp.LocalTime;
9 |
10 |
11 | /**
12 | * Created by rozhin on 8/1/2017.
13 | */
14 | public class PersianEventBuilder extends EventBuilder {
15 | private LocalTime startTime = null;
16 | private LocalTime endTime = null;
17 | private JalaliCalendar startDate;
18 | private JalaliCalendar endDate;
19 | private String location = "";
20 | private PersianRecurrence recurrence =null;
21 |
22 | public void setStartDate(JalaliCalendar startDate) {
23 | this.startDate = startDate;
24 | }
25 |
26 | public void setEndDate(JalaliCalendar endDate) {
27 | this.endDate = endDate;
28 | }
29 |
30 | public void setRecurrence(PersianRecurrence recurrence){this.recurrence=recurrence;}
31 |
32 | @Override
33 | public Event build(LocalDateTime source) {
34 | JalaliCalendar jalaliCalendar=new JalaliCalendar();
35 | jalaliCalendar=jalaliCalendar.convertor(source.toLocalDate());
36 |
37 | if (startTime == null) {
38 | startTime = source.toLocalTime();
39 | }
40 | if (endTime == null) {
41 | endTime = startTime.plusHours(1);
42 | }
43 | if (startDate == null) {
44 | startDate = jalaliCalendar;
45 | }
46 | if (endDate == null) {
47 | endDate = jalaliCalendar;
48 | }
49 |
50 |
51 |
52 | return new PersianEvent("", location, startDate, endDate,endTime,startTime, false, recurrence);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/dfalex/src/test/java/com/nobigsoftware/dfalex/ReverseFinderTest.java:
--------------------------------------------------------------------------------
1 | package com.nobigsoftware.dfalex;
2 |
3 | import java.io.PrintWriter;
4 | import java.io.StringWriter;
5 |
6 | import org.junit.Assert;
7 | import org.junit.Test;
8 |
9 | public class ReverseFinderTest extends TestBase
10 | {
11 | @Test
12 | public void test() throws Exception
13 | {
14 | DfaBuilder revbuilder = new DfaBuilder<>();
15 | for (JavaToken tok : JavaToken.values())
16 | {
17 | revbuilder.addPattern(Pattern.ALL_STRINGS.then(tok.m_pattern.getReversed()), true);
18 | }
19 | DfaState> wantstart = revbuilder.build(null);
20 | String want = _toString(wantstart);
21 |
22 | DfaBuilder builder = new DfaBuilder<>();
23 | for (JavaToken tok : JavaToken.values())
24 | {
25 | builder.addPattern(tok.m_pattern, tok);
26 | }
27 | DfaState> havestart = builder.buildReverseFinder();
28 | String have = _toString(havestart);
29 | Assert.assertEquals(want, have);
30 |
31 | //make sure we properly exclude the empty string from the reverse finder DFA
32 | builder.clear();
33 | for (JavaToken tok : JavaToken.values())
34 | {
35 | if ((tok.ordinal()&1)==0)
36 | {
37 | builder.addPattern(tok.m_pattern, tok);
38 | }
39 | else
40 | {
41 | builder.addPattern(Pattern.maybe(tok.m_pattern), tok);
42 | }
43 | }
44 | havestart = builder.buildReverseFinder();
45 | have = _toString(havestart);
46 | Assert.assertEquals(want, have);
47 | }
48 |
49 | private String _toString(DfaState> dfa)
50 | {
51 | StringWriter w = new StringWriter();
52 | m_printer.print(new PrintWriter(w), dfa);
53 | return w.toString();
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/PrimeSizeFinder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
/**
 * Picks a table size from a fixed, ascending list of candidate sizes.
 */
class PrimeSizeFinder
{
    // Ascending candidate sizes. NOTE: a few entries (9, 529, 6241) are perfect squares
    // rather than primes; the values are kept unchanged so existing sizing is preserved.
    private static final int[] PRIME_SIZES = new int[] { 5, 7, 9, 11, 17, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 421,
        529, 661, 827, 1039, 1301, 1627, 2039, 2549, 3187, 3989, 4987, 6241, 7817, 9781, 12227, 15287, 19121, 23909, 29917, 37397,
        46747, 58439, 73061, 91331, 114167, 142711, 178393, 222991, 278741, 348431, 435541, 544429, 680539, 850679,
        1063351, 1329197, 1661503, 2076881, 2596123, 3245171, 4056467, 5070599, 6338257, 7922821, 9903557, 12379453,
        15474317, 19342907, 24178639, 30223313, 37779149, 47223941, 59029963, 73787459, 92234327, 115292923, 144116201, 180145283,
        225181637, 281477047, 351846337, 439807933, 549759953, 687199949, 858999971, 1073749979, 1342187489, 1677734381
    };

    /**
     * Returns the smallest entry of the size table that is at least {@code minval}.
     *
     * @param minval the minimum acceptable size
     * @return the first table entry {@code >= minval}, or {@link Integer#MAX_VALUE} when
     *         {@code minval} exceeds every entry
     */
    public static int findPrimeSize(int minval)
    {
        //Linear search is fine here, since returning a size generally implies we're going
        //to do work proportional to that size anyway
        for (int size : PRIME_SIZES)
        {
            if (size >= minval)
            {
                return size;
            }
        }
        return Integer.MAX_VALUE; //Very handy that this is a Mersenne prime
    }
}
42 |
--------------------------------------------------------------------------------
/apex/src/test/java/com/sixthsolution/apex/nlp/test/TokenizerAssertion.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.test;
2 |
3 | import com.sixthsolution.apex.nlp.tokenization.StandardTokenizer;
4 | import com.sixthsolution.apex.nlp.tokenization.Tokenizer;
5 |
6 | import java.util.Arrays;
7 | import java.util.Iterator;
8 |
9 | import static org.junit.Assert.assertArrayEquals;
10 |
11 | /**
12 | * @author Saeed Masoumi (s-masoumi@live.com)
13 | */
14 |
15 | public final class TokenizerAssertion {
16 |
17 | private static TokenizerAssertion instance = null;
18 | private Tokenizer tokenizer;
19 |
20 |
21 |
22 | private TokenizerAssertion(Tokenizer tokenizer) {
23 | this.tokenizer = tokenizer;
24 | }
25 |
26 | private static TokenizerAssertion getInstance() {
27 | if (instance == null) {
28 | instance = new TokenizerAssertion(new StandardTokenizer());
29 | }
30 | return instance;
31 | }
32 |
33 | public static void init(Tokenizer tokenizer) {
34 | getInstance().setTokenizer(tokenizer);
35 | }
36 |
37 | public static void assertTokens(String sentence, String... tokens) {
38 | System.out.println("Actual sentence: " + sentence);
39 | String[] tokenized = getInstance().tokenizer.tokenize(sentence);
40 | System.out.println("Tokenized sentence: " + toStringTokens(tokenized));
41 | assertArrayEquals(tokenized, tokens);
42 | }
43 | private void setTokenizer(Tokenizer tokenizer) {
44 | this.tokenizer = tokenizer;
45 | }
46 |
47 |
48 | private static String toStringTokens(String[] e) {
49 | Iterator itr = Arrays.asList(e).iterator();
50 | StringBuilder sb = new StringBuilder();
51 | while (itr.hasNext()) {
52 | sb.append(itr.next());
53 | if (itr.hasNext()) {
54 | sb.append(" ");
55 | }
56 | }
57 | return sb.toString();
58 | }
59 |
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/filter/TimeDetectionFilter.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english.filter;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Label;
4 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
5 | import com.sixthsolution.apex.nlp.tagger.TaggedWords;
6 |
7 | import static com.sixthsolution.apex.nlp.dict.Tag.DATE_SEPARATOR;
8 | import static com.sixthsolution.apex.nlp.dict.Tag.MONTH_NAME;
9 | import static com.sixthsolution.apex.nlp.dict.Tag.NUMBER;
10 |
11 | /**
12 | * @author Saeed Masoumi (s-masoumi@live.com)
13 | * @author Rozhin Bayati
14 | */
15 |
16 | public class TimeDetectionFilter extends ChunkDetectionFilter {
17 |
18 | @Override
19 | public boolean accept(Label label, TaggedWords taggedWords, int startIndex, int endIndex) {
20 | switch (label) {
21 | case FIXED_TIME:
22 | //ignore single number
23 | if (startIndex == endIndex - 1 &&
24 | taggedWords.get(startIndex).getTags().containsTag(NUMBER)) {
25 | return false;
26 | }
27 | //ignore date formats like 12.02.2012
28 | if (taggedWords.size() > endIndex &&
29 | taggedWords.get(endIndex - 1).getTags().containsTag(NUMBER) &&
30 | taggedWords.get(endIndex).getTags().containsTag(DATE_SEPARATOR)) {
31 | return false;
32 | }
33 | return true;
34 | case RANGE_TIME:
35 | //ignore like Nov 13-17
36 | if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(MONTH_NAME)) {
37 | return false;
38 | }
39 | else if (startIndex > 0 && taggedWords.get(startIndex - 1).hasTag(DATE_SEPARATOR)) { // edited by scrc
40 | return false;
41 | }
42 | return true;
43 | case RELATIVE_TIME:
44 | return true;
45 | }
46 | return false;
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/event/EventBuilder.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.event;
2 |
3 | import com.sixthsolution.apex.model.Event;
4 |
5 | import com.sixthsolution.apex.model.Recurrence;
6 | import org.threeten.bp.LocalDate;
7 | import org.threeten.bp.LocalDateTime;
8 | import org.threeten.bp.LocalTime;
9 |
10 |
11 | /**
12 | * @author Saeed Masoumi (s-masoumi@live.com)
13 | * @author Rozhin Bayati
14 | */
15 |
16 | public class EventBuilder {
17 |
18 | private LocalTime startTime = null;
19 | private LocalTime endTime = null;
20 | private LocalDate startDate;
21 | private LocalDate endDate;
22 | private String location = "";
23 | private Recurrence recurrence =null;
24 |
25 | public void setStartTime(LocalTime startTime) {
26 | this.startTime = startTime;
27 | }
28 |
29 | public void setEndTime(LocalTime endTime) {
30 | this.endTime = endTime;
31 | }
32 |
33 | public void setStartDate(LocalDate startDate) {
34 | this.startDate = startDate;
35 | }
36 |
37 | public void setEndDate(LocalDate endDate) {
38 | this.endDate = endDate;
39 | }
40 |
41 | public void setLocation(String location) {
42 | this.location = location;
43 | }
44 |
45 | public void setReccurence(Recurrence reccurence ){this.recurrence=reccurence;}
46 |
47 | public Event build(LocalDateTime source) {
48 | if (startTime == null) {
49 | startTime = source.toLocalTime();
50 | }
51 | if (endTime == null) {
52 | endTime = startTime.plusHours(1);
53 | }
54 | if (startDate == null) {
55 | startDate = source.toLocalDate();
56 | }
57 | if (endDate == null) {
58 | endDate = startDate;
59 | }
60 |
61 |
62 | LocalDateTime startDateTime = LocalDateTime.of(startDate, startTime);
63 | LocalDateTime endDateTime = LocalDateTime.of(endDate, endTime);
64 |
65 | return new Event("", location, startDateTime, endDateTime, false, recurrence);
66 | }
67 |
68 | }
--------------------------------------------------------------------------------
/english-nlp/src/main/java/com/sixthsolution/apex/nlp/english/LocationDetector.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.english;
2 |
3 | import com.nobigsoftware.dfalex.Pattern;
4 | import com.sixthsolution.apex.nlp.dict.Tag;
5 | import com.sixthsolution.apex.nlp.english.filter.LocationDetectionFilter;
6 | import com.sixthsolution.apex.nlp.ner.Entity;
7 | import com.sixthsolution.apex.nlp.ner.Label;
8 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
9 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector;
10 | import com.sixthsolution.apex.nlp.util.Pair;
11 |
12 | import java.util.Arrays;
13 | import java.util.List;
14 |
15 | import static com.nobigsoftware.dfalex.Pattern.match;
16 | import static com.nobigsoftware.dfalex.Pattern.repeat;
17 | import static com.sixthsolution.apex.nlp.ner.Entity.LOCATION;
18 |
19 | /**
20 | * @author Saeed Masoumi (s-masoumi@live.com)
21 | */
22 |
23 | public class LocationDetector extends ChunkDetector {
24 |
25 | /**
26 | * @return at Mall, at home , ...
27 | */
28 | private static Pattern location() {
29 | return match(Tag.LOCATION_PREFIX.toString()).thenRepeat(Tag.NONE.toString())
30 | .thenMaybe(Tag.LOCATION_SUFFIX.toString());
31 | }
32 |
33 | /**
34 | * @return at 123 st
35 | */
36 | private static Pattern address_location() {
37 | return match(Tag.LOCATION_PREFIX.toString()).thenMaybe(repeat(Tag.NONE.toString()))
38 | .then(Tag.NUMBER.toString()).then(Tag.LOCATION_SUFFIX.toString());
39 | }
40 |
41 | @Override
42 | protected List> getPatterns() {
43 | return Arrays.asList(
44 | newPattern(Label.LOCATION, location()),
45 | newPattern(Label.LOCATION, address_location())
46 | );
47 | }
48 |
49 | @Override
50 | protected List extends ChunkDetectionFilter> getFilters() {
51 | return Arrays.asList(new LocationDetectionFilter());
52 | }
53 |
54 | @Override
55 | protected Entity getEntity() {
56 | return LOCATION;
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/BitUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | class BitUtils
19 | {
20 | private static final int[] DEBRUIJN_WINDOW_TO_BIT_POSITION=
21 | {
22 | -1, 0, 1, 1, 28, 28, 2, 2, 29, 29, 14, 14, 24, 24, 3, 3,
23 | 30, 30, 22, 22, 20, 20, 15, 15, 25, 25, 17, 17, 4, 4, 8, 8,
24 | 31, 31, 27, 27, 13, 13, 23, 23, 21, 21, 19, 19, 16, 16, 7, 7,
25 | 26, 26, 12, 12, 18, 18, 6, 6, 11, 11, 5, 5, 10, 10, 9, 9
26 | };
27 |
28 | /**
29 | * Get the lowest bit set in X.
30 | *
31 | * @param x integer to test
32 | * @return smallest bit=1<>> 26)&63];
48 | }
49 |
50 | /**
51 | * Turn off the lowest bit in in integer.
52 | *
53 | * @param x an integer
54 | * @return x - lowBit(x);
55 | */
56 | public static int turnOffLowBit(int x)
57 | {
58 | return x & (x-1);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/event/StandardEventDetector.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.event;
2 |
3 | import com.sixthsolution.apex.model.Event;
4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart;
5 |
6 | import org.threeten.bp.LocalDateTime;
7 |
8 | import java.util.List;
9 |
10 | /**
11 | * @author Saeed Masoumi (s-masoumi@live.com)
12 | * @author Rozhin Bayati
13 | */
14 |
15 | public class StandardEventDetector implements EventDetector {
16 |
17 | protected Extractor timeExtractor;
18 | protected Extractor dateExtractor;
19 | protected Extractor locationExtractor;
20 |
21 | public StandardEventDetector() {
22 | timeExtractor = provideTimeExtractor();
23 | dateExtractor = provideDateExtractor();
24 | locationExtractor = provideLocationExtractor();
25 | }
26 |
27 | @Override
28 | public Event detect(LocalDateTime source, List chunkedParts) {
29 | EventBuilder builder = new EventBuilder();
30 | for (ChunkedPart part : chunkedParts) {
31 | switch (part.getEntity()) {
32 | case TIME:
33 | timeExtractor.extract(builder, source, part);
34 | break;
35 | case DATE:
36 | dateExtractor.extract(builder, source, part);
37 | break;
38 | case LOCATION:
39 | locationExtractor.extract(builder, source, part);
40 | break;
41 | }
42 | }
43 | return builder.build(source);
44 | }
45 |
46 | public StandardEventDetector(Extractor DateExtractor) {
47 | timeExtractor = provideTimeExtractor();
48 | dateExtractor = DateExtractor;
49 | locationExtractor = provideLocationExtractor();
50 | }
51 |
52 | protected Extractor provideTimeExtractor() {
53 | return new StandardTimeExtractor();
54 | }
55 |
56 | protected Extractor provideDateExtractor() {
57 | return new StandardDateExtractor();
58 | }
59 |
60 | protected Extractor provideLocationExtractor() {
61 | return new StandardLocationExtractor();
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/PersianLocationDetector.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian;
2 | import com.nobigsoftware.dfalex.Pattern;
3 | import com.sixthsolution.apex.nlp.dict.Tag;
4 | import com.sixthsolution.apex.nlp.ner.Entity;
5 | import com.sixthsolution.apex.nlp.ner.Label;
6 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
7 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector;
8 | import com.sixthsolution.apex.nlp.persian.filter.LocationDetectionFilter;
9 | import com.sixthsolution.apex.nlp.util.Pair;
10 |
11 | import java.util.Arrays;
12 | import java.util.List;
13 |
14 | import static com.nobigsoftware.dfalex.Pattern.anyOf;
15 | import static com.nobigsoftware.dfalex.Pattern.match;
16 | import static com.nobigsoftware.dfalex.Pattern.repeat;
17 | import static com.sixthsolution.apex.nlp.ner.Entity.LOCATION;
18 |
19 | /**
20 | * Created by rozhin on 7/30/2017.
21 | */
22 |
23 | public class PersianLocationDetector extends ChunkDetector {
24 |
25 | /**
26 | * @return در_بازار
27 | */
28 | private static Pattern location() {
29 | return match(Tag.LOCATION_PREFIX.toString()).then(anyOf(address_location(),location_name()));
30 | }
31 |
32 | private static Pattern location_name(){
33 | return match(Tag.LOCATION_NAME.toString());
34 | }
35 | /**
36 | * @return at 123 st
37 | */
38 | private static Pattern address_location() {
39 | return match(Tag.LOCATION_SUFFIX.toString()).thenMaybe(repeat(Tag.NONE.toString()))
40 | .then(anyOf(Tag.NUMBER.toString(),Tag.NONE.toString()));
41 | }
42 |
43 | @Override
44 | protected List> getPatterns() {
45 | return Arrays.asList(
46 | newPattern(Label.LOCATION, location()),
47 | newPattern(Label.LOCATION, address_location())
48 | );
49 | }
50 |
51 | @Override
52 | protected List extends ChunkDetectionFilter> getFilters() {
53 | return Arrays.asList(new LocationDetectionFilter());
54 | }
55 |
56 | @Override
57 | protected Entity getEntity() {
58 | return LOCATION;
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/event/PersianRecurrence.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.event;
2 |
3 | /**
4 | * Created by rozhin on 8/1/2017.
5 | */
6 | import com.sixthsolution.apex.model.Frequency;
7 | import com.sixthsolution.apex.model.WeekDay;
8 | import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar;
9 |
10 | import java.util.List;
11 |
12 | /**
13 | * Represents a recurring event.
14 | *
15 | * @author Saeed Masoumi (saeed@6thsolution.com)
16 | * @author Rozhin Bayati
17 | */
18 | public class PersianRecurrence {
19 |
20 | private Frequency frequency = Frequency.DAILY;
21 |
22 | /**
23 | * Specifies how often the event should be repeated.
24 | */
25 | private int interval = 1;
26 |
27 | /**
28 | * The date or date-time until which the event should be repeated.
29 | */
30 | private JalaliCalendar until = null;
31 |
32 | private boolean forever = false;
33 | /**
34 | * Days of the week on which the event should be repeated
35 | */
36 | private List byDays;
37 |
38 | public PersianRecurrence(Frequency frequency, int interval,JalaliCalendar until, boolean forever,
39 | List byDays) {
40 | this.frequency = frequency;
41 | this.interval = interval;
42 | this.until = until;
43 | this.forever = forever;
44 | this.byDays = byDays;
45 | }
46 |
47 | public Frequency frequency() {
48 | return frequency;
49 | }
50 |
51 | public int interval() {
52 | return interval;
53 | }
54 |
55 | //TODO @nullable
56 | public JalaliCalendar until() {
57 | return until;
58 | }
59 |
60 | public boolean isForever() {
61 | return forever;
62 | }
63 |
64 | public List byDays() {
65 | return byDays;
66 | }
67 |
68 | @Override
69 | public String toString() {
70 | return "Recurrence{" +
71 | "frequency=" + frequency +
72 | ", interval=" + interval +
73 | ", until=" + until +
74 | ", forever=" + forever +
75 | ", byDays=" + byDays +
76 | '}';
77 | }
78 | }
79 |
80 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/model/PersianEvent.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.model;
2 |
3 | import com.sixthsolution.apex.model.Event;
4 | import com.sixthsolution.apex.nlp.persian.calendar.tools.JalaliCalendar;
5 | import com.sixthsolution.apex.nlp.persian.event.PersianRecurrence;
6 | import org.threeten.bp.LocalTime;
7 |
8 | /**
9 | * Created by rozhin on 8/1/2017.
10 | */
11 | public class PersianEvent extends Event{
12 |
13 | private String title = "";
14 | private String location = "";
15 | private JalaliCalendar startDate = null;
16 | private JalaliCalendar endDate= null;
17 | private LocalTime startTime=null;
18 | private LocalTime endTime=null;
19 | private boolean isAllDay = false;
20 | private PersianRecurrence recurrence = null;
21 |
22 |
23 | public PersianEvent(String title, String location, JalaliCalendar startDateTime,
24 | JalaliCalendar endDateTime,LocalTime endtime,LocalTime starttime,
25 | boolean isAllDay, PersianRecurrence recurrence) {
26 | this.title = title;
27 | this.location = location;
28 | this.startDate = startDateTime;
29 | this.endDate = endDateTime;
30 | this.endTime=endtime;
31 | this.startTime=starttime;
32 | this.isAllDay = isAllDay;
33 | this.recurrence = recurrence;
34 | }
35 |
36 |
37 | public JalaliCalendar jalaliStart() {
38 | return startDate;
39 | }
40 |
41 | public JalaliCalendar jalaliEnd() {
42 | return endDate;
43 | }
44 | public LocalTime jalaliTimeStart(){return startTime;}
45 | public LocalTime jalaliTimeEnd(){return endTime;}
46 | public PersianRecurrence persianRecurrence(){return persianRecurrence();}
47 |
48 | @Override
49 | public String toString() {
50 | return "Event{" +
51 | "title='" + title + '\'' +
52 | ", startDate=" + startDate +
53 | ", endDateTime=" + endDate +
54 | ",startTime="+startTime+
55 | ",endTime="+endTime+
56 | ", isAllDay=" + isAllDay +
57 | ", recurrence=" + recurrence +
58 | '}';
59 | }
60 |
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/dict/Tags.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.dict;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Entity;
4 |
5 | import java.util.ArrayList;
6 | import java.util.Arrays;
7 | import java.util.Collection;
8 | import java.util.Iterator;
9 |
10 | /**
11 | * @author Saeed Masoumi (s-masoumi@live.com)
12 | * @author Rozhin Bayati
13 | */
14 |
15 | public class Tags extends ArrayList {
16 |
17 | public boolean containsTag(Tag tag) {
18 | Iterator iterator = iterator();
19 | while (iterator.hasNext()) {
20 | TagValue next = iterator.next();
21 | if (next.tag.equals(tag)) {
22 | return true;
23 | }
24 | }
25 | return false;
26 | }
27 |
28 | public TagValue containsTagByValue(Tag tag) {
29 | Iterator iterator = iterator();
30 | while (iterator.hasNext()) {
31 | TagValue next = iterator.next();
32 | if (next.tag.equals(tag)) {
33 | return next;
34 | }
35 | }
36 | return null;
37 | }
38 |
39 | public boolean containsTag(Tag... tags) {
40 | return containsTag(Arrays.asList(tags));
41 | }
42 |
43 | public boolean containsTag(Collection tags) {
44 | Iterator iterator = iterator();
45 | while (iterator.hasNext()) {
46 | TagValue next = iterator.next();
47 | for (Tag tag : tags)
48 | if (next.tag.equals(tag)) {
49 | return true;
50 | }
51 | }
52 | return false;
53 | }
54 |
55 | public boolean containsTagName(int tag) {
56 | for (TagValue tagValue : this) {
57 | if (tagValue.tag.id == tag) {
58 | return true;
59 | }
60 | }
61 | return false;
62 | }
63 |
64 | public TagValue getTagByEntity(Entity entity) {
65 | Iterator iterator = iterator();
66 | while (iterator.hasNext()) {
67 | TagValue next = iterator.next();
68 | if (next.entity == entity) {
69 | return next;
70 | }
71 | }
72 | return null;
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/dfalex/README.md:
--------------------------------------------------------------------------------
1 | # dfalex backport
2 |
3 | Scanning / Lexical Analysis Without All The Fuss
4 | ================================================
5 |
6 | Sometimes you need faster and more robust matching than you can get out of Java regular expressions. Maybe they're too slow for you, or you get stack overflows when you match things that are too long, or maybe you want to search for many patterns simultaneously. There are plenty of lexical analysis tools you can use, but they involve a lot of fuss. They make you write specifications in a domain-specific language, often mixed with code, and then generate new Java code for a scanner that you have to incorporate into your build and use in pretty specific ways.
7 |
8 | DFALex provides that powerful matching capability without all the fuss. It will build you a deterministic finite automaton (DFA, googlable) for matching/finding multiple patterns in strings simultaneously, which you can then use with various matcher classes to perform searching or scanning operations.
9 |
10 | Unlike other tools which use DFAs internally, but only build scanners with them, DFALex provides you with the actual DFA in an easy-to-use form. Yes, you can use it in standard scanners, but you can also use it in other ways that don't fit that mold.
11 |
12 | Start Here:
13 | -----------
14 |
15 | * **DfaBuilder** for building DFAs
16 |
17 | * **Pattern** and **CharRange** for specifying patterns to match
18 |
19 | * **StringMatcher** for using your DFAs to find patterns in strings
20 |
21 | Requirements
22 | ------------
23 |
24 | DFALex needs Java 7 or better. No special libraries are required.
25 | If you want to run the tests, you'll need JUnit4.
26 |
27 | About
28 | -----
29 |
30 | DFALex is written by Matt Timmermans, and is all new code. It's written in Java first, with too much attention paid to performance.
31 |
32 | DFAs are generated from NFAs with a standard powerset construction, and minimized using a fast hash-based variant of Hopcroft's algorithm.
33 |
34 | This project was started because lexical analysis is no big deal. You should be able to just do it, without having to convince your team to add a new build step to generate code from a domain specific language. This way you can use it for lots of little jobs, instead of just big, important ones.
35 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/SerializableDfa.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.io.Serializable;
19 | import java.util.ArrayList;
20 | import java.util.List;
21 |
22 | class SerializableDfa implements Serializable
23 | {
24 | private static final long serialVersionUID = 1L;
25 |
26 | private final ArrayList> m_dfaStates;
27 | private final int[] m_startStateNumbers;
28 |
29 | private transient List> m_startStatesMemo;
30 |
31 | public SerializableDfa(RawDfa rawDfa)
32 | {
33 | final List origStates = rawDfa.getStates();
34 | final int len = origStates.size();
35 | m_dfaStates = new ArrayList<>(len);
36 | m_startStateNumbers = rawDfa.getStartStates();
37 | while(m_dfaStates.size() < len)
38 | {
39 | m_dfaStates.add(new PackedTreeDfaPlaceholder<>(rawDfa, m_dfaStates.size()));
40 | }
41 | }
42 |
43 | public synchronized List> getStartStates()
44 | {
45 | if (m_startStatesMemo == null)
46 | {
47 | final int len = m_dfaStates.size();
48 | for (int i=0;i(m_startStateNumbers.length);
57 | for (int startState : m_startStateNumbers)
58 | {
59 | m_startStatesMemo.add(m_dfaStates.get(startState).resolvePlaceholder());
60 | }
61 | }
62 | return m_startStatesMemo;
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/util/BuilderCache.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.util;
17 |
18 | import java.io.Serializable;
19 |
20 | /**
21 | * Implementations of this interface can cache serializable objects that
22 | * can be used to bypass expensive building operations by providing
23 | * pre-built objects
24 | */
public interface BuilderCache
{
    /**
     * Get a cached item.
     *
     * @param key The key used to identify the item. The key uniquely identifies all
     * of the source information that will go into building the item if this call fails
     * to retrieve a cached version. Typically this will be a cryptographic hash of
     * the serialized form of that information.
     *
     * @return the item that was previously cached under the key, or {@code null} if no such item
     * can be retrieved.
     */
    Serializable getCachedItem(String key);

    /**
     * This method may be called when an item is built, providing an opportunity to
     * cache it. Implementations are free not to cache the item.
     *
     * @param key The key that will be used to identify the item in future calls to {@link #getCachedItem(String)}.
     * Only letters, digits, and underscores are valid in keys, and key length is limited to 32 characters.
     * The behaviour of this method for invalid keys is undefined.
     *
     * Keys that differ only by case may or may not be considered equal by an implementation.
     * @param item The item to cache, if desired
     */
    void maybeCacheItem(String key, Serializable item);


}
55 |
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/nlp/util/Triple.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.util;
2 |
3 | /**
4 | * Adopted from http://facebook.github.io/jcommon/collections/jacoco/com.facebook.collections/Triple.java.html
5 | *
6 | * @author Saeed Masoumi (s-masoumi@live.com)
7 | */
8 |
/**
 * An immutable holder for three values, with value-based
 * {@link #equals(Object)} and {@link #hashCode()}. Any component may be
 * {@code null}.
 *
 * @param <First>  type of the first component
 * @param <Second> type of the second component
 * @param <Third>  type of the third component
 */
public class Triple<First, Second, Third> {
    private final First first;
    private final Second second;
    private final Third third;

    /** Memoized result of {@link #toString()}; {@code null} until first requested. */
    private volatile String toStringResult;

    public Triple(First first, Second second, Third third) {
        this.first = first;
        this.second = second;
        this.third = third;
    }

    public First getFirst() {
        return first;
    }

    public Second getSecond() {
        return second;
    }

    public Third getThird() {
        return third;
    }

    /**
     * @return {@code true} when {@code o} is of exactly the same class and all
     *         three components are equal (two {@code null}s count as equal)
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        final Triple<?, ?, ?> other = (Triple<?, ?, ?>) o;

        return sameValue(first, other.first)
                && sameValue(second, other.second)
                && sameValue(third, other.third);
    }

    /** Null-safe equality check for one pair of components. */
    private static boolean sameValue(Object mine, Object theirs) {
        return mine != null ? mine.equals(theirs) : theirs == null;
    }

    /**
     * @return a hash combining the three components with multiplier 31; a
     *         {@code null} component contributes 0
     */
    @Override
    public int hashCode() {
        int result = first != null ? first.hashCode() : 0;

        result = 31 * result + (second != null ? second.hashCode() : 0);
        result = 31 * result + (third != null ? third.hashCode() : 0);

        return result;
    }

    /**
     * @return {@code Triple{first=..., second=..., third=...}}; the string is
     *         computed on the first call and reused afterwards
     */
    @Override
    public String toString() {
        if (toStringResult == null) {
            toStringResult = "Triple{" +
                    "first=" + first +
                    ", second=" + second +
                    ", third=" + third +
                    '}';
        }

        return toStringResult;
    }
}
--------------------------------------------------------------------------------
/apex/src/test/java/com/sixthsolution/apex/nlp/test/ChunkAssertion.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.test;
2 |
3 | import com.sixthsolution.apex.nlp.ner.Label;
4 | import com.sixthsolution.apex.nlp.ner.ChunkedPart;
5 |
6 | import java.util.List;
7 |
8 | import static com.sixthsolution.apex.nlp.ner.Label.DATE;
9 | import static com.sixthsolution.apex.nlp.ner.Label.LOCATION;
10 | import static com.sixthsolution.apex.nlp.ner.Label.TIME;
11 | import static org.junit.Assert.assertEquals;
12 | import static org.junit.Assert.assertNotNull;
13 | import static org.junit.Assert.assertNull;
14 |
15 | /**
16 | * @author Saeed Masoumi (s-masoumi@live.com)
17 | */
18 |
19 | public class ChunkAssertion {
20 |
21 | private final List chunkedParts;
22 |
23 | public ChunkAssertion(List chunks) {
24 | this.chunkedParts = chunks;
25 | }
26 |
27 | public ChunkAssertion hasLocationChunk(String loc) {
28 | assertChunk(loc, LOCATION);
29 | return this;
30 | }
31 |
32 | public ChunkAssertion hasNoLocationChunk() {
33 | assertEmpty(LOCATION);
34 | return this;
35 | }
36 |
37 | public ChunkAssertion hasTimeChunk(String time) {
38 | assertChunk(time, TIME);
39 | return this;
40 | }
41 |
42 | public ChunkAssertion hasNoTimeChunk() {
43 | assertEmpty(TIME);
44 | return this;
45 | }
46 |
47 | public ChunkAssertion hasDateChunk(String date) {
48 | assertChunk(date, DATE);
49 | return this;
50 | }
51 | private ChunkedPart getChunkedPartByType(Label type) {
52 | for (ChunkedPart part : chunkedParts) {
53 | if (part.getLabel().equals(type)) {
54 | return part;
55 | }
56 | }
57 | return null;
58 | }
59 |
60 | private void assertEmpty(Label type) {
61 | assertNull(getChunkedPartByType(type));
62 | }
63 |
64 | private void assertChunk(String phrase, Label type) {
65 | ChunkedPart chunk = getChunkedPartByType(type);
66 | assertNotNull(chunk);
67 | assertEquals(phrase, chunk.toStringTaggedWords());
68 | }
69 |
70 | @Override
71 | public String toString() {
72 | return chunkedParts.toString();
73 | }
74 |
75 | public void print() {
76 | System.out.println(toString());
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/persian-nlp/src/test/java/com/sixthsolution/apex/nlp/persian/test/PersianDTTest.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian.test;
2 |
3 | /**
4 | * Created by rozhin on 7/30/2017.
5 | */
6 |
7 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector;
8 |
9 | import com.sixthsolution.apex.nlp.persian.PersianTimeDetector;
10 | import org.junit.Test;
11 |
12 | import static com.sixthsolution.apex.nlp.ner.Entity.TIME;
13 | import static com.sixthsolution.apex.nlp.ner.Label.FIXED_TIME;
14 | import static com.sixthsolution.apex.nlp.ner.Label.RANGE_TIME;
15 | import static com.sixthsolution.apex.nlp.ner.Label.RELATIVE_TIME;
16 | import static com.sixthsolution.apex.nlp.test.ChunkDetectorAssertion.assertChunkedPart;
17 |
18 | /**
19 | * @author Saeed Masoumi (s-masoumi@live.com)
20 | */
21 |
// Chunk-detection tests for Persian time phrases, run against the detector
// returned by provideDetector().
public class PersianDTTest extends PersianDetectorTest {

    // Each phrase must be detected as one chunk labelled FIXED_TIME with entity TIME.
    // Where the input contains ':' or '.' between digits, the expected text has
    // the separator surrounded by spaces.
    @Test
    public void test_fixed_time() {
        assertChunkedPart("ساعت 10").text("ساعت 10").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("ساعت 10 ق.ظ").text("ساعت 10 ق.ظ").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("ساعت 10 صبح").text("ساعت 10 صبح").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("ساعت 10 قبل_از_ظهر").text("ساعت 10 قبل_از_ظهر").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("عصر").text("عصر").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("7 ب.ظ").text("7 ب.ظ").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("در 23:20").text("در 23 : 20").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("در 23:20 ب.ظ").text("در 23 : 20 ب.ظ").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("8.20").text("8 . 20").label(FIXED_TIME).entity(TIME);
        assertChunkedPart("ساعت چهار").text("ساعت چهار").label(FIXED_TIME).entity(TIME);
    }

    // A bare number and a dotted date must not be detected as a time.
    @Test
    public void test_invalid_fixed_time(){
        assertChunkedPart("7").noDetection();
        assertChunkedPart("12.2.2016").noDetection();
    }

    // The whole phrase must be detected as one chunk labelled RANGE_TIME with entity TIME.
    @Test
    public void test_range_time() {
        assertChunkedPart("از صبح تا عصر").text("از صبح تا عصر")
                .label(RANGE_TIME).entity(TIME);

    }



    // Supplies the detector under test.
    @Override
    protected ChunkDetector provideDetector() {
        return new PersianTimeDetector();
    }
}
58 |
--------------------------------------------------------------------------------
/dfalex/src/test/java/com/nobigsoftware/dfalex/TestBase.java:
--------------------------------------------------------------------------------
1 | package com.nobigsoftware.dfalex;
2 |
3 | import org.junit.Assert;
4 |
5 | import java.io.InputStream;
6 | import java.io.InputStreamReader;
7 | import java.io.PrintWriter;
8 | import java.io.StringWriter;
9 | import java.nio.charset.Charset;
10 | import java.util.ArrayDeque;
11 | import java.util.HashSet;
12 |
13 | public class TestBase {
14 | final PrettyPrinter m_printer = new PrettyPrinter();
15 |
16 | int _countStates(DfaState>... starts) {
17 | ArrayDeque> togo = new ArrayDeque<>();
18 | HashSet> checkSet = new HashSet<>();
19 | for (DfaState> start : starts) {
20 | if (checkSet.add(start)) {
21 | togo.add(start);
22 | }
23 | }
24 | while (!togo.isEmpty()) {
25 | DfaState> scanst = togo.removeFirst();
26 | scanst.enumerateTransitions((c1, c2, newstate) -> {
27 | if (checkSet.add(newstate)) {
28 | togo.add(newstate);
29 | }
30 | });
31 | }
32 | return checkSet.size();
33 | }
34 |
35 | void _checkDfa(DfaState> start, String resource, boolean doStdout) throws Exception {
36 | String have;
37 | {
38 | StringWriter w = new StringWriter();
39 | m_printer.print(new PrintWriter(w), start);
40 | have = w.toString();
41 | }
42 | if (doStdout) {
43 | System.out.print(have);
44 | System.out.flush();
45 | }
46 | String want = _readResource(resource);
47 | Assert.assertEquals(want, have);
48 | }
49 |
50 | String _readResource(String resource) throws Exception {
51 | InputStream instream = getClass().getClassLoader().getResourceAsStream(resource);
52 | try {
53 | InputStreamReader inreader = new InputStreamReader(instream, Charset.forName("UTF-8"));
54 | StringBuilder sb = new StringBuilder();
55 | char[] buf = new char[1024];
56 | for (; ; ) {
57 | int rlen = inreader.read(buf);
58 | if (rlen <= 0) {
59 | break;
60 | }
61 | sb.append(buf, 0, rlen);
62 | }
63 | return sb.toString();
64 | } finally {
65 | instream.close();
66 | }
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/NfaTransition.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.io.Serializable;
19 |
/**
 * One edge of an {@link Nfa}: the first and last character that trigger it,
 * plus the number of the state it leads to.
 *
 * Objects of this class never change after construction.
 */
public final class NfaTransition implements Serializable
{
    private static final long serialVersionUID = 1L;

    /**
     * First character that takes this transition
     */
    public final char m_firstChar;
    /**
     * Last character that takes this transition
     */
    public final char m_lastChar;
    /**
     * Number of the state this transition leads to
     */
    public final int m_stateNum;

    /**
     * Build an immutable transition.
     *
     * @param firstChar stored in {@link #m_firstChar}
     * @param lastChar  stored in {@link #m_lastChar}
     * @param stateNum  stored in {@link #m_stateNum}
     */
    public NfaTransition(char firstChar, char lastChar, int stateNum)
    {
        this.m_firstChar = firstChar;
        this.m_lastChar = lastChar;
        this.m_stateNum = stateNum;
    }

    /**
     * Two transitions are equal when both characters and the target state number agree.
     */
    @Override
    public boolean equals(Object arg)
    {
        if (!(arg instanceof NfaTransition))
        {
            return false;
        }
        final NfaTransition that = (NfaTransition)arg;
        return m_firstChar == that.m_firstChar
            && m_lastChar == that.m_lastChar
            && m_stateNum == that.m_stateNum;
    }

    /**
     * Hash over the three fields: each one is xor-ed into the accumulator, which is
     * then multiplied by 16777619 (starting from 2166136261), with a final fold of
     * the high half into the low half.
     */
    @Override
    public int hashCode()
    {
        int acc = (int)2166136261L;
        final int[] parts = { m_firstChar, m_lastChar, m_stateNum };
        for (int part : parts)
        {
            acc = (acc ^ part)*16777619;
        }
        return acc ^ (acc>>16);
    }
}
--------------------------------------------------------------------------------
/apex/src/main/java/com/sixthsolution/apex/model/Recurrence.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 6thSolution
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.sixthsolution.apex.model;
18 |
19 | import org.threeten.bp.LocalDateTime;
20 |
21 | import java.util.List;
22 |
23 | /**
24 | * Represents a recurring event.
25 | *
26 | * @author Saeed Masoumi (saeed@6thsolution.com)
27 | * @author Rozhin Bayati
28 | */
29 | public class Recurrence {
30 |
31 | private Frequency frequency = Frequency.DAILY;
32 |
33 | /**
34 | * Specifies how often the event should be repeated.
35 | */
36 | private int interval = 1;
37 |
38 | /**
39 | * The date or date-time until which the event should be repeated.
40 | */
41 | private LocalDateTime until = null;
42 |
43 | private boolean forever = false;
44 | /**
45 | * Days of the week on which the event should be repeated
46 | */
47 | private List byDays;
48 |
49 | public Recurrence(Frequency frequency, int interval, LocalDateTime until, boolean forever,
50 | List byDays) {
51 | this.frequency = frequency;
52 | this.interval = interval;
53 | this.until = until;
54 | this.forever = forever;
55 | this.byDays = byDays;
56 | }
57 |
58 | public Frequency frequency() {
59 | return frequency;
60 | }
61 |
62 | public int interval() {
63 | return interval;
64 | }
65 |
66 | //TODO @nullable
67 | public LocalDateTime until() {
68 | return until;
69 | }
70 |
71 | public boolean isForever() {
72 | return forever;
73 | }
74 |
75 | public List byDays() {
76 | return byDays;
77 | }
78 |
79 | @Override
80 | public String toString() {
81 | return "Recurrence{" +
82 | "frequency=" + frequency +
83 | ", interval=" + interval +
84 | ", until=" + until +
85 | ", forever=" + forever +
86 | ", byDays=" + byDays +
87 | '}';
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/IntRangeClosureQueue.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
/**
 * FIFO queue of ints taken from a bounded range, holding each value at most once.
 *
 * A bitmask records which values are currently queued; the values themselves sit
 * in a circular buffer with one spare slot, so equal read and write positions
 * always mean "empty".
 */
class IntRangeClosureQueue
{
    final int[] m_bitmask;
    final int[] m_queue;
    int m_readpos, m_writepos;

    /**
     * Create a new IntRangeClosureQueue.
     *
     * The queue can contain integers in [0,range)
     *
     * @param range exclusive upper bound of the values that may be queued
     */
    public IntRangeClosureQueue(int range)
    {
        final int words = (range+31)>>5;
        m_bitmask = new int[words];
        m_queue = new int[words*32 + 1];
    }

    /**
     * Append a value to the tail unless it is already queued.
     *
     * @param val integer to add
     * @return true if the value was appended, false if it was already
     *      present and the queue is unchanged
     */
    public boolean add(int val)
    {
        final int word = val>>5;
        final int mask = 1<<(val&31);
        final int current = m_bitmask[word];
        if ((current & mask)!=0)
        {
            return false;
        }
        m_bitmask[word] = current|mask;
        m_queue[m_writepos] = val;
        m_writepos = advance(m_writepos);
        // the spare slot guarantees the writer never catches up with the reader
        assert(m_writepos != m_readpos);
        return true;
    }

    /**
     * Take the value at the head of the queue, if there is one.
     *
     * @return the value removed from the head, or -1 when the queue is empty
     */
    public int poll()
    {
        if (m_readpos == m_writepos)
        {
            return -1;
        }
        final int val = m_queue[m_readpos];
        m_readpos = advance(m_readpos);
        final int word = val>>5;
        final int mask = 1<<(val&31);
        assert((m_bitmask[word]&mask) != 0);
        m_bitmask[word] &= ~mask;
        assert((m_bitmask[word]&mask) == 0);
        return val;
    }

    /**
     * @return the buffer position following pos, wrapping to 0 at the end
     */
    private int advance(int pos)
    {
        final int next = pos+1;
        return (next >= m_queue.length) ? 0 : next;
    }
}
94 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/Matchable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.io.Serializable;
19 |
20 | /**
21 | * Base interface for the types of patterns that can be used with {@link DfaBuilder} to specify a set of strings to match.
22 | *
23 | * The primary implementation classes are {@link Pattern} and {@link CharRange}.
24 | */
25 | public interface Matchable extends Serializable
26 | {
27 | /**
28 | * Add states to an NFA to match the desired pattern
29 | *
30 | * New states will be created in the NFA to match the pattern and transition to
31 | * the given targetState.
32 | *
33 | * NO NEW TRANSITIONS will be added to the target state or any other pre-existing state
34 | *
35 | * @param nfa nfa to add to
36 | * @param targetState target state after the pattern is matched
37 | * @return a state that transitions to targetState after matching the pattern, and
38 | * only after matching the pattern. This may be targetState if the pattern is an
39 | * empty string.
40 | */
41 | public int addToNFA(Nfa> nfa, int targetState);
42 |
43 | /**
44 | * @return true if this pattern matches the empty string
45 | */
46 | public boolean matchesEmpty();
47 |
48 | /**
49 | * @return true if this pattern matches any non-empty strings
50 | */
51 | public boolean matchesNonEmpty();
52 |
53 | /**
54 | * @return true if this pattern matches can match anything at all
55 | */
56 | public boolean matchesSomething();
57 |
58 | /**
59 | * @return true if this pattern matches an infinite number of strings
60 | */
61 | public boolean isUnbounded();
62 |
63 | /**
64 | * Get the reverse of this pattern
65 | *
66 | * The reverse of a pattern matches the reverse of all the strings that this pattern matches
67 | *
68 | * @return the reverse of this pattern
69 | */
70 | public Matchable getReversed();
71 | }
72 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem  Locates java.exe (via JAVA_HOME, else PATH), collects the command-line
@rem  arguments and launches org.gradle.wrapper.GradleWrapperMain from
@rem  gradle\wrapper\gradle-wrapper.jar next to this script.
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem %~dp0 is the drive and directory of this script, %~n0 its name without extension
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: probe for java.exe on PATH, discarding its output
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Remove any double quotes from JAVA_HOME before building the java.exe path
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
@rem NOTE(review): _SKIP is set here but not read anywhere else in this script
set _SKIP=2

:win9xME_args_slurp
@rem With no first argument, CMD_LINE_ARGS stays empty
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
@rem A zero ERRORLEVEL skips to mainEnd; anything else falls through into :fail
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

@rem Final label; nothing in this script jumps to it
:omega
91 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaState.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | /**
19 | * A state in a char-matching deterministic finite automaton (that's the google phrase) or DFA
20 | *
21 | * @param MATCHRESULT the type of result produced by matching patterns with this DFA
22 | */
23 | public abstract class DfaState
24 | {
25 | /**
26 | * Process a character and get the next state
27 | *
28 | * @param c input character
29 | * @return The DfaState that c transitions to from this one, or null if there is no such state
30 | */
31 | public abstract DfaState getNextState(char c);
32 |
33 | /**
34 | * Get the result that has been matched if we've transitioned into this state
35 | *
36 | * @return If the sequence of characters that led to this state match a pattern in the
37 | * language being processed, the match result for that pattern is returned. Otherwise
38 | * null.
39 | */
40 | public abstract MATCHRESULT getMatch();
41 |
42 |
43 | /**
44 | * Get the state number. All states reachable from the output of a single call to
45 | * a {@link DfaBuilder} build method will be compactly numbered starting at 0.
46 | *
47 | * These state numbers can be used to maintain auxiliary information about a DFA.
48 | *
49 | * See {@link DfaAuxiliaryInformation}
50 | *
51 | * @return this state's state number
52 | */
53 | public abstract int getStateNumber();
54 |
55 | /**
56 | * Enumerate all the transitions out of this state
57 | *
58 | * @param consumer each DFA transition will be sent here
59 | */
60 | public abstract void enumerateTransitions(DfaTransitionConsumer consumer);
61 |
62 | /**
63 | * Get an {@link Iterable} of all the successor states of this state.
64 | *
65 | * Note that the same successor state may appear more than once in the interation
66 | *
67 | * @return an iterable of successor states.
68 | */
69 | public abstract Iterable> getSuccessorStates();
70 | }
71 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaStatePlaceholder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.util.List;
19 |
20 | /**
21 | * Base class for serializable placeholders that construct final-form DFA states and
22 | * temporarily assume their places in the DFA.
23 | *
24 | * In serialized placeholders, target states are identified by their state number in a
25 | * SerializableDfa.
26 | */
27 | abstract class DfaStatePlaceholder extends DfaStateImpl implements java.io.Serializable
28 | {
29 | private static final long serialVersionUID = 1L;
30 |
31 | protected transient DfaStateImpl m_delegate = null;
32 |
33 | /**
34 | * Create a new DfaStatePlaceholder
35 | *
36 | * The initially constructed state will accept no strings
37 | */
38 | public DfaStatePlaceholder()
39 | {
40 | }
41 |
42 | /**
43 | * Creates the final form delegate state, implementing all the required
44 | * transitions and matches.
45 | *
46 | * This is called on all DFA state placeholders after they are constructed
47 | */
48 | abstract void createDelegate(int statenum, List> allStates);
49 |
50 | @Override
51 | final void fixPlaceholderReferences()
52 | {
53 | m_delegate.fixPlaceholderReferences();
54 | }
55 |
56 | @Override
57 | final DfaStateImpl resolvePlaceholder()
58 | {
59 | return m_delegate.resolvePlaceholder();
60 | }
61 |
62 | @Override
63 | final public DfaState getNextState(char c)
64 | {
65 | return m_delegate.getNextState(c);
66 | }
67 | @Override
68 | final public MATCH getMatch()
69 | {
70 | return m_delegate.getMatch();
71 | }
72 | @Override
73 | final public void enumerateTransitions(DfaTransitionConsumer consumer)
74 | {
75 | m_delegate.enumerateTransitions(consumer);
76 | }
77 |
78 | @Override
79 | final public int getStateNumber()
80 | {
81 | return m_delegate.getStateNumber();
82 | }
83 |
84 | @Override
85 | public Iterable> getSuccessorStates()
86 | {
87 | return m_delegate.getSuccessorStates();
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/persian-nlp/src/main/java/com/sixthsolution/apex/nlp/persian/PersianTimeDetector.java:
--------------------------------------------------------------------------------
1 | package com.sixthsolution.apex.nlp.persian;
2 |
3 | /**
4 | * Created by rozhin on 7/26/2017.
5 | */
6 |
7 | import com.nobigsoftware.dfalex.Pattern;
8 | import com.sixthsolution.apex.nlp.ner.Entity;
9 | import com.sixthsolution.apex.nlp.ner.Label;
10 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetectionFilter;
11 | import com.sixthsolution.apex.nlp.ner.regex.ChunkDetector;
12 | import com.sixthsolution.apex.nlp.util.Pair;
13 |
14 | import java.util.Arrays;
15 | import java.util.List;
16 |
17 | import static com.nobigsoftware.dfalex.Pattern.anyOf;
18 | import static com.nobigsoftware.dfalex.Pattern.match;
19 | import static com.nobigsoftware.dfalex.Pattern.maybe;
20 | import static com.sixthsolution.apex.nlp.dict.Tag.*;
21 | import static com.sixthsolution.apex.nlp.ner.Entity.TIME;
22 | import static com.sixthsolution.apex.nlp.ner.Label.FIXED_TIME;
23 | import static com.sixthsolution.apex.nlp.ner.Label.RANGE_TIME;
24 | import static com.sixthsolution.apex.nlp.ner.Label.RELATIVE_TIME;
25 |
26 | import com.sixthsolution.apex.nlp.persian.filter.TimeDetectionFilter;
27 |
28 |
29 | public class PersianTimeDetector extends ChunkDetector {
30 |
31 | /**
32 | * @return returns شب،ظهر،عصر...
33 | */
34 | private static Pattern time_relative() {
35 | return match(TIME_RELATIVE.toString());
36 | }
37 |
38 | /**
39 | * @return returns hh:mm ق.ظ/ب.ظ
40 | */
41 | private static Pattern time_hour_min() {
42 | return match(NUMBER.toString()).thenMaybe(
43 | match(TIME_SEPARATOR.toString()).then(NUMBER.toString()))
44 | .thenMaybe(anyOf(TIME_MERIDIEM.toString(),TIME_RELATIVE.toString()));
45 | }
46 |
47 | /**
48 | * @return like time_hour_min but starts with در/ساعت
49 | */
50 | private static Pattern fixed_time() {
51 | return maybe(TIME_PREFIX.toString()).then(
52 | anyOf(time_relative(), time_hour_min()))
53 | .thenMaybe(
54 | TIME_MERIDIEM.toString());
55 | }
56 |
57 | /**
58 | * @return از (time) تا (time)
59 | */
60 | private static Pattern range_time() {
61 | return match(match(maybe(TIME_START_RANGE.toString()).then(maybe(fixed_time())).then(TIME_RELATIVE_PREFIX.toString()).then(fixed_time())));
62 | }
63 |
64 |
65 |
66 | @Override
67 | protected List> getPatterns() {
68 | return Arrays.asList(
69 | newPattern(FIXED_TIME, fixed_time()),
70 | newPattern(RANGE_TIME, range_time())
71 | );
72 | }
73 |
74 | @Override
75 | protected List extends ChunkDetectionFilter> getFilters() {
76 | return Arrays.asList(new TimeDetectionFilter());
77 | }
78 |
79 | @Override
80 | protected Entity getEntity() {
81 | return TIME;
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/IntListKey.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.util.Arrays;
19 | import backport.java.util.function.ObjIntConsumer;
20 |
21 | /**
22 | * A simple list of integers that can be used as a hash map key and cloned
23 | */
24 | class IntListKey implements Cloneable
25 | {
26 | private static int[] NO_INTS = new int[0];
27 |
28 | private int[] m_buf = NO_INTS;
29 | private int m_size = 0;
30 | private int m_hash = 0;
31 | private boolean m_hashValid = false;
32 |
33 | public IntListKey()
34 | {}
35 | public IntListKey(IntListKey src)
36 | {
37 | if (src != null && src.m_size > 0)
38 | {
39 | m_buf = Arrays.copyOf(src.m_buf, src.m_size);
40 | m_size = src.m_size;
41 | if (src.m_hashValid)
42 | {
43 | m_hash = src.m_hash;
44 | m_hashValid = true;
45 | }
46 | }
47 | }
48 |
49 | public void clear()
50 | {
51 | m_size = 0;
52 | m_hashValid = false;
53 | }
54 |
55 | public void add(int v)
56 | {
57 | if (m_size >= m_buf.length)
58 | {
59 | m_buf = Arrays.copyOf(m_buf, m_size + (m_size>>1) + 16);
60 | }
61 | m_buf[m_size++] = v;
62 | m_hashValid = false;
63 | }
64 |
65 | public void forData(ObjIntConsumer target)
66 | {
67 | target.accept(m_buf, m_size);
68 | }
69 |
70 |
71 | @Override
72 | protected IntListKey clone()
73 | {
74 | return new IntListKey(this);
75 | }
76 |
77 |
78 | @Override
79 | public boolean equals(Object obj)
80 | {
81 | if (!(obj instanceof IntListKey))
82 | {
83 | return false;
84 | }
85 | IntListKey r = (IntListKey)obj;
86 | if (m_size != r.m_size || hashCode() != r.hashCode())
87 | {
88 | return false;
89 | }
90 | for (int i = m_size-1; i>=0; --i)
91 | {
92 | if (m_buf[i] != r.m_buf[i])
93 | {
94 | return false;
95 | }
96 | }
97 | return true;
98 | }
99 |
100 |
101 | @Override
102 | public int hashCode()
103 | {
104 | if (!m_hashValid)
105 | {
106 | int h = 0;
107 | for (int i=0;i>>17);
113 | h ^= (h>>>11);
114 | h ^= (h>>>5);
115 | m_hash = h;
116 | m_hashValid = true;
117 | }
118 | return m_hash;
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/dfalex/src/main/java/com/nobigsoftware/dfalex/DfaAmbiguityException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Matthew Timmermans
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nobigsoftware.dfalex;
17 |
18 | import java.util.ArrayList;
19 | import java.util.List;
20 |
21 | /**
22 | * Exception thrown by default when patterns for multiple results match the same string in a DFA,
23 | * and no way has been provided to combine result
24 | */
25 | public class DfaAmbiguityException extends RuntimeException
26 | {
27 | private static final long serialVersionUID = 1L;
28 | private final List