├── api
└── src
│ ├── test
│ ├── resources
│ │ ├── a
│ │ │ └── test
│ │ │ │ └── some.txt
│ │ ├── dat
│ │ │ ├── nlp4j.txt
│ │ │ └── nlp4j.txt.nlp
│ │ ├── emorynlp-line.txt
│ │ ├── emorynlp-raw.txt
│ │ ├── propbank
│ │ │ ├── wsj_0001.prop
│ │ │ ├── wsj.prop
│ │ │ ├── sample.prop
│ │ │ ├── wsj_0001.parse
│ │ │ └── wsj_0002.parse
│ │ ├── emorynlp-raw.txt.tok
│ │ ├── log4j.properties
│ │ ├── decoder-test-config.xml
│ │ └── constituent
│ │ │ ├── functionTags.parse
│ │ │ └── normalize.parse
│ └── java
│ │ └── edu
│ │ └── emory
│ │ └── mathcs
│ │ └── nlp
│ │ ├── common
│ │ ├── verbnet
│ │ │ └── VNTagTest.java
│ │ ├── util
│ │ │ ├── MathUtilsTest.java
│ │ │ ├── CharUtilsTest.java
│ │ │ ├── FileUtilsTest.java
│ │ │ ├── CharTokenizerTest.java
│ │ │ └── SplitterTest.java
│ │ ├── propbank
│ │ │ ├── PBLocationTest.java
│ │ │ └── PBInstanceTest.java
│ │ ├── collection
│ │ │ └── ngram
│ │ │ │ └── BigramTest.java
│ │ └── constituent
│ │ │ └── CTReaderTest.java
│ │ ├── component
│ │ ├── tokenizer
│ │ │ └── dictionary
│ │ │ │ ├── AbbreviationTest.java
│ │ │ │ ├── CompoundTest.java
│ │ │ │ ├── UnitTest.java
│ │ │ │ ├── EmoticonTest.java
│ │ │ │ ├── EnglishApostropheTest.java
│ │ │ │ ├── CurrencyTest.java
│ │ │ │ ├── HtmlTest.java
│ │ │ │ └── DTHyphenTest.java
│ │ └── template
│ │ │ └── util
│ │ │ └── TSVReaderTest.java
│ │ ├── util
│ │ ├── MathUtilsTest.java
│ │ ├── CharUtilsTest.java
│ │ ├── FileUtilsTest.java
│ │ ├── CharTokenizerTest.java
│ │ └── SplitterTest.java
│ │ ├── learning
│ │ ├── gridsearch
│ │ │ └── GridFunctionTest.java
│ │ └── util
│ │ │ ├── LabelMapTest.java
│ │ │ ├── FeatureVectorTest.java
│ │ │ └── FeatureMapTest.java
│ │ └── conversion
│ │ └── util
│ │ ├── HeadRuleMapTest.java
│ │ ├── HeadTagSetTest.java
│ │ └── HeadRuleTest.java
│ └── main
│ ├── resources
│ └── edu
│ │ └── emory
│ │ └── mathcs
│ │ └── nlp
│ │ ├── component
│ │ ├── tokenizer
│ │ │ └── dictionary
│ │ │ │ ├── preserve.txt
│ │ │ │ ├── currency-dollar.txt
│ │ │ │ ├── currency.txt
│ │ │ │ ├── units.txt
│ │ │ │ ├── english-hyphen-suffix.txt
│ │ │ │ ├── english-compounds.txt
│ │ │ │ ├── html-tags.txt
│ │ │ │ ├── abbreviation-period.txt
│ │ │ │ └── english-hyphen-prefix.txt
│ │ └── morph
│ │ │ └── english
│ │ │ ├── abbreviation.rule
│ │ │ ├── adverb.exc
│ │ │ ├── cardinal.base
│ │ │ ├── ordinal.base
│ │ │ └── adjective.exc
│ │ ├── configuration
│ │ ├── config-decode-pos.xml
│ │ ├── config-decode-en.xml
│ │ ├── config-train-sample.xml
│ │ ├── config-train-sample-optimized.xml
│ │ └── config-train-doc.xml
│ │ └── conversion
│ │ ├── headrule_en_stanford.txt
│ │ └── headrule_en_conll.txt
│ └── java
│ └── edu
│ └── emory
│ └── mathcs
│ └── nlp
│ ├── learning
│ ├── activation
│ │ ├── IdentityFunction.java
│ │ ├── HyperbolicTanFunction.java
│ │ ├── SoftplusFunction.java
│ │ ├── RectifiedLinearUnitFunction.java
│ │ ├── ActivationFunction.java
│ │ ├── SoftmaxFunction.java
│ │ └── SigmoidFunction.java
│ ├── normalization
│ │ ├── NormalizationEnum.java
│ │ ├── NormalizationFunction.java
│ │ ├── SoftmaxSmoothedFunction.java
│ │ ├── SigmoidFunction.java
│ │ ├── SoftmaxFunction.java
│ │ └── CustomFunction.java
│ ├── initialization
│ │ ├── WeightGenerator.java
│ │ └── RandomWeightGenerator.java
│ ├── util
│ │ ├── Prediction.java
│ │ ├── SparsePrediction.java
│ │ ├── StringPrediction.java
│ │ └── SparseItem.java
│ ├── gridsearch
│ │ ├── GridFunction.java
│ │ └── LinearFunction.java
│ └── optimization
│ │ ├── reguralization
│ │ └── Regularizer.java
│ │ └── method
│ │ └── Perceptron.java
│ ├── component
│ ├── template
│ │ ├── eval
│ │ │ ├── Eval.java
│ │ │ └── AccuracyEval.java
│ │ ├── util
│ │ │ ├── NLPFlag.java
│ │ │ └── NLPMode.java
│ │ ├── feature
│ │ │ ├── Direction.java
│ │ │ ├── Source.java
│ │ │ ├── Relation.java
│ │ │ └── Field.java
│ │ ├── NLPComponent.java
│ │ ├── node
│ │ │ └── Orthographic.java
│ │ ├── reader
│ │ │ └── NLPReader.java
│ │ ├── lexicon
│ │ │ └── GlobalLexicon.java
│ │ └── train
│ │ │ └── LOLS.java
│ ├── morph
│ │ ├── MorphAnalyzer.java
│ │ ├── MorphologicalAnalyzer.java
│ │ ├── english
│ │ │ └── EnglishDerivation.java
│ │ └── util
│ │ │ └── AbstractAffixReplacer.java
│ ├── tokenizer
│ │ ├── dictionary
│ │ │ ├── Dictionary.java
│ │ │ └── Abbreviation.java
│ │ └── token
│ │ │ └── TokenIndex.java
│ ├── dep
│ │ ├── DEPArc.java
│ │ └── DEPEval.java
│ ├── pos
│ │ ├── POSState.java
│ │ └── POSTagger.java
│ ├── it
│ │ └── ItClassifier.java
│ └── ner
│ │ └── NERTagger.java
│ ├── common
│ ├── constant
│ │ ├── MetaConst.java
│ │ └── CharConst.java
│ ├── treebank
│ │ ├── CTTag.java
│ │ └── PBArc.java
│ ├── util
│ │ ├── Language.java
│ │ ├── ObjectSizeFetcher.java
│ │ ├── FastUtils.java
│ │ ├── FileExtensionFilter.java
│ │ └── HashUtils.java
│ ├── verbnet
│ │ ├── VNMap.java
│ │ ├── VNXml.java
│ │ └── VNFrame.java
│ ├── propbank
│ │ └── frameset
│ │ │ ├── PBFType.java
│ │ │ └── PBFXml.java
│ ├── collection
│ │ ├── tuple
│ │ │ ├── CharIntPair.java
│ │ │ ├── IntIntPair.java
│ │ │ ├── BooleanIntPair.java
│ │ │ ├── DoubleIntPair.java
│ │ │ ├── CharCharPair.java
│ │ │ ├── ObjectBooleanPair.java
│ │ │ ├── Triple.java
│ │ │ ├── Pair.java
│ │ │ ├── DoubleIntIntTriple.java
│ │ │ ├── BooleanIntIntTriple.java
│ │ │ ├── ObjectCharPair.java
│ │ │ ├── ObjectIntIntTriple.java
│ │ │ ├── ObjectIntPair.java
│ │ │ ├── ObjectFloatPair.java
│ │ │ └── ObjectDoublePair.java
│ │ └── tree
│ │ │ └── PrefixNode.java
│ └── random
│ │ └── XORShiftRandom.java
│ ├── decode
│ └── NLPDecoder.java
│ └── zzz
│ ├── WordVector.java
│ └── Tmp.java
├── .gitignore
├── README.md
├── cli
└── src
│ ├── main
│ ├── config
│ │ └── log4j.properties
│ └── java
│ │ └── edu
│ │ └── emory
│ │ └── mathcs
│ │ └── nlp
│ │ ├── bin
│ │ ├── Version.java
│ │ ├── util
│ │ │ └── BinUtils.java
│ │ └── NLPDemo.java
│ │ └── zzz
│ │ └── RadiologyDecode.java
│ └── assembly
│ └── bin.xml
└── LICENSE.txt
/api/src/test/resources/a/test/some.txt:
--------------------------------------------------------------------------------
1 | This is the cereal shot from guns.
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | *~
3 | *.iml
4 | .idea
5 | /bin/
6 | .settings
7 | */.settings
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | NLP4J webpage: [https://emorynlp.github.io/nlp4j](https://emorynlp.github.io/nlp4j/)
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/preserve.txt:
--------------------------------------------------------------------------------
1 | w/o
2 | W/O
3 | 's
4 | 'cause
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/abbreviation.rule:
--------------------------------------------------------------------------------
1 | n't RB not
2 | 'nt RB not
3 | 'd MD would
4 | 'll MD will
5 | ca MD can
6 | i PRP I
7 | na TO to
--------------------------------------------------------------------------------
/api/src/test/resources/dat/nlp4j.txt:
--------------------------------------------------------------------------------
1 | The NLP4J project provides a NLP toolkit for JVM languages. This project is under the Apache 2 license and is currently developed by the NLP Research Group at Emory University.
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/currency-dollar.txt:
--------------------------------------------------------------------------------
1 | au
2 | b
3 | bb
4 | bm
5 | bn
6 | bs
7 | bz
8 | c
9 | ca
10 | fj
11 | hk
12 | jm
13 | jy
14 | ky
15 | lr
16 | na
17 | nt
18 | nz
19 | sb
20 | sg
21 | us
22 | usd
23 | xc
24 | zb
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/currency.txt:
--------------------------------------------------------------------------------
1 | ad
2 | aud
3 | cad
4 | chf
5 | cny
6 | eur
7 | ffr
8 | gbp
9 | gmt
10 | hkd
11 | jpy
12 | kpw
13 | mxn
14 | nzd
15 | rmb
16 | rub
17 | sek
18 | sgd
19 | skr
20 | try
21 | usd
22 | usd
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/adverb.exc:
--------------------------------------------------------------------------------
1 | best well
2 | better well
3 | farther far
4 | farthest far
5 | foremost foremost
6 | further far
7 | furthest far
8 | least least
9 | less less
10 | more more
11 | more-or-less more-or-less
12 | most most
13 | worse bad
14 | worst bad
15 |
--------------------------------------------------------------------------------
/api/src/test/resources/emorynlp-line.txt:
--------------------------------------------------------------------------------
1 | The Emory NLP project provides software and resources for natural language processing.
2 | It is developed by the NLP Research Group at Emory University.
3 | Please join our discussion group if you want to get notifications about new updates or post issues, suggestions, questions, etc.
4 |
--------------------------------------------------------------------------------
/api/src/test/resources/emorynlp-raw.txt:
--------------------------------------------------------------------------------
1 | The Emory NLP project provides software and resources for natural language processing. It
2 | is developed by the NLP Research Group at Emory University. Please join our discussion
3 | group if you want to get notifications about new updates or post issues, suggestions, questions, etc.
4 |
--------------------------------------------------------------------------------
/api/src/test/resources/propbank/wsj_0001.prop:
--------------------------------------------------------------------------------
1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP
2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2
3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0
4 |
--------------------------------------------------------------------------------
/api/src/test/resources/emorynlp-raw.txt.tok:
--------------------------------------------------------------------------------
1 | The Emory NLP project provides software and resources for natural language processing .
2 | It is developed by the NLP Research Group at Emory University .
3 | Please join our discussion group if you want to get notifications about new updates or post issues , suggestions , questions , etc.
4 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/units.txt:
--------------------------------------------------------------------------------
1 | ag
2 | am
3 | cg
4 | ch
5 | cm
6 | cwt
7 | d
8 | dg
9 | dm
10 | drc
11 | fg
12 | fm
13 | ft
14 | fur
15 | gr
16 | h
17 | in
18 | kg
19 | km
20 | lb
21 | lea
22 | m
23 | mg
24 | mi
25 | mm
26 | ms
27 | ng
28 | nm
29 | oz
30 | pg
31 | pm
32 | qtr
33 | st
34 | yd
35 | yg
36 | ym
37 | zg
38 | zm
--------------------------------------------------------------------------------
/api/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set root logger level to INFO and its only appender to A1.
2 | log4j.rootLogger=INFO, A1
3 |
4 | # A1 is set to be a ConsoleAppender.
5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender
6 |
7 | # A1 uses PatternLayout.
8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout
9 | log4j.appender.A1.layout.conversionPattern=%m%n
10 |
--------------------------------------------------------------------------------
/cli/src/main/config/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set root logger level to INFO and its only appender to A1.
2 | log4j.rootLogger=INFO, A1
3 |
4 | # A1 is set to be a ConsoleAppender.
5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender
6 |
7 | # A1 uses PatternLayout.
8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout
9 | log4j.appender.A1.layout.conversionPattern=%m%n
10 |
--------------------------------------------------------------------------------
/api/src/test/resources/propbank/wsj.prop:
--------------------------------------------------------------------------------
1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP
2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2
3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0
4 | propbank/wsj_0002.parse 0 16 gold name-v name.01 ----- 0:2*17:1-ARG1 16:0-rel 18:2-ARG2
5 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/english-hyphen-suffix.txt:
--------------------------------------------------------------------------------
1 | able
2 | ahol
3 | aholic
4 | ation
5 | centric
6 | cracy
7 | crat
8 | dom
9 | er
10 | ery
11 | esque
12 | ette
13 | fest
14 | fi
15 | fold
16 | ful
17 | gate
18 | gon
19 | hood
20 | ian
21 | ible
22 | ing
23 | isation
24 | ise
25 | ising
26 | ism
27 | ist
28 | itis
29 | ization
30 | ize
31 | izing
32 | less
33 | logist
34 | logy
35 | ly
36 | most
37 | rama
38 | wise
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/cardinal.base:
--------------------------------------------------------------------------------
1 | zero
2 | one
3 | two
4 | three
5 | four
6 | five
7 | six
8 | seven
9 | eight
10 | nine
11 | ten
12 | eleven
13 | twelve
14 | thirteen
15 | fourteen
16 | fifteen
17 | sixteen
18 | seventeen
19 | eighteen
20 | nineteen
21 | twenty
22 | thirty
23 | forty
24 | fifty
25 | sixty
26 | seventy
27 | eighty
28 | ninety
29 | hundred
30 | thousand
31 | million
32 | billion
33 | trillion
34 | quadrillion
35 | quintillion
36 | sextillion
37 | septillion
38 | octillion
39 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/ordinal.base:
--------------------------------------------------------------------------------
1 | zeroth
2 | first
3 | second
4 | third
5 | fourth
6 | fifth
7 | sixth
8 | seventh
9 | eighth
10 | ninth
11 | tenth
12 | eleventh
13 | twelfth
14 | thirteenth
15 | fourteenth
16 | fifteenth
17 | sixteenth
18 | seventeenth
19 | eighteenth
20 | nineteenth
21 | twentieth
22 | thirtieth
23 | fortieth
24 | fiftieth
25 | sixtieth
26 | seventieth
27 | eightieth
28 | ninetieth
29 | hundredth
30 | thousandth
31 | millionth
32 | billionth
33 | trillionth
34 | quadrillionth
35 | quintillionth
36 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2015, Emory University
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-pos.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | edu/emory/mathcs/nlp/lexica/en-ambiguity-classes-simplified-lowercase.xz
8 | edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz
9 |
10 |
11 |
12 | edu/emory/mathcs/nlp/models/en-pos.xz
13 |
14 |
15 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/IdentityFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package edu.emory.mathcs.nlp.learning.activation;
5 |
6 | /**
7 |  * Activation function that leaves every score unchanged.
8 |  *
9 |  * @author amit-deshmane
10 |  */
11 | public class IdentityFunction implements ActivationFunction {
12 |
13 |     private static final long serialVersionUID = 797900453250163148L;
14 |
15 |     public IdentityFunction() {
16 |     }
17 |
18 |     /**
19 |      * Does nothing: the given array is neither read nor modified.
20 |      *
21 |      * @param scores the values to (not) transform
22 |      * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[])
23 |      */
24 |     @Override
25 |     public void apply(float[] scores) {
26 |         // identity: there is nothing to transform
27 |     }
28 |
29 |     /** Returns the label {@code "Identity"}. */
30 |     @Override
31 |     public String toString() {
32 |         return "Identity";
33 |     }
34 | }
35 |
--------------------------------------------------------------------------------
/api/src/test/resources/decoder-test-config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | edu/emory/mathcs/nlp/lexica/en-ambiguity-classes-simplified-lowercase.xz
8 | edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz
9 | edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz
10 |
11 |
12 |
13 | edu/emory/mathcs/nlp/models/en-pos.xz
14 |
15 |
16 |
--------------------------------------------------------------------------------
/cli/src/assembly/bin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | bin
4 |
5 | tar.gz
6 |
7 | dir
8 |
9 | true
10 |
11 |
12 | target/appassembler/bin
13 |
14 |
15 | ../api/src/main/resources/edu/emory/mathcs/nlp/configuration
16 | etc/config
17 |
18 | **/*.xml
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/english-compounds.txt:
--------------------------------------------------------------------------------
1 | 't is
2 | 't was
3 | ai nt
4 | are nt
5 | ca nt
6 | can not
7 | could a
8 | could nt
9 | d' ye
10 | did nt
11 | do n cha
12 | do n' cha
13 | do nt
14 | does nt
15 | du n no
16 | fin na
17 | gim me
18 | gon na
19 | got ta
20 | had nt
21 | has nt
22 | have nt
23 | i 'm ma
24 | i 'mmm
25 | is nt
26 | it d
27 | it ll
28 | lem me
29 | let s
30 | might nt
31 | more 'n
32 | must nt
33 | sha nt
34 | should a
35 | should nt
36 | that d
37 | that ll
38 | that s
39 | they d
40 | they re
41 | they ve
42 | wan na
43 | was nt
44 | we ve
45 | were nt
46 | wha d ya
47 | what cha
48 | what re
49 | what s
50 | what ve
51 | what z
52 | who d
53 | who ll
54 | wo n cha
55 | wo nt
56 | would a
57 | would nt
58 | you d
59 | you ll
60 | you ve
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/HyperbolicTanFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package edu.emory.mathcs.nlp.learning.activation;
5 |
6 | /**
7 | * @author amit-deshmane
8 | *
9 | */
10 | public class HyperbolicTanFunction implements ActivationFunction {
11 |
12 | private static final long serialVersionUID = 6581919225914864529L;
13 |
14 | public HyperbolicTanFunction() {
15 | }
16 |
17 | /* (non-Javadoc)
18 | * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[])
19 | */
20 | @Override
21 | public void apply(float[] scores) {
22 | for(int index = 0; index < scores.length; index++){
23 | scores[index] = (float)Math.tanh(scores[index]);
24 | }
25 | }
26 | @Override
27 | public String toString()
28 | {
29 | return "Tanh";
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/SoftplusFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package edu.emory.mathcs.nlp.learning.activation;
5 |
6 | /**
7 |  * Activation function that replaces each score {@code x} with softplus, {@code ln(1 + e^x)}.
8 |  *
9 |  * @author amit-deshmane
10 |  */
11 | public class SoftplusFunction implements ActivationFunction {
12 |
13 |     private static final long serialVersionUID = -3123516253479799668L;
14 |
15 |     public SoftplusFunction() {
16 |     }
17 |
18 |     /**
19 |      * Overwrites every element of {@code scores} with its softplus value.
20 |      *
21 |      * <p>The value is computed as {@code max(x, 0) + log1p(e^-|x|)}, which is algebraically equal
22 |      * to {@code ln(1 + e^x)} but does not overflow. The naive form returns {@code Infinity} as soon
23 |      * as {@code e^x} leaves the {@code double} range (x above roughly 709) and rounds to exactly
24 |      * {@code 0} for strongly negative x.
25 |      *
26 |      * @param scores the values to transform in place
27 |      * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[])
28 |      */
29 |     @Override
30 |     public void apply(float[] scores) {
31 |         for (int index = 0; index < scores.length; index++) {
32 |             scores[index] = softplus(scores[index]);
33 |         }
34 |     }
35 |
36 |     /** Numerically stable {@code ln(1 + e^x)}; NaN and infinite inputs propagate as in the naive form. */
37 |     private static float softplus(float x) {
38 |         // For x > 0 factor e^x out of the logarithm so exp() never receives a positive argument.
39 |         final double value = (x > 0) ? x + Math.log1p(Math.exp(-x)) : Math.log1p(Math.exp(x));
40 |         return (float) value;
41 |     }
42 |
43 |     /** Returns the label {@code "Softplus"}. */
44 |     @Override
45 |     public String toString() {
46 |         return "Softplus";
47 |     }
48 | }
49 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/RectifiedLinearUnitFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package edu.emory.mathcs.nlp.learning.activation;
5 |
6 | /**
7 |  * Activation function that clamps negative scores to zero (rectified linear unit).
8 |  *
9 |  * @author amit-deshmane
10 |  */
11 | public class RectifiedLinearUnitFunction implements ActivationFunction {
12 |
13 |     private static final long serialVersionUID = 2776457895707438981L;
14 |
15 |     public RectifiedLinearUnitFunction() {
16 |     }
17 |
18 |     /**
19 |      * Sets every element of {@code scores} that compares below zero to {@code 0}.
20 |      * All other elements, including NaN (for which the comparison is false), are left as they are.
21 |      *
22 |      * @param scores the values to transform in place
23 |      * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[])
24 |      */
25 |     @Override
26 |     public void apply(float[] scores) {
27 |         for (int i = 0, size = scores.length; i < size; i++) {
28 |             final boolean negative = scores[i] < 0;
29 |             if (!negative) {
30 |                 continue;
31 |             }
32 |             scores[i] = 0;
33 |         }
34 |     }
35 |
36 |     /** Returns the label {@code "Relu"}. */
37 |     @Override
38 |     public String toString() {
39 |         return "Relu";
40 |     }
41 | }
42 |
--------------------------------------------------------------------------------
/api/src/test/resources/propbank/sample.prop:
--------------------------------------------------------------------------------
1 | propbank/wsj_0003.parse 0 11 gold enter-v enter.01 ----- 10:1-ARG0 11:0-rel 12:1-ARG1
2 | propbank/wsj_0003.parse 0 21 gold cause-v cause.01 ----- 16:2-ARG0 21:0-rel 22:2-ARG1
3 | propbank/wsj_0003.parse 0 25 gold show-v show.02 ----- 22:1*23:1*24:1-ARG1 25:0,26:1-rel 27:2-ARGM-TMP 22:1*23:1-LINK-SLC
4 | propbank/wsj_0003.parse 0 31 gold say-v say.01 ----- 0:3*33:1-ARG1 30:1-ARG0 31:0-rel
5 | propbank/wsj_0003.parse 1 18 gold appear-v appear.02 ----- 0:2,19:2-ARG1 18:0-rel
6 | propbank/wsj_0003.parse 1 21 gold be-v be.01 ----- 0:2*19:1-ARG1 21:0-rel 22:2-ARG2
7 | propbank/wsj_0003.parse 1 28 gold study-v study.01 ----- 25:1*29:1-ARG1 28:0-rel 30:1-ARGM-LOC 25:1*29:1-LINK-PSV
8 | propbank/wsj_0003.parse 1 32 gold industrialize-v industrialize.01 ----- 32:0-rel 33:0-ARG1
9 | propbank/wsj_0003.parse 1 36 gold say-v say.01 ----- 0:3*38:1-ARG1 35:1-ARG0 36:0-rel
10 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/eval/Eval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.eval;
17 |
18 | /** Evaluation contract consisting of two operations, {@link #clear()} and {@link #score()}.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public interface Eval
22 | {
23 | void clear(); // NOTE(review): presumably resets accumulated evaluation state -- confirm against implementations
24 | double score(); // NOTE(review): presumably the current evaluation score; range and units are not visible here
25 | }
26 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/util/NLPFlag.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.util;
17 |
18 | /** Enumerates the component flags {@link #TRAIN}, {@link #EVALUATE} and {@link #DECODE}.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public enum NLPFlag
22 | {
23 | // COLLECT, (commented out, so it is not an available constant)
24 | TRAIN, // NOTE(review): presumably selects model training -- confirm where the flag is consumed
25 | EVALUATE, // NOTE(review): presumably selects scoring against gold data -- confirm
26 | DECODE; // NOTE(review): presumably selects prediction on unlabeled input -- confirm
27 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/feature/Direction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.feature;
17 |
18 | /** Enumerates the directions {@code left}, {@code right}, {@code up}, {@code down} and {@code all}.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public enum Direction
22 | {
23 | left, // NOTE(review): constants are lower case, presumably to match configuration text via valueOf -- confirm
24 | right,
25 | up,
26 | down,
27 | all; // NOTE(review): presumably means "every direction" rather than a fifth direction -- confirm with callers
28 | }
29 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/NormalizationEnum.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.normalization;
17 |
18 | /**
19 | * Enumerates the normalization kinds {@code custom}, {@code sigmoid}, {@code softmax} and {@code softmax_smooth}.
20 | * @author amit-deshmane
21 | */
22 | public enum NormalizationEnum {
23 | custom, // custom implemented by Jasper
24 | sigmoid, // NOTE(review): each constant presumably selects a matching *Function class in this package -- confirm
25 | softmax,
26 | softmax_smooth;
27 | }
28 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/initialization/WeightGenerator.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.initialization;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable source of {@code float} weights, handed out one at a time through {@link #next()}.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public interface WeightGenerator extends Serializable
24 | {
25 | float next(); // NOTE(review): value range and distribution are decided by the implementation -- not visible here
26 | }
27 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/constant/MetaConst.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.constant;
17 |
18 | /** String constants for the meta tokens {@code #hlink#}, {@code #emo#}, {@code #crd#} and {@code #ord#}.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public interface MetaConst
22 | {
23 | String HYPERLINK = "#hlink#"; // NOTE(review): presumably a placeholder that stands in for a hyperlink token -- confirm usage
24 | String EMOTICON = "#emo#"; // NOTE(review): presumably a placeholder for an emoticon token -- confirm usage
25 | String CARDINAL = "#crd#"; // NOTE(review): presumably a placeholder for a cardinal number -- confirm usage
26 | String ORDINAL = "#ord#"; // NOTE(review): presumably a placeholder for an ordinal number -- confirm usage
27 | }
28 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-en.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | edu/emory/mathcs/nlp/lexica/en-ambiguity-classes-simplified-lowercase.xz
8 | edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz
9 | edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz
10 | edu/emory/mathcs/nlp/lexica/en-named-entity-gazetteers-simplified.xz
11 |
12 |
13 |
14 | edu/emory/mathcs/nlp/models/en-pos.xz
15 | edu/emory/mathcs/nlp/models/en-ner.xz
16 | edu/emory/mathcs/nlp/models/en-dep.xz
17 |
18 |
19 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/treebank/CTTag.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2016, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.treebank;
17 |
18 | /** String constants for two special treebank tags: the artificial top node and empty categories.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public interface CTTag
22 | {
23 | /** The special tag for the artificial top node. */
24 | String TOP = "TOP";
25 | /** The special tag for empty categories. */
26 | String NONE = "-NONE-";
27 | }
28 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/util/Language.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 |
19 | /** Enumeration of language identifiers plus a name-based lookup.
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public enum Language
23 | {
24 | ARABIC,
25 | CHINESE,
26 | ENGLISH,
27 | HINDI,
28 | KOREAN;
29 | /** Passes {@code s} through {@code StringUtils.toUpperCase} (no import is shown, so presumably the same-package helper) and resolves the result with {@code valueOf}, which throws {@link IllegalArgumentException} when no constant has that name. */
30 | static public Language getType(String s)
31 | {
32 | return valueOf(StringUtils.toUpperCase(s));
33 | }
34 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/ActivationFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.activation;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable contract for a function applied to an array of scores.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public interface ActivationFunction extends Serializable
24 | {
25 | /** Transforms all values in the array according to this activation function; nothing is returned, so results are presumably written back into {@code scores} — confirm with implementations. */
26 | public void apply(float[] scores);
27 | }
28 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/NormalizationFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.normalization;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable contract for a normalization applied to an array of scores.
21 | * @author amit-deshmane ({@code amitad87@gmail.com})
22 | */
23 | public interface NormalizationFunction extends Serializable
24 | {
25 | /** Transforms all values in the array according to this normalization function; nothing is returned, so results are presumably written back into {@code scores} — confirm with implementations. */
26 | public void apply(float[] scores);
27 | }
28 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/verbnet/VNMap.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.verbnet;
17 |
18 | import java.io.Serializable;
19 | import java.util.HashMap;
20 |
21 | /** A {@link HashMap} of {@code VNClass} objects with a convenience {@code put} that derives the key from the value.
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class VNMap extends HashMap implements Serializable // NOTE(review): HashMap has no type arguments in this listing; they may have been dropped when it was generated — confirm against the repository
25 | {
26 | private static final long serialVersionUID = -7409938151707095231L;
27 | /** Stores {@code vn} under the key returned by {@code vn.getID()}. */
28 | public void put(VNClass vn)
29 | {
30 | put(vn.getID(), vn);
31 | }
32 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/verbnet/VNTagTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.verbnet;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | /** Unit test for {@code VNTag.contains}.
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class VNTagTest
27 | {
28 | @Test
29 | public void test() // expects contains() to accept the VN_AGENT constant and to reject the arbitrary string "Hello"
30 | {
31 | assertTrue(VNTag.contains(VNTag.VN_AGENT));
32 | assertFalse(VNTag.contains("Hello"));
33 | }
34 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/NLPComponent.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template;
17 |
18 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode;
19 |
20 | import java.util.List;
21 |
22 | /** Contract for a component that processes sentences and documents.
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public interface NLPComponent> // NOTE(review): the type-parameter list looks truncated in this listing (stray '>' and N is never declared); presumably <N extends AbstractNLPNode<N>> given the AbstractNLPNode import — confirm against the repository
26 | {
27 | /** Processes a sentence. */
28 | void process(N[] nodes);
29 |
30 | /** Processes a document. */
31 | void process(List document); // NOTE(review): List has no element type here; likely lost in the same way — confirm
32 | }
33 |
--------------------------------------------------------------------------------
/api/src/test/resources/propbank/wsj_0001.parse:
--------------------------------------------------------------------------------
1 | (TOP (S (NP-SBJ (NP (NNP Pierre)
2 | (NNP Vinken))
3 | (, ,)
4 | (ADJP (NML (CD 61)
5 | (NNS years))
6 | (JJ old))
7 | (, ,))
8 | (VP (MD will)
9 | (VP (VB join)
10 | (NP (DT the)
11 | (NN board))
12 | (PP-CLR (IN as)
13 | (NP (DT a)
14 | (JJ nonexecutive)
15 | (NN director)))
16 | (NP-TMP (NNP Nov.)
17 | (CD 29))))
18 | (. .)))
19 |
20 | (TOP (S (NP-SBJ (NNP Mr.)
21 | (NNP Vinken))
22 | (VP (VBZ is)
23 | (NP-PRD (NP (NN chairman))
24 | (PP (IN of)
25 | (NP (NP (NNP Elsevier)
26 | (NNP N.V.))
27 | (, ,)
28 | (NP (DT the)
29 | (NNP Dutch)
30 | (VBG publishing)
31 | (NN group))))))
32 | (. .)))
33 |
34 |
--------------------------------------------------------------------------------
/api/src/test/resources/dat/nlp4j.txt.nlp:
--------------------------------------------------------------------------------
1 | 1 The the DT _ 3 det _ O
2 | 2 NLP4J nlp0j NNP pos2=NN 3 compound _ U-ORG
3 | 3 project project NN _ 4 nsubj _ O
4 | 4 provides provide VBZ _ 0 root _ O
5 | 5 a a DT _ 7 det _ O
6 | 6 NLP nlp NN pos2=NNP 7 compound _ O
7 | 7 toolkit toolkit NN _ 4 dobj _ O
8 | 8 for for IN _ 7 prep _ O
9 | 9 JVM jvm NN pos2=NNP 10 compound _ U-ORG
10 | 10 languages language NNS _ 8 pobj _ O
11 | 11 . . . _ 4 punct _ O
12 |
13 | 1 This this DT _ 2 det _ O
14 | 2 project project NN _ 3 nsubj _ O
15 | 3 is be VBZ _ 0 root _ O
16 | 4 under under IN _ 3 prep _ O
17 | 5 the the DT _ 8 det _ O
18 | 6 Apache apache NNP pos2=NN 8 nmod _ O
19 | 7 2 0 CD pos2=NNP 6 nmod _ O
20 | 8 license license NN pos2=NNS 4 pobj _ O
21 | 9 and and CC _ 3 cc _ O
22 | 10 is be VBZ _ 12 auxpass _ O
23 | 11 currently currently RB _ 12 advmod _ O
24 | 12 developed develop VBN _ 3 conj _ O
25 | 13 by by IN _ 12 agent _ O
26 | 14 the the DT _ 17 det _ B-ORG
27 | 15 NLP nlp NNP _ 17 compound _ I-ORG
28 | 16 Research research NNP _ 17 compound _ I-ORG
29 | 17 Group group NNP _ 13 pobj _ L-ORG
30 | 18 at at IN _ 17 prep _ O
31 | 19 Emory emory NNP _ 20 compound _ B-ORG
32 | 20 University university NNP _ 18 pobj _ L-ORG
33 | 21 . . . _ 3 punct _ O
34 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/util/NLPMode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.util;
17 |
18 | /** Modes naming the kinds of NLP processing; each constant's meaning is given by its trailing comment.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public enum NLPMode
22 | {
23 | custom, // custom NLP
24 | pos, // part-of-speech tagging
25 | ner, // named entity recognition
26 | dep, // dependency parsing
27 | srl, // semantic role labeling
28 | doc, // document classification
29 | it, // it classification
30 | sentiment, // sentiment analysis
31 | sentiment_ensemble; // sentiment analysis: ensemble
32 | }
33 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/util/ObjectSizeFetcher.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import java.lang.instrument.Instrumentation;
19 |
20 | /** Static holder for an {@link Instrumentation} handle, used to query object sizes.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class ObjectSizeFetcher
24 | {
25 | private static Instrumentation instrumentation; // stays null until premain() assigns it
26 | /** Stores {@code inst} for later use; the signature matches the Java agent {@code premain} entry point, and {@code args} is ignored. */
27 | public static void premain(String args, Instrumentation inst)
28 | {
29 | instrumentation = inst;
30 | }
31 | /** Returns {@code instrumentation.getObjectSize(o)}; throws {@link NullPointerException} if {@code premain} has not stored a handle yet. */
32 | public static long getObjectSize(Object o)
33 | {
34 | return instrumentation.getObjectSize(o);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/cli/src/main/java/edu/emory/mathcs/nlp/bin/Version.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.bin;
17 |
18 | /** Command-line entry point that prints a version banner.
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public class Version
22 | {
23 | public static void main(String[] args) // prints five fixed lines to standard output; args is unused
24 | {
25 | System.out.println("========================================");
26 | System.out.println("NLP4J Version 1.1.3");
27 | System.out.println("Contact: choi@mathcs.emory.edu");
28 | System.out.println("Webpage: http://emorynlp.github.io/nlp4j");
29 | System.out.println("========================================");
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/propbank/frameset/PBFType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.propbank.frameset;
17 |
18 |
19 | /** Frameset types, each paired with a one-letter string value.
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public enum PBFType
23 | {
24 | VERB("v"),
25 | NOUN("n"),
26 | ADJECTIVE("j");
27 |
28 | private final String value; // the string passed to the constructor, e.g. "v" for VERB
29 |
30 | PBFType(String value)
31 | {
32 | this.value = value;
33 | }
34 | /** Returns true when {@code value} equals this constant's string value; a null argument yields false. */
35 | public boolean isValue(String value)
36 | {
37 | return this.value.equals(value);
38 | }
39 | /** Returns this constant's string value. */
40 | public String getValue()
41 | {
42 | return value;
43 | }
44 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/CharIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of one {@code char} and one {@code int}, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class CharIntPair implements Serializable
24 | {
25 | private static final long serialVersionUID = -2439322004395455224L;
26 |
27 | public char c;
28 | public int i;
29 | /** Creates a pair holding {@code c} and {@code i} by delegating to the overridable {@link #set}. */
30 | public CharIntPair(char c, int i)
31 | {
32 | set(c, i);
33 | }
34 | /** Overwrites both fields with the given values. */
35 | public void set(char c, int i)
36 | {
37 | this.c = c;
38 | this.i = i;
39 | }
40 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/IntIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of two {@code int} values, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class IntIntPair implements Serializable
24 | {
25 | private static final long serialVersionUID = 1674260806426517804L;
26 |
27 | public int i1;
28 | public int i2;
29 | /** Creates a pair holding {@code i1} and {@code i2} by delegating to the overridable {@link #set}. */
30 | public IntIntPair(int i1, int i2)
31 | {
32 | set(i1, i2);
33 | }
34 | /** Overwrites both fields with the given values. */
35 | public void set(int i1, int i2)
36 | {
37 | this.i1 = i1;
38 | this.i2 = i2;
39 | }
40 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/BooleanIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of one {@code boolean} and one {@code int}, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class BooleanIntPair implements Serializable
24 | {
25 | private static final long serialVersionUID = -3606845926289267380L;
26 | public boolean b;
27 | public int i;
28 | /** Creates a pair holding {@code b} and {@code i} by delegating to the overridable {@link #set}. */
29 | public BooleanIntPair(boolean b, int i)
30 | {
31 | set(b, i);
32 | }
33 | /** Overwrites both fields with the given values. */
34 | public void set(boolean b, int i)
35 | {
36 | this.b = b;
37 | this.i = i;
38 | }
39 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/DoubleIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of one {@code double} and one {@code int}, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class DoubleIntPair implements Serializable
24 | {
25 | private static final long serialVersionUID = -2439322004395455224L; // same literal as CharIntPair and CharCharPair in this listing; the ID is scoped per class, so the values do not clash
26 |
27 | public double d;
28 | public int i;
29 | /** Creates a pair holding {@code d} and {@code i} by delegating to the overridable {@link #set}. */
30 | public DoubleIntPair(double d, int i)
31 | {
32 | set(d, i);
33 | }
34 | /** Overwrites both fields with the given values. */
35 | public void set(double d, int i)
36 | {
37 | this.d = d;
38 | this.i = i;
39 | }
40 | }
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/conversion/headrule_en_stanford.txt:
--------------------------------------------------------------------------------
1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.*
2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.*
3 | CAPTION l NNP.*;NN.*;NP;CD;.*
4 | CIT l NNP.*;NN.*;NP;CD;.*
5 | CONJP l CC;VB.*;NN.*;TO|IN;.*
6 | EDITED r VP;VB.*;NN.*|PRP|NP;IN|PP;S.*;.*
7 | EMBED r S.*;FRAG|NP;.*
8 | FRAG r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.*
9 | INTJ l VB.*;NN.*;UH;INTJ;.*
10 | LST l LS|CD;NN;.*
11 | META l NP;VP|S;.*
12 | NAC r NN.*;NP;S|SINV;.*
13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.*
14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.*
15 | NX r NN.*;NX;NP;.*
16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.*
17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.*
18 | PRT l RP;PRT;.*
19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.*
20 | RRC l VP;VB.*;-PRD;NP|NN.*;ADJP;PP;.*
21 | S r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.*
22 | SBAR r VP;S|SQ|SINV;SBAR.*;FRAG|NP;.*
23 | SBARQ r VP;SQ|SBARQ;S|SINV;FRAG|NP;.*
24 | SINV r VP;VB.*;MD;S|SINV;NP;.*
25 | SQ r VP;VB.*;SQ;S;MD;NP;.*
26 | UCP r .*
27 | VP l VP;VB.*;MD|TO;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.*
28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.*
29 | WHADVP r RB.*|WRB;WHADVP;.*
30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.*
31 | WHPP l IN|TO;.*
32 | X r .*
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/CharCharPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of two {@code char} values, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class CharCharPair implements Serializable
24 | {
25 | private static final long serialVersionUID = -2439322004395455224L; // same literal as CharIntPair and DoubleIntPair in this listing; the ID is scoped per class, so the values do not clash
26 |
27 | public char c1;
28 | public char c2;
29 | /** Creates a pair holding {@code c1} and {@code c2} by delegating to the overridable {@link #set}. */
30 | public CharCharPair(char c1, char c2)
31 | {
32 | set(c1, c2);
33 | }
34 | /** Overwrites both fields with the given values. */
35 | public void set(char c1, char c2)
36 | {
37 | this.c1 = c1;
38 | this.c2 = c2;
39 | }
40 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectBooleanPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of one object reference and one {@code boolean}, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class ObjectBooleanPair implements Serializable // NOTE(review): T is used below but never declared; a <T> type parameter was presumably stripped from this listing — confirm against the repository
24 | {
25 | private static final long serialVersionUID = -3471022143310924799L;
26 | public T o;
27 | public boolean b;
28 | /** Creates a pair holding {@code o} and {@code b} by delegating to the overridable {@link #set}. */
29 | public ObjectBooleanPair(T o, boolean b)
30 | {
31 | set(o, b);
32 | }
33 | /** Overwrites both fields with the given values. */
34 | public void set(T o, boolean b)
35 | {
36 | this.o = o;
37 | this.b = b;
38 | }
39 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/morph/MorphAnalyzer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.morph;
17 |
18 | import edu.emory.mathcs.nlp.common.util.StringUtils;
19 |
20 | /** Abstract base for lemmatizers: subclasses implement {@link #lemmatize(String, String)}.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public abstract class MorphAnalyzer
24 | {
25 | /**
26 | * @param simplifiedWordForm simplified word form generated by {@link StringUtils#toSimplifiedForm(String)}.
27 | * @param pos Penn Treebank style part-of-speech tag.
28 | * @return the lemmas of the word form given the pos tag.
29 | */
30 | public abstract String lemmatize(String simplifiedWordForm, String pos);
31 | }
32 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/conversion/headrule_en_conll.txt:
--------------------------------------------------------------------------------
1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.*
2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.*
3 | CAPTION l NNP.*;NN.*;NP;CD;.*
4 | CIT l NNP.*;NN.*;NP;CD;.*
5 | CONJP l CC;VB.*;NN.*;TO|IN;.*
6 | EDITED r VB.*;VP;NN.*|PRP|NP;IN|PP;S.*;.*
7 | EMBED r S.*;FRAG|NP;.*
8 | FRAG r VB.*;VP;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.*
9 | INTJ l VB.*;NN.*;UH;INTJ;.*
10 | LST l LS|CD;NN;.*
11 | META l NP;VP|S;.*
12 | NAC r NN.*;NP;S|SINV;.*
13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.*
14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.*
15 | NX r NN.*;NX;NP;.*
16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.*
17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.*
18 | PRT l RP;PRT;.*
19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.*
20 | RRC l VB.*;VP;-PRD;NP|NN.*;ADJP;PP;.*
21 | S r MD|TO;VB.*;VP;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.*
22 | SBAR r IN|TO|DT;MD;VB.*;VP;S|SQ|SINV;SBAR.*;FRAG|NP;.*
23 | SBARQ r MD;VB.*;VP;SQ|SBARQ;S|SINV;FRAG|NP;.*
24 | SINV r MD;VB.*;VP;S|SINV;NP;.*
25 | SQ r MD;VB.*;VP;SQ;S;NP;.*
26 | UCP r .*
27 | VP l MD|TO;VB.*;VP;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.*
28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.*
29 | WHADVP r RB.*|WRB;WHADVP;.*
30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.*
31 | WHPP l IN|TO;.*
32 | X r .*
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/Triple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /** Serializable, mutable holder of three object references, exposed as public fields.
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class Triple implements Serializable // NOTE(review): T1, T2 and T3 are used below but never declared; a <T1, T2, T3> parameter list was presumably stripped from this listing — confirm against the repository
24 | {
25 | private static final long serialVersionUID = 2261656496863083672L;
26 | public T1 o1;
27 | public T2 o2;
28 | public T3 o3;
29 | /** Creates a triple holding the three arguments by delegating to the overridable {@link #set}. */
30 | public Triple(T1 o1, T2 o2, T3 o3)
31 | {
32 | set(o1, o2, o3);
33 | }
34 | /** Overwrites all three fields with the given values. */
35 | public void set(T1 o1, T2 o2, T3 o3)
36 | {
37 | this.o1 = o1;
38 | this.o2 = o2;
39 | this.o3 = o3;
40 | }
41 | }
--------------------------------------------------------------------------------
/api/src/test/resources/propbank/wsj_0002.parse:
--------------------------------------------------------------------------------
1 | (TOP (S (NP-SBJ-1 (NP (NNP Rudolph)
2 | (NNP Agnew))
3 | (, ,)
4 | (UCP (ADJP (NML (CD 55)
5 | (NNS years))
6 | (JJ old))
7 | (CC and)
8 | (NP (NP (JJ former)
9 | (NN chairman))
10 | (PP (IN of)
11 | (NP (NNP Consolidated)
12 | (NNP Gold)
13 | (NNP Fields)
14 | (NNP PLC)))))
15 | (, ,))
16 | (VP (VBD was)
17 | (VP (VBN named)
18 | (NP-2 (-NONE- *-1))
19 | (S-CLR (NP-SBJ (-NONE- *PRO*-2))
20 | (NP-PRD (NP (DT a)
21 | (JJ nonexecutive)
22 | (NN director))
23 | (PP (IN of)
24 | (NP (DT this)
25 | (JJ British)
26 | (JJ industrial)
27 | (NN conglomerate)))))))
28 | (. .)))
29 |
30 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/Pair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable holder for two values of independent types.
 * Both fields are public and may be read or reassigned directly.
 *
 * @param <T1> type of the first value.
 * @param <T2> type of the second value.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Pair<T1,T2> implements Serializable
{
	private static final long serialVersionUID = 8447270640444415417L;

	/** The first value; may be {@code null}. */
	public T1 o1;
	/** The second value; may be {@code null}. */
	public T2 o2;

	/** Creates a pair whose two values are both {@code null}. */
	public Pair()
	{
		set(null, null);
	}

	/**
	 * Creates a pair holding the given values.
	 * @param o1 the first value.
	 * @param o2 the second value.
	 */
	public Pair(T1 o1, T2 o2)
	{
		set(o1, o2);
	}

	/**
	 * Replaces both values at once.
	 * @param o1 the new first value.
	 * @param o2 the new second value.
	 */
	public void set(T1 o1, T2 o2)
	{
		this.o1 = o1;
		this.o2 = o2;
	}
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/propbank/frameset/PBFXml.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.propbank.frameset;
17 |
/**
 * String constants shared by code that works with frameset XML.
 * NOTE(review): the {@code E_} / {@code A_} prefixes presumably stand for XML
 * element and attribute names respectively; confirm against the reader that uses them.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PBFXml {
	// ---- E_* constants ----
	String E_FRAMESET  = "frameset";
	String E_PREDICATE = "predicate";
	String E_ROLESET   = "roleset";
	String E_ROLE      = "role";
	String E_VNROLE    = "vnrole";

	// ---- A_* constants ----
	String A_LEMMA   = "lemma";
	String A_ID      = "id";
	String A_DESCR   = "descr";
	String A_NAME    = "name";
	String A_N       = "n";
	String A_F       = "f";
	String A_VNCLS   = "vncls";
	String A_VNTHETA = "vntheta";
}
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/adjective.exc:
--------------------------------------------------------------------------------
1 | acer acer
2 | after after
3 | all-arounder all-arounder
4 | archer archer
5 | bayer bayer
6 | best good
7 | bestest best
8 | better good
9 | bluewater bluewater
10 | britisher britisher
11 | cagier cagey
12 | cagiest cagey
13 | camper camper
14 | cer cer
15 | cuter cute
16 | cutest cute
17 | damndest damned
18 | dicier dicey
19 | diciest dicey
20 | dopier dopey
21 | dopiest dopey
22 | eastsider eastsider
23 | eastwest eastwest
24 | eerier eerie
25 | eeriest eerie
26 | faker faker
27 | farther far
28 | farthest far
29 | feller feller
30 | first-rater first-rater
31 | first-stringer first-stringer
32 | flatwater flatwater
33 | fore-and-after fore-and-after
34 | further far
35 | furthest far
36 | gooier gooey
37 | gooiest gooey
38 | guest guest
39 | halfways halfway
40 | halter halter
41 | homier homey
42 | homiest homey
43 | later late
44 | latest late
45 | leer leer
46 | ler ler
47 | leveler leveler
48 | liver liver
49 | loather loather
50 | meeter meeter
51 | milcher milcher
52 | modest modest
53 | number number
54 | planer planer
55 | player player
56 | prompter prompter
57 | ranker ranker
58 | second-rater second-rater
59 | serer serer
60 | souther souther
61 | starest starest
62 | striper striper
63 | third-rater third-rater
64 | welsher welsher
65 | worse bad
66 | worst bad
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/feature/Source.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.feature;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Identifies which token a feature is drawn from; the meaning of each
 * constant depends on the task, as described on the constants themselves.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum Source implements Serializable {
	/**
	 * Dependency parsing: the top of the stack.
	 * Semantic role labeling: the predicate.
	 * Any other task: the input.
	 */
	i,

	/**
	 * Dependency parsing: the front of the input buffer.
	 * Semantic role labeling: the argument.
	 */
	j,

	/**
	 * Dependency parsing: peek of the stack.
	 */
	k
}
41 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/DoubleIntIntTriple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable triple made of one {@code double} and two {@code int} values.
 * All three fields are public and can be accessed directly.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DoubleIntIntTriple implements Serializable {
	private static final long serialVersionUID = -5353827334306132865L;

	/** The floating-point component. */
	public double d;
	/** The first integer component. */
	public int i1;
	/** The second integer component. */
	public int i2;

	/**
	 * Builds a triple from the three given components.
	 * @param d the floating-point component.
	 * @param i1 the first integer component.
	 * @param i2 the second integer component.
	 */
	public DoubleIntIntTriple(double d, int i1, int i2) {
		this.set(d, i1, i2);
	}

	/**
	 * Overwrites all three components in one call.
	 * @param d the new floating-point component.
	 * @param i1 the new first integer component.
	 * @param i2 the new second integer component.
	 */
	public void set(double d, int i1, int i2) {
		this.i2 = i2;
		this.i1 = i1;
		this.d  = d;
	}
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/random/XORShiftRandom.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.random;
17 |
18 | import java.util.Random;
19 |
20 | /**
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class XORShiftRandom extends Random
24 | {
25 | private static final long serialVersionUID = -6971555410750547741L;
26 | private long seed;
27 |
28 | public XORShiftRandom(long seed)
29 | {
30 | this.seed = seed;
31 | }
32 |
33 | @Override
34 | protected int next(int nbits)
35 | {
36 | long x = seed;
37 | x ^= (x << 21);
38 | x ^= (x >>> 35);
39 | x ^= (x << 4);
40 | seed = x;
41 | x &= ((1L << nbits) - 1);
42 | return (int)x;
43 | }
44 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/decode/NLPDecoder.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.decode;
17 |
18 | import edu.emory.mathcs.nlp.component.template.node.NLPNode;
19 |
20 | import java.io.InputStream;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class NLPDecoder extends AbstractNLPDecoder
26 | {
27 | public NLPDecoder() {super();}
28 |
29 | public NLPDecoder(DecodeConfig config)
30 | {
31 | super(config);
32 | }
33 |
34 | public NLPDecoder(InputStream configuration)
35 | {
36 | super(new DecodeConfig(configuration));
37 | }
38 |
39 | @Override
40 | public NLPNode create()
41 | {
42 | return new NLPNode();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/AbbreviationTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class AbbreviationTest
27 | {
28 | @Test
29 | public void test()
30 | {
31 | Abbreviation dt = new Abbreviation();
32 |
33 | assertTrue(dt.isAbbreviationEndingWithPeriod("mr"));
34 | assertTrue(dt.isAbbreviationEndingWithPeriod("mrs"));
35 |
36 | assertFalse(dt.isAbbreviationEndingWithPeriod("e.g"));
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/util/MathUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.util;
17 |
18 | import edu.emory.mathcs.nlp.common.util.MathUtils;
19 | import org.junit.Test;
20 |
21 | import static org.junit.Assert.assertEquals;
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class MathUtilsTest
27 | {
28 | @Test
29 | public void testPow()
30 | {
31 | int i, j;
32 |
33 | for (j=-5; j<5; j++)
34 | {
35 | if (j == 0) continue;
36 |
37 | for (i=-5; i<5; i++)
38 | {
39 | assertEquals(Math.pow( 2, i), MathUtils.pow( 2, i), 0);
40 | assertEquals(Math.pow(-2, i), MathUtils.pow(-2, i), 0);
41 | }
42 | }
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/BooleanIntIntTriple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable triple made of one {@code boolean} and two {@code int} values.
 * All three fields are public and can be accessed directly.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class BooleanIntIntTriple implements Serializable {
	private static final long serialVersionUID = -5353827334306132865L;

	/** The boolean component. */
	public boolean b;
	/** The first integer component. */
	public int i1;
	/** The second integer component. */
	public int i2;

	/**
	 * Builds a triple from the three given components.
	 * @param b the boolean component.
	 * @param i1 the first integer component.
	 * @param i2 the second integer component.
	 */
	public BooleanIntIntTriple(boolean b, int i1, int i2) {
		this.set(b, i1, i2);
	}

	/**
	 * Overwrites all three components in one call.
	 * @param b the new boolean component.
	 * @param i1 the new first integer component.
	 * @param i2 the new second integer component.
	 */
	public void set(boolean b, int i1, int i2) {
		this.i2 = i2;
		this.i1 = i1;
		this.b  = b;
	}
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/node/Orthographic.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.node;
17 |
/**
 * String codes ("0" through "13"), one per named orthographic category.
 * NOTE(review): the constant names suggest properties of a word form
 * (casing, digits, punctuation); confirm the exact definition of each
 * category against the code that emits these values.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface Orthographic {
	String HYPERLINK          = "0";

	// ALL_* codes
	String ALL_UPPER          = "1";
	String ALL_LOWER          = "2";
	String ALL_DIGIT          = "3";
	String ALL_PUNCT          = "4";
	String ALL_DIGIT_OR_PUNCT = "5";

	// HAS_* codes
	String HAS_DIGIT          = "6";
	String HAS_PERIOD         = "7";
	String HAS_HYPHEN         = "8";
	String HAS_OTHER_PUNCT    = "9";

	// casing-related codes
	String NO_LOWER           = "10";
	String FST_UPPER          = "11";
	String UPPER_1            = "12";
	String UPPER_2            = "13";
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/zzz/WordVector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.zzz;
17 |
/**
 * Associates a word with a {@code float} vector.
 * The array is stored and returned by reference; no copy is made.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class WordVector {
	private String word;
	private float[] vector;

	/**
	 * Creates an entry by passing both arguments through the setters.
	 * @param word the word.
	 * @param vector the vector belonging to the word.
	 */
	public WordVector(String word, float[] vector) {
		this.setWord(word);
		this.setVector(vector);
	}

	/** @return the stored word. */
	public String getWord() {
		return this.word;
	}

	/** @param word the word to store. */
	public void setWord(String word) {
		this.word = word;
	}

	/** @return the stored array itself, not a copy. */
	public float[] getVector() {
		return this.vector;
	}

	/** @param vector the array to store; it is kept by reference. */
	public void setVector(float[] vector) {
		this.vector = vector;
	}
}
52 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/util/MathUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.mathcs.nlp.common.util.MathUtils;
23 |
24 | /**
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class MathUtilsTest
28 | {
29 | @Test
30 | public void testPow()
31 | {
32 | int i, j;
33 |
34 | for (j=-5; j<5; j++)
35 | {
36 | if (j == 0) continue;
37 |
38 | for (i=-5; i<5; i++)
39 | {
40 | assertEquals(Math.pow( 2, i), MathUtils.pow( 2, i), 0);
41 | assertEquals(Math.pow(-2, i), MathUtils.pow(-2, i), 0);
42 | }
43 | }
44 | }
45 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/util/CharUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.util;
17 |
18 | import edu.emory.mathcs.nlp.common.util.StringUtils;
19 | import org.junit.Test;
20 |
21 | import static org.junit.Assert.assertFalse;
22 | import static org.junit.Assert.assertTrue;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class CharUtilsTest
29 | {
30 | @Test
31 | public void testContainsOnlyDigits()
32 | {
33 | assertTrue (StringUtils.containsDigitOnly("12"));
34 | assertFalse(StringUtils.containsDigitOnly("a1"));
35 | assertFalse(StringUtils.containsDigitOnly("1b"));
36 | assertFalse(StringUtils.containsDigitOnly("1-2"));
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/util/FileUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.util;
17 |
18 | import edu.emory.mathcs.nlp.common.util.FileUtils;
19 | import org.junit.Test;
20 |
21 | import static org.junit.Assert.assertEquals;
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class FileUtilsTest
27 | {
28 | @Test
29 | public void replaceExtensionTest()
30 | {
31 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "jpg"));
32 | assertEquals(null , FileUtils.replaceExtension("a", "jpg"));
33 |
34 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "txt", "jpg"));
35 | assertEquals(null , FileUtils.replaceExtension("a.txt", "bmp", "jpg"));
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/util/CharUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.mathcs.nlp.common.util.StringUtils;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class CharUtilsTest
30 | {
31 | @Test
32 | public void testContainsOnlyDigits()
33 | {
34 | assertTrue (StringUtils.containsDigitOnly("12"));
35 | assertFalse(StringUtils.containsDigitOnly("a1"));
36 | assertFalse(StringUtils.containsDigitOnly("1b"));
37 | assertFalse(StringUtils.containsDigitOnly("1-2"));
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectCharPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | /**
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class ObjectCharPair implements Serializable, Comparable>
24 | {
25 | private static final long serialVersionUID = -5228607179375724504L;
26 |
27 | public T o;
28 | public char c;
29 |
30 | public ObjectCharPair(T o, char c)
31 | {
32 | set(o, c);
33 | }
34 |
35 | public void set(T o, char c)
36 | {
37 | this.o = o;
38 | this.c = c;
39 | }
40 |
41 | @Override
42 | public int compareTo(ObjectCharPair p)
43 | {
44 | return c - p.c;
45 | }
46 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/util/FileUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.mathcs.nlp.common.util.FileUtils;
23 |
24 | /**
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class FileUtilsTest
28 | {
29 | @Test
30 | public void replaceExtensionTest()
31 | {
32 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "jpg"));
33 | assertEquals(null , FileUtils.replaceExtension("a", "jpg"));
34 |
35 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "txt", "jpg"));
36 | assertEquals(null , FileUtils.replaceExtension("a.txt", "bmp", "jpg"));
37 | }
38 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tree/PrefixNode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tree;
17 |
18 | import java.util.HashMap;
19 |
/**
 * A map from keys to child {@code PrefixNode}s that can additionally carry
 * one value of its own.
 *
 * @param <K> type of the keys leading to child nodes.
 * @param <V> type of the value stored at a node.
 * @since 3.0.3
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class PrefixNode<K extends Comparable<K>,V> extends HashMap<K,PrefixNode<K,V>>
{
	private static final long serialVersionUID = 1566684742873455351L;
	private V value;

	/** Creates a node with no children and no value. */
	public PrefixNode()
	{
		value = null;
	}

	/** @return the value stored at this node, or {@code null} if none has been set. */
	public V getValue()
	{
		return value;
	}

	/** @param value the value to store at this node; {@code null} clears it. */
	public void setValue(V value)
	{
		this.value = value;
	}

	/** @return {@code true} if the value stored at this node is not {@code null}. */
	public boolean hasValue()
	{
		return value != null;
	}
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectIntIntTriple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable triple made of one object and two {@code int} values.
 * All three fields are public and can be accessed directly.
 *
 * @param <T> type of the object component.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntIntTriple<T> implements Serializable
{
	private static final long serialVersionUID = -7014586350906455183L;

	/** The object component; may be {@code null}. */
	public T o;
	/** The first integer component. */
	public int i1;
	/** The second integer component. */
	public int i2;

	/** Creates a triple holding {@code null}, 0 and 0. */
	public ObjectIntIntTriple()
	{
		set(null, 0, 0);
	}

	/**
	 * Creates a triple holding the given components.
	 * @param o the object component.
	 * @param i1 the first integer component.
	 * @param i2 the second integer component.
	 */
	public ObjectIntIntTriple(T o, int i1, int i2)
	{
		set(o, i1, i2);
	}

	/**
	 * Replaces all three components at once.
	 * @param o the new object component.
	 * @param i1 the new first integer component.
	 * @param i2 the new second integer component.
	 */
	public void set(T o, int i1, int i2)
	{
		this.o = o;
		this.i1 = i1;
		this.i2 = i2;
	}
}
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/util/CharTokenizerTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.util;
17 |
18 | import edu.emory.mathcs.nlp.common.util.CharTokenizer;
19 | import org.junit.Test;
20 |
21 | import java.util.Arrays;
22 |
23 | import static org.junit.Assert.assertEquals;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class CharTokenizerTest
29 | {
30 | @Test
31 | public void test()
32 | {
33 | CharTokenizer t;
34 | String s;
35 |
36 | t = new CharTokenizer(',');
37 | s = "a,b,c";
38 | assertEquals("[a, b, c]", Arrays.toString(t.tokenize(s)));
39 |
40 | t = new CharTokenizer(';');
41 | s = ";abc;def;;ghi;";
42 | assertEquals("[abc, def, ghi]", Arrays.toString(t.tokenize(s)));
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/SoftmaxSmoothedFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.normalization;
17 |
18 | import edu.emory.mathcs.nlp.learning.activation.SoftmaxFunction;
19 |
20 | /**
21 | * @author amit-deshmane
22 | *
23 | */
24 | public class SoftmaxSmoothedFunction implements NormalizationFunction {
25 |
26 | private static final long serialVersionUID = -675360500573510747L;
27 | private SoftmaxFunction f;
28 |
29 | public SoftmaxSmoothedFunction() {
30 | f = new SoftmaxFunction();
31 | }
32 |
33 | /* (non-Javadoc)
34 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[])
35 | */
36 | @Override
37 | public void apply(float[] scores) {
38 | f.apply(scores);
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/cli/src/main/java/edu/emory/mathcs/nlp/bin/util/BinUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.bin.util;
17 |
18 | import org.kohsuke.args4j.CmdLineException;
19 | import org.kohsuke.args4j.CmdLineParser;
20 |
21 |
22 |
23 | /**
24 | * @since 3.0.0
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class BinUtils
28 | {
29 | private BinUtils() {}
30 |
31 | /** Initializes arguments using args4j. */
32 | static public void initArgs(String[] args, Object bean)
33 | {
34 | CmdLineParser cmd = new CmdLineParser(bean);
35 |
36 | try
37 | {
38 | cmd.parseArgument(args);
39 | }
40 | catch (CmdLineException e)
41 | {
42 | System.err.println(e.getMessage());
43 | cmd.printUsage(System.err);
44 | System.exit(1);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/util/CharTokenizerTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import java.util.Arrays;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.mathcs.nlp.common.util.CharTokenizer;
25 |
26 | /**
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class CharTokenizerTest
30 | {
31 | @Test
32 | public void test()
33 | {
34 | CharTokenizer t;
35 | String s;
36 |
37 | t = new CharTokenizer(',');
38 | s = "a,b,c";
39 | assertEquals("[a, b, c]", Arrays.toString(t.tokenize(s)));
40 |
41 | t = new CharTokenizer(';');
42 | s = ";abc;def;;ghi;";
43 | assertEquals("[abc, def, ghi]", Arrays.toString(t.tokenize(s)));
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-train-sample.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | 0.00001
13 | adagrad-mini-batch
14 | 0.02
15 | 0
16 |
17 | 1
18 | 5
19 | 0
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-train-sample-optimized.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | 0.00001
13 | adagrad-mini-batch
14 | 0.02
15 | 0
16 |
17 | 1
18 | 3
19 | 0
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-train-doc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | /Users/jdchoi/Documents/EmoryNLP/nlp4j-english/src/main/resources/edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz
14 |
15 |
16 | r3
17 |
18 |
19 | adagrad
20 | 0.01
21 | 0.001
22 | 0
23 | 20
24 | 0
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/util/FastUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import it.unimi.dsi.fastutil.floats.FloatArrayList;
19 | import it.unimi.dsi.fastutil.objects.Object2IntMap;
20 |
21 | /**
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class FastUtils
25 | {
26 | static public int increment(Object2IntMap map, K key)
27 | {
28 | return map.merge(key, 1, (oldCount, newCount) -> oldCount + newCount);
29 | }
30 |
31 | static public int increment(Object2IntMap map, K key, int count)
32 | {
33 | return map.merge(key, count, (oldCount, newCount) -> oldCount + newCount);
34 | }
35 |
36 | static public void add(FloatArrayList list, int index, float inc)
37 | {
38 | list.set(index, list.getFloat(index)+inc);
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/learning/gridsearch/GridFunctionTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.gridsearch;
17 |
18 | import edu.emory.mathcs.nlp.learning.gridsearch.ExpFunction;
19 | import edu.emory.mathcs.nlp.learning.gridsearch.GridFunction;
20 | import edu.emory.mathcs.nlp.learning.gridsearch.LinearFunction;
21 |
22 | /**
23 | * @author Amit_Deshmane
24 | *
25 | */
26 | public class GridFunctionTest {
27 |
28 | /**
29 | * @param args
30 | */
31 | public static void main(String[] args) {
32 | GridFunction f = new ExpFunction(1E-6f, 1f, 10f);
33 | while(f.next()){
34 | System.out.println(f.getVal());
35 | }
36 | System.out.println("*************");
37 | f = new LinearFunction(0.1f, 5f, 10);
38 | while(f.next()){
39 | System.out.println(f.getVal());
40 | }
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/reader/NLPReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.reader;
17 |
18 | import edu.emory.mathcs.nlp.component.template.node.NLPNode;
19 | import it.unimi.dsi.fastutil.objects.Object2IntMap;
20 |
21 | /**
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class NLPReader extends TSVReader
25 | {
26 | public NLPReader() {super();}
27 |
28 | public NLPReader(Object2IntMap map)
29 | {
30 | super(map);
31 | }
32 |
33 | public NLPReader(int form, int lemma, int pos, int feats, int dhead, int deprel, int sheads, int nament)
34 | {
35 | super(form, lemma, pos, feats, dhead, deprel, sheads, nament);
36 | }
37 |
38 | @Override
39 | protected NLPNode create()
40 | {
41 | return new NLPNode();
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/conversion/util/HeadRuleMapTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.conversion.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.mathcs.nlp.common.util.IOUtils;
23 | import edu.emory.mathcs.nlp.conversion.util.HeadRuleMap;
24 |
25 |
26 | /** @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */
27 | public class HeadRuleMapTest
28 | {
29 | @Test
30 | public void testHeadRuleMap()
31 | {
32 | String filename = "src/main/resources/edu/emory/mathcs/nlp/conversion/headrule_en_stanford.txt";
33 |
34 | HeadRuleMap map = new HeadRuleMap(IOUtils.createFileInputStream(filename));
35 | String str = map.toString();
36 |
37 | assertEquals(str, new HeadRuleMap(IOUtils.createByteArrayInputStream(str)).toString());
38 | }
39 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/CompoundTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.mathcs.nlp.common.util.Language;
26 |
27 | /**
28 | * @since 3.0.0
29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
30 | */
31 | public class CompoundTest
32 | {
33 | @Test
34 | public void test()
35 | {
36 | Compound dt = new Compound(Language.ENGLISH);
37 |
38 | assertEquals("[I, 'mmm]" , Arrays.toString(dt.tokenize("I'mmm")));
39 | assertEquals("[wha, d, ya]", Arrays.toString(dt.tokenize("whadya")));
40 |
41 | assertTrue(dt.tokenize("I'm") == null);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/util/FileExtensionFilter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import java.io.File;
19 | import java.io.FilenameFilter;
20 |
21 | import edu.emory.mathcs.nlp.common.constant.StringConst;
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class FileExtensionFilter implements FilenameFilter
27 | {
28 | private String s_extension;
29 |
30 | /** @param extension the extension of files to keep (e.g., {@code "txt"}). */
31 | public FileExtensionFilter(String extension)
32 | {
33 | s_extension = StringUtils.toLowerCase(extension);
34 | }
35 |
36 | @Override
37 | public boolean accept(File dir, String name)
38 | {
39 | return s_extension.equals(StringConst.ASTERISK) || StringUtils.toLowerCase(name).endsWith(s_extension);
40 | }
41 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/UnitTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class UnitTest
29 | {
30 | @Test
31 | public void test()
32 | {
33 | Unit dt = new Unit();
34 |
35 | assertEquals("[1, mg]", Arrays.toString(dt.tokenize("1mg")));
36 | assertEquals("[1, cm]", Arrays.toString(dt.tokenize("1cm")));
37 |
38 | assertEquals("[10, MG]", Arrays.toString(dt.tokenize("10MG")));
39 | assertEquals("[10, CM]", Arrays.toString(dt.tokenize("10CM")));
40 |
41 | assertTrue(dt.tokenize("1ma") == null);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/treebank/PBArc.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.treebank;
17 |
18 | import edu.emory.mathcs.nlp.common.collection.arc.AbstractArc;
19 | import edu.emory.mathcs.nlp.common.constituent.CTNode;
20 | import edu.emory.mathcs.nlp.common.propbank.PBArgument;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class PBArc extends AbstractArc
26 | {
27 | private static final long serialVersionUID = 8603308004980285093L;
28 |
29 | public PBArc(CTNode node, String label)
30 | {
31 | super(node, label);
32 | }
33 |
34 | @Override
35 | public String toString()
36 | {
37 | return node.getTerminalID() + PBArgument.DELIM + label;
38 | }
39 |
40 | @Override
41 | public int compareTo(AbstractArc arc)
42 | {
43 | return node.compareTo(arc.getNode());
44 | }
45 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/util/Prediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.util;
17 |
18 | import edu.emory.mathcs.nlp.common.util.MathUtils;
19 |
20 | import java.io.Serializable;
21 |
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public abstract class Prediction implements Serializable, Comparable
27 | {
28 | private static final long serialVersionUID = 4629812694101207696L;
29 | protected float score;
30 |
31 | public Prediction(float score)
32 | {
33 | setScore(score);
34 | }
35 |
36 | public float getScore()
37 | {
38 | return score;
39 | }
40 |
41 | public void setScore(float score)
42 | {
43 | this.score = score;
44 | }
45 |
46 | @Override
47 | public int compareTo(Prediction o)
48 | {
49 | return MathUtils.signum(score - o.score);
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/SigmoidFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.normalization;
17 |
18 | /**
19 | * @author amit-deshmane
20 | *
21 | * Well the its just application of sigmoid function
22 | * Someone can actually make the sum = 1 if needed
23 | *
24 | */
25 | public class SigmoidFunction implements NormalizationFunction {
26 |
27 | private static final long serialVersionUID = 873532059178086953L;
28 | private edu.emory.mathcs.nlp.learning.activation.SigmoidFunction f;
29 |
30 | public SigmoidFunction() {
31 | f = new edu.emory.mathcs.nlp.learning.activation.SigmoidFunction();
32 | }
33 |
34 | /* (non-Javadoc)
35 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[])
36 | */
37 | @Override
38 | public void apply(float[] scores) {
39 | f.apply(scores);
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/zzz/Tmp.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.zzz;
17 |
/**
 * Scratch class used for quick experiments.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Tmp
{
    /**
     * Calls {@link #get()} ten times, OR-ing the results together, and prints the combined flag.
     *
     * @param args command-line arguments; not used.
     */
    public Tmp(String[] args) throws Exception
    {
        boolean b = false;

        // |= does not short-circuit, so get() runs on every iteration even once b is true.
        for (int i=0; i<10; i++)
            b |= get();

        System.out.println(b);
    }

    /** Prints {@code HELLO} and returns {@code true}. */
    boolean get()
    {
        System.out.println("HELLO");
        return true;
    }

    /**
     * Decides whether a word form should be skipped.
     *
     * @param form the word form to check.
     * @return {@code true} if the form has fewer than 3 or more than 20 characters,
     *         or contains a character whose code is 128 or above; {@code false} otherwise.
     */
    boolean skip(String form)
    {
        char[] cs = form.toCharArray();
        if (cs.length < 3 || cs.length > 20) return true;

        for (int i=0; i<cs.length; i++)
        {
            // Anything at or beyond code 128 lies outside the 7-bit range.
            if (cs[i] >= 128)
                return true;
        }

        return false;
    }

    static public void main(String[] args) throws Exception
    {
        new Tmp(args);
    }
}
58 |
59 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/verbnet/VNXml.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.verbnet;
17 |
/**
 * String constants grouped by prefix: {@code E_*}, {@code A_*}, {@code ARG_TYPE_*} and {@code SYNRESTR_TYPE_*}.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface VNXml
{
    // E_* constants (presumably XML element names; confirm against the parser that uses them).
    public static final String E_VNSUBCLASS = "VNSUBCLASS";
    public static final String E_SEMANTICS  = "SEMANTICS";
    public static final String E_SYNRESTR   = "SYNRESTR";
    public static final String E_SYNTAX     = "SYNTAX";
    public static final String E_FRAMES     = "FRAMES";
    public static final String E_FRAME      = "FRAME";
    public static final String E_PRED       = "PRED";
    public static final String E_ARG        = "ARG";

    // A_* constants (presumably XML attribute names); note the two casings of "value".
    public static final String A_ID        = "ID";
    public static final String A_TYPE      = "type";
    public static final String A_VALUE     = "value";
    public static final String A_VALUE_CAP = "Value";
    public static final String A_BOOL      = "bool";

    // ARG_TYPE_* constants.
    public static final String ARG_TYPE_EVENT         = "Event";
    public static final String ARG_TYPE_THEM_ROLE     = "ThemRole";
    public static final String ARG_TYPE_VERB_SPECIFIC = "VerbSpecific";
    public static final String ARG_TYPE_CONSTANT      = "Constant";

    // SYNRESTR_TYPE_* constants.
    public static final String SYNRESTR_TYPE_PLURAL = "plural";
}
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/EmoticonTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class EmoticonTest
29 | {
30 | @Test
31 | public void test()
32 | {
33 | Emoticon dt = new Emoticon();
34 | String s;
35 |
36 | s = ":";
37 | assertTrue(dt.getEmoticonRange(s) == null);
38 |
39 | s = ":-)";
40 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s)));
41 |
42 | s = "Hi:-)";
43 | assertEquals("[2, 5]", Arrays.toString(dt.getEmoticonRange(s)));
44 |
45 | s = ":-)..";
46 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s)));
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/Dictionary.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import edu.emory.mathcs.nlp.common.util.CharUtils;
19 |
20 | /**
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public abstract class Dictionary
24 | {
25 | static public String ROOT = "edu/emory/mathcs/nlp/component/tokenizer/dictionary/";
26 |
27 | public String[] tokenize(String s)
28 | {
29 | char[] lcs = s.toCharArray();
30 | String lower = CharUtils.toLowerCase(lcs) ? new String(lcs) : s;
31 | return tokenize(s, lower, lcs);
32 | }
33 |
34 | /**
35 | * @param original the original string.
36 | * @param lower the lowercase of the original string.
37 | * @param lcs the lowercase character array of the original string.
38 | */
39 | abstract public String[] tokenize(String original, String lower, char[] lcs);
40 | }
41 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable pair of an object and an {@code int}; pairs are ordered by their {@code int} component only.
 *
 * @param <T> the type of the object component.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntPair<T> implements Serializable, Comparable<ObjectIntPair<T>>
{
    private static final long serialVersionUID = -5228607179375724504L;

    public T   o;
    public int i;

    /** Creates the pair {@code (null, 0)}. */
    public ObjectIntPair()
    {
        set(null, 0);
    }

    /**
     * @param o the object component.
     * @param i the integer component.
     */
    public ObjectIntPair(T o, int i)
    {
        set(o, i);
    }

    /** Replaces both components. */
    public void set(T o, int i)
    {
        this.o = o;
        this.i = i;
    }

    /**
     * Compares the integer components; the object components are ignored.
     *
     * @param p the pair to compare against.
     * @return a negative, zero, or positive value as this pair's integer is less than,
     *         equal to, or greater than that of {@code p}.
     */
    @Override
    public int compareTo(ObjectIntPair<T> p)
    {
        // Integer.compare instead of subtraction: i - p.i overflows for operands far apart.
        return Integer.compare(i, p.i);
    }

    /** @return {@code "(o,i)"}; a {@code null} object is rendered as {@code null}. */
    @Override
    public String toString()
    {
        // Concatenation goes through String.valueOf, so a null object does not throw.
        return "(" + o + "," + i + ")";
    }
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/util/SparsePrediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.util;
17 |
18 | /**
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public class SparsePrediction extends Prediction
22 | {
23 | private static final long serialVersionUID = -2873195048974695284L;
24 | private int label;
25 |
26 | public SparsePrediction(int label, float score)
27 | {
28 | super(score);
29 | setLabel(label);
30 | }
31 |
32 | public int getLabel()
33 | {
34 | return label;
35 | }
36 |
37 | public void setLabel(int label)
38 | {
39 | this.label = label;
40 | }
41 |
42 | public void copy(SparsePrediction p)
43 | {
44 | set(p.label, p.score);
45 | }
46 |
47 | public void set(int label, float score)
48 | {
49 | setLabel(label);
50 | setScore(score);
51 | }
52 |
53 | @Override
54 | public String toString()
55 | {
56 | return label+":"+score;
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/initialization/RandomWeightGenerator.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.initialization;
17 |
18 | import edu.emory.mathcs.nlp.common.random.XORShiftRandom;
19 |
20 | import java.util.Random;
21 |
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class RandomWeightGenerator implements WeightGenerator
27 | {
28 | /**
29 | *
30 | */
31 | private static final long serialVersionUID = 4923093894775449475L;
32 | private float lower_bound; // inclusive
33 | private float upper_bound; // exclusive
34 | private Random rand;
35 |
36 | public RandomWeightGenerator(float lowerBound, float upperBound)
37 | {
38 | lower_bound = lowerBound;
39 | upper_bound = upperBound;
40 | rand = new XORShiftRandom(9);
41 | }
42 |
43 | @Override
44 | public float next()
45 | {
46 | return lower_bound + (upper_bound - lower_bound) * rand.nextFloat();
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/SoftmaxFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.activation;
17 |
18 | import edu.emory.mathcs.nlp.common.util.DSUtils;
19 | import org.apache.commons.math3.util.FastMath;
20 |
21 | /**
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class SoftmaxFunction implements ActivationFunction
25 | {
26 | private static final long serialVersionUID = -3091974476056808242L;
27 |
28 | @Override
29 | public void apply(float[] scores)
30 | {
31 | float sum = 0, max = DSUtils.max(scores);
32 | max = 0;
33 |
34 | for (int i=0; i> extends AbstractArc
25 | {
26 | private static final long serialVersionUID = -9099516205158258095L;
27 | private double weight;
28 |
/**
 * Creates an arc by passing the node and the label to the superclass constructor.
 *
 * @param node  the node of this arc.
 * @param label the label of this arc.
 */
public DEPArc(N node, String label)
{
    super(node, label);
}
33 |
34 | public double getWeight()
35 | {
36 | return weight;
37 | }
38 |
39 | public void setWeight(double weight)
40 | {
41 | this.weight = weight;
42 | }
43 |
44 | @Override
45 | public String toString()
46 | {
47 | return node.getID() + LABEL_DELIM + label;
48 | }
49 |
50 | @Override
51 | public int compareTo(AbstractArc arc)
52 | {
53 | return node.compareTo(arc.getNode());
54 | }
55 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/learning/util/LabelMapTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class LabelMapTest
26 | {
27 | @Test
28 | public void test()
29 | {
30 | LabelMap map = new LabelMap();
31 |
32 | assertEquals(0, map.add("A"));
33 | assertEquals(1, map.add("B"));
34 | assertEquals(2, map.add("C"));
35 | assertEquals(0, map.add("A"));
36 | assertEquals(0, map.add("A"));
37 | assertEquals(2, map.add("C"));
38 |
39 | assertEquals( 0, map.index("A"));
40 | assertEquals( 1, map.index("B"));
41 | assertEquals( 2, map.index("C"));
42 | assertEquals(-1, map.index("D"));
43 |
44 | assertEquals(3, map.size());
45 |
46 | assertEquals("A", map.getLabel(map.index("A")));
47 | assertEquals("B", map.getLabel(map.index("B")));
48 | assertEquals("C", map.getLabel(map.index("C")));
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/EnglishApostropheTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import org.junit.Test;
19 |
20 | import edu.emory.mathcs.nlp.component.tokenizer.dictionary.EnglishApostrophe;
21 |
22 | import java.util.Arrays;
23 |
24 | import static org.junit.Assert.assertEquals;
25 | import static org.junit.Assert.assertTrue;
26 |
27 | /**
28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
29 | */
30 | public class EnglishApostropheTest
31 | {
32 | @Test
33 | public void test()
34 | {
35 | EnglishApostrophe dt = new EnglishApostrophe();
36 |
37 | assertEquals("[he, 's]" , Arrays.toString(dt.tokenize("he's")));
38 | assertEquals("[he, 'S]" , Arrays.toString(dt.tokenize("he'S")));
39 | assertEquals("[do, n't]", Arrays.toString(dt.tokenize("don't")));
40 | assertEquals("[do, 'nt]", Arrays.toString(dt.tokenize("do'nt")));
41 |
42 | assertTrue(dt.tokenize("he'dd") == null);
43 | assertTrue(dt.tokenize("dont") == null);
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/Abbreviation.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import edu.emory.mathcs.nlp.common.util.DSUtils;
19 | import edu.emory.mathcs.nlp.common.util.IOUtils;
20 |
21 | import java.io.InputStream;
22 | import java.util.Set;
23 |
24 | /**
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class Abbreviation
28 | {
29 | private Set s_period;
30 |
31 | public Abbreviation()
32 | {
33 | String filename = Dictionary.ROOT + "abbreviation-period.txt";
34 | init(IOUtils.getInputStreamsFromResource(filename));
35 | }
36 |
37 | public Abbreviation(InputStream abbreviationPeriod)
38 | {
39 | init(abbreviationPeriod);
40 | }
41 |
42 | public void init(InputStream abbreviationPeriod)
43 | {
44 | s_period = DSUtils.createStringHashSet(abbreviationPeriod, true, true);
45 | }
46 |
47 | public boolean isAbbreviationEndingWithPeriod(String lower)
48 | {
49 | return s_period.contains(lower);
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/gridsearch/GridFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.gridsearch;
17 |
/**
 * A one-dimensional grid with a movable current position and a markable position.
 *
 * @author Amit_Deshmane
 */
public interface GridFunction
{
    /**
     * Gets the current value in the grid.
     *
     * @return the value at the current position.
     */
    float getVal();

    /** Resets the grid to its initial value. */
    void reset();

    /**
     * Moves to the previous point in the grid.
     *
     * @return {@code true} if a previous point exists; otherwise, {@code false}.
     */
    boolean previous();

    /**
     * Moves to the next point in the grid.
     *
     * @return {@code true} if a next point exists; otherwise, {@code false}.
     */
    boolean next();

    /** Moves back to the previously marked position. */
    void resetToMark();

    /** Marks the current position for future reference. */
    void mark();
}
63 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/util/StringPrediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.util;
17 |
18 | /**
19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
20 | */
21 | public class StringPrediction extends Prediction
22 | {
23 | private static final long serialVersionUID = 4629812694101207696L;
24 | private String label;
25 |
26 | public StringPrediction(String label, float score)
27 | {
28 | super(score);
29 | setLabel(label);
30 | }
31 |
32 | public String getLabel()
33 | {
34 | return label;
35 | }
36 |
37 | public void setLabel(String label)
38 | {
39 | this.label = label;
40 | }
41 |
42 | public boolean isLabel(String label)
43 | {
44 | return label.equals(this.label);
45 | }
46 |
47 | public void copy(StringPrediction p)
48 | {
49 | set(p.label, p.score);
50 | }
51 |
52 | public void set(String label, float score)
53 | {
54 | setLabel(label);
55 | setScore(score);
56 | }
57 |
58 | @Override
59 | public String toString()
60 | {
61 | return label+":"+score;
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/eval/AccuracyEval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.eval;
17 |
18 | import edu.emory.mathcs.nlp.common.util.MathUtils;
19 |
20 | /**
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class AccuracyEval implements Eval
24 | {
25 | private int correct;
26 | private int total;
27 |
28 | public AccuracyEval()
29 | {
30 | clear();
31 | }
32 |
33 | public void add(int correct, int total)
34 | {
35 | this.correct += correct;
36 | this.total += total;
37 | }
38 |
39 | public int correct()
40 | {
41 | return correct;
42 | }
43 |
44 | public int total()
45 | {
46 | return total;
47 | }
48 |
49 | @Override
50 | public void clear()
51 | {
52 | correct = total = 0;
53 | }
54 |
55 | @Override
56 | public double score()
57 | {
58 | return MathUtils.accuracy(correct, total);
59 | }
60 |
61 | @Override
62 | public String toString()
63 | {
64 | return String.format("ACC = %5.2f (%d/%d)", score(), correct, total);
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/english-hyphen-prefix.txt:
--------------------------------------------------------------------------------
1 | a
2 | afro
3 | ambi
4 | amphi
5 | an
6 | ana
7 | anglo
8 | ante
9 | anti
10 | apo
11 | arch
12 | astro
13 | auto
14 | be
15 | bi
16 | bio
17 | centi
18 | circum
19 | cis
20 | co
21 | col
22 | com
23 | con
24 | contra
25 | cor
26 | counter
27 | cran
28 | cross
29 | cryo
30 | crypto
31 | de
32 | deca
33 | demi
34 | demo
35 | deuter
36 | deutero
37 | di
38 | dia
39 | dif
40 | dis
41 | du
42 | duo
43 | e
44 | eco
45 | electro
46 | em
47 | en
48 | ennea
49 | epi
50 | euro
51 | ex
52 | extra
53 | fin
54 | fore
55 | franco
56 | geo
57 | giga
58 | gyro
59 | hemi
60 | hepta
61 | hetero
62 | hexa
63 | hi
64 | hind
65 | homo
66 | hydro
67 | hyper
68 | hypo
69 | ideo
70 | idio
71 | in
72 | indo
73 | infra
74 | inter
75 | intra
76 | iso
77 | macro
78 | mal
79 | maxi
80 | medi
81 | mega
82 | meta
83 | micro
84 | mid
85 | midi
86 | milli
87 | mini
88 | mis
89 | mm
90 | mono
91 | multi
92 | neo
93 | non
94 | novem
95 | o
96 | octa
97 | octo
98 | omni
99 | ortho
100 | out
101 | over
102 | paleo
103 | pan
104 | para
105 | part
106 | ped
107 | penta
108 | per
109 | peri
110 | photo
111 | pica
112 | pod
113 | poly
114 | post
115 | pre
116 | preter
117 | pro
118 | pros
119 | proto
120 | pseudo
121 | pyro
122 | quadri
123 | quasi
124 | quinque
125 | re
126 | retro
127 | self
128 | semi
129 | sept
130 | soci
131 | socio
132 | step
133 | sub
134 | sup
135 | super
136 | supra
137 | sur
138 | syn
139 | t
140 | tele
141 | tera
142 | tetra
143 | trans
144 | tri
145 | twi
146 | u
147 | uber
148 | uh
149 | ultra
150 | un
151 | under
152 | uni
153 | up
154 | vice
155 | with
156 | x
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectFloatPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | import edu.emory.mathcs.nlp.common.util.MathUtils;
21 |
/**
 * A mutable pair of an object and a {@code float}, ordered by the float part.
 *
 * @param <T> the type of the object part.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectFloatPair<T> implements Serializable, Comparable<ObjectFloatPair<T>>
{
    private static final long serialVersionUID = -4442614450903889259L;

    /** The object part; may be {@code null}. */
    public T o;
    /** The float part. */
    public float f;

    /**
     * @param o the object part.
     * @param f the float part.
     */
    public ObjectFloatPair(T o, float f)
    {
        set(o, f);
    }

    /**
     * Overwrites both parts of this pair.
     *
     * @param o the new object part.
     * @param f the new float part.
     */
    public void set(T o, float f)
    {
        this.o = o;
        this.f = f;
    }

    /** @return the object part. */
    public T getObject()
    {
        return o;
    }

    /** @return the float part. */
    public float getFloat()
    {
        return f;
    }

    /**
     * Orders pairs by their float part in ascending order; the object part is ignored.
     * Uses {@link Float#compare(float, float)} so that the ordering is total: a difference-based
     * comparison yields NaN whenever a NaN is involved, which makes NaN "equal" to every value and
     * breaks the transitivity required by {@link Comparable}. As with {@code Float.compare},
     * NaN sorts last and {@code -0.0f} sorts before {@code 0.0f}.
     *
     * @param p the pair to compare against.
     * @return a negative value, zero, or a positive value if this pair's float part is
     *         less than, equal to, or greater than that of {@code p}.
     */
    @Override
    public int compareTo(ObjectFloatPair<T> p)
    {
        return Float.compare(f, p.f);
    }

    /**
     * @return this pair formatted as {@code (object,float)}; a {@code null} object is rendered
     *         as {@code null} instead of raising a {@link NullPointerException}.
     */
    @Override
    public String toString()
    {
        return "(" + o + "," + f + ")";
    }
}
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/CurrencyTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertFalse;
20 | import static org.junit.Assert.assertTrue;
21 |
22 | import java.util.Arrays;
23 |
24 | import org.junit.Test;
25 |
26 | /**
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class CurrencyTest
30 | {
31 | @Test
32 | public void test()
33 | {
34 | Currency dt = new Currency();
35 |
36 | assertTrue(dt.isCurrencyDollar("c"));
37 | assertTrue(dt.isCurrencyDollar("us"));
38 |
39 | assertTrue(dt.isCurrency("usd"));
40 | assertTrue(dt.isCurrency("us$"));
41 |
42 | assertFalse(dt.isCurrencyDollar("US"));
43 | assertFalse(dt.isCurrencyDollar("a"));
44 | assertFalse(dt.isCurrency("usb"));
45 |
46 | assertEquals("[USD, 1]", Arrays.toString(dt.tokenize("USD1")));
47 | assertEquals("[us$, 1]", Arrays.toString(dt.tokenize("us$1")));
48 | assertTrue(dt.tokenize("u$1") == null);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/HtmlTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class HtmlTest
26 | {
27 | @Test
28 | public void test()
29 | {
30 | Html html = new Html();
31 | StringBuilder build;
32 | String s;
33 |
34 | s = ""&<>";
35 | assertEquals("\"&<>", html.replace(s));
36 |
37 | s = "¢£¤¥§©®€";
38 | build = new StringBuilder();
39 |
40 | build.append((char)162);
41 | build.append((char)163);
42 | build.append((char)164);
43 | build.append((char)165);
44 | build.append((char)167);
45 | build.append((char)169);
46 | build.append((char)174);
47 | build.append((char)8364);
48 |
49 | assertEquals(build.toString(), html.replace(s));
50 |
51 | s = "!<&rand;>{";
52 | assertEquals("!<&rand;>{", html.replace(s));
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectDoublePair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.tuple;
17 |
18 | import java.io.Serializable;
19 |
20 | import edu.emory.mathcs.nlp.common.util.MathUtils;
21 |
/**
 * A mutable pair of an object and a {@code double}, ordered by the double part.
 *
 * @param <T> the type of the object part.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectDoublePair<T> implements Serializable, Comparable<ObjectDoublePair<T>>
{
    private static final long serialVersionUID = -5228607179375724504L;

    /** The object part; may be {@code null}. */
    public T o;
    /** The double part. */
    public double d;

    /**
     * @param o the object part.
     * @param d the double part.
     */
    public ObjectDoublePair(T o, double d)
    {
        set(o, d);
    }

    /**
     * Overwrites both parts of this pair.
     *
     * @param o the new object part.
     * @param d the new double part.
     */
    public void set(T o, double d)
    {
        this.o = o;
        this.d = d;
    }

    /** @return the object part. */
    public T getObject()
    {
        return o;
    }

    /** @return the double part. */
    public double getDouble()
    {
        return d;
    }

    /**
     * Orders pairs by their double part in ascending order; the object part is ignored.
     * Uses {@link Double#compare(double, double)} so that the ordering is total: a difference-based
     * comparison yields NaN whenever a NaN is involved, which makes NaN "equal" to every value and
     * breaks the transitivity required by {@link Comparable}. As with {@code Double.compare},
     * NaN sorts last and {@code -0.0} sorts before {@code 0.0}.
     *
     * @param p the pair to compare against.
     * @return a negative value, zero, or a positive value if this pair's double part is
     *         less than, equal to, or greater than that of {@code p}.
     */
    @Override
    public int compareTo(ObjectDoublePair<T> p)
    {
        return Double.compare(d, p.d);
    }

    /**
     * @return this pair formatted as {@code (object,double)}; a {@code null} object is rendered
     *         as {@code null} instead of raising a {@link NullPointerException}.
     */
    @Override
    public String toString()
    {
        return "(" + o + "," + d + ")";
    }
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/lexicon/GlobalLexicon.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2016, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.lexicon;
17 |
18 | import edu.emory.mathcs.nlp.component.template.feature.Field;
19 |
20 | /**
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class GlobalLexicon
24 | {
25 | private T lexicon;
26 | private Field field;
27 | private String name;
28 |
29 | public GlobalLexicon() {}
30 |
31 | public GlobalLexicon(T lexicon, Field field, String name)
32 | {
33 | setLexicon(lexicon);
34 | setField(field);
35 | setName(name);
36 | }
37 |
38 | public T getLexicon()
39 | {
40 | return lexicon;
41 | }
42 |
43 | public void setLexicon(T lexicon)
44 | {
45 | this.lexicon = lexicon;
46 | }
47 |
48 | public Field getField()
49 | {
50 | return field;
51 | }
52 |
53 | public void setField(Field field)
54 | {
55 | this.field = field;
56 | }
57 |
58 | public String getName()
59 | {
60 | return name;
61 | }
62 |
63 | public void setName(String name)
64 | {
65 | this.name = name;
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/collection/ngram/BigramTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.collection.ngram;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.mathcs.nlp.common.collection.tuple.ObjectDoublePair;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class BigramTest
29 | {
30 | @Test
31 | public void test()
32 | {
33 | Bigram map = new Bigram<>();
34 |
35 | map.add("A", "a1");
36 | map.add("A", "a2");
37 | map.add("A", "a1");
38 | map.add("A", "a3");
39 |
40 | map.add("B", "b1");
41 | map.add("B", "b2", 2);
42 | map.add("B", "b3");
43 |
44 | ObjectDoublePair p = map.getBest("A");
45 | assertEquals("a1", p.o);
46 | assertEquals(0.5, p.d, 0);
47 |
48 | p = map.getBest("B");
49 | assertEquals("b2", p.o);
50 | assertEquals(0.5, p.d, 0);
51 |
52 | assertEquals("[(a1,2)]" , map.toList("A", 1).toString());
53 | assertEquals("[(b2,0.5)]", map.toList("B", 0.4).toString());
54 |
55 | }
56 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/DTHyphenTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class DTHyphenTest
27 | {
28 | @Test
29 | public void test()
30 | {
31 | EnglishHyphen dt = new EnglishHyphen();
32 |
33 | assertTrue(dt.isPrefix("inter"));
34 | assertTrue(dt.isSuffix("ful"));
35 | assertTrue(dt.preserveHyphen("inter-connect".toCharArray(), 5));
36 | assertTrue(dt.preserveHyphen("beauti-ful".toCharArray(), 6));
37 | assertTrue(dt.preserveHyphen("b-a-d".toCharArray(), 1));
38 | assertTrue(dt.preserveHyphen("b-a-d".toCharArray(), 3));
39 |
40 | assertFalse(dt.preserveHyphen("inte-connect".toCharArray(), 4));
41 | assertFalse(dt.preserveHyphen("beauti-fu".toCharArray(), 6));
42 | assertFalse(dt.preserveHyphen("b-c-d".toCharArray(), 1));
43 | assertFalse(dt.preserveHyphen("b-c-d".toCharArray(), 3));
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/cli/src/main/java/edu/emory/mathcs/nlp/bin/NLPDemo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.bin;
17 |
18 | import edu.emory.mathcs.nlp.common.util.IOUtils;
19 | import edu.emory.mathcs.nlp.common.util.Joiner;
20 | import edu.emory.mathcs.nlp.component.template.node.NLPNode;
21 | import edu.emory.mathcs.nlp.decode.AbstractNLPDecoder;
22 | import edu.emory.mathcs.nlp.decode.NLPDecoder;
23 |
24 | /**
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class NLPDemo
28 | {
29 | static public void main(String[] args) throws Exception
30 | {
31 | final String configFile = "src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-en.xml";
32 | final String inputFile = "src/test/resources/dat/nlp4j.txt";
33 |
34 | NLPDecoder nlp4j = new NLPDecoder(IOUtils.createFileInputStream(configFile));
35 | NLPNode[] nodes;
36 |
37 | String sentence = "John bought a car for Mary.";
38 | nodes = nlp4j.decode(sentence);
39 | System.out.println(Joiner.join(nodes, "\n", 1)+"\n");
40 | nlp4j.decode(IOUtils.createFileInputStream(inputFile), System.out, AbstractNLPDecoder.FORMAT_RAW);
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/tokenizer/token/TokenIndex.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.tokenizer.token;
17 |
18 |
19 | /**
20 | * @author Amit-Deshmane
21 | * This class tracks the value of index variable.
22 | */
23 | public class TokenIndex
24 | {
25 | int val;
26 |
27 | public TokenIndex() {}
28 |
29 | public TokenIndex(int val)
30 | {
31 | this.val = val;
32 | }
33 |
34 | public int getVal()
35 | {
36 | return val;
37 | }
38 |
39 | public void setVal(int val)
40 | {
41 | this.val = val;
42 | }
43 |
44 | public String toString()
45 | {
46 | return Integer.toString(val);
47 | }
48 |
49 | public boolean equals(Object obj)
50 | {
51 | if(!TokenIndex.class.isInstance(obj))
52 | {
53 | return false;
54 | }
55 | else
56 | {
57 | TokenIndex input = (TokenIndex)obj;
58 |
59 | if(input.getVal() == val)
60 | {
61 | return true;
62 | }
63 | }
64 |
65 | return false;
66 | }
67 |
68 | public int hashCode()
69 | {
70 | int prime = 31;
71 | int result = 1;
72 | result = result*prime + val;
73 | return result;
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/learning/util/FeatureVectorTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.mathcs.nlp.learning.util.SparseVector;
23 | import edu.emory.mathcs.nlp.learning.util.StringVector;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class FeatureVectorTest
29 | {
30 | @Test
31 | public void testSparseVector()
32 | {
33 | SparseVector x = new SparseVector();
34 |
35 | x.add(2);
36 | x.add(1, 0.2f);
37 | x.add(4, 0.3f);
38 | x.add(3);
39 |
40 | assertEquals("2:1.0 1:0.2 4:0.3 3:1.0", x.toString());
41 | x.sort();
42 | assertEquals("1:0.2 2:1.0 3:1.0 4:0.3", x.toString());
43 | }
44 |
45 | @Test
46 | public void testStringVector()
47 | {
48 | StringVector vector = new StringVector();
49 |
50 | vector.add((short)2, "B");
51 | vector.add((short)4, "A", 0.2f);
52 | vector.add((short)1, "A", 0.3f);
53 | vector.add((short)3, "C");
54 |
55 | assertEquals("2:B:1.0 4:A:0.2 1:A:0.3 3:C:1.0", vector.toString());
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/SoftmaxFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.normalization;
17 |
18 | import edu.emory.mathcs.nlp.common.util.DSUtils;
19 | import org.apache.commons.math3.util.FastMath;
20 |
21 | /**
22 | * @author amit-deshmane
23 | *
24 | */
25 | public class SoftmaxFunction implements NormalizationFunction {
26 |
27 | private static final long serialVersionUID = -2922860244331616104L;
28 |
29 | public SoftmaxFunction() {
30 | }
31 |
32 | /* (non-Javadoc)
33 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[])
34 | */
35 | @Override
36 | public void apply(float[] scores)
37 | {
38 | float sum = 0, max = DSUtils.max(scores);
39 | max = 0;
40 |
41 | for (int i=0; i> extends L2RState
27 | {
/**
 * Creates a tagging state over the given nodes by handing them to the superclass constructor.
 * @param nodes the nodes to be processed.
 */
public POSState(N[] nodes)
{
	super(nodes);
}
32 |
33 | @Override
34 | protected String getLabel(N node)
35 | {
36 | return node.getPartOfSpeechTag();
37 | }
38 |
39 | @Override
40 | protected String setLabel(N node, String label)
41 | {
42 | String s = node.getPartOfSpeechTag();
43 | node.setPartOfSpeechTag(label);
44 | return s;
45 | }
46 |
47 | @Override
48 | public void next(LabelMap map, int[] top2, float[] scores)
49 | {
50 | if (0 <= top2[1] && scores[top2[0]] - scores[top2[1]] < 1)
51 | getInput().putFeat(NLPUtils.FEAT_POS_2ND, map.getLabel(top2[1]));
52 |
53 | super.next(map, top2, scores);
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/constant/CharConst.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the 'License');
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an 'AS IS' BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.constant;
17 |
/**
 * Named constants for individual characters (symbols, punctuation, brackets, whitespace).
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface CharConst
{
	// ----- operators and symbols -----
	char PLUS         = '+';
	char ASTERISK     = '*';
	char EQUAL        = '=';
	char PERCENT      = '%';
	char POUND        = '#';
	char DOLLAR       = '$';
	char AMPERSAND    = '&';
	char AT           = '@';
	char TILDA        = '~';
	char PIPE         = '|';
	char UNDERSCORE   = '_';
	char HYPHEN       = '-';
	char LESS_THAN    = '<';
	char GREATER_THAN = '>';

	// ----- slashes -----
	char FW_SLASH = '/';
	char BW_SLASH = '\\';

	// ----- punctuation -----
	char COMMA       = ',';
	char COLON       = ':';
	char SEMICOLON   = ';';
	char PERIOD      = '.';
	char QUESTION    = '?';
	char EXCLAMATION = '!';

	// ----- quotes -----
	/** The backtick character. */
	char PRIME        = '`';
	char SINGLE_QUOTE = '\'';
	char DOUBLE_QUOTE = '"';

	// ----- brackets: round (RB), curly (CB), square (SB) -----
	char LRB = '(';
	char RRB = ')';
	char LCB = '{';
	char RCB = '}';
	char LSB = '[';
	char RSB = ']';

	// ----- digits -----
	char ZERO = '0';

	// ----- whitespace and the null character -----
	/** The character whose code is 0. */
	char EMPTY    = '\0';
	char SPACE    = ' ';
	char TAB      = '\t';
	char NEW_LINE = '\n';
}
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/dep/DEPEval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.dep;
17 |
18 | import edu.emory.mathcs.nlp.common.util.MathUtils;
19 | import edu.emory.mathcs.nlp.component.template.eval.Eval;
20 |
21 | /**
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class DEPEval implements Eval
25 | {
26 | private int las, uas;
27 | private int total;
28 |
29 | public DEPEval()
30 | {
31 | clear();
32 | }
33 |
34 | public void add(int las, int uas, int total)
35 | {
36 | this.las += las;
37 | this.uas += uas;
38 | this.total += total;
39 | }
40 |
41 | public void clear()
42 | {
43 | las = uas = total = 0;
44 | }
45 |
46 | public int total()
47 | {
48 | return total;
49 | }
50 |
51 | public double getLAS()
52 | {
53 | return MathUtils.accuracy(las, total);
54 | }
55 |
56 | public double getUAS()
57 | {
58 | return MathUtils.accuracy(uas, total);
59 | }
60 |
61 | @Override
62 | public double score()
63 | {
64 | return getLAS();
65 | }
66 |
67 | @Override
68 | public String toString()
69 | {
70 | return String.format("LAS = %5.2f, UAS = %5.2f", getLAS(), getUAS());
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/morph/MorphologicalAnalyzer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.morph;
17 |
18 | import edu.emory.mathcs.nlp.common.util.Language;
19 | import edu.emory.mathcs.nlp.component.morph.english.EnglishMorphAnalyzer;
20 | import edu.emory.mathcs.nlp.component.template.NLPComponent;
21 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode;
22 |
23 | import java.util.List;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class MorphologicalAnalyzer> implements NLPComponent
29 | {
30 | private MorphAnalyzer analyzer;
31 |
32 | public MorphologicalAnalyzer(Language language)
33 | {
34 | analyzer = new EnglishMorphAnalyzer();
35 | }
36 |
37 | @Override
38 | public void process(N[] nodes)
39 | {
40 | N node;
41 |
42 | for (int i=1; i document)
51 | {
52 | for (N[] nodes : document)
53 | process(nodes);
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/gridsearch/LinearFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.gridsearch;
17 |
18 | /**
19 | * @author Amit_Deshmane
20 | *
21 | */
22 | public class LinearFunction implements GridFunction {
23 |
24 | /**
25 | *
26 | */
27 | public float min;
28 | public float max;
29 | public int steps;
30 | public int index = -1;
31 | public int markIndex = -1;
32 |
33 | public LinearFunction(float min, float max, int steps) {
34 | this.min = min;
35 | this.max = max;
36 | this.steps = steps;
37 | }
38 |
39 | public float getVal() {
40 | return min + index * (max - min)/steps;
41 | }
42 |
43 | public void reset() {
44 | index = 0;
45 | }
46 |
47 | public boolean previous() {
48 | index--;
49 | if(getVal() < min || getVal() > max){
50 | return false;
51 | }
52 | else return true;
53 | }
54 |
55 | public boolean next() {
56 | index++;
57 | if(getVal() < min || getVal() > max){
58 | return false;
59 | }
60 | else return true;
61 | }
62 |
63 | @Override
64 | public void mark(){
65 | markIndex = index;
66 | }
67 |
68 | @Override
69 | public void resetToMark(){
70 | index = markIndex;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/optimization/reguralization/Regularizer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.optimization.reguralization;
17 |
18 | import edu.emory.mathcs.nlp.learning.util.WeightVector;
19 |
20 | import java.io.Serializable;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public abstract class Regularizer implements Serializable
26 | {
27 | private static final long serialVersionUID = 608089379202097302L;
28 | protected float rate;
29 |
30 | public Regularizer(float rate)
31 | {
32 | setRate(rate);
33 | }
34 |
35 | public float getRate()
36 | {
37 | return rate;
38 | }
39 |
40 | public void setRate(float rate)
41 | {
42 | this.rate = rate;
43 | }
44 |
45 | public abstract void setWeightVector(WeightVector vector);
46 |
47 | /** Expands the dimension of necessary vectors with respect to the weight vector. */
48 | public abstract void expand(int sparseFeatureSize, int denseFeatureSize, int labelSize);
49 |
50 | /** Updates the index'th weight of the weight vector with respect to the regularization. */
51 | public abstract void updateWeight(int index, float gradient, float learningRate, int steps, boolean sparse);
52 | }
53 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/CustomFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.normalization;
17 |
18 | /**
19 | * @author amit-deshmane
20 | *
21 | * Jasper's Normalization
22 | * Normalize input. New value is sum of entries divided by sum over all
23 | * values. Adds smallest value to scores if it is negative.
24 | */
25 | public class CustomFunction implements NormalizationFunction {
26 |
27 | private static final long serialVersionUID = 3113580872545506521L;
28 |
29 | public CustomFunction() {
30 | }
31 |
32 | /* (non-Javadoc)
33 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[])
34 | */
35 | @Override
36 | public void apply(float[] scores) {
37 | float sum = 0;
38 | float minVal = Float.MAX_VALUE;
39 | for (float tempScore : scores) {
40 | if(tempScore < minVal){
41 | minVal = tempScore;
42 | }
43 | }
44 | if(minVal>0){
45 | minVal=0;
46 | }
47 | for (float tempScore : scores) {
48 | sum += tempScore - minVal;
49 | }
50 | if (sum == 0) {
51 | sum = 1;
52 | }
53 | for (int i =0; i < scores.length; i ++) {
54 | scores[i] = (scores[i]-minVal) / sum;
55 | }
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/verbnet/VNFrame.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.verbnet;
17 |
18 | import java.io.Serializable;
19 |
20 | import org.w3c.dom.Element;
21 |
22 | import edu.emory.mathcs.nlp.common.util.XMLUtils;
23 |
24 | /**
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class VNFrame implements Serializable
28 | {
29 | private static final long serialVersionUID = 1907495757606414993L;
30 |
31 | private VNSyntax v_syntax;
32 | private VNSemantics v_semantics;
33 |
34 | public VNFrame(Element eFrame)
35 | {
36 | init(eFrame);
37 | }
38 |
39 | private void init(Element eFrame)
40 | {
41 | setSyntax(new VNSyntax(XMLUtils.getFirstElementByTagName(eFrame, VNXml.E_SYNTAX)));
42 | setSemantics(new VNSemantics(XMLUtils.getFirstElementByTagName(eFrame, VNXml.E_SEMANTICS)));
43 | }
44 |
45 | public VNSyntax getSyntax()
46 | {
47 | return v_syntax;
48 | }
49 |
50 | public VNSemantics getSemantics()
51 | {
52 | return v_semantics;
53 | }
54 |
55 | public void setSyntax(VNSyntax syntax)
56 | {
57 | v_syntax = syntax;
58 | }
59 |
60 | public void setSemantics(VNSemantics semantics)
61 | {
62 | v_semantics = semantics;
63 | }
64 | }
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/activation/SigmoidFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.activation;
17 |
18 | import edu.emory.mathcs.nlp.common.util.Sigmoid;
19 |
20 | /**
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class SigmoidFunction implements ActivationFunction
24 | {
25 | private static final long serialVersionUID = 242731926367876732L;
26 | private Sigmoid table;
27 |
28 | /** Calls {@link #SigmoidFunction(int, float, float)}, where size = 3500, floor = -6, ceiling = 6. */
29 | public SigmoidFunction()
30 | {
31 | table = new Sigmoid();
32 | }
33 |
34 | /**
35 | * @param size the size of the sigmoid table (10,000 being the highest recommendation).
36 | * @param floor the lower convergence bound.
37 | * @param ceiling the upper convergence bound.
38 | */
39 | public SigmoidFunction(int size, float floor, float ceiling)
40 | {
41 | table = new Sigmoid(size, floor, ceiling);
42 | }
43 |
44 | @Override
45 | public void apply(float[] scores)
46 | {
47 | for (int i=0; i suffix_matchers;
29 |
30 | public EnglishDerivation(List affixMatchers)
31 | {
32 | init(affixMatchers);
33 | }
34 |
35 | private void init(List affixMatchers)
36 | {
37 | suffix_matchers = affixMatchers;
38 |
39 | if (suffix_matchers == null)
40 | throw new IllegalArgumentException("The suffix matcher list must not be null.");
41 | }
42 |
43 | public List getSuffixMatchers()
44 | {
45 | return suffix_matchers;
46 | }
47 |
48 | public String getBaseForm(String lemma, Set baseSet)
49 | {
50 | String base;
51 |
52 | for (AbstractAffixMatcher matcher : suffix_matchers)
53 | {
54 | base = matcher.getBaseForm(baseSet, lemma);
55 | if (base != null) return base;
56 | }
57 |
58 | return null;
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/api/src/test/resources/constituent/functionTags.parse:
--------------------------------------------------------------------------------
1 | (TOP (S (S (NP-SBJ (CC both)
2 | (NNP Bush)
3 | (CC and)
4 | (NNP Rice)))
5 | (VP (VBP have)
6 | (VP (VBN delivered)
7 | (NP (NP (NNS speeches))
8 | (, ,)
9 | (SBAR (WHNP-1 (WDT which))
10 | (S (NP-SBJ (-NONE- *T*-1))
11 | (VP (VBP are)
12 | (ADJP-PRD (RB very)
13 | (JJ clear))))))))))
14 |
15 | (TOP (S (NP-SBJ-1 (NNP Mr.)
16 | (NNP Clinton))
17 | (VP (VBD was)
18 | (VP (VBN joined)
19 | (NP (-NONE- *-1))
20 | (PP (IN by)
21 | (NP-LGS (JJ several)
22 | (JJ key)
23 | (NN republican)
24 | (NNS leaders)))))
25 | (. .)))
26 |
27 | (TOP (SBARQ (WHNP-1 (WP Who))
28 | (SQ-CLF (VBZ is)
29 | (NP-SBJ (PRP it))
30 | (NP-PRD (-NONE- *T*-1))
31 | (SBAR (WHNP-2 (WDT that))
32 | (S (NP-SBJ-3 (-NONE- *T*-2))
33 | (NP-TMP (NN today))
34 | (VP (VBZ wants)
35 | (S (NP-SBJ (-NONE- *PRO*-3))
36 | (VP (TO to)
37 | (VP (VB blow)
38 | (NP (NNS things))
39 | (PRT (RP up))
40 | (PP-LOC (IN in)
41 | (NP (NNP Lebanon)))))))))
42 | (, ,)
43 | (NP-VOC (NNP Doctor)))
44 | (. ?)))
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/pos/POSTagger.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.pos;
17 |
18 | import edu.emory.mathcs.nlp.component.template.OnlineComponent;
19 | import edu.emory.mathcs.nlp.component.template.eval.AccuracyEval;
20 | import edu.emory.mathcs.nlp.component.template.eval.Eval;
21 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode;
22 |
23 | import java.io.InputStream;
24 | import java.util.List;
25 |
26 | /**
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class POSTagger> extends OnlineComponent>
30 | {
31 | private static final long serialVersionUID = -7926217238116337203L;
32 |
33 | public POSTagger() {super(false);}
34 |
35 | public POSTagger(InputStream configuration)
36 | {
37 | super(false, configuration);
38 | }
39 |
40 | @Override
41 | protected POSState initState(N[] nodes)
42 | {
43 | return new POSState<>(nodes);
44 | }
45 |
46 | @Override
47 | public Eval createEvaluator()
48 | {
49 | return new AccuracyEval();
50 | }
51 |
52 | @Override
53 | protected POSState initState(List document) {return null;}
54 |
55 | @Override
56 | protected void postProcess(POSState state) {}
57 | }
58 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/conversion/util/HeadRuleTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.conversion.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertFalse;
20 | import static org.junit.Assert.assertTrue;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.mathcs.nlp.common.constituent.CTNode;
25 | import edu.emory.mathcs.nlp.conversion.util.HeadRule;
26 | import edu.emory.mathcs.nlp.conversion.util.HeadTagSet;
27 |
28 |
29 | /** @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */
30 | public class HeadRuleTest
31 | {
32 | @Test
33 | public void testHeadRule()
34 | {
35 | String tags = "NN.*|NP;VB.*|VP";
36 | HeadRule rule = new HeadRule(HeadRule.DIR_LEFT_TO_RIGHT, tags);
37 | CTNode node1 = new CTNode("NNS", null);
38 | CTNode node2 = new CTNode("VBN", null);
39 |
40 | assertFalse(rule.isRightToLeft());
41 |
42 | HeadTagSet[] headTags = rule.getHeadTags();
43 |
44 | HeadTagSet headTag = headTags[0];
45 | assertTrue(headTag.matches(node1));
46 | assertFalse(headTag.matches(node2));
47 |
48 | headTag = headTags[1];
49 | assertFalse(headTag.matches(node1));
50 | assertTrue(headTag.matches(node2));
51 |
52 | assertEquals(tags, rule.toString());
53 | }
54 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/learning/util/FeatureMapTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class FeatureMapTest
26 | {
27 | @Test
28 | public void test()
29 | {
30 | FeatureMap map = new FeatureMap();
31 | assertEquals(1, map.size());
32 |
33 | assertEquals(1, map.add(0, "A"));
34 | assertEquals(1, map.add(0, "A"));
35 | assertEquals(2, map.add(0, "B"));
36 | assertEquals(3, map.add(0, "C"));
37 | assertEquals(3, map.add(0, "C"));
38 | assertEquals(4, map.add(1, "A"));
39 | assertEquals(5, map.add(1, "B"));
40 | assertEquals(5, map.add(1, "B"));
41 | assertEquals(6, map.add(1, "C"));
42 | assertEquals(6, map.add(1, "C"));
43 |
44 | assertEquals(1, map.index(0, "A"));
45 | assertEquals(2, map.index(0, "B"));
46 | assertEquals(3, map.index(0, "C"));
47 | assertEquals(4, map.index(1, "A"));
48 | assertEquals(5, map.index(1, "B"));
49 | assertEquals(6, map.index(1, "C"));
50 |
51 | assertEquals(7, map.size());
52 |
53 | assertEquals(-1, map.index(0, "D"));
54 | assertEquals(-1, map.index(2, "A"));
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/learning/optimization/method/Perceptron.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.learning.optimization.method;
17 |
18 | import edu.emory.mathcs.nlp.learning.optimization.StochasticGradientDescent;
19 | import edu.emory.mathcs.nlp.learning.util.Instance;
20 | import edu.emory.mathcs.nlp.learning.util.WeightVector;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class Perceptron extends StochasticGradientDescent
26 | {
27 | private static final long serialVersionUID = 4996609767585176672L;
28 |
29 | public Perceptron(WeightVector vector, float learningRate, float bias)
30 | {
31 | super(vector, learningRate, bias);
32 | }
33 |
34 | @Override
35 | public void trainAux(Instance instance)
36 | {
37 | trainClassification(instance);
38 | }
39 |
40 | @Override
41 | protected int getPredictedLabel(Instance instance)
42 | {
43 | float[] scores = instance.getScores();
44 | return argmax(scores);
45 | }
46 |
47 | @Override
48 | protected float getLearningRate(int index, boolean sparse)
49 | {
50 | return learning_rate;
51 | }
52 |
53 | @Override
54 | public void updateMiniBatch() {}
55 |
56 | @Override
57 | public String toString()
58 | {
59 | return "Perceptron";
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/api/src/test/resources/constituent/normalize.parse:
--------------------------------------------------------------------------------
1 | ( (S (PP (IN In) (NP (NN order) (S (NP-SBJ (-NONE- *PRO*)) (VP (TO to) (VP (VB determine) (NP (NP (DT the) (NN sequence)) (PP (IN of) (NP (DT the) (JJ entire) (NN transcript))))))))) (, ,) (S (S (NP-SBJ-1=4 (NP (NN RT) (HYPH -) (NN PCR)) (VP (VBG using) (NP (NP (NP (NNS primers)) (PP-LOC (IN in) (NP (NNS exons) (NML (CD 10) (CC and) (CD 11))))) (VP (VBN paired) (NP (-NONE- *)) (PP (IN with) (NP (NP (DT a) (NN primer)) (PP-LOC (IN in) (NP (NN intron) (CD 12))))))))) (VP (VBD was) (VP=3 (VBN performed) (NP-1 (-NONE- *)) (S-MNR (NP-SBJ (-NONE- *PRO*)) (VP (VBG using) (NP (NML (NML (NML (NN BALB) (HYPH /) (NN c)) (NN mouse)) (NN brain)) (JJ total) (NN RNA))))))) (CC and) (S (NP-SBJ-2=4 (DT the) (VBG resulting) (NNS products)) (VP=3 (VBN sequenced) (NP-2 (-NONE- *))))) (. .)) )
2 | ( (S (NP-SBJ (NN Figure) (CD 1)) (VP (VBZ shows) (NP (NP (DT the) (JJ average) (NN IOP)) (PP (IN of) (NP (NP (NP (DT a) (NN number)) (PP (IN of) (NP (JJ inbred) (NN mouse) (NNS strains)))) (SBAR (WHNP-1 (WDT that)) (S (NP-SBJ-1 (-NONE- *T*)) (VP (VBD were) (VP (VBN housed) (NP-1 (-NONE- *)) (PP (IN in) (NP (DT the) (JJ same) (JJ environmental) (NNS conditions))))))))))) (. .)) )
3 | ( (S (S (NP-SBJ (NP (PRP It)) (SBAR-1 (-NONE- *EXP*))) (VP (VBZ is) (VP (VBG becoming) (ADJP-PRD (RB increasingly) (JJ clear)) (SBAR-1 (IN that) (S (NP-SBJ (NP (JJ many) (NNS forms)) (PP (IN of) (NP (NN glaucoma)))) (VP (VBP have) (NP (DT a) (JJ genetic) (NN component))))) (PRN (-LRB- [) (NP (CD 6) (, ,) (CD 7)) (-RRB- ]))))) (, ,) (CC and) (S (NP-SBJ-3 (JJ much) (JJ current) (NN research)) (VP (VBZ is) (VP (VBN focused) (NP-3 (-NONE- *)) (PP (IN on) (S-NOM (NP-SBJ (-NONE- *PRO*)) (VP (VBG identifying) (NP (NP (NP (JJ chromosomal) (NNS regions)) (CC and) (NP (NNS genes))) (SBAR (WHNP-2 (WDT that)) (S (NP-SBJ-2 (-NONE- *T*)) (VP (VBP contribute) (PP (IN to) (NP (NN glaucoma)))))))))) (PRN (-LRB- [) (NP (NP (CD 8)) (PP (SYM -) (NP (CD 10)))) (-RRB- ]))))) (. .)) )
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/constituent/CTReaderTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.constituent;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import java.util.ArrayList;
21 | import java.util.List;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.mathcs.nlp.common.util.IOUtils;
26 |
27 |
28 | /**
29 | * @since 3.0.0
30 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
31 | */
32 | public class CTReaderTest
33 | {
34 | @Test
35 | public void testCTReader() throws Exception
36 | {
37 | String filename = "src/test/resources/constituent/constituent.parse";
38 | CTReader reader = new CTReader(IOUtils.createFileInputStream(filename));
39 | CTTree tree;
40 |
41 | StringBuilder build = new StringBuilder();
42 | List trees = new ArrayList<>();
43 | String tmp;
44 |
45 | while ((tree = reader.nextTree()) != null)
46 | {
47 | tmp = tree.toString();
48 | trees.add(tmp);
49 | build.append(tmp);
50 | }
51 |
52 | reader.close();
53 |
54 | reader = new CTReader(IOUtils.createByteArrayInputStream(build.toString()));
55 | int i;
56 |
57 | for (i=0; (tree = reader.nextTree()) != null; i++)
58 | assertEquals(trees.get(i), tree.toString());
59 |
60 | reader.close();
61 | }
62 | }
--------------------------------------------------------------------------------
/cli/src/main/java/edu/emory/mathcs/nlp/zzz/RadiologyDecode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.zzz;
17 |
18 | import edu.emory.mathcs.nlp.common.util.FileUtils;
19 | import edu.emory.mathcs.nlp.common.util.IOUtils;
20 | import edu.emory.mathcs.nlp.decode.AbstractNLPDecoder;
21 | import edu.emory.mathcs.nlp.decode.NLPDecoder;
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class RadiologyDecode
27 | {
28 | static public void main(String[] args) throws Exception
29 | {
30 | final String configFile = "/Users/jdchoi/Documents/EmoryNLP/nlp4j/src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-deident.xml";
31 | final String inputDir = "/Users/jdchoi/Desktop/radiology/Q2";
32 | final String inputExt = "txt";
33 | final String outputExt = "tsv";
34 | final String outputFormat = AbstractNLPDecoder.FORMAT_LINE;
35 |
36 | NLPDecoder nlp4j = new NLPDecoder(IOUtils.createFileInputStream(configFile));
37 |
38 | for (String inputFile : FileUtils.getFileList(inputDir, inputExt))
39 | {
40 | System.out.println(inputFile);
41 | String outputFile = inputFile+"."+outputExt;
42 | nlp4j.decode(IOUtils.createFileInputStream(inputFile), IOUtils.createFileOutputStream(outputFile), outputFormat);
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/common/util/HashUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | /**
19 | * @since 3.0.0
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public class HashUtils
23 | {
24 | private static final long FNV_BASIS_64 = 0xcbf29ce484222325L;
25 | private static final long FNV_PRIME_64 = 0x100000001b3L;
26 |
27 | private static final int FNV_BASIS_32 = 0x811c9dc5;
28 | private static final int FNV_PRIME_32 = 0x01000193;
29 |
30 | public static int fnv1aHash32(final String s)
31 | {
32 | return fnv1aHash32(s, FNV_BASIS_32);
33 | }
34 |
35 | public static int fnv1aHash32(final String s, int basis)
36 | {
37 | char[] cs = s.toCharArray();
38 | int i, len = s.length();
39 |
40 | for (i=0; i reader = new NLPReader();
33 | reader.open(IOUtils.createFileInputStream(filename));
34 | NLPNode[] nodes;
35 |
36 | reader.form = 1;
37 | reader.lemma = 2;
38 | reader.pos = 3;
39 | reader.feats = 4;
40 | reader.dhead = 5;
41 | reader.deprel = 6;
42 | reader.sheads = 7;
43 | reader.nament = 8;
44 |
45 | nodes = reader.next();
46 |
47 | for (int i=1; i
24 | {
25 | private static final long serialVersionUID = -8933673050278448784L;
26 | private int index;
27 | private float value;
28 |
/**
 * Creates an item at the given index whose value is {@code 1}.
 * @param index the index of this item.
 */
public SparseItem(int index)
{
    this(index, 1.0f);
}
33 |
/**
 * Creates an item holding the given index/value pair, assigned through {@link #set(int, float)}.
 * @param index the index of this item.
 * @param value the value of this item.
 */
public SparseItem(int index, float value)
{
    this.set(index, value);
}
38 |
39 | public int getIndex()
40 | {
41 | return index;
42 | }
43 |
44 | public float getValue()
45 | {
46 | return value;
47 | }
48 |
49 | public void setIndex(int index)
50 | {
51 | this.index = index;
52 | }
53 |
54 | public void setValue(float value)
55 | {
56 | this.value = value;
57 | }
58 |
59 | public void set(int index, float value)
60 | {
61 | setIndex(index);
62 | setValue(value);
63 | }
64 |
65 | public void set(SparseItem item)
66 | {
67 | set(item.index, item.value);
68 | }
69 |
70 | @Override
71 | public int compareTo(SparseItem o)
72 | {
73 | return index - o.index;
74 | }
75 |
76 | @Override
77 | public String toString()
78 | {
79 | return index+":"+value;
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/it/ItClassifier.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.it;
17 |
18 | import edu.emory.mathcs.nlp.component.template.OnlineComponent;
19 | import edu.emory.mathcs.nlp.component.template.eval.Eval;
20 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode;
21 |
22 | import java.io.InputStream;
23 | import java.util.List;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class ItClassifier> extends OnlineComponent>
29 | {
30 | private static final long serialVersionUID = 3585863417135590906L;
31 |
32 | public ItClassifier() {super(true);}
33 |
34 | public ItClassifier(InputStream configuration)
35 | {
36 | super(true, configuration);
37 | }
38 |
39 | @Override
40 | protected ItState initState(List document)
41 | {
42 | return new ItState<>(document);
43 | }
44 |
45 | @Override
46 | public void initFeatureTemplate()
47 | {
48 | feature_template = new ItFeatureTemplate<>(config.getFeatureTemplateElement(), getHyperParameter());
49 | }
50 |
51 | @Override
52 | public Eval createEvaluator()
53 | {
54 | return new ItEval(4);
55 | }
56 |
57 | @Override
58 | protected void postProcess(ItState state) {}
59 |
60 | @Override
61 | protected ItState initState(N[] nodes) {return null;}
62 | }
63 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/ner/NERTagger.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.ner;
17 |
18 | import edu.emory.mathcs.nlp.component.template.OnlineComponent;
19 | import edu.emory.mathcs.nlp.component.template.eval.Eval;
20 | import edu.emory.mathcs.nlp.component.template.eval.F1Eval;
21 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode;
22 |
23 | import java.io.InputStream;
24 | import java.util.List;
25 |
26 | /**
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class NERTagger> extends OnlineComponent>
30 | {
31 | private static final long serialVersionUID = 87807440372806016L;
32 |
33 | public NERTagger() {super(false);}
34 |
35 | public NERTagger(InputStream configuration)
36 | {
37 | super(false, configuration);
38 | }
39 |
40 | // ============================== ABSTRACT ==============================
41 |
42 | @Override
43 | public Eval createEvaluator()
44 | {
45 | return new F1Eval();
46 | }
47 |
48 | @Override
49 | protected NERState initState(N[] nodes)
50 | {
51 | return new NERState<>(nodes);
52 | }
53 |
54 | @Override
55 | protected void postProcess(NERState state)
56 | {
57 | state.postProcess();
58 | }
59 |
60 | @Override
61 | protected NERState initState(List document) {return null;}
62 | }
63 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/feature/Field.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.feature;
17 |
18 |
/**
 * Names of the fields that a feature can be drawn from.
 * The declaration order is preserved as is, since ordinals depend on it.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum Field
{
    /* ---------- form features ---------- */
    word_form,
    word_form_lowercase,
    word_form_undigitalized,
    word_form_simplified,
    word_form_simplified_lowercase,
    word_shape,
    word_shape_lowercase,
    orthographic,
    orthographic_lowercase,
    prefix,
    suffix,

    /* ---------- part-of-speech tagging features ---------- */
    lemma,
    feats,
    part_of_speech_tag,
    ambiguity_classes,

    /* ---------- named entity recognition ---------- */
    named_entity_tag,

    /* ---------- dependency parsing features ---------- */
    dependency_label,
    dependent_set,
    distance,
    valency,

    /* ---------- lexica ---------- */
    word_clusters,
    word_embedding,
    named_entity_gazetteers,

    /* ---------- boolean ---------- */
    positional,

    /* ---------- document: bag of words ---------- */
    bag_of_words,
    bag_of_words_norm,
    bag_of_words_count,

    /* ---------- document: bag of words over stopwords ---------- */
    bag_of_words_stopwords,
    bag_of_words_stopwords_norm,
    bag_of_words_stopwords_count,

    /* ---------- document: bag of clusters ---------- */
    bag_of_clusters,
    bag_of_clusters_norm,
    bag_of_clusters_count,

    /* ---------- document: bag of clusters over stopwords ---------- */
    bag_of_clusters_stopwords,
    bag_of_clusters_stopwords_norm,
    bag_of_clusters_stopwords_count,
    ;
}
77 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/morph/util/AbstractAffixReplacer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.morph.util;
17 | import java.util.Map;
18 | import java.util.Set;
19 |
/**
 * Base class for affix replacers: stores a base part-of-speech tag, an affix form, and the
 * replacement strings, and leaves the actual base-form lookup to subclasses.
 * <p>
 * NOTE(review): the element types of {@code baseMap} ({@code Map<String, Set<String>>}) and
 * {@code baseSet} ({@code Set<String>}) are inferred; confirm them against the subclasses.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
abstract public class AbstractAffixReplacer
{
    protected String   s_basePOS;
    protected String   s_affixForm;
    protected String[] s_replacements;

    /**
     * Stores the given arguments as they are; the replacement array is not copied.
     * @param basePOS the part-of-speech tag returned by {@link #getBasePOS()}.
     * @param affixForm the affix form handled by this replacer.
     * @param replacements the replacement strings made available to subclasses.
     */
    public AbstractAffixReplacer(String basePOS, String affixForm, String[] replacements)
    {
        s_basePOS      = basePOS;
        s_affixForm    = affixForm;
        s_replacements = replacements;
    }

    /** @return the base part-of-speech tag given at construction. */
    public String getBasePOS()
    {
        return s_basePOS;
    }

    /**
     * Returns the base morpheme of the word form if exists; otherwise, {@code null}.
     * @param baseMap the lookup table consulted for base forms (interpretation is up to the implementation).
     * @param form the word-form in lower-case.
     * @return the base morpheme of the word form if exists; otherwise, {@code null}.
     */
    abstract public String getBaseForm(Map<String, Set<String>> baseMap, String form);

    /**
     * Returns the base morpheme of the word form if exists; otherwise, {@code null}.
     * @param baseSet the set consulted for base forms (interpretation is up to the implementation).
     * @param form the word-form in lower-case.
     * @return the base morpheme of the word form if exists; otherwise, {@code null}.
     */
    abstract public String getBaseForm(Set<String> baseSet, String form);
}
55 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/propbank/PBInstanceTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.propbank;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | /**
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class PBInstanceTest
26 | {
27 | @Test
28 | public void test()
29 | {
30 | String gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 21:2-ARG1 20:0-rel 18:0-ARG0 17:1-ARGM-MNR 18:0*11:1-LINK-PCR 17:1*15:1-LINK-SLC";
31 | PBInstance instance = new PBInstance(gold);
32 |
33 | assertEquals("20:0-rel", instance.getArgument(1).toString());
34 |
35 | gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 11:1*18:0-LINK-PCR 15:1*17:1-LINK-SLC 17:1-ARGM-MNR 18:0-ARG0 20:0-rel 21:2-ARG1";
36 | instance.sortArguments();
37 | assertEquals(gold, instance.toString());
38 | assertEquals(instance.getArgument(3), instance.getFirstArgument("ARG0"));
39 |
40 | gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 11:1*18:0-LINK-PCR 15:1*17:1-LINK-SLC 17:1-ARGM-MNR 20:0-rel 21:2-ARG1";
41 | instance.removeArguments("ARG0");
42 | assertEquals(gold, instance.toString());
43 |
44 | gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 11:1*18:0-LINK-PCR 15:1*17:1-LINK-SLC 17:1-ARGM-MNR 20:0-rel 21:2-ARG1";
45 | instance.removeArguments("ARG0");
46 | assertEquals(gold, instance.toString());
47 | }
48 | }
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/util/SplitterTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.util;
17 |
18 | import edu.emory.mathcs.nlp.common.util.Splitter;
19 | import org.junit.Test;
20 |
21 | import java.util.regex.Pattern;
22 |
23 | import static org.junit.Assert.assertEquals;
24 |
25 | /**
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class SplitterTest
29 | {
30 | @Test
31 | public void testSplitIncludingMatches()
32 | {
33 | Pattern pd = Pattern.compile("\\d+");
34 | Pattern pa = Pattern.compile("\\p{Lower}+");
35 | String s, t;
36 |
37 | s = "ab12cd34ef56gh";
38 | t = "[ab, 12, cd, 34, ef, 56, gh]";
39 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
40 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
41 |
42 | s = "12cd34ef56";
43 | t = "[12, cd, 34, ef, 56]";
44 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
45 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
46 |
47 | s = "1234";
48 | t = "[1234]";
49 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
50 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
51 |
52 | s = "abcd";
53 | t = "[abcd]";
54 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
55 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/api/src/main/java/edu/emory/mathcs/nlp/component/template/train/LOLS.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.component.template.train;
17 |
18 | import edu.emory.mathcs.nlp.common.random.XORShiftRandom;
19 |
20 | import java.util.Random;
21 |
22 |
23 | /**
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class LOLS
27 | {
28 | private int fixed_stage;
29 | private double decaying_rate;
30 | private double gold_probability;
31 | private Random random;
32 |
33 | public LOLS(int fixedStage, double decayingRate)
34 | {
35 | init(fixedStage, decayingRate);
36 | }
37 |
38 | private void init(int fixedStage, double decayingRate)
39 | {
40 | fixed_stage = fixedStage;
41 | decaying_rate = decayingRate;
42 | gold_probability = 1d;
43 | random = new XORShiftRandom(9);
44 | }
45 |
46 | public void updateGoldProbability()
47 | {
48 | if (fixed_stage <= 0)
49 | gold_probability *= decaying_rate;
50 | else
51 | fixed_stage--;
52 | }
53 |
54 | public double getGoldProbability()
55 | {
56 | return gold_probability;
57 | }
58 |
59 | public boolean chooseGold()
60 | {
61 | return (gold_probability > 0) && (gold_probability >= 1 || gold_probability > random.nextDouble());
62 | }
63 |
64 | @Override
65 | public String toString()
66 | {
67 | return String.format("LOLS: fixed = %d, decaying rate = %s", fixed_stage, decaying_rate);
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/api/src/test/java/edu/emory/mathcs/nlp/common/util/SplitterTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.mathcs.nlp.common.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import java.util.regex.Pattern;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.mathcs.nlp.common.util.Splitter;
25 |
26 | /**
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class SplitterTest
30 | {
31 | @Test
32 | public void testSplitIncludingMatches()
33 | {
34 | Pattern pd = Pattern.compile("\\d+");
35 | Pattern pa = Pattern.compile("\\p{Lower}+");
36 | String s, t;
37 |
38 | s = "ab12cd34ef56gh";
39 | t = "[ab, 12, cd, 34, ef, 56, gh]";
40 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
41 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
42 |
43 | s = "12cd34ef56";
44 | t = "[12, cd, 34, ef, 56]";
45 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
46 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
47 |
48 | s = "1234";
49 | t = "[1234]";
50 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
51 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
52 |
53 | s = "abcd";
54 | t = "[abcd]";
55 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString());
56 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString());
57 | }
58 | }
59 |
--------------------------------------------------------------------------------