├── api └── src │ ├── test │ ├── resources │ │ ├── a │ │ │ └── test │ │ │ │ └── some.txt │ │ ├── dat │ │ │ ├── nlp4j.txt │ │ │ └── nlp4j.txt.nlp │ │ ├── emorynlp-line.txt │ │ ├── emorynlp-raw.txt │ │ ├── propbank │ │ │ ├── wsj_0001.prop │ │ │ ├── wsj.prop │ │ │ ├── sample.prop │ │ │ ├── wsj_0001.parse │ │ │ └── wsj_0002.parse │ │ ├── emorynlp-raw.txt.tok │ │ ├── log4j.properties │ │ ├── decoder-test-config.xml │ │ └── constituent │ │ │ ├── functionTags.parse │ │ │ └── normalize.parse │ └── java │ │ └── edu │ │ └── emory │ │ └── mathcs │ │ └── nlp │ │ ├── common │ │ ├── verbnet │ │ │ └── VNTagTest.java │ │ ├── util │ │ │ ├── MathUtilsTest.java │ │ │ ├── CharUtilsTest.java │ │ │ ├── FileUtilsTest.java │ │ │ ├── CharTokenizerTest.java │ │ │ └── SplitterTest.java │ │ ├── propbank │ │ │ ├── PBLocationTest.java │ │ │ └── PBInstanceTest.java │ │ ├── collection │ │ │ └── ngram │ │ │ │ └── BigramTest.java │ │ └── constituent │ │ │ └── CTReaderTest.java │ │ ├── component │ │ ├── tokenizer │ │ │ └── dictionary │ │ │ │ ├── AbbreviationTest.java │ │ │ │ ├── CompoundTest.java │ │ │ │ ├── UnitTest.java │ │ │ │ ├── EmoticonTest.java │ │ │ │ ├── EnglishApostropheTest.java │ │ │ │ ├── CurrencyTest.java │ │ │ │ ├── HtmlTest.java │ │ │ │ └── DTHyphenTest.java │ │ └── template │ │ │ └── util │ │ │ └── TSVReaderTest.java │ │ ├── util │ │ ├── MathUtilsTest.java │ │ ├── CharUtilsTest.java │ │ ├── FileUtilsTest.java │ │ ├── CharTokenizerTest.java │ │ └── SplitterTest.java │ │ ├── learning │ │ ├── gridsearch │ │ │ └── GridFunctionTest.java │ │ └── util │ │ │ ├── LabelMapTest.java │ │ │ ├── FeatureVectorTest.java │ │ │ └── FeatureMapTest.java │ │ └── conversion │ │ └── util │ │ ├── HeadRuleMapTest.java │ │ ├── HeadTagSetTest.java │ │ └── HeadRuleTest.java │ └── main │ ├── resources │ └── edu │ │ └── emory │ │ └── mathcs │ │ └── nlp │ │ ├── component │ │ ├── tokenizer │ │ │ └── dictionary │ │ │ │ ├── preserve.txt │ │ │ │ ├── currency-dollar.txt │ │ │ │ ├── currency.txt │ │ │ │ ├── units.txt │ 
│ │ │ ├── english-hyphen-suffix.txt │ │ │ │ ├── english-compounds.txt │ │ │ │ ├── html-tags.txt │ │ │ │ ├── abbreviation-period.txt │ │ │ │ └── english-hyphen-prefix.txt │ │ └── morph │ │ │ └── english │ │ │ ├── abbreviation.rule │ │ │ ├── adverb.exc │ │ │ ├── cardinal.base │ │ │ ├── ordinal.base │ │ │ └── adjective.exc │ │ ├── configuration │ │ ├── config-decode-pos.xml │ │ ├── config-decode-en.xml │ │ ├── config-train-sample.xml │ │ ├── config-train-sample-optimized.xml │ │ └── config-train-doc.xml │ │ └── conversion │ │ ├── headrule_en_stanford.txt │ │ └── headrule_en_conll.txt │ └── java │ └── edu │ └── emory │ └── mathcs │ └── nlp │ ├── learning │ ├── activation │ │ ├── IdentityFunction.java │ │ ├── HyperbolicTanFunction.java │ │ ├── SoftplusFunction.java │ │ ├── RectifiedLinearUnitFunction.java │ │ ├── ActivationFunction.java │ │ ├── SoftmaxFunction.java │ │ └── SigmoidFunction.java │ ├── normalization │ │ ├── NormalizationEnum.java │ │ ├── NormalizationFunction.java │ │ ├── SoftmaxSmoothedFunction.java │ │ ├── SigmoidFunction.java │ │ ├── SoftmaxFunction.java │ │ └── CustomFunction.java │ ├── initialization │ │ ├── WeightGenerator.java │ │ └── RandomWeightGenerator.java │ ├── util │ │ ├── Prediction.java │ │ ├── SparsePrediction.java │ │ ├── StringPrediction.java │ │ └── SparseItem.java │ ├── gridsearch │ │ ├── GridFunction.java │ │ └── LinearFunction.java │ └── optimization │ │ ├── reguralization │ │ └── Regularizer.java │ │ └── method │ │ └── Perceptron.java │ ├── component │ ├── template │ │ ├── eval │ │ │ ├── Eval.java │ │ │ └── AccuracyEval.java │ │ ├── util │ │ │ ├── NLPFlag.java │ │ │ └── NLPMode.java │ │ ├── feature │ │ │ ├── Direction.java │ │ │ ├── Source.java │ │ │ ├── Relation.java │ │ │ └── Field.java │ │ ├── NLPComponent.java │ │ ├── node │ │ │ └── Orthographic.java │ │ ├── reader │ │ │ └── NLPReader.java │ │ ├── lexicon │ │ │ └── GlobalLexicon.java │ │ └── train │ │ │ └── LOLS.java │ ├── morph │ │ ├── MorphAnalyzer.java │ │ ├── 
MorphologicalAnalyzer.java │ │ ├── english │ │ │ └── EnglishDerivation.java │ │ └── util │ │ │ └── AbstractAffixReplacer.java │ ├── tokenizer │ │ ├── dictionary │ │ │ ├── Dictionary.java │ │ │ └── Abbreviation.java │ │ └── token │ │ │ └── TokenIndex.java │ ├── dep │ │ ├── DEPArc.java │ │ └── DEPEval.java │ ├── pos │ │ ├── POSState.java │ │ └── POSTagger.java │ ├── it │ │ └── ItClassifier.java │ └── ner │ │ └── NERTagger.java │ ├── common │ ├── constant │ │ ├── MetaConst.java │ │ └── CharConst.java │ ├── treebank │ │ ├── CTTag.java │ │ └── PBArc.java │ ├── util │ │ ├── Language.java │ │ ├── ObjectSizeFetcher.java │ │ ├── FastUtils.java │ │ ├── FileExtensionFilter.java │ │ └── HashUtils.java │ ├── verbnet │ │ ├── VNMap.java │ │ ├── VNXml.java │ │ └── VNFrame.java │ ├── propbank │ │ └── frameset │ │ │ ├── PBFType.java │ │ │ └── PBFXml.java │ ├── collection │ │ ├── tuple │ │ │ ├── CharIntPair.java │ │ │ ├── IntIntPair.java │ │ │ ├── BooleanIntPair.java │ │ │ ├── DoubleIntPair.java │ │ │ ├── CharCharPair.java │ │ │ ├── ObjectBooleanPair.java │ │ │ ├── Triple.java │ │ │ ├── Pair.java │ │ │ ├── DoubleIntIntTriple.java │ │ │ ├── BooleanIntIntTriple.java │ │ │ ├── ObjectCharPair.java │ │ │ ├── ObjectIntIntTriple.java │ │ │ ├── ObjectIntPair.java │ │ │ ├── ObjectFloatPair.java │ │ │ └── ObjectDoublePair.java │ │ └── tree │ │ │ └── PrefixNode.java │ └── random │ │ └── XORShiftRandom.java │ ├── decode │ └── NLPDecoder.java │ └── zzz │ ├── WordVector.java │ └── Tmp.java ├── .gitignore ├── README.md ├── cli └── src │ ├── main │ ├── config │ │ └── log4j.properties │ └── java │ │ └── edu │ │ └── emory │ │ └── mathcs │ │ └── nlp │ │ ├── bin │ │ ├── Version.java │ │ ├── util │ │ │ └── BinUtils.java │ │ └── NLPDemo.java │ │ └── zzz │ │ └── RadiologyDecode.java │ └── assembly │ └── bin.xml └── LICENSE.txt /api/src/test/resources/a/test/some.txt: -------------------------------------------------------------------------------- 1 | This is the cereal shot from guns. 
2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | *~ 3 | *.iml 4 | .idea 5 | /bin/ 6 | .settings 7 | */.settings 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | NLP4J webpage: [https://emorynlp.github.io/nlp4j](https://emorynlp.github.io/nlp4j/) -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/preserve.txt: -------------------------------------------------------------------------------- 1 | w/o 2 | W/O 3 | 's 4 | 'cause -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/abbreviation.rule: -------------------------------------------------------------------------------- 1 | n't RB not 2 | 'nt RB not 3 | 'd MD would 4 | 'll MD will 5 | ca MD can 6 | i PRP I 7 | na TO to -------------------------------------------------------------------------------- /api/src/test/resources/dat/nlp4j.txt: -------------------------------------------------------------------------------- 1 | The NLP4J project provides a NLP toolkit for JVM languages. This project is under the Apache 2 license and is currently developed by the NLP Research Group at Emory University. 
-------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/currency-dollar.txt: -------------------------------------------------------------------------------- 1 | au 2 | b 3 | bb 4 | bm 5 | bn 6 | bs 7 | bz 8 | c 9 | ca 10 | fj 11 | hk 12 | jm 13 | jy 14 | ky 15 | lr 16 | na 17 | nt 18 | nz 19 | sb 20 | sg 21 | us 22 | usd 23 | xc 24 | zb -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/currency.txt: -------------------------------------------------------------------------------- 1 | ad 2 | aud 3 | cad 4 | chf 5 | cny 6 | eur 7 | ffr 8 | gbp 9 | gmt 10 | hkd 11 | jpy 12 | kpw 13 | mxn 14 | nzd 15 | rmb 16 | rub 17 | sek 18 | sgd 19 | skr 20 | try 21 | usd 22 | usd -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/adverb.exc: -------------------------------------------------------------------------------- 1 | best well 2 | better well 3 | farther far 4 | farthest far 5 | foremost foremost 6 | further far 7 | furthest far 8 | least least 9 | less less 10 | more more 11 | more-or-less more-or-less 12 | most most 13 | worse bad 14 | worst bad 15 | -------------------------------------------------------------------------------- /api/src/test/resources/emorynlp-line.txt: -------------------------------------------------------------------------------- 1 | The Emory NLP project provides software and resources for natural language processing. 2 | It is developed by the NLP Research Group at Emory University. 3 | Please join our discussion group if you want to get notifications about new updates or post issues, suggestions, questions, etc. 
4 | -------------------------------------------------------------------------------- /api/src/test/resources/emorynlp-raw.txt: -------------------------------------------------------------------------------- 1 | The Emory NLP project provides software and resources for natural language processing. It 2 | is developed by the NLP Research Group at Emory University. Please join our discussion 3 | group if you want to get notifications about new updates or post issues, suggestions, questions, etc. 4 | -------------------------------------------------------------------------------- /api/src/test/resources/propbank/wsj_0001.prop: -------------------------------------------------------------------------------- 1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP 2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2 3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0 4 | -------------------------------------------------------------------------------- /api/src/test/resources/emorynlp-raw.txt.tok: -------------------------------------------------------------------------------- 1 | The Emory NLP project provides software and resources for natural language processing . 2 | It is developed by the NLP Research Group at Emory University . 3 | Please join our discussion group if you want to get notifications about new updates or post issues , suggestions , questions , etc. 
4 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/units.txt: -------------------------------------------------------------------------------- 1 | ag 2 | am 3 | cg 4 | ch 5 | cm 6 | cwt 7 | d 8 | dg 9 | dm 10 | drc 11 | fg 12 | fm 13 | ft 14 | fur 15 | gr 16 | h 17 | in 18 | kg 19 | km 20 | lb 21 | lea 22 | m 23 | mg 24 | mi 25 | mm 26 | ms 27 | ng 28 | nm 29 | oz 30 | pg 31 | pm 32 | qtr 33 | st 34 | yd 35 | yg 36 | ym 37 | zg 38 | zm -------------------------------------------------------------------------------- /api/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to INFO and its only appender to A1. 2 | log4j.rootLogger=INFO, A1 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | # A1 uses PatternLayout. 8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.A1.layout.conversionPattern=%m%n 10 | -------------------------------------------------------------------------------- /cli/src/main/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to INFO and its only appender to A1. 2 | log4j.rootLogger=INFO, A1 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | # A1 uses PatternLayout. 
8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.A1.layout.conversionPattern=%m%n 10 | -------------------------------------------------------------------------------- /api/src/test/resources/propbank/wsj.prop: -------------------------------------------------------------------------------- 1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP 2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2 3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0 4 | propbank/wsj_0002.parse 0 16 gold name-v name.01 ----- 0:2*17:1-ARG1 16:0-rel 18:2-ARG2 5 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/english-hyphen-suffix.txt: -------------------------------------------------------------------------------- 1 | able 2 | ahol 3 | aholic 4 | ation 5 | centric 6 | cracy 7 | crat 8 | dom 9 | er 10 | ery 11 | esque 12 | ette 13 | fest 14 | fi 15 | fold 16 | ful 17 | gate 18 | gon 19 | hood 20 | ian 21 | ible 22 | ing 23 | isation 24 | ise 25 | ising 26 | ism 27 | ist 28 | itis 29 | ization 30 | ize 31 | izing 32 | less 33 | logist 34 | logy 35 | ly 36 | most 37 | rama 38 | wise -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/cardinal.base: -------------------------------------------------------------------------------- 1 | zero 2 | one 3 | two 4 | three 5 | four 6 | five 7 | six 8 | seven 9 | eight 10 | nine 11 | ten 12 | eleven 13 | twelve 14 | thirteen 15 | fourteen 16 | fifteen 17 | sixteen 18 | seventeen 19 | eighteen 20 | nineteen 21 | twenty 22 | thirty 23 | forty 24 | fifty 25 | sixty 26 | seventy 27 | eighty 28 | ninety 29 | hundred 30 | thousand 31 | million 32 | billion 33 | trillion 34 | quadrillion 35 | 
quintillion 36 | sextillion 37 | septillion 38 | octillion 39 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/ordinal.base: -------------------------------------------------------------------------------- 1 | zeroth 2 | first 3 | second 4 | third 5 | fourth 6 | fifth 7 | sixth 8 | seventh 9 | eighth 10 | ninth 11 | tenth 12 | eleventh 13 | twelfth 14 | thirteenth 15 | fourteenth 16 | fifteenth 17 | sixteenth 18 | seventeenth 19 | eighteenth 20 | nineteenth 21 | twentieth 22 | thirtieth 23 | fortieth 24 | fiftieth 25 | sixtieth 26 | seventieth 27 | eightieth 28 | ninetieth 29 | hundredth 30 | thousandth 31 | millionth 32 | billionth 33 | trillionth 34 | quadrillionth 35 | quintillionth 36 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2015, Emory University 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
-------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-pos.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | edu/emory/mathcs/nlp/lexica/en-ambiguity-classes-simplified-lowercase.xz 8 | edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz 9 | 10 | 11 | 12 | edu/emory/mathcs/nlp/models/en-pos.xz 13 | 14 | 15 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/IdentityFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package edu.emory.mathcs.nlp.learning.activation; 5 | 6 | /** 7 | * @author amit-deshmane 8 | * 9 | */ 10 | public class IdentityFunction implements ActivationFunction { 11 | 12 | private static final long serialVersionUID = 797900453250163148L; 13 | 14 | public IdentityFunction() { 15 | } 16 | 17 | /* (non-Javadoc) 18 | * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[]) 19 | */ 20 | @Override 21 | public void apply(float[] scores) { 22 | return; 23 | 24 | } 25 | @Override 26 | public String toString() 27 | { 28 | return "Identity"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /api/src/test/resources/decoder-test-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | edu/emory/mathcs/nlp/lexica/en-ambiguity-classes-simplified-lowercase.xz 8 | edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz 9 | edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz 10 | 11 | 12 | 13 | edu/emory/mathcs/nlp/models/en-pos.xz 14 | 15 | 16 | -------------------------------------------------------------------------------- /cli/src/assembly/bin.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | bin 4 | 5 | tar.gz 6 | 7 | dir 8 | 9 | true 10 | 11 | 12 | target/appassembler/bin 13 | 14 | 15 | ../api/src/main/resources/edu/emory/mathcs/nlp/configuration 16 | etc/config 17 | 18 | **/*.xml 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/english-compounds.txt: -------------------------------------------------------------------------------- 1 | 't is 2 | 't was 3 | ai nt 4 | are nt 5 | ca nt 6 | can not 7 | could a 8 | could nt 9 | d' ye 10 | did nt 11 | do n cha 12 | do n' cha 13 | do nt 14 | does nt 15 | du n no 16 | fin na 17 | gim me 18 | gon na 19 | got ta 20 | had nt 21 | has nt 22 | have nt 23 | i 'm ma 24 | i 'mmm 25 | is nt 26 | it d 27 | it ll 28 | lem me 29 | let s 30 | might nt 31 | more 'n 32 | must nt 33 | sha nt 34 | should a 35 | should nt 36 | that d 37 | that ll 38 | that s 39 | they d 40 | they re 41 | they ve 42 | wan na 43 | was nt 44 | we ve 45 | were nt 46 | wha d ya 47 | what cha 48 | what re 49 | what s 50 | what ve 51 | what z 52 | who d 53 | who ll 54 | wo n cha 55 | wo nt 56 | would a 57 | would nt 58 | you d 59 | you ll 60 | you ve -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/HyperbolicTanFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package edu.emory.mathcs.nlp.learning.activation; 5 | 6 | /** 7 | * @author amit-deshmane 8 | * 9 | */ 10 | public class HyperbolicTanFunction implements ActivationFunction { 11 | 12 | private static final long serialVersionUID = 6581919225914864529L; 13 | 14 | public HyperbolicTanFunction() { 15 | } 16 | 17 | /* (non-Javadoc) 18 | * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[]) 
19 | */ 20 | @Override 21 | public void apply(float[] scores) { 22 | for(int index = 0; index < scores.length; index++){ 23 | scores[index] = (float)Math.tanh(scores[index]); 24 | } 25 | } 26 | @Override 27 | public String toString() 28 | { 29 | return "Tanh"; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/SoftplusFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package edu.emory.mathcs.nlp.learning.activation; 5 | 6 | /** 7 | * @author amit-deshmane 8 | * 9 | */ 10 | public class SoftplusFunction implements ActivationFunction { 11 | 12 | private static final long serialVersionUID = -3123516253479799668L; 13 | 14 | public SoftplusFunction() { 15 | } 16 | 17 | /* (non-Javadoc) 18 | * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[]) 19 | */ 20 | @Override 21 | public void apply(float[] scores) { 22 | for(int index = 0; index < scores.length; index++){ 23 | scores[index] = (float)Math.log(1 + Math.exp(scores[index])); 24 | } 25 | 26 | } 27 | @Override 28 | public String toString() 29 | { 30 | return "Softplus"; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/RectifiedLinearUnitFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package edu.emory.mathcs.nlp.learning.activation; 5 | 6 | /** 7 | * @author amit-deshmane 8 | * 9 | */ 10 | public class RectifiedLinearUnitFunction implements ActivationFunction { 11 | 12 | private static final long serialVersionUID = 2776457895707438981L; 13 | 14 | public RectifiedLinearUnitFunction() { 15 | } 16 | 17 | /* (non-Javadoc) 18 | * @see edu.emory.mathcs.nlp.learning.activation.ActivationFunction#apply(float[]) 19 | */ 20 | @Override 21 | 
public void apply(float[] scores) { 22 | for(int index = 0; index < scores.length; index++){ 23 | if(scores[index] < 0){ 24 | scores[index] = 0; 25 | } 26 | } 27 | 28 | } 29 | @Override 30 | public String toString() 31 | { 32 | return "Relu"; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /api/src/test/resources/propbank/sample.prop: -------------------------------------------------------------------------------- 1 | propbank/wsj_0003.parse 0 11 gold enter-v enter.01 ----- 10:1-ARG0 11:0-rel 12:1-ARG1 2 | propbank/wsj_0003.parse 0 21 gold cause-v cause.01 ----- 16:2-ARG0 21:0-rel 22:2-ARG1 3 | propbank/wsj_0003.parse 0 25 gold show-v show.02 ----- 22:1*23:1*24:1-ARG1 25:0,26:1-rel 27:2-ARGM-TMP 22:1*23:1-LINK-SLC 4 | propbank/wsj_0003.parse 0 31 gold say-v say.01 ----- 0:3*33:1-ARG1 30:1-ARG0 31:0-rel 5 | propbank/wsj_0003.parse 1 18 gold appear-v appear.02 ----- 0:2,19:2-ARG1 18:0-rel 6 | propbank/wsj_0003.parse 1 21 gold be-v be.01 ----- 0:2*19:1-ARG1 21:0-rel 22:2-ARG2 7 | propbank/wsj_0003.parse 1 28 gold study-v study.01 ----- 25:1*29:1-ARG1 28:0-rel 30:1-ARGM-LOC 25:1*29:1-LINK-PSV 8 | propbank/wsj_0003.parse 1 32 gold industrialize-v industrialize.01 ----- 32:0-rel 33:0-ARG1 9 | propbank/wsj_0003.parse 1 36 gold say-v say.01 ----- 0:3*38:1-ARG1 35:1-ARG0 36:0-rel 10 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/eval/Eval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.eval; 17 | 18 | /** 19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public interface Eval 22 | { 23 | void clear(); 24 | double score(); 25 | } 26 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/util/NLPFlag.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.util; 17 | 18 | /** 19 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public enum NLPFlag 22 | { 23 | // COLLECT, 24 | TRAIN, 25 | EVALUATE, 26 | DECODE; 27 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/feature/Direction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.feature; 17 | 18 | /** 19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public enum Direction 22 | { 23 | left, 24 | right, 25 | up, 26 | down, 27 | all; 28 | } 29 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/NormalizationEnum.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.normalization; 17 | 18 | /** 19 | * @author amit-deshmane 20 | * 21 | */ 22 | public enum NormalizationEnum { 23 | custom, // custom implemented by Jasper 24 | sigmoid, 25 | softmax, 26 | softmax_smooth; 27 | } 28 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/initialization/WeightGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.initialization; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public interface WeightGenerator extends Serializable 24 | { 25 | float next(); 26 | } 27 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/constant/MetaConst.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.constant; 17 | 18 | /** 19 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public interface MetaConst 22 | { 23 | String HYPERLINK = "#hlink#"; 24 | String EMOTICON = "#emo#"; 25 | String CARDINAL = "#crd#"; 26 | String ORDINAL = "#ord#"; 27 | } 28 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-en.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | edu/emory/mathcs/nlp/lexica/en-ambiguity-classes-simplified-lowercase.xz 8 | edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz 9 | edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz 10 | edu/emory/mathcs/nlp/lexica/en-named-entity-gazetteers-simplified.xz 11 | 12 | 13 | 14 | edu/emory/mathcs/nlp/models/en-pos.xz 15 | edu/emory/mathcs/nlp/models/en-ner.xz 16 | edu/emory/mathcs/nlp/models/en-dep.xz 17 | 18 | 19 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/treebank/CTTag.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2016, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.treebank; 17 | 18 | /** 19 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public interface CTTag 22 | { 23 | /** The special tag for the artificial top node. */ 24 | String TOP = "TOP"; 25 | /** The special tag for empty categories. */ 26 | String NONE = "-NONE-"; 27 | } 28 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/util/Language.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | 19 | /** 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public enum Language 23 | { 24 | ARABIC, 25 | CHINESE, 26 | ENGLISH, 27 | HINDI, 28 | KOREAN; 29 | 30 | static public Language getType(String s) 31 | { 32 | return valueOf(StringUtils.toUpperCase(s)); 33 | } 34 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/ActivationFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.activation; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public interface ActivationFunction extends Serializable 24 | { 25 | /** Transforms all values in the array according to this activation function. */ 26 | public void apply(float[] scores); 27 | } 28 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/NormalizationFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.learning.normalization; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author amit-deshmane ({@code amitad87@gmail.com}) 22 | */ 23 | public interface NormalizationFunction extends Serializable 24 | { 25 | /** Transforms all values in the array according to this normalization function. */ 26 | public void apply(float[] scores); 27 | } 28 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/verbnet/VNMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.verbnet; 17 | 18 | import java.io.Serializable; 19 | import java.util.HashMap; 20 | 21 | /** 22 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class VNMap extends HashMap implements Serializable 25 | { 26 | private static final long serialVersionUID = -7409938151707095231L; 27 | 28 | public void put(VNClass vn) 29 | { 30 | put(vn.getID(), vn); 31 | } 32 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/verbnet/VNTagTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.verbnet; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class VNTagTest 27 | { 28 | @Test 29 | public void test() 30 | { 31 | assertTrue(VNTag.contains(VNTag.VN_AGENT)); 32 | assertFalse(VNTag.contains("Hello")); 33 | } 34 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/NLPComponent.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template; 17 | 18 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode; 19 | 20 | import java.util.List; 21 | 22 | /** 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public interface NLPComponent<N extends AbstractNLPNode<N>> 26 | { 27 | /** Processes a sentence. */ 28 | void process(N[] nodes); 29 | 30 | /** Processes a document.
*/ 31 | void process(List document); 32 | } 33 | -------------------------------------------------------------------------------- /api/src/test/resources/propbank/wsj_0001.parse: -------------------------------------------------------------------------------- 1 | (TOP (S (NP-SBJ (NP (NNP Pierre) 2 | (NNP Vinken)) 3 | (, ,) 4 | (ADJP (NML (CD 61) 5 | (NNS years)) 6 | (JJ old)) 7 | (, ,)) 8 | (VP (MD will) 9 | (VP (VB join) 10 | (NP (DT the) 11 | (NN board)) 12 | (PP-CLR (IN as) 13 | (NP (DT a) 14 | (JJ nonexecutive) 15 | (NN director))) 16 | (NP-TMP (NNP Nov.) 17 | (CD 29)))) 18 | (. .))) 19 | 20 | (TOP (S (NP-SBJ (NNP Mr.) 21 | (NNP Vinken)) 22 | (VP (VBZ is) 23 | (NP-PRD (NP (NN chairman)) 24 | (PP (IN of) 25 | (NP (NP (NNP Elsevier) 26 | (NNP N.V.)) 27 | (, ,) 28 | (NP (DT the) 29 | (NNP Dutch) 30 | (VBG publishing) 31 | (NN group)))))) 32 | (. .))) 33 | 34 | -------------------------------------------------------------------------------- /api/src/test/resources/dat/nlp4j.txt.nlp: -------------------------------------------------------------------------------- 1 | 1 The the DT _ 3 det _ O 2 | 2 NLP4J nlp0j NNP pos2=NN 3 compound _ U-ORG 3 | 3 project project NN _ 4 nsubj _ O 4 | 4 provides provide VBZ _ 0 root _ O 5 | 5 a a DT _ 7 det _ O 6 | 6 NLP nlp NN pos2=NNP 7 compound _ O 7 | 7 toolkit toolkit NN _ 4 dobj _ O 8 | 8 for for IN _ 7 prep _ O 9 | 9 JVM jvm NN pos2=NNP 10 compound _ U-ORG 10 | 10 languages language NNS _ 8 pobj _ O 11 | 11 . . . 
_ 4 punct _ O 12 | 13 | 1 This this DT _ 2 det _ O 14 | 2 project project NN _ 3 nsubj _ O 15 | 3 is be VBZ _ 0 root _ O 16 | 4 under under IN _ 3 prep _ O 17 | 5 the the DT _ 8 det _ O 18 | 6 Apache apache NNP pos2=NN 8 nmod _ O 19 | 7 2 0 CD pos2=NNP 6 nmod _ O 20 | 8 license license NN pos2=NNS 4 pobj _ O 21 | 9 and and CC _ 3 cc _ O 22 | 10 is be VBZ _ 12 auxpass _ O 23 | 11 currently currently RB _ 12 advmod _ O 24 | 12 developed develop VBN _ 3 conj _ O 25 | 13 by by IN _ 12 agent _ O 26 | 14 the the DT _ 17 det _ B-ORG 27 | 15 NLP nlp NNP _ 17 compound _ I-ORG 28 | 16 Research research NNP _ 17 compound _ I-ORG 29 | 17 Group group NNP _ 13 pobj _ L-ORG 30 | 18 at at IN _ 17 prep _ O 31 | 19 Emory emory NNP _ 20 compound _ B-ORG 32 | 20 University university NNP _ 18 pobj _ L-ORG 33 | 21 . . . _ 3 punct _ O 34 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/util/NLPMode.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.util; 17 | 18 | /** 19 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public enum NLPMode 22 | { 23 | custom, // custom NLP 24 | pos, // part-of-speech tagging 25 | ner, // named entity recognition 26 | dep, // dependency parsing 27 | srl, // semantic role labeling 28 | doc, // document classification 29 | it, // it classification 30 | sentiment, // sentiment analysis 31 | sentiment_ensemble; // sentiment analysis: ensemble 32 | } 33 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/util/ObjectSizeFetcher.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import java.lang.instrument.Instrumentation; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class ObjectSizeFetcher 24 | { 25 | private static Instrumentation instrumentation; 26 | 27 | public static void premain(String args, Instrumentation inst) 28 | { 29 | instrumentation = inst; 30 | } 31 | 32 | public static long getObjectSize(Object o) 33 | { 34 | return instrumentation.getObjectSize(o); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cli/src/main/java/edu/emory/mathcs/nlp/bin/Version.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.bin; 17 | 18 | /** 19 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public class Version 22 | { 23 | public static void main(String[] args) 24 | { 25 | System.out.println("========================================"); 26 | System.out.println("NLP4J Version 1.1.3"); 27 | System.out.println("Contact: choi@mathcs.emory.edu"); 28 | System.out.println("Webpage: http://emorynlp.github.io/nlp4j"); 29 | System.out.println("========================================"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/propbank/frameset/PBFType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.propbank.frameset; 17 | 18 | 19 | /** 20 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public enum PBFType 23 | { 24 | VERB("v"), 25 | NOUN("n"), 26 | ADJECTIVE("j"); 27 | 28 | private final String value; 29 | 30 | PBFType(String value) 31 | { 32 | this.value = value; 33 | } 34 | 35 | public boolean isValue(String value) 36 | { 37 | return this.value.equals(value); 38 | } 39 | 40 | public String getValue() 41 | { 42 | return value; 43 | } 44 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/CharIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class CharIntPair implements Serializable 24 | { 25 | private static final long serialVersionUID = -2439322004395455224L; 26 | 27 | public char c; 28 | public int i; 29 | 30 | public CharIntPair(char c, int i) 31 | { 32 | set(c, i); 33 | } 34 | 35 | public void set(char c, int i) 36 | { 37 | this.c = c; 38 | this.i = i; 39 | } 40 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/IntIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class IntIntPair implements Serializable 24 | { 25 | private static final long serialVersionUID = 1674260806426517804L; 26 | 27 | public int i1; 28 | public int i2; 29 | 30 | public IntIntPair(int i1, int i2) 31 | { 32 | set(i1, i2); 33 | } 34 | 35 | public void set(int i1, int i2) 36 | { 37 | this.i1 = i1; 38 | this.i2 = i2; 39 | } 40 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/BooleanIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class BooleanIntPair implements Serializable 24 | { 25 | private static final long serialVersionUID = -3606845926289267380L; 26 | public boolean b; 27 | public int i; 28 | 29 | public BooleanIntPair(boolean b, int i) 30 | { 31 | set(b, i); 32 | } 33 | 34 | public void set(boolean b, int i) 35 | { 36 | this.b = b; 37 | this.i = i; 38 | } 39 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/DoubleIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class DoubleIntPair implements Serializable 24 | { 25 | private static final long serialVersionUID = -2439322004395455224L; 26 | 27 | public double d; 28 | public int i; 29 | 30 | public DoubleIntPair(double d, int i) 31 | { 32 | set(d, i); 33 | } 34 | 35 | public void set(double d, int i) 36 | { 37 | this.d = d; 38 | this.i = i; 39 | } 40 | } -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/conversion/headrule_en_stanford.txt: -------------------------------------------------------------------------------- 1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.* 2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.* 3 | CAPTION l NNP.*;NN.*;NP;CD;.* 4 | CIT l NNP.*;NN.*;NP;CD;.* 5 | CONJP l CC;VB.*;NN.*;TO|IN;.* 6 | EDITED r VP;VB.*;NN.*|PRP|NP;IN|PP;S.*;.* 7 | EMBED r S.*;FRAG|NP;.* 8 | FRAG r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.* 9 | INTJ l VB.*;NN.*;UH;INTJ;.* 10 | LST l LS|CD;NN;.* 11 | META l NP;VP|S;.* 12 | NAC r NN.*;NP;S|SINV;.* 13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.* 14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.* 15 | NX r NN.*;NX;NP;.* 16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.* 17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.* 18 | PRT l RP;PRT;.* 19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.* 20 | RRC l VP;VB.*;-PRD;NP|NN.*;ADJP;PP;.* 21 | S r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.* 22 | SBAR r VP;S|SQ|SINV;SBAR.*;FRAG|NP;.* 23 | SBARQ r VP;SQ|SBARQ;S|SINV;FRAG|NP;.* 24 | SINV r VP;VB.*;MD;S|SINV;NP;.* 25 | SQ r VP;VB.*;SQ;S;MD;NP;.* 26 | UCP r .* 27 | VP l VP;VB.*;MD|TO;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.* 28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.* 29 | WHADVP r RB.*|WRB;WHADVP;.* 30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.* 31 | WHPP l IN|TO;.* 32 | X r .* -------------------------------------------------------------------------------- 
/api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/CharCharPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class CharCharPair implements Serializable 24 | { 25 | private static final long serialVersionUID = -2439322004395455224L; 26 | 27 | public char c1; 28 | public char c2; 29 | 30 | public CharCharPair(char c1, char c2) 31 | { 32 | set(c1, c2); 33 | } 34 | 35 | public void set(char c1, char c2) 36 | { 37 | this.c1 = c1; 38 | this.c2 = c2; 39 | } 40 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectBooleanPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class ObjectBooleanPair<T> implements Serializable 24 | { 25 | private static final long serialVersionUID = -3471022143310924799L; 26 | public T o; 27 | public boolean b; 28 | 29 | public ObjectBooleanPair(T o, boolean b) 30 | { 31 | set(o, b); 32 | } 33 | 34 | public void set(T o, boolean b) 35 | { 36 | this.o = o; 37 | this.b = b; 38 | } 39 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/morph/MorphAnalyzer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package edu.emory.mathcs.nlp.component.morph; 17 | 18 | import edu.emory.mathcs.nlp.common.util.StringUtils; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public abstract class MorphAnalyzer 24 | { 25 | /** 26 | * @param simplifiedWordForm simplified word form generated by {@link StringUtils#toSimplifiedForm(String)}. 27 | * @param pos Penn Treebank style part-of-speech tag. 28 | * @return the lemmas of the word form given the pos tag. 29 | */ 30 | public abstract String lemmatize(String simplifiedWordForm, String pos); 31 | } 32 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/conversion/headrule_en_conll.txt: -------------------------------------------------------------------------------- 1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.* 2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.* 3 | CAPTION l NNP.*;NN.*;NP;CD;.* 4 | CIT l NNP.*;NN.*;NP;CD;.* 5 | CONJP l CC;VB.*;NN.*;TO|IN;.* 6 | EDITED r VB.*;VP;NN.*|PRP|NP;IN|PP;S.*;.* 7 | EMBED r S.*;FRAG|NP;.* 8 | FRAG r VB.*;VP;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.* 9 | INTJ l VB.*;NN.*;UH;INTJ;.* 10 | LST l LS|CD;NN;.* 11 | META l NP;VP|S;.* 12 | NAC r NN.*;NP;S|SINV;.* 13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.* 14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.* 15 | NX r NN.*;NX;NP;.* 16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.* 17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.* 18 | PRT l RP;PRT;.* 19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.* 20 | RRC l VB.*;VP;-PRD;NP|NN.*;ADJP;PP;.* 21 | S r MD|TO;VB.*;VP;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.* 22 | SBAR r IN|TO|DT;MD;VB.*;VP;S|SQ|SINV;SBAR.*;FRAG|NP;.* 23 | SBARQ r MD;VB.*;VP;SQ|SBARQ;S|SINV;FRAG|NP;.* 24 | SINV r MD;VB.*;VP;S|SINV;NP;.* 25 | SQ r MD;VB.*;VP;SQ;S;NP;.* 26 | UCP r .* 27 | VP l MD|TO;VB.*;VP;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.* 28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.* 29 | WHADVP r 
RB.*|WRB;WHADVP;.* 30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.* 31 | WHPP l IN|TO;.* 32 | X r .* -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/Triple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class Triple<T1, T2, T3> implements Serializable 24 | { 25 | private static final long serialVersionUID = 2261656496863083672L; 26 | public T1 o1; 27 | public T2 o2; 28 | public T3 o3; 29 | 30 | public Triple(T1 o1, T2 o2, T3 o3) 31 | { 32 | set(o1, o2, o3); 33 | } 34 | 35 | public void set(T1 o1, T2 o2, T3 o3) 36 | { 37 | this.o1 = o1; 38 | this.o2 = o2; 39 | this.o3 = o3; 40 | } 41 | } -------------------------------------------------------------------------------- /api/src/test/resources/propbank/wsj_0002.parse: -------------------------------------------------------------------------------- 1 | (TOP (S (NP-SBJ-1 (NP (NNP Rudolph) 2 | (NNP Agnew)) 3 | (, ,) 4 | (UCP (ADJP (NML (CD 55) 5 | (NNS years)) 6 | (JJ old)) 7 | (CC and) 8 | (NP (NP (JJ former) 9 | (NN chairman)) 10 | (PP (IN of) 11 | (NP (NNP Consolidated) 12 | (NNP Gold) 13 | (NNP Fields) 14 | (NNP PLC))))) 15 | (, ,)) 16 | (VP (VBD was) 17 | (VP (VBN named) 18 | (NP-2 (-NONE- *-1)) 19 | (S-CLR (NP-SBJ (-NONE- *PRO*-2)) 20 | (NP-PRD (NP (DT a) 21 | (JJ nonexecutive) 22 | (NN director)) 23 | (PP (IN of) 24 | (NP (DT this) 25 | (JJ British) 26 | (JJ industrial) 27 | (NN conglomerate))))))) 28 | (. .))) 29 | 30 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/Pair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class Pair<T1, T2> implements Serializable 24 | { 25 | private static final long serialVersionUID = 8447270640444415417L; 26 | 27 | public T1 o1; 28 | public T2 o2; 29 | 30 | public Pair() 31 | { 32 | set(null, null); 33 | } 34 | 35 | public Pair(T1 o1, T2 o2) 36 | { 37 | set(o1, o2); 38 | } 39 | 40 | public void set(T1 o1, T2 o2) 41 | { 42 | this.o1 = o1; 43 | this.o2 = o2; 44 | } 45 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/propbank/frameset/PBFXml.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package edu.emory.mathcs.nlp.common.propbank.frameset; 17 | 18 | /** 19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public interface PBFXml 22 | { 23 | String E_FRAMESET = "frameset"; 24 | String E_PREDICATE = "predicate"; 25 | String E_ROLESET = "roleset"; 26 | String E_ROLE = "role"; 27 | String E_VNROLE = "vnrole"; 28 | 29 | String A_LEMMA = "lemma"; 30 | String A_ID = "id"; 31 | String A_DESCR = "descr"; 32 | String A_NAME = "name"; 33 | String A_N = "n"; 34 | String A_F = "f"; 35 | String A_VNCLS = "vncls"; 36 | String A_VNTHETA = "vntheta"; 37 | } -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/morph/english/adjective.exc: -------------------------------------------------------------------------------- 1 | acer acer 2 | after after 3 | all-arounder all-arounder 4 | archer archer 5 | bayer bayer 6 | best good 7 | bestest best 8 | better good 9 | bluewater bluewater 10 | britisher britisher 11 | cagier cagey 12 | cagiest cagey 13 | camper camper 14 | cer cer 15 | cuter cute 16 | cutest cute 17 | damndest damned 18 | dicier dicey 19 | diciest dicey 20 | dopier dopey 21 | dopiest dopey 22 | eastsider eastsider 23 | eastwest eastwest 24 | eerier eerie 25 | eeriest eerie 26 | faker faker 27 | farther far 28 | farthest far 29 | feller feller 30 | first-rater first-rater 31 | first-stringer first-stringer 32 | flatwater flatwater 33 | fore-and-after fore-and-after 34 | further far 35 | furthest far 36 | gooier gooey 37 | gooiest gooey 38 | guest guest 39 | halfways halfway 40 | halter halter 41 | homier homey 42 | homiest homey 43 | later late 44 | latest late 45 | leer leer 46 | ler ler 47 | leveler leveler 48 | liver liver 49 | loather loather 50 | meeter meeter 51 | milcher milcher 52 | modest modest 53 | number number 54 | planer planer 55 | player player 56 | prompter prompter 57 | ranker ranker 58 | second-rater second-rater 59 | serer 
serer 60 | souther souther 61 | starest starest 62 | stiper striper 63 | third-rater third-rater 64 | welsher welsher 65 | worse bad 66 | worst bad -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/feature/Source.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.feature; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public enum Source implements Serializable 24 | { 25 | /** 26 | * For dependency parsing: top of the stack. 27 | * For semantic role labeling: predicate. 28 | * For everything else: input. 29 | */ 30 | i, 31 | /** 32 | * For dependency parsing: front of the input buffer. 33 | * For semantic role labeling: argument. 34 | */ 35 | j, 36 | /** 37 | * For dependency parsing: peek of the stack. 
/**
 * Mutable, serializable triple of one {@code double} and two {@code int}
 * values exposed as public fields.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DoubleIntIntTriple implements Serializable
{
	private static final long serialVersionUID = -5353827334306132865L;

	public double d;
	public int    i1;
	public int    i2;

	/**
	 * Creates a triple holding the given values.
	 *
	 * @param d  the double component.
	 * @param i1 the first int component.
	 * @param i2 the second int component.
	 */
	public DoubleIntIntTriple(double d, int i1, int i2)
	{
		// Fields are assigned directly rather than through set(): set() is
		// overridable, and an override would run before the subclass's own
		// fields have been initialised.
		this.d  = d;
		this.i1 = i1;
		this.i2 = i2;
	}

	/**
	 * Replaces all three components at once.
	 *
	 * @param d  the new double component.
	 * @param i1 the new first int component.
	 * @param i2 the new second int component.
	 */
	public void set(double d, int i1, int i2)
	{
		this.d  = d;
		this.i1 = i1;
		this.i2 = i2;
	}
}
/**
 * {@link Random} whose bits come from a 64-bit xorshift step
 * (left 21, unsigned right 35, left 4) applied to a private state word.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class XORShiftRandom extends Random
{
	private static final long serialVersionUID = -6971555410750547741L;

	/** State used in place of a zero seed; any non-zero constant works. */
	private static final long ZERO_SEED_REPLACEMENT = 0x9E3779B97F4A7C15L;

	private long seed;

	/**
	 * @param seed initial state; {@code 0} is replaced by a fixed non-zero
	 *             constant because xorshift maps zero to zero forever.
	 */
	public XORShiftRandom(long seed)
	{
		this.seed = nonZero(seed);
	}

	/**
	 * Re-seeds the xorshift state. Without this override the inherited
	 * {@code setSeed} only touches the superclass state, which
	 * {@link #next(int)} never reads.
	 *
	 * @param seed the new state; {@code 0} is replaced as in the constructor.
	 */
	@Override
	public void setSeed(long seed)
	{
		// also resets the cached Gaussian kept by java.util.Random
		super.setSeed(seed);
		this.seed = nonZero(seed);
	}

	/**
	 * Advances the state by one xorshift step and returns its low bits.
	 *
	 * @param nbits number of low-order bits to keep.
	 * @return the low {@code nbits} bits of the new state, as an int.
	 */
	@Override
	protected int next(int nbits)
	{
		long x = seed;
		x ^= (x << 21);
		x ^= (x >>> 35);
		x ^= (x << 4);
		seed = x;
		x &= ((1L << nbits) - 1);
		return (int)x;
	}

	/** Returns the seed unchanged unless it is zero, the xorshift fixed point. */
	private static long nonZero(long seed)
	{
		return seed == 0 ? ZERO_SEED_REPLACEMENT : seed;
	}
}
15 | */ 16 | package edu.emory.mathcs.nlp.decode; 17 | 18 | import edu.emory.mathcs.nlp.component.template.node.NLPNode; 19 | 20 | import java.io.InputStream; 21 | 22 | /** 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class NLPDecoder extends AbstractNLPDecoder 26 | { 27 | public NLPDecoder() {super();} 28 | 29 | public NLPDecoder(DecodeConfig config) 30 | { 31 | super(config); 32 | } 33 | 34 | public NLPDecoder(InputStream configuration) 35 | { 36 | super(new DecodeConfig(configuration)); 37 | } 38 | 39 | @Override 40 | public NLPNode create() 41 | { 42 | return new NLPNode(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/AbbreviationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class AbbreviationTest 27 | { 28 | @Test 29 | public void test() 30 | { 31 | Abbreviation dt = new Abbreviation(); 32 | 33 | assertTrue(dt.isAbbreviationEndingWithPeriod("mr")); 34 | assertTrue(dt.isAbbreviationEndingWithPeriod("mrs")); 35 | 36 | assertFalse(dt.isAbbreviationEndingWithPeriod("e.g")); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/util/MathUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.util; 17 | 18 | import edu.emory.mathcs.nlp.common.util.MathUtils; 19 | import org.junit.Test; 20 | 21 | import static org.junit.Assert.assertEquals; 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class MathUtilsTest 27 | { 28 | @Test 29 | public void testPow() 30 | { 31 | int i, j; 32 | 33 | for (j=-5; j<5; j++) 34 | { 35 | if (j == 0) continue; 36 | 37 | for (i=-5; i<5; i++) 38 | { 39 | assertEquals(Math.pow( 2, i), MathUtils.pow( 2, i), 0); 40 | assertEquals(Math.pow(-2, i), MathUtils.pow(-2, i), 0); 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/BooleanIntIntTriple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
/**
 * Mutable, serializable triple of one {@code boolean} and two {@code int}
 * values exposed as public fields.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class BooleanIntIntTriple implements Serializable
{
	private static final long serialVersionUID = -5353827334306132865L;

	public boolean b;
	public int     i1;
	public int     i2;

	/**
	 * Creates a triple holding the given values.
	 *
	 * @param b  the boolean component.
	 * @param i1 the first int component.
	 * @param i2 the second int component.
	 */
	public BooleanIntIntTriple(boolean b, int i1, int i2)
	{
		// Fields are assigned directly rather than through set(): set() is
		// overridable, and an override would run before the subclass's own
		// fields have been initialised.
		this.b  = b;
		this.i1 = i1;
		this.i2 = i2;
	}

	/**
	 * Replaces all three components at once.
	 *
	 * @param b  the new boolean component.
	 * @param i1 the new first int component.
	 * @param i2 the new second int component.
	 */
	public void set(boolean b, int i1, int i2)
	{
		this.b  = b;
		this.i1 = i1;
		this.i2 = i2;
	}
}
/**
 * String codes {@code "0"} through {@code "13"}, one per named orthographic
 * property.
 * NOTE(review): presumably used as feature values describing a word form;
 * confirm against the code that reads them.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface Orthographic
{
	public static final String HYPERLINK          = "0";

	// whole-token properties
	public static final String ALL_UPPER          = "1";
	public static final String ALL_LOWER          = "2";
	public static final String ALL_DIGIT          = "3";
	public static final String ALL_PUNCT          = "4";
	public static final String ALL_DIGIT_OR_PUNCT = "5";

	// "contains" properties
	public static final String HAS_DIGIT          = "6";
	public static final String HAS_PERIOD         = "7";
	public static final String HAS_HYPHEN         = "8";
	public static final String HAS_OTHER_PUNCT    = "9";

	// case-related properties
	public static final String NO_LOWER           = "10";
	public static final String FST_UPPER          = "11";
	public static final String UPPER_1            = "12";
	public static final String UPPER_2            = "13";
}
/**
 * Pairs a word with a {@code float[]} vector. The array is stored and
 * returned by reference; no copy is made.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class WordVector
{
	private float[] vector;
	private String  word;

	/**
	 * @param word   the word.
	 * @param vector the vector, kept by reference.
	 */
	public WordVector(String word, float[] vector)
	{
		// Fields are assigned directly rather than through the overridable
		// setters, so no subclass code runs during construction.
		this.word   = word;
		this.vector = vector;
	}

	/** @return the word. */
	public String getWord()
	{
		return word;
	}

	/** @param word the new word. */
	public void setWord(String word)
	{
		this.word = word;
	}

	/** @return the stored array itself (not a copy). */
	public float[] getVector()
	{
		return vector;
	}

	/** @param vector the new vector, kept by reference. */
	public void setVector(float[] vector)
	{
		this.vector = vector;
	}
}
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class MathUtilsTest 28 | { 29 | @Test 30 | public void testPow() 31 | { 32 | int i, j; 33 | 34 | for (j=-5; j<5; j++) 35 | { 36 | if (j == 0) continue; 37 | 38 | for (i=-5; i<5; i++) 39 | { 40 | assertEquals(Math.pow( 2, i), MathUtils.pow( 2, i), 0); 41 | assertEquals(Math.pow(-2, i), MathUtils.pow(-2, i), 0); 42 | } 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/util/CharUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.util; 17 | 18 | import edu.emory.mathcs.nlp.common.util.StringUtils; 19 | import org.junit.Test; 20 | 21 | import static org.junit.Assert.assertFalse; 22 | import static org.junit.Assert.assertTrue; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class CharUtilsTest 29 | { 30 | @Test 31 | public void testContainsOnlyDigits() 32 | { 33 | assertTrue (StringUtils.containsDigitOnly("12")); 34 | assertFalse(StringUtils.containsDigitOnly("a1")); 35 | assertFalse(StringUtils.containsDigitOnly("1b")); 36 | assertFalse(StringUtils.containsDigitOnly("1-2")); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/util/FileUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.util; 17 | 18 | import edu.emory.mathcs.nlp.common.util.FileUtils; 19 | import org.junit.Test; 20 | 21 | import static org.junit.Assert.assertEquals; 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class FileUtilsTest 27 | { 28 | @Test 29 | public void replaceExtensionTest() 30 | { 31 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "jpg")); 32 | assertEquals(null , FileUtils.replaceExtension("a", "jpg")); 33 | 34 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "txt", "jpg")); 35 | assertEquals(null , FileUtils.replaceExtension("a.txt", "bmp", "jpg")); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/util/CharUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.mathcs.nlp.common.util.StringUtils; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class CharUtilsTest 30 | { 31 | @Test 32 | public void testContainsOnlyDigits() 33 | { 34 | assertTrue (StringUtils.containsDigitOnly("12")); 35 | assertFalse(StringUtils.containsDigitOnly("a1")); 36 | assertFalse(StringUtils.containsDigitOnly("1b")); 37 | assertFalse(StringUtils.containsDigitOnly("1-2")); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectCharPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class ObjectCharPair implements Serializable, Comparable> 24 | { 25 | private static final long serialVersionUID = -5228607179375724504L; 26 | 27 | public T o; 28 | public char c; 29 | 30 | public ObjectCharPair(T o, char c) 31 | { 32 | set(o, c); 33 | } 34 | 35 | public void set(T o, char c) 36 | { 37 | this.o = o; 38 | this.c = c; 39 | } 40 | 41 | @Override 42 | public int compareTo(ObjectCharPair p) 43 | { 44 | return c - p.c; 45 | } 46 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/util/FileUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.mathcs.nlp.common.util.FileUtils; 23 | 24 | /** 25 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class FileUtilsTest 28 | { 29 | @Test 30 | public void replaceExtensionTest() 31 | { 32 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "jpg")); 33 | assertEquals(null , FileUtils.replaceExtension("a", "jpg")); 34 | 35 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "txt", "jpg")); 36 | assertEquals(null , FileUtils.replaceExtension("a.txt", "bmp", "jpg")); 37 | } 38 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tree/PrefixNode.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tree; 17 | 18 | import java.util.HashMap; 19 | 20 | /** 21 | * @since 3.0.3 22 | * @author Jinho D. 
/**
 * Map from keys to child nodes of the same type, plus one optional value
 * stored on the node itself.
 * NOTE(review): the generic header was damaged in this copy of the file; the
 * {@code K extends Comparable<K>} bound is the reconstruction consistent with
 * the surviving text. Confirm against version control.
 *
 * @param <K> key type of the child map.
 * @param <V> type of the value stored on a node.
 * @since 3.0.3
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class PrefixNode<K extends Comparable<K>,V> extends HashMap<K,PrefixNode<K,V>>
{
	private static final long serialVersionUID = 1566684742873455351L;
	private V value;

	/** Creates a node with no children and no value. */
	public PrefixNode()
	{
		value = null;
	}

	/** @return the value stored on this node, or {@code null} if none. */
	public V getValue()
	{
		return value;
	}

	/** @param value the value to store on this node; {@code null} clears it. */
	public void setValue(V value)
	{
		this.value = value;
	}

	/** @return {@code true} if the stored value is not {@code null}. */
	public boolean hasValue()
	{
		return value != null;
	}
}
/**
 * Mutable, serializable triple of one object and two {@code int} values
 * exposed as public fields.
 *
 * @param <T> type of the object component.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntIntTriple<T> implements Serializable
{
	private static final long serialVersionUID = -7014586350906455183L;

	public T   o;
	public int i1;
	public int i2;

	/** Creates a triple holding {@code null}, 0 and 0. */
	public ObjectIntIntTriple()
	{
		this(null, 0, 0);
	}

	/**
	 * @param o  the object component.
	 * @param i1 the first int component.
	 * @param i2 the second int component.
	 */
	public ObjectIntIntTriple(T o, int i1, int i2)
	{
		// Fields are assigned directly rather than through the overridable set().
		this.o  = o;
		this.i1 = i1;
		this.i2 = i2;
	}

	/**
	 * Replaces all three components at once.
	 *
	 * @param o  the new object component.
	 * @param i1 the new first int component.
	 * @param i2 the new second int component.
	 */
	public void set(T o, int i1, int i2)
	{
		this.o  = o;
		this.i1 = i1;
		this.i2 = i2;
	}
}
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class CharTokenizerTest 29 | { 30 | @Test 31 | public void test() 32 | { 33 | CharTokenizer t; 34 | String s; 35 | 36 | t = new CharTokenizer(','); 37 | s = "a,b,c"; 38 | assertEquals("[a, b, c]", Arrays.toString(t.tokenize(s))); 39 | 40 | t = new CharTokenizer(';'); 41 | s = ";abc;def;;ghi;"; 42 | assertEquals("[abc, def, ghi]", Arrays.toString(t.tokenize(s))); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/SoftmaxSmoothedFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.learning.normalization; 17 | 18 | import edu.emory.mathcs.nlp.learning.activation.SoftmaxFunction; 19 | 20 | /** 21 | * @author amit-deshmane 22 | * 23 | */ 24 | public class SoftmaxSmoothedFunction implements NormalizationFunction { 25 | 26 | private static final long serialVersionUID = -675360500573510747L; 27 | private SoftmaxFunction f; 28 | 29 | public SoftmaxSmoothedFunction() { 30 | f = new SoftmaxFunction(); 31 | } 32 | 33 | /* (non-Javadoc) 34 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[]) 35 | */ 36 | @Override 37 | public void apply(float[] scores) { 38 | f.apply(scores); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /cli/src/main/java/edu/emory/mathcs/nlp/bin/util/BinUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.bin.util; 17 | 18 | import org.kohsuke.args4j.CmdLineException; 19 | import org.kohsuke.args4j.CmdLineParser; 20 | 21 | 22 | 23 | /** 24 | * @since 3.0.0 25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class BinUtils 28 | { 29 | private BinUtils() {} 30 | 31 | /** Initializes arguments using args4j. 
*/ 32 | static public void initArgs(String[] args, Object bean) 33 | { 34 | CmdLineParser cmd = new CmdLineParser(bean); 35 | 36 | try 37 | { 38 | cmd.parseArgument(args); 39 | } 40 | catch (CmdLineException e) 41 | { 42 | System.err.println(e.getMessage()); 43 | cmd.printUsage(System.err); 44 | System.exit(1); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/util/CharTokenizerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.Arrays; 21 | 22 | import org.junit.Test; 23 | 24 | import edu.emory.mathcs.nlp.common.util.CharTokenizer; 25 | 26 | /** 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class CharTokenizerTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | CharTokenizer t; 35 | String s; 36 | 37 | t = new CharTokenizer(','); 38 | s = "a,b,c"; 39 | assertEquals("[a, b, c]", Arrays.toString(t.tokenize(s))); 40 | 41 | t = new CharTokenizer(';'); 42 | s = ";abc;def;;ghi;"; 43 | assertEquals("[abc, def, ghi]", Arrays.toString(t.tokenize(s))); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-train-sample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 0.00001 13 | adagrad-mini-batch 14 | 0.02 15 | 0 16 | 17 | 1 18 | 5 19 | 0 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-train-sample-optimized.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 0.00001 13 | adagrad-mini-batch 14 | 0.02 15 | 0 16 | 17 | 1 18 | 3 19 | 0 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/configuration/config-train-doc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | /Users/jdchoi/Documents/EmoryNLP/nlp4j-english/src/main/resources/edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz 14 | 15 | 16 | r3 17 | 18 | 19 | adagrad 20 | 0.01 21 | 0.001 22 | 0 23 | 20 24 | 0 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 
-------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/util/FastUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import it.unimi.dsi.fastutil.floats.FloatArrayList; 19 | import it.unimi.dsi.fastutil.objects.Object2IntMap; 20 | 21 | /** 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class FastUtils 25 | { 26 | static public int increment(Object2IntMap map, K key) 27 | { 28 | return map.merge(key, 1, (oldCount, newCount) -> oldCount + newCount); 29 | } 30 | 31 | static public int increment(Object2IntMap map, K key, int count) 32 | { 33 | return map.merge(key, count, (oldCount, newCount) -> oldCount + newCount); 34 | } 35 | 36 | static public void add(FloatArrayList list, int index, float inc) 37 | { 38 | list.set(index, list.getFloat(index)+inc); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/learning/gridsearch/GridFunctionTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.learning.gridsearch; 17 | 18 | import edu.emory.mathcs.nlp.learning.gridsearch.ExpFunction; 19 | import edu.emory.mathcs.nlp.learning.gridsearch.GridFunction; 20 | import edu.emory.mathcs.nlp.learning.gridsearch.LinearFunction; 21 | 22 | /** 23 | * @author Amit_Deshmane 24 | * 25 | */ 26 | public class GridFunctionTest { 27 | 28 | /** 29 | * @param args 30 | */ 31 | public static void main(String[] args) { 32 | GridFunction f = new ExpFunction(1E-6f, 1f, 10f); 33 | while(f.next()){ 34 | System.out.println(f.getVal()); 35 | } 36 | System.out.println("*************"); 37 | f = new LinearFunction(0.1f, 5f, 10); 38 | while(f.next()){ 39 | System.out.println(f.getVal()); 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/reader/NLPReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.reader; 17 | 18 | import edu.emory.mathcs.nlp.component.template.node.NLPNode; 19 | import it.unimi.dsi.fastutil.objects.Object2IntMap; 20 | 21 | /** 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class NLPReader extends TSVReader 25 | { 26 | public NLPReader() {super();} 27 | 28 | public NLPReader(Object2IntMap map) 29 | { 30 | super(map); 31 | } 32 | 33 | public NLPReader(int form, int lemma, int pos, int feats, int dhead, int deprel, int sheads, int nament) 34 | { 35 | super(form, lemma, pos, feats, dhead, deprel, sheads, nament); 36 | } 37 | 38 | @Override 39 | protected NLPNode create() 40 | { 41 | return new NLPNode(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/conversion/util/HeadRuleMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.conversion.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.mathcs.nlp.common.util.IOUtils; 23 | import edu.emory.mathcs.nlp.conversion.util.HeadRuleMap; 24 | 25 | 26 | /** @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) */ 27 | public class HeadRuleMapTest 28 | { 29 | @Test 30 | public void testHeadRuleMap() 31 | { 32 | String filename = "src/main/resources/edu/emory/mathcs/nlp/conversion/headrule_en_stanford.txt"; 33 | 34 | HeadRuleMap map = new HeadRuleMap(IOUtils.createFileInputStream(filename)); 35 | String str = map.toString(); 36 | 37 | assertEquals(str, new HeadRuleMap(IOUtils.createByteArrayInputStream(str)).toString()); 38 | } 39 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/CompoundTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.mathcs.nlp.common.util.Language; 26 | 27 | /** 28 | * @since 3.0.0 29 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 30 | */ 31 | public class CompoundTest 32 | { 33 | @Test 34 | public void test() 35 | { 36 | Compound dt = new Compound(Language.ENGLISH); 37 | 38 | assertEquals("[I, 'mmm]" , Arrays.toString(dt.tokenize("I'mmm"))); 39 | assertEquals("[wha, d, ya]", Arrays.toString(dt.tokenize("whadya"))); 40 | 41 | assertTrue(dt.tokenize("I'm") == null); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/util/FileExtensionFilter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import java.io.File; 19 | import java.io.FilenameFilter; 20 | 21 | import edu.emory.mathcs.nlp.common.constant.StringConst; 22 | 23 | /** 24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class FileExtensionFilter implements FilenameFilter 27 | { 28 | private String s_extension; 29 | 30 | /** @param extension the extension of files to keep (e.g., {@code "txt"}). 
*/ 31 | public FileExtensionFilter(String extension) 32 | { 33 | s_extension = StringUtils.toLowerCase(extension); 34 | } 35 | 36 | @Override 37 | public boolean accept(File dir, String name) 38 | { 39 | return s_extension.equals(StringConst.ASTERISK) || StringUtils.toLowerCase(name).endsWith(s_extension); 40 | } 41 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/UnitTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | /** 26 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class UnitTest 29 | { 30 | @Test 31 | public void test() 32 | { 33 | Unit dt = new Unit(); 34 | 35 | assertEquals("[1, mg]", Arrays.toString(dt.tokenize("1mg"))); 36 | assertEquals("[1, cm]", Arrays.toString(dt.tokenize("1cm"))); 37 | 38 | assertEquals("[10, MG]", Arrays.toString(dt.tokenize("10MG"))); 39 | assertEquals("[10, CM]", Arrays.toString(dt.tokenize("10CM"))); 40 | 41 | assertTrue(dt.tokenize("1ma") == null); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/treebank/PBArc.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.treebank; 17 | 18 | import edu.emory.mathcs.nlp.common.collection.arc.AbstractArc; 19 | import edu.emory.mathcs.nlp.common.constituent.CTNode; 20 | import edu.emory.mathcs.nlp.common.propbank.PBArgument; 21 | 22 | /** 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class PBArc extends AbstractArc 26 | { 27 | private static final long serialVersionUID = 8603308004980285093L; 28 | 29 | public PBArc(CTNode node, String label) 30 | { 31 | super(node, label); 32 | } 33 | 34 | @Override 35 | public String toString() 36 | { 37 | return node.getTerminalID() + PBArgument.DELIM + label; 38 | } 39 | 40 | @Override 41 | public int compareTo(AbstractArc arc) 42 | { 43 | return node.compareTo(arc.getNode()); 44 | } 45 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/util/Prediction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.util; 17 | 18 | import edu.emory.mathcs.nlp.common.util.MathUtils; 19 | 20 | import java.io.Serializable; 21 | 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public abstract class Prediction implements Serializable, Comparable 27 | { 28 | private static final long serialVersionUID = 4629812694101207696L; 29 | protected float score; 30 | 31 | public Prediction(float score) 32 | { 33 | setScore(score); 34 | } 35 | 36 | public float getScore() 37 | { 38 | return score; 39 | } 40 | 41 | public void setScore(float score) 42 | { 43 | this.score = score; 44 | } 45 | 46 | @Override 47 | public int compareTo(Prediction o) 48 | { 49 | return MathUtils.signum(score - o.score); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/SigmoidFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.normalization; 17 | 18 | /** 19 | * @author amit-deshmane 20 | * 21 | * Well the its just application of sigmoid function
22 | * Callers can rescale the results so that they sum to 1 if needed
23 | * 24 | */ 25 | public class SigmoidFunction implements NormalizationFunction { 26 | 27 | private static final long serialVersionUID = 873532059178086953L; 28 | private edu.emory.mathcs.nlp.learning.activation.SigmoidFunction f; 29 | 30 | public SigmoidFunction() { 31 | f = new edu.emory.mathcs.nlp.learning.activation.SigmoidFunction(); 32 | } 33 | 34 | /* (non-Javadoc) 35 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[]) 36 | */ 37 | @Override 38 | public void apply(float[] scores) { 39 | f.apply(scores); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/zzz/Tmp.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.zzz; 17 | 18 | /** 19 | * @author Jinho D. 
/**
 * Scratch class: the constructor prints "HELLO" ten times followed by {@code true}.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Tmp
{
    /**
     * Calls {@link #get()} ten times, OR-ing the results, and prints the combined value.
     *
     * @param args unused.
     */
    public Tmp(String[] args) throws Exception
    {
        boolean b = false;

        // "|=" does not short-circuit, so get() runs on every iteration.
        for (int i=0; i<10; i++)
            b |= get();

        System.out.println(b);
    }

    /** Prints "HELLO" and returns {@code true}. */
    boolean get()
    {
        System.out.println("HELLO");
        return true;
    }

    /**
     * Tells whether {@code form} should be skipped.
     *
     * @param form the string to check.
     * @return {@code true} if {@code form} has fewer than 3 or more than 20 chars, or contains
     *         a char with code 128 or above; {@code false} otherwise.
     */
    boolean skip(String form)
    {
        char[] cs = form.toCharArray();
        if (cs.length < 3 || cs.length > 20) return true;

        // NOTE(review): the loop header and condition were truncated in the listing
        // ("for (int i=0; i= 128)"); reconstructed as a scan for chars >= 128 -- confirm.
        for (int i=0; i<cs.length; i++)
        {
            if (cs[i] >= 128)
                return true;
        }

        return false;
    }

    static public void main(String[] args) throws Exception
    {
        new Tmp(args);
    }
}
// -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/verbnet/VNXml.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.verbnet; 17 | 18 | /** 19 | * @author Jinho D.
/**
 * String constants grouped by prefix: {@code E_*}, {@code A_*}, {@code ARG_TYPE_*} and
 * {@code SYNRESTR_TYPE_*}.
 * NOTE(review): presumably these are VerbNet XML element names, attribute names and
 * attribute values respectively -- confirm against the code that reads the XML.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface VNXml
{
    // E_* constants
    public static final String E_VNSUBCLASS = "VNSUBCLASS";
    public static final String E_SEMANTICS  = "SEMANTICS";
    public static final String E_SYNRESTR   = "SYNRESTR";
    public static final String E_SYNTAX     = "SYNTAX";
    public static final String E_FRAMES     = "FRAMES";
    public static final String E_FRAME      = "FRAME";
    public static final String E_PRED       = "PRED";
    public static final String E_ARG        = "ARG";

    // A_* constants; note that both "value" and "Value" are defined
    public static final String A_ID        = "ID";
    public static final String A_TYPE      = "type";
    public static final String A_VALUE     = "value";
    public static final String A_VALUE_CAP = "Value";
    public static final String A_BOOL      = "bool";

    // ARG_TYPE_* constants
    public static final String ARG_TYPE_EVENT         = "Event";
    public static final String ARG_TYPE_THEM_ROLE     = "ThemRole";
    public static final String ARG_TYPE_VERB_SPECIFIC = "VerbSpecific";
    public static final String ARG_TYPE_CONSTANT      = "Constant";

    // SYNRESTR_TYPE_* constants
    public static final String SYNRESTR_TYPE_PLURAL = "plural";
}
// -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/EmoticonTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | /** 26 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class EmoticonTest 29 | { 30 | @Test 31 | public void test() 32 | { 33 | Emoticon dt = new Emoticon(); 34 | String s; 35 | 36 | s = ":"; 37 | assertTrue(dt.getEmoticonRange(s) == null); 38 | 39 | s = ":-)"; 40 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s))); 41 | 42 | s = "Hi:-)"; 43 | assertEquals("[2, 5]", Arrays.toString(dt.getEmoticonRange(s))); 44 | 45 | s = ":-).."; 46 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s))); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/Dictionary.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import edu.emory.mathcs.nlp.common.util.CharUtils; 19 | 20 | /** 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public abstract class Dictionary 24 | { 25 | static public String ROOT = "edu/emory/mathcs/nlp/component/tokenizer/dictionary/"; 26 | 27 | public String[] tokenize(String s) 28 | { 29 | char[] lcs = s.toCharArray(); 30 | String lower = CharUtils.toLowerCase(lcs) ? 
new String(lcs) : s; 31 | return tokenize(s, lower, lcs); 32 | } 33 | 34 | /** 35 | * @param original the original string. 36 | * @param lower the lowercase of the original string. 37 | * @param lcs the lowercase character array of the original string. 38 | */ 39 | abstract public String[] tokenize(String original, String lower, char[] lcs); 40 | } 41 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @author Jinho D. 
/**
 * Mutable pair of an object and an {@code int}; pairs are ordered by the {@code int} only.
 *
 * @param <T> the type of the object component.
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntPair<T> implements Serializable, Comparable<ObjectIntPair<T>>
{
    private static final long serialVersionUID = -5228607179375724504L;

    public T   o;
    public int i;

    /** Creates the pair {@code (null, 0)}. */
    public ObjectIntPair()
    {
        set(null, 0);
    }

    /**
     * @param o the object component.
     * @param i the integer component.
     */
    public ObjectIntPair(T o, int i)
    {
        set(o, i);
    }

    /** Replaces both components at once. */
    public void set(T o, int i)
    {
        this.o = o;
        this.i = i;
    }

    /**
     * Compares the integer components; the object component is ignored.
     *
     * @return a negative value, zero, or a positive value as this pair's {@code i} is
     *         less than, equal to, or greater than that of {@code p}.
     */
    @Override
    public int compareTo(ObjectIntPair<T> p)
    {
        // Integer.compare avoids the overflow of "i - p.i" for operands of opposite sign.
        return Integer.compare(i, p.i);
    }

    /** @return {@code "(o,i)"}; a {@code null} object is rendered as {@code "null"}. */
    @Override
    public String toString()
    {
        // String concatenation is null-safe, unlike calling o.toString() directly.
        return "("+o+","+i+")";
    }
}
// -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/util/SparsePrediction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.util; 17 | 18 | /** 19 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public class SparsePrediction extends Prediction 22 | { 23 | private static final long serialVersionUID = -2873195048974695284L; 24 | private int label; 25 | 26 | public SparsePrediction(int label, float score) 27 | { 28 | super(score); 29 | setLabel(label); 30 | } 31 | 32 | public int getLabel() 33 | { 34 | return label; 35 | } 36 | 37 | public void setLabel(int label) 38 | { 39 | this.label = label; 40 | } 41 | 42 | public void copy(SparsePrediction p) 43 | { 44 | set(p.label, p.score); 45 | } 46 | 47 | public void set(int label, float score) 48 | { 49 | setLabel(label); 50 | setScore(score); 51 | } 52 | 53 | @Override 54 | public String toString() 55 | { 56 | return label+":"+score; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/initialization/RandomWeightGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.initialization; 17 | 18 | import edu.emory.mathcs.nlp.common.random.XORShiftRandom; 19 | 20 | import java.util.Random; 21 | 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class RandomWeightGenerator implements WeightGenerator 27 | { 28 | /** 29 | * 30 | */ 31 | private static final long serialVersionUID = 4923093894775449475L; 32 | private float lower_bound; // inclusive 33 | private float upper_bound; // exclusive 34 | private Random rand; 35 | 36 | public RandomWeightGenerator(float lowerBound, float upperBound) 37 | { 38 | lower_bound = lowerBound; 39 | upper_bound = upperBound; 40 | rand = new XORShiftRandom(9); 41 | } 42 | 43 | @Override 44 | public float next() 45 | { 46 | return lower_bound + (upper_bound - lower_bound) * rand.nextFloat(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/SoftmaxFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.activation; 17 | 18 | import edu.emory.mathcs.nlp.common.util.DSUtils; 19 | import org.apache.commons.math3.util.FastMath; 20 | 21 | /** 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class SoftmaxFunction implements ActivationFunction 25 | { 26 | private static final long serialVersionUID = -3091974476056808242L; 27 | 28 | @Override 29 | public void apply(float[] scores) 30 | { 31 | float sum = 0, max = DSUtils.max(scores); 32 | max = 0; 33 | 34 | for (int i=0; i> extends AbstractArc 25 | { 26 | private static final long serialVersionUID = -9099516205158258095L; 27 | private double weight; 28 | 29 | public DEPArc(N node, String label) 30 | { 31 | super(node, label); 32 | } 33 | 34 | public double getWeight() 35 | { 36 | return weight; 37 | } 38 | 39 | public void setWeight(double weight) 40 | { 41 | this.weight = weight; 42 | } 43 | 44 | @Override 45 | public String toString() 46 | { 47 | return node.getID() + LABEL_DELIM + label; 48 | } 49 | 50 | @Override 51 | public int compareTo(AbstractArc arc) 52 | { 53 | return node.compareTo(arc.getNode()); 54 | } 55 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/learning/util/LabelMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.learning.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | /** 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class LabelMapTest 26 | { 27 | @Test 28 | public void test() 29 | { 30 | LabelMap map = new LabelMap(); 31 | 32 | assertEquals(0, map.add("A")); 33 | assertEquals(1, map.add("B")); 34 | assertEquals(2, map.add("C")); 35 | assertEquals(0, map.add("A")); 36 | assertEquals(0, map.add("A")); 37 | assertEquals(2, map.add("C")); 38 | 39 | assertEquals( 0, map.index("A")); 40 | assertEquals( 1, map.index("B")); 41 | assertEquals( 2, map.index("C")); 42 | assertEquals(-1, map.index("D")); 43 | 44 | assertEquals(3, map.size()); 45 | 46 | assertEquals("A", map.getLabel(map.index("A"))); 47 | assertEquals("B", map.getLabel(map.index("B"))); 48 | assertEquals("C", map.getLabel(map.index("C"))); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/EnglishApostropheTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import org.junit.Test; 19 | 20 | import edu.emory.mathcs.nlp.component.tokenizer.dictionary.EnglishApostrophe; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | import static org.junit.Assert.assertTrue; 26 | 27 | /** 28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 29 | */ 30 | public class EnglishApostropheTest 31 | { 32 | @Test 33 | public void test() 34 | { 35 | EnglishApostrophe dt = new EnglishApostrophe(); 36 | 37 | assertEquals("[he, 's]" , Arrays.toString(dt.tokenize("he's"))); 38 | assertEquals("[he, 'S]" , Arrays.toString(dt.tokenize("he'S"))); 39 | assertEquals("[do, n't]", Arrays.toString(dt.tokenize("don't"))); 40 | assertEquals("[do, 'nt]", Arrays.toString(dt.tokenize("do'nt"))); 41 | 42 | assertTrue(dt.tokenize("he'dd") == null); 43 | assertTrue(dt.tokenize("dont") == null); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/Abbreviation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import edu.emory.mathcs.nlp.common.util.DSUtils; 19 | import edu.emory.mathcs.nlp.common.util.IOUtils; 20 | 21 | import java.io.InputStream; 22 | import java.util.Set; 23 | 24 | /** 25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class Abbreviation 28 | { 29 | private Set s_period; 30 | 31 | public Abbreviation() 32 | { 33 | String filename = Dictionary.ROOT + "abbreviation-period.txt"; 34 | init(IOUtils.getInputStreamsFromResource(filename)); 35 | } 36 | 37 | public Abbreviation(InputStream abbreviationPeriod) 38 | { 39 | init(abbreviationPeriod); 40 | } 41 | 42 | public void init(InputStream abbreviationPeriod) 43 | { 44 | s_period = DSUtils.createStringHashSet(abbreviationPeriod, true, true); 45 | } 46 | 47 | public boolean isAbbreviationEndingWithPeriod(String lower) 48 | { 49 | return s_period.contains(lower); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/gridsearch/GridFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/**
 * A cursor over a one-dimensional grid of {@code float} values.
 * The cursor can step forwards and backwards, return to its initial value,
 * and remember one marked position to return to later.
 *
 * @author Amit_Deshmane
 */
public interface GridFunction {

    /**
     * @return the value at the current position in the grid.
     */
    float getVal();

    /**
     * Resets the cursor to its initial value.
     */
    void reset();

    /**
     * Moves to the previous point in the grid.
     *
     * @return {@code true} if a previous point exists; otherwise, {@code false}.
     */
    boolean previous();

    /**
     * Moves to the next point in the grid.
     *
     * @return {@code true} if a next point exists; otherwise, {@code false}.
     */
    boolean next();

    /**
     * Moves the cursor back to the position previously recorded by {@link #mark()}.
     */
    void resetToMark();

    /**
     * Records the current position for a later {@link #resetToMark()}.
     */
    void mark();

}
Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public class StringPrediction extends Prediction 22 | { 23 | private static final long serialVersionUID = 4629812694101207696L; 24 | private String label; 25 | 26 | public StringPrediction(String label, float score) 27 | { 28 | super(score); 29 | setLabel(label); 30 | } 31 | 32 | public String getLabel() 33 | { 34 | return label; 35 | } 36 | 37 | public void setLabel(String label) 38 | { 39 | this.label = label; 40 | } 41 | 42 | public boolean isLabel(String label) 43 | { 44 | return label.equals(this.label); 45 | } 46 | 47 | public void copy(StringPrediction p) 48 | { 49 | set(p.label, p.score); 50 | } 51 | 52 | public void set(String label, float score) 53 | { 54 | setLabel(label); 55 | setScore(score); 56 | } 57 | 58 | @Override 59 | public String toString() 60 | { 61 | return label+":"+score; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/eval/AccuracyEval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.eval; 17 | 18 | import edu.emory.mathcs.nlp.common.util.MathUtils; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class AccuracyEval implements Eval 24 | { 25 | private int correct; 26 | private int total; 27 | 28 | public AccuracyEval() 29 | { 30 | clear(); 31 | } 32 | 33 | public void add(int correct, int total) 34 | { 35 | this.correct += correct; 36 | this.total += total; 37 | } 38 | 39 | public int correct() 40 | { 41 | return correct; 42 | } 43 | 44 | public int total() 45 | { 46 | return total; 47 | } 48 | 49 | @Override 50 | public void clear() 51 | { 52 | correct = total = 0; 53 | } 54 | 55 | @Override 56 | public double score() 57 | { 58 | return MathUtils.accuracy(correct, total); 59 | } 60 | 61 | @Override 62 | public String toString() 63 | { 64 | return String.format("ACC = %5.2f (%d/%d)", score(), correct, total); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /api/src/main/resources/edu/emory/mathcs/nlp/component/tokenizer/dictionary/english-hyphen-prefix.txt: -------------------------------------------------------------------------------- 1 | a 2 | afro 3 | ambi 4 | amphi 5 | an 6 | ana 7 | anglo 8 | ante 9 | anti 10 | apo 11 | arch 12 | astro 13 | auto 14 | be 15 | bi 16 | bio 17 | centi 18 | circum 19 | cis 20 | co 21 | col 22 | com 23 | con 24 | contra 25 | cor 26 | counter 27 | cran 28 | cross 29 | cryo 30 | crypto 31 | de 32 | deca 33 | demi 34 | demo 35 | deuter 36 | deutero 37 | di 38 | dia 39 | dif 40 | dis 41 | du 42 | duo 43 | e 44 | eco 45 | electro 46 | em 47 | en 48 | ennea 49 | epi 50 | euro 51 | ex 52 | extra 53 | fin 54 | fore 55 | franco 56 | geo 57 | giga 58 | gyro 59 | hemi 60 | hepta 61 | hetero 62 | hexa 63 | hi 64 | hind 65 | homo 66 | hydro 67 | hyper 68 | hypo 69 | ideo 70 | idio 71 | in 72 | indo 73 | infra 74 | inter 75 | intra 76 | iso 77 | macro 78 | mal 79 | maxi 80 | medi 81 | mega 82 | meta 83 | micro 84 | mid 85 | midi 86 | milli 87 | mini 88 | mis 89 | mm 90 | mono 91 | multi 92 | neo 93 | non 94 | novem 95 | o 96 | octa 
97 | octo 98 | omni 99 | ortho 100 | out 101 | over 102 | paleo 103 | pan 104 | para 105 | part 106 | ped 107 | penta 108 | per 109 | peri 110 | photo 111 | pica 112 | pod 113 | poly 114 | post 115 | pre 116 | preter 117 | pro 118 | pros 119 | proto 120 | pseudo 121 | pyro 122 | quadri 123 | quasi 124 | quinque 125 | re 126 | retro 127 | self 128 | semi 129 | sept 130 | soci 131 | socio 132 | step 133 | sub 134 | sup 135 | super 136 | supra 137 | sur 138 | syn 139 | t 140 | tele 141 | tera 142 | tetra 143 | trans 144 | tri 145 | twi 146 | u 147 | uber 148 | uh 149 | ultra 150 | un 151 | under 152 | uni 153 | up 154 | vice 155 | with 156 | x -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectFloatPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | import edu.emory.mathcs.nlp.common.util.MathUtils; 21 | 22 | /** 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class ObjectFloatPair implements Serializable, Comparable> 26 | { 27 | private static final long serialVersionUID = -4442614450903889259L; 28 | 29 | public T o; 30 | public float f; 31 | 32 | public ObjectFloatPair(T o, float f) 33 | { 34 | set(o, f); 35 | } 36 | 37 | public void set(T o, float f) 38 | { 39 | this.o = o; 40 | this.f = f; 41 | } 42 | 43 | public T getObject() 44 | { 45 | return o; 46 | } 47 | 48 | public float getFloat() 49 | { 50 | return f; 51 | } 52 | 53 | @Override 54 | public int compareTo(ObjectFloatPair p) 55 | { 56 | return MathUtils.signum(f - p.f); 57 | } 58 | 59 | @Override 60 | public String toString() 61 | { 62 | return "("+o.toString()+","+f+")"; 63 | } 64 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/CurrencyTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertFalse; 20 | import static org.junit.Assert.assertTrue; 21 | 22 | import java.util.Arrays; 23 | 24 | import org.junit.Test; 25 | 26 | /** 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class CurrencyTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | Currency dt = new Currency(); 35 | 36 | assertTrue(dt.isCurrencyDollar("c")); 37 | assertTrue(dt.isCurrencyDollar("us")); 38 | 39 | assertTrue(dt.isCurrency("usd")); 40 | assertTrue(dt.isCurrency("us$")); 41 | 42 | assertFalse(dt.isCurrencyDollar("US")); 43 | assertFalse(dt.isCurrencyDollar("a")); 44 | assertFalse(dt.isCurrency("usb")); 45 | 46 | assertEquals("[USD, 1]", Arrays.toString(dt.tokenize("USD1"))); 47 | assertEquals("[us$, 1]", Arrays.toString(dt.tokenize("us$1"))); 48 | assertTrue(dt.tokenize("u$1") == null); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/HtmlTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | /** 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class HtmlTest 26 | { 27 | @Test 28 | public void test() 29 | { 30 | Html html = new Html(); 31 | StringBuilder build; 32 | String s; 33 | 34 | s = ""&<>"; 35 | assertEquals("\"&<>", html.replace(s)); 36 | 37 | s = "¢£¤¥§©®€"; 38 | build = new StringBuilder(); 39 | 40 | build.append((char)162); 41 | build.append((char)163); 42 | build.append((char)164); 43 | build.append((char)165); 44 | build.append((char)167); 45 | build.append((char)169); 46 | build.append((char)174); 47 | build.append((char)8364); 48 | 49 | assertEquals(build.toString(), html.replace(s)); 50 | 51 | s = "!<&rand;>{"; 52 | assertEquals("!<&rand;>{", html.replace(s)); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/collection/tuple/ObjectDoublePair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.tuple; 17 | 18 | import java.io.Serializable; 19 | 20 | import edu.emory.mathcs.nlp.common.util.MathUtils; 21 | 22 | /** 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class ObjectDoublePair implements Serializable, Comparable> 26 | { 27 | private static final long serialVersionUID = -5228607179375724504L; 28 | 29 | public T o; 30 | public double d; 31 | 32 | public ObjectDoublePair(T o, double d) 33 | { 34 | set(o, d); 35 | } 36 | 37 | public void set(T o, double d) 38 | { 39 | this.o = o; 40 | this.d = d; 41 | } 42 | 43 | public T getObject() 44 | { 45 | return o; 46 | } 47 | 48 | public double getDouble() 49 | { 50 | return d; 51 | } 52 | 53 | @Override 54 | public int compareTo(ObjectDoublePair p) 55 | { 56 | return MathUtils.signum(d - p.d); 57 | } 58 | 59 | @Override 60 | public String toString() 61 | { 62 | return "("+o.toString()+","+d+")"; 63 | } 64 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/lexicon/GlobalLexicon.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2016, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.lexicon; 17 | 18 | import edu.emory.mathcs.nlp.component.template.feature.Field; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class GlobalLexicon 24 | { 25 | private T lexicon; 26 | private Field field; 27 | private String name; 28 | 29 | public GlobalLexicon() {} 30 | 31 | public GlobalLexicon(T lexicon, Field field, String name) 32 | { 33 | setLexicon(lexicon); 34 | setField(field); 35 | setName(name); 36 | } 37 | 38 | public T getLexicon() 39 | { 40 | return lexicon; 41 | } 42 | 43 | public void setLexicon(T lexicon) 44 | { 45 | this.lexicon = lexicon; 46 | } 47 | 48 | public Field getField() 49 | { 50 | return field; 51 | } 52 | 53 | public void setField(Field field) 54 | { 55 | this.field = field; 56 | } 57 | 58 | public String getName() 59 | { 60 | return name; 61 | } 62 | 63 | public void setName(String name) 64 | { 65 | this.name = name; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/collection/ngram/BigramTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.collection.ngram; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.mathcs.nlp.common.collection.tuple.ObjectDoublePair; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class BigramTest 29 | { 30 | @Test 31 | public void test() 32 | { 33 | Bigram map = new Bigram<>(); 34 | 35 | map.add("A", "a1"); 36 | map.add("A", "a2"); 37 | map.add("A", "a1"); 38 | map.add("A", "a3"); 39 | 40 | map.add("B", "b1"); 41 | map.add("B", "b2", 2); 42 | map.add("B", "b3"); 43 | 44 | ObjectDoublePair p = map.getBest("A"); 45 | assertEquals("a1", p.o); 46 | assertEquals(0.5, p.d, 0); 47 | 48 | p = map.getBest("B"); 49 | assertEquals("b2", p.o); 50 | assertEquals(0.5, p.d, 0); 51 | 52 | assertEquals("[(a1,2)]" , map.toList("A", 1).toString()); 53 | assertEquals("[(b2,0.5)]", map.toList("B", 0.4).toString()); 54 | 55 | } 56 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/component/tokenizer/dictionary/DTHyphenTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.dictionary; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | /** 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class DTHyphenTest 27 | { 28 | @Test 29 | public void test() 30 | { 31 | EnglishHyphen dt = new EnglishHyphen(); 32 | 33 | assertTrue(dt.isPrefix("inter")); 34 | assertTrue(dt.isSuffix("ful")); 35 | assertTrue(dt.preserveHyphen("inter-connect".toCharArray(), 5)); 36 | assertTrue(dt.preserveHyphen("beauti-ful".toCharArray(), 6)); 37 | assertTrue(dt.preserveHyphen("b-a-d".toCharArray(), 1)); 38 | assertTrue(dt.preserveHyphen("b-a-d".toCharArray(), 3)); 39 | 40 | assertFalse(dt.preserveHyphen("inte-connect".toCharArray(), 4)); 41 | assertFalse(dt.preserveHyphen("beauti-fu".toCharArray(), 6)); 42 | assertFalse(dt.preserveHyphen("b-c-d".toCharArray(), 1)); 43 | assertFalse(dt.preserveHyphen("b-c-d".toCharArray(), 3)); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /cli/src/main/java/edu/emory/mathcs/nlp/bin/NLPDemo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.bin; 17 | 18 | import edu.emory.mathcs.nlp.common.util.IOUtils; 19 | import edu.emory.mathcs.nlp.common.util.Joiner; 20 | import edu.emory.mathcs.nlp.component.template.node.NLPNode; 21 | import edu.emory.mathcs.nlp.decode.AbstractNLPDecoder; 22 | import edu.emory.mathcs.nlp.decode.NLPDecoder; 23 | 24 | /** 25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class NLPDemo 28 | { 29 | static public void main(String[] args) throws Exception 30 | { 31 | final String configFile = "src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-en.xml"; 32 | final String inputFile = "src/test/resources/dat/nlp4j.txt"; 33 | 34 | NLPDecoder nlp4j = new NLPDecoder(IOUtils.createFileInputStream(configFile)); 35 | NLPNode[] nodes; 36 | 37 | String sentence = "John bought a car for Mary."; 38 | nodes = nlp4j.decode(sentence); 39 | System.out.println(Joiner.join(nodes, "\n", 1)+"\n"); 40 | nlp4j.decode(IOUtils.createFileInputStream(inputFile), System.out, AbstractNLPDecoder.FORMAT_RAW); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/tokenizer/token/TokenIndex.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.component.tokenizer.token; 17 | 18 | 19 | /** 20 | * @author Amit-Deshmane 21 | * This class tracks the value of index variable. 22 | */ 23 | public class TokenIndex 24 | { 25 | int val; 26 | 27 | public TokenIndex() {} 28 | 29 | public TokenIndex(int val) 30 | { 31 | this.val = val; 32 | } 33 | 34 | public int getVal() 35 | { 36 | return val; 37 | } 38 | 39 | public void setVal(int val) 40 | { 41 | this.val = val; 42 | } 43 | 44 | public String toString() 45 | { 46 | return Integer.toString(val); 47 | } 48 | 49 | public boolean equals(Object obj) 50 | { 51 | if(!TokenIndex.class.isInstance(obj)) 52 | { 53 | return false; 54 | } 55 | else 56 | { 57 | TokenIndex input = (TokenIndex)obj; 58 | 59 | if(input.getVal() == val) 60 | { 61 | return true; 62 | } 63 | } 64 | 65 | return false; 66 | } 67 | 68 | public int hashCode() 69 | { 70 | int prime = 31; 71 | int result = 1; 72 | result = result*prime + val; 73 | return result; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/learning/util/FeatureVectorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.learning.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.mathcs.nlp.learning.util.SparseVector; 23 | import edu.emory.mathcs.nlp.learning.util.StringVector; 24 | 25 | /** 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class FeatureVectorTest 29 | { 30 | @Test 31 | public void testSparseVector() 32 | { 33 | SparseVector x = new SparseVector(); 34 | 35 | x.add(2); 36 | x.add(1, 0.2f); 37 | x.add(4, 0.3f); 38 | x.add(3); 39 | 40 | assertEquals("2:1.0 1:0.2 4:0.3 3:1.0", x.toString()); 41 | x.sort(); 42 | assertEquals("1:0.2 2:1.0 3:1.0 4:0.3", x.toString()); 43 | } 44 | 45 | @Test 46 | public void testStringVector() 47 | { 48 | StringVector vector = new StringVector(); 49 | 50 | vector.add((short)2, "B"); 51 | vector.add((short)4, "A", 0.2f); 52 | vector.add((short)1, "A", 0.3f); 53 | vector.add((short)3, "C"); 54 | 55 | assertEquals("2:B:1.0 4:A:0.2 1:A:0.3 3:C:1.0", vector.toString()); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/SoftmaxFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.learning.normalization; 17 | 18 | import edu.emory.mathcs.nlp.common.util.DSUtils; 19 | import org.apache.commons.math3.util.FastMath; 20 | 21 | /** 22 | * @author amit-deshmane 23 | * 24 | */ 25 | public class SoftmaxFunction implements NormalizationFunction { 26 | 27 | private static final long serialVersionUID = -2922860244331616104L; 28 | 29 | public SoftmaxFunction() { 30 | } 31 | 32 | /* (non-Javadoc) 33 | * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[]) 34 | */ 35 | @Override 36 | public void apply(float[] scores) 37 | { 38 | float sum = 0, max = DSUtils.max(scores); 39 | max = 0; 40 | 41 | for (int i=0; i> extends L2RState 27 | { 28 | public POSState(N[] nodes) 29 | { 30 | super(nodes); 31 | } 32 | 33 | @Override 34 | protected String getLabel(N node) 35 | { 36 | return node.getPartOfSpeechTag(); 37 | } 38 | 39 | @Override 40 | protected String setLabel(N node, String label) 41 | { 42 | String s = node.getPartOfSpeechTag(); 43 | node.setPartOfSpeechTag(label); 44 | return s; 45 | } 46 | 47 | @Override 48 | public void next(LabelMap map, int[] top2, float[] scores) 49 | { 50 | if (0 <= top2[1] && scores[top2[0]] - scores[top2[1]] < 1) 51 | getInput().putFeat(NLPUtils.FEAT_POS_2ND, map.getLabel(top2[1])); 52 | 53 | super.next(map, top2, scores); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/constant/CharConst.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the 'License'); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an 'AS IS' BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.constant; 17 | 18 | /** 19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public interface CharConst 22 | { 23 | char PLUS = '+'; 24 | char ASTERISK = '*'; 25 | char EQUAL = '='; 26 | char FW_SLASH = '/'; 27 | char BW_SLASH = '\\'; 28 | char PIPE = '|'; 29 | char UNDERSCORE = '_'; 30 | char HYPHEN = '-'; 31 | char COMMA = ','; 32 | char COLON = ':'; 33 | char SEMICOLON = ';'; 34 | char PERIOD = '.'; 35 | char QUESTION = '?'; 36 | char EXCLAMATION = '!'; 37 | char PERCENT = '%'; 38 | char POUND = '#'; 39 | char DOLLAR = '$'; 40 | char AMPERSAND = '&'; 41 | char AT = '@'; 42 | char TILDA = '~'; 43 | char PRIME = '`'; 44 | char EMPTY = 0; 45 | char LESS_THAN = '<'; 46 | char GREATER_THAN = '>'; 47 | char SINGLE_QUOTE = '\''; 48 | char DOUBLE_QUOTE = '"'; 49 | 50 | char LRB = '('; 51 | char RRB = ')'; 52 | char LCB = '{'; 53 | char RCB = '}'; 54 | char LSB = '['; 55 | char RSB = ']'; 56 | 57 | char ZERO = '0'; 58 | 59 | char SPACE = ' '; 60 | char TAB = '\t'; 61 | char NEW_LINE = '\n'; 62 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/dep/DEPEval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.dep; 17 | 18 | import edu.emory.mathcs.nlp.common.util.MathUtils; 19 | import edu.emory.mathcs.nlp.component.template.eval.Eval; 20 | 21 | /** 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class DEPEval implements Eval 25 | { 26 | private int las, uas; 27 | private int total; 28 | 29 | public DEPEval() 30 | { 31 | clear(); 32 | } 33 | 34 | public void add(int las, int uas, int total) 35 | { 36 | this.las += las; 37 | this.uas += uas; 38 | this.total += total; 39 | } 40 | 41 | public void clear() 42 | { 43 | las = uas = total = 0; 44 | } 45 | 46 | public int total() 47 | { 48 | return total; 49 | } 50 | 51 | public double getLAS() 52 | { 53 | return MathUtils.accuracy(las, total); 54 | } 55 | 56 | public double getUAS() 57 | { 58 | return MathUtils.accuracy(uas, total); 59 | } 60 | 61 | @Override 62 | public double score() 63 | { 64 | return getLAS(); 65 | } 66 | 67 | @Override 68 | public String toString() 69 | { 70 | return String.format("LAS = %5.2f, UAS = %5.2f", getLAS(), getUAS()); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/morph/MorphologicalAnalyzer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.morph; 17 | 18 | import edu.emory.mathcs.nlp.common.util.Language; 19 | import edu.emory.mathcs.nlp.component.morph.english.EnglishMorphAnalyzer; 20 | import edu.emory.mathcs.nlp.component.template.NLPComponent; 21 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode; 22 | 23 | import java.util.List; 24 | 25 | /** 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class MorphologicalAnalyzer> implements NLPComponent 29 | { 30 | private MorphAnalyzer analyzer; 31 | 32 | public MorphologicalAnalyzer(Language language) 33 | { 34 | analyzer = new EnglishMorphAnalyzer(); 35 | } 36 | 37 | @Override 38 | public void process(N[] nodes) 39 | { 40 | N node; 41 | 42 | for (int i=1; i document) 51 | { 52 | for (N[] nodes : document) 53 | process(nodes); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/gridsearch/LinearFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.gridsearch; 17 | 18 | /** 19 | * @author Amit_Deshmane 20 | * 21 | */ 22 | public class LinearFunction implements GridFunction { 23 | 24 | /** 25 | * 26 | */ 27 | public float min; 28 | public float max; 29 | public int steps; 30 | public int index = -1; 31 | public int markIndex = -1; 32 | 33 | public LinearFunction(float min, float max, int steps) { 34 | this.min = min; 35 | this.max = max; 36 | this.steps = steps; 37 | } 38 | 39 | public float getVal() { 40 | return min + index * (max - min)/steps; 41 | } 42 | 43 | public void reset() { 44 | index = 0; 45 | } 46 | 47 | public boolean previous() { 48 | index--; 49 | if(getVal() < min || getVal() > max){ 50 | return false; 51 | } 52 | else return true; 53 | } 54 | 55 | public boolean next() { 56 | index++; 57 | if(getVal() < min || getVal() > max){ 58 | return false; 59 | } 60 | else return true; 61 | } 62 | 63 | @Override 64 | public void mark(){ 65 | markIndex = index; 66 | } 67 | 68 | @Override 69 | public void resetToMark(){ 70 | index = markIndex; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/optimization/reguralization/Regularizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.optimization.reguralization; 17 | 18 | import edu.emory.mathcs.nlp.learning.util.WeightVector; 19 | 20 | import java.io.Serializable; 21 |
/**
 * Base class of regularizers: stores a regularization rate and declares the hooks
 * that concrete regularizers implement.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public abstract class Regularizer implements Serializable
{
	private static final long serialVersionUID = 608089379202097302L;

	/** The regularization rate. */
	protected float rate;

	/** @param rate the regularization rate, stored through {@link #setRate(float)}. */
	public Regularizer(float rate)
	{
		setRate(rate);
	}

	/** @return the regularization rate. */
	public float getRate()
	{
		return this.rate;
	}

	/** @param rate the new regularization rate. */
	public void setRate(float rate)
	{
		this.rate = rate;
	}

	/** Sets the weight vector this regularizer works with. */
	public abstract void setWeightVector(WeightVector vector);

	/** Expands the dimension of necessary vectors with respect to the weight vector. */
	public abstract void expand(int sparseFeatureSize, int denseFeatureSize, int labelSize);

	/** Updates the index'th weight of the weight vector with respect to the regularization. */
	public abstract void updateWeight(int index, float gradient, float learningRate, int steps, boolean sparse);
}
-------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/normalization/CustomFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.normalization; 17 | 18 | /** 19 | * @author amit-deshmane 20 | * 21 | * Jasper's Normalization
22 | * Normalize input. Each new value is the entry divided by the sum over all
23 | * values. If the smallest score is negative, it is first subtracted from every score.
24 | */ 25 |
public class CustomFunction implements NormalizationFunction {

	private static final long serialVersionUID = 3113580872545506521L;

	public CustomFunction() {
	}

	/**
	 * Normalizes {@code scores} in place. The offset is the smallest score when that score
	 * is not positive, otherwise 0; every score is replaced by {@code (score - offset) / total},
	 * where {@code total} is the sum of all shifted scores (1 is used when that sum is 0).
	 *
	 * @see edu.emory.mathcs.nlp.learning.normalization.NormalizationFunction#apply(float[])
	 */
	@Override
	public void apply(float[] scores) {
		final int size = scores.length;

		// find the smallest score, then clamp the offset so positive minima are not subtracted
		float offset = Float.MAX_VALUE;
		for (int i = 0; i < size; i++) {
			if (scores[i] < offset) offset = scores[i];
		}
		if (offset > 0) offset = 0;

		// sum of the shifted scores; a zero sum is replaced by 1 to keep the division defined
		float total = 0;
		for (int i = 0; i < size; i++) {
			total += scores[i] - offset;
		}
		if (total == 0) total = 1;

		for (int i = 0; i < size; i++) {
			scores[i] = (scores[i] - offset) / total;
		}
	}

}
-------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/verbnet/VNFrame.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.verbnet; 17 | 18 | import java.io.Serializable; 19 | 20 | import org.w3c.dom.Element; 21 | 22 | import edu.emory.mathcs.nlp.common.util.XMLUtils; 23 | 24 | /** 25 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class VNFrame implements Serializable 28 | { 29 | private static final long serialVersionUID = 1907495757606414993L; 30 | 31 | private VNSyntax v_syntax; 32 | private VNSemantics v_semantics; 33 | 34 | public VNFrame(Element eFrame) 35 | { 36 | init(eFrame); 37 | } 38 | 39 | private void init(Element eFrame) 40 | { 41 | setSyntax(new VNSyntax(XMLUtils.getFirstElementByTagName(eFrame, VNXml.E_SYNTAX))); 42 | setSemantics(new VNSemantics(XMLUtils.getFirstElementByTagName(eFrame, VNXml.E_SEMANTICS))); 43 | } 44 | 45 | public VNSyntax getSyntax() 46 | { 47 | return v_syntax; 48 | } 49 | 50 | public VNSemantics getSemantics() 51 | { 52 | return v_semantics; 53 | } 54 | 55 | public void setSyntax(VNSyntax syntax) 56 | { 57 | v_syntax = syntax; 58 | } 59 | 60 | public void setSemantics(VNSemantics semantics) 61 | { 62 | v_semantics = semantics; 63 | } 64 | } -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/activation/SigmoidFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.activation; 17 | 18 | import edu.emory.mathcs.nlp.common.util.Sigmoid; 19 | 20 | /** 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class SigmoidFunction implements ActivationFunction 24 | { 25 | private static final long serialVersionUID = 242731926367876732L; 26 | private Sigmoid table; 27 | 28 | /** Calls {@link #SigmoidFunction(int, float, float)}, where size = 3500, floor = -6, ceiling = 6. */ 29 | public SigmoidFunction() 30 | { 31 | table = new Sigmoid(); 32 | } 33 | 34 | /** 35 | * @param size the size of the sigmoid table (10,000 being the highest recommendation). 36 | * @param floor the lower convergence bound. 37 | * @param ceiling the upper convergence bound. 38 | */ 39 | public SigmoidFunction(int size, float floor, float ceiling) 40 | { 41 | table = new Sigmoid(size, floor, ceiling); 42 | } 43 | 44 | @Override 45 | public void apply(float[] scores) 46 | { 47 | for (int i=0; i suffix_matchers; 29 | 30 | public EnglishDerivation(List affixMatchers) 31 | { 32 | init(affixMatchers); 33 | } 34 | 35 | private void init(List affixMatchers) 36 | { 37 | suffix_matchers = affixMatchers; 38 | 39 | if (suffix_matchers == null) 40 | throw new IllegalArgumentException("The suffix matcher list must not be null."); 41 | } 42 | 43 | public List getSuffixMatchers() 44 | { 45 | return suffix_matchers; 46 | } 47 | 48 | public String getBaseForm(String lemma, Set baseSet) 49 | { 50 | String base; 51 | 52 | for (AbstractAffixMatcher matcher : suffix_matchers) 53 | { 54 | base = matcher.getBaseForm(baseSet, lemma); 55 | if (base != null) return base; 56 | } 57 | 58 | return null; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /api/src/test/resources/constituent/functionTags.parse: -------------------------------------------------------------------------------- 1 | (TOP (S (S (NP-SBJ (CC both) 2 | (NNP Bush) 3 | (CC and) 4 | (NNP Rice))) 5 | (VP (VBP have) 6 | (VP (VBN delivered) 7 | (NP (NP (NNS speeches)) 8 | (, ,) 9 | (SBAR (WHNP-1 (WDT which)) 10 | (S (NP-SBJ (-NONE- *T*-1)) 11 | (VP (VBP are) 
12 | (ADJP-PRD (RB very) 13 | (JJ clear)))))))))) 14 | 15 | (TOP (S (NP-SBJ-1 (NNP Mr.) 16 | (NNP Clinton)) 17 | (VP (VBD was) 18 | (VP (VBN joined) 19 | (NP (-NONE- *-1)) 20 | (PP (IN by) 21 | (NP-LGS (JJ several) 22 | (JJ key) 23 | (NN republican) 24 | (NNS leaders))))) 25 | (. .))) 26 | 27 | (TOP (SBARQ (WHNP-1 (WP Who)) 28 | (SQ-CLF (VBZ is) 29 | (NP-SBJ (PRP it)) 30 | (NP-PRD (-NONE- *T*-1)) 31 | (SBAR (WHNP-2 (WDT that)) 32 | (S (NP-SBJ-3 (-NONE- *T*-2)) 33 | (NP-TMP (NN today)) 34 | (VP (VBZ wants) 35 | (S (NP-SBJ (-NONE- *PRO*-3)) 36 | (VP (TO to) 37 | (VP (VB blow) 38 | (NP (NNS things)) 39 | (PRT (RP up)) 40 | (PP-LOC (IN in) 41 | (NP (NNP Lebanon))))))))) 42 | (, ,) 43 | (NP-VOC (NNP Doctor))) 44 | (. ?))) -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/pos/POSTagger.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.component.pos; 17 | 18 | import edu.emory.mathcs.nlp.component.template.OnlineComponent; 19 | import edu.emory.mathcs.nlp.component.template.eval.AccuracyEval; 20 | import edu.emory.mathcs.nlp.component.template.eval.Eval; 21 | import edu.emory.mathcs.nlp.component.template.node.AbstractNLPNode; 22 | 23 | import java.io.InputStream; 24 | import java.util.List; 25 | 26 | /** 27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class POSTagger> extends OnlineComponent> 30 | { 31 | private static final long serialVersionUID = -7926217238116337203L; 32 | 33 | public POSTagger() {super(false);} 34 | 35 | public POSTagger(InputStream configuration) 36 | { 37 | super(false, configuration); 38 | } 39 | 40 | @Override 41 | protected POSState initState(N[] nodes) 42 | { 43 | return new POSState<>(nodes); 44 | } 45 | 46 | @Override 47 | public Eval createEvaluator() 48 | { 49 | return new AccuracyEval(); 50 | } 51 | 52 | @Override 53 | protected POSState initState(List document) {return null;} 54 | 55 | @Override 56 | protected void postProcess(POSState state) {} 57 | } 58 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/conversion/util/HeadRuleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.conversion.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertFalse; 20 | import static org.junit.Assert.assertTrue; 21 | 22 | import org.junit.Test; 23 | 24 | import edu.emory.mathcs.nlp.common.constituent.CTNode; 25 | import edu.emory.mathcs.nlp.conversion.util.HeadRule; 26 | import edu.emory.mathcs.nlp.conversion.util.HeadTagSet; 27 | 28 | 29 |
/** @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */
public class HeadRuleTest
{
	/**
	 * A left-to-right rule built from two ';'-separated tag sets: the first must match
	 * the "NNS" node only, the second the "VBN" node only, and the rule must print
	 * back as the string it was built from.
	 */
	@Test
	public void testHeadRule()
	{
		final String pattern = "NN.*|NP;VB.*|VP";

		HeadRule headRule = new HeadRule(HeadRule.DIR_LEFT_TO_RIGHT, pattern);
		CTNode   noun     = new CTNode("NNS", null);
		CTNode   verb     = new CTNode("VBN", null);

		assertFalse(headRule.isRightToLeft());

		HeadTagSet[] tagSets = headRule.getHeadTags();

		// first tag set: NN.*|NP
		assertTrue (tagSets[0].matches(noun));
		assertFalse(tagSets[0].matches(verb));

		// second tag set: VB.*|VP
		assertFalse(tagSets[1].matches(noun));
		assertTrue (tagSets[1].matches(verb));

		assertEquals(pattern, headRule.toString());
	}
}
-------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/learning/util/FeatureMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | /** 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class FeatureMapTest 26 | { 27 | @Test 28 | public void test() 29 | { 30 | FeatureMap map = new FeatureMap(); 31 | assertEquals(1, map.size()); 32 | 33 | assertEquals(1, map.add(0, "A")); 34 | assertEquals(1, map.add(0, "A")); 35 | assertEquals(2, map.add(0, "B")); 36 | assertEquals(3, map.add(0, "C")); 37 | assertEquals(3, map.add(0, "C")); 38 | assertEquals(4, map.add(1, "A")); 39 | assertEquals(5, map.add(1, "B")); 40 | assertEquals(5, map.add(1, "B")); 41 | assertEquals(6, map.add(1, "C")); 42 | assertEquals(6, map.add(1, "C")); 43 | 44 | assertEquals(1, map.index(0, "A")); 45 | assertEquals(2, map.index(0, "B")); 46 | assertEquals(3, map.index(0, "C")); 47 | assertEquals(4, map.index(1, "A")); 48 | assertEquals(5, map.index(1, "B")); 49 | assertEquals(6, map.index(1, "C")); 50 | 51 | assertEquals(7, map.size()); 52 | 53 | assertEquals(-1, map.index(0, "D")); 54 | assertEquals(-1, map.index(2, "A")); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/learning/optimization/method/Perceptron.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | 
* you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.learning.optimization.method; 17 | 18 | import edu.emory.mathcs.nlp.learning.optimization.StochasticGradientDescent; 19 | import edu.emory.mathcs.nlp.learning.util.Instance; 20 | import edu.emory.mathcs.nlp.learning.util.WeightVector; 21 |
/**
 * Perceptron flavour of {@link StochasticGradientDescent}: trains through
 * {@code trainClassification}, predicts the arg-max label, uses one constant learning
 * rate for every weight, and performs no mini-batch update.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Perceptron extends StochasticGradientDescent
{
	private static final long serialVersionUID = 4996609767585176672L;

	public Perceptron(WeightVector vector, float learningRate, float bias)
	{
		super(vector, learningRate, bias);
	}

	/** Delegates to {@code trainClassification}. */
	@Override
	public void trainAux(Instance instance)
	{
		trainClassification(instance);
	}

	/** @return the index of the highest score of the instance. */
	@Override
	protected int getPredictedLabel(Instance instance)
	{
		return argmax(instance.getScores());
	}

	/** @return the configured learning rate, regardless of {@code index} and {@code sparse}. */
	@Override
	protected float getLearningRate(int index, boolean sparse)
	{
		return learning_rate;
	}

	/** Intentionally empty. */
	@Override
	public void updateMiniBatch() {}

	@Override
	public String toString()
	{
		return "Perceptron";
	}
}
-------------------------------------------------------------------------------- /api/src/test/resources/constituent/normalize.parse: -------------------------------------------------------------------------------- 1 | ( (S (PP (IN In) (NP (NN order) (S (NP-SBJ (-NONE- *PRO*)) (VP (TO
to) (VP (VB determine) (NP (NP (DT the) (NN sequence)) (PP (IN of) (NP (DT the) (JJ entire) (NN transcript))))))))) (, ,) (S (S (NP-SBJ-1=4 (NP (NN RT) (HYPH -) (NN PCR)) (VP (VBG using) (NP (NP (NP (NNS primers)) (PP-LOC (IN in) (NP (NNS exons) (NML (CD 10) (CC and) (CD 11))))) (VP (VBN paired) (NP (-NONE- *)) (PP (IN with) (NP (NP (DT a) (NN primer)) (PP-LOC (IN in) (NP (NN intron) (CD 12))))))))) (VP (VBD was) (VP=3 (VBN performed) (NP-1 (-NONE- *)) (S-MNR (NP-SBJ (-NONE- *PRO*)) (VP (VBG using) (NP (NML (NML (NML (NN BALB) (HYPH /) (NN c)) (NN mouse)) (NN brain)) (JJ total) (NN RNA))))))) (CC and) (S (NP-SBJ-2=4 (DT the) (VBG resulting) (NNS products)) (VP=3 (VBN sequenced) (NP-2 (-NONE- *))))) (. .)) ) 2 | ( (S (NP-SBJ (NN Figure) (CD 1)) (VP (VBZ shows) (NP (NP (DT the) (JJ average) (NN IOP)) (PP (IN of) (NP (NP (NP (DT a) (NN number)) (PP (IN of) (NP (JJ inbred) (NN mouse) (NNS strains)))) (SBAR (WHNP-1 (WDT that)) (S (NP-SBJ-1 (-NONE- *T*)) (VP (VBD were) (VP (VBN housed) (NP-1 (-NONE- *)) (PP (IN in) (NP (DT the) (JJ same) (JJ environmental) (NNS conditions))))))))))) (. .)) ) 3 | ( (S (S (NP-SBJ (NP (PRP It)) (SBAR-1 (-NONE- *EXP*))) (VP (VBZ is) (VP (VBG becoming) (ADJP-PRD (RB increasingly) (JJ clear)) (SBAR-1 (IN that) (S (NP-SBJ (NP (JJ many) (NNS forms)) (PP (IN of) (NP (NN glaucoma)))) (VP (VBP have) (NP (DT a) (JJ genetic) (NN component))))) (PRN (-LRB- [) (NP (CD 6) (, ,) (CD 7)) (-RRB- ]))))) (, ,) (CC and) (S (NP-SBJ-3 (JJ much) (JJ current) (NN research)) (VP (VBZ is) (VP (VBN focused) (NP-3 (-NONE- *)) (PP (IN on) (S-NOM (NP-SBJ (-NONE- *PRO*)) (VP (VBG identifying) (NP (NP (NP (JJ chromosomal) (NNS regions)) (CC and) (NP (NNS genes))) (SBAR (WHNP-2 (WDT that)) (S (NP-SBJ-2 (-NONE- *T*)) (VP (VBP contribute) (PP (IN to) (NP (NN glaucoma)))))))))) (PRN (-LRB- [) (NP (NP (CD 8)) (PP (SYM -) (NP (CD 10)))) (-RRB- ]))))) (. 
.)) ) -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/constituent/CTReaderTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.constituent; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.mathcs.nlp.common.util.IOUtils; 26 | 27 | 28 | /** 29 | * @since 3.0.0 30 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 31 | */ 32 | public class CTReaderTest 33 | { 34 | @Test 35 | public void testCTReader() throws Exception 36 | { 37 | String filename = "src/test/resources/constituent/constituent.parse"; 38 | CTReader reader = new CTReader(IOUtils.createFileInputStream(filename)); 39 | CTTree tree; 40 | 41 | StringBuilder build = new StringBuilder(); 42 | List trees = new ArrayList<>(); 43 | String tmp; 44 | 45 | while ((tree = reader.nextTree()) != null) 46 | { 47 | tmp = tree.toString(); 48 | trees.add(tmp); 49 | build.append(tmp); 50 | } 51 | 52 | reader.close(); 53 | 54 | reader = new CTReader(IOUtils.createByteArrayInputStream(build.toString())); 55 | int i; 56 | 57 | for (i=0; (tree = reader.nextTree()) != null; i++) 58 | assertEquals(trees.get(i), tree.toString()); 59 | 60 | reader.close(); 61 | } 62 | } -------------------------------------------------------------------------------- /cli/src/main/java/edu/emory/mathcs/nlp/zzz/RadiologyDecode.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.mathcs.nlp.zzz; 17 | 18 | import edu.emory.mathcs.nlp.common.util.FileUtils; 19 | import edu.emory.mathcs.nlp.common.util.IOUtils; 20 | import edu.emory.mathcs.nlp.decode.AbstractNLPDecoder; 21 | import edu.emory.mathcs.nlp.decode.NLPDecoder; 22 | 23 | /** 24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class RadiologyDecode 27 | { 28 | static public void main(String[] args) throws Exception 29 | { 30 | final String configFile = "/Users/jdchoi/Documents/EmoryNLP/nlp4j/src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-deident.xml"; 31 | final String inputDir = "/Users/jdchoi/Desktop/radiology/Q2"; 32 | final String inputExt = "txt"; 33 | final String outputExt = "tsv"; 34 | final String outputFormat = AbstractNLPDecoder.FORMAT_LINE; 35 | 36 | NLPDecoder nlp4j = new NLPDecoder(IOUtils.createFileInputStream(configFile)); 37 | 38 | for (String inputFile : FileUtils.getFileList(inputDir, inputExt)) 39 | { 40 | System.out.println(inputFile); 41 | String outputFile = inputFile+"."+outputExt; 42 | nlp4j.decode(IOUtils.createFileInputStream(inputFile), IOUtils.createFileOutputStream(outputFile), outputFormat); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/common/util/HashUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * A mutable (index, value) pair, ordered by index.
 * <p>
 * NOTE(review): the class declaration line was unreadable in the reviewed text; the
 * implemented interfaces below are reconstructed from {@code serialVersionUID} and the
 * {@code @Override compareTo(SparseItem)} method - confirm against version control.
 */
public class SparseItem implements java.io.Serializable, Comparable<SparseItem>
{
	private static final long serialVersionUID = -8933673050278448784L;
	private int   index;
	private float value;

	/**
	 * Creates an item at the given index with the value {@code 1}.
	 * @param index the index of this item.
	 */
	public SparseItem(int index)
	{
		this(index, 1f);
	}

	/**
	 * Creates an item with the given index and value.
	 * @param index the index of this item.
	 * @param value the value of this item.
	 */
	public SparseItem(int index, float value)
	{
		set(index, value);
	}

	/** @return the index of this item. */
	public int getIndex()
	{
		return index;
	}

	/** @return the value of this item. */
	public float getValue()
	{
		return value;
	}

	/** @param index the new index of this item. */
	public void setIndex(int index)
	{
		this.index = index;
	}

	/** @param value the new value of this item. */
	public void setValue(float value)
	{
		this.value = value;
	}

	/**
	 * Replaces both the index and the value of this item.
	 * @param index the new index.
	 * @param value the new value.
	 */
	public void set(int index, float value)
	{
		setIndex(index);
		setValue(value);
	}

	/**
	 * Copies the index and the value from another item.
	 * @param item the item to copy from.
	 */
	public void set(SparseItem item)
	{
		set(item.index, item.value);
	}

	/**
	 * Orders items by ascending index; the value is not considered.
	 * @param o the item to compare against.
	 * @return a negative, zero, or positive number if this index is smaller than, equal to, or greater than {@code o}'s.
	 */
	@Override
	public int compareTo(SparseItem o)
	{
		// Integer.compare instead of subtraction: index - o.index overflows (and flips sign)
		// when the two indices have opposite signs and a large enough gap.
		return Integer.compare(index, o.index);
	}

	/** @return this item as {@code index:value}. */
	@Override
	public String toString()
	{
		return index+":"+value;
	}
}
/**
 * An {@code OnlineComponent} subclass whose states are {@code ItState} instances created
 * from a whole document (a {@code List}); the per-node-array {@code initState} variant
 * returns {@code null}.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
// NOTE(review): the type parameters on this declaration (and on ItState / List below) look
// like they were lost in extraction - restore them from version control before compiling.
public class ItClassifier> extends OnlineComponent>
{
	private static final long serialVersionUID = 3585863417135590906L;
	
	/**
	 * Creates a classifier without a configuration.
	 * Passes {@code true} as the first super-constructor argument
	 * (presumably the document-level flag of {@code OnlineComponent} - TODO confirm).
	 */
	public ItClassifier() {super(true);}
	
	/**
	 * Creates a classifier from a configuration stream.
	 * @param configuration the stream handed to the super constructor together with {@code true}.
	 */
	public ItClassifier(InputStream configuration)
	{
		super(true, configuration);
	}
	
	/**
	 * Creates the processing state for a document.
	 * @param document the document to wrap.
	 * @return a new {@code ItState} built from {@code document}.
	 */
	@Override
	protected ItState initState(List document)
	{
		return new ItState<>(document);
	}
	
	/**
	 * Sets {@code feature_template} to a new {@code ItFeatureTemplate} built from the
	 * configuration's feature-template element and the current hyper-parameter.
	 */
	@Override
	public void initFeatureTemplate()
	{
		feature_template = new ItFeatureTemplate<>(config.getFeatureTemplateElement(), getHyperParameter());
	}
	
	/**
	 * @return a new {@code ItEval} constructed with the argument {@code 4}
	 * (the meaning of 4 is defined by {@code ItEval}; presumably a label count - TODO confirm).
	 */
	@Override
	public Eval createEvaluator()
	{
		return new ItEval(4);
	}
	
	/** Does nothing; this component has no post-processing step. */
	@Override
	protected void postProcess(ItState state) {}
	
	/**
	 * Node-array variant of state creation; always returns {@code null} in this component.
	 * @param nodes ignored.
	 * @return {@code null}.
	 */
	@Override
	protected ItState initState(N[] nodes) {return null;}
}
/**
 * An {@code OnlineComponent} subclass whose states are {@code NERState} instances created
 * from a node array; the document ({@code List}) {@code initState} variant returns {@code null}.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
// NOTE(review): the type parameters on this declaration (and on NERState / List below) look
// like they were lost in extraction - restore them from version control before compiling.
public class NERTagger> extends OnlineComponent>
{
	private static final long serialVersionUID = 87807440372806016L;

	/**
	 * Creates a tagger without a configuration.
	 * Passes {@code false} as the first super-constructor argument
	 * (presumably the document-level flag of {@code OnlineComponent} - TODO confirm).
	 */
	public NERTagger() {super(false);}
	
	/**
	 * Creates a tagger from a configuration stream.
	 * @param configuration the stream handed to the super constructor together with {@code false}.
	 */
	public NERTagger(InputStream configuration)
	{
		super(false, configuration);
	}
	
//	============================== ABSTRACT ==============================
	
	/** @return a new {@code F1Eval}. */
	@Override
	public Eval createEvaluator()
	{
		return new F1Eval();
	}
	
	/**
	 * Creates the processing state for a node array.
	 * @param nodes the nodes to wrap.
	 * @return a new {@code NERState} built from {@code nodes}.
	 */
	@Override
	protected NERState initState(N[] nodes)
	{
		return new NERState<>(nodes);
	}
	
	/**
	 * Delegates to {@code state.postProcess()}.
	 * @param state the state to post-process.
	 */
	@Override
	protected void postProcess(NERState state)
	{
		state.postProcess();
	}

	/**
	 * Document variant of state creation; always returns {@code null} in this component.
	 * @param document ignored.
	 * @return {@code null}.
	 */
	@Override
	protected NERState initState(List document) {return null;}
}
/**
 * Identifiers of the feature fields, grouped by the kind of information each one names.
 * The declaration order is part of the contract ({@code ordinal()} / {@code values()}),
 * so new constants should be appended to their group with care.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum Field {
	// ---------- word form ----------
	word_form,
	word_form_lowercase,
	word_form_undigitalized,
	word_form_simplified,
	word_form_simplified_lowercase,
	word_shape,
	word_shape_lowercase,
	orthographic,
	orthographic_lowercase,
	prefix,
	suffix,

	// ---------- part-of-speech tagging ----------
	lemma,
	feats,
	part_of_speech_tag,
	ambiguity_classes,

	// ---------- named entity recognition ----------
	named_entity_tag,

	// ---------- dependency parsing ----------
	dependency_label,
	dependent_set,
	distance,
	valency,

	// ---------- lexica ----------
	word_clusters,
	word_embedding,
	named_entity_gazetteers,

	// ---------- boolean ----------
	positional,

	// ---------- document: bag of words ----------
	bag_of_words,
	bag_of_words_norm,
	bag_of_words_count,

	// ---------- document: bag of words, stopwords variant ----------
	bag_of_words_stopwords,
	bag_of_words_stopwords_norm,
	bag_of_words_stopwords_count,

	// ---------- document: bag of clusters ----------
	bag_of_clusters,
	bag_of_clusters_norm,
	bag_of_clusters_count,

	// ---------- document: bag of clusters, stopwords variant ----------
	bag_of_clusters_stopwords,
	bag_of_clusters_stopwords_norm,
	bag_of_clusters_stopwords_count
}
/**
 * Base class for affix replacers: holds a base part-of-speech tag, an affix form and an
 * array of replacement strings, and leaves the actual base-form lookup to subclasses.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
abstract public class AbstractAffixReplacer
{
	/** The base part-of-speech tag returned by {@link #getBasePOS()}. */
	protected String   s_basePOS;
	/** The affix form this replacer is configured with. */
	protected String   s_affixForm;
	/** The replacement strings; the array passed to the constructor is stored without copying. */
	protected String[] s_replacements;
	
	/**
	 * Stores the three arguments in the corresponding fields.
	 * @param basePOS the base part-of-speech tag.
	 * @param affixForm the affix form.
	 * @param replacements the replacement strings (kept by reference).
	 */
	public AbstractAffixReplacer(String basePOS, String affixForm, String[] replacements)
	{
		s_basePOS      = basePOS;
		s_affixForm    = affixForm;
		s_replacements = replacements;
	}
	
	/** @return the base part-of-speech tag given at construction. */
	public String getBasePOS()
	{
		return s_basePOS;
	}
	
	/**
	 * Returns the base morpheme of the word form if it exists; otherwise, {@code null}.
	 * @param baseMap the map consulted for base forms
	 *        (NOTE(review): its type arguments are missing from this declaration - presumably
	 *        a map from strings to sets of strings; confirm against version control).
	 * @param form the word-form in lower-case.
	 * @return the base morpheme of the word form if it exists; otherwise, {@code null}.
	 */
	abstract public String getBaseForm(Map> baseMap, String form);
	
	/**
	 * Returns the base morpheme of the word form if it exists; otherwise, {@code null}.
	 * @param baseSet the set consulted for base forms (presumably a set of strings - TODO confirm).
	 * @param form the word-form in lower-case.
	 * @return the base morpheme of the word form if it exists; otherwise, {@code null}.
	 */
	abstract public String getBaseForm(Set baseSet, String form);
}
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class PBInstanceTest 26 | { 27 | @Test 28 | public void test() 29 | { 30 | String gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 21:2-ARG1 20:0-rel 18:0-ARG0 17:1-ARGM-MNR 18:0*11:1-LINK-PCR 17:1*15:1-LINK-SLC"; 31 | PBInstance instance = new PBInstance(gold); 32 | 33 | assertEquals("20:0-rel", instance.getArgument(1).toString()); 34 | 35 | gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 11:1*18:0-LINK-PCR 15:1*17:1-LINK-SLC 17:1-ARGM-MNR 18:0-ARG0 20:0-rel 21:2-ARG1"; 36 | instance.sortArguments(); 37 | assertEquals(gold, instance.toString()); 38 | assertEquals(instance.getArgument(3), instance.getFirstArgument("ARG0")); 39 | 40 | gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 11:1*18:0-LINK-PCR 15:1*17:1-LINK-SLC 17:1-ARGM-MNR 20:0-rel 21:2-ARG1"; 41 | instance.removeArguments("ARG0"); 42 | assertEquals(gold, instance.toString()); 43 | 44 | gold = "wsj_2100.parse 8 20 gold get-v get.04 ----- 11:1*18:0-LINK-PCR 15:1*17:1-LINK-SLC 17:1-ARGM-MNR 20:0-rel 21:2-ARG1"; 45 | instance.removeArguments("ARG0"); 46 | assertEquals(gold, instance.toString()); 47 | } 48 | } -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/util/SplitterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.util; 17 | 18 | import edu.emory.mathcs.nlp.common.util.Splitter; 19 | import org.junit.Test; 20 | 21 | import java.util.regex.Pattern; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class SplitterTest 29 | { 30 | @Test 31 | public void testSplitIncludingMatches() 32 | { 33 | Pattern pd = Pattern.compile("\\d+"); 34 | Pattern pa = Pattern.compile("\\p{Lower}+"); 35 | String s, t; 36 | 37 | s = "ab12cd34ef56gh"; 38 | t = "[ab, 12, cd, 34, ef, 56, gh]"; 39 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 40 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 41 | 42 | s = "12cd34ef56"; 43 | t = "[12, cd, 34, ef, 56]"; 44 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 45 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 46 | 47 | s = "1234"; 48 | t = "[1234]"; 49 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 50 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 51 | 52 | s = "abcd"; 53 | t = "[abcd]"; 54 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 55 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /api/src/main/java/edu/emory/mathcs/nlp/component/template/train/LOLS.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.component.template.train; 17 | 18 | import edu.emory.mathcs.nlp.common.random.XORShiftRandom; 19 | 20 | import java.util.Random; 21 | 22 | 23 | /** 24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class LOLS 27 | { 28 | private int fixed_stage; 29 | private double decaying_rate; 30 | private double gold_probability; 31 | private Random random; 32 | 33 | public LOLS(int fixedStage, double decayingRate) 34 | { 35 | init(fixedStage, decayingRate); 36 | } 37 | 38 | private void init(int fixedStage, double decayingRate) 39 | { 40 | fixed_stage = fixedStage; 41 | decaying_rate = decayingRate; 42 | gold_probability = 1d; 43 | random = new XORShiftRandom(9); 44 | } 45 | 46 | public void updateGoldProbability() 47 | { 48 | if (fixed_stage <= 0) 49 | gold_probability *= decaying_rate; 50 | else 51 | fixed_stage--; 52 | } 53 | 54 | public double getGoldProbability() 55 | { 56 | return gold_probability; 57 | } 58 | 59 | public boolean chooseGold() 60 | { 61 | return (gold_probability > 0) && (gold_probability >= 1 || gold_probability > random.nextDouble()); 62 | } 63 | 64 | @Override 65 | public String toString() 66 | { 67 | return String.format("LOLS: fixed = %d, decaying rate = %s", fixed_stage, decaying_rate); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /api/src/test/java/edu/emory/mathcs/nlp/common/util/SplitterTest.java: -------------------------------------------------------------------------------- 1 | 
/** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.mathcs.nlp.common.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.regex.Pattern; 21 | 22 | import org.junit.Test; 23 | 24 | import edu.emory.mathcs.nlp.common.util.Splitter; 25 | 26 | /** 27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class SplitterTest 30 | { 31 | @Test 32 | public void testSplitIncludingMatches() 33 | { 34 | Pattern pd = Pattern.compile("\\d+"); 35 | Pattern pa = Pattern.compile("\\p{Lower}+"); 36 | String s, t; 37 | 38 | s = "ab12cd34ef56gh"; 39 | t = "[ab, 12, cd, 34, ef, 56, gh]"; 40 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 41 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 42 | 43 | s = "12cd34ef56"; 44 | t = "[12, cd, 34, ef, 56]"; 45 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 46 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 47 | 48 | s = "1234"; 49 | t = "[1234]"; 50 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 51 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 52 | 53 | s = "abcd"; 54 | t = "[abcd]"; 55 | assertEquals(t, Splitter.splitIncludingMatches(pd, s).toString()); 56 | assertEquals(t, Splitter.splitIncludingMatches(pa, s).toString()); 57 | } 58 | } 59 | 
--------------------------------------------------------------------------------