├── README.md
├── src
├── test
│ ├── resources
│ │ ├── classification
│ │ │ └── model
│ │ │ │ ├── multi-sparse.train
│ │ │ │ ├── binary-sparse.train
│ │ │ │ ├── multi-string.train
│ │ │ │ └── binary-string.train
│ │ ├── propbank
│ │ │ ├── wsj_0001.prop
│ │ │ ├── wsj.prop
│ │ │ ├── sample.prop
│ │ │ ├── wsj_0001.parse
│ │ │ └── wsj_0002.parse
│ │ ├── feature
│ │ │ └── common
│ │ │ │ ├── dependency.txt
│ │ │ │ └── feature_common.xml
│ │ ├── nlp
│ │ │ ├── trainer
│ │ │ │ ├── feature_pos.xml
│ │ │ │ ├── feature_dep.xml
│ │ │ │ └── pos.cnlp
│ │ │ └── configuration
│ │ │ │ └── configure.xml
│ │ ├── dependency
│ │ │ └── dependency.cnlp
│ │ └── constituent
│ │ │ ├── functionTags.parse
│ │ │ └── normalize.parse
│ └── java
│ │ └── edu
│ │ └── emory
│ │ └── clir
│ │ └── clearnlp
│ │ ├── verbnet
│ │ └── VNTagTest.java
│ │ ├── util
│ │ ├── MathUtilsTest.java
│ │ ├── CharUtilsTest.java
│ │ ├── FileUtilsTest.java
│ │ ├── CharTokenizerTest.java
│ │ └── arc
│ │ │ └── SRLArcTest.java
│ │ ├── dictionary
│ │ ├── english
│ │ │ ├── DTAbbreviationTest.java
│ │ │ └── DTHyphenTest.java
│ │ └── universal
│ │ │ ├── DTCompoundTest.java
│ │ │ ├── DTUnitTest.java
│ │ │ ├── DTEmoticonTest.java
│ │ │ ├── DTHtmlTest.java
│ │ │ └── DTCurrencyTest.java
│ │ ├── headrule
│ │ ├── HeadRuleMapTest.java
│ │ ├── HeadTagSetTest.java
│ │ └── HeadRuleTest.java
│ │ ├── component
│ │ └── configuration
│ │ │ ├── DEPConfigurationTest.java
│ │ │ └── POSConfigurationTest.java
│ │ ├── collection
│ │ ├── stack
│ │ │ └── StackTest.java
│ │ └── ngram
│ │ │ ├── BigramTest.java
│ │ │ └── UnigramTest.java
│ │ ├── dependency
│ │ └── DEPFeatTest.java
│ │ ├── propbank
│ │ └── PBLocationTest.java
│ │ ├── tokenization
│ │ └── english
│ │ │ └── ApostropheTokenizerTest.java
│ │ ├── vector
│ │ └── VectorSpaceModelTest.java
│ │ └── constituent
│ │ └── CTReaderTest.java
└── main
│ ├── scripts
│ ├── rsync.sh
│ ├── count-deps.py
│ └── conll2clear.py
│ ├── resources
│ ├── samples
│ │ ├── clearnlp.txt
│ │ ├── clearnlp.txt.tok
│ │ ├── wsj_0001.parse
│ │ ├── wsj_0001.parse.dep
│ │ └── clearnlp.txt.cnlp
│ ├── configure
│ │ ├── log4j.properties
│ │ ├── config_decode_dep.xml
│ │ ├── config_sequence.xml
│ │ ├── config_future.xml
│ │ ├── config_decode_srl.xml
│ │ ├── config_train_pos.xml
│ │ ├── config_decode_ner.xml
│ │ ├── config_train_dep.xml
│ │ ├── config_train_ner.xml
│ │ └── config_train_srl.xml
│ ├── headrules
│ │ ├── headrule_en_stanford.txt
│ │ └── headrule_en_conll.txt
│ └── features
│ │ ├── feature_en_pos.xml
│ │ ├── feature_en_ner_conll03.xml
│ │ └── feature_en_ner.xml
│ └── java
│ └── edu
│ └── emory
│ └── clir
│ └── clearnlp
│ ├── ner
│ ├── BILOU.java
│ └── NERLib.java
│ ├── util
│ ├── adapter
│ │ └── Adapter1.java
│ ├── constant
│ │ ├── MetaConst.java
│ │ └── CharConst.java
│ ├── lang
│ │ └── TLanguage.java
│ ├── ObjectSizeFetcher.java
│ ├── arc
│ │ ├── DEPArc.java
│ │ └── PBArc.java
│ ├── io
│ │ └── FileExtensionFilter.java
│ ├── BinUtils.java
│ └── HashUtils.java
│ ├── srl
│ └── matcher
│ │ ├── SRLArcMatcher.java
│ │ ├── SRLArcMatcherTrue.java
│ │ ├── SRLArcMatcherSet.java
│ │ └── SRLArcMatcherPattern.java
│ ├── component
│ ├── mode
│ │ ├── srl
│ │ │ ├── SRLTransition.java
│ │ │ └── SRLEval.java
│ │ ├── morph
│ │ │ ├── DefaultMPAnalyzer.java
│ │ │ └── AbstractMPAnalyzer.java
│ │ ├── pos
│ │ │ ├── POSEval.java
│ │ │ └── POSFeatureExtractor.java
│ │ └── dep
│ │ │ └── DEPFeatureExtractor.java
│ ├── utils
│ │ ├── NLPMode.java
│ │ └── CFlag.java
│ ├── AbstractComponent.java
│ ├── evaluation
│ │ ├── AbstractEval.java
│ │ └── AbstractF1Eval.java
│ └── configuration
│ │ └── DecodeConfiguration.java
│ ├── classification
│ ├── trainer
│ │ └── TrainerType.java
│ ├── instance
│ │ ├── SparseInstance.java
│ │ ├── StringInstance.java
│ │ ├── SparseInstanceCollector.java
│ │ └── AbstractInstance.java
│ ├── prediction
│ │ ├── AbstractPrediction.java
│ │ └── StringPrediction.java
│ └── configuration
│ │ ├── AbstractTrainerConfiguration.java
│ │ └── LiblinearTrainerConfiguration.java
│ ├── feature
│ ├── type
│ │ ├── DirectionType.java
│ │ ├── SourceType.java
│ │ ├── FeatureType.java
│ │ ├── FeatureXml.java
│ │ └── RelationType.java
│ └── common
│ │ └── OrthographicType.java
│ ├── lexicon
│ ├── wordnet
│ │ ├── WNPOSTag.java
│ │ ├── WNRelation.java
│ │ └── WNIndexMap.java
│ ├── dbpedia
│ │ ├── DBPediaInfoMap.java
│ │ └── DBPediaXML.java
│ ├── verbnet
│ │ ├── VNMap.java
│ │ ├── VNXml.java
│ │ └── VNFrame.java
│ ├── propbank
│ │ └── frameset
│ │ │ ├── PBFType.java
│ │ │ └── PBFXml.java
│ └── wikipedia
│ │ ├── WikiPrint.java
│ │ ├── WikiParagraph.java
│ │ └── WikiIndex.java
│ ├── reader
│ ├── TReader.java
│ ├── LineReader.java
│ └── RawReader.java
│ ├── collection
│ ├── pair
│ │ ├── StringIntPair.java
│ │ ├── CharIntPair.java
│ │ ├── IntIntPair.java
│ │ ├── BooleanIntPair.java
│ │ ├── DoubleIntPair.java
│ │ ├── CharCharPair.java
│ │ ├── ObjectCharPair.java
│ │ ├── ObjectIntPair.java
│ │ ├── Pair.java
│ │ └── ObjectDoublePair.java
│ ├── triple
│ │ ├── DoubleIntIntTriple.java
│ │ ├── BooleanIntIntTriple.java
│ │ ├── ObjectIntIntTriple.java
│ │ └── Triple.java
│ ├── tree
│ │ └── PrefixNode.java
│ ├── set
│ │ ├── IntHashSet.java
│ │ ├── CharHashSet.java
│ │ └── DisjointSet.java
│ ├── stack
│ │ └── Stack.java
│ └── heap
│ │ └── DoubleBinaryHeap.java
│ ├── dictionary
│ ├── PathNamedEntity.java
│ ├── AbstractDTTokenizer.java
│ ├── PathEnglishMPAnalyzer.java
│ ├── PathTokenizer.java
│ └── english
│ │ └── DTAbbreviation.java
│ ├── pos
│ └── POSTag.java
│ ├── cluster
│ ├── StringVector.java
│ ├── Cluster.java
│ ├── AbstractCluster.java
│ └── Term.java
│ ├── experiment
│ └── AbstractArgsReader.java
│ ├── bin
│ └── PrintTree.java
│ └── morphology
│ └── english
│ └── EnglishDerivation.java
└── LICENSE.txt
/README.md:
--------------------------------------------------------------------------------
1 | This project has moved to https://github.com/emorynlp/nlp4j.
2 |
--------------------------------------------------------------------------------
/src/test/resources/classification/model/multi-sparse.train:
--------------------------------------------------------------------------------
1 | 2 4 2 3
2 | 0 1 5
3 | 1 1 2
4 | 0 3
5 | 1 6
--------------------------------------------------------------------------------
/src/test/resources/classification/model/binary-sparse.train:
--------------------------------------------------------------------------------
1 | 1 5 2 11
2 | 0 6 2 10
3 | 1 4 7 3
4 | 0 1 9 12
5 | 0 1 8 3
--------------------------------------------------------------------------------
/src/test/resources/classification/model/multi-string.train:
--------------------------------------------------------------------------------
1 | sunny 0:bright 1:dry 2:bright
2 | rainy 0:dark 1:wet
3 | cloudy 0:dark 1:dry
4 | rainy 2:bright
5 | cloudy 2:dark
--------------------------------------------------------------------------------
/src/test/resources/classification/model/binary-string.train:
--------------------------------------------------------------------------------
1 | male 0:jinho 1:choi 2:d
2 | female 0:jeany 1:choi 2:e
3 | male 0:james 1:martin 2:h
4 | female 0:martha 1:palmer 2:s
5 | female 0:martha 1:stewart 2:h
--------------------------------------------------------------------------------
/src/test/resources/propbank/wsj_0001.prop:
--------------------------------------------------------------------------------
1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP
2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2
3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0
4 |
--------------------------------------------------------------------------------
/src/main/scripts/rsync.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Archives the ./edu directory into a jar and copies the jar to two remote lib directories.
#
# $1 (optional): suffix appended to the jar file name created by `jar`.
#
# The suffix is quoted so a value containing whitespace or glob characters
# cannot be split or expanded by the shell.
jar cf "clearnlp.jar$1" edu
#rsync -avc clearnlp.jar$1 jdchoi@ainos.mathcs.emory.edu:/home/jdchoi/lib
# NOTE(review): the transfers below always use the unsuffixed clearnlp.jar, so a
# jar built with a non-empty $1 is not the file that gets uploaded -- confirm
# whether that is intended.
rsync -avc clearnlp.jar choi@lab0z.mathcs.emory.edu:/home/choi/lib
scp choi@lab0z.mathcs.emory.edu:/home/choi/lib/clearnlp.jar jdchoi@ainos.mathcs.emory.edu:/home/jdchoi/lib/clearnlp.jar
6 |
--------------------------------------------------------------------------------
/src/main/resources/samples/clearnlp.txt:
--------------------------------------------------------------------------------
1 | The ClearNLP project provides software and resources for natural language processing. It is developed by the Center for Language and Information Research (CLIR) at Emory University. Please join our discussion group if you want to get notifications about new updates or post issues, suggestions, questions, etc.
2 |
--------------------------------------------------------------------------------
/src/main/resources/samples/clearnlp.txt.tok:
--------------------------------------------------------------------------------
1 | The ClearNLP project provides software and resources for natural language processing .
2 | It is developed by the Center for Language and Information Research ( CLIR ) at Emory University .
3 | Please join our discussion group if you want to get notifications about new updates or post issues , suggestions , questions , etc .
4 |
--------------------------------------------------------------------------------
/src/main/resources/configure/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set root logger level to DEBUG and its only appender to A1.
2 | log4j.rootLogger=DEBUG, A1
3 |
4 | # A1 is set to be a ConsoleAppender.
5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender
6 |
7 | # A1 uses PatternLayout.
8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout
9 | log4j.appender.A1.layout.conversionPattern=%m
10 |
--------------------------------------------------------------------------------
/src/test/resources/propbank/wsj.prop:
--------------------------------------------------------------------------------
1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP
2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2
3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0
4 | propbank/wsj_0002.parse 0 16 gold name-v name.01 ----- 0:2*17:1-ARG1 16:0-rel 18:2-ARG2
5 |
--------------------------------------------------------------------------------
/src/test/resources/feature/common/dependency.txt:
--------------------------------------------------------------------------------
1 | 1 He he PRP PERSON _ 3 nsubj 3:A0
2 | 2 already already RB _ _ 3 advmod 3:AM-TMP
3 | 3 bought buy VBD _ p2=VBN|pb=buy.01 0 root _
4 | 4 a a DT _ _ 5 det _
5 | 5 CAR-2 car NN _ _ 3 dobj 3:A1;8:A1
6 | 6 yesterday yesterday NN TIME _ 3 npadvmod 3:AM-TMP
7 | 7 thAt that WDT _ _ 8 nsubj 8:R-A1
8 | 8 IS be VBZ _ pb=be.01 5 rcmod _
9 | 9 Red red JJ _ p2=VBN 8 acomp 8:A2
10 | 10 ... .. . _ _ 3 punct _
11 | 11 123 0 CD _ _ 3 num _
--------------------------------------------------------------------------------
/src/test/resources/nlp/trainer/feature_pos.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/src/test/resources/nlp/trainer/feature_dep.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 1
5 | 2
6 | 3
7 |
8 | 4
9 | 5
10 | 6
11 | 7
12 |
13 | 8
14 |
15 | 9
16 | 10
17 |
18 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2014-2015, Emory University
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/src/main/resources/configure/config_decode_dep.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 |
6 |
7 |
8 |
9 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
10 |
11 |
12 |
13 | general-en-pos.xz
14 | general-en-dep.xz
15 |
16 |
17 |
18 | root
19 | 1
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/src/main/resources/configure/config_sequence.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | 0.4
13 |
14 | true
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/src/test/resources/nlp/trainer/pos.cnlp:
--------------------------------------------------------------------------------
1 | 1 He12 he PRP _ 3 nsubj 3:A0
2 | 2 already already RB _ 3 advmod 3:AM-TMP
3 | 3 bought buy VBD p2=VBN|pb=buy.01 0 root _
4 | 4 a.-# a DT _ 5 det _
5 | 5 car car NN _ 3 dobj 3:A1;8:A1
6 | 6 yesterday yesterday NN _ 3 npadvmod 3:AM-TMP
7 | 7 that that WDT _ 8 nsubj 8:R-A1
8 | 8 is be VBZ pb=be.01 5 rcmod _
9 | 9 red red JJ p2=VBN 8 acomp 8:A2
10 | 10 . . . _ 3 punct _
11 |
12 | 1 She she PRP _ 2 nsubj 2:A0
13 | 2 sold buy VBD p2=VBN|pb=buy.01 0 root _
14 | 3 a a CD _ 4 det _
15 | 4 car car NN _ 2 dobj 2:A1;6:A1
16 | 5 that that DT _ 6 nsubj 6:R-A1
17 | 6 is be VBZ pb=be.01 4 rcmod _
18 | 7 blue red RB p2=VBN 6 acomp 6:A2
19 | 8 . . . _ 2 punct _
20 |
--------------------------------------------------------------------------------
/src/main/resources/configure/config_future.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | false
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/src/main/scripts/count-deps.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import os
3 | import sys
4 | import glob
5 |
# Command-line arguments: the directory to scan and the file extension
# (without the leading dot) of the files to count.
if len(sys.argv) < 3:
    # Fail with a readable usage line instead of a bare IndexError traceback.
    sys.exit('usage: %s <input-dir> <extension>' % sys.argv[0])

IN_DIR = sys.argv[1]
EXT = sys.argv[2]
8 |
def getCounts(filename):
    """Count blank and non-blank lines in a file.

    A line is "blank" when splitting it on whitespace yields no fields
    (empty or whitespace-only lines).

    :param filename: path of the text file to read.
    :return: tuple ``(sc, wc)`` where ``sc`` is the number of blank lines
        and ``wc`` is the number of non-blank lines.
    """
    sc = 0
    wc = 0

    # The context manager closes the file even if reading fails; the handle
    # was previously left open.
    with open(filename) as fin:
        for line in fin:
            if line.split():
                wc += 1
            else:
                sc += 1

    # A disabled variant in the original also counted lines whose fifth
    # column contains 'pb='; it is not part of the returned tuple.
    return (sc, wc)
24 |
# Grand totals over all matched files: [blank-line count, non-blank-line count].
gt = [0, 0]

for filename in glob.glob(os.path.join(IN_DIR, '*.'+EXT)):
    t = getCounts(filename)
    # Parenthesised single-argument print produces the same output on
    # Python 2 and is valid syntax on Python 3.
    print('%s %d %d' % (filename, t[0], t[1]))

    gt[0] += t[0]
    gt[1] += t[1]

print(gt)
36 |
--------------------------------------------------------------------------------
/src/main/resources/configure/config_decode_srl.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 |
6 |
7 |
8 |
9 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
10 |
11 |
12 |
13 | general-en-pos.xz
14 | general-en-dep.xz
15 | general-en-srl.xz
16 |
17 |
18 |
19 | root
20 | 1
21 |
22 |
23 |
24 | 4
25 | 3
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/main/resources/configure/config_train_pos.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | 0.4
17 | 2
18 | 1500
19 | true
20 |
21 |
22 |
--------------------------------------------------------------------------------
/src/test/resources/propbank/sample.prop:
--------------------------------------------------------------------------------
1 | propbank/wsj_0003.parse 0 11 gold enter-v enter.01 ----- 10:1-ARG0 11:0-rel 12:1-ARG1
2 | propbank/wsj_0003.parse 0 21 gold cause-v cause.01 ----- 16:2-ARG0 21:0-rel 22:2-ARG1
3 | propbank/wsj_0003.parse 0 25 gold show-v show.02 ----- 22:1*23:1*24:1-ARG1 25:0,26:1-rel 27:2-ARGM-TMP 22:1*23:1-LINK-SLC
4 | propbank/wsj_0003.parse 0 31 gold say-v say.01 ----- 0:3*33:1-ARG1 30:1-ARG0 31:0-rel
5 | propbank/wsj_0003.parse 1 18 gold appear-v appear.02 ----- 0:2,19:2-ARG1 18:0-rel
6 | propbank/wsj_0003.parse 1 21 gold be-v be.01 ----- 0:2*19:1-ARG1 21:0-rel 22:2-ARG2
7 | propbank/wsj_0003.parse 1 28 gold study-v study.01 ----- 25:1*29:1-ARG1 28:0-rel 30:1-ARGM-LOC 25:1*29:1-LINK-PSV
8 | propbank/wsj_0003.parse 1 32 gold industrialize-v industrialize.01 ----- 32:0-rel 33:0-ARG1
9 | propbank/wsj_0003.parse 1 36 gold say-v say.01 ----- 0:3*38:1-ARG1 35:1-ARG0 36:0-rel
10 |
--------------------------------------------------------------------------------
/src/main/resources/configure/config_decode_ner.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 |
6 |
7 |
8 |
9 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
10 | general-en-ner-gazetteer.xz
11 |
12 |
13 |
14 | general-en-pos.xz
15 | general-en-dep.xz
16 | general-en-srl.xz
17 | general-en-ner.xz
18 |
19 |
20 |
21 | root
22 | 1
23 |
24 |
25 |
26 | 4
27 | 3
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/ner/BILOU.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.ner;
17 |
/**
 * Tag set with the five constants {@code B}, {@code I}, {@code L}, {@code U} and {@code O},
 * declared in that order.
 * <p>
 * NOTE(review): presumably the BILOU chunk encoding used for named entities
 * (Begin, Inside, Last, Unit, Outside) -- confirm against the code that consumes it.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum BILOU
{
    B, I, L, U, O
}
30 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/adapter/Adapter1.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.adapter;
17 |
18 |
/**
 * Single-method callback that is applied to one item at a time.
 * <p>
 * The interface declares the type parameter {@code T} that
 * {@link #apply(Object)} refers to; without that declaration {@code T} is an
 * unresolved name and the interface does not compile.
 *
 * @param <T> the type of the item handed to {@link #apply(Object)}.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface Adapter1<T>
{
    /**
     * Applies this adapter to the given item.
     *
     * @param item the item to process.
     */
    void apply(T item);
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcher.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.srl.matcher;
17 |
18 |
/**
 * Predicate over a label string.
 * <p>
 * NOTE(review): judging by the package and name, the label is presumably a
 * semantic-role arc label -- confirm against the implementations.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface SRLArcMatcher
{
    /**
     * Decides whether the given label is accepted by this matcher.
     *
     * @param label the label to test.
     * @return {@code true} if the label matches.
     */
    boolean matches(String label);
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/srl/SRLTransition.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.srl;
17 |
/**
 * Holder for the string constant {@link #NO_ARC}.
 *
 * @since 3.1.3
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface SRLTransition
{
    /**
     * The literal {@code "N"}.
     * NOTE(review): presumably the transition label meaning "no arc" -- confirm against its users.
     */
    String NO_ARC = "N";
}
26 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/trainer/TrainerType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.trainer;
17 |
/**
 * Enumerates the two trainer kinds, {@link #ONE_VS_ALL} and {@link #ONLINE}, in that order.
 * <p>
 * NOTE(review): the training strategies behind these names are not visible in this file.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum TrainerType
{
    /** First constant (ordinal 0). */
    ONE_VS_ALL,

    /** Second constant (ordinal 1). */
    ONLINE
}
27 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/utils/NLPMode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.utils;
17 |
/**
 * Identifiers of the NLP modes, declared in the order
 * {@code pos, morph, dep, ner, srl}.
 * <p>
 * The constant names are lower case, so {@code valueOf} expects e.g. {@code "pos"}.
 * NOTE(review): the names presumably stand for part-of-speech tagging, morphological
 * analysis, dependency parsing, named entity recognition and semantic role labeling --
 * confirm against the components that use them.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum NLPMode
{
    pos, morph, dep, ner, srl
}
30 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/feature/type/DirectionType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.feature.type;
17 |
/**
 * Single-letter direction codes: left, right, up, down and all.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum DirectionType
{
    /** Left. */
    l,

    /** Right. */
    r,

    /** Up. */
    u,

    /** Down. */
    d,

    /** All. */
    a
}
29 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/feature/type/SourceType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.feature.type;
17 |
18 | import java.io.Serializable;
19 |
20 |
/**
 * Three source identifiers, {@code i}, {@code j} and {@code k}, declared in that order.
 * <p>
 * NOTE(review): what each source refers to is not visible in this file.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum SourceType implements Serializable
{
    i,
    j,
    k
}
--------------------------------------------------------------------------------
/src/main/resources/configure/config_train_dep.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | false
21 | true
22 | root
23 | 1
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/src/main/resources/configure/config_train_ner.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
17 | general-en-ner-gazetteer.xz
18 |
19 |
20 |
21 |
22 | true
23 |
24 |
25 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/feature/type/FeatureType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.feature.type;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Kinds of feature: {@code BINARY}, {@code SIMPLE} and {@code SET}, declared in that order.
 * <p>
 * NOTE(review): how each kind is extracted or encoded is not visible in this file.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum FeatureType implements Serializable
{
    BINARY, SIMPLE, SET
}
30 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/wordnet/WNPOSTag.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.wordnet;
17 |
/**
 * Character codes for the four part-of-speech categories noun, verb, adjective and adverb.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface WNPOSTag
{
    /** Noun: {@code 'n'}. */
    char NOUN = 'n';

    /** Verb: {@code 'v'}. */
    char VERB = 'v';

    /** Adjective: {@code 'a'}. */
    char ADJECTIVE = 'a';

    /** Adverb: {@code 'r'}. */
    char ADVERB = 'r';
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/AbstractComponent.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component;
17 |
18 | import edu.emory.clir.clearnlp.dependency.DEPTree;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | abstract public class AbstractComponent
25 | {
26 | abstract public void process(DEPTree tree);
27 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcherTrue.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.srl.matcher;
17 |
18 |
19 | /**
20 | * @since 3.0.0
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public class SRLArcMatcherTrue implements SRLArcMatcher
24 | {
25 | @Override
26 | public boolean matches(String label)
27 | {
28 | return true;
29 | }
30 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/constant/MetaConst.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.constant;
17 |
/**
 * String constants for meta tokens.
 * NOTE(review): these look like placeholder forms substituted for hyperlinks,
 * emoticons, and numbers during normalization; confirm against the callers.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface MetaConst
{
    /** Meta token for hyperlinks. */
    public static final String HYPERLINK = "#hlink#";

    /** Meta token for emoticons. */
    public static final String EMOTICON = "#emo#";

    /** Meta token for cardinal numbers. */
    public static final String CARDINAL = "#crd#";

    /** Meta token for ordinal numbers. */
    public static final String ORDINAL = "#ord#";
}
29 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/utils/CFlag.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.utils;
17 |
/**
 * Component flags.
 * NOTE(review): each constant presumably names a mode a component runs in
 * (lexicon collection, training, bootstrapping, evaluation, decoding); confirm
 * against the Component-Flags documentation.
 *
 * @see "Component-Flags"
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum CFlag
{
    COLLECT,
    TRAIN,
    BOOTSTRAP,
    EVALUATE,
    DECODE
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/dbpedia/DBPediaInfoMap.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.dbpedia;
17 |
18 | import java.util.HashMap;
19 |
/**
 * A {@link HashMap} subtype with its own serial version UID and no additional
 * members.
 * NOTE(review): the supertype appears here without type arguments; confirm the
 * intended key and value types before relying on this class.
 *
 * @since 3.0.3
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DBPediaInfoMap extends HashMap
{
    /** Serialization identifier; must stay unchanged to read existing serialized maps. */
    private static final long serialVersionUID = 6100722532796570642L;
}
28 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/feature/type/FeatureXml.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.feature.type;
17 |
18 | import java.util.regex.Pattern;
19 |
/**
 * Constants used when reading feature templates from XML.
 * NOTE(review): the {@code E_} / {@code A_} prefixes presumably stand for
 * element and attribute names; confirm against the template reader.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface FeatureXml
{
    /** The name {@code "feature"}. */
    public static final String E_FEATURE = "feature";

    /** Matches names made of {@code f} followed by one or more digits (e.g., {@code f0}, {@code f12}). */
    public static final Pattern A_FIELD = Pattern.compile("^f[\\d]+$");

    /** The name {@code "visible"}. */
    public static final String A_VISIBLE = "visible";
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/reader/TReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.reader;
17 |
18 | import edu.emory.clir.clearnlp.util.StringUtils;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public enum TReader
25 | {
26 | RAW,
27 | TSV,
28 | LINE;
29 |
30 | static public TReader getType(String s)
31 | {
32 | return valueOf(StringUtils.toUpperCase(s));
33 | }
34 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/StringIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | /**
19 | * @since 3.0.0
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public class StringIntPair extends ObjectIntPair
23 | {
24 | private static final long serialVersionUID = -4177555142012481247L;
25 |
26 | public StringIntPair(String s, int i)
27 | {
28 | super(s, i);
29 | }
30 | }
--------------------------------------------------------------------------------
/src/main/resources/configure/config_train_srl.xml:
--------------------------------------------------------------------------------
1 |
2 | english
3 |
4 |
5 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | true
23 | 4
24 | 3
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/src/main/resources/samples/wsj_0001.parse:
--------------------------------------------------------------------------------
1 | ((S (NP-SBJ (NP (NNP Pierre)
2 | (NNP Vinken))
3 | (, ,)
4 | (ADJP (NML (CD 61)
5 | (NNS years))
6 | (JJ old))
7 | (, ,))
8 | (VP (MD will)
9 | (VP (VB join)
10 | (NP (DT the)
11 | (NN board))
12 | (PP-CLR (IN as)
13 | (NP (DT a)
14 | (JJ nonexecutive)
15 | (NN director)))
16 | (NP-TMP (NNP Nov.)
17 | (CD 29))))
18 | (. .)))
19 |
20 | (TOP (S (NP-SBJ (NNP Mr.)
21 | (NNP Vinken))
22 | (VP (VBZ is)
23 | (NP-PRD (NP (NN chairman))
24 | (PP (IN of)
25 | (NP (NP (NNP Elsevier)
26 | (NNP N.V.))
27 | (, ,)
28 | (NP (DT the)
29 | (NNP Dutch)
30 | (VBG publishing)
31 | (NN group))))))
32 | (. .)))
33 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/dictionary/PathNamedEntity.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary;
17 |
/**
 * Path constants for named-entity dictionary files.
 * NOTE(review): the paths are relative and look like classpath resource
 * locations; confirm how callers resolve them.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PathNamedEntity
{
    /** Directory prefix shared by all paths in this interface. */
    public static final String ROOT = "edu/emory/clir/clearnlp/dictionary/ner/";

    /** Sub-directory {@code us/} under {@link #ROOT}. */
    public static final String US = ROOT + "us/";

    // US
    /** Path of {@code female_names.txt} under {@link #US}. */
    public static final String US_FEMALE_NAMES = US + "female_names.txt";

    /** Path of {@code male_names.txt} under {@link #US}. */
    public static final String US_MALE_NAMES = US + "male_names.txt";
}
31 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/morph/DefaultMPAnalyzer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.morph;
17 |
18 | import edu.emory.clir.clearnlp.dependency.DEPNode;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class DefaultMPAnalyzer extends AbstractMPAnalyzer
25 | {
26 | @Override
27 | public void analyze(DEPNode node)
28 | {
29 | node.setLemma(node.getLowerSimplifiedWordForm());
30 | }
31 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/lang/TLanguage.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.lang;
17 |
18 | import edu.emory.clir.clearnlp.util.StringUtils;
19 |
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public enum TLanguage
26 | {
27 | ARABIC,
28 | CHINESE,
29 | ENGLISH,
30 | HINDI,
31 | KOREAN;
32 |
33 | static public TLanguage getType(String s)
34 | {
35 | return valueOf(StringUtils.toUpperCase(s));
36 | }
37 | }
--------------------------------------------------------------------------------
/src/main/resources/samples/wsj_0001.parse.dep:
--------------------------------------------------------------------------------
1 | 1 Pierre pierre NNP _ 2 nn _ _ _
2 | 2 Vinken vinken NNP _ 9 nsubj _ _ _
3 | 3 , , , _ 2 punct _ _ _
4 | 4 61 0 CD _ 5 num _ _ _
5 | 5 years year NNS _ 6 npadvmod _ _ _
6 | 6 old old JJ _ 2 amod _ _ _
7 | 7 , , , _ 2 punct _ _ _
8 | 8 will will MD _ 9 aux _ _ _
9 | 9 join join VB _ 0 root _ _ _
10 | 10 the the DT _ 11 det _ _ _
11 | 11 board board NN _ 9 dobj _ _ _
12 | 12 as as IN syn=CLR 9 prep _ _ _
13 | 13 a a DT _ 15 det _ _ _
14 | 14 nonexecutive nonexecutive JJ _ 15 amod _ _ _
15 | 15 director director NN _ 12 pobj _ _ _
16 | 16 Nov. nov. NNP sem=TMP 9 npadvmod _ _ _
17 | 17 29 0 CD _ 16 num _ _ _
18 | 18 . . . _ 9 punct _ _ _
19 |
20 | 1 Mr. mr. NNP _ 2 nn _ _ _
21 | 2 Vinken vinken NNP _ 3 nsubj _ _ _
22 | 3 is be VBZ _ 0 root _ _ _
23 | 4 chairman chairman NN syn=PRD 3 attr _ _ _
24 | 5 of of IN _ 4 prep _ _ _
25 | 6 Elsevier elsevier NNP _ 7 nn _ _ _
26 | 7 N.V. n.v. NNP _ 5 pobj _ _ _
27 | 8 , , , _ 7 punct _ _ _
28 | 9 the the DT _ 12 det _ _ _
29 | 10 Dutch dutch NNP _ 12 nn _ _ _
30 | 11 publishing publish VBG _ 12 amod _ _ _
31 | 12 group group NN _ 7 appos _ _ _
32 | 13 . . . _ 3 punct _ _ _
33 |
34 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/evaluation/AbstractEval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.evaluation;
17 |
18 | import edu.emory.clir.clearnlp.dependency.DEPTree;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | abstract public class AbstractEval
25 | {
26 | abstract public void countCorrect(DEPTree sTree, LabelType[] gLabels);
27 | abstract public double getScore();
28 | abstract public void clear();
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/instance/SparseInstance.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.instance;
17 |
18 | import edu.emory.clir.clearnlp.classification.vector.SparseFeatureVector;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class SparseInstance extends AbstractInstance
25 | {
26 | public SparseInstance(String label, SparseFeatureVector vector)
27 | {
28 | super(label, vector);
29 | }
30 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/instance/StringInstance.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.instance;
17 |
18 | import edu.emory.clir.clearnlp.classification.vector.StringFeatureVector;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class StringInstance extends AbstractInstance
25 | {
26 | public StringInstance(String label, StringFeatureVector vector)
27 | {
28 | super(label, vector);
29 | }
30 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/verbnet/VNMap.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.verbnet;
17 |
18 | import java.io.Serializable;
19 | import java.util.HashMap;
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class VNMap extends HashMap implements Serializable
26 | {
27 | private static final long serialVersionUID = -7409938151707095231L;
28 |
29 | public void put(VNClass vn)
30 | {
31 | put(vn.getID(), vn);
32 | }
33 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/pos/POSTag.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.pos;
17 |
18 | /**
19 | * @since 3.0.0
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public interface POSTag
23 | {
24 | /** The part-of-speech tag for emoticons. */
25 | String POS_EMOTICON = "EMO";
26 | /** The part-of-speech tag for final tags. */
27 | String POS_FINAL = POSTagEn.POS_PERIOD;
28 | /** The part-of-speech tag for hyperlinks (e.g., URLs, emails). */
29 | String POS_HYPERLINK = POSTagEn.POS_ADD;
30 | }
31 |
--------------------------------------------------------------------------------
/src/test/resources/propbank/wsj_0001.parse:
--------------------------------------------------------------------------------
1 | (TOP (S (NP-SBJ (NP (NNP Pierre)
2 | (NNP Vinken))
3 | (, ,)
4 | (ADJP (NML (CD 61)
5 | (NNS years))
6 | (JJ old))
7 | (, ,))
8 | (VP (MD will)
9 | (VP (VB join)
10 | (NP (DT the)
11 | (NN board))
12 | (PP-CLR (IN as)
13 | (NP (DT a)
14 | (JJ nonexecutive)
15 | (NN director)))
16 | (NP-TMP (NNP Nov.)
17 | (CD 29))))
18 | (. .)))
19 |
20 | (TOP (S (NP-SBJ (NNP Mr.)
21 | (NNP Vinken))
22 | (VP (VBZ is)
23 | (NP-PRD (NP (NN chairman))
24 | (PP (IN of)
25 | (NP (NP (NNP Elsevier)
26 | (NNP N.V.))
27 | (, ,)
28 | (NP (DT the)
29 | (NNP Dutch)
30 | (VBG publishing)
31 | (NN group))))))
32 | (. .)))
33 |
34 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/cluster/StringVector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.cluster;
17 |
18 | import edu.emory.clir.clearnlp.collection.map.ObjectDoubleHashMap;
19 |
20 | /**
21 | * @since 3.1.2
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class StringVector
25 | {
26 | private ObjectDoubleHashMap term_map;
27 |
28 | public StringVector()
29 | {
30 | term_map = new ObjectDoubleHashMap<>();
31 | }
32 |
33 | public void add(String term)
34 | {
35 | term_map.add(term, 1);
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/pos/POSEval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.pos;
17 |
18 | import edu.emory.clir.clearnlp.component.evaluation.AbstractAccuracyEval;
19 | import edu.emory.clir.clearnlp.dependency.DEPNode;
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class POSEval extends AbstractAccuracyEval
26 | {
27 | @Override
28 | protected boolean isCorrect(DEPNode node, String label)
29 | {
30 | return node.isPOSTag(label);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/dbpedia/DBPediaXML.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.dbpedia;
17 |
/**
 * String constants for the XML names and URI prefixes used with DBpedia data.
 *
 * @since 3.0.3
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface DBPediaXML
{
    /** The qualified name {@code owl:Class}. */
    public static final String OWL_CLASS = "owl:Class";

    /** The qualified name {@code rdf:about}. */
    public static final String RDF_ABOUT = "rdf:about";

    /** The qualified name {@code rdf:resource}. */
    public static final String RDF_RESOURCE = "rdf:resource";

    /** The qualified name {@code rdfs:subClassOf}. */
    public static final String RDFS_SUBCLASS_OF = "rdfs:subClassOf";

    /** URI prefix of DBpedia ontology entries. */
    public static final String DBPEDIA_ORG_ONTOLOGY = "http://dbpedia.org/ontology/";

    /** URI prefix of DBpedia resource entries. */
    public static final String DBPEDIA_ORG_RESOURCE = "http://dbpedia.org/resource/";
}
31 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcherSet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.srl.matcher;
17 |
18 | import java.util.Set;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class SRLArcMatcherSet implements SRLArcMatcher
25 | {
26 | private Set s_labels;
27 |
28 | public SRLArcMatcherSet(Set labels)
29 | {
30 | s_labels = labels;
31 | }
32 |
33 | @Override
34 | public boolean matches(String label)
35 | {
36 | return s_labels.contains(label);
37 | }
38 | }
--------------------------------------------------------------------------------
/src/test/resources/feature/common/feature_common.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/ObjectSizeFetcher.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | import java.lang.instrument.Instrumentation;
19 |
/**
 * Reports object sizes through the {@link Instrumentation} handle that the JVM
 * passes to {@link #premain(String, Instrumentation)}.
 * {@link #getObjectSize(Object)} is usable only after {@code premain} has
 * stored that handle.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectSizeFetcher
{
    /** Instrumentation handle; {@code null} until {@link #premain} has been called. */
    private static Instrumentation instrumentation;

    /**
     * Stores the instrumentation handle for later size queries.
     *
     * @param args the agent arguments; not used.
     * @param inst the instrumentation handle to store.
     */
    public static void premain(String args, Instrumentation inst)
    {
        instrumentation = inst;
    }

    /**
     * @param o the object to measure.
     * @return the value of {@link Instrumentation#getObjectSize(Object)} for {@code o}.
     * @throws IllegalStateException if no instrumentation handle has been stored,
     *         i.e., {@link #premain} has not been called with a non-null handle.
     */
    public static long getObjectSize(Object o)
    {
        final Instrumentation inst = instrumentation;

        // Fail with an actionable message instead of a bare NullPointerException.
        if (inst == null)
        {
            throw new IllegalStateException(
                "Instrumentation is not initialized: premain has not been called "
                + "(the class must be loaded as a Java agent).");
        }

        return inst.getObjectSize(o);
    }
}
37 |
--------------------------------------------------------------------------------
/src/main/scripts/conll2clear.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
"""Convert a whitespace-separated CoNLL-style file into a compact TSV format.

Usage: conll2clear.py <input> <output> <vpos>

Each output row holds: id, form, lemma, POS, a ``pb=<value>`` field (or ``_``),
head id, dependency label, and a ``;``-joined list of ``<predicate id>:<label>``
arguments (or ``_``). Sentences are separated by blank lines. Only predicates
whose POS starts with ``vpos`` are kept.
"""
import sys


def _format_sentence(tree, pred, vpos):
    """Return the output text (rows plus a blank line) for one sentence.

    ``tree`` is the list of nodes built by ``convert``; ``pred`` maps the i-th
    argument column to the 0-based index of its predicate node. The nodes are
    modified in place: their trailing argument columns are replaced by a single
    merged field.
    """
    rows = []
    for node in tree:
        args = []
        for i, arg in enumerate(node[7:]):
            if arg == '_':
                continue
            pred_id = pred[i]
            # Drop arguments of predicates whose POS does not match vpos.
            if not tree[pred_id][3].startswith(vpos):
                continue
            args.append(str(pred_id + 1) + ':' + arg)

        del node[7:]
        node.append(';'.join(args) if args else '_')
        rows.append('\t'.join(node))

    return '\n'.join(rows) + '\n\n'


def convert(fin, fout, vpos):
    """Read sentences from the line iterable ``fin`` and write them to ``fout``.

    A blank input line ends the current sentence. A sentence still pending at
    the end of input (no trailing blank line) is written as well, so the last
    sentence is never lost.
    """
    tree = []
    pred = []

    for line in fin:
        l = line.split()

        if not l:
            fout.write(_format_sentence(tree, pred, vpos))
            tree = []
            pred = []
            continue

        node = [
            l[0],  # id
            l[1],  # form
            l[2],  # glemma
            l[4],  # gpos
        ]

        # Column 13 is kept only for tokens whose POS starts with vpos.
        # NOTE(review): columns 12/13 look like CoNLL-2009 FILLPRED/PRED; confirm.
        if l[13] != '_' and l[4].startswith(vpos):
            node.append('pb=' + l[13])
        else:
            node.append('_')

        # Column 6 (gfeat) is intentionally not copied.
        node.append(l[8])    # headId
        node.append(l[10])   # deprel
        node.extend(l[14:])  # arg*

        # The i-th argument column belongs to the i-th token flagged in column 12.
        if l[12] != '_':
            pred.append(int(l[0]) - 1)
        tree.append(node)

    # Flush a final sentence that was not terminated by a blank line.
    if tree:
        fout.write(_format_sentence(tree, pred, vpos))


def main(argv):
    """Run the conversion with ``argv`` = [script, input, output, vpos]."""
    with open(argv[1]) as fin:
        with open(argv[2], 'w') as fout:
            convert(fin, fout, argv[3])


if __name__ == '__main__':
    main(sys.argv)
52 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/verbnet/VNTagTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.verbnet;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.clir.clearnlp.lexicon.verbnet.VNTag;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class VNTagTest
30 | {
31 | @Test
32 | public void test()
33 | {
34 | assertTrue(VNTag.contains(VNTag.VN_AGENT));
35 | assertFalse(VNTag.contains("Hello"));
36 | }
37 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/prediction/AbstractPrediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.prediction;
17 |
18 |
/**
 * Base class for predictions; it holds a single numeric score.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
abstract public class AbstractPrediction
{
    /** Score attached to this prediction. */
    protected double d_score;

    /**
     * Creates a prediction and stores its score through {@link #setScore(double)}.
     *
     * @param score the initial score.
     */
    public AbstractPrediction(double score)
    {
        this.setScore(score);
    }

    /**
     * Replaces the stored score.
     *
     * @param score the new score.
     */
    public void setScore(double score)
    {
        this.d_score = score;
    }

    /**
     * @return the score currently stored in this prediction.
     */
    public double getScore()
    {
        return this.d_score;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcherPattern.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.srl.matcher;
17 |
18 | import java.util.regex.Pattern;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class SRLArcMatcherPattern implements SRLArcMatcher
25 | {
26 | private Pattern p_labels;
27 |
28 | public SRLArcMatcherPattern(Pattern pattern)
29 | {
30 | p_labels = pattern;
31 | }
32 |
33 | @Override
34 | public boolean matches(String label)
35 | {
36 | return p_labels.matcher(label).find();
37 | }
38 | }
--------------------------------------------------------------------------------
/src/main/resources/headrules/headrule_en_stanford.txt:
--------------------------------------------------------------------------------
1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.*
2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.*
3 | CAPTION l NNP.*;NN.*;NP;CD;.*
4 | CIT l NNP.*;NN.*;NP;CD;.*
5 | CONJP l CC;VB.*;NN.*;TO|IN;.*
6 | EDITED r VP;VB.*;NN.*|PRP|NP;IN|PP;S.*;.*
7 | EMBED r S.*;FRAG|NP;.*
8 | FRAG r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.*
9 | INTJ l VB.*;NN.*;UH;INTJ;.*
10 | LST l LS|CD;NN;.*
11 | META l NP;VP|S;.*
12 | NAC r NN.*;NP;S|SINV;.*
13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.*
14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.*
15 | NX r NN.*;NX;NP;.*
16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.*
17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.*
18 | PRT l RP;PRT;.*
19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.*
20 | RRC l VP;VB.*;-PRD;NP|NN.*;ADJP;PP;.*
21 | S r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.*
22 | SBAR r VP;S|SQ|SINV;SBAR.*;FRAG|NP;.*
23 | SBARQ r VP;SQ|SBARQ;S|SINV;FRAG|NP;.*
24 | SINV r VP;VB.*;MD;S|SINV;NP;.*
25 | SQ r VP;VB.*;SQ;S;MD;NP;.*
26 | UCP r .*
27 | VP l VP;VB.*;MD|TO;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.*
28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.*
29 | WHADVP r RB.*|WRB;WHADVP;.*
30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.*
31 | WHPP l IN|TO;.*
32 | X r .*
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/CharIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable pair of a {@code char} and an {@code int} with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class CharIntPair implements Serializable
{
    private static final long serialVersionUID = -2439322004395455224L;

    /** The character component. */
    public char c;
    /** The integer component. */
    public int i;

    /**
     * Creates a pair by delegating to {@link #set(char, int)}.
     *
     * @param c the character component.
     * @param i the integer component.
     */
    public CharIntPair(char c, int i)
    {
        this.set(c, i);
    }

    /**
     * Overwrites both components at once.
     *
     * @param c the new character component.
     * @param i the new integer component.
     */
    public void set(char c, int i)
    {
        this.i = i;
        this.c = c;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/IntIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable pair of two {@code int} values with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class IntIntPair implements Serializable
{
    private static final long serialVersionUID = 1674260806426517804L;

    /** The first integer. */
    public int i1;
    /** The second integer. */
    public int i2;

    /**
     * Creates a pair by delegating to {@link #set(int, int)}.
     *
     * @param i1 the first integer.
     * @param i2 the second integer.
     */
    public IntIntPair(int i1, int i2)
    {
        this.set(i1, i2);
    }

    /**
     * Overwrites both components at once.
     *
     * @param i1 the new first integer.
     * @param i2 the new second integer.
     */
    public void set(int i1, int i2)
    {
        this.i2 = i2;
        this.i1 = i1;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/propbank/frameset/PBFType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.propbank.frameset;
17 |
18 |
/**
 * Frameset types, each tied to a short string code.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum PBFType
{
    VERB     ("v"),
    NOUN     ("n"),
    ADJECTIVE("j");

    /** String code associated with this type. */
    private final String code;

    PBFType(String value)
    {
        code = value;
    }

    /**
     * @return the string code associated with this type.
     */
    public String getValue()
    {
        return code;
    }

    /**
     * @param value the code to compare against; {@code null} yields {@code false}.
     * @return {@code true} if the given string equals this type's code.
     */
    public boolean isValue(String value)
    {
        return code.equals(value);
    }
}
--------------------------------------------------------------------------------
/src/main/resources/headrules/headrule_en_conll.txt:
--------------------------------------------------------------------------------
1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.*
2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.*
3 | CAPTION l NNP.*;NN.*;NP;CD;.*
4 | CIT l NNP.*;NN.*;NP;CD;.*
5 | CONJP l CC;VB.*;NN.*;TO|IN;.*
6 | EDITED r VB.*;VP;NN.*|PRP|NP;IN|PP;S.*;.*
7 | EMBED r S.*;FRAG|NP;.*
8 | FRAG r VB.*;VP;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.*
9 | INTJ l VB.*;NN.*;UH;INTJ;.*
10 | LST l LS|CD;NN;.*
11 | META l NP;VP|S;.*
12 | NAC r NN.*;NP;S|SINV;.*
13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.*
14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.*
15 | NX r NN.*;NX;NP;.*
16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.*
17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.*
18 | PRT l RP;PRT;.*
19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.*
20 | RRC l VB.*;VP;-PRD;NP|NN.*;ADJP;PP;.*
21 | S r MD|TO;VB.*;VP;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.*
22 | SBAR r IN|TO|DT;MD;VB.*;VP;S|SQ|SINV;SBAR.*;FRAG|NP;.*
23 | SBARQ r MD;VB.*;VP;SQ|SBARQ;S|SINV;FRAG|NP;.*
24 | SINV r MD;VB.*;VP;S|SINV;NP;.*
25 | SQ r MD;VB.*;VP;SQ;S;NP;.*
26 | UCP r .*
27 | VP l MD|TO;VB.*;VP;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.*
28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.*
29 | WHADVP r RB.*|WRB;WHADVP;.*
30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.*
31 | WHPP l IN|TO;.*
32 | X r .*
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/BooleanIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable pair of a {@code boolean} and an {@code int} with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class BooleanIntPair implements Serializable
{
    private static final long serialVersionUID = -3606845926289267380L;

    /** The boolean component. */
    public boolean b;
    /** The integer component. */
    public int i;

    /**
     * Creates a pair by delegating to {@link #set(boolean, int)}.
     *
     * @param b the boolean component.
     * @param i the integer component.
     */
    public BooleanIntPair(boolean b, int i)
    {
        this.set(b, i);
    }

    /**
     * Overwrites both components at once.
     *
     * @param b the new boolean component.
     * @param i the new integer component.
     */
    public void set(boolean b, int i)
    {
        this.i = i;
        this.b = b;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/DoubleIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable pair of a {@code double} and an {@code int} with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DoubleIntPair implements Serializable
{
    private static final long serialVersionUID = -2439322004395455224L;

    /** The floating-point component. */
    public double d;
    /** The integer component. */
    public int i;

    /**
     * Creates a pair by delegating to {@link #set(double, int)}.
     *
     * @param d the floating-point component.
     * @param i the integer component.
     */
    public DoubleIntPair(double d, int i)
    {
        this.set(d, i);
    }

    /**
     * Overwrites both components at once.
     *
     * @param d the new floating-point component.
     * @param i the new integer component.
     */
    public void set(double d, int i)
    {
        this.i = i;
        this.d = d;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/CharCharPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable pair of two {@code char} values with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class CharCharPair implements Serializable
{
    private static final long serialVersionUID = -2439322004395455224L;

    /** The first character. */
    public char c1;
    /** The second character. */
    public char c2;

    /**
     * Creates a pair by delegating to {@link #set(char, char)}.
     *
     * @param c1 the first character.
     * @param c2 the second character.
     */
    public CharCharPair(char c1, char c2)
    {
        this.set(c1, c2);
    }

    /**
     * Overwrites both components at once.
     *
     * @param c1 the new first character.
     * @param c2 the new second character.
     */
    public void set(char c1, char c2)
    {
        this.c2 = c2;
        this.c1 = c1;
    }
}
--------------------------------------------------------------------------------
/src/test/resources/propbank/wsj_0002.parse:
--------------------------------------------------------------------------------
1 | (TOP (S (NP-SBJ-1 (NP (NNP Rudolph)
2 | (NNP Agnew))
3 | (, ,)
4 | (UCP (ADJP (NML (CD 55)
5 | (NNS years))
6 | (JJ old))
7 | (CC and)
8 | (NP (NP (JJ former)
9 | (NN chairman))
10 | (PP (IN of)
11 | (NP (NNP Consolidated)
12 | (NNP Gold)
13 | (NNP Fields)
14 | (NNP PLC)))))
15 | (, ,))
16 | (VP (VBD was)
17 | (VP (VBN named)
18 | (NP-2 (-NONE- *-1))
19 | (S-CLR (NP-SBJ (-NONE- *PRO*-2))
20 | (NP-PRD (NP (DT a)
21 | (JJ nonexecutive)
22 | (NN director))
23 | (PP (IN of)
24 | (NP (DT this)
25 | (JJ British)
26 | (JJ industrial)
27 | (NN conglomerate)))))))
28 | (. .)))
29 |
30 |
--------------------------------------------------------------------------------
/src/test/resources/dependency/dependency.cnlp:
--------------------------------------------------------------------------------
1 | 1 He he PRP _ 3 nsubj 3:A0
2 | 2 already already RB _ 3 advmod 3:AM-TMP
3 | 3 bought buy VBD p2=VBN|pb=buy.01 0 root _
4 | 4 a a DT _ 5 det _
5 | 5 car car NN _ 3 dobj 3:A1;8:A1
6 | 6 yesterday yesterday NN _ 3 npadvmod 3:AM-TMP
7 | 7 that that WDT _ 8 nsubj 8:R-A1
8 | 8 is be VBZ pb=be.01 5 rcmod _
9 | 9 red red JJ p2=VBN 8 acomp 8:A2
10 | 10 . . . _ 3 punct _
11 |
12 | 1 He he PRP _ 2 nsubj 2:A0
13 | 2 bought buy VBD p2=VBN|pb=buy.01 0 root _
14 | 3 a a DT _ 4 det _
15 | 4 car car NN _ 2 dobj 2:A1;6:A1
16 | 5 that that WDT _ 6 nsubj 6:R-A1
17 | 6 is be VBZ pb=be.01 4 rcmod _
18 | 7 red red JJ p2=VBN 6 acomp 6:A2
19 | 8 . . . _ 2 punct _
20 |
21 | 1 He he PRP _ 2 nsubj 2:A0
22 | 2 bought buy VBD p2=VBN|pb=buy.01 0 root _
23 | 3 a a DT _ 4 det _
24 | 4 car car NN _ 2 dobj 2:A1;7:A1
25 | 5 tomorrow tomorrow NN _ 2 npadvmod 2:AM-TMP
26 | 6 that that WDT _ 7 nsubj 7:R-A1
27 | 7 is be VBZ pb=be.01 4 rcmod _
28 | 8 red red JJ p2=VBN 7 acomp 7:A2
29 | 9 . . . _ 2 punct _
30 |
31 | 1 He he PRP _ 2 nsubj 2:A0
32 | 2 bought buy VBD p2=VBN|pb=buy.01 0 root _
33 | 3 a a DT _ 4 det _
34 | 4 car car NN _ 2 dobj 2:A1;7:A1
35 | 5 tomorrow tomorrow NN _ 2 npadvmod 2:AM-TMP
36 | 6 that that WDT _ 7 nsubj 7:R-A1
37 | 7 is be VBZ pb=be.01 2 nproj _
38 | 8 red red JJ p2=VBN 7 acomp 7:A2
39 | 9 . . . _ 2 punct _
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/instance/SparseInstanceCollector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.instance;
17 |
18 | import edu.emory.clir.clearnlp.classification.vector.SparseFeatureVector;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class SparseInstanceCollector extends AbstractInstanceCollector
25 | {
26 | @Override
27 | public void init()
28 | {
29 | initDefault();
30 | }
31 |
32 | @Override
33 | protected void addFeatures(SparseFeatureVector vector)
34 | {
35 | n_features = Math.max(n_features, vector.getMaxIndex()+1);
36 | }
37 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/propbank/frameset/PBFXml.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.propbank.frameset;
17 |
/**
 * String constants for frameset XML: names prefixed {@code E_} and {@code A_}
 * (presumably element and attribute names, respectively; confirm against the XML reader).
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PBFXml
{
    // E_* constants
    public static final String E_FRAMESET  = "frameset";
    public static final String E_PREDICATE = "predicate";
    public static final String E_ROLESET   = "roleset";
    public static final String E_ROLE      = "role";
    public static final String E_VNROLE    = "vnrole";

    // A_* constants
    public static final String A_LEMMA   = "lemma";
    public static final String A_ID      = "id";
    public static final String A_DESCR   = "descr";
    public static final String A_NAME    = "name";
    public static final String A_N       = "n";
    public static final String A_F       = "f";
    public static final String A_VNCLS   = "vncls";
    public static final String A_VNTHETA = "vntheta";
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/reader/LineReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.reader;
17 |
18 | import java.io.IOException;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class LineReader extends AbstractReader
25 | {
26 | public LineReader()
27 | {
28 | super(TReader.LINE);
29 | }
30 |
31 | @Override
32 | public String next()
33 | {
34 | try
35 | {
36 | return b_reader.readLine();
37 | }
38 | catch (IOException e) {e.printStackTrace();}
39 |
40 | return null;
41 | }
42 |
43 | @Override
44 | public AbstractReader clone()
45 | {
46 | return new LineReader();
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/triple/DoubleIntIntTriple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.triple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable triple of one {@code double} and two {@code int} values
 * with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DoubleIntIntTriple implements Serializable
{
    private static final long serialVersionUID = -5353827334306132865L;

    /** The floating-point component. */
    public double d;
    /** The first integer component. */
    public int i1;
    /** The second integer component. */
    public int i2;

    /**
     * Creates a triple by delegating to {@link #set(double, int, int)}.
     *
     * @param d  the floating-point component.
     * @param i1 the first integer component.
     * @param i2 the second integer component.
     */
    public DoubleIntIntTriple(double d, int i1, int i2)
    {
        this.set(d, i1, i2);
    }

    /**
     * Overwrites all three components at once.
     *
     * @param d  the new floating-point component.
     * @param i1 the new first integer component.
     * @param i2 the new second integer component.
     */
    public void set(double d, int i1, int i2)
    {
        this.i2 = i2;
        this.i1 = i1;
        this.d  = d;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/morph/AbstractMPAnalyzer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.morph;
17 |
18 | import edu.emory.clir.clearnlp.component.AbstractComponent;
19 | import edu.emory.clir.clearnlp.dependency.DEPNode;
20 | import edu.emory.clir.clearnlp.dependency.DEPTree;
21 |
22 | /**
23 | * Default morphological analyzer.
24 | * @since 3.0.0
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | abstract public class AbstractMPAnalyzer extends AbstractComponent
28 | {
29 | @Override
30 | public void process(DEPTree tree)
31 | {
32 | for (DEPNode node : tree)
33 | analyze(node);
34 | }
35 |
36 | abstract public void analyze(DEPNode node);
37 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/triple/BooleanIntIntTriple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.triple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Mutable, serializable triple of one {@code boolean} and two {@code int} values
 * with public fields.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class BooleanIntIntTriple implements Serializable
{
    private static final long serialVersionUID = -5353827334306132865L;

    /** The boolean component. */
    public boolean b;
    /** The first integer component. */
    public int i1;
    /** The second integer component. */
    public int i2;

    /**
     * Creates a triple by delegating to {@link #set(boolean, int, int)}.
     *
     * @param b  the boolean component.
     * @param i1 the first integer component.
     * @param i2 the second integer component.
     */
    public BooleanIntIntTriple(boolean b, int i1, int i2)
    {
        this.set(b, i1, i2);
    }

    /**
     * Overwrites all three components at once.
     *
     * @param b  the new boolean component.
     * @param i1 the new first integer component.
     * @param i2 the new second integer component.
     */
    public void set(boolean b, int i1, int i2)
    {
        this.i2 = i2;
        this.i1 = i1;
        this.b  = b;
    }
}
--------------------------------------------------------------------------------
/src/test/resources/nlp/configuration/configure.xml:
--------------------------------------------------------------------------------
1 |
2 | com/clearnlp/model/english/general
3 | english
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | 0.4
19 | 2
20 | 1500
21 | true
22 |
23 |
24 |
25 |
26 | true
27 | 16
28 | true
29 | root
30 | 32
31 |
32 |
33 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/util/MathUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.util.MathUtils;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class MathUtilsTest
29 | {
30 | @Test
31 | public void testPow()
32 | {
33 | int i, j;
34 |
35 | for (j=-5; j<5; j++)
36 | {
37 | if (j == 0) continue;
38 |
39 | for (i=-5; i<5; i++)
40 | {
41 | assertEquals(Math.pow( 2, i), MathUtils.pow( 2, i), 0);
42 | assertEquals(Math.pow(-2, i), MathUtils.pow(-2, i), 0);
43 | }
44 | }
45 | }
46 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/feature/common/OrthographicType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.feature.common;
17 |
/**
 * String codes ({@code "0"} through {@code "13"}) naming orthographic
 * properties; each constant maps one property name to its code.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface OrthographicType
{
    public static final String HYPERLINK          = "0";
    public static final String ALL_UPPER          = "1";
    public static final String ALL_LOWER          = "2";
    public static final String ALL_DIGIT          = "3";
    public static final String ALL_PUNCT          = "4";
    public static final String ALL_DIGIT_OR_PUNCT = "5";
    public static final String HAS_DIGIT          = "6";
    public static final String HAS_PERIOD         = "7";
    public static final String HAS_HYPHEN         = "8";
    public static final String HAS_OTHER_PUNCT    = "9";
    public static final String NO_LOWER           = "10";
    public static final String FST_UPPER          = "11";
    public static final String UPPER_1            = "12";
    public static final String UPPER_2            = "13";
}
39 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/ner/NERLib.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.ner;
17 |
18 | /**
19 | * @since 3.0.3
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public class NERLib
23 | {
24 | private NERLib() {}
25 |
26 | public static BILOU toBILOU(String tag)
27 | {
28 | return BILOU.valueOf(tag.substring(0,1));
29 | }
30 |
31 | public static String toBILOUTag(BILOU bilou, String tag)
32 | {
33 | return bilou+"-"+tag;
34 | }
35 |
36 | public static String toNamedEntity(String tag)
37 | {
38 | return tag.substring(2);
39 | }
40 |
41 | public static String changeChunkType(BILOU newBilou, String tag)
42 | {
43 | return toBILOUTag(newBilou, toNamedEntity(tag));
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/util/CharUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.clir.clearnlp.util.StringUtils;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class CharUtilsTest
30 | {
31 | @Test
32 | public void testContainsOnlyDigits()
33 | {
34 | assertTrue (StringUtils.containsDigitOnly("12"));
35 | assertFalse(StringUtils.containsDigitOnly("a1"));
36 | assertFalse(StringUtils.containsDigitOnly("1b"));
37 | assertFalse(StringUtils.containsDigitOnly("1-2"));
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/arc/DEPArc.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.arc;
17 |
18 | import edu.emory.clir.clearnlp.dependency.DEPNode;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class DEPArc extends AbstractArc
25 | {
26 | private static final long serialVersionUID = -9099516205158258095L;
27 |
28 | public DEPArc(DEPNode node, String label)
29 | {
30 | set(node, label);
31 | }
32 |
33 | @Override
34 | public String toString()
35 | {
36 | return n_node.getID() + DELIM + s_label;
37 | }
38 |
39 | @Override
40 | public int compareTo(AbstractArc arc)
41 | {
42 | return n_node.compareTo(arc.getNode());
43 | }
44 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/tree/PrefixNode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.tree;
17 |
18 | import java.util.HashMap;
19 |
/**
 * A node that maps keys to child nodes (through the inherited {@link HashMap})
 * and may additionally carry a value of its own.
 *
 * @param <K> the key type used to reach child nodes.
 * @param <V> the type of the value stored at a node.
 * @since 3.0.3
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class PrefixNode<K extends Comparable<K>,V> extends HashMap<K,PrefixNode<K,V>>
{
    private static final long serialVersionUID = 1566684742873455351L;
    private V value;

    /** Creates a node with no children and no value. */
    public PrefixNode()
    {
        value = null;
    }

    /** @return the value stored at this node, or {@code null} if none is set. */
    public V getValue()
    {
        return value;
    }

    /** @param value the value to store at this node; {@code null} clears it. */
    public void setValue(V value)
    {
        this.value = value;
    }

    /** @return {@code true} if a non-null value is stored at this node. */
    public boolean hasValue()
    {
        return value != null;
    }
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/ObjectCharPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class ObjectCharPair implements Serializable, Comparable>
25 | {
26 | private static final long serialVersionUID = -5228607179375724504L;
27 |
28 | public T o;
29 | public char c;
30 |
31 | public ObjectCharPair(T o, char c)
32 | {
33 | set(o, c);
34 | }
35 |
36 | public void set(T o, char c)
37 | {
38 | this.o = o;
39 | this.c = c;
40 | }
41 |
42 | @Override
43 | public int compareTo(ObjectCharPair p)
44 | {
45 | return c - p.c;
46 | }
47 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/util/FileUtilsTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.util.FileUtils;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class FileUtilsTest
29 | {
30 | @Test
31 | public void replaceExtensionTest()
32 | {
33 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "jpg"));
34 | assertEquals(null , FileUtils.replaceExtension("a", "jpg"));
35 |
36 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "txt", "jpg"));
37 | assertEquals(null , FileUtils.replaceExtension("a.txt", "bmp", "jpg"));
38 | }
39 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/triple/ObjectIntIntTriple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.triple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * A mutable triple of one object and two {@code int} values.
 *
 * @param <T> the type of the object component.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntIntTriple<T> implements Serializable
{
    private static final long serialVersionUID = -7014586350906455183L;

    /** The object component. */
    public T o;
    /** The first integer component. */
    public int i1;
    /** The second integer component. */
    public int i2;

    /** Creates a triple holding {@code null}, {@code 0}, and {@code 0}. */
    public ObjectIntIntTriple()
    {
        set(null, 0, 0);
    }

    /**
     * @param o the object component.
     * @param i1 the first integer component.
     * @param i2 the second integer component.
     */
    public ObjectIntIntTriple(T o, int i1, int i2)
    {
        set(o, i1, i2);
    }

    /** Replaces all three components at once. */
    public void set(T o, int i1, int i2)
    {
        this.o = o;
        this.i1 = i1;
        this.i2 = i2;
    }
}
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/english/DTAbbreviationTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.english;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.clir.clearnlp.dictionary.english.DTAbbreviation;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DTAbbreviationTest
30 | {
31 | @Test
32 | public void test()
33 | {
34 | DTAbbreviation dt = new DTAbbreviation();
35 |
36 | assertTrue(dt.isAbbreviationEndingWithPeriod("mr"));
37 | assertTrue(dt.isAbbreviationEndingWithPeriod("mrs"));
38 |
39 | assertFalse(dt.isAbbreviationEndingWithPeriod("e.g"));
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/cluster/Cluster.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.cluster;
17 |
18 | import java.util.HashSet;
19 | import java.util.Set;
20 |
21 | /**
22 | * @since 3.1.2
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class Cluster
26 | {
27 | private Set point_set;
28 |
29 | public Cluster()
30 | {
31 | point_set = new HashSet<>();
32 | }
33 |
34 | public void addPoint(SparseVector point)
35 | {
36 | point_set.add(point);
37 | }
38 |
39 | public Set getPointSet()
40 | {
41 | return point_set;
42 | }
43 |
44 | public int size()
45 | {
46 | return point_set.size();
47 | }
48 |
49 | public void merge(Cluster cluster)
50 | {
51 | point_set.addAll(cluster.getPointSet());
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/util/CharTokenizerTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import java.util.Arrays;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.clir.clearnlp.util.CharTokenizer;
25 |
26 | /**
27 | * @since 3.0.0
28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
29 | */
30 | public class CharTokenizerTest
31 | {
32 | @Test
33 | public void test()
34 | {
35 | CharTokenizer t;
36 | String s;
37 |
38 | t = new CharTokenizer(',');
39 | s = "a,b,c";
40 | assertEquals("[a, b, c]", Arrays.toString(t.tokenize(s)));
41 |
42 | t = new CharTokenizer(';');
43 | s = ";abc;def;;ghi;";
44 | assertEquals("[abc, def, ghi]", Arrays.toString(t.tokenize(s)));
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/headrule/HeadRuleMapTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.headrule;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.conversion.headrule.HeadRuleMap;
23 | import edu.emory.clir.clearnlp.util.IOUtils;
24 |
25 |
26 | /** @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */
27 | public class HeadRuleMapTest
28 | {
29 | @Test
30 | public void testHeadRuleMap()
31 | {
32 | String filename = "src/main/resources/headrules/headrule_en_stanford.txt";
33 |
34 | HeadRuleMap map = new HeadRuleMap(IOUtils.createFileInputStream(filename));
35 | String str = map.toString();
36 |
37 | assertEquals(str, new HeadRuleMap(IOUtils.createByteArrayInputStream(str)).toString());
38 | }
39 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/arc/PBArc.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.arc;
17 |
18 | import edu.emory.clir.clearnlp.constituent.CTNode;
19 | import edu.emory.clir.clearnlp.lexicon.propbank.PBArgument;
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public class PBArc extends AbstractArc
26 | {
27 | private static final long serialVersionUID = 8603308004980285093L;
28 |
29 | public PBArc(CTNode node, String label)
30 | {
31 | set(node, label);
32 | }
33 |
34 | @Override
35 | public String toString()
36 | {
37 | return n_node.getTerminalID() + PBArgument.DELIM + s_label;
38 | }
39 |
40 | @Override
41 | public int compareTo(AbstractArc arc)
42 | {
43 | return n_node.compareTo(arc.getNode());
44 | }
45 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/util/arc/SRLArcTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.arc;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.dependency.DEPNode;
23 | import edu.emory.clir.clearnlp.util.arc.SRLArc;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class SRLArcTest
30 | {
31 | @Test
32 | public void test()
33 | {
34 | DEPNode node = new DEPNode(1, "A");
35 | SRLArc arc = new SRLArc(node, "A0");
36 | assertEquals("1:A0", arc.toString());
37 |
38 | arc.setNumberedArgumentTag("PRD");
39 | assertEquals("1:A0", arc.toString());
40 | assertEquals("1:A0", arc.toString(false));
41 | assertEquals("1:A0-PRD", arc.toString(true));
42 | }
43 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/dictionary/AbstractDTTokenizer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary;
17 |
18 | import edu.emory.clir.clearnlp.util.CharUtils;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | abstract public class AbstractDTTokenizer
25 | {
26 | public String[] tokenize(String s)
27 | {
28 | char[] lcs = s.toCharArray();
29 | String lower = CharUtils.toLowerCase(lcs) ? new String(lcs) : s;
30 | return tokenize(s, lower, lcs);
31 | }
32 |
33 | /**
34 | * @param original the original string.
35 | * @param lower the lowercase of the original string.
36 | * @param lcs the lowercase character array of the original string.
37 | */
38 | abstract public String[] tokenize(String original, String lower, char[] lcs);
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/wordnet/WNRelation.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.wordnet;
17 |
18 |
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class WNRelation
25 | {
26 | private WNSynset w_synset;
27 | private short n_source;
28 | private short n_target;
29 |
30 | public WNSynset getWNSynset()
31 | {
32 | return w_synset;
33 | }
34 |
35 | public short getSource()
36 | {
37 | return n_source;
38 | }
39 |
40 | public short getTarget()
41 | {
42 | return n_target;
43 | }
44 |
45 | public void setWNSynset(WNSynset synset)
46 | {
47 | w_synset = synset;
48 | }
49 |
50 | public void setSource(short source)
51 | {
52 | n_source = source;
53 | }
54 |
55 | public void setTarget(short target)
56 | {
57 | n_target = target;
58 | }
59 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/configuration/AbstractTrainerConfiguration.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.configuration;
17 |
/**
 * Holds two trainer settings: a vector type code and a binary flag.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class AbstractTrainerConfiguration
{
    private byte vectorType;
    private boolean binary;

    /**
     * @param vectorType the vector type code to store.
     * @param binary the binary flag to store.
     */
    public AbstractTrainerConfiguration(byte vectorType, boolean binary)
    {
        // Routed through the setters so overriding subclasses observe the initial values.
        setVectorType(vectorType);
        setBinary(binary);
    }

    /** @return the stored vector type code. */
    public byte getVectorType()
    {
        return this.vectorType;
    }

    /** @return the stored binary flag. */
    public boolean isBinary()
    {
        return this.binary;
    }

    /** @param vectorType the vector type code to store. */
    public void setVectorType(byte vectorType)
    {
        this.vectorType = vectorType;
    }

    /** @param binary the binary flag to store. */
    public void setBinary(boolean binary)
    {
        this.binary = binary;
    }
}
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTCompoundTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.universal;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.clir.clearnlp.dictionary.universal.DTCompound;
26 | import edu.emory.clir.clearnlp.util.lang.TLanguage;
27 |
28 | /**
29 | * @since 3.0.0
30 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
31 | */
32 | public class DTCompoundTest
33 | {
34 | @Test
35 | public void test()
36 | {
37 | DTCompound dt = new DTCompound(TLanguage.ENGLISH);
38 |
39 | assertEquals("[I, 'mmm]" , Arrays.toString(dt.tokenize("I'mmm")));
40 | assertEquals("[wha, d, ya]", Arrays.toString(dt.tokenize("whadya")));
41 |
42 | assertTrue(dt.tokenize("I'm") == null);
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/cluster/AbstractCluster.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.cluster;
17 |
18 | import java.util.ArrayList;
19 | import java.util.List;
20 |
21 | /**
22 | * @since 3.1.2
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | public abstract class AbstractCluster
26 | {
27 | protected List s_points;
28 |
29 | public AbstractCluster()
30 | {
31 | s_points = new ArrayList<>();
32 | }
33 |
34 | public void addPoint(SparseVector point)
35 | {
36 | s_points.add(point);
37 | }
38 |
39 | public void setPoints(List points)
40 | {
41 | s_points = points;
42 | }
43 |
44 | public SparseVector getPoint(int index)
45 | {
46 | return s_points.get(index);
47 | }
48 |
49 | public List getPoints()
50 | {
51 | return s_points;
52 | }
53 |
54 | public abstract List cluster();
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/dictionary/PathEnglishMPAnalyzer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary;
17 |
/**
 * Path and name constants used by the English morphological analyzer.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PathEnglishMPAnalyzer
{
    /** Common prefix of every path constant below. */
    String ROOT = "edu/emory/clir/clearnlp/dictionary/morphology/english/";

    String INFLECTION_SUFFIX = ROOT + "inflection_suffix.xml";
    /** Path of the abbreviation rule file. */
    String ABBREVIATION_RULE = ROOT + "abbreviation.rule";
    /** Misspelled name kept for backward compatibility; prefer {@link #ABBREVIATION_RULE}. */
    String ABBREVIATOIN_RULE = ABBREVIATION_RULE;
    String CARDINAL_BASE     = ROOT + "cardinal.base";
    String ORDINAL_BASE      = ROOT + "ordinal.base";

    // Bare names and extensions; unlike the paths above they are not prefixed with ROOT.
    String VERB          = "verb";
    String NOUN          = "noun";
    String ADJECTIVE     = "adjective";
    String ADVERB        = "adverb";
    String EXT_BASE      = ".base";
    String EXT_EXCEPTION = ".exc";

    String DERIVATION_SUFFIX_N2V = ROOT + "derivation_suffix_n2v.xml";
}
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/reader/RawReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.reader;
17 |
18 | import java.io.IOException;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class RawReader extends AbstractReader
25 | {
26 | public RawReader()
27 | {
28 | super(TReader.RAW);
29 | }
30 |
31 | @Override
32 | public String next()
33 | {
34 | try
35 | {
36 | StringBuilder build = new StringBuilder();
37 | char[] buffer = new char[1024 * 4];
38 | int n = 0;
39 |
40 | while ((n = b_reader.read(buffer)) != -1)
41 | build.append(buffer, 0, n);
42 |
43 | return build.toString();
44 | }
45 | catch (IOException e) {e.printStackTrace();}
46 |
47 | return null;
48 | }
49 |
50 | @Override
51 | public AbstractReader clone()
52 | {
53 | return new RawReader();
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/configuration/DecodeConfiguration.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.configuration;
17 |
18 | import java.io.InputStream;
19 |
20 | import org.w3c.dom.Element;
21 |
22 | import edu.emory.clir.clearnlp.component.utils.NLPMode;
23 | import edu.emory.clir.clearnlp.util.XmlUtils;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DecodeConfiguration extends AbstractConfiguration
30 | {
31 | private final Element e_model;
32 |
33 | public DecodeConfiguration(InputStream in)
34 | {
35 | super(in);
36 | e_model = getFirstElement(E_MODEL);
37 | }
38 |
39 | public String getModelPath(NLPMode mode)
40 | {
41 | Element eMode = XmlUtils.getFirstElementByTagName(e_model, mode.toString());
42 | return (eMode != null) ? XmlUtils.getTrimmedTextContent(eMode) : null;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/experiment/AbstractArgsReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.experiment;
17 |
18 | import org.kohsuke.args4j.CmdLineException;
19 | import org.kohsuke.args4j.CmdLineParser;
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | abstract public class AbstractArgsReader
26 | {
27 | @SuppressWarnings("deprecation")
28 | public AbstractArgsReader(String[] args, Object obj)
29 | {
30 | CmdLineParser cmd = new CmdLineParser(obj);
31 |
32 | try
33 | {
34 | cmd.parseArgument(args);
35 | String msg = getErrorMessage();
36 | if (msg != null) throw new CmdLineException(cmd, msg);
37 | }
38 | catch (CmdLineException e)
39 | {
40 | System.err.println(e.getMessage());
41 | cmd.printUsage(System.err);
42 | System.exit(1);
43 | }
44 | }
45 |
46 | abstract protected String getErrorMessage();
47 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/io/FileExtensionFilter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.io;
17 |
18 | import java.io.File;
19 | import java.io.FilenameFilter;
20 |
21 | import edu.emory.clir.clearnlp.util.StringUtils;
22 | import edu.emory.clir.clearnlp.util.constant.StringConst;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class FileExtensionFilter implements FilenameFilter
29 | {
30 | private String s_extension;
31 |
32 | /** @param extension the extension of files to keep (e.g., {@code "txt"}). */
33 | public FileExtensionFilter(String extension)
34 | {
35 | s_extension = StringUtils.toLowerCase(extension);
36 | }
37 |
38 | @Override
39 | public boolean accept(File dir, String name)
40 | {
41 | return s_extension.equals(StringConst.ASTERISK) || StringUtils.toLowerCase(name).endsWith(s_extension);
42 | }
43 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/BinUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | import org.apache.log4j.Logger;
19 | import org.kohsuke.args4j.CmdLineException;
20 | import org.kohsuke.args4j.CmdLineParser;
21 |
22 |
23 | /**
24 | * @since 3.0.0
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class BinUtils
28 | {
29 | private BinUtils() {}
30 |
31 | public static final Logger LOG = Logger.getLogger(BinUtils.class);
32 |
33 | /** Initializes arguments using args4j. */
34 | static public void initArgs(String[] args, Object bean)
35 | {
36 | CmdLineParser cmd = new CmdLineParser(bean);
37 |
38 | try
39 | {
40 | cmd.parseArgument(args);
41 | }
42 | catch (CmdLineException e)
43 | {
44 | System.err.println(e.getMessage());
45 | cmd.printUsage(System.err);
46 | System.exit(1);
47 | }
48 | catch (Exception e) {e.printStackTrace();}
49 | }
50 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/component/configuration/DEPConfigurationTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.configuration;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.clir.clearnlp.component.mode.dep.DEPConfiguration;
24 | import edu.emory.clir.clearnlp.util.IOUtils;
25 |
26 | /**
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DEPConfigurationTest
30 | {
31 | @Test
32 | // @Ignore
33 | public void test()
34 | {
35 | String filename = "src/test/resources/nlp/configuration/configure.xml";
36 | DEPConfiguration config = new DEPConfiguration(IOUtils.createFileInputStream(filename));
37 |
38 | assertEquals(config.getBeamSize(), 32);
39 | assertEquals(config.getRootLabel(), "root");
40 | assertTrue(config.evaluatePunctuation());
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/ObjectIntPair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
/**
 * A mutable pair of an object and a primitive integer; pairs are ordered by the integer only.
 * @param <T> the type of the object member.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntPair<T> implements Serializable, Comparable<ObjectIntPair<T>>
{
	private static final long serialVersionUID = -5228607179375724504L;

	/** The object member; may be {@code null}. */
	public T o;
	/** The integer member. */
	public int i;

	/** Creates a pair holding {@code null} and {@code 0}. */
	public ObjectIntPair()
	{
		set(null, 0);
	}

	/**
	 * @param o the object member.
	 * @param i the integer member.
	 */
	public ObjectIntPair(T o, int i)
	{
		set(o, i);
	}

	/** Replaces both members at once. */
	public void set(T o, int i)
	{
		this.o = o;
		this.i = i;
	}

	/**
	 * Compares by the integer member only.
	 * @return a negative value, zero, or a positive value if this integer is less than, equal to, or greater than {@code p.i}.
	 */
	@Override
	public int compareTo(ObjectIntPair<T> p)
	{
		// Integer.compare avoids the overflow that subtraction suffers for operands of opposite sign.
		return Integer.compare(i, p.i);
	}

	/** @return {@code "(o,i)"}; a {@code null} object is rendered as {@code "null"}. */
	@Override
	public String toString()
	{
		// Plain concatenation is null-safe, unlike an explicit o.toString().
		return "("+o+","+i+")";
	}
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/verbnet/VNXml.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.verbnet;
17 |
/**
 * String constants for reading VerbNet XML.
 * Judging by the prefixes, {@code E_} values are element names and {@code A_} values are attribute names (to be confirmed against the readers that use them).
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface VNXml
{
	// ---- E_* constants ----
	String E_VNSUBCLASS = "VNSUBCLASS";
	String E_SEMANTICS  = "SEMANTICS";
	String E_SYNRESTR   = "SYNRESTR";
	String E_SYNTAX     = "SYNTAX";
	String E_FRAMES     = "FRAMES";
	String E_FRAME      = "FRAME";
	String E_PRED       = "PRED";
	String E_ARG        = "ARG";

	// ---- A_* constants; note that "value" exists in both lower-case and capitalized spellings ----
	String A_ID        = "ID";
	String A_TYPE      = "type";
	String A_VALUE     = "value";
	String A_VALUE_CAP = "Value";
	String A_BOOL      = "bool";

	// ---- argument type values ----
	String ARG_TYPE_EVENT         = "Event";
	String ARG_TYPE_THEM_ROLE     = "ThemRole";
	String ARG_TYPE_VERB_SPECIFIC = "VerbSpecific";
	String ARG_TYPE_CONSTANT      = "Constant";

	// ---- syntactic restriction type values ----
	String SYNRESTR_TYPE_PLURAL = "plural";
}
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/collection/stack/StackTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.stack;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.collection.stack.Stack;
23 |
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class StackTest
30 | {
31 | @Test
32 | public void test()
33 | {
34 | Stack stack = new Stack(3);
35 | stack.push("1");
36 | stack.push("2");
37 | stack.push("3");
38 |
39 | assertEquals("3", stack.peek());
40 | assertEquals("2", stack.peek(1));
41 |
42 | assertEquals("3", stack.pop());
43 | Stack clone = new Stack(stack);
44 |
45 | assertEquals("2", stack.pop());
46 | assertEquals("1", stack.pop());
47 |
48 | assertEquals("2", clone.pop());
49 | assertEquals("1", clone.pop());
50 | }
51 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTUnitTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.universal;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.clir.clearnlp.dictionary.universal.DTUnit;
26 |
27 | /**
28 | * @since 3.0.0
29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
30 | */
31 | public class DTUnitTest
32 | {
33 | @Test
34 | public void test()
35 | {
36 | DTUnit dt = new DTUnit();
37 |
38 | assertEquals("[1, mg]", Arrays.toString(dt.tokenize("1mg")));
39 | assertEquals("[1, cm]", Arrays.toString(dt.tokenize("1cm")));
40 |
41 | assertEquals("[10, MG]", Arrays.toString(dt.tokenize("10MG")));
42 | assertEquals("[10, CM]", Arrays.toString(dt.tokenize("10CM")));
43 |
44 | assertTrue(dt.tokenize("1ma") == null);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/bin/PrintTree.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.bin;
17 |
18 | import edu.emory.clir.clearnlp.constituent.CTReader;
19 | import edu.emory.clir.clearnlp.constituent.CTTree;
20 | import edu.emory.clir.clearnlp.util.IOUtils;
21 | import edu.emory.clir.clearnlp.util.constant.StringConst;
22 |
23 | /**
24 | * @since 3.0.0
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class PrintTree
28 | {
29 | static public void main(String[] args)
30 | {
31 | String treeDir = args[0];
32 | String treeFile = args[1];
33 | int treeId = Integer.parseInt(args[2]);
34 |
35 | CTReader reader = new CTReader(IOUtils.createFileInputStream(treeDir+StringConst.FW_SLASH+treeFile));
36 | CTTree tree = reader.nextTree(treeId);
37 | reader.close();
38 |
39 | // System.out.println(tree.toString());
40 | System.out.println(tree.toString(true,true,StringConst.NEW_LINE));
41 | }
42 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/collection/ngram/BigramTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.ngram;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | /**
23 | * @since 3.0.0
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class BigramTest
27 | {
28 | @Test
29 | public void test()
30 | {
31 | Bigram map = new Bigram<>();
32 |
33 | map.add("A", "a1");
34 | map.add("A", "a2");
35 | map.add("A", "a1");
36 | map.add("A", "a3");
37 |
38 | map.add("B", "b1");
39 | map.add("B", "b2", 2);
40 | map.add("B", "b3");
41 |
42 | assertEquals("[A, B]", map.getBigramSet().toString());
43 |
44 | assertEquals("[(a3,1), (a1,2), (a2,1)]", map.toList("A", 0).toString());
45 | assertEquals("[(b1,1), (b2,2), (b3,1)]", map.toList("B", 0).toString());
46 | assertEquals("[(a1,2)]", map.toList("A", 1).toString());
47 | }
48 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/cluster/Term.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.cluster;
17 |
/**
 * An integer ID paired with a mutable float score; terms are ordered by ID only.
 * @since 3.1.2
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Term implements Comparable<Term>
{
	private int   id;
	private float score;

	/**
	 * Kept for existing callers that pass an integer score.
	 * @param i1 the ID.
	 * @param i2 the score, widened to {@code float}.
	 */
	public Term(int i1, int i2)
	{
		set(i1, i2);
	}

	/**
	 * @param id the ID.
	 * @param score the score.
	 */
	public Term(int id, float score)
	{
		set(id, score);
	}

	/** Sets both the ID and the score. */
	public void set(int id, float score)
	{
		setID(id);
		setScore(score);
	}

	/** @return the ID. */
	public int getID()
	{
		return id;
	}

	/** @param id the new ID. */
	public void setID(int id)
	{
		this.id = id;
	}

	/** @return the current score. */
	public float getScore()
	{
		return score;
	}

	/** @param score the new score, replacing the current one. */
	public void setScore(float score)
	{
		this.score = score;
	}

	/** @param score the amount added to the current score. */
	public void addScore(float score)
	{
		this.score += score;
	}

	/**
	 * Compares by ID only; the score is ignored.
	 * @return a negative value, zero, or a positive value if this ID is less than, equal to, or greater than {@code o}'s ID.
	 */
	@Override
	public int compareTo(Term o)
	{
		// Integer.compare avoids the overflow that subtraction suffers for IDs of opposite sign.
		return Integer.compare(id, o.id);
	}
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/Pair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
20 |
/**
 * A mutable pair of two objects with public members.
 * @param <T1> the type of the first member.
 * @param <T2> the type of the second member.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Pair<T1,T2> implements Serializable
{
	private static final long serialVersionUID = 8447270640444415417L;

	/** The first member; may be {@code null}. */
	public T1 o1;
	/** The second member; may be {@code null}. */
	public T2 o2;

	/** Creates a pair whose members are both {@code null}. */
	public Pair()
	{
		set(null, null);
	}

	/**
	 * @param o1 the first member.
	 * @param o2 the second member.
	 */
	public Pair(T1 o1, T2 o2)
	{
		set(o1, o2);
	}

	/** Replaces both members at once. */
	public void set(T1 o1, T2 o2)
	{
		this.o1 = o1;
		this.o2 = o2;
	}

	/** @return {@code "(o1,o2)"}; {@code null} members are rendered as {@code "null"}. */
	@Override
	public String toString()
	{
		StringBuilder build = new StringBuilder();

		// append(Object) is null-safe, unlike an explicit toString() call on the member.
		build.append("(");
		build.append(o1);
		build.append(",");
		build.append(o2);
		build.append(")");

		return build.toString();
	}
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/dictionary/PathTokenizer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary;
17 |
/**
 * Path constants for the tokenizer dictionary files, grouped into a universal and an English directory.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PathTokenizer
{
	/** Common prefix of every path below. */
	String ROOT      = "edu/emory/clir/clearnlp/dictionary/tokenizer/";
	/** Directory of the universal dictionaries. */
	String UNIVERSAL = ROOT + "universal/";
	/** Directory of the English dictionaries. */
	String ENGLISH   = ROOT + "english/";

	// ---- files under UNIVERSAL ----
	String CURRENCY_DOLLAR = UNIVERSAL + "currency-dollar.txt";
	String CURRENCY        = UNIVERSAL + "currency.txt";
	String EMOTICONS       = UNIVERSAL + "emoticons.txt";
	String HTML_TAGS       = UNIVERSAL + "html-tags.txt";
	String UNITS           = UNIVERSAL + "units.txt";

	// ---- files under ENGLISH ----
	String EN_ABBREVIATION_PERIOD = ENGLISH + "abbreviation-period.txt";
	String EN_HYPHEN_PREFIX       = ENGLISH + "hyphen-prefix.txt";
	String EN_HYPHEN_SUFFIX       = ENGLISH + "hyphen-suffix.txt";
	String EN_COMPOUNDS           = ENGLISH + "compounds.txt";
}
41 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTEmoticonTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.universal;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.clir.clearnlp.dictionary.universal.DTEmoticon;
26 |
27 | /**
28 | * @since 3.0.0
29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
30 | */
31 | public class DTEmoticonTest
32 | {
33 | @Test
34 | public void test()
35 | {
36 | DTEmoticon dt = new DTEmoticon();
37 | String s;
38 |
39 | s = ":";
40 | assertTrue(dt.getEmoticonRange(s) == null);
41 |
42 | s = ":-)";
43 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s)));
44 |
45 | s = "Hi:-)";
46 | assertEquals("[2, 5]", Arrays.toString(dt.getEmoticonRange(s)));
47 |
48 | s = ":-)..";
49 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s)));
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/triple/Triple.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.triple;
17 |
18 | import java.io.Serializable;
19 |
/**
 * A mutable triple of three objects with public members.
 * @param <T1> the type of the first member.
 * @param <T2> the type of the second member.
 * @param <T3> the type of the third member.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Triple<T1,T2,T3> implements Serializable
{
	private static final long serialVersionUID = 2261656496863083672L;

	/** The first member; may be {@code null}. */
	public T1 o1;
	/** The second member; may be {@code null}. */
	public T2 o2;
	/** The third member; may be {@code null}. */
	public T3 o3;

	/**
	 * @param o1 the first member.
	 * @param o2 the second member.
	 * @param o3 the third member.
	 */
	public Triple(T1 o1, T2 o2, T3 o3)
	{
		set(o1, o2, o3);
	}

	/** Replaces all three members at once. */
	public void set(T1 o1, T2 o2, T3 o3)
	{
		this.o1 = o1;
		this.o2 = o2;
		this.o3 = o3;
	}

	/** @return {@code "(o1,o2,o3)"}; {@code null} members are rendered as {@code "null"}. */
	@Override
	public String toString()
	{
		StringBuilder build = new StringBuilder();

		// append(Object) is null-safe, unlike an explicit toString() call on the member.
		build.append("(");
		build.append(o1);
		build.append(",");
		build.append(o2);
		build.append(",");
		build.append(o3);
		build.append(")");

		return build.toString();
	}
}
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/wikipedia/WikiPrint.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.wikipedia;
17 |
18 | import java.io.ObjectInputStream;
19 | import java.util.zip.ZipFile;
20 |
21 | import edu.emory.clir.clearnlp.util.IOUtils;
22 | import edu.emory.clir.clearnlp.util.Joiner;
23 |
24 | /**
25 | * @since 3.0.3
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class WikiPrint
29 | {
30 | static public void main(String[] args)
31 | {
32 | WikiIndexMap map = new WikiIndexMap();
33 | final String wikiFile = args[0];
34 | final String indexFile = args[1];
35 | final String title = Joiner.join(args, " ", 2, args.length);
36 |
37 | try
38 | {
39 | ObjectInputStream in = new ObjectInputStream(IOUtils.createXZBufferedInputStream(indexFile));
40 | ZipFile zip = new ZipFile(wikiFile);
41 | map = (WikiIndexMap)in.readObject();
42 | System.out.println(map.getPage(zip, title));
43 | }
44 | catch (Exception e) {e.printStackTrace();}
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/wikipedia/WikiParagraph.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.wikipedia;
17 |
18 | import java.io.Serializable;
19 | import java.util.ArrayList;
20 | import java.util.List;
21 |
22 | import edu.emory.clir.clearnlp.util.Joiner;
23 | import edu.emory.clir.clearnlp.util.constant.StringConst;
24 |
25 | /**
26 | * @since 3.0.0.
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class WikiParagraph implements Serializable
30 | {
31 | private static final long serialVersionUID = 7011678413565546215L;
32 | private List l_sentences;
33 |
34 | public WikiParagraph()
35 | {
36 | l_sentences = new ArrayList<>();
37 | }
38 |
39 | public List getSentences()
40 | {
41 | return l_sentences;
42 | }
43 |
44 | public void addSentence(String sentence)
45 | {
46 | l_sentences.add(sentence);
47 | }
48 |
49 | @Override
50 | public String toString()
51 | {
52 | return Joiner.join(l_sentences, StringConst.NEW_LINE);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dependency/DEPFeatTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dependency;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.dependency.DEPFeat;
23 | import edu.emory.clir.clearnlp.reader.TSVReader;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DEPFeatTest
30 | {
31 | @Test
32 | public void testDEPFeat()
33 | {
34 | DEPFeat feat = new DEPFeat();
35 | assertEquals(TSVReader.BLANK, feat.toString());
36 |
37 | feat = new DEPFeat(TSVReader.BLANK);
38 | assertEquals(TSVReader.BLANK, feat.toString());
39 |
40 | feat.add("lst=choi|fst=jinho");
41 | assertEquals("fst=jinho|lst=choi", feat.toString());
42 |
43 | assertEquals("choi" , feat.get("lst"));
44 | assertEquals("jinho", feat.get("fst"));
45 | assertEquals(null , feat.get("mid"));
46 |
47 | feat.add(TSVReader.BLANK);
48 | assertEquals("fst=jinho|lst=choi", feat.toString());
49 | }
50 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/feature/type/RelationType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.feature.type;
17 |
18 | import java.io.Serializable;
19 |
/**
 * Relation types: seven first-order relations followed by their second-order counterparts.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 * @since 3.0.0
 */
public enum RelationType implements Serializable
{
	// ---- first-order relations ----

	/** Head. */
	h,
	/** Left-most dependent. */
	lmd,
	/** Right-most dependent. */
	rmd,
	/** Left-nearest dependent. */
	lnd,
	/** Right-nearest dependent. */
	rnd,
	/** Left-nearest sibling. */
	lns,
	/** Right-nearest sibling. */
	rns,

	// ---- second-order relations ----

	/** Grand head. */
	h2,
	/** Second left-most dependent. */
	lmd2,
	/** Second right-most dependent. */
	rmd2,
	/** Second left-nearest dependent. */
	lnd2,
	/** Second right-nearest dependent. */
	rnd2,
	/** Second left-nearest sibling. */
	lns2,
	/** Second right-nearest sibling. */
	rns2
}
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/propbank/PBLocationTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.propbank;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertFalse;
20 | import static org.junit.Assert.assertTrue;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.clir.clearnlp.lexicon.propbank.PBLocation;
25 |
26 | /**
27 | * @since 3.0.0
28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
29 | */
30 | public class PBLocationTest
31 | {
32 | @Test
33 | public void test()
34 | {
35 | PBLocation loc1 = new PBLocation(0, 1);
36 | PBLocation loc2 = new PBLocation(0, 1, "*");
37 |
38 | assertEquals( "0:1", loc1.toString());
39 | assertEquals("*0:1", loc2.toString());
40 | assertTrue(loc1.matches(loc2.getTerminalID(), loc2.getHeight()));
41 | assertFalse(loc1.equals(loc2));
42 |
43 | loc1.set(0, 2);
44 | assertFalse(loc1.matches(loc2.getTerminalID(), loc2.getHeight()));
45 |
46 | loc2 = new PBLocation("0:3", ",");
47 | assertEquals(",0:3", loc2.toString());
48 | }
49 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/set/IntHashSet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.set;
17 |
18 | import java.io.IOException;
19 | import java.io.ObjectInputStream;
20 | import java.io.ObjectOutputStream;
21 | import java.io.Serializable;
22 |
23 | /**
24 | * @since 3.0.0
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class IntHashSet extends com.carrotsearch.hppc.IntHashSet implements Serializable
28 | {
29 | private static final long serialVersionUID = 8220093021280571821L;
30 |
31 | public IntHashSet()
32 | {
33 | super();
34 | }
35 |
36 | public IntHashSet(int initialCapacity)
37 | {
38 | super(initialCapacity);
39 | }
40 |
41 | private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException
42 | {
43 | addAll((int[])in.readObject());
44 | }
45 |
46 | private void writeObject(ObjectOutputStream o) throws IOException
47 | {
48 | o.writeObject(toArray());
49 | }
50 |
51 | public void addAll(IntHashSet set)
52 | {
53 | super.addAll(set);
54 | }
55 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/dictionary/english/DTAbbreviation.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.english;
17 |
18 | import java.io.InputStream;
19 | import java.util.Set;
20 |
21 | import edu.emory.clir.clearnlp.dictionary.PathTokenizer;
22 | import edu.emory.clir.clearnlp.util.DSUtils;
23 | import edu.emory.clir.clearnlp.util.IOUtils;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DTAbbreviation
30 | {
31 | private Set s_period;
32 |
33 | public DTAbbreviation()
34 | {
35 | init(IOUtils.getInputStreamsFromClasspath(PathTokenizer.EN_ABBREVIATION_PERIOD));
36 | }
37 |
38 | public DTAbbreviation(InputStream abbreviationPeriod)
39 | {
40 | init(abbreviationPeriod);
41 | }
42 |
43 | public void init(InputStream abbreviationPeriod)
44 | {
45 | s_period = DSUtils.createStringHashSet(abbreviationPeriod, true, true);
46 | }
47 |
48 | public boolean isAbbreviationEndingWithPeriod(String lower)
49 | {
50 | return s_period.contains(lower);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/tokenization/english/ApostropheTokenizerTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.tokenization.english;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import java.util.Arrays;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.clir.clearnlp.tokenization.english.ApostropheEnglishTokenizer;
26 |
27 | /**
28 | * @since 3.0.0
29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
30 | */
31 | public class ApostropheTokenizerTest
32 | {
33 | @Test
34 | public void test()
35 | {
36 | ApostropheEnglishTokenizer dt = new ApostropheEnglishTokenizer();
37 |
38 | assertEquals("[he, 's]" , Arrays.toString(dt.tokenize("he's")));
39 | assertEquals("[he, 'S]" , Arrays.toString(dt.tokenize("he'S")));
40 | assertEquals("[do, n't]", Arrays.toString(dt.tokenize("don't")));
41 | assertEquals("[do, 'nt]", Arrays.toString(dt.tokenize("do'nt")));
42 |
43 | assertTrue(dt.tokenize("he'dd") == null);
44 | assertTrue(dt.tokenize("dont") == null);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/vector/VectorSpaceModelTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.vector;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import java.util.List;
21 | import java.util.Set;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.clir.clearnlp.collection.map.ObjectIntHashMap;
26 | import edu.emory.clir.clearnlp.util.DSUtils;
27 |
28 | /**
29 | * @since 3.0.3
30 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
31 | */
32 | public class VectorSpaceModelTest
33 | {
34 | @Test
35 | public void test()
36 | {
37 | List terms = DSUtils.toArrayList("A","B","C","D","E");
38 | Set stopWords = DSUtils.toHashSet("B","D");
39 |
40 | ObjectIntHashMap map = VectorSpaceModel.getBagOfWords(terms, stopWords, 3);
41 |
42 | assertEquals(6, map.size());
43 | assertEquals(1, map.get("A"));
44 | assertEquals(1, map.get("C"));
45 | assertEquals(1, map.get("E"));
46 | assertEquals(1, map.get("A_C"));
47 | assertEquals(1, map.get("C_E"));
48 | assertEquals(1, map.get("A_C_E"));
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/resources/features/feature_en_pos.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/src/main/resources/samples/clearnlp.txt.cnlp:
--------------------------------------------------------------------------------
1 | 1 The the DT _ 3 det
2 | 2 ClearNLP clearnlp NNP p2=JJ 3 nn
3 | 3 project project NN _ 4 nsubj
4 | 4 provides provide VBZ _ 0 root
5 | 5 software software NN _ 4 dobj
6 | 6 and and CC _ 5 cc
7 | 7 resources resource NNS _ 5 conj
8 | 8 for for IN _ 5 prep
9 | 9 natural natural JJ _ 10 amod
10 | 10 language language NN _ 11 nn
11 | 11 processing processing NN _ 8 pobj
12 | 12 . . . _ 4 punct
13 |
14 | 1 It it PRP _ 3 nsubjpass
15 | 2 is be VBZ _ 3 auxpass
16 | 3 developed develop VBN _ 0 root
17 | 4 by by IN _ 3 agent
18 | 5 the the DT _ 6 det
19 | 6 Center center NNP p2=NNPS 4 pobj
20 | 7 for for IN _ 6 prep
21 | 8 Language language NNP _ 7 pobj
22 | 9 and and CC _ 8 cc
23 | 10 Information information NNP _ 11 nn
24 | 11 Research research NNP _ 8 conj
25 | 12 ( ( -LRB- _ 13 punct
26 | 13 CLIR clir NNP _ 11 appos
27 | 14 ) ) -RRB- _ 13 punct
28 | 15 at at IN _ 6 prep
29 | 16 Emory emory NNP _ 17 nn
30 | 17 University university NNP _ 15 pobj
31 | 18 . . . _ 3 punct
32 |
33 | 1 Please please UH _ 2 intj
34 | 2 join join VB _ 0 root
35 | 3 our our PRP$ _ 5 poss
36 | 4 discussion discussion NN _ 5 nn
37 | 5 group group NN _ 2 dobj
38 | 6 if if IN _ 8 mark
39 | 7 you you PRP _ 8 nsubj
40 | 8 want want VBP _ 2 advcl
41 | 9 to to TO _ 10 aux
42 | 10 get get VB _ 8 xcomp
43 | 11 notifications notification NNS _ 10 dobj
44 | 12 about about IN _ 11 prep
45 | 13 new new JJ _ 14 amod
46 | 14 updates update NNS _ 12 pobj
47 | 15 or or CC _ 14 cc
48 | 16 post post NN _ 17 nn
49 | 17 issues issue NNS _ 14 conj
50 | 18 , , , _ 17 punct
51 | 19 suggestions suggestion NNS _ 17 conj
52 | 20 , , , _ 19 punct
53 | 21 questions question NNS _ 19 conj
54 | 22 , , , _ 21 punct
55 | 23 etc etc FW _ 21 conj
56 | 24 . . . _ 2 punct
57 |
58 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/component/configuration/POSConfigurationTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.configuration;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import jdk.nashorn.internal.ir.annotations.Ignore;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.clir.clearnlp.component.mode.pos.POSConfiguration;
24 | import edu.emory.clir.clearnlp.util.IOUtils;
25 | import edu.emory.clir.clearnlp.util.lang.TLanguage;
26 |
27 | /**
28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
29 | */
30 | public class POSConfigurationTest
31 | {
32 | @Test
33 | @Ignore
34 | public void test()
35 | {
36 | String filename = "src/test/resources/nlp/configuration/configure.xml";
37 | POSConfiguration config = new POSConfiguration(IOUtils.createFileInputStream(filename));
38 |
39 | assertEquals(TLanguage.ENGLISH, config.getLanguage());
40 | assertEquals(0.4 , config.getAmbiguityClassThreshold(), 1e-15);
41 | assertEquals(2 , config.getDocumentFrequencyCutoff());
42 | assertEquals(1500, config.getDocumentSize());
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/pair/ObjectDoublePair.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.pair;
17 |
18 | import java.io.Serializable;
19 |
20 | import edu.emory.clir.clearnlp.util.MathUtils;
21 |
22 | /**
23 | * @since 3.0.0
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class ObjectDoublePair implements Serializable, Comparable>
27 | {
28 | private static final long serialVersionUID = -5228607179375724504L;
29 |
30 | public T o;
31 | public double d;
32 |
33 | public ObjectDoublePair(T o, double d)
34 | {
35 | set(o, d);
36 | }
37 |
38 | public void set(T o, double d)
39 | {
40 | this.o = o;
41 | this.d = d;
42 | }
43 |
44 | public T getObject()
45 | {
46 | return o;
47 | }
48 |
49 | public double getDouble()
50 | {
51 | return d;
52 | }
53 |
54 | @Override
55 | public int compareTo(ObjectDoublePair p)
56 | {
57 | return MathUtils.signum(d - p.d);
58 | }
59 |
60 | @Override
61 | public String toString()
62 | {
63 | return "("+o.toString()+","+d+")";
64 | }
65 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/set/CharHashSet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.set;
17 |
18 | import java.io.IOException;
19 | import java.io.ObjectInputStream;
20 | import java.io.ObjectOutputStream;
21 | import java.io.Serializable;
22 |
23 | /**
24 | * @since 3.0.0
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class CharHashSet extends com.carrotsearch.hppc.CharHashSet implements Serializable
28 | {
29 | private static final long serialVersionUID = -3796053685010557911L;
30 |
31 | public CharHashSet()
32 | {
33 | super();
34 | }
35 |
36 | public CharHashSet(int initialCapacity)
37 | {
38 | super(initialCapacity);
39 | }
40 |
41 | public CharHashSet(char... characters)
42 | {
43 | for (char c : characters)
44 | add(c);
45 | }
46 |
47 | private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException
48 | {
49 | addAll((char[])in.readObject());
50 | }
51 |
52 | private void writeObject(ObjectOutputStream o) throws IOException
53 | {
54 | o.writeObject(toArray());
55 | }
56 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/srl/SRLEval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2015, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.srl;
17 |
18 | import java.util.List;
19 |
20 | import edu.emory.clir.clearnlp.component.evaluation.AbstractF1Eval;
21 | import edu.emory.clir.clearnlp.dependency.DEPTree;
22 | import edu.emory.clir.clearnlp.util.arc.SRLArc;
23 |
24 | /**
25 | * @since 3.2.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class SRLEval extends AbstractF1Eval
29 | {
30 | @Override
31 | public void countCorrect(DEPTree sTree, SRLArc[][] goldHeads)
32 | {
33 | int i, size = sTree.size();
34 | List sHeads;
35 | SRLArc[] gHeads;
36 |
37 | for (i=1; i extends ArrayList implements Serializable
28 | {
29 | private static final long serialVersionUID = -8603527717926741739L;
30 |
31 | public Stack()
32 | {
33 | super();
34 | }
35 |
/**
 * Creates a stack, forwarding the capacity hint to the superclass.
 *
 * @param initialCapacity the capacity hint passed to the superclass constructor.
 */
public Stack(int initialCapacity)
{
	super(initialCapacity);
}
40 |
/**
 * Creates a stack initialised from another stack by handing it to the
 * superclass constructor.
 *
 * @param stack the stack to copy from; typed with this class's element type
 *        {@code T} rather than as a raw {@code Stack}.
 */
public Stack(Stack<T> stack)
{
	super(stack);
}
45 |
46 | public void push(T element)
47 | {
48 | add(element);
49 | }
50 |
51 | public T pop()
52 | {
53 | int n = size() - 1;
54 | return DSUtils.isRange(this, n) ? remove(n) : null;
55 | }
56 |
57 | public T peek()
58 | {
59 | return peek(0);
60 | }
61 |
62 | public T peek(int n)
63 | {
64 | n = size() - 1 - n;
65 | return DSUtils.isRange(this, n) ? get(n) : null;
66 | }
67 | }
--------------------------------------------------------------------------------
/src/main/resources/features/feature_en_ner_conll03.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/collection/set/DisjointSet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.set;
17 |
18 | import java.util.Arrays;
19 |
/**
 * Disjoint sets over the integer ids {@code 0 .. size-1}.
 * <p>
 * All state lives in one array: a negative entry marks the root of a set and
 * its magnitude is the number of ids in that set; a non-negative entry is the
 * index of the id's parent.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DisjointSet
{
	private int[] s_root;

	/**
	 * Creates {@code size} singleton sets, one per id.
	 *
	 * @param size the number of ids.
	 */
	public DisjointSet(int size)
	{
		s_root = new int[size];
		Arrays.fill(s_root, -1);
	}

	/**
	 * Merges the sets containing the two ids.
	 *
	 * @param id1 an id of the first set.
	 * @param id2 an id of the second set.
	 * @return the root of the merged set (or the common root if the ids were
	 *         already in the same set).
	 */
	public int union(int id1, int id2)
	{
		final int rootA = find(id1);
		final int rootB = find(id2);
		if (rootA == rootB) return rootA;

		// sizes are stored negated: a strictly smaller entry means a strictly
		// larger set; on a tie the second root is kept
		final boolean firstIsLarger = s_root[rootA] < s_root[rootB];
		final int kept     = firstIsLarger ? rootA : rootB;
		final int absorbed = firstIsLarger ? rootB : rootA;

		s_root[kept]    += s_root[absorbed];
		s_root[absorbed] = kept;
		return kept;
	}

	/**
	 * Finds the root of the set containing the id and re-points every id
	 * visited on the way directly at that root.
	 *
	 * @param id the id to look up.
	 * @return the root of the id's set.
	 */
	public int find(int id)
	{
		// first pass: climb to the root
		int root = id;
		while (s_root[root] >= 0)
			root = s_root[root];

		// second pass: compress the path that was just walked
		int node = id;
		while (node != root)
		{
			int parent = s_root[node];
			s_root[node] = root;
			node = parent;
		}

		return root;
	}

	/**
	 * @param id1 the first id.
	 * @param id2 the second id.
	 * @return {@code true} if both ids have the same root.
	 */
	public boolean inSameSet(int id1, int id2)
	{
		final int rootA = find(id1);
		final int rootB = find(id2);
		return rootA == rootB;
	}

	/** @return the backing array rendered with {@link Arrays#toString(int[])}. */
	@Override
	public String toString()
	{
		return Arrays.toString(s_root);
	}
}
68 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/dep/DEPFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.dep;
17 |
18 | import java.io.InputStream;
19 |
20 | import edu.emory.clir.clearnlp.component.mode.dep.state.AbstractDEPState;
21 | import edu.emory.clir.clearnlp.dependency.DEPNode;
22 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureExtractor;
23 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureToken;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DEPFeatureExtractor extends CommonFeatureExtractor
30 | {
31 | private static final long serialVersionUID = -7336596053366459297L;
32 |
33 | public DEPFeatureExtractor(InputStream in)
34 | {
35 | super(in);
36 | }
37 |
38 | @Override
39 | protected String getFeature(CommonFeatureToken token, AbstractDEPState state, DEPNode node)
40 | {
41 | switch (token.getField())
42 | {
43 | case t: return Integer.toString(state.distanceBetweenStackAndInput());
44 | default: return super.getFeature(token, state, node);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/mode/pos/POSFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.mode.pos;
17 |
18 | import java.io.InputStream;
19 |
20 | import edu.emory.clir.clearnlp.dependency.DEPNode;
21 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureExtractor;
22 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureToken;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class POSFeatureExtractor extends CommonFeatureExtractor
29 | {
30 | private static final long serialVersionUID = -7336596053366459297L;
31 |
32 | public POSFeatureExtractor(InputStream in)
33 | {
34 | super(in);
35 | }
36 |
37 | @Override
38 | protected String getFeature(CommonFeatureToken token, POSState state, DEPNode node)
39 | {
40 | String ftr = node.getFormFeature(token.getField());
41 | if (ftr != null) return state.extractWordFormFeature(node) ? ftr : null;
42 |
43 | switch (token.getField())
44 | {
45 | case a : return state.getAmbiguityClass(node);
46 | default: return super.getFeature(token, state, node);
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/resources/features/feature_en_ner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTHtmlTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.universal;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | import edu.emory.clir.clearnlp.dictionary.universal.DTHtml;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class DTHtmlTest
29 | {
30 | @Test
31 | public void test()
32 | {
33 | DTHtml html = new DTHtml();
34 | StringBuilder build;
35 | String s;
36 |
37 | s = ""&<>";
38 | assertEquals("\"&<>", html.replace(s));
39 |
40 | s = "¢£¤¥§©®€";
41 | build = new StringBuilder();
42 |
43 | build.append((char)162);
44 | build.append((char)163);
45 | build.append((char)164);
46 | build.append((char)165);
47 | build.append((char)167);
48 | build.append((char)169);
49 | build.append((char)174);
50 | build.append((char)8364);
51 |
52 | assertEquals(build.toString(), html.replace(s));
53 |
54 | s = "!<&rand;>{";
55 | assertEquals("!<&rand;>{", html.replace(s));
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTCurrencyTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.universal;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertFalse;
20 | import static org.junit.Assert.assertTrue;
21 |
22 | import java.util.Arrays;
23 |
24 | import org.junit.Test;
25 |
26 | import edu.emory.clir.clearnlp.dictionary.universal.DTCurrency;
27 |
28 | /**
29 | * @since 3.0.0
30 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
31 | */
32 | public class DTCurrencyTest
33 | {
34 | @Test
35 | public void test()
36 | {
37 | DTCurrency dt = new DTCurrency();
38 |
39 | assertTrue(dt.isCurrencyDollar("c"));
40 | assertTrue(dt.isCurrencyDollar("us"));
41 |
42 | assertTrue(dt.isCurrency("usd"));
43 | assertTrue(dt.isCurrency("us$"));
44 |
45 | assertFalse(dt.isCurrencyDollar("US"));
46 | assertFalse(dt.isCurrencyDollar("a"));
47 | assertFalse(dt.isCurrency("usb"));
48 |
49 | assertEquals("[USD, 1]", Arrays.toString(dt.tokenize("USD1")));
50 | assertEquals("[us$, 1]", Arrays.toString(dt.tokenize("us$1")));
51 | assertTrue(dt.tokenize("u$1") == null);
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/prediction/StringPrediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.prediction;
17 |
18 | import edu.emory.clir.clearnlp.util.MathUtils;
19 |
20 | /**
21 | * @since 3.0.0
22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
23 | */
24 | public class StringPrediction extends AbstractPrediction implements Comparable<StringPrediction>
25 | {
26 |     /** The label of this prediction. */
27 |     private String s_label;
28 |
29 |     /**
30 |      * @param label the label of this prediction.
31 |      * @param score the score of the label.
32 |      */
33 |     public StringPrediction(String label, double score)
34 |     {
35 |         super(score);
36 |         set(label, score);
37 |     }
38 |
39 |     /** Assigns both the label and the score of this prediction. */
40 |     public void set(String label, double score)
41 |     {
42 |         setLabel(label);
43 |         setScore(score);
44 |     }
45 |
46 |     /** @return the label of this prediction. */
47 |     public String getLabel()
48 |     {
49 |         return s_label;
50 |     }
51 |
52 |     /** @param label the label to be assigned to this prediction. */
53 |     public void setLabel(String label)
54 |     {
55 |         s_label = label;
56 |     }
57 |
58 |     /** @return {@code true} if the label of this prediction equals the specific label. */
59 |     public boolean isLabel(String label)
60 |     {
61 |         return s_label.equals(label);
62 |     }
63 |
64 |     /** Copies the label and the score of the specific prediction into this prediction. */
65 |     public void set(StringPrediction p)
66 |     {
67 |         set(p.s_label, p.d_score);
68 |     }
69 |
70 |     /**
71 |      * Compares two predictions by their scores.
72 |      * The type argument of {@code Comparable} is required for this method to override {@code compareTo}.
73 |      * @return {@code MathUtils.signum} of this score minus the score of {@code p}.
74 |      */
75 |     @Override
76 |     public int compareTo(StringPrediction p)
77 |     {
78 |         return MathUtils.signum(d_score - p.d_score);
79 |     }
80 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/dictionary/english/DTHyphenTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.dictionary.english;
17 |
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import org.junit.Test;
22 |
23 | import edu.emory.clir.clearnlp.dictionary.english.DTHyphen;
24 |
25 | /**
26 | * @since 3.0.0
27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
28 | */
29 | public class DTHyphenTest
30 | {
31 |     /** Checks the affix lookups and the hyphen preservation of {@link DTHyphen}. */
32 |     @Test
33 |     public void test()
34 |     {
35 |         final DTHyphen hyphen = new DTHyphen();
36 |
37 |         assertTrue(hyphen.isPrefix("inter"));
38 |         assertTrue(hyphen.isSuffix("ful"));
39 |
40 |         // Words whose hyphen at the given index is expected to be preserved.
41 |         assertTrue(preserves(hyphen, "inter-connect", 5));
42 |         assertTrue(preserves(hyphen, "beauti-ful", 6));
43 |         assertTrue(preserves(hyphen, "b-a-d", 1));
44 |         assertTrue(preserves(hyphen, "b-a-d", 3));
45 |
46 |         // Slightly altered variants of the words above, expected not to be preserved.
47 |         assertFalse(preserves(hyphen, "inte-connect", 4));
48 |         assertFalse(preserves(hyphen, "beauti-fu", 6));
49 |         assertFalse(preserves(hyphen, "b-c-d", 1));
50 |         assertFalse(preserves(hyphen, "b-c-d", 3));
51 |     }
52 |
53 |     /** Calls {@code preserveHyphen} with a fresh character array of the word and the index. */
54 |     private static boolean preserves(DTHyphen dictionary, String word, int index)
55 |     {
56 |         return dictionary.preserveHyphen(word.toCharArray(), index);
57 |     }
58 | }
59 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/headrule/HeadTagSetTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.headrule;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertFalse;
20 | import static org.junit.Assert.assertTrue;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.clir.clearnlp.constituent.CTNode;
25 | import edu.emory.clir.clearnlp.conversion.headrule.HeadTagSet;
26 |
27 | /**
28 | * @since 3.0.0
29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
30 | */
31 | public class HeadTagSetTest
32 | {
33 |     /** Matches one node against a tag set while its constituent and function tags are changed. */
34 |     @Test
35 |     public void testHeadTagSet()
36 |     {
37 |         final String tags = "NN.*|NP|-SBJ|-TPC";
38 |         final HeadTagSet tagSet = new HeadTagSet(tags);
39 |         final CTNode node = new CTNode("NN", null);
40 |
41 |         // Constituent tags expected to match.
42 |         assertTrue(tagSet.matches(node));
43 |
44 |         for (String tag : new String[]{"NNS", "NP"})
45 |         {
46 |             node.setConstituentTag(tag);
47 |             assertTrue(tagSet.matches(node));
48 |         }
49 |
50 |         // "S" alone is expected to be rejected ...
51 |         node.setConstituentTag("S");
52 |         assertFalse(tagSet.matches(node));
53 |
54 |         // ... and to be accepted once the function tag "SBJ" is added.
55 |         node.addFunctionTag("SBJ");
56 |         assertTrue(tagSet.matches(node));
57 |
58 |         // NOTE(review): this compares the local string with an identical literal and does not
59 |         // involve the tag set -- confirm whether tagSet.toString() was intended here.
60 |         assertEquals(tags, "NN.*|NP|-SBJ|-TPC");
61 |     }
62 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/component/evaluation/AbstractF1Eval.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.component.evaluation;
17 |
18 | import edu.emory.clir.clearnlp.util.MathUtils;
19 |
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | abstract public class AbstractF1Eval extends AbstractEval
26 | {
27 |     /** Denominator of the precision. */
28 |     protected int p_total;
29 |     /** Denominator of the recall. */
30 |     protected int r_total;
31 |     /** Numerator shared by the precision and the recall. */
32 |     protected int n_correct;
33 |
34 |     public AbstractF1Eval()
35 |     {
36 |         clear();
37 |     }
38 |
39 |     /** Resets all counts to zero. */
40 |     @Override
41 |     public void clear()
42 |     {
43 |         p_total = 0;
44 |         r_total = 0;
45 |         n_correct = 0;
46 |     }
47 |
48 |     /** @return the F1 score, that is the first element of {@code getScores()}. */
49 |     @Override
50 |     public double getScore()
51 |     {
52 |         return getScores()[0];
53 |     }
54 |
55 |     @Override
56 |     public String toString()
57 |     {
58 |         double[] d = getScores();
59 |         return String.format("F1: %5.2f, P: %5.2f, R: %5.2f", d[0], d[1], d[2]);
60 |     }
61 |
62 |     /**
63 |      * Precision and recall are scaled by 100.
64 |      * A score whose denominator is 0 is reported as 0 instead of NaN,
65 |      * so an evaluator that has not counted anything yet gives 0 for every score.
66 |      * @return an array consisting of {F1, precision, recall}.
67 |      */
68 |     private double[] getScores()
69 |     {
70 |         double precision = toPercentage(n_correct, p_total);
71 |         double recall = toPercentage(n_correct, r_total);
72 |
73 |         // Both scores being 0 is handled here so the result does not depend on how
74 |         // MathUtils.getF1 treats a zero denominator.
75 |         double f1 = (precision + recall == 0d) ? 0d : MathUtils.getF1(precision, recall);
76 |
77 |         return new double[]{f1, precision, recall};
78 |     }
79 |
80 |     /** @return {@code 100 * numerator / denominator}, or 0 if the denominator is 0. */
81 |     private static double toPercentage(int numerator, int denominator)
82 |     {
83 |         return (denominator == 0) ? 0d : 100d * numerator / denominator;
84 |     }
85 | }
86 |
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/collection/ngram/UnigramTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.collection.ngram;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import org.junit.Test;
21 |
22 | /**
23 | * @since 3.0.0
24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
25 | */
26 | public class UnigramTest
27 | {
28 |     /** Checks the lists and the key sets returned for count and probability cutoffs. */
29 |     @Test
30 |     public void test()
31 |     {
32 |         // The type argument keeps the calls below free of raw-type (unchecked) usage.
33 |         Unigram<String> map = new Unigram<>();
34 |
35 |         // Resulting counts: A = 2, B = 4, C = 1, D = 1 (8 in total).
36 |         map.add("A");
37 |         map.add("B", 2);
38 |         map.add("C");
39 |         map.add("A");
40 |         map.add("B", 2);
41 |         map.add("D");
42 |
43 |         // With a cutoff of 0, every key is expected in the output.
44 |         assertEquals("[(C,1), (D,1), (B,4), (A,2)]", map.toList(0).toString());
45 |         assertEquals("[(C,0.125), (D,0.125), (B,0.5), (A,0.25)]", map.toList(0d).toString());
46 |
47 |         // Keys with a count of 1 (probability 0.125) are expected to be dropped by these cutoffs.
48 |         assertEquals("[(B,4), (A,2)]", map.toList(1).toString());
49 |         assertEquals("[(B,0.5), (A,0.25)]", map.toList(0.2).toString());
50 |
51 |         assertEquals("[A, B, C, D]", map.keySet(0).toString());
52 |         assertEquals("[A, B, C, D]", map.keySet(0d).toString());
53 |
54 |         assertEquals("[A, B]", map.keySet(1).toString());
55 |         assertEquals("[A, B]", map.keySet(0.2).toString());
56 |     }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/wordnet/WNIndexMap.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.wordnet;
17 |
18 | import java.io.BufferedReader;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 | import java.util.HashMap;
22 | import java.util.Map;
23 |
24 | import edu.emory.clir.clearnlp.util.IOUtils;
25 |
26 | /**
27 | * @since 3.0.0
28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
29 | */
30 | public class WNIndexMap
31 | {
32 |     /** Index entries keyed by their lemmas. */
33 |     Map<String,WNIndex> m_index;
34 |
35 |     /**
36 |      * Creates one index entry per line of the input and stores it under its lemma.
37 |      * Lines starting with a space are skipped.
38 |      * The reader is closed when this constructor returns, even if reading or parsing fails.
39 |      * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
40 |      * @param map passed to the constructor of each index entry.
41 |      * @throws IOException
42 |      */
43 |     public WNIndexMap(InputStream in, WNDataMap map) throws IOException
44 |     {
45 |         m_index = new HashMap<>();
46 |
47 |         try (BufferedReader reader = IOUtils.createBufferedReader(in))
48 |         {
49 |             String line;
50 |
51 |             while ((line = reader.readLine()) != null)
52 |             {
53 |                 if (line.startsWith(" ")) continue;
54 |
55 |                 WNIndex index = new WNIndex(map, line);
56 |                 m_index.put(index.getLemma(), index);
57 |             }
58 |         }
59 |     }
60 |
61 |     /** @return the index entry of the specific lemma if exists; otherwise, {@code null}. */
62 |     public WNIndex getIndex(String lemma)
63 |     {
64 |         return m_index.get(lemma);
65 |     }
66 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/constant/CharConst.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the 'License');
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an 'AS IS' BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util.constant;
17 |
18 |
19 | /**
20 | * @since 3.0.0
21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
22 | */
23 | public interface CharConst
24 | {
25 |     // Arithmetic and comparison symbols.
26 |     char PLUS         = '+';
27 |     char ASTERISK     = '*';
28 |     char EQUAL        = '=';
29 |     char PERCENT      = '%';
30 |     char LESS_THAN    = '<';
31 |     char GREATER_THAN = '>';
32 |
33 |     // Slashes and other separators.
34 |     char FW_SLASH   = '/';
35 |     char BW_SLASH   = '\\';
36 |     char PIPE       = '|';
37 |     char UNDERSCORE = '_';
38 |     char HYPHEN     = '-';
39 |
40 |     // Punctuation marks.
41 |     char COMMA       = ',';
42 |     char COLON       = ':';
43 |     char SEMICOLON   = ';';
44 |     char PERIOD      = '.';
45 |     char QUESTION    = '?';
46 |     char EXCLAMATION = '!';
47 |
48 |     // Quotation marks; PRIME holds the backquote character.
49 |     char SINGLE_QUOTE = '\'';
50 |     char DOUBLE_QUOTE = '"';
51 |     char PRIME        = '`';
52 |
53 |     // Other symbols.
54 |     char POUND     = '#';
55 |     char DOLLAR    = '$';
56 |     char AMPERSAND = '&';
57 |     char AT        = '@';
58 |     char TILDA     = '~';
59 |
60 |     // Round, curly and square brackets.
61 |     char LRB = '(';
62 |     char RRB = ')';
63 |     char LCB = '{';
64 |     char RCB = '}';
65 |     char LSB = '[';
66 |     char RSB = ']';
67 |
68 |     // Digit.
69 |     char ZERO = '0';
70 |
71 |     // Whitespace characters.
72 |     char SPACE    = ' ';
73 |     char TAB      = '\t';
74 |     char NEW_LINE = '\n';
75 |
76 |     // The character whose code is 0.
77 |     char EMPTY = '\0';
78 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/configuration/LiblinearTrainerConfiguration.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.configuration;
17 |
18 | /**
19 | * @since 3.0.0
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public class LiblinearTrainerConfiguration extends DefaultTrainerConfiguration
23 | {
24 |     private double d_cost;
25 |     private double d_eps;
26 |     private double d_bias;
27 |
28 |     /**
29 |      * Passes the first five arguments to the super constructor,
30 |      * and stores the cost, the epsilon, and the bias through their setters.
31 |      */
32 |     public LiblinearTrainerConfiguration(byte vectorType, boolean binary, int labelCutoff, int featureCutoff, int numberOfThreads, double cost, double epsilon, double bias)
33 |     {
34 |         super(vectorType, binary, labelCutoff, featureCutoff, numberOfThreads);
35 |
36 |         setCost(cost);
37 |         setEpsilon(epsilon);
38 |         setBias(bias);
39 |     }
40 |
41 |     /** @return the cost stored in this configuration. */
42 |     public double getCost()
43 |     {
44 |         return this.d_cost;
45 |     }
46 |
47 |     /** @return the epsilon stored in this configuration. */
48 |     public double getEpsilon()
49 |     {
50 |         return this.d_eps;
51 |     }
52 |
53 |     /** @return the bias stored in this configuration. */
54 |     public double getBias()
55 |     {
56 |         return this.d_bias;
57 |     }
58 |
59 |     /** @param cost the cost to be stored in this configuration. */
60 |     public void setCost(double cost)
61 |     {
62 |         this.d_cost = cost;
63 |     }
64 |
65 |     /** @param eps the epsilon to be stored in this configuration. */
66 |     public void setEpsilon(double eps)
67 |     {
68 |         this.d_eps = eps;
69 |     }
70 |
71 |     /** @param bias the bias to be stored in this configuration. */
72 |     public void setBias(double bias)
73 |     {
74 |         this.d_bias = bias;
75 |     }
76 | }
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/lexicon/verbnet/VNFrame.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.lexicon.verbnet;
17 |
18 | import java.io.Serializable;
19 |
20 | import org.w3c.dom.Element;
21 |
22 | import edu.emory.clir.clearnlp.util.XmlUtils;
23 |
24 | /**
25 | * @since 3.0.0
26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
27 | */
28 | public class VNFrame implements Serializable
29 | {
30 |     private static final long serialVersionUID = 1907495757606414993L;
31 |
32 |     private VNSyntax    v_syntax;
33 |     private VNSemantics v_semantics;
34 |
35 |     /** @param eFrame the element from which the syntax and the semantics of this frame are read. */
36 |     public VNFrame(Element eFrame)
37 |     {
38 |         init(eFrame);
39 |     }
40 |
41 |     /** Builds the syntax and the semantics from the first child elements with the matching tag names. */
42 |     private void init(Element eFrame)
43 |     {
44 |         final Element eSyntax = XmlUtils.getFirstElementByTagName(eFrame, VNXml.E_SYNTAX);
45 |         setSyntax(new VNSyntax(eSyntax));
46 |
47 |         final Element eSemantics = XmlUtils.getFirstElementByTagName(eFrame, VNXml.E_SEMANTICS);
48 |         setSemantics(new VNSemantics(eSemantics));
49 |     }
50 |
51 |     /** @return the syntax of this frame. */
52 |     public VNSyntax getSyntax()
53 |     {
54 |         return this.v_syntax;
55 |     }
56 |
57 |     /** @return the semantics of this frame. */
58 |     public VNSemantics getSemantics()
59 |     {
60 |         return this.v_semantics;
61 |     }
62 |
63 |     /** @param syntax the syntax to be assigned to this frame. */
64 |     public void setSyntax(VNSyntax syntax)
65 |     {
66 |         this.v_syntax = syntax;
67 |     }
68 |
69 |     /** @param semantics the semantics to be assigned to this frame. */
70 |     public void setSemantics(VNSemantics semantics)
71 |     {
72 |         this.v_semantics = semantics;
73 |     }
74 | }
--------------------------------------------------------------------------------
/src/test/resources/constituent/functionTags.parse:
--------------------------------------------------------------------------------
1 | (TOP (S (S (NP-SBJ (CC both)
2 | (NNP Bush)
3 | (CC and)
4 | (NNP Rice)))
5 | (VP (VBP have)
6 | (VP (VBN delivered)
7 | (NP (NP (NNS speeches))
8 | (, ,)
9 | (SBAR (WHNP-1 (WDT which))
10 | (S (NP-SBJ (-NONE- *T*-1))
11 | (VP (VBP are)
12 | (ADJP-PRD (RB very)
13 | (JJ clear))))))))))
14 |
15 | (TOP (S (NP-SBJ-1 (NNP Mr.)
16 | (NNP Clinton))
17 | (VP (VBD was)
18 | (VP (VBN joined)
19 | (NP (-NONE- *-1))
20 | (PP (IN by)
21 | (NP-LGS (JJ several)
22 | (JJ key)
23 | (NN republican)
24 | (NNS leaders)))))
25 | (. .)))
26 |
27 | (TOP (SBARQ (WHNP-1 (WP Who))
28 | (SQ-CLF (VBZ is)
29 | (NP-SBJ (PRP it))
30 | (NP-PRD (-NONE- *T*-1))
31 | (SBAR (WHNP-2 (WDT that))
32 | (S (NP-SBJ-3 (-NONE- *T*-2))
33 | (NP-TMP (NN today))
34 | (VP (VBZ wants)
35 | (S (NP-SBJ (-NONE- *PRO*-3))
36 | (VP (TO to)
37 | (VP (VB blow)
38 | (NP (NNS things))
39 | (PRT (RP up))
40 | (PP-LOC (IN in)
41 | (NP (NNP Lebanon)))))))))
42 | (, ,)
43 | (NP-VOC (NNP Doctor)))
44 | (. ?)))
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/morphology/english/EnglishDerivation.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.morphology.english;
17 |
18 | import java.util.List;
19 | import java.util.Set;
20 |
21 | import edu.emory.clir.clearnlp.morphology.AbstractAffixMatcher;
22 |
23 | /**
24 | * @since 3.0.3
25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
26 | */
27 | public class EnglishDerivation
28 | {
29 |     /** Matchers tried in order by {@link #getBaseForm(String, Set)}. */
30 |     List<AbstractAffixMatcher> suffix_matchers;
31 |
32 |     /**
33 |      * @param affixMatchers the matchers to be tried in order.
34 |      * @throws IllegalArgumentException if the list is {@code null}.
35 |      */
36 |     public EnglishDerivation(List<AbstractAffixMatcher> affixMatchers)
37 |     {
38 |         init(affixMatchers);
39 |     }
40 |
41 |     /** Validates the list before storing it, so the field never holds a rejected value. */
42 |     private void init(List<AbstractAffixMatcher> affixMatchers)
43 |     {
44 |         if (affixMatchers == null)
45 |             throw new IllegalArgumentException("The suffix matcher list must not be null.");
46 |
47 |         suffix_matchers = affixMatchers;
48 |     }
49 |
50 |     /** @return the list of matchers given to the constructor (not a copy). */
51 |     public List<AbstractAffixMatcher> getSuffixMatchers()
52 |     {
53 |         return suffix_matchers;
54 |     }
55 |
56 |     /**
57 |      * @param lemma the lemma passed to each matcher.
58 |      * @param baseSet the set passed to each matcher together with the lemma.
59 |      * @return the first non-null base form returned by a matcher; {@code null} if no matcher returns one.
60 |      */
61 |     public String getBaseForm(String lemma, Set<String> baseSet)
62 |     {
63 |         String base;
64 |
65 |         for (AbstractAffixMatcher matcher : suffix_matchers)
66 |         {
67 |             base = matcher.getBaseForm(baseSet, lemma);
68 |             if (base != null) return base;
69 |         }
70 |
71 |         return null;
72 |     }
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/classification/instance/AbstractInstance.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.classification.instance;
17 |
18 | import edu.emory.clir.clearnlp.classification.vector.AbstractFeatureVector;
19 |
20 |
21 | /**
22 | * @since 3.0.0
23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
24 | */
25 | abstract public class AbstractInstance<F extends AbstractFeatureVector>
26 | {
27 |     /** The label of this instance. */
28 |     private String s_label;
29 |     /** The feature vector of this instance. */
30 |     private F f_vector;
31 |
32 |     /**
33 |      * @param label the label of this instance.
34 |      * @param vector the feature vector of this instance.
35 |      */
36 |     public AbstractInstance(String label, F vector)
37 |     {
38 |         set(label, vector);
39 |     }
40 |
41 |     /** @return the label of this instance. */
42 |     public String getLabel()
43 |     {
44 |         return s_label;
45 |     }
46 |
47 |     /** @return the feature vector of this instance. */
48 |     public F getFeatureVector()
49 |     {
50 |         return f_vector;
51 |     }
52 |
53 |     /** Assigns both the label and the feature vector of this instance. */
54 |     public void set(String label, F vector)
55 |     {
56 |         setLabel(label);
57 |         setFeatureVector(vector);
58 |     }
59 |
60 |     /** @param label the label to be assigned to this instance. */
61 |     public void setLabel(String label)
62 |     {
63 |         s_label = label;
64 |     }
65 |
66 |     /** @param vector the feature vector to be assigned to this instance. */
67 |     public void setFeatureVector(F vector)
68 |     {
69 |         f_vector = vector;
70 |     }
71 |
72 |     /** @return {@code true} if the label of this instance equals the specific label. */
73 |     public boolean isLabel(String label)
74 |     {
75 |         return s_label.equals(label);
76 |     }
77 |
78 |     /** @return the label and the feature vector joined by {@code AbstractFeatureVector.DELIM_FEATURE}. */
79 |     @Override
80 |     public String toString()
81 |     {
82 |         return s_label + AbstractFeatureVector.DELIM_FEATURE + f_vector.toString();
83 |     }
84 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/headrule/HeadRuleTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.headrule;
17 |
18 | import static org.junit.Assert.assertEquals;
19 | import static org.junit.Assert.assertFalse;
20 | import static org.junit.Assert.assertTrue;
21 |
22 | import org.junit.Test;
23 |
24 | import edu.emory.clir.clearnlp.constituent.CTNode;
25 | import edu.emory.clir.clearnlp.conversion.headrule.HeadRule;
26 | import edu.emory.clir.clearnlp.conversion.headrule.HeadTagSet;
27 |
28 |
29 | /** @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */
30 | public class HeadRuleTest
31 | {
32 |     /** Checks the direction, the two head tag sets, and the string form of a head rule. */
33 |     @Test
34 |     public void testHeadRule()
35 |     {
36 |         final String tags = "NN.*|NP;VB.*|VP";
37 |         final HeadRule rule = new HeadRule(HeadRule.DIR_LEFT_TO_RIGHT, tags);
38 |         final CTNode noun = new CTNode("NNS", null);
39 |         final CTNode verb = new CTNode("VBN", null);
40 |
41 |         assertFalse(rule.isRightToLeft());
42 |
43 |         final HeadTagSet[] headTags = rule.getHeadTags();
44 |
45 |         // The first tag set is expected to accept the noun only.
46 |         final HeadTagSet first = headTags[0];
47 |         assertTrue(first.matches(noun));
48 |         assertFalse(first.matches(verb));
49 |
50 |         // The second tag set is expected to accept the verb only.
51 |         final HeadTagSet second = headTags[1];
52 |         assertFalse(second.matches(noun));
53 |         assertTrue(second.matches(verb));
54 |
55 |         // The string form of the rule is expected to reproduce the original tags.
56 |         assertEquals(tags, rule.toString());
57 |     }
58 | }
--------------------------------------------------------------------------------
/src/test/java/edu/emory/clir/clearnlp/constituent/CTReaderTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.constituent;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | import java.util.ArrayList;
21 | import java.util.List;
22 |
23 | import org.junit.Test;
24 |
25 | import edu.emory.clir.clearnlp.util.IOUtils;
26 |
27 |
28 | /**
29 | * @since 3.0.0
30 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
31 | */
32 | public class CTReaderTest
33 | {
34 | @Test
35 | public void testCTReader() throws Exception
36 | {
37 | String filename = "src/test/resources/constituent/constituent.parse";
38 | CTReader reader = new CTReader(IOUtils.createFileInputStream(filename));
39 | CTTree tree;
40 |
41 | StringBuilder build = new StringBuilder();
42 | List trees = new ArrayList<>();
43 | String tmp;
44 |
45 | while ((tree = reader.nextTree()) != null)
46 | {
47 | tmp = tree.toString();
48 | trees.add(tmp);
49 | build.append(tmp);
50 | }
51 |
52 | reader.close();
53 |
54 | reader = new CTReader(IOUtils.createByteArrayInputStream(build.toString()));
55 | int i;
56 |
57 | for (i=0; (tree = reader.nextTree()) != null; i++)
58 | assertEquals(trees.get(i), tree.toString());
59 |
60 | reader.close();
61 | }
62 | }
--------------------------------------------------------------------------------
/src/test/resources/constituent/normalize.parse:
--------------------------------------------------------------------------------
1 | ( (S (PP (IN In) (NP (NN order) (S (NP-SBJ (-NONE- *PRO*)) (VP (TO to) (VP (VB determine) (NP (NP (DT the) (NN sequence)) (PP (IN of) (NP (DT the) (JJ entire) (NN transcript))))))))) (, ,) (S (S (NP-SBJ-1=4 (NP (NN RT) (HYPH -) (NN PCR)) (VP (VBG using) (NP (NP (NP (NNS primers)) (PP-LOC (IN in) (NP (NNS exons) (NML (CD 10) (CC and) (CD 11))))) (VP (VBN paired) (NP (-NONE- *)) (PP (IN with) (NP (NP (DT a) (NN primer)) (PP-LOC (IN in) (NP (NN intron) (CD 12))))))))) (VP (VBD was) (VP=3 (VBN performed) (NP-1 (-NONE- *)) (S-MNR (NP-SBJ (-NONE- *PRO*)) (VP (VBG using) (NP (NML (NML (NML (NN BALB) (HYPH /) (NN c)) (NN mouse)) (NN brain)) (JJ total) (NN RNA))))))) (CC and) (S (NP-SBJ-2=4 (DT the) (VBG resulting) (NNS products)) (VP=3 (VBN sequenced) (NP-2 (-NONE- *))))) (. .)) )
2 | ( (S (NP-SBJ (NN Figure) (CD 1)) (VP (VBZ shows) (NP (NP (DT the) (JJ average) (NN IOP)) (PP (IN of) (NP (NP (NP (DT a) (NN number)) (PP (IN of) (NP (JJ inbred) (NN mouse) (NNS strains)))) (SBAR (WHNP-1 (WDT that)) (S (NP-SBJ-1 (-NONE- *T*)) (VP (VBD were) (VP (VBN housed) (NP-1 (-NONE- *)) (PP (IN in) (NP (DT the) (JJ same) (JJ environmental) (NNS conditions))))))))))) (. .)) )
3 | ( (S (S (NP-SBJ (NP (PRP It)) (SBAR-1 (-NONE- *EXP*))) (VP (VBZ is) (VP (VBG becoming) (ADJP-PRD (RB increasingly) (JJ clear)) (SBAR-1 (IN that) (S (NP-SBJ (NP (JJ many) (NNS forms)) (PP (IN of) (NP (NN glaucoma)))) (VP (VBP have) (NP (DT a) (JJ genetic) (NN component))))) (PRN (-LRB- [) (NP (CD 6) (, ,) (CD 7)) (-RRB- ]))))) (, ,) (CC and) (S (NP-SBJ-3 (JJ much) (JJ current) (NN research)) (VP (VBZ is) (VP (VBN focused) (NP-3 (-NONE- *)) (PP (IN on) (S-NOM (NP-SBJ (-NONE- *PRO*)) (VP (VBG identifying) (NP (NP (NP (JJ chromosomal) (NNS regions)) (CC and) (NP (NNS genes))) (SBAR (WHNP-2 (WDT that)) (S (NP-SBJ-2 (-NONE- *T*)) (VP (VBP contribute) (PP (IN to) (NP (NN glaucoma)))))))))) (PRN (-LRB- [) (NP (NP (CD 8)) (PP (SYM -) (NP (CD 10)))) (-RRB- ]))))) (. .)) )
--------------------------------------------------------------------------------
/src/main/java/edu/emory/clir/clearnlp/util/HashUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014, Emory University
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package edu.emory.clir.clearnlp.util;
17 |
18 | /**
19 | * @since 3.0.0
20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
21 | */
22 | public class HashUtils
23 | {
24 | private static final long FNV_BASIS_64 = 0xcbf29ce484222325L;
25 | private static final long FNV_PRIME_64 = 0x100000001b3L;
26 |
27 | private static final int FNV_BASIS_32 = 0x811c9dc5;
28 | private static final int FNV_PRIME_32 = 0x01000193;
29 |
30 | public static int fnv1aHash32(final String s)
31 | {
32 | return fnv1aHash32(s, FNV_BASIS_32);
33 | }
34 |
35 | public static int fnv1aHash32(final String s, int basis)
36 | {
37 | char[] cs = s.toCharArray();
38 | int i, len = s.length();
39 |
40 | for (i=0; i= get(i)) break;
73 | swap(k, i);
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------