├── README.md ├── src ├── test │ ├── resources │ │ ├── classification │ │ │ └── model │ │ │ │ ├── multi-sparse.train │ │ │ │ ├── binary-sparse.train │ │ │ │ ├── multi-string.train │ │ │ │ └── binary-string.train │ │ ├── propbank │ │ │ ├── wsj_0001.prop │ │ │ ├── wsj.prop │ │ │ ├── sample.prop │ │ │ ├── wsj_0001.parse │ │ │ └── wsj_0002.parse │ │ ├── feature │ │ │ └── common │ │ │ │ ├── dependency.txt │ │ │ │ └── feature_common.xml │ │ ├── nlp │ │ │ ├── trainer │ │ │ │ ├── feature_pos.xml │ │ │ │ ├── feature_dep.xml │ │ │ │ └── pos.cnlp │ │ │ └── configuration │ │ │ │ └── configure.xml │ │ ├── dependency │ │ │ └── dependency.cnlp │ │ └── constituent │ │ │ ├── functionTags.parse │ │ │ └── normalize.parse │ └── java │ │ └── edu │ │ └── emory │ │ └── clir │ │ └── clearnlp │ │ ├── verbnet │ │ └── VNTagTest.java │ │ ├── util │ │ ├── MathUtilsTest.java │ │ ├── CharUtilsTest.java │ │ ├── FileUtilsTest.java │ │ ├── CharTokenizerTest.java │ │ └── arc │ │ │ └── SRLArcTest.java │ │ ├── dictionary │ │ ├── english │ │ │ ├── DTAbbreviationTest.java │ │ │ └── DTHyphenTest.java │ │ └── universal │ │ │ ├── DTCompoundTest.java │ │ │ ├── DTUnitTest.java │ │ │ ├── DTEmoticonTest.java │ │ │ ├── DTHtmlTest.java │ │ │ └── DTCurrencyTest.java │ │ ├── headrule │ │ ├── HeadRuleMapTest.java │ │ ├── HeadTagSetTest.java │ │ └── HeadRuleTest.java │ │ ├── component │ │ └── configuration │ │ │ ├── DEPConfigurationTest.java │ │ │ └── POSConfigurationTest.java │ │ ├── collection │ │ ├── stack │ │ │ └── StackTest.java │ │ └── ngram │ │ │ ├── BigramTest.java │ │ │ └── UnigramTest.java │ │ ├── dependency │ │ └── DEPFeatTest.java │ │ ├── propbank │ │ └── PBLocationTest.java │ │ ├── tokenization │ │ └── english │ │ │ └── ApostropheTokenizerTest.java │ │ ├── vector │ │ └── VectorSpaceModelTest.java │ │ └── constituent │ │ └── CTReaderTest.java └── main │ ├── scripts │ ├── rsync.sh │ ├── count-deps.py │ └── conll2clear.py │ ├── resources │ ├── samples │ │ ├── clearnlp.txt │ │ ├── clearnlp.txt.tok │ │ ├── 
wsj_0001.parse │ │ ├── wsj_0001.parse.dep │ │ └── clearnlp.txt.cnlp │ ├── configure │ │ ├── log4j.properties │ │ ├── config_decode_dep.xml │ │ ├── config_sequence.xml │ │ ├── config_future.xml │ │ ├── config_decode_srl.xml │ │ ├── config_train_pos.xml │ │ ├── config_decode_ner.xml │ │ ├── config_train_dep.xml │ │ ├── config_train_ner.xml │ │ └── config_train_srl.xml │ ├── headrules │ │ ├── headrule_en_stanford.txt │ │ └── headrule_en_conll.txt │ └── features │ │ ├── feature_en_pos.xml │ │ ├── feature_en_ner_conll03.xml │ │ └── feature_en_ner.xml │ └── java │ └── edu │ └── emory │ └── clir │ └── clearnlp │ ├── ner │ ├── BILOU.java │ └── NERLib.java │ ├── util │ ├── adapter │ │ └── Adapter1.java │ ├── constant │ │ ├── MetaConst.java │ │ └── CharConst.java │ ├── lang │ │ └── TLanguage.java │ ├── ObjectSizeFetcher.java │ ├── arc │ │ ├── DEPArc.java │ │ └── PBArc.java │ ├── io │ │ └── FileExtensionFilter.java │ ├── BinUtils.java │ └── HashUtils.java │ ├── srl │ └── matcher │ │ ├── SRLArcMatcher.java │ │ ├── SRLArcMatcherTrue.java │ │ ├── SRLArcMatcherSet.java │ │ └── SRLArcMatcherPattern.java │ ├── component │ ├── mode │ │ ├── srl │ │ │ ├── SRLTransition.java │ │ │ └── SRLEval.java │ │ ├── morph │ │ │ ├── DefaultMPAnalyzer.java │ │ │ └── AbstractMPAnalyzer.java │ │ ├── pos │ │ │ ├── POSEval.java │ │ │ └── POSFeatureExtractor.java │ │ └── dep │ │ │ └── DEPFeatureExtractor.java │ ├── utils │ │ ├── NLPMode.java │ │ └── CFlag.java │ ├── AbstractComponent.java │ ├── evaluation │ │ ├── AbstractEval.java │ │ └── AbstractF1Eval.java │ └── configuration │ │ └── DecodeConfiguration.java │ ├── classification │ ├── trainer │ │ └── TrainerType.java │ ├── instance │ │ ├── SparseInstance.java │ │ ├── StringInstance.java │ │ ├── SparseInstanceCollector.java │ │ └── AbstractInstance.java │ ├── prediction │ │ ├── AbstractPrediction.java │ │ └── StringPrediction.java │ └── configuration │ │ ├── AbstractTrainerConfiguration.java │ │ └── LiblinearTrainerConfiguration.java │ ├── feature │ 
├── type │ │ ├── DirectionType.java │ │ ├── SourceType.java │ │ ├── FeatureType.java │ │ ├── FeatureXml.java │ │ └── RelationType.java │ └── common │ │ └── OrthographicType.java │ ├── lexicon │ ├── wordnet │ │ ├── WNPOSTag.java │ │ ├── WNRelation.java │ │ └── WNIndexMap.java │ ├── dbpedia │ │ ├── DBPediaInfoMap.java │ │ └── DBPediaXML.java │ ├── verbnet │ │ ├── VNMap.java │ │ ├── VNXml.java │ │ └── VNFrame.java │ ├── propbank │ │ └── frameset │ │ │ ├── PBFType.java │ │ │ └── PBFXml.java │ └── wikipedia │ │ ├── WikiPrint.java │ │ ├── WikiParagraph.java │ │ └── WikiIndex.java │ ├── reader │ ├── TReader.java │ ├── LineReader.java │ └── RawReader.java │ ├── collection │ ├── pair │ │ ├── StringIntPair.java │ │ ├── CharIntPair.java │ │ ├── IntIntPair.java │ │ ├── BooleanIntPair.java │ │ ├── DoubleIntPair.java │ │ ├── CharCharPair.java │ │ ├── ObjectCharPair.java │ │ ├── ObjectIntPair.java │ │ ├── Pair.java │ │ └── ObjectDoublePair.java │ ├── triple │ │ ├── DoubleIntIntTriple.java │ │ ├── BooleanIntIntTriple.java │ │ ├── ObjectIntIntTriple.java │ │ └── Triple.java │ ├── tree │ │ └── PrefixNode.java │ ├── set │ │ ├── IntHashSet.java │ │ ├── CharHashSet.java │ │ └── DisjointSet.java │ ├── stack │ │ └── Stack.java │ └── heap │ │ └── DoubleBinaryHeap.java │ ├── dictionary │ ├── PathNamedEntity.java │ ├── AbstractDTTokenizer.java │ ├── PathEnglishMPAnalyzer.java │ ├── PathTokenizer.java │ └── english │ │ └── DTAbbreviation.java │ ├── pos │ └── POSTag.java │ ├── cluster │ ├── StringVector.java │ ├── Cluster.java │ ├── AbstractCluster.java │ └── Term.java │ ├── experiment │ └── AbstractArgsReader.java │ ├── bin │ └── PrintTree.java │ └── morphology │ └── english │ └── EnglishDerivation.java └── LICENSE.txt /README.md: -------------------------------------------------------------------------------- 1 | This project has moved to https://github.com/emorynlp/nlp4j. 
2 | -------------------------------------------------------------------------------- /src/test/resources/classification/model/multi-sparse.train: -------------------------------------------------------------------------------- 1 | 2 4 2 3 2 | 0 1 5 3 | 1 1 2 4 | 0 3 5 | 1 6 -------------------------------------------------------------------------------- /src/test/resources/classification/model/binary-sparse.train: -------------------------------------------------------------------------------- 1 | 1 5 2 11 2 | 0 6 2 10 3 | 1 4 7 3 4 | 0 1 9 12 5 | 0 1 8 3 -------------------------------------------------------------------------------- /src/test/resources/classification/model/multi-string.train: -------------------------------------------------------------------------------- 1 | sunny 0:bright 1:dry 2:bright 2 | rainy 0:dark 1:wet 3 | cloudy 0:dark 1:dry 4 | rainy 2:bright 5 | cloudy 2:dark -------------------------------------------------------------------------------- /src/test/resources/classification/model/binary-string.train: -------------------------------------------------------------------------------- 1 | male 0:jinho 1:choi 2:d 2 | female 0:jeany 1:choi 2:e 3 | male 0:james 1:martin 2:h 4 | female 0:martha 1:palmer 2:s 5 | female 0:martha 1:stewart 2:h -------------------------------------------------------------------------------- /src/test/resources/propbank/wsj_0001.prop: -------------------------------------------------------------------------------- 1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP 2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2 3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0 4 | -------------------------------------------------------------------------------- /src/main/scripts/rsync.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | jar cf 
clearnlp.jar$1 edu 3 | #rsync -avc clearnlp.jar$1 jdchoi@ainos.mathcs.emory.edu:/home/jdchoi/lib 4 | rsync -avc clearnlp.jar choi@lab0z.mathcs.emory.edu:/home/choi/lib 5 | scp choi@lab0z.mathcs.emory.edu:/home/choi/lib/clearnlp.jar jdchoi@ainos.mathcs.emory.edu:/home/jdchoi/lib/clearnlp.jar 6 | -------------------------------------------------------------------------------- /src/main/resources/samples/clearnlp.txt: -------------------------------------------------------------------------------- 1 | The ClearNLP project provides software and resources for natural language processing. It is developed by the Center for Language and Information Research (CLIR) at Emory University. Please join our discussion group if you want to get notifications about new updates or post issues, suggestions, questions, etc. 2 | -------------------------------------------------------------------------------- /src/main/resources/samples/clearnlp.txt.tok: -------------------------------------------------------------------------------- 1 | The ClearNLP project provides software and resources for natural language processing . 2 | It is developed by the Center for Language and Information Research ( CLIR ) at Emory University . 3 | Please join our discussion group if you want to get notifications about new updates or post issues , suggestions , questions , etc . 4 | -------------------------------------------------------------------------------- /src/main/resources/configure/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=DEBUG, A1 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | # A1 uses PatternLayout. 
8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.A1.layout.conversionPattern=%m 10 | -------------------------------------------------------------------------------- /src/test/resources/propbank/wsj.prop: -------------------------------------------------------------------------------- 1 | propbank/wsj_0001.parse 0 8 gold join-v join.01 ----- 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP 2 | propbank/wsj_0001.parse 1 2 gold be-v be.01 ----- 0:1-ARG1 2:0-rel 3:2-ARG2 3 | propbank/wsj_0001.parse 1 10 gold publish-v publish.01 ----- 10:0-rel 11:0-ARG0 4 | propbank/wsj_0002.parse 0 16 gold name-v name.01 ----- 0:2*17:1-ARG1 16:0-rel 18:2-ARG2 5 | -------------------------------------------------------------------------------- /src/test/resources/feature/common/dependency.txt: -------------------------------------------------------------------------------- 1 | 1 He he PRP PERSON _ 3 nsubj 3:A0 2 | 2 already already RB _ _ 3 advmod 3:AM-TMP 3 | 3 bought buy VBD _ p2=VBN|pb=buy.01 0 root _ 4 | 4 a a DT _ _ 5 det _ 5 | 5 CAR-2 car NN _ _ 3 dobj 3:A1;8:A1 6 | 6 yesterday yesterday NN TIME _ 3 npadvmod 3:AM-TMP 7 | 7 thAt that WDT _ _ 8 nsubj 8:R-A1 8 | 8 IS be VBZ _ pb=be.01 5 rcmod _ 9 | 9 Red red JJ _ p2=VBN 8 acomp 8:A2 10 | 10 ... .. . 
_ _ 3 punct _ 11 | 11 123 0 CD _ _ 3 num _ -------------------------------------------------------------------------------- /src/test/resources/nlp/trainer/feature_pos.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/test/resources/nlp/trainer/feature_dep.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 2 6 | 3 7 | 8 | 4 9 | 5 10 | 6 11 | 7 12 | 13 | 8 14 | 15 | 9 16 | 10 17 | 18 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2014-2015, Emory University 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
-------------------------------------------------------------------------------- /src/main/resources/configure/config_decode_dep.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | 6 | 7 | 8 | 9 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 10 | 11 | 12 | 13 | general-en-pos.xz 14 | general-en-dep.xz 15 | 16 | 17 | 18 | root 19 | 1 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/main/resources/configure/config_sequence.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 0.4 13 | 14 | true 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/test/resources/nlp/trainer/pos.cnlp: -------------------------------------------------------------------------------- 1 | 1 He12 he PRP _ 3 nsubj 3:A0 2 | 2 already already RB _ 3 advmod 3:AM-TMP 3 | 3 bought buy VBD p2=VBN|pb=buy.01 0 root _ 4 | 4 a.-# a DT _ 5 det _ 5 | 5 car car NN _ 3 dobj 3:A1;8:A1 6 | 6 yesterday yesterday NN _ 3 npadvmod 3:AM-TMP 7 | 7 that that WDT _ 8 nsubj 8:R-A1 8 | 8 is be VBZ pb=be.01 5 rcmod _ 9 | 9 red red JJ p2=VBN 8 acomp 8:A2 10 | 10 . . . _ 3 punct _ 11 | 12 | 1 She she PRP _ 2 nsubj 2:A0 13 | 2 sold buy VBD p2=VBN|pb=buy.01 0 root _ 14 | 3 a a CD _ 4 det _ 15 | 4 car car NN _ 2 dobj 2:A1;6:A1 16 | 5 that that DT _ 6 nsubj 6:R-A1 17 | 6 is be VBZ pb=be.01 4 rcmod _ 18 | 7 blue red RB p2=VBN 6 acomp 6:A2 19 | 8 . . . 
_ 2 punct _ 20 | -------------------------------------------------------------------------------- /src/main/resources/configure/config_future.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | false 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /src/main/scripts/count-deps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | import glob 5 | 6 | IN_DIR = sys.argv[1] 7 | EXT = sys.argv[2] 8 | 9 | def getCounts(filename): 10 | fin = open(filename) 11 | sc = 0 12 | wc = 0 13 | # vc = 0 14 | 15 | for line in fin: 16 | l = line.split() 17 | if l: 18 | wc += 1 19 | # if 'pb=' in l[4]: vc += 1 20 | else: 21 | sc += 1 22 | 23 | return (sc, wc) 24 | 25 | gt = [0, 0] 26 | 27 | for filename in glob.glob(os.path.join(IN_DIR, '*.'+EXT)): 28 | t = getCounts(filename) 29 | s = '%s %d %d' % (filename, t[0], t[1]) 30 | print s 31 | 32 | gt[0] += t[0] 33 | gt[1] += t[1] 34 | 35 | print gt 36 | -------------------------------------------------------------------------------- /src/main/resources/configure/config_decode_srl.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | 6 | 7 | 8 | 9 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 10 | 11 | 12 | 13 | general-en-pos.xz 14 | general-en-dep.xz 15 | general-en-srl.xz 16 | 17 | 18 | 19 | root 20 | 1 21 | 22 | 23 | 24 | 4 25 | 3 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/main/resources/configure/config_train_pos.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 0.4 17 | 2 18 | 1500 
19 | true 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/test/resources/propbank/sample.prop: -------------------------------------------------------------------------------- 1 | propbank/wsj_0003.parse 0 11 gold enter-v enter.01 ----- 10:1-ARG0 11:0-rel 12:1-ARG1 2 | propbank/wsj_0003.parse 0 21 gold cause-v cause.01 ----- 16:2-ARG0 21:0-rel 22:2-ARG1 3 | propbank/wsj_0003.parse 0 25 gold show-v show.02 ----- 22:1*23:1*24:1-ARG1 25:0,26:1-rel 27:2-ARGM-TMP 22:1*23:1-LINK-SLC 4 | propbank/wsj_0003.parse 0 31 gold say-v say.01 ----- 0:3*33:1-ARG1 30:1-ARG0 31:0-rel 5 | propbank/wsj_0003.parse 1 18 gold appear-v appear.02 ----- 0:2,19:2-ARG1 18:0-rel 6 | propbank/wsj_0003.parse 1 21 gold be-v be.01 ----- 0:2*19:1-ARG1 21:0-rel 22:2-ARG2 7 | propbank/wsj_0003.parse 1 28 gold study-v study.01 ----- 25:1*29:1-ARG1 28:0-rel 30:1-ARGM-LOC 25:1*29:1-LINK-PSV 8 | propbank/wsj_0003.parse 1 32 gold industrialize-v industrialize.01 ----- 32:0-rel 33:0-ARG1 9 | propbank/wsj_0003.parse 1 36 gold say-v say.01 ----- 0:3*38:1-ARG1 35:1-ARG0 36:0-rel 10 | -------------------------------------------------------------------------------- /src/main/resources/configure/config_decode_ner.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | 6 | 7 | 8 | 9 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 10 | general-en-ner-gazetteer.xz 11 | 12 | 13 | 14 | general-en-pos.xz 15 | general-en-dep.xz 16 | general-en-srl.xz 17 | general-en-ner.xz 18 | 19 | 20 | 21 | root 22 | 1 23 | 24 | 25 | 26 | 4 27 | 3 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/ner/BILOU.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may 
not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.ner; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public enum BILOU 23 | { 24 | B, 25 | I, 26 | L, 27 | U, 28 | O 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/adapter/Adapter1.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.adapter; 17 | 18 | 19 | /** 20 | * @since 3.0.0 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public interface Adapter1 24 | { 25 | void apply(T item); 26 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcher.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.srl.matcher; 17 | 18 | 19 | /** 20 | * @since 3.0.0 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public interface SRLArcMatcher 24 | { 25 | boolean matches(String label); 26 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/mode/srl/SRLTransition.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.mode.srl; 17 | 18 | /** 19 | * @since 3.1.3 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface SRLTransition 23 | { 24 | String NO_ARC = "N"; 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/trainer/TrainerType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.trainer; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public enum TrainerType 23 | { 24 | ONE_VS_ALL, 25 | ONLINE; 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/utils/NLPMode.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.utils; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public enum NLPMode 23 | { 24 | pos, 25 | morph, 26 | dep, 27 | ner, 28 | srl; 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/feature/type/DirectionType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.feature.type; 17 | 18 | /** 19 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 20 | */ 21 | public enum DirectionType 22 | { 23 | l, // left 24 | r, // right 25 | u, // up 26 | d, // down 27 | a; // all 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/feature/type/SourceType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.feature.type; 17 | 18 | import java.io.Serializable; 19 | 20 | 21 | /** 22 | * @since 3.0.0 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public enum SourceType implements Serializable 26 | { 27 | i, j, k; 28 | } -------------------------------------------------------------------------------- /src/main/resources/configure/config_train_dep.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | false 21 | true 22 | root 23 | 1 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/main/resources/configure/config_train_ner.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 17 | general-en-ner-gazetteer.xz 18 | 19 | 20 | 21 | 22 | true 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/feature/type/FeatureType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.clir.clearnlp.feature.type; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public enum FeatureType implements Serializable 25 | { 26 | BINARY, 27 | SIMPLE, 28 | SET; 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/wordnet/WNPOSTag.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.wordnet; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface WNPOSTag 23 | { 24 | char NOUN = 'n'; 25 | char VERB = 'v'; 26 | char ADJECTIVE = 'a'; 27 | char ADVERB = 'r'; 28 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/AbstractComponent.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component; 17 | 18 | import edu.emory.clir.clearnlp.dependency.DEPTree; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | abstract public class AbstractComponent 25 | { 26 | abstract public void process(DEPTree tree); 27 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcherTrue.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.srl.matcher; 17 | 18 | 19 | /** 20 | * @since 3.0.0 21 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class SRLArcMatcherTrue implements SRLArcMatcher 24 | { 25 | @Override 26 | public boolean matches(String label) 27 | { 28 | return true; 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/constant/MetaConst.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.constant; 17 | 18 | /** Placeholder strings of the form {@code #name#}. NOTE(review): presumably substituted for hyperlinks, emoticons, cardinal and ordinal numbers when word forms are simplified; confirm against the callers. 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface MetaConst 23 | { 24 | String HYPERLINK = "#hlink#"; 25 | String EMOTICON = "#emo#"; 26 | String CARDINAL = "#crd#"; 27 | String ORDINAL = "#ord#"; 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/utils/CFlag.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.utils; 17 | 18 | /** Component flags {@code COLLECT}, {@code TRAIN}, {@code BOOTSTRAP}, {@code EVALUATE} and {@code DECODE}; how each flag changes a component's behaviour is decided by the code that consumes it, not here. 19 | * @see Component-Flags 20 | * @since 3.0.0 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public enum CFlag 24 | { 25 | COLLECT, 26 | TRAIN, 27 | BOOTSTRAP, 28 | EVALUATE, 29 | DECODE; 30 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/dbpedia/DBPediaInfoMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.dbpedia; 17 | 18 | import java.util.HashMap; 19 | 20 | /** A {@link HashMap} subclass that adds nothing but a {@code serialVersionUID}. NOTE(review): it appears here as a raw {@code HashMap}; confirm the intended key and value type arguments. 21 | * @since 3.0.3 22 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class DBPediaInfoMap extends HashMap 25 | { 26 | private static final long serialVersionUID = 6100722532796570642L; 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/feature/type/FeatureXml.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.feature.type; 17 | 18 | import java.util.regex.Pattern; 19 | 20 | /** Names used when reading feature-template XML: the string {@code feature}, the string {@code visible}, and a pattern accepting {@code f} followed by one or more digits (e.g. {@code f0}, {@code f12}). NOTE(review): the {@code E_} and {@code A_} prefixes presumably mean element and attribute; confirm against the XML reader. 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public interface FeatureXml 25 | { 26 | String E_FEATURE = "feature"; 27 | Pattern A_FIELD = Pattern.compile("^f[\\d]+$"); 28 | String A_VISIBLE = "visible"; 29 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/reader/TReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.reader; 17 | 18 | import edu.emory.clir.clearnlp.util.StringUtils; 19 | 20 | /** Reader types {@code RAW}, {@code TSV} and {@code LINE}, with a lookup from a name string. 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public enum TReader 25 | { 26 | RAW, 27 | TSV, 28 | LINE; 29 | 30 | /** Returns the constant whose name equals {@code StringUtils.toUpperCase(s)}; {@code valueOf} throws {@code IllegalArgumentException} when no constant has that name. */ static public TReader getType(String s) 31 | { 32 | return valueOf(StringUtils.toUpperCase(s)); 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/pair/StringIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.pair; 17 | 18 | /** An {@code ObjectIntPair} constructed from a {@code String} and an {@code int}; it adds no members beyond the constructor and a {@code serialVersionUID}. 19 | * @since 3.0.0 20 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public class StringIntPair extends ObjectIntPair 23 | { 24 | private static final long serialVersionUID = -4177555142012481247L; 25 | 26 | /** Forwards {@code s} and {@code i} unchanged to the {@code ObjectIntPair} constructor. */ public StringIntPair(String s, int i) 27 | { 28 | super(s, i); 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/resources/configure/config_train_srl.xml: -------------------------------------------------------------------------------- 1 | 2 | english 3 | 4 | 5 | brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | true 23 | 4 24 | 3 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/main/resources/samples/wsj_0001.parse: -------------------------------------------------------------------------------- 1 | ((S (NP-SBJ (NP (NNP Pierre) 2 | (NNP Vinken)) 3 | (, ,) 4 | (ADJP (NML (CD 61) 5 | (NNS years)) 6 | (JJ old)) 7 | (, ,)) 8 | (VP (MD will) 9 | (VP (VB join) 10 | (NP (DT the) 11 | (NN board)) 12 | (PP-CLR (IN as) 13 | (NP (DT a) 14 | (JJ nonexecutive) 15 | (NN director))) 16 | (NP-TMP (NNP Nov.) 17 | (CD 29)))) 18 | (. .))) 19 | 20 | (TOP (S (NP-SBJ (NNP Mr.) 21 | (NNP Vinken)) 22 | (VP (VBZ is) 23 | (NP-PRD (NP (NN chairman)) 24 | (PP (IN of) 25 | (NP (NP (NNP Elsevier) 26 | (NNP N.V.)) 27 | (, ,) 28 | (NP (DT the) 29 | (NNP Dutch) 30 | (VBG publishing) 31 | (NN group)))))) 32 | (. .))) 33 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/dictionary/PathNamedEntity.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary; 17 | 18 | /** Path constants for the named-entity dictionary files (US female and male name lists), each built by string concatenation from {@link #ROOT}. 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface PathNamedEntity 23 | { 24 | String ROOT = "edu/emory/clir/clearnlp/dictionary/ner/"; 25 | String US = ROOT + "us/"; 26 | 27 | // US 28 | String US_FEMALE_NAMES = US + "female_names.txt"; 29 | String US_MALE_NAMES = US + "male_names.txt"; 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/mode/morph/DefaultMPAnalyzer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.mode.morph; 17 | 18 | import edu.emory.clir.clearnlp.dependency.DEPNode; 19 | 20 | /** An {@code AbstractMPAnalyzer} whose {@code analyze} sets the node's lemma to the value returned by {@code node.getLowerSimplifiedWordForm()} and does nothing else. 21 | * @since 3.0.0 22 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class DefaultMPAnalyzer extends AbstractMPAnalyzer 25 | { 26 | @Override 27 | public void analyze(DEPNode node) 28 | { 29 | node.setLemma(node.getLowerSimplifiedWordForm()); 30 | } 31 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/lang/TLanguage.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.lang; 17 | 18 | import edu.emory.clir.clearnlp.util.StringUtils; 19 | 20 | 21 | /** Language identifiers {@code ARABIC}, {@code CHINESE}, {@code ENGLISH}, {@code HINDI} and {@code KOREAN}, with a lookup from a name string. 22 | * @since 3.0.0 23 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public enum TLanguage 26 | { 27 | ARABIC, 28 | CHINESE, 29 | ENGLISH, 30 | HINDI, 31 | KOREAN; 32 | 33 | /** Returns the constant whose name equals {@code StringUtils.toUpperCase(s)}; {@code valueOf} throws {@code IllegalArgumentException} when no constant has that name. */ static public TLanguage getType(String s) 34 | { 35 | return valueOf(StringUtils.toUpperCase(s)); 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/resources/samples/wsj_0001.parse.dep: -------------------------------------------------------------------------------- 1 | 1 Pierre pierre NNP _ 2 nn _ _ _ 2 | 2 Vinken vinken NNP _ 9 nsubj _ _ _ 3 | 3 , , , _ 2 punct _ _ _ 4 | 4 61 0 CD _ 5 num _ _ _ 5 | 5 years year NNS _ 6 npadvmod _ _ _ 6 | 6 old old JJ _ 2 amod _ _ _ 7 | 7 , , , _ 2 punct _ _ _ 8 | 8 will will MD _ 9 aux _ _ _ 9 | 9 join join VB _ 0 root _ _ _ 10 | 10 the the DT _ 11 det _ _ _ 11 | 11 board board NN _ 9 dobj _ _ _ 12 | 12 as as IN syn=CLR 9 prep _ _ _ 13 | 13 a a DT _ 15 det _ _ _ 14 | 14 nonexecutive nonexecutive JJ _ 15 amod _ _ _ 15 | 15 director director NN _ 12 pobj _ _ _ 16 | 16 Nov. nov. NNP sem=TMP 9 npadvmod _ _ _ 17 | 17 29 0 CD _ 16 num _ _ _ 18 | 18 . . . _ 9 punct _ _ _ 19 | 20 | 1 Mr. mr. NNP _ 2 nn _ _ _ 21 | 2 Vinken vinken NNP _ 3 nsubj _ _ _ 22 | 3 is be VBZ _ 0 root _ _ _ 23 | 4 chairman chairman NN syn=PRD 3 attr _ _ _ 24 | 5 of of IN _ 4 prep _ _ _ 25 | 6 Elsevier elsevier NNP _ 7 nn _ _ _ 26 | 7 N.V. n.v. NNP _ 5 pobj _ _ _ 27 | 8 , , , _ 7 punct _ _ _ 28 | 9 the the DT _ 12 det _ _ _ 29 | 10 Dutch dutch NNP _ 12 nn _ _ _ 30 | 11 publishing publish VBG _ 12 amod _ _ _ 31 | 12 group group NN _ 7 appos _ _ _ 32 | 13 . . .
_ 3 punct _ _ _ 33 | 34 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/evaluation/AbstractEval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.evaluation; 17 | 18 | import edu.emory.clir.clearnlp.dependency.DEPTree; 19 | 20 | /** Evaluator contract with three abstract operations: {@code countCorrect} takes a tree and an array of labels, {@code getScore} returns a {@code double}, and {@code clear} takes no arguments. NOTE(review): {@code sTree} and {@code gLabels} presumably denote the system-produced tree and the gold labels, and {@code clear} presumably resets the accumulated counts; confirm against a concrete subclass. 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | abstract public class AbstractEval 25 | { 26 | abstract public void countCorrect(DEPTree sTree, LabelType[] gLabels); 27 | abstract public double getScore(); 28 | abstract public void clear(); 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/instance/SparseInstance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.instance; 17 | 18 | import edu.emory.clir.clearnlp.classification.vector.SparseFeatureVector; 19 | 20 | /** An {@code AbstractInstance} built from a label and a {@link SparseFeatureVector}; the constructor only forwards both to the superclass. 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class SparseInstance extends AbstractInstance 25 | { 26 | public SparseInstance(String label, SparseFeatureVector vector) 27 | { 28 | super(label, vector); 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/instance/StringInstance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.instance; 17 | 18 | import edu.emory.clir.clearnlp.classification.vector.StringFeatureVector; 19 | 20 | /** An {@code AbstractInstance} built from a label and a {@link StringFeatureVector}; the constructor only forwards both to the superclass. 21 | * @since 3.0.0 22 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class StringInstance extends AbstractInstance 25 | { 26 | public StringInstance(String label, StringFeatureVector vector) 27 | { 28 | super(label, vector); 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/verbnet/VNMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.verbnet; 17 | 18 | import java.io.Serializable; 19 | import java.util.HashMap; 20 | 21 | /** A {@link HashMap} of {@code VNClass} entries with a convenience {@code put(VNClass)} that stores each class under the key {@code vn.getID()}; an existing entry with the same ID is replaced, as with any {@code HashMap.put}. 22 | * @since 3.0.0 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class VNMap extends HashMap implements Serializable 26 | { 27 | private static final long serialVersionUID = -7409938151707095231L; 28 | 29 | public void put(VNClass vn) 30 | { 31 | put(vn.getID(), vn); 32 | } 33 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/pos/POSTag.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.pos; 17 | 18 | /** Part-of-speech tag constants; {@code POS_FINAL} and {@code POS_HYPERLINK} are aliases of constants declared in {@code POSTagEn}. 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface POSTag 23 | { 24 | /** The part-of-speech tag for emoticons. */ 25 | String POS_EMOTICON = "EMO"; 26 | /** The part-of-speech tag used as the final tag; same value as {@code POSTagEn.POS_PERIOD}. */ 27 | String POS_FINAL = POSTagEn.POS_PERIOD; 28 | /** The part-of-speech tag for hyperlinks (e.g., URLs, emails). */ 29 | String POS_HYPERLINK = POSTagEn.POS_ADD; 30 | } 31 | -------------------------------------------------------------------------------- /src/test/resources/propbank/wsj_0001.parse: -------------------------------------------------------------------------------- 1 | (TOP (S (NP-SBJ (NP (NNP Pierre) 2 | (NNP Vinken)) 3 | (, ,) 4 | (ADJP (NML (CD 61) 5 | (NNS years)) 6 | (JJ old)) 7 | (, ,)) 8 | (VP (MD will) 9 | (VP (VB join) 10 | (NP (DT the) 11 | (NN board)) 12 | (PP-CLR (IN as) 13 | (NP (DT a) 14 | (JJ nonexecutive) 15 | (NN director))) 16 | (NP-TMP (NNP Nov.) 17 | (CD 29)))) 18 | (. .))) 19 | 20 | (TOP (S (NP-SBJ (NNP Mr.) 21 | (NNP Vinken)) 22 | (VP (VBZ is) 23 | (NP-PRD (NP (NN chairman)) 24 | (PP (IN of) 25 | (NP (NP (NNP Elsevier) 26 | (NNP N.V.)) 27 | (, ,) 28 | (NP (DT the) 29 | (NNP Dutch) 30 | (VBG publishing) 31 | (NN group)))))) 32 | (.
.))) 33 | 34 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/cluster/StringVector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.cluster; 17 | 18 | import edu.emory.clir.clearnlp.collection.map.ObjectDoubleHashMap; 19 | 20 | /** Term vector backed by an {@code ObjectDoubleHashMap}; {@code add(term)} calls {@code term_map.add(term, 1)}. NOTE(review): that call presumably increments the stored value of the term by one; confirm against {@code ObjectDoubleHashMap.add}. The class exposes no way to read the map back. 21 | * @since 3.1.2 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class StringVector 25 | { 26 | private ObjectDoubleHashMap term_map; 27 | 28 | public StringVector() 29 | { 30 | term_map = new ObjectDoubleHashMap<>(); 31 | } 32 | 33 | public void add(String term) 34 | { 35 | term_map.add(term, 1); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/mode/pos/POSEval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.mode.pos; 17 | 18 | import edu.emory.clir.clearnlp.component.evaluation.AbstractAccuracyEval; 19 | import edu.emory.clir.clearnlp.dependency.DEPNode; 20 | 21 | /** Part-of-speech evaluator built on {@code AbstractAccuracyEval}: a node counts as correct when {@code node.isPOSTag(label)} returns {@code true}. 22 | * @since 3.0.0 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class POSEval extends AbstractAccuracyEval 26 | { 27 | @Override 28 | protected boolean isCorrect(DEPNode node, String label) 29 | { 30 | return node.isPOSTag(label); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/dbpedia/DBPediaXML.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.dbpedia; 17 | 18 | /** String constants for the OWL/RDF names ({@code owl:Class}, {@code rdf:about}, {@code rdf:resource}, {@code rdfs:subClassOf}) and the two DBPedia URI prefixes (ontology and resource). 19 | * @since 3.0.3 20 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface DBPediaXML 23 | { 24 | String OWL_CLASS = "owl:Class"; 25 | String RDF_ABOUT = "rdf:about"; 26 | String RDF_RESOURCE = "rdf:resource"; 27 | String RDFS_SUBCLASS_OF = "rdfs:subClassOf"; 28 | String DBPEDIA_ORG_ONTOLOGY = "http://dbpedia.org/ontology/"; 29 | String DBPEDIA_ORG_RESOURCE = "http://dbpedia.org/resource/"; 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/srl/matcher/SRLArcMatcherSet.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.srl.matcher; 17 | 18 | import java.util.Set; 19 | 20 | /** An {@link SRLArcMatcher} that accepts a label exactly when the set given at construction contains it; the set is stored by reference, not copied, so later changes to it affect matching. 21 | * @since 3.0.0 22 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class SRLArcMatcherSet implements SRLArcMatcher 25 | { 26 | private Set s_labels; 27 | 28 | public SRLArcMatcherSet(Set labels) 29 | { 30 | s_labels = labels; 31 | } 32 | 33 | @Override 34 | public boolean matches(String label) 35 | { 36 | return s_labels.contains(label); 37 | } 38 | } -------------------------------------------------------------------------------- /src/test/resources/feature/common/feature_common.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/ObjectSizeFetcher.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | import java.lang.instrument.Instrumentation; 19 | 20 | /** Java-agent hook exposing {@link Instrumentation#getObjectSize(Object)}. The static {@code instrumentation} field is assigned only in {@code premain}, so {@code getObjectSize} throws {@code NullPointerException} unless the JVM was started with this class registered as a {@code -javaagent}. 21 | * @author Jinho D.
Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public class ObjectSizeFetcher 24 | { 25 | private static Instrumentation instrumentation; 26 | 27 | /** Agent entry point: stores {@code inst} in the static field for later size queries; {@code args} is ignored. */ public static void premain(String args, Instrumentation inst) 28 | { 29 | instrumentation = inst; 30 | } 31 | 32 | /** Returns {@code instrumentation.getObjectSize(o)}; throws {@code NullPointerException} if {@code premain} has not run, because the field is still {@code null}. */ public static long getObjectSize(Object o) 33 | { 34 | return instrumentation.getObjectSize(o); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scripts/conll2clear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | fin = open(sys.argv[1]) 4 | fout = open(sys.argv[2],'w') 5 | vpos = sys.argv[3] 6 | 7 | tree = [] 8 | pred = [] 9 | 10 | for line in fin: 11 | l = line.split() 12 | 13 | if not l: 14 | rTree = [] 15 | for node in tree: 16 | args = [] 17 | for i,arg in enumerate(node[7:]): 18 | if arg == '_': continue 19 | predId = pred[i] 20 | if not tree[predId][3].startswith(vpos): continue 21 | args.append(str(predId+1)+':'+arg) 22 | 23 | del node[7:] 24 | if args: node.append(';'.join(args)) 25 | else : node.append('_') 26 | rTree.append('\t'.join(node)) 27 | 28 | fout.write('\n'.join(rTree)+'\n\n') 29 | tree = [] 30 | pred = [] 31 | continue 32 | 33 | node = [] 34 | 35 | node.append(l[0]) # id 36 | node.append(l[1]) # form 37 | node.append(l[2]) # glemma 38 | node.append(l[4]) # gpos 39 | 40 | if l[13] != '_' and l[4].startswith(vpos): 41 | node.append('pb='+l[13]) 42 | else: 43 | node.append('_') 44 | 45 | # node.append(l[6]) # gfeat 46 | node.append(l[8]) # headId 47 | node.append(l[10]) # deprel 48 | node.extend(l[14:]) # arg* 49 | 50 | if l[12] != '_': pred.append(int(l[0])-1) 51 | tree.append(node) 52 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/verbnet/VNTagTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory
University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.verbnet; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.clir.clearnlp.lexicon.verbnet.VNTag; 24 | 25 | /** JUnit test checking that {@code VNTag.contains} accepts {@code VNTag.VN_AGENT} and rejects the string {@code "Hello"}. 26 | * @since 3.0.0 27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class VNTagTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | assertTrue(VNTag.contains(VNTag.VN_AGENT)); 35 | assertFalse(VNTag.contains("Hello")); 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/prediction/AbstractPrediction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
 * Base class of predictions; it carries nothing but a mutable score.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public abstract class AbstractPrediction
{
	/** The score attached to this prediction. */
	protected double d_score;

	/**
	 * @param score the initial score, stored through {@link #setScore(double)}.
	 */
	public AbstractPrediction(final double score)
	{
		this.setScore(score);
	}

	/** @return the current score. */
	public double getScore()
	{
		return this.d_score;
	}

	/** @param score the new score, replacing the previous one. */
	public void setScore(final double score)
	{
		this.d_score = score;
	}
}
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class SRLArcMatcherPattern implements SRLArcMatcher 25 | { 26 | private Pattern p_labels; 27 | 28 | public SRLArcMatcherPattern(Pattern pattern) 29 | { 30 | p_labels = pattern; 31 | } 32 | 33 | @Override 34 | public boolean matches(String label) 35 | { 36 | return p_labels.matcher(label).find(); 37 | } 38 | } -------------------------------------------------------------------------------- /src/main/resources/headrules/headrule_en_stanford.txt: -------------------------------------------------------------------------------- 1 | ADJP r JJ.*|VB.*|NN.*;ADJP;IN;RB|ADVP;CD|QP;FW|NP;.* 2 | ADVP r VB.*;RP;RB.*|JJ.*;ADJP;ADVP;QP;IN;NN;CD;NP;.* 3 | CAPTION l NNP.*;NN.*;NP;CD;.* 4 | CIT l NNP.*;NN.*;NP;CD;.* 5 | CONJP l CC;VB.*;NN.*;TO|IN;.* 6 | EDITED r VP;VB.*;NN.*|PRP|NP;IN|PP;S.*;.* 7 | EMBED r S.*;FRAG|NP;.* 8 | FRAG r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;NN.*|NP;PP;SBAR;JJ.*|ADJP;RB|ADVP;INTJ;.* 9 | INTJ l VB.*;NN.*;UH;INTJ;.* 10 | LST l LS|CD;NN;.* 11 | META l NP;VP|S;.* 12 | NAC r NN.*;NP;S|SINV;.* 13 | NML r NN.*|NML;CD|NP|QP|JJ.*|VB.*;.* 14 | NP r NN.*|NML;NX;PRP;FW;CD;NP;-NOM;QP|JJ.*|VB.*;ADJP;S;SBAR;.* 15 | NX r NN.*;NX;NP;.* 16 | PP l RP;TO;IN;VB.*;PP;NN.*;JJ;RB;.* 17 | PRN r VP;NP;S|SBARQ|SINV|SQ;SBAR;.* 18 | PRT l RP;PRT;.* 19 | QP r CD;NN.*;JJ;DT|PDT;RB;NP|QP;.* 20 | RRC l VP;VB.*;-PRD;NP|NN.*;ADJP;PP;.* 21 | S r VP;VB.*;-PRD;S|SQ|SINV|SBARQ;SBAR;NP;PP;.* 22 | SBAR r VP;S|SQ|SINV;SBAR.*;FRAG|NP;.* 23 | SBARQ r VP;SQ|SBARQ;S|SINV;FRAG|NP;.* 24 | SINV r VP;VB.*;MD;S|SINV;NP;.* 25 | SQ r VP;VB.*;SQ;S;MD;NP;.* 26 | UCP r .* 27 | VP l VP;VB.*;MD|TO;JJ.*|NN.*|IN;-PRD;NP;ADJP|QP;S;.* 28 | WHADJP r JJ.*|VBN;WHADJP|ADJP;.* 29 | WHADVP r RB.*|WRB;WHADVP;.* 30 | WHNP r NN.*;WP|WHNP;NP|NML|CD;JJ.*|VBG;WHADJP|ADJP;DT;.* 31 | WHPP l IN|TO;.* 32 | X r .* -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/pair/CharIntPair.java: 
/**
 * Mutable, serializable pair of a {@code char} and an {@code int} with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class CharIntPair implements Serializable
{
	private static final long serialVersionUID = -2439322004395455224L;

	/** The character component. */
	public char c;
	/** The integer component. */
	public int i;

	/**
	 * @param c the character component.
	 * @param i the integer component.
	 */
	public CharIntPair(final char c, final int i)
	{
		this.set(c, i);
	}

	/**
	 * Overwrites both components at once.
	 * @param c the new character component.
	 * @param i the new integer component.
	 */
	public void set(final char c, final int i)
	{
		this.i = i;
		this.c = c;
	}
}
/**
 * Mutable, serializable pair of two {@code int} values with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class IntIntPair implements Serializable
{
	private static final long serialVersionUID = 1674260806426517804L;

	/** The first component. */
	public int i1;
	/** The second component. */
	public int i2;

	/**
	 * @param i1 the first component.
	 * @param i2 the second component.
	 */
	public IntIntPair(final int i1, final int i2)
	{
		this.set(i1, i2);
	}

	/**
	 * Overwrites both components at once.
	 * @param i1 the new first component.
	 * @param i2 the new second component.
	 */
	public void set(final int i1, final int i2)
	{
		this.i2 = i2;
		this.i1 = i1;
	}
}
/**
 * Frameset types, each tied to a one-letter string value.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public enum PBFType
{
	VERB("v"),
	NOUN("n"),
	ADJECTIVE("j");

	/** The string value tied to this constant. */
	private final String tag;

	PBFType(String tag)
	{
		this.tag = tag;
	}

	/**
	 * @param value the string to compare.
	 * @return {@code true} if the string equals the value of this constant; {@code false} otherwise, including for {@code null}.
	 */
	public boolean isValue(String value)
	{
		return value != null && tag.equals(value);
	}

	/** @return the string value of this constant. */
	public String getValue()
	{
		return tag;
	}
}
/**
 * Mutable, serializable pair of a {@code boolean} and an {@code int} with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class BooleanIntPair implements Serializable
{
	private static final long serialVersionUID = -3606845926289267380L;

	/** The boolean component. */
	public boolean b;
	/** The integer component. */
	public int i;

	/**
	 * @param b the boolean component.
	 * @param i the integer component.
	 */
	public BooleanIntPair(final boolean b, final int i)
	{
		this.set(b, i);
	}

	/**
	 * Overwrites both components at once.
	 * @param b the new boolean component.
	 * @param i the new integer component.
	 */
	public void set(final boolean b, final int i)
	{
		this.i = i;
		this.b = b;
	}
}
/**
 * Mutable, serializable pair of a {@code double} and an {@code int} with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DoubleIntPair implements Serializable
{
	private static final long serialVersionUID = -2439322004395455224L;

	/** The floating-point component. */
	public double d;
	/** The integer component. */
	public int i;

	/**
	 * @param d the floating-point component.
	 * @param i the integer component.
	 */
	public DoubleIntPair(final double d, final int i)
	{
		this.set(d, i);
	}

	/**
	 * Overwrites both components at once.
	 * @param d the new floating-point component.
	 * @param i the new integer component.
	 */
	public void set(final double d, final int i)
	{
		this.i = i;
		this.d = d;
	}
}
/**
 * Mutable, serializable pair of two {@code char} values with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class CharCharPair implements Serializable
{
	private static final long serialVersionUID = -2439322004395455224L;

	/** The first component. */
	public char c1;
	/** The second component. */
	public char c2;

	/**
	 * @param c1 the first component.
	 * @param c2 the second component.
	 */
	public CharCharPair(final char c1, final char c2)
	{
		this.set(c1, c2);
	}

	/**
	 * Overwrites both components at once.
	 * @param c1 the new first component.
	 * @param c2 the new second component.
	 */
	public void set(final char c1, final char c2)
	{
		this.c2 = c2;
		this.c1 = c1;
	}
}
_ 3 punct _ 11 | 12 | 1 He he PRP _ 2 nsubj 2:A0 13 | 2 bought buy VBD p2=VBN|pb=buy.01 0 root _ 14 | 3 a a DT _ 4 det _ 15 | 4 car car NN _ 2 dobj 2:A1;6:A1 16 | 5 that that WDT _ 6 nsubj 6:R-A1 17 | 6 is be VBZ pb=be.01 4 rcmod _ 18 | 7 red red JJ p2=VBN 6 acomp 6:A2 19 | 8 . . . _ 2 punct _ 20 | 21 | 1 He he PRP _ 2 nsubj 2:A0 22 | 2 bought buy VBD p2=VBN|pb=buy.01 0 root _ 23 | 3 a a DT _ 4 det _ 24 | 4 car car NN _ 2 dobj 2:A1;7:A1 25 | 5 tomorrow tomorrow NN _ 2 npadvmod 2:AM-TMP 26 | 6 that that WDT _ 7 nsubj 7:R-A1 27 | 7 is be VBZ pb=be.01 4 rcmod _ 28 | 8 red red JJ p2=VBN 7 acomp 7:A2 29 | 9 . . . _ 2 punct _ 30 | 31 | 1 He he PRP _ 2 nsubj 2:A0 32 | 2 bought buy VBD p2=VBN|pb=buy.01 0 root _ 33 | 3 a a DT _ 4 det _ 34 | 4 car car NN _ 2 dobj 2:A1;7:A1 35 | 5 tomorrow tomorrow NN _ 2 npadvmod 2:AM-TMP 36 | 6 that that WDT _ 7 nsubj 7:R-A1 37 | 7 is be VBZ pb=be.01 2 nproj _ 38 | 8 red red JJ p2=VBN 7 acomp 7:A2 39 | 9 . . . _ 2 punct _ 40 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/instance/SparseInstanceCollector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.clir.clearnlp.classification.instance; 17 | 18 | import edu.emory.clir.clearnlp.classification.vector.SparseFeatureVector; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class SparseInstanceCollector extends AbstractInstanceCollector 25 | { 26 | @Override 27 | public void init() 28 | { 29 | initDefault(); 30 | } 31 | 32 | @Override 33 | protected void addFeatures(SparseFeatureVector vector) 34 | { 35 | n_features = Math.max(n_features, vector.getMaxIndex()+1); 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/propbank/frameset/PBFXml.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.propbank.frameset; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. 
/**
 * String constants prefixed {@code E_} and {@code A_}.
 * NOTE(review): presumably XML element and attribute names of PropBank frameset files - confirm against the parser that uses them.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PBFXml
{
	// ---- E_* constants ----
	String E_FRAMESET  = "frameset";
	String E_PREDICATE = "predicate";
	String E_ROLESET   = "roleset";
	String E_ROLE      = "role";
	String E_VNROLE    = "vnrole";

	// ---- A_* constants ----
	String A_LEMMA   = "lemma";
	String A_ID      = "id";
	String A_DESCR   = "descr";
	String A_NAME    = "name";
	String A_N       = "n";
	String A_F       = "f";
	String A_VNCLS   = "vncls";
	String A_VNTHETA = "vntheta";
}
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class LineReader extends AbstractReader 25 | { 26 | public LineReader() 27 | { 28 | super(TReader.LINE); 29 | } 30 | 31 | @Override 32 | public String next() 33 | { 34 | try 35 | { 36 | return b_reader.readLine(); 37 | } 38 | catch (IOException e) {e.printStackTrace();} 39 | 40 | return null; 41 | } 42 | 43 | @Override 44 | public AbstractReader clone() 45 | { 46 | return new LineReader(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/triple/DoubleIntIntTriple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.triple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
/**
 * Mutable, serializable triple of a {@code double} and two {@code int} values with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DoubleIntIntTriple implements Serializable
{
	private static final long serialVersionUID = -5353827334306132865L;

	/** The floating-point component. */
	public double d;
	/** The first integer component. */
	public int i1;
	/** The second integer component. */
	public int i2;

	/**
	 * @param d the floating-point component.
	 * @param i1 the first integer component.
	 * @param i2 the second integer component.
	 */
	public DoubleIntIntTriple(final double d, final int i1, final int i2)
	{
		this.set(d, i1, i2);
	}

	/**
	 * Overwrites all three components at once.
	 * @param d the new floating-point component.
	 * @param i1 the new first integer component.
	 * @param i2 the new second integer component.
	 */
	public void set(final double d, final int i1, final int i2)
	{
		this.i2 = i2;
		this.i1 = i1;
		this.d  = d;
	}
}
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | abstract public class AbstractMPAnalyzer extends AbstractComponent 28 | { 29 | @Override 30 | public void process(DEPTree tree) 31 | { 32 | for (DEPNode node : tree) 33 | analyze(node); 34 | } 35 | 36 | abstract public void analyze(DEPNode node); 37 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/triple/BooleanIntIntTriple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.triple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
/**
 * Mutable, serializable triple of a {@code boolean} and two {@code int} values with public fields.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class BooleanIntIntTriple implements Serializable
{
	private static final long serialVersionUID = -5353827334306132865L;

	/** The boolean component. */
	public boolean b;
	/** The first integer component. */
	public int i1;
	/** The second integer component. */
	public int i2;

	/**
	 * @param b the boolean component.
	 * @param i1 the first integer component.
	 * @param i2 the second integer component.
	 */
	public BooleanIntIntTriple(final boolean b, final int i1, final int i2)
	{
		this.set(b, i1, i2);
	}

	/**
	 * Overwrites all three components at once.
	 * @param b the new boolean component.
	 * @param i1 the new first integer component.
	 * @param i2 the new second integer component.
	 */
	public void set(final boolean b, final int i1, final int i2)
	{
		this.i2 = i2;
		this.i1 = i1;
		this.b  = b;
	}
}
15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.clir.clearnlp.util.MathUtils; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class MathUtilsTest 29 | { 30 | @Test 31 | public void testPow() 32 | { 33 | int i, j; 34 | 35 | for (j=-5; j<5; j++) 36 | { 37 | if (j == 0) continue; 38 | 39 | for (i=-5; i<5; i++) 40 | { 41 | assertEquals(Math.pow( 2, i), MathUtils.pow( 2, i), 0); 42 | assertEquals(Math.pow(-2, i), MathUtils.pow(-2, i), 0); 43 | } 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/feature/common/OrthographicType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.feature.common; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. 
/**
 * String codes {@code "0"} to {@code "13"}, one per orthographic property named by the constant.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface OrthographicType
{
	String HYPERLINK          = "0";

	// ALL_* constants
	String ALL_UPPER          = "1";
	String ALL_LOWER          = "2";
	String ALL_DIGIT          = "3";
	String ALL_PUNCT          = "4";
	String ALL_DIGIT_OR_PUNCT = "5";

	// HAS_* constants
	String HAS_DIGIT          = "6";
	String HAS_PERIOD         = "7";
	String HAS_HYPHEN         = "8";
	String HAS_OTHER_PUNCT    = "9";

	// remaining constants
	String NO_LOWER           = "10";
	String FST_UPPER          = "11";
	String UPPER_1            = "12";
	String UPPER_2            = "13";
}
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public class NERLib 23 | { 24 | private NERLib() {} 25 | 26 | public static BILOU toBILOU(String tag) 27 | { 28 | return BILOU.valueOf(tag.substring(0,1)); 29 | } 30 | 31 | public static String toBILOUTag(BILOU bilou, String tag) 32 | { 33 | return bilou+"-"+tag; 34 | } 35 | 36 | public static String toNamedEntity(String tag) 37 | { 38 | return tag.substring(2); 39 | } 40 | 41 | public static String changeChunkType(BILOU newBilou, String tag) 42 | { 43 | return toBILOUTag(newBilou, toNamedEntity(tag)); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/util/CharUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.clir.clearnlp.util.StringUtils; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class CharUtilsTest 30 | { 31 | @Test 32 | public void testContainsOnlyDigits() 33 | { 34 | assertTrue (StringUtils.containsDigitOnly("12")); 35 | assertFalse(StringUtils.containsDigitOnly("a1")); 36 | assertFalse(StringUtils.containsDigitOnly("1b")); 37 | assertFalse(StringUtils.containsDigitOnly("1-2")); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/arc/DEPArc.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.arc; 17 | 18 | import edu.emory.clir.clearnlp.dependency.DEPNode; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class DEPArc extends AbstractArc 25 | { 26 | private static final long serialVersionUID = -9099516205158258095L; 27 | 28 | public DEPArc(DEPNode node, String label) 29 | { 30 | set(node, label); 31 | } 32 | 33 | @Override 34 | public String toString() 35 | { 36 | return n_node.getID() + DELIM + s_label; 37 | } 38 | 39 | @Override 40 | public int compareTo(AbstractArc arc) 41 | { 42 | return n_node.compareTo(arc.getNode()); 43 | } 44 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/tree/PrefixNode.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.tree; 17 | 18 | import java.util.HashMap; 19 | 20 | /** 21 | * @since 3.0.3 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class PrefixNode,V> extends HashMap> 25 | { 26 | private static final long serialVersionUID = 1566684742873455351L; 27 | private V value; 28 | 29 | public PrefixNode() 30 | { 31 | value = null; 32 | } 33 | 34 | public V getValue() 35 | { 36 | return value; 37 | } 38 | 39 | public void setValue(V value) 40 | { 41 | this.value = value; 42 | } 43 | 44 | public boolean hasValue() 45 | { 46 | return value != null; 47 | } 48 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/pair/ObjectCharPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.pair; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class ObjectCharPair implements Serializable, Comparable> 25 | { 26 | private static final long serialVersionUID = -5228607179375724504L; 27 | 28 | public T o; 29 | public char c; 30 | 31 | public ObjectCharPair(T o, char c) 32 | { 33 | set(o, c); 34 | } 35 | 36 | public void set(T o, char c) 37 | { 38 | this.o = o; 39 | this.c = c; 40 | } 41 | 42 | @Override 43 | public int compareTo(ObjectCharPair p) 44 | { 45 | return c - p.c; 46 | } 47 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/util/FileUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.clir.clearnlp.util.FileUtils; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class FileUtilsTest 29 | { 30 | @Test 31 | public void replaceExtensionTest() 32 | { 33 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "jpg")); 34 | assertEquals(null , FileUtils.replaceExtension("a", "jpg")); 35 | 36 | assertEquals("a.jpg", FileUtils.replaceExtension("a.txt", "txt", "jpg")); 37 | assertEquals(null , FileUtils.replaceExtension("a.txt", "bmp", "jpg")); 38 | } 39 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/triple/ObjectIntIntTriple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.triple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class ObjectIntIntTriple implements Serializable 25 | { 26 | private static final long serialVersionUID = -7014586350906455183L; 27 | 28 | public T o; 29 | public int i1; 30 | public int i2; 31 | 32 | public ObjectIntIntTriple() 33 | { 34 | set(null, 0, 0); 35 | } 36 | 37 | public ObjectIntIntTriple(T o, int i1, int i2) 38 | { 39 | set(o, i1, i2); 40 | } 41 | 42 | public void set(T o, int i1, int i2) 43 | { 44 | this.o = o; 45 | this.i1 = i1; 46 | this.i2 = i2; 47 | } 48 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dictionary/english/DTAbbreviationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.english; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.clir.clearnlp.dictionary.english.DTAbbreviation; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DTAbbreviationTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | DTAbbreviation dt = new DTAbbreviation(); 35 | 36 | assertTrue(dt.isAbbreviationEndingWithPeriod("mr")); 37 | assertTrue(dt.isAbbreviationEndingWithPeriod("mrs")); 38 | 39 | assertFalse(dt.isAbbreviationEndingWithPeriod("e.g")); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/cluster/Cluster.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.cluster; 17 | 18 | import java.util.HashSet; 19 | import java.util.Set; 20 | 21 | /** 22 | * @since 3.1.2 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class Cluster 26 | { 27 | private Set point_set; 28 | 29 | public Cluster() 30 | { 31 | point_set = new HashSet<>(); 32 | } 33 | 34 | public void addPoint(SparseVector point) 35 | { 36 | point_set.add(point); 37 | } 38 | 39 | public Set getPointSet() 40 | { 41 | return point_set; 42 | } 43 | 44 | public int size() 45 | { 46 | return point_set.size(); 47 | } 48 | 49 | public void merge(Cluster cluster) 50 | { 51 | point_set.addAll(cluster.getPointSet()); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/util/CharTokenizerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.Arrays; 21 | 22 | import org.junit.Test; 23 | 24 | import edu.emory.clir.clearnlp.util.CharTokenizer; 25 | 26 | /** 27 | * @since 3.0.0 28 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 29 | */ 30 | public class CharTokenizerTest 31 | { 32 | @Test 33 | public void test() 34 | { 35 | CharTokenizer t; 36 | String s; 37 | 38 | t = new CharTokenizer(','); 39 | s = "a,b,c"; 40 | assertEquals("[a, b, c]", Arrays.toString(t.tokenize(s))); 41 | 42 | t = new CharTokenizer(';'); 43 | s = ";abc;def;;ghi;"; 44 | assertEquals("[abc, def, ghi]", Arrays.toString(t.tokenize(s))); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/headrule/HeadRuleMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.headrule; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.clir.clearnlp.conversion.headrule.HeadRuleMap; 23 | import edu.emory.clir.clearnlp.util.IOUtils; 24 | 25 | 26 | /** @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) */ 27 | public class HeadRuleMapTest 28 | { 29 | @Test 30 | public void testHeadRuleMap() 31 | { 32 | String filename = "src/main/resources/headrules/headrule_en_stanford.txt"; 33 | 34 | HeadRuleMap map = new HeadRuleMap(IOUtils.createFileInputStream(filename)); 35 | String str = map.toString(); 36 | 37 | assertEquals(str, new HeadRuleMap(IOUtils.createByteArrayInputStream(str)).toString()); 38 | } 39 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/arc/PBArc.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.arc; 17 | 18 | import edu.emory.clir.clearnlp.constituent.CTNode; 19 | import edu.emory.clir.clearnlp.lexicon.propbank.PBArgument; 20 | 21 | /** 22 | * @since 3.0.0 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public class PBArc extends AbstractArc 26 | { 27 | private static final long serialVersionUID = 8603308004980285093L; 28 | 29 | public PBArc(CTNode node, String label) 30 | { 31 | set(node, label); 32 | } 33 | 34 | @Override 35 | public String toString() 36 | { 37 | return n_node.getTerminalID() + PBArgument.DELIM + s_label; 38 | } 39 | 40 | @Override 41 | public int compareTo(AbstractArc arc) 42 | { 43 | return n_node.compareTo(arc.getNode()); 44 | } 45 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/util/arc/SRLArcTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.arc; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.clir.clearnlp.dependency.DEPNode; 23 | import edu.emory.clir.clearnlp.util.arc.SRLArc; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class SRLArcTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | DEPNode node = new DEPNode(1, "A"); 35 | SRLArc arc = new SRLArc(node, "A0"); 36 | assertEquals("1:A0", arc.toString()); 37 | 38 | arc.setNumberedArgumentTag("PRD"); 39 | assertEquals("1:A0", arc.toString()); 40 | assertEquals("1:A0", arc.toString(false)); 41 | assertEquals("1:A0-PRD", arc.toString(true)); 42 | } 43 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/dictionary/AbstractDTTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary; 17 | 18 | import edu.emory.clir.clearnlp.util.CharUtils; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | abstract public class AbstractDTTokenizer 25 | { 26 | public String[] tokenize(String s) 27 | { 28 | char[] lcs = s.toCharArray(); 29 | String lower = CharUtils.toLowerCase(lcs) ? new String(lcs) : s; 30 | return tokenize(s, lower, lcs); 31 | } 32 | 33 | /** 34 | * @param original the original string. 35 | * @param lower the lowercase of the original string. 36 | * @param lcs the lowercase character array of the original string. 
37 | */ 38 | abstract public String[] tokenize(String original, String lower, char[] lcs); 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/wordnet/WNRelation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.wordnet; 17 | 18 | 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class WNRelation 25 | { 26 | private WNSynset w_synset; 27 | private short n_source; 28 | private short n_target; 29 | 30 | public WNSynset getWNSynset() 31 | { 32 | return w_synset; 33 | } 34 | 35 | public short getSource() 36 | { 37 | return n_source; 38 | } 39 | 40 | public short getTarget() 41 | { 42 | return n_target; 43 | } 44 | 45 | public void setWNSynset(WNSynset synset) 46 | { 47 | w_synset = synset; 48 | } 49 | 50 | public void setSource(short source) 51 | { 52 | n_source = source; 53 | } 54 | 55 | public void setTarget(short target) 56 | { 57 | n_target = target; 58 | } 59 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/configuration/AbstractTrainerConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.configuration; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public class AbstractTrainerConfiguration 23 | { 24 | private byte i_vectorType; 25 | private boolean b_binary; 26 | 27 | public AbstractTrainerConfiguration(byte vectorType, boolean binary) 28 | { 29 | setVectorType(vectorType); 30 | setBinary(binary); 31 | } 32 | 33 | public byte getVectorType() 34 | { 35 | return i_vectorType; 36 | } 37 | 38 | public boolean isBinary() 39 | { 40 | return b_binary; 41 | } 42 | 43 | public void setVectorType(byte vectorType) 44 | { 45 | i_vectorType = vectorType; 46 | } 47 | 48 | public void setBinary(boolean binary) 49 | { 50 | b_binary = binary; 51 | } 52 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTCompoundTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.universal; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.clir.clearnlp.dictionary.universal.DTCompound; 26 | import edu.emory.clir.clearnlp.util.lang.TLanguage; 27 | 28 | /** 29 | * @since 3.0.0 30 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 31 | */ 32 | public class DTCompoundTest 33 | { 34 | @Test 35 | public void test() 36 | { 37 | DTCompound dt = new DTCompound(TLanguage.ENGLISH); 38 | 39 | assertEquals("[I, 'mmm]" , Arrays.toString(dt.tokenize("I'mmm"))); 40 | assertEquals("[wha, d, ya]", Arrays.toString(dt.tokenize("whadya"))); 41 | 42 | assertTrue(dt.tokenize("I'm") == null); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/cluster/AbstractCluster.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.cluster; 17 | 18 | import java.util.ArrayList; 19 | import java.util.List; 20 | 21 | /** 22 | * @since 3.1.2 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | public abstract class AbstractCluster 26 | { 27 | protected List s_points; 28 | 29 | public AbstractCluster() 30 | { 31 | s_points = new ArrayList<>(); 32 | } 33 | 34 | public void addPoint(SparseVector point) 35 | { 36 | s_points.add(point); 37 | } 38 | 39 | public void setPoints(List points) 40 | { 41 | s_points = points; 42 | } 43 | 44 | public SparseVector getPoint(int index) 45 | { 46 | return s_points.get(index); 47 | } 48 | 49 | public List getPoints() 50 | { 51 | return s_points; 52 | } 53 | 54 | public abstract List cluster(); 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/dictionary/PathEnglishMPAnalyzer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public interface PathEnglishMPAnalyzer 23 | { 24 | String ROOT = "edu/emory/clir/clearnlp/dictionary/morphology/english/"; 25 | 26 | String INFLECTION_SUFFIX = ROOT + "inflection_suffix.xml"; 27 | String ABBREVIATOIN_RULE = ROOT + "abbreviation.rule"; 28 | String CARDINAL_BASE = ROOT + "cardinal.base"; 29 | String ORDINAL_BASE = ROOT + "ordinal.base"; 30 | 31 | String VERB = "verb"; 32 | String NOUN = "noun"; 33 | String ADJECTIVE = "adjective"; 34 | String ADVERB = "adverb"; 35 | String EXT_BASE = ".base"; 36 | String EXT_EXCEPTION = ".exc"; 37 | 38 | String DERIVATION_SUFFIX_N2V = ROOT + "derivation_suffix_n2v.xml"; 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/reader/RawReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.reader; 17 | 18 | import java.io.IOException; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class RawReader extends AbstractReader 25 | { 26 | public RawReader() 27 | { 28 | super(TReader.RAW); 29 | } 30 | 31 | @Override 32 | public String next() 33 | { 34 | try 35 | { 36 | StringBuilder build = new StringBuilder(); 37 | char[] buffer = new char[1024 * 4]; 38 | int n = 0; 39 | 40 | while ((n = b_reader.read(buffer)) != -1) 41 | build.append(buffer, 0, n); 42 | 43 | return build.toString(); 44 | } 45 | catch (IOException e) {e.printStackTrace();} 46 | 47 | return null; 48 | } 49 | 50 | @Override 51 | public AbstractReader clone() 52 | { 53 | return new RawReader(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/configuration/DecodeConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.configuration; 17 | 18 | import java.io.InputStream; 19 | 20 | import org.w3c.dom.Element; 21 | 22 | import edu.emory.clir.clearnlp.component.utils.NLPMode; 23 | import edu.emory.clir.clearnlp.util.XmlUtils; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DecodeConfiguration extends AbstractConfiguration 30 | { 31 | private final Element e_model; 32 | 33 | public DecodeConfiguration(InputStream in) 34 | { 35 | super(in); 36 | e_model = getFirstElement(E_MODEL); 37 | } 38 | 39 | public String getModelPath(NLPMode mode) 40 | { 41 | Element eMode = XmlUtils.getFirstElementByTagName(e_model, mode.toString()); 42 | return (eMode != null) ? XmlUtils.getTrimmedTextContent(eMode) : null; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/experiment/AbstractArgsReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.experiment; 17 | 18 | import org.kohsuke.args4j.CmdLineException; 19 | import org.kohsuke.args4j.CmdLineParser; 20 | 21 | /** 22 | * @since 3.0.0 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | abstract public class AbstractArgsReader 26 | { 27 | @SuppressWarnings("deprecation") 28 | public AbstractArgsReader(String[] args, Object obj) 29 | { 30 | CmdLineParser cmd = new CmdLineParser(obj); 31 | 32 | try 33 | { 34 | cmd.parseArgument(args); 35 | String msg = getErrorMessage(); 36 | if (msg != null) throw new CmdLineException(cmd, msg); 37 | } 38 | catch (CmdLineException e) 39 | { 40 | System.err.println(e.getMessage()); 41 | cmd.printUsage(System.err); 42 | System.exit(1); 43 | } 44 | } 45 | 46 | abstract protected String getErrorMessage(); 47 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/io/FileExtensionFilter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.io; 17 | 18 | import java.io.File; 19 | import java.io.FilenameFilter; 20 | 21 | import edu.emory.clir.clearnlp.util.StringUtils; 22 | import edu.emory.clir.clearnlp.util.constant.StringConst; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class FileExtensionFilter implements FilenameFilter 29 | { 30 | private String s_extension; 31 | 32 | /** @param extension the extension of files to keep (e.g., {@code "txt"}). */ 33 | public FileExtensionFilter(String extension) 34 | { 35 | s_extension = StringUtils.toLowerCase(extension); 36 | } 37 | 38 | @Override 39 | public boolean accept(File dir, String name) 40 | { 41 | return s_extension.equals(StringConst.ASTERISK) || StringUtils.toLowerCase(name).endsWith(s_extension); 42 | } 43 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/BinUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | import org.apache.log4j.Logger; 19 | import org.kohsuke.args4j.CmdLineException; 20 | import org.kohsuke.args4j.CmdLineParser; 21 | 22 | 23 | /** 24 | * @since 3.0.0 25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class BinUtils 28 | { 29 | private BinUtils() {} 30 | 31 | public static final Logger LOG = Logger.getLogger(BinUtils.class); 32 | 33 | /** Initializes arguments using args4j. 
*/ 34 | static public void initArgs(String[] args, Object bean) 35 | { 36 | CmdLineParser cmd = new CmdLineParser(bean); 37 | 38 | try 39 | { 40 | cmd.parseArgument(args); 41 | } 42 | catch (CmdLineException e) 43 | { 44 | System.err.println(e.getMessage()); 45 | cmd.printUsage(System.err); 46 | System.exit(1); 47 | } 48 | catch (Exception e) {e.printStackTrace();} 49 | } 50 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/component/configuration/DEPConfigurationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.configuration; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.clir.clearnlp.component.mode.dep.DEPConfiguration; 24 | import edu.emory.clir.clearnlp.util.IOUtils; 25 | 26 | /** 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DEPConfigurationTest 30 | { 31 | @Test 32 | // @Ignore 33 | public void test() 34 | { 35 | String filename = "src/test/resources/nlp/configuration/configure.xml"; 36 | DEPConfiguration config = new DEPConfiguration(IOUtils.createFileInputStream(filename)); 37 | 38 | assertEquals(config.getBeamSize(), 32); 39 | assertEquals(config.getRootLabel(), "root"); 40 | assertTrue(config.evaluatePunctuation()); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/pair/ObjectIntPair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.pair; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
/**
 * A mutable pair of an object and a primitive {@code int}, ordered by the {@code int} only.
 * {@code equals} is not overridden, so the natural ordering is not consistent with equals.
 *
 * @param <T> the type of the object component.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectIntPair<T> implements Serializable, Comparable<ObjectIntPair<T>>
{
	private static final long serialVersionUID = -5228607179375724504L;

	/** The object component; may be {@code null}. */
	public T   o;
	/** The integer component; the only field used for ordering. */
	public int i;

	/** Creates a pair holding {@code null} and {@code 0}. */
	public ObjectIntPair()
	{
		set(null, 0);
	}

	public ObjectIntPair(T o, int i)
	{
		set(o, i);
	}

	/** Replaces both components. */
	public void set(T o, int i)
	{
		this.o = o;
		this.i = i;
	}

	/**
	 * Compares by the integer component.
	 * Uses {@link Integer#compare(int, int)} rather than subtraction, which overflows
	 * (and flips the sign) when the operands are far apart, e.g. {@code MIN_VALUE - 1}.
	 */
	@Override
	public int compareTo(ObjectIntPair<T> p)
	{
		return Integer.compare(i, p.i);
	}

	/** @return {@code "(o,i)"}; a {@code null} object is rendered as {@code "null"} instead of throwing. */
	@Override
	public String toString()
	{
		return "("+o+","+i+")";
	}
}
/**
 * String constants used when reading VerbNet XML.
 * NOTE(review): the {@code E_} / {@code A_} prefixes presumably stand for XML element and
 * attribute names respectively; confirm against the VerbNet readers that use them.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface VNXml
{
	// ---- E_* constants ----
	String E_VNSUBCLASS = "VNSUBCLASS";
	String E_SEMANTICS  = "SEMANTICS";
	String E_SYNRESTR   = "SYNRESTR";
	String E_SYNTAX     = "SYNTAX";
	String E_FRAMES     = "FRAMES";
	String E_FRAME      = "FRAME";
	String E_PRED       = "PRED";
	String E_ARG        = "ARG";

	// ---- A_* constants (note the two spellings of "value") ----
	String A_ID        = "ID";
	String A_TYPE      = "type";
	String A_VALUE     = "value";
	String A_VALUE_CAP = "Value";
	String A_BOOL      = "bool";

	// ---- argument types ----
	String ARG_TYPE_EVENT         = "Event";
	String ARG_TYPE_THEM_ROLE     = "ThemRole";
	String ARG_TYPE_VERB_SPECIFIC = "VerbSpecific";
	String ARG_TYPE_CONSTANT      = "Constant";

	// ---- syntactic restriction types ----
	String SYNRESTR_TYPE_PLURAL = "plural";
}
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class StackTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | Stack stack = new Stack(3); 35 | stack.push("1"); 36 | stack.push("2"); 37 | stack.push("3"); 38 | 39 | assertEquals("3", stack.peek()); 40 | assertEquals("2", stack.peek(1)); 41 | 42 | assertEquals("3", stack.pop()); 43 | Stack clone = new Stack(stack); 44 | 45 | assertEquals("2", stack.pop()); 46 | assertEquals("1", stack.pop()); 47 | 48 | assertEquals("2", clone.pop()); 49 | assertEquals("1", clone.pop()); 50 | } 51 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTUnitTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.universal; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.clir.clearnlp.dictionary.universal.DTUnit; 26 | 27 | /** 28 | * @since 3.0.0 29 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 30 | */ 31 | public class DTUnitTest 32 | { 33 | @Test 34 | public void test() 35 | { 36 | DTUnit dt = new DTUnit(); 37 | 38 | assertEquals("[1, mg]", Arrays.toString(dt.tokenize("1mg"))); 39 | assertEquals("[1, cm]", Arrays.toString(dt.tokenize("1cm"))); 40 | 41 | assertEquals("[10, MG]", Arrays.toString(dt.tokenize("10MG"))); 42 | assertEquals("[10, CM]", Arrays.toString(dt.tokenize("10CM"))); 43 | 44 | assertTrue(dt.tokenize("1ma") == null); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/bin/PrintTree.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.bin; 17 | 18 | import edu.emory.clir.clearnlp.constituent.CTReader; 19 | import edu.emory.clir.clearnlp.constituent.CTTree; 20 | import edu.emory.clir.clearnlp.util.IOUtils; 21 | import edu.emory.clir.clearnlp.util.constant.StringConst; 22 | 23 | /** 24 | * @since 3.0.0 25 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class PrintTree 28 | { 29 | static public void main(String[] args) 30 | { 31 | String treeDir = args[0]; 32 | String treeFile = args[1]; 33 | int treeId = Integer.parseInt(args[2]); 34 | 35 | CTReader reader = new CTReader(IOUtils.createFileInputStream(treeDir+StringConst.FW_SLASH+treeFile)); 36 | CTTree tree = reader.nextTree(treeId); 37 | reader.close(); 38 | 39 | // System.out.println(tree.toString()); 40 | System.out.println(tree.toString(true,true,StringConst.NEW_LINE)); 41 | } 42 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/collection/ngram/BigramTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.ngram; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | /** 23 | * @since 3.0.0 24 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class BigramTest 27 | { 28 | @Test 29 | public void test() 30 | { 31 | Bigram map = new Bigram<>(); 32 | 33 | map.add("A", "a1"); 34 | map.add("A", "a2"); 35 | map.add("A", "a1"); 36 | map.add("A", "a3"); 37 | 38 | map.add("B", "b1"); 39 | map.add("B", "b2", 2); 40 | map.add("B", "b3"); 41 | 42 | assertEquals("[A, B]", map.getBigramSet().toString()); 43 | 44 | assertEquals("[(a3,1), (a1,2), (a2,1)]", map.toList("A", 0).toString()); 45 | assertEquals("[(b1,1), (b2,2), (b3,1)]", map.toList("B", 0).toString()); 46 | assertEquals("[(a1,2)]", map.toList("A", 1).toString()); 47 | } 48 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/cluster/Term.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.cluster; 17 | 18 | /** 19 | * @since 3.1.2 20 | * @author Jinho D. 
/**
 * A term identified by an integer ID and carrying a mutable {@code float} score.
 * Terms are ordered by ID only; the score does not take part in the comparison.
 *
 * @since 3.1.2
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Term implements Comparable<Term>
{
	private int   id;
	private float score;

	/**
	 * @param i1 the term ID.
	 * @param i2 the initial score, widened to {@code float}.
	 */
	public Term(int i1, int i2)
	{
		set(i1, i2);
	}

	/**
	 * @param id    the term ID.
	 * @param score the initial score.
	 */
	public Term(int id, float score)
	{
		set(id, score);
	}

	/** Replaces both the ID and the score. */
	public void set(int id, float score)
	{
		setID(id);
		setScore(score);
	}

	public int getID()
	{
		return id;
	}

	public void setID(int id)
	{
		this.id = id;
	}

	public float getScore()
	{
		return score;
	}

	public void setScore(float score)
	{
		this.score = score;
	}

	/** Adds {@code score} to the current score. */
	public void addScore(float score)
	{
		this.score += score;
	}

	/**
	 * Compares by ID.
	 * Uses {@link Integer#compare(int, int)} rather than subtraction, which overflows
	 * (and flips the sign) when the IDs are far apart.
	 */
	@Override
	public int compareTo(Term o)
	{
		return Integer.compare(id, o.id);
	}
}
/**
 * A mutable, serializable pair of two objects.
 *
 * @param <T1> the type of the first component.
 * @param <T2> the type of the second component.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Pair<T1,T2> implements Serializable
{
	private static final long serialVersionUID = 8447270640444415417L;

	/** The first component; may be {@code null}. */
	public T1 o1;
	/** The second component; may be {@code null}. */
	public T2 o2;

	/** Creates a pair whose components are both {@code null}. */
	public Pair()
	{
		set(null, null);
	}

	public Pair(T1 o1, T2 o2)
	{
		set(o1, o2);
	}

	/** Replaces both components. */
	public void set(T1 o1, T2 o2)
	{
		this.o1 = o1;
		this.o2 = o2;
	}

	/**
	 * @return {@code "(o1,o2)"}. {@code StringBuilder.append(Object)} renders a {@code null}
	 *         component as {@code "null"}, so a pair built by the no-arg constructor no longer throws.
	 */
	@Override
	public String toString()
	{
		StringBuilder build = new StringBuilder();

		build.append("(");
		build.append(o1);
		build.append(",");
		build.append(o2);
		build.append(")");

		return build.toString();
	}
}
/**
 * Resource paths of the dictionary files used by the tokenizers.
 * Every path is built from {@link #ROOT}; the names suggest they are classpath-relative.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public interface PathTokenizer
{
	/** Common prefix of every tokenizer dictionary path. */
	String ROOT      = "edu/emory/clir/clearnlp/dictionary/tokenizer/";
	/** Directory of the universal dictionaries. */
	String UNIVERSAL = ROOT + "universal/";
	/** Directory of the English dictionaries. */
	String ENGLISH   = ROOT + "english/";

	// ---- files under UNIVERSAL ----
	String CURRENCY_DOLLAR = UNIVERSAL + "currency-dollar.txt";
	String CURRENCY        = UNIVERSAL + "currency.txt";
	String EMOTICONS       = UNIVERSAL + "emoticons.txt";
	String HTML_TAGS       = UNIVERSAL + "html-tags.txt";
	String UNITS           = UNIVERSAL + "units.txt";

	// ---- files under ENGLISH ----
	String EN_ABBREVIATION_PERIOD = ENGLISH + "abbreviation-period.txt";
	String EN_HYPHEN_PREFIX       = ENGLISH + "hyphen-prefix.txt";
	String EN_HYPHEN_SUFFIX       = ENGLISH + "hyphen-suffix.txt";
	String EN_COMPOUNDS           = ENGLISH + "compounds.txt";
}
15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.universal; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.clir.clearnlp.dictionary.universal.DTEmoticon; 26 | 27 | /** 28 | * @since 3.0.0 29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 30 | */ 31 | public class DTEmoticonTest 32 | { 33 | @Test 34 | public void test() 35 | { 36 | DTEmoticon dt = new DTEmoticon(); 37 | String s; 38 | 39 | s = ":"; 40 | assertTrue(dt.getEmoticonRange(s) == null); 41 | 42 | s = ":-)"; 43 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s))); 44 | 45 | s = "Hi:-)"; 46 | assertEquals("[2, 5]", Arrays.toString(dt.getEmoticonRange(s))); 47 | 48 | s = ":-).."; 49 | assertEquals("[0, 3]", Arrays.toString(dt.getEmoticonRange(s))); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/triple/Triple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.triple; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. 
/**
 * A mutable, serializable triple of three objects.
 *
 * @param <T1> the type of the first component.
 * @param <T2> the type of the second component.
 * @param <T3> the type of the third component.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Triple<T1,T2,T3> implements Serializable
{
	private static final long serialVersionUID = 2261656496863083672L;

	/** The first component; may be {@code null}. */
	public T1 o1;
	/** The second component; may be {@code null}. */
	public T2 o2;
	/** The third component; may be {@code null}. */
	public T3 o3;

	public Triple(T1 o1, T2 o2, T3 o3)
	{
		set(o1, o2, o3);
	}

	/** Replaces all three components. */
	public void set(T1 o1, T2 o2, T3 o3)
	{
		this.o1 = o1;
		this.o2 = o2;
		this.o3 = o3;
	}

	/**
	 * @return {@code "(o1,o2,o3)"}. {@code StringBuilder.append(Object)} renders a {@code null}
	 *         component as {@code "null"} instead of throwing a {@code NullPointerException}.
	 */
	@Override
	public String toString()
	{
		StringBuilder build = new StringBuilder();

		build.append("(");
		build.append(o1);
		build.append(",");
		build.append(o2);
		build.append(",");
		build.append(o3);
		build.append(")");

		return build.toString();
	}
}
Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class WikiPrint 29 | { 30 | static public void main(String[] args) 31 | { 32 | WikiIndexMap map = new WikiIndexMap(); 33 | final String wikiFile = args[0]; 34 | final String indexFile = args[1]; 35 | final String title = Joiner.join(args, " ", 2, args.length); 36 | 37 | try 38 | { 39 | ObjectInputStream in = new ObjectInputStream(IOUtils.createXZBufferedInputStream(indexFile)); 40 | ZipFile zip = new ZipFile(wikiFile); 41 | map = (WikiIndexMap)in.readObject(); 42 | System.out.println(map.getPage(zip, title)); 43 | } 44 | catch (Exception e) {e.printStackTrace();} 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/wikipedia/WikiParagraph.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.wikipedia; 17 | 18 | import java.io.Serializable; 19 | import java.util.ArrayList; 20 | import java.util.List; 21 | 22 | import edu.emory.clir.clearnlp.util.Joiner; 23 | import edu.emory.clir.clearnlp.util.constant.StringConst; 24 | 25 | /** 26 | * @since 3.0.0. 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class WikiParagraph implements Serializable 30 | { 31 | private static final long serialVersionUID = 7011678413565546215L; 32 | private List l_sentences; 33 | 34 | public WikiParagraph() 35 | { 36 | l_sentences = new ArrayList<>(); 37 | } 38 | 39 | public List getSentences() 40 | { 41 | return l_sentences; 42 | } 43 | 44 | public void addSentence(String sentence) 45 | { 46 | l_sentences.add(sentence); 47 | } 48 | 49 | @Override 50 | public String toString() 51 | { 52 | return Joiner.join(l_sentences, StringConst.NEW_LINE); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dependency/DEPFeatTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dependency; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.clir.clearnlp.dependency.DEPFeat; 23 | import edu.emory.clir.clearnlp.reader.TSVReader; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DEPFeatTest 30 | { 31 | @Test 32 | public void testDEPFeat() 33 | { 34 | DEPFeat feat = new DEPFeat(); 35 | assertEquals(TSVReader.BLANK, feat.toString()); 36 | 37 | feat = new DEPFeat(TSVReader.BLANK); 38 | assertEquals(TSVReader.BLANK, feat.toString()); 39 | 40 | feat.add("lst=choi|fst=jinho"); 41 | assertEquals("fst=jinho|lst=choi", feat.toString()); 42 | 43 | assertEquals("choi" , feat.get("lst")); 44 | assertEquals("jinho", feat.get("fst")); 45 | assertEquals(null , feat.get("mid")); 46 | 47 | feat.add(TSVReader.BLANK); 48 | assertEquals("fst=jinho|lst=choi", feat.toString()); 49 | } 50 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/feature/type/RelationType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.feature.type; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * The Enum DEPRelationType. 22 | * 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | * @since 3.0.0 25 | */ 26 | public enum RelationType implements Serializable 27 | { 28 | /** The head. */ 29 | h, 30 | /** The left-most dependent. */ 31 | lmd, 32 | /** The right-most dependent. 
/**
 * Relations between a node and other nodes, used as feature types.
 * Constants ending in {@code 2} are the second-order counterparts of the first seven.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 * @since 3.0.0
 */
public enum RelationType implements Serializable
{
	// ---- first order ----

	/** The head. */
	h,
	/** The left-most dependent. */
	lmd,
	/** The right-most dependent. */
	rmd,
	/** The left-nearest dependent. */
	lnd,
	/** The right-nearest dependent. */
	rnd,
	/** The left-nearest sibling. */
	lns,
	/** The right-nearest sibling. */
	rns,

	// ---- second order ----

	/** The grand head. */
	h2,
	/** The 2nd left-most dependent. */
	lmd2,
	/** The 2nd right-most dependent. */
	rmd2,
	/** The 2nd left-nearest dependent. */
	lnd2,
	/** The 2nd right-nearest dependent. */
	rnd2,
	/** The 2nd left-nearest sibling. */
	lns2,
	/** The 2nd right-nearest sibling. */
	rns2;
}
Choi ({@code jinho.choi@emory.edu}) 29 | */ 30 | public class PBLocationTest 31 | { 32 | @Test 33 | public void test() 34 | { 35 | PBLocation loc1 = new PBLocation(0, 1); 36 | PBLocation loc2 = new PBLocation(0, 1, "*"); 37 | 38 | assertEquals( "0:1", loc1.toString()); 39 | assertEquals("*0:1", loc2.toString()); 40 | assertTrue(loc1.matches(loc2.getTerminalID(), loc2.getHeight())); 41 | assertFalse(loc1.equals(loc2)); 42 | 43 | loc1.set(0, 2); 44 | assertFalse(loc1.matches(loc2.getTerminalID(), loc2.getHeight())); 45 | 46 | loc2 = new PBLocation("0:3", ","); 47 | assertEquals(",0:3", loc2.toString()); 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/set/IntHashSet.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.set; 17 | 18 | import java.io.IOException; 19 | import java.io.ObjectInputStream; 20 | import java.io.ObjectOutputStream; 21 | import java.io.Serializable; 22 | 23 | /** 24 | * @since 3.0.0 25 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class IntHashSet extends com.carrotsearch.hppc.IntHashSet implements Serializable 28 | { 29 | private static final long serialVersionUID = 8220093021280571821L; 30 | 31 | public IntHashSet() 32 | { 33 | super(); 34 | } 35 | 36 | public IntHashSet(int initialCapacity) 37 | { 38 | super(initialCapacity); 39 | } 40 | 41 | private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException 42 | { 43 | addAll((int[])in.readObject()); 44 | } 45 | 46 | private void writeObject(ObjectOutputStream o) throws IOException 47 | { 48 | o.writeObject(toArray()); 49 | } 50 | 51 | public void addAll(IntHashSet set) 52 | { 53 | super.addAll(set); 54 | } 55 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/dictionary/english/DTAbbreviation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.english; 17 | 18 | import java.io.InputStream; 19 | import java.util.Set; 20 | 21 | import edu.emory.clir.clearnlp.dictionary.PathTokenizer; 22 | import edu.emory.clir.clearnlp.util.DSUtils; 23 | import edu.emory.clir.clearnlp.util.IOUtils; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DTAbbreviation 30 | { 31 | private Set s_period; 32 | 33 | public DTAbbreviation() 34 | { 35 | init(IOUtils.getInputStreamsFromClasspath(PathTokenizer.EN_ABBREVIATION_PERIOD)); 36 | } 37 | 38 | public DTAbbreviation(InputStream abbreviationPeriod) 39 | { 40 | init(abbreviationPeriod); 41 | } 42 | 43 | public void init(InputStream abbreviationPeriod) 44 | { 45 | s_period = DSUtils.createStringHashSet(abbreviationPeriod, true, true); 46 | } 47 | 48 | public boolean isAbbreviationEndingWithPeriod(String lower) 49 | { 50 | return s_period.contains(lower); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/tokenization/english/ApostropheTokenizerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.tokenization.english; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.clir.clearnlp.tokenization.english.ApostropheEnglishTokenizer; 26 | 27 | /** 28 | * @since 3.0.0 29 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 30 | */ 31 | public class ApostropheTokenizerTest 32 | { 33 | @Test 34 | public void test() 35 | { 36 | ApostropheEnglishTokenizer dt = new ApostropheEnglishTokenizer(); 37 | 38 | assertEquals("[he, 's]" , Arrays.toString(dt.tokenize("he's"))); 39 | assertEquals("[he, 'S]" , Arrays.toString(dt.tokenize("he'S"))); 40 | assertEquals("[do, n't]", Arrays.toString(dt.tokenize("don't"))); 41 | assertEquals("[do, 'nt]", Arrays.toString(dt.tokenize("do'nt"))); 42 | 43 | assertTrue(dt.tokenize("he'dd") == null); 44 | assertTrue(dt.tokenize("dont") == null); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/vector/VectorSpaceModelTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.vector; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.List; 21 | import java.util.Set; 22 | 23 | import org.junit.Test; 24 | 25 | import edu.emory.clir.clearnlp.collection.map.ObjectIntHashMap; 26 | import edu.emory.clir.clearnlp.util.DSUtils; 27 | 28 | /** 29 | * @since 3.0.3 30 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 31 | */ 32 | public class VectorSpaceModelTest 33 | { 34 | @Test 35 | public void test() 36 | { 37 | List terms = DSUtils.toArrayList("A","B","C","D","E"); 38 | Set stopWords = DSUtils.toHashSet("B","D"); 39 | 40 | ObjectIntHashMap map = VectorSpaceModel.getBagOfWords(terms, stopWords, 3); 41 | 42 | assertEquals(6, map.size()); 43 | assertEquals(1, map.get("A")); 44 | assertEquals(1, map.get("C")); 45 | assertEquals(1, map.get("E")); 46 | assertEquals(1, map.get("A_C")); 47 | assertEquals(1, map.get("C_E")); 48 | assertEquals(1, map.get("A_C_E")); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/resources/features/feature_en_pos.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /src/main/resources/samples/clearnlp.txt.cnlp: -------------------------------------------------------------------------------- 1 | 1 The the DT _ 3 det 2 | 2 ClearNLP clearnlp NNP p2=JJ 3 nn 3 | 3 project project NN _ 4 nsubj 4 | 4 provides provide VBZ _ 0 root 5 | 5 software software NN _ 4 dobj 6 | 6 and and CC _ 5 cc 7 | 7 resources resource NNS _ 5 conj 8 | 8 for for IN _ 5 prep 9 | 9 natural natural JJ _ 10 amod 10 | 10 language language NN _ 11 nn 11 | 11 processing processing NN _ 8 pobj 12 | 12 . . . 
_ 4 punct 13 | 14 | 1 It it PRP _ 3 nsubjpass 15 | 2 is be VBZ _ 3 auxpass 16 | 3 developed develop VBN _ 0 root 17 | 4 by by IN _ 3 agent 18 | 5 the the DT _ 6 det 19 | 6 Center center NNP p2=NNPS 4 pobj 20 | 7 for for IN _ 6 prep 21 | 8 Language language NNP _ 7 pobj 22 | 9 and and CC _ 8 cc 23 | 10 Information information NNP _ 11 nn 24 | 11 Research research NNP _ 8 conj 25 | 12 ( ( -LRB- _ 13 punct 26 | 13 CLIR clir NNP _ 11 appos 27 | 14 ) ) -RRB- _ 13 punct 28 | 15 at at IN _ 6 prep 29 | 16 Emory emory NNP _ 17 nn 30 | 17 University university NNP _ 15 pobj 31 | 18 . . . _ 3 punct 32 | 33 | 1 Please please UH _ 2 intj 34 | 2 join join VB _ 0 root 35 | 3 our our PRP$ _ 5 poss 36 | 4 discussion discussion NN _ 5 nn 37 | 5 group group NN _ 2 dobj 38 | 6 if if IN _ 8 mark 39 | 7 you you PRP _ 8 nsubj 40 | 8 want want VBP _ 2 advcl 41 | 9 to to TO _ 10 aux 42 | 10 get get VB _ 8 xcomp 43 | 11 notifications notification NNS _ 10 dobj 44 | 12 about about IN _ 11 prep 45 | 13 new new JJ _ 14 amod 46 | 14 updates update NNS _ 12 pobj 47 | 15 or or CC _ 14 cc 48 | 16 post post NN _ 17 nn 49 | 17 issues issue NNS _ 14 conj 50 | 18 , , , _ 17 punct 51 | 19 suggestions suggestion NNS _ 17 conj 52 | 20 , , , _ 19 punct 53 | 21 questions question NNS _ 19 conj 54 | 22 , , , _ 21 punct 55 | 23 etc etc FW _ 21 conj 56 | 24 . . . _ 2 punct 57 | 58 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/component/configuration/POSConfigurationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.configuration; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import jdk.nashorn.internal.ir.annotations.Ignore; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.clir.clearnlp.component.mode.pos.POSConfiguration; 24 | import edu.emory.clir.clearnlp.util.IOUtils; 25 | import edu.emory.clir.clearnlp.util.lang.TLanguage; 26 | 27 | /** 28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 29 | */ 30 | public class POSConfigurationTest 31 | { 32 | @Test 33 | @Ignore 34 | public void test() 35 | { 36 | String filename = "src/test/resources/nlp/configuration/configure.xml"; 37 | POSConfiguration config = new POSConfiguration(IOUtils.createFileInputStream(filename)); 38 | 39 | assertEquals(TLanguage.ENGLISH, config.getLanguage()); 40 | assertEquals(0.4 , config.getAmbiguityClassThreshold(), 1e-15); 41 | assertEquals(2 , config.getDocumentFrequencyCutoff()); 42 | assertEquals(1500, config.getDocumentSize()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/pair/ObjectDoublePair.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * A mutable pair of an object and a {@code double}, ordered by the {@code double} component.
 *
 * @param <T> the type of the object component.
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class ObjectDoublePair<T> implements Serializable, Comparable<ObjectDoublePair<T>>
{
    private static final long serialVersionUID = -5228607179375724504L;

    /** The object component. */
    public T o;
    /** The numeric component; the only value used by {@link #compareTo(ObjectDoublePair)}. */
    public double d;

    /**
     * @param o the object component.
     * @param d the numeric component.
     */
    public ObjectDoublePair(T o, double d)
    {
        set(o, d);
    }

    /**
     * Overwrites both components.
     * @param o the new object component.
     * @param d the new numeric component.
     */
    public void set(T o, double d)
    {
        this.o = o;
        this.d = d;
    }

    /** @return the object component. */
    public T getObject()
    {
        return o;
    }

    /** @return the numeric component. */
    public double getDouble()
    {
        return d;
    }

    /**
     * Compares two pairs by their numeric components only.
     * {@link Double#compare(double, double)} is used instead of the sign of a difference so that
     * {@code NaN} values still yield a consistent total order.
     *
     * @param p the pair to compare against.
     * @return a negative, zero or positive value as this pair's number is less than, equal to or greater than {@code p}'s.
     */
    @Override
    public int compareTo(ObjectDoublePair<T> p)
    {
        return Double.compare(d, p.d);
    }

    /** @return {@code "(object,number)"}; a {@code null} object is rendered as {@code "null"}. */
    @Override
    public String toString()
    {
        // String concatenation is null-safe, unlike an explicit o.toString().
        return "(" + o + "," + d + ")";
    }
}
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.set; 17 | 18 | import java.io.IOException; 19 | import java.io.ObjectInputStream; 20 | import java.io.ObjectOutputStream; 21 | import java.io.Serializable; 22 | 23 | /** 24 | * @since 3.0.0 25 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class CharHashSet extends com.carrotsearch.hppc.CharHashSet implements Serializable 28 | { 29 | private static final long serialVersionUID = -3796053685010557911L; 30 | 31 | public CharHashSet() 32 | { 33 | super(); 34 | } 35 | 36 | public CharHashSet(int initialCapacity) 37 | { 38 | super(initialCapacity); 39 | } 40 | 41 | public CharHashSet(char... characters) 42 | { 43 | for (char c : characters) 44 | add(c); 45 | } 46 | 47 | private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException 48 | { 49 | addAll((char[])in.readObject()); 50 | } 51 | 52 | private void writeObject(ObjectOutputStream o) throws IOException 53 | { 54 | o.writeObject(toArray()); 55 | } 56 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/mode/srl/SRLEval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.mode.srl; 17 | 18 | import java.util.List; 19 | 20 | import edu.emory.clir.clearnlp.component.evaluation.AbstractF1Eval; 21 | import edu.emory.clir.clearnlp.dependency.DEPTree; 22 | import edu.emory.clir.clearnlp.util.arc.SRLArc; 23 | 24 | /** 25 | * @since 3.2.0 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class SRLEval extends AbstractF1Eval 29 | { 30 | @Override 31 | public void countCorrect(DEPTree sTree, SRLArc[][] goldHeads) 32 | { 33 | int i, size = sTree.size(); 34 | List sHeads; 35 | SRLArc[] gHeads; 36 | 37 | for (i=1; i extends ArrayList implements Serializable 28 | { 29 | private static final long serialVersionUID = -8603527717926741739L; 30 | 31 | public Stack() 32 | { 33 | super(); 34 | } 35 | 36 | public Stack(int initialCapacity) 37 | { 38 | super(initialCapacity); 39 | } 40 | 41 | public Stack(Stack stack) 42 | { 43 | super(stack); 44 | } 45 | 46 | public void push(T element) 47 | { 48 | add(element); 49 | } 50 | 51 | public T pop() 52 | { 53 | int n = size() - 1; 54 | return DSUtils.isRange(this, n) ? remove(n) : null; 55 | } 56 | 57 | public T peek() 58 | { 59 | return peek(0); 60 | } 61 | 62 | public T peek(int n) 63 | { 64 | n = size() - 1 - n; 65 | return DSUtils.isRange(this, n) ? 
get(n) : null; 66 | } 67 | } -------------------------------------------------------------------------------- /src/main/resources/features/feature_en_ner_conll03.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/collection/set/DisjointSet.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.set; 17 | 18 | import java.util.Arrays; 19 | 20 | /** 21 | * @author Jinho D. 
/**
 * Disjoint-set (union-find) structure over the integer IDs {@code 0 .. size-1}.
 * <p>
 * Encoding of the backing array: a negative entry marks a root and its absolute value is the number
 * of elements in that set; a non-negative entry is the index of the element's parent.
 *
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DisjointSet
{
    private int[] s_root;

    /**
     * Creates {@code size} singleton sets.
     * @param size the number of elements.
     */
    public DisjointSet(int size)
    {
        s_root = new int[size];
        Arrays.fill(s_root, -1);
    }

    /**
     * Merges the sets containing the two IDs.
     * The root of the larger set becomes the root of the merged set; on a tie the root of {@code id2} wins.
     *
     * @param id1 an element of the first set.
     * @param id2 an element of the second set.
     * @return the root of the merged set (the common root if both IDs were already in the same set).
     */
    public int union(int id1, int id2)
    {
        final int first  = find(id1);
        final int second = find(id2);

        if (first == second)
            return first;

        // Sizes are stored negated, so the smaller value belongs to the larger set.
        final boolean firstIsLarger = s_root[first] < s_root[second];
        final int winner = firstIsLarger ? first  : second;
        final int loser  = firstIsLarger ? second : first;

        s_root[winner] += s_root[loser];
        s_root[loser]   = winner;
        return winner;
    }

    /**
     * Finds the root of the set containing the ID, and re-links every element on the visited path
     * directly to that root.
     *
     * @param id the element to look up.
     * @return the root of the element's set.
     */
    public int find(int id)
    {
        // Pass 1: walk up to the root.
        int root = id;

        while (s_root[root] >= 0)
            root = s_root[root];

        // Pass 2: point every node on the path straight at the root.
        int node = id;

        while (node != root)
        {
            final int parent = s_root[node];
            s_root[node] = root;
            node = parent;
        }

        return root;
    }

    /**
     * @param id1 the first element.
     * @param id2 the second element.
     * @return {@code true} if both elements have the same root.
     */
    public boolean inSameSet(int id1, int id2)
    {
        final int first  = find(id1);
        final int second = find(id2);
        return first == second;
    }

    /** @return the backing array rendered by {@link Arrays#toString(int[])}. */
    @Override
    public String toString()
    {
        return Arrays.toString(s_root);
    }
}
15 | */ 16 | package edu.emory.clir.clearnlp.component.mode.dep; 17 | 18 | import java.io.InputStream; 19 | 20 | import edu.emory.clir.clearnlp.component.mode.dep.state.AbstractDEPState; 21 | import edu.emory.clir.clearnlp.dependency.DEPNode; 22 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureExtractor; 23 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureToken; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DEPFeatureExtractor extends CommonFeatureExtractor 30 | { 31 | private static final long serialVersionUID = -7336596053366459297L; 32 | 33 | public DEPFeatureExtractor(InputStream in) 34 | { 35 | super(in); 36 | } 37 | 38 | @Override 39 | protected String getFeature(CommonFeatureToken token, AbstractDEPState state, DEPNode node) 40 | { 41 | switch (token.getField()) 42 | { 43 | case t: return Integer.toString(state.distanceBetweenStackAndInput()); 44 | default: return super.getFeature(token, state, node); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/mode/pos/POSFeatureExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.clir.clearnlp.component.mode.pos; 17 | 18 | import java.io.InputStream; 19 | 20 | import edu.emory.clir.clearnlp.dependency.DEPNode; 21 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureExtractor; 22 | import edu.emory.clir.clearnlp.feature.common.CommonFeatureToken; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class POSFeatureExtractor extends CommonFeatureExtractor 29 | { 30 | private static final long serialVersionUID = -7336596053366459297L; 31 | 32 | public POSFeatureExtractor(InputStream in) 33 | { 34 | super(in); 35 | } 36 | 37 | @Override 38 | protected String getFeature(CommonFeatureToken token, POSState state, DEPNode node) 39 | { 40 | String ftr = node.getFormFeature(token.getField()); 41 | if (ftr != null) return state.extractWordFormFeature(node) ? ftr : null; 42 | 43 | switch (token.getField()) 44 | { 45 | case a : return state.getAmbiguityClass(node); 46 | default: return super.getFeature(token, state, node); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/resources/features/feature_en_ner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTHtmlTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this 
file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.universal; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import edu.emory.clir.clearnlp.dictionary.universal.DTHtml; 23 | 24 | /** 25 | * @since 3.0.0 26 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 27 | */ 28 | public class DTHtmlTest 29 | { 30 | @Test 31 | public void test() 32 | { 33 | DTHtml html = new DTHtml(); 34 | StringBuilder build; 35 | String s; 36 | 37 | s = ""&<>"; 38 | assertEquals("\"&<>", html.replace(s)); 39 | 40 | s = "¢£¤¥§©®€"; 41 | build = new StringBuilder(); 42 | 43 | build.append((char)162); 44 | build.append((char)163); 45 | build.append((char)164); 46 | build.append((char)165); 47 | build.append((char)167); 48 | build.append((char)169); 49 | build.append((char)174); 50 | build.append((char)8364); 51 | 52 | assertEquals(build.toString(), html.replace(s)); 53 | 54 | s = "!<&rand;>{"; 55 | assertEquals("!<&rand;>{", html.replace(s)); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dictionary/universal/DTCurrencyTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.universal; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertFalse; 20 | import static org.junit.Assert.assertTrue; 21 | 22 | import java.util.Arrays; 23 | 24 | import org.junit.Test; 25 | 26 | import edu.emory.clir.clearnlp.dictionary.universal.DTCurrency; 27 | 28 | /** 29 | * @since 3.0.0 30 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 31 | */ 32 | public class DTCurrencyTest 33 | { 34 | @Test 35 | public void test() 36 | { 37 | DTCurrency dt = new DTCurrency(); 38 | 39 | assertTrue(dt.isCurrencyDollar("c")); 40 | assertTrue(dt.isCurrencyDollar("us")); 41 | 42 | assertTrue(dt.isCurrency("usd")); 43 | assertTrue(dt.isCurrency("us$")); 44 | 45 | assertFalse(dt.isCurrencyDollar("US")); 46 | assertFalse(dt.isCurrencyDollar("a")); 47 | assertFalse(dt.isCurrency("usb")); 48 | 49 | assertEquals("[USD, 1]", Arrays.toString(dt.tokenize("USD1"))); 50 | assertEquals("[us$, 1]", Arrays.toString(dt.tokenize("us$1"))); 51 | assertTrue(dt.tokenize("u$1") == null); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/prediction/StringPrediction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.prediction; 17 | 18 | import edu.emory.clir.clearnlp.util.MathUtils; 19 | 20 | /** 21 | * @since 3.0.0 22 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 23 | */ 24 | public class StringPrediction extends AbstractPrediction implements Comparable 25 | { 26 | private String s_label; 27 | 28 | public StringPrediction(String label, double score) 29 | { 30 | super(score); 31 | set(label, score); 32 | } 33 | 34 | public void set(String label, double score) 35 | { 36 | setLabel(label); 37 | setScore(score); 38 | } 39 | 40 | public String getLabel() 41 | { 42 | return s_label; 43 | } 44 | 45 | public void setLabel(String label) 46 | { 47 | s_label = label; 48 | } 49 | 50 | public boolean isLabel(String label) 51 | { 52 | return s_label.equals(label); 53 | } 54 | 55 | public void set(StringPrediction p) 56 | { 57 | set(p.s_label, p.d_score); 58 | } 59 | 60 | @Override 61 | public int compareTo(StringPrediction p) 62 | { 63 | return MathUtils.signum(d_score - p.d_score); 64 | } 65 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/dictionary/english/DTHyphenTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.dictionary.english; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import org.junit.Test; 22 | 23 | import edu.emory.clir.clearnlp.dictionary.english.DTHyphen; 24 | 25 | /** 26 | * @since 3.0.0 27 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 28 | */ 29 | public class DTHyphenTest 30 | { 31 | @Test 32 | public void test() 33 | { 34 | DTHyphen dt = new DTHyphen(); 35 | 36 | assertTrue(dt.isPrefix("inter")); 37 | assertTrue(dt.isSuffix("ful")); 38 | assertTrue(dt.preserveHyphen("inter-connect".toCharArray(), 5)); 39 | assertTrue(dt.preserveHyphen("beauti-ful".toCharArray(), 6)); 40 | assertTrue(dt.preserveHyphen("b-a-d".toCharArray(), 1)); 41 | assertTrue(dt.preserveHyphen("b-a-d".toCharArray(), 3)); 42 | 43 | assertFalse(dt.preserveHyphen("inte-connect".toCharArray(), 4)); 44 | assertFalse(dt.preserveHyphen("beauti-fu".toCharArray(), 6)); 45 | assertFalse(dt.preserveHyphen("b-c-d".toCharArray(), 1)); 46 | assertFalse(dt.preserveHyphen("b-c-d".toCharArray(), 3)); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/headrule/HeadTagSetTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.headrule; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertFalse; 20 | import static org.junit.Assert.assertTrue; 21 | 22 | import org.junit.Test; 23 | 24 | import edu.emory.clir.clearnlp.constituent.CTNode; 25 | import edu.emory.clir.clearnlp.conversion.headrule.HeadTagSet; 26 | 27 | /** 28 | * @since 3.0.0 29 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 30 | */ 31 | public class HeadTagSetTest 32 | { 33 | @Test 34 | public void testHeadTagSet() 35 | { 36 | String tags = "NN.*|NP|-SBJ|-TPC"; 37 | HeadTagSet set = new HeadTagSet(tags); 38 | CTNode node; 39 | 40 | node = new CTNode("NN", null); 41 | assertTrue(set.matches(node)); 42 | 43 | node.setConstituentTag("NNS"); 44 | assertTrue(set.matches(node)); 45 | 46 | node.setConstituentTag("NP"); 47 | assertTrue(set.matches(node)); 48 | 49 | node.setConstituentTag("S"); 50 | assertFalse(set.matches(node)); 51 | 52 | node.addFunctionTag("SBJ"); 53 | assertTrue(set.matches(node)); 54 | 55 | assertEquals(tags, "NN.*|NP|-SBJ|-TPC"); 56 | } 57 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/component/evaluation/AbstractF1Eval.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.component.evaluation; 17 | 18 | import edu.emory.clir.clearnlp.util.MathUtils; 19 | 20 | 21 | /** 22 | * @since 3.0.0 23 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | abstract public class AbstractF1Eval extends AbstractEval 26 | { 27 | protected int p_total; 28 | protected int r_total; 29 | protected int n_correct; 30 | 31 | public AbstractF1Eval() 32 | { 33 | clear(); 34 | } 35 | 36 | @Override 37 | public void clear() 38 | { 39 | p_total = 0; 40 | r_total = 0; 41 | n_correct = 0; 42 | } 43 | 44 | @Override 45 | public double getScore() 46 | { 47 | return getScores()[0]; 48 | } 49 | 50 | @Override 51 | public String toString() 52 | { 53 | double[] d = getScores(); 54 | return String.format("F1: %5.2f, P: %5.2f, R: %5.2f", d[0], d[1], d[2]); 55 | } 56 | 57 | private double[] getScores() 58 | { 59 | double precision = 100d * n_correct / p_total; 60 | double recall = 100d * n_correct / r_total; 61 | 62 | return new double[]{MathUtils.getF1(precision, recall), precision, recall}; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/collection/ngram/UnigramTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.collection.ngram; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | /** 23 | * @since 3.0.0 24 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 25 | */ 26 | public class UnigramTest 27 | { 28 | @Test 29 | public void test() 30 | { 31 | Unigram map = new Unigram<>(); 32 | 33 | map.add("A"); 34 | map.add("B", 2); 35 | map.add("C"); 36 | map.add("A"); 37 | map.add("B", 2); 38 | map.add("D"); 39 | 40 | assertEquals("[(C,1), (D,1), (B,4), (A,2)]", map.toList(0).toString()); 41 | assertEquals("[(C,0.125), (D,0.125), (B,0.5), (A,0.25)]", map.toList(0d).toString()); 42 | 43 | assertEquals("[(B,4), (A,2)]", map.toList(1).toString()); 44 | assertEquals("[(B,0.5), (A,0.25)]", map.toList(0.2).toString()); 45 | 46 | assertEquals("[A, B, C, D]", map.keySet(0).toString()); 47 | assertEquals("[A, B, C, D]", map.keySet(0d).toString()); 48 | 49 | assertEquals("[A, B]", map.keySet(1).toString()); 50 | assertEquals("[A, B]", map.keySet(0.2).toString()); 51 | 52 | // System.out.println(map.getBest()); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/wordnet/WNIndexMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.lexicon.wordnet; 17 | 18 | import java.io.BufferedReader; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.util.HashMap; 22 | import java.util.Map; 23 | 24 | import edu.emory.clir.clearnlp.util.IOUtils; 25 | 26 | /** 27 | * @since 3.0.0 28 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 29 | */ 30 | public class WNIndexMap 31 | { 32 | Map m_index; 33 | 34 | /** 35 | * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}. 36 | * @throws IOException 37 | */ 38 | public WNIndexMap(InputStream in, WNDataMap map) throws IOException 39 | { 40 | BufferedReader reader = IOUtils.createBufferedReader(in); 41 | WNIndex index; 42 | String line; 43 | 44 | m_index = new HashMap<>(); 45 | 46 | while ((line = reader.readLine()) != null) 47 | { 48 | if (line.startsWith(" ")) continue; 49 | index = new WNIndex(map, line); 50 | m_index.put(index.getLemma(), index); 51 | } 52 | 53 | reader.close(); 54 | } 55 | 56 | public WNIndex getIndex(String lemma) 57 | { 58 | return m_index.get(lemma); 59 | } 60 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/constant/CharConst.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the 'License'); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an 'AS IS' BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util.constant; 17 | 18 | 19 | /** 20 | * @since 3.0.0 21 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 22 | */ 23 | public interface CharConst 24 | { 25 | char PLUS = '+'; 26 | char ASTERISK = '*'; 27 | char EQUAL = '='; 28 | char FW_SLASH = '/'; 29 | char BW_SLASH = '\\'; 30 | char PIPE = '|'; 31 | char UNDERSCORE = '_'; 32 | char HYPHEN = '-'; 33 | char COMMA = ','; 34 | char COLON = ':'; 35 | char SEMICOLON = ';'; 36 | char PERIOD = '.'; 37 | char QUESTION = '?'; 38 | char EXCLAMATION = '!'; 39 | char PERCENT = '%'; 40 | char POUND = '#'; 41 | char DOLLAR = '$'; 42 | char AMPERSAND = '&'; 43 | char AT = '@'; 44 | char TILDA = '~'; 45 | char PRIME = '`'; 46 | char EMPTY = 0; 47 | char LESS_THAN = '<'; 48 | char GREATER_THAN = '>'; 49 | char SINGLE_QUOTE = '\''; 50 | char DOUBLE_QUOTE = '"'; 51 | 52 | char LRB = '('; 53 | char RRB = ')'; 54 | char LCB = '{'; 55 | char RCB = '}'; 56 | char LSB = '['; 57 | char RSB = ']'; 58 | 59 | char ZERO = '0'; 60 | 61 | char SPACE = ' '; 62 | char TAB = '\t'; 63 | char NEW_LINE = '\n'; 64 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/configuration/LiblinearTrainerConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with 
the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.configuration; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public class LiblinearTrainerConfiguration extends DefaultTrainerConfiguration 23 | { 24 | private double d_cost; 25 | private double d_eps; 26 | private double d_bias; 27 | 28 | public LiblinearTrainerConfiguration(byte vectorType, boolean binary, int labelCutoff, int featureCutoff, int numberOfThreads, double cost, double epsilon, double bias) 29 | { 30 | super(vectorType, binary, labelCutoff, featureCutoff, numberOfThreads); 31 | setCost(cost); 32 | setEpsilon(epsilon); 33 | setBias(bias); 34 | } 35 | 36 | public double getCost() 37 | { 38 | return d_cost; 39 | } 40 | 41 | public double getEpsilon() 42 | { 43 | return d_eps; 44 | } 45 | 46 | public double getBias() 47 | { 48 | return d_bias; 49 | } 50 | 51 | public void setCost(double cost) 52 | { 53 | d_cost = cost; 54 | } 55 | 56 | public void setEpsilon(double eps) 57 | { 58 | d_eps = eps; 59 | } 60 | 61 | public void setBias(double bias) 62 | { 63 | d_bias = bias; 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/lexicon/verbnet/VNFrame.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance 
with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.lexicon.verbnet;

import java.io.Serializable;

import org.w3c.dom.Element;

import edu.emory.clir.clearnlp.util.XmlUtils;

/**
 * A frame holding one {@link VNSyntax} and one {@link VNSemantics}, both built
 * from child elements of a frame XML element.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class VNFrame implements Serializable
{
    private static final long serialVersionUID = 1907495757606414993L;

    private VNSyntax    v_syntax;
    private VNSemantics v_semantics;

    /**
     * Builds the syntax from the first {@code VNXml.E_SYNTAX} child of {@code eFrame},
     * then the semantics from its first {@code VNXml.E_SEMANTICS} child.
     *
     * @param eFrame the frame element to read from.
     */
    public VNFrame(Element eFrame)
    {
        setSyntax(new VNSyntax(XmlUtils.getFirstElementByTagName(eFrame, VNXml.E_SYNTAX)));
        setSemantics(new VNSemantics(XmlUtils.getFirstElementByTagName(eFrame, VNXml.E_SEMANTICS)));
    }

    // ---- syntax ----

    /** @return the syntax of this frame. */
    public VNSyntax getSyntax()
    {
        return this.v_syntax;
    }

    /** @param syntax the syntax to store. */
    public void setSyntax(VNSyntax syntax)
    {
        this.v_syntax = syntax;
    }

    // ---- semantics ----

    /** @return the semantics of this frame. */
    public VNSemantics getSemantics()
    {
        return this.v_semantics;
    }

    /** @param semantics the semantics to store. */
    public void setSemantics(VNSemantics semantics)
    {
        this.v_semantics = semantics;
    }
}
--------------------------------------------------------------------------------
/src/test/resources/constituent/functionTags.parse:
--------------------------------------------------------------------------------
(TOP (S (S (NP-SBJ (CC both)
(NNP Bush)
(CC and)
(NNP Rice)))
(VP (VBP have)
(VP (VBN delivered)
(NP (NP (NNS
speeches)) 8 | (, ,) 9 | (SBAR (WHNP-1 (WDT which)) 10 | (S (NP-SBJ (-NONE- *T*-1)) 11 | (VP (VBP are) 12 | (ADJP-PRD (RB very) 13 | (JJ clear)))))))))) 14 | 15 | (TOP (S (NP-SBJ-1 (NNP Mr.) 16 | (NNP Clinton)) 17 | (VP (VBD was) 18 | (VP (VBN joined) 19 | (NP (-NONE- *-1)) 20 | (PP (IN by) 21 | (NP-LGS (JJ several) 22 | (JJ key) 23 | (NN republican) 24 | (NNS leaders))))) 25 | (. .))) 26 | 27 | (TOP (SBARQ (WHNP-1 (WP Who)) 28 | (SQ-CLF (VBZ is) 29 | (NP-SBJ (PRP it)) 30 | (NP-PRD (-NONE- *T*-1)) 31 | (SBAR (WHNP-2 (WDT that)) 32 | (S (NP-SBJ-3 (-NONE- *T*-2)) 33 | (NP-TMP (NN today)) 34 | (VP (VBZ wants) 35 | (S (NP-SBJ (-NONE- *PRO*-3)) 36 | (VP (TO to) 37 | (VP (VB blow) 38 | (NP (NNS things)) 39 | (PRT (RP up)) 40 | (PP-LOC (IN in) 41 | (NP (NNP Lebanon))))))))) 42 | (, ,) 43 | (NP-VOC (NNP Doctor))) 44 | (. ?))) -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/morphology/english/EnglishDerivation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.morphology.english; 17 | 18 | import java.util.List; 19 | import java.util.Set; 20 | 21 | import edu.emory.clir.clearnlp.morphology.AbstractAffixMatcher; 22 | 23 | /** 24 | * @since 3.0.3 25 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 26 | */ 27 | public class EnglishDerivation 28 | { 29 | List suffix_matchers; 30 | 31 | public EnglishDerivation(List affixMatchers) 32 | { 33 | init(affixMatchers); 34 | } 35 | 36 | private void init(List affixMatchers) 37 | { 38 | suffix_matchers = affixMatchers; 39 | 40 | if (suffix_matchers == null) 41 | throw new IllegalArgumentException("The suffix matcher list must not be null."); 42 | } 43 | 44 | public List getSuffixMatchers() 45 | { 46 | return suffix_matchers; 47 | } 48 | 49 | public String getBaseForm(String lemma, Set baseSet) 50 | { 51 | String base; 52 | 53 | for (AbstractAffixMatcher matcher : suffix_matchers) 54 | { 55 | base = matcher.getBaseForm(baseSet, lemma); 56 | if (base != null) return base; 57 | } 58 | 59 | return null; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/classification/instance/AbstractInstance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.classification.instance; 17 | 18 | import edu.emory.clir.clearnlp.classification.vector.AbstractFeatureVector; 19 | 20 | 21 | /** 22 | * @since 3.0.0 23 | * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu}) 24 | */ 25 | abstract public class AbstractInstance 26 | { 27 | private String s_label; 28 | private F f_vector; 29 | 30 | public AbstractInstance(String label, F vector) 31 | { 32 | set(label, vector); 33 | } 34 | 35 | public String getLabel() 36 | { 37 | return s_label; 38 | } 39 | 40 | public F getFeatureVector() 41 | { 42 | return f_vector; 43 | } 44 | 45 | public void set(String label, F vector) 46 | { 47 | setLabel(label); 48 | setFeatureVector(vector); 49 | } 50 | 51 | public void setLabel(String label) 52 | { 53 | s_label = label; 54 | } 55 | 56 | public void setFeatureVector(F vector) 57 | { 58 | f_vector = vector; 59 | } 60 | 61 | public boolean isLabel(String label) 62 | { 63 | return s_label.equals(label); 64 | } 65 | 66 | public String toString() 67 | { 68 | return s_label + AbstractFeatureVector.DELIM_FEATURE + f_vector.toString(); 69 | } 70 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/headrule/HeadRuleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package edu.emory.clir.clearnlp.headrule; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertFalse; 20 | import static org.junit.Assert.assertTrue; 21 | 22 | import org.junit.Test; 23 | 24 | import edu.emory.clir.clearnlp.constituent.CTNode; 25 | import edu.emory.clir.clearnlp.conversion.headrule.HeadRule; 26 | import edu.emory.clir.clearnlp.conversion.headrule.HeadTagSet; 27 | 28 | 29 | /** @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */ 30 | public class HeadRuleTest 31 | { 32 | @Test 33 | public void testHeadRule() 34 | { 35 | String tags = "NN.*|NP;VB.*|VP"; 36 | HeadRule rule = new HeadRule(HeadRule.DIR_LEFT_TO_RIGHT, tags); 37 | CTNode node1 = new CTNode("NNS", null); 38 | CTNode node2 = new CTNode("VBN", null); 39 | 40 | assertFalse(rule.isRightToLeft()); 41 | 42 | HeadTagSet[] headTags = rule.getHeadTags(); 43 | 44 | HeadTagSet headTag = headTags[0]; 45 | assertTrue(headTag.matches(node1)); 46 | assertFalse(headTag.matches(node2)); 47 | 48 | headTag = headTags[1]; 49 | assertFalse(headTag.matches(node1)); 50 | assertTrue(headTag.matches(node2)); 51 | 52 | assertEquals(tags, rule.toString()); 53 | } 54 | } -------------------------------------------------------------------------------- /src/test/java/edu/emory/clir/clearnlp/constituent/CTReaderTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.constituent;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.List;

import org.junit.Test;

import edu.emory.clir.clearnlp.util.IOUtils;


/**
 * Round-trip test for {@link CTReader}: the trees read from a file are printed,
 * the concatenated output is read again, and both passes must agree.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class CTReaderTest
{
    @Test
    public void testCTReader() throws Exception
    {
        // NOTE(review): the repository tree lists only functionTags.parse and normalize.parse
        // under src/test/resources/constituent -- confirm that this resource exists.
        String filename = "src/test/resources/constituent/constituent.parse";

        StringBuilder build = new StringBuilder();
        List<String> trees = new ArrayList<>();
        CTTree tree;

        // First pass: remember the string form of every tree and concatenate them.
        CTReader reader = new CTReader(IOUtils.createFileInputStream(filename));

        try
        {
            while ((tree = reader.nextTree()) != null)
            {
                String tmp = tree.toString();
                trees.add(tmp);
                build.append(tmp);
            }
        }
        finally
        {
            reader.close();
        }

        // Second pass: re-read the concatenated output and compare tree by tree.
        reader = new CTReader(IOUtils.createByteArrayInputStream(build.toString()));
        int i = 0;

        try
        {
            for (; (tree = reader.nextTree()) != null; i++)
                assertEquals(trees.get(i), tree.toString());
        }
        finally
        {
            reader.close();
        }

        // Without this check the loop above passes vacuously when the second pass yields fewer trees.
        assertEquals(trees.size(), i);
    }
}
--------------------------------------------------------------------------------
/src/test/resources/constituent/normalize.parse:
--------------------------------------------------------------------------------
( (S (PP (IN In) (NP (NN order) (S (NP-SBJ (-NONE- *PRO*)) (VP (TO to) (VP (VB determine) (NP (NP (DT the) (NN sequence)) (PP (IN of) (NP (DT the) (JJ entire) (NN transcript))))))))) (, ,) (S (S (NP-SBJ-1=4 (NP (NN RT) (HYPH -) (NN PCR)) (VP (VBG using) (NP (NP (NP (NNS primers)) (PP-LOC (IN in) (NP (NNS exons) (NML (CD 10) (CC and) (CD 11))))) (VP (VBN paired) (NP (-NONE- *)) (PP (IN with) (NP (NP (DT a) (NN primer)) (PP-LOC (IN in) (NP (NN intron) (CD 12))))))))) (VP (VBD was) (VP=3 (VBN
performed) (NP-1 (-NONE- *)) (S-MNR (NP-SBJ (-NONE- *PRO*)) (VP (VBG using) (NP (NML (NML (NML (NN BALB) (HYPH /) (NN c)) (NN mouse)) (NN brain)) (JJ total) (NN RNA))))))) (CC and) (S (NP-SBJ-2=4 (DT the) (VBG resulting) (NNS products)) (VP=3 (VBN sequenced) (NP-2 (-NONE- *))))) (. .)) ) 2 | ( (S (NP-SBJ (NN Figure) (CD 1)) (VP (VBZ shows) (NP (NP (DT the) (JJ average) (NN IOP)) (PP (IN of) (NP (NP (NP (DT a) (NN number)) (PP (IN of) (NP (JJ inbred) (NN mouse) (NNS strains)))) (SBAR (WHNP-1 (WDT that)) (S (NP-SBJ-1 (-NONE- *T*)) (VP (VBD were) (VP (VBN housed) (NP-1 (-NONE- *)) (PP (IN in) (NP (DT the) (JJ same) (JJ environmental) (NNS conditions))))))))))) (. .)) ) 3 | ( (S (S (NP-SBJ (NP (PRP It)) (SBAR-1 (-NONE- *EXP*))) (VP (VBZ is) (VP (VBG becoming) (ADJP-PRD (RB increasingly) (JJ clear)) (SBAR-1 (IN that) (S (NP-SBJ (NP (JJ many) (NNS forms)) (PP (IN of) (NP (NN glaucoma)))) (VP (VBP have) (NP (DT a) (JJ genetic) (NN component))))) (PRN (-LRB- [) (NP (CD 6) (, ,) (CD 7)) (-RRB- ]))))) (, ,) (CC and) (S (NP-SBJ-3 (JJ much) (JJ current) (NN research)) (VP (VBZ is) (VP (VBN focused) (NP-3 (-NONE- *)) (PP (IN on) (S-NOM (NP-SBJ (-NONE- *PRO*)) (VP (VBG identifying) (NP (NP (NP (JJ chromosomal) (NNS regions)) (CC and) (NP (NNS genes))) (SBAR (WHNP-2 (WDT that)) (S (NP-SBJ-2 (-NONE- *T*)) (VP (VBP contribute) (PP (IN to) (NP (NN glaucoma)))))))))) (PRN (-LRB- [) (NP (NP (CD 8)) (PP (SYM -) (NP (CD 10)))) (-RRB- ]))))) (. .)) ) -------------------------------------------------------------------------------- /src/main/java/edu/emory/clir/clearnlp/util/HashUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014, Emory University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package edu.emory.clir.clearnlp.util; 17 | 18 | /** 19 | * @since 3.0.0 20 | * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) 21 | */ 22 | public class HashUtils 23 | { 24 | private static final long FNV_BASIS_64 = 0xcbf29ce484222325L; 25 | private static final long FNV_PRIME_64 = 0x100000001b3L; 26 | 27 | private static final int FNV_BASIS_32 = 0x811c9dc5; 28 | private static final int FNV_PRIME_32 = 0x01000193; 29 | 30 | public static int fnv1aHash32(final String s) 31 | { 32 | return fnv1aHash32(s, FNV_BASIS_32); 33 | } 34 | 35 | public static int fnv1aHash32(final String s, int basis) 36 | { 37 | char[] cs = s.toCharArray(); 38 | int i, len = s.length(); 39 | 40 | for (i=0; i= get(i)) break; 73 | swap(k, i); 74 | } 75 | } 76 | } 77 | --------------------------------------------------------------------------------