├── README.md └── TJBatchExtractor ├── Documentation ├── TJBatchExtractor.aux ├── TJBatchExtractor.log ├── TJBatchExtractor.pdf ├── TJBatchExtractor.tex └── TJBatchExtractor.tex~ ├── Example_features.txt ├── Example_text.txt ├── README ├── TJBatchExtractor.java ├── TJInfoExtractor ├── TJInfoExtractor.zip ├── application-resources │ ├── TJ_Gazetteer │ │ ├── TJ_Gazetteer.def │ │ ├── body_parts.lst │ │ ├── city.lst │ │ ├── color.list │ │ ├── color.lst │ │ ├── color_pref.lst │ │ ├── country.lst │ │ ├── country_adj.lst │ │ ├── country_adj_plural.lst │ │ ├── cup0.list │ │ ├── cup1.list │ │ ├── cup2.list │ │ ├── cup3.lst │ │ ├── cup4.lst │ │ ├── cup5.lst │ │ ├── currency │ │ ├── date_key.lst │ │ ├── date_unit.lst │ │ ├── day.lst │ │ ├── eye.lst │ │ ├── facility_key.lst │ │ ├── festival.lst │ │ ├── firstpersonPronouns.lst │ │ ├── greeting.lst │ │ ├── hair.list │ │ ├── hair.lst │ │ ├── hair2.list │ │ ├── hair2.lst │ │ ├── hair_pref.lst │ │ ├── height.lst │ │ ├── height2.lst │ │ ├── hour.lst │ │ ├── loc_generalkey.lst │ │ ├── loc_key.lst │ │ ├── loc_prekey.lst │ │ ├── men.lst │ │ ├── minutes.lst │ │ ├── months.lst │ │ ├── name_pre.lst │ │ ├── new_adj.lst │ │ ├── numbers.lst │ │ ├── numbers2.lst │ │ ├── ordinal.lst │ │ ├── org_base.lst │ │ ├── org_key.lst │ │ ├── percent.lst │ │ ├── person_ambig.lst │ │ ├── person_ending.lst │ │ ├── person_female.lst │ │ ├── person_full.lst │ │ ├── person_male.lst │ │ ├── person_relig.lst │ │ ├── phone_prefix.lst │ │ ├── region.lst │ │ ├── service_event.lst │ │ ├── service_massage.lst │ │ ├── skin.lst │ │ ├── skin2.lst │ │ ├── skin3.lst │ │ ├── sports.lst │ │ ├── stop.lst │ │ ├── stop2.lst │ │ ├── stop_identifier.lst │ │ ├── street.lst │ │ ├── surname_prefix.lst │ │ ├── thirdpersonPronouns.lst │ │ ├── time.lst │ │ ├── time_ampm.lst │ │ ├── time_day.lst │ │ ├── time_hr.list │ │ ├── time_hr.lst │ │ ├── time_min.list │ │ ├── time_min.lst │ │ ├── time_modifier.lst │ │ ├── time_unit.lst │ │ ├── time_unithr.lst │ │ ├── timezone.lst │ │ ├── 
title_female.lst │ │ ├── title_male.lst │ │ ├── weight.lst │ │ ├── year.lst │ │ └── yearsold.lst │ └── jape_transducers │ │ ├── .cost.jape.swp │ │ ├── Names.jape │ │ ├── Names.jape~ │ │ ├── TJ_Annotation.jape │ │ ├── TJ_Annotation.jape~ │ │ ├── age.jape │ │ ├── age.jape~ │ │ ├── cleanNumbers.jape │ │ ├── cleanNumbers.jape~ │ │ ├── cleanNumbers2.jape │ │ ├── cleanNumbers2.jape~ │ │ ├── cleandups.jape │ │ ├── cleandups.jape~ │ │ ├── cost_dollarsign.jape │ │ ├── cost_dollarsign.jape~ │ │ ├── cost_duration.jape │ │ ├── cost_duration.jape~ │ │ ├── cost_service.jape │ │ ├── cost_service.jape~ │ │ ├── cost_unitless.jape │ │ ├── cost_unitless.jape~ │ │ ├── dotcom.jape │ │ ├── dotcom.jape~ │ │ ├── email.jape │ │ ├── email.jape~ │ │ ├── ethnicity.jape │ │ ├── ethnicity.jape~ │ │ ├── haireyeskin.jape │ │ ├── haireyeskin.jape~ │ │ ├── heightweight.jape │ │ ├── heightweight.jape~ │ │ ├── heightweight2.jape │ │ ├── heightweight2.jape~ │ │ ├── measurements.jape │ │ ├── measurements.jape~ │ │ ├── percent.jape │ │ ├── percent.jape~ │ │ ├── restrictions.jape │ │ ├── restrictions.jape~ │ │ ├── time.jape │ │ ├── time.jape~ │ │ ├── time2.jape │ │ ├── time2.jape~ │ │ ├── time3.jape │ │ ├── time3.jape~ │ │ ├── url.jape │ │ ├── url.jape~ │ │ ├── url_pre.jape │ │ └── url_pre.jape~ ├── application.xgapp ├── licence.html └── plugins │ ├── ANNIE │ ├── .annie-defaults-metadata │ │ ├── ann-job-annie.png │ │ ├── icon.png │ │ ├── long-desc.html │ │ ├── metadata.xml │ │ └── short-desc.html │ ├── ANNIE_with_defaults.gapp │ ├── build.xml │ ├── creole.xml │ └── resources │ │ ├── NE │ │ ├── address.jape │ │ ├── clean.jape │ │ ├── date.jape │ │ ├── date_pre.jape │ │ ├── email.jape │ │ ├── final.jape │ │ ├── first.jape │ │ ├── firstname.jape │ │ ├── gender.jape │ │ ├── identifier.jape │ │ ├── jobtitle.jape │ │ ├── loc_context.jape │ │ ├── main.jape │ │ ├── name.jape │ │ ├── name_context.jape │ │ ├── name_post.jape │ │ ├── number.jape │ │ ├── org_context.jape │ │ ├── reldate.jape │ │ ├── unknown.jape │ │ 
├── url.jape │ │ └── url_pre.jape │ │ ├── VP │ │ └── VerbGroups.jape │ │ ├── gazetteer │ │ ├── abbreviations.lst │ │ ├── adbc.lst │ │ ├── airport.lst │ │ ├── airports.lst │ │ ├── cdg.lst │ │ ├── charities.lst │ │ ├── city.lst │ │ ├── city_cap.lst │ │ ├── city_uk.lst │ │ ├── colours.lst │ │ ├── company.lst │ │ ├── company_cap.lst │ │ ├── country.lst │ │ ├── country_abbrev.lst │ │ ├── country_adj.lst │ │ ├── country_cap.lst │ │ ├── currency │ │ ├── currency_prefix.lst │ │ ├── currency_unit.lst │ │ ├── date.lst │ │ ├── date_key.lst │ │ ├── date_post.lst │ │ ├── date_pre.lst │ │ ├── date_unit.lst │ │ ├── datespan.lst │ │ ├── day.lst │ │ ├── day_cap.lst │ │ ├── department.lst │ │ ├── determiner.lst │ │ ├── facility.lst │ │ ├── facility_key.lst │ │ ├── facility_key_ext.lst │ │ ├── festival.lst │ │ ├── govern_key.lst │ │ ├── government.lst │ │ ├── greeting.lst │ │ ├── hour.lst │ │ ├── ident_prekey.lst │ │ ├── jobtitles.lst │ │ ├── lists.def │ │ ├── loc_generalkey.lst │ │ ├── loc_key.lst │ │ ├── loc_prekey.lst │ │ ├── loc_prekey_lower.lst │ │ ├── loc_relig.lst │ │ ├── mapping.def │ │ ├── ministry.lst │ │ ├── minutes.lst │ │ ├── months.lst │ │ ├── months_lower.lst │ │ ├── mountain.lst │ │ ├── new_adj.lst │ │ ├── new_cdg.lst │ │ ├── newspapers.lst │ │ ├── non_company.lst │ │ ├── nonspec_date.lst │ │ ├── not_org.lst │ │ ├── number_fold.lst │ │ ├── numbers.lst │ │ ├── ordinal.lst │ │ ├── org_base.lst │ │ ├── org_ending.lst │ │ ├── org_key.lst │ │ ├── org_key_cap.lst │ │ ├── org_pre.lst │ │ ├── org_spur.lst │ │ ├── organization.lst │ │ ├── organization_nouns.lst │ │ ├── other_people.lst │ │ ├── othorg_key.lst │ │ ├── percent.lst │ │ ├── person_ambig.lst │ │ ├── person_ambig.old.lst │ │ ├── person_ambig2.lst │ │ ├── person_ambig_lower.lst │ │ ├── person_ending.lst │ │ ├── person_female.lst │ │ ├── person_female_cap.lst │ │ ├── person_female_lower.lst │ │ ├── person_first.lst │ │ ├── person_full.lst │ │ ├── person_male.lst │ │ ├── person_male_cap.lst │ │ ├── person_male_lower.lst 
│ │ ├── person_relig.lst │ │ ├── person_sci.lst │ │ ├── person_spur.lst │ │ ├── phone_prefix.lst │ │ ├── planet.lst │ │ ├── province.lst │ │ ├── province_aa.lst │ │ ├── province_ab.lst │ │ ├── racecourse.lst │ │ ├── region.lst │ │ ├── region_cap.lst │ │ ├── region_uk.lst │ │ ├── rivers.lst │ │ ├── sports.lst │ │ ├── spur.lst │ │ ├── spur_ident.lst │ │ ├── stop.lst │ │ ├── street.lst │ │ ├── surname_prefix.lst │ │ ├── team.lst │ │ ├── time.lst │ │ ├── time_ampm.lst │ │ ├── time_key.lst │ │ ├── time_modifier.lst │ │ ├── time_unit.lst │ │ ├── times.lst │ │ ├── timespan.lst │ │ ├── timex_pre.lst │ │ ├── timezone.lst │ │ ├── title.lst │ │ ├── title_female.lst │ │ ├── title_lower.lst │ │ ├── title_lowercase.lst │ │ ├── title_male.lst │ │ ├── title_mil.lst │ │ ├── title_pol.lst │ │ ├── tvcompany.lst │ │ ├── university_uk.lst │ │ ├── water.lst │ │ └── year.lst │ │ ├── heptag │ │ ├── lexicon │ │ ├── lexicon_all │ │ ├── lexicon_cap │ │ ├── lexicon_lower │ │ ├── old_lexicon │ │ ├── rules_cap │ │ └── ruleset │ │ ├── othomatcher │ │ ├── acelists.def │ │ ├── alias.lst │ │ ├── alias_ace.lst │ │ ├── cdg.lst │ │ ├── chinese_alias.lst │ │ ├── connnector.lst │ │ ├── def_art.lst │ │ ├── listsNM.def │ │ ├── nickname.txt │ │ ├── prepos.lst │ │ └── spur_match.lst │ │ ├── regex-splitter │ │ ├── external-split-patterns.txt │ │ ├── internal-split-patterns.txt │ │ └── non-split-patterns.txt │ │ ├── schema │ │ ├── ANNIE-Schemas.xml │ │ ├── AddressSchema.xml │ │ ├── DateSchema.xml │ │ ├── IdentifierSchema.xml │ │ ├── LocationSchema.xml │ │ ├── MoneySchema.xml │ │ ├── OrganizationSchema.xml │ │ ├── PercentSchema.xml │ │ ├── PersonSchema.xml │ │ ├── PhoneSchema.xml │ │ └── SentenceSchema.xml │ │ ├── sentenceSplitter │ │ ├── gazetteer │ │ │ ├── abbreviations.lst │ │ │ └── lists.def │ │ └── grammar │ │ │ ├── cleanup.jape │ │ │ ├── find-single-nl.jape │ │ │ ├── find.jape │ │ │ ├── main-single-nl.jape │ │ │ ├── main.jape │ │ │ ├── no-splits.jape │ │ │ ├── prepare.jape │ │ │ └── split.jape │ │ └── 
tokeniser │ │ ├── AlternateTokeniser.rules │ │ ├── DefaultTokeniser.rules │ │ └── postprocess.jape │ ├── JAPE_Plus │ ├── .classpath │ ├── .project │ ├── build.xml │ ├── creole.xml │ ├── doc │ │ └── javadoc │ │ │ ├── allclasses-frame.html │ │ │ ├── allclasses-noframe.html │ │ │ ├── com │ │ │ └── ontotext │ │ │ │ └── jape │ │ │ │ ├── automaton │ │ │ │ ├── Automaton.html │ │ │ │ ├── AutomatonBuildHelp.html │ │ │ │ ├── AutomatonDeterminizationHelp.html │ │ │ │ ├── AutomatonMinimizationHelp.html │ │ │ │ ├── ClosedHashOfLabels.html │ │ │ │ ├── ClosedHashOfObjects.html │ │ │ │ ├── ClosedHashOfStrings.html │ │ │ │ ├── CodeInt.html │ │ │ │ ├── Constants.html │ │ │ │ ├── EpsilonClosure.html │ │ │ │ ├── GenericWholeArrray.html │ │ │ │ ├── IntSequence.html │ │ │ │ ├── TripleTransitions.html │ │ │ │ ├── WholeSet.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ └── package-tree.html │ │ │ │ └── pda │ │ │ │ ├── FSMPDA.html │ │ │ │ ├── SimpleSet.html │ │ │ │ ├── StatePDA.html │ │ │ │ ├── TransitionPDA.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ └── package-tree.html │ │ │ ├── constant-values.html │ │ │ ├── deprecated-list.html │ │ │ ├── gate │ │ │ ├── gui │ │ │ │ └── jape │ │ │ │ │ └── plus │ │ │ │ │ ├── Viewer.html │ │ │ │ │ ├── package-frame.html │ │ │ │ │ ├── package-summary.html │ │ │ │ │ └── package-tree.html │ │ │ ├── jape │ │ │ │ └── plus │ │ │ │ │ ├── Predicate.PredicateType.html │ │ │ │ │ ├── Predicate.html │ │ │ │ │ ├── SPTBase.FSMInstance.html │ │ │ │ │ ├── SPTBase.MatchMode.html │ │ │ │ │ ├── SPTBase.State.html │ │ │ │ │ ├── SPTBase.Transition.html │ │ │ │ │ ├── SPTBase.html │ │ │ │ │ ├── SPTBuilder.html │ │ │ │ │ ├── Transducer.AnnSetListener.html │ │ │ │ │ ├── Transducer.AnnotationComparator.html │ │ │ │ │ ├── Transducer.SPTData.html │ │ │ │ │ ├── Transducer.SerialiseTransducerAction.html │ │ │ │ │ ├── Transducer.SinglePhaseTransducerPDA.html │ │ │ │ │ ├── Transducer.html │ │ │ │ │ ├── package-frame.html │ 
│ │ │ │ ├── package-summary.html │ │ │ │ │ └── package-tree.html │ │ │ └── resources │ │ │ │ └── img │ │ │ │ └── svg │ │ │ │ ├── JapePlusIcon.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ └── package-tree.html │ │ │ ├── help-doc.html │ │ │ ├── index-all.html │ │ │ ├── index.html │ │ │ ├── overview-frame.html │ │ │ ├── overview-summary.html │ │ │ ├── overview-tree.html │ │ │ ├── package-list │ │ │ ├── resources │ │ │ └── inherit.gif │ │ │ ├── serialized-form.html │ │ │ └── stylesheet.css │ ├── jape-plus.jar │ ├── lib │ │ ├── colt-1.2.0-free.jar │ │ ├── licence-colt-1.2.0-free.txt │ │ └── src │ │ │ └── colt-1.2.0-free-src.jar │ ├── src │ │ ├── com │ │ │ └── ontotext │ │ │ │ └── jape │ │ │ │ ├── automaton │ │ │ │ ├── Automaton.java │ │ │ │ ├── AutomatonBuildHelp.java │ │ │ │ ├── AutomatonDeterminizationHelp.java │ │ │ │ ├── AutomatonMinimizationHelp.java │ │ │ │ ├── ClosedHashOfLabels.java │ │ │ │ ├── ClosedHashOfObjects.java │ │ │ │ ├── ClosedHashOfStrings.java │ │ │ │ ├── CodeInt.java │ │ │ │ ├── Constants.java │ │ │ │ ├── EpsilonClosure.java │ │ │ │ ├── GenericWholeArrray.java │ │ │ │ ├── IntSequence.java │ │ │ │ ├── TripleTransitions.java │ │ │ │ └── WholeSet.java │ │ │ │ └── pda │ │ │ │ ├── FSMPDA.java │ │ │ │ ├── SimpleSet.java │ │ │ │ ├── StatePDA.java │ │ │ │ └── TransitionPDA.java │ │ └── gate │ │ │ ├── gui │ │ │ └── jape │ │ │ │ └── plus │ │ │ │ └── Viewer.java │ │ │ ├── jape │ │ │ └── plus │ │ │ │ ├── Predicate.java │ │ │ │ ├── SPTBase.java │ │ │ │ ├── SPTBuilder.java │ │ │ │ └── Transducer.java │ │ │ └── resources │ │ │ └── img │ │ │ └── svg │ │ │ ├── JapePlus.svg │ │ │ └── JapePlusIcon.java │ └── test │ │ ├── test-contains.jape │ │ ├── test-contains.xml │ │ ├── test-not-contains.jape │ │ ├── test-not-within.jape │ │ ├── test-within.jape │ │ ├── test-within.xml │ │ ├── test_actioncontext.gapp │ │ ├── test_actioncontext.jape │ │ ├── test_actioncontext.session │ │ ├── test_controllerblocks.gapp │ │ ├── 
test_controllerblocks.jape │ │ ├── test_controllerblocks.session │ │ ├── testabc.jape │ │ ├── testabc.xml │ │ ├── testdoc.txt │ │ ├── testonto.doc.xml │ │ ├── testonto.gapp │ │ ├── testonto.jape │ │ ├── testonto.rdf.xml │ │ ├── testonto.session │ │ ├── zero-length-ann.jape │ │ └── zero-length-ann.xml │ ├── Tagger_Integer │ ├── .classpath │ ├── .project │ ├── IntegerTagger.jar │ ├── README │ ├── build.xml │ ├── classes │ │ ├── gate │ │ │ ├── creole │ │ │ │ └── integers │ │ │ │ │ └── package-info.class │ │ │ └── resources │ │ │ │ └── img │ │ │ │ └── numbers.png │ │ └── integers │ │ │ ├── AnnotationConstants.class │ │ │ ├── IntegerTagger$1.class │ │ │ ├── IntegerTagger$2.class │ │ │ ├── IntegerTagger$Config$1.class │ │ │ ├── IntegerTagger$Config.class │ │ │ ├── IntegerTagger$Multiplier$Type.class │ │ │ ├── IntegerTagger$Multiplier.class │ │ │ └── IntegerTagger.class │ ├── creole.xml │ ├── doc │ │ └── svg │ │ │ └── numbers.svg │ ├── resources │ │ └── languages │ │ │ ├── English_Dictionary.xml │ │ │ ├── english.xml │ │ │ ├── english_and_symbols.xml │ │ │ ├── symbols.xml │ │ │ └── symbols.xml~ │ └── src │ │ └── gate │ │ ├── creole │ │ └── integers │ │ │ ├── AnnotationConstants.java │ │ │ ├── AnnotationConstants.java~ │ │ │ ├── IntegerTagger.java │ │ │ ├── IntegerTagger.java~ │ │ │ ├── package-info.java │ │ │ └── package-info.java~ │ │ └── resources │ │ └── img │ │ └── numbers.png │ └── Tagger_PhoneNumber │ ├── .classpath │ ├── .project │ ├── PhoneNumberTagger.jar │ ├── README │ ├── build.xml │ ├── classes │ ├── gate │ │ ├── creole │ │ │ └── phonenumbers │ │ │ │ └── package-info.class │ │ └── resources │ │ │ └── img │ │ │ └── numbers.png │ └── phonenumbers │ │ ├── AnnotationConstants.class │ │ ├── PhoneNumberTagger$1.class │ │ ├── PhoneNumberTagger$AreaCodeDescription.class │ │ ├── PhoneNumberTagger$Config$1.class │ │ ├── PhoneNumberTagger$Config.class │ │ ├── PhoneNumberTagger$DigitGroup.class │ │ └── PhoneNumberTagger.class │ ├── creole.xml │ ├── resources │ ├── 
AreaCode.xml │ ├── areacodes_2014-01-29.csv │ └── areacodes_2014-01-29.xml │ └── src │ └── gate │ ├── creole │ └── phonenumbers │ │ ├── AnnotationConstants.java │ │ ├── AnnotationConstants.java~ │ │ ├── PhoneNumberTagger.java │ │ ├── PhoneNumberTagger.java~ │ │ ├── package-info.java │ │ └── package-info.java~ │ └── resources │ └── img │ └── numbers.png ├── dependencies ├── ant-launcher.jar ├── ant.jar ├── aopalliance.jar ├── apache-mime4j-core.jar ├── apache-mime4j-dom.jar ├── bcmail-jdk15.jar ├── bcprov-jdk15.jar ├── commons-codec.jar ├── commons-compress.jar ├── commons-io.jar ├── commons-lang.jar ├── commons-logging.jar ├── dom4j.jar ├── flying-saucer-core.jar ├── fontbox.jar ├── gate-asm.jar ├── gate-compiler-jdt.jar ├── gate.jar ├── ivy-report.css ├── ivy.jar ├── jackson-annotations.jar ├── jackson-core.jar ├── jackson-databind.jar ├── java-getopt.jar ├── jaxen.jar ├── jdom.jar ├── jempbox.jar ├── jsoup-1.7.3.jar ├── junit.jar ├── log4j.jar ├── lucene-core.jar ├── metadata-extractor.jar ├── nekohtml.jar ├── pdfbox.jar ├── poi-ooxml-schemas.jar ├── poi-ooxml.jar ├── poi-scratchpad.jar ├── poi.jar ├── spring-aop.jar ├── spring-beans.jar ├── spring-core.jar ├── stax2-api.jar ├── tika-core.jar ├── tika-parsers.jar ├── uk.ac.gate-gate-core-all.html ├── uk.ac.gate-gate-core-compile.html ├── uk.ac.gate-gate-core-default.html ├── uk.ac.gate-gate-core-master.html ├── uk.ac.gate-gate-core-required.html ├── uk.ac.gate-gate-core-runtime.html ├── uk.ac.gate-gate-core-test.html ├── woodstox-core-lgpl.jar ├── xercesImpl.jar ├── xmlbeans.jar ├── xmlunit.jar ├── xpp3.jar └── xstream.jar ├── gate.xml └── plugins ├── ANNIE ├── .annie-defaults-metadata │ ├── ann-job-annie.png │ ├── icon.png │ ├── long-desc.html │ ├── metadata.xml │ └── short-desc.html ├── ANNIE_with_defaults.gapp ├── build.xml ├── creole.xml └── resources │ ├── NE │ ├── address.jape │ ├── clean.jape │ ├── date.jape │ ├── date_pre.jape │ ├── email.jape │ ├── final.jape │ ├── first.jape │ ├── firstname.jape │ ├── 
gender.jape │ ├── identifier.jape │ ├── jobtitle.jape │ ├── loc_context.jape │ ├── main.jape │ ├── name.jape │ ├── name_context.jape │ ├── name_post.jape │ ├── number.jape │ ├── org_context.jape │ ├── reldate.jape │ ├── unknown.jape │ ├── url.jape │ └── url_pre.jape │ ├── VP │ └── VerbGroups.jape │ ├── gazetteer │ ├── abbreviations.lst │ ├── adbc.lst │ ├── airport.lst │ ├── airports.lst │ ├── cdg.lst │ ├── charities.lst │ ├── city.lst │ ├── city_cap.lst │ ├── city_uk.lst │ ├── colours.lst │ ├── company.lst │ ├── company_cap.lst │ ├── country.lst │ ├── country_abbrev.lst │ ├── country_adj.lst │ ├── country_cap.lst │ ├── currency_prefix.lst │ ├── currency_unit.lst │ ├── date.lst │ ├── date_key.lst │ ├── date_post.lst │ ├── date_pre.lst │ ├── date_unit.lst │ ├── datespan.lst │ ├── day.lst │ ├── day_cap.lst │ ├── department.lst │ ├── determiner.lst │ ├── facility.lst │ ├── facility_key.lst │ ├── facility_key_ext.lst │ ├── festival.lst │ ├── govern_key.lst │ ├── government.lst │ ├── greeting.lst │ ├── hour.lst │ ├── ident_prekey.lst │ ├── jobtitles.lst │ ├── lists.def │ ├── loc_generalkey.lst │ ├── loc_key.lst │ ├── loc_prekey.lst │ ├── loc_prekey_lower.lst │ ├── loc_relig.lst │ ├── mapping.def │ ├── ministry.lst │ ├── minutes.lst │ ├── months.lst │ ├── months_lower.lst │ ├── mountain.lst │ ├── new_adj.lst │ ├── new_cdg.lst │ ├── newspapers.lst │ ├── non_company.lst │ ├── nonspec_date.lst │ ├── not_org.lst │ ├── number_fold.lst │ ├── numbers.lst │ ├── ordinal.lst │ ├── org_base.lst │ ├── org_ending.lst │ ├── org_key.lst │ ├── org_key_cap.lst │ ├── org_pre.lst │ ├── org_spur.lst │ ├── organization.lst │ ├── organization_nouns.lst │ ├── other_people.lst │ ├── othorg_key.lst │ ├── percent.lst │ ├── person_ambig.lst │ ├── person_ambig.old.lst │ ├── person_ambig2.lst │ ├── person_ambig_lower.lst │ ├── person_ending.lst │ ├── person_female.lst │ ├── person_female_cap.lst │ ├── person_female_lower.lst │ ├── person_first.lst │ ├── person_full.lst │ ├── person_male.lst │ ├── 
person_male_cap.lst │ ├── person_male_lower.lst │ ├── person_relig.lst │ ├── person_sci.lst │ ├── person_spur.lst │ ├── phone_prefix.lst │ ├── planet.lst │ ├── province.lst │ ├── province_aa.lst │ ├── province_ab.lst │ ├── racecourse.lst │ ├── region.lst │ ├── region_cap.lst │ ├── region_uk.lst │ ├── rivers.lst │ ├── sports.lst │ ├── spur.lst │ ├── spur_ident.lst │ ├── stop.lst │ ├── street.lst │ ├── surname_prefix.lst │ ├── team.lst │ ├── time.lst │ ├── time_ampm.lst │ ├── time_key.lst │ ├── time_modifier.lst │ ├── time_unit.lst │ ├── times.lst │ ├── timespan.lst │ ├── timex_pre.lst │ ├── timezone.lst │ ├── title.lst │ ├── title_female.lst │ ├── title_lower.lst │ ├── title_lowercase.lst │ ├── title_male.lst │ ├── title_mil.lst │ ├── title_pol.lst │ ├── tvcompany.lst │ ├── university_uk.lst │ ├── water.lst │ └── year.lst │ ├── heptag │ ├── lexicon │ ├── lexicon_all │ ├── lexicon_cap │ ├── lexicon_lower │ ├── old_lexicon │ ├── rules_cap │ └── ruleset │ ├── othomatcher │ ├── acelists.def │ ├── alias.lst │ ├── alias_ace.lst │ ├── cdg.lst │ ├── chinese_alias.lst │ ├── connnector.lst │ ├── def_art.lst │ ├── listsNM.def │ ├── nickname.txt │ ├── prepos.lst │ └── spur_match.lst │ ├── regex-splitter │ ├── external-split-patterns.txt │ ├── internal-split-patterns.txt │ └── non-split-patterns.txt │ ├── schema │ ├── ANNIE-Schemas.xml │ ├── AddressSchema.xml │ ├── DateSchema.xml │ ├── IdentifierSchema.xml │ ├── LocationSchema.xml │ ├── MoneySchema.xml │ ├── OrganizationSchema.xml │ ├── PercentSchema.xml │ ├── PersonSchema.xml │ ├── PhoneSchema.xml │ └── SentenceSchema.xml │ ├── sentenceSplitter │ ├── gazetteer │ │ ├── abbreviations.lst │ │ └── lists.def │ └── grammar │ │ ├── cleanup.jape │ │ ├── find-single-nl.jape │ │ ├── find.jape │ │ ├── main-single-nl.jape │ │ ├── main.jape │ │ ├── no-splits.jape │ │ ├── prepare.jape │ │ └── split.jape │ └── tokeniser │ ├── AlternateTokeniser.rules │ ├── DefaultTokeniser.rules │ └── postprocess.jape ├── JAPE_Plus ├── .classpath ├── .project 
├── build.xml ├── creole.xml ├── doc │ └── javadoc │ │ ├── allclasses-frame.html │ │ ├── allclasses-noframe.html │ │ ├── com │ │ └── ontotext │ │ │ └── jape │ │ │ ├── automaton │ │ │ ├── Automaton.html │ │ │ ├── AutomatonBuildHelp.html │ │ │ ├── AutomatonDeterminizationHelp.html │ │ │ ├── AutomatonMinimizationHelp.html │ │ │ ├── ClosedHashOfLabels.html │ │ │ ├── ClosedHashOfObjects.html │ │ │ ├── ClosedHashOfStrings.html │ │ │ ├── CodeInt.html │ │ │ ├── Constants.html │ │ │ ├── EpsilonClosure.html │ │ │ ├── GenericWholeArrray.html │ │ │ ├── IntSequence.html │ │ │ ├── TripleTransitions.html │ │ │ ├── WholeSet.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ └── package-tree.html │ │ │ └── pda │ │ │ ├── FSMPDA.html │ │ │ ├── SimpleSet.html │ │ │ ├── StatePDA.html │ │ │ ├── TransitionPDA.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ └── package-tree.html │ │ ├── constant-values.html │ │ ├── deprecated-list.html │ │ ├── gate │ │ ├── gui │ │ │ └── jape │ │ │ │ └── plus │ │ │ │ ├── Viewer.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ └── package-tree.html │ │ ├── jape │ │ │ └── plus │ │ │ │ ├── Predicate.PredicateType.html │ │ │ │ ├── Predicate.html │ │ │ │ ├── SPTBase.FSMInstance.html │ │ │ │ ├── SPTBase.MatchMode.html │ │ │ │ ├── SPTBase.State.html │ │ │ │ ├── SPTBase.Transition.html │ │ │ │ ├── SPTBase.html │ │ │ │ ├── SPTBuilder.html │ │ │ │ ├── Transducer.AnnSetListener.html │ │ │ │ ├── Transducer.AnnotationComparator.html │ │ │ │ ├── Transducer.SPTData.html │ │ │ │ ├── Transducer.SerialiseTransducerAction.html │ │ │ │ ├── Transducer.SinglePhaseTransducerPDA.html │ │ │ │ ├── Transducer.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ └── package-tree.html │ │ └── resources │ │ │ └── img │ │ │ └── svg │ │ │ ├── JapePlusIcon.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ └── package-tree.html │ │ ├── help-doc.html │ │ ├── index-all.html │ │ 
├── index.html │ │ ├── overview-frame.html │ │ ├── overview-summary.html │ │ ├── overview-tree.html │ │ ├── package-list │ │ ├── resources │ │ └── inherit.gif │ │ ├── serialized-form.html │ │ └── stylesheet.css ├── jape-plus.jar ├── lib │ ├── colt-1.2.0-free.jar │ ├── licence-colt-1.2.0-free.txt │ └── src │ │ └── colt-1.2.0-free-src.jar ├── src │ ├── com │ │ └── ontotext │ │ │ └── jape │ │ │ ├── automaton │ │ │ ├── Automaton.java │ │ │ ├── AutomatonBuildHelp.java │ │ │ ├── AutomatonDeterminizationHelp.java │ │ │ ├── AutomatonMinimizationHelp.java │ │ │ ├── ClosedHashOfLabels.java │ │ │ ├── ClosedHashOfObjects.java │ │ │ ├── ClosedHashOfStrings.java │ │ │ ├── CodeInt.java │ │ │ ├── Constants.java │ │ │ ├── EpsilonClosure.java │ │ │ ├── GenericWholeArrray.java │ │ │ ├── IntSequence.java │ │ │ ├── TripleTransitions.java │ │ │ └── WholeSet.java │ │ │ └── pda │ │ │ ├── FSMPDA.java │ │ │ ├── SimpleSet.java │ │ │ ├── StatePDA.java │ │ │ └── TransitionPDA.java │ └── gate │ │ ├── gui │ │ └── jape │ │ │ └── plus │ │ │ └── Viewer.java │ │ ├── jape │ │ └── plus │ │ │ ├── Predicate.java │ │ │ ├── SPTBase.java │ │ │ ├── SPTBuilder.java │ │ │ └── Transducer.java │ │ └── resources │ │ └── img │ │ └── svg │ │ ├── JapePlus.svg │ │ └── JapePlusIcon.java └── test │ ├── test-contains.jape │ ├── test-contains.xml │ ├── test-not-contains.jape │ ├── test-not-within.jape │ ├── test-within.jape │ ├── test-within.xml │ ├── test_actioncontext.gapp │ ├── test_actioncontext.jape │ ├── test_actioncontext.session │ ├── test_controllerblocks.gapp │ ├── test_controllerblocks.jape │ ├── test_controllerblocks.session │ ├── testabc.jape │ ├── testabc.xml │ ├── testdoc.txt │ ├── testonto.doc.xml │ ├── testonto.gapp │ ├── testonto.jape │ ├── testonto.rdf.xml │ ├── testonto.session │ ├── zero-length-ann.jape │ └── zero-length-ann.xml └── Tools ├── .classpath ├── .project ├── build.xml ├── creole.xml ├── doc └── javadoc │ ├── allclasses-frame.html │ ├── allclasses-noframe.html │ ├── constant-values.html │ 
├── deprecated-list.html │ ├── gate │ ├── configurableexporter │ │ ├── ConfigurableExporter.html │ │ ├── package-frame.html │ │ ├── package-summary.html │ │ └── package-tree.html │ └── qa │ │ ├── AnnotationDiffExporter.html │ │ ├── Measure.html │ │ ├── QualityAssurancePR.html │ │ ├── package-frame.html │ │ ├── package-summary.html │ │ └── package-tree.html │ ├── help-doc.html │ ├── index-all.html │ ├── index.html │ ├── overview-frame.html │ ├── overview-summary.html │ ├── overview-tree.html │ ├── package-list │ ├── resources │ └── inherit.gif │ ├── serialized-form.html │ └── stylesheet.css ├── resources ├── configurableexporter │ └── example.conf ├── morph │ ├── adj.dat │ ├── adv.dat │ ├── default.rul │ ├── noun.dat │ └── verb.dat └── schemas │ └── SyntaxTreeNodeSchema.xml ├── src └── gate │ ├── configurableexporter │ └── ConfigurableExporter.java │ └── qa │ ├── AnnotationDiffExporter.java │ ├── Measure.java │ └── QualityAssurancePR.java └── tools.jar /TJBatchExtractor/Documentation/TJBatchExtractor.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} 3 | \@writefile{toc}{\contentsline {section}{\numberline {2}Components}{1}} 4 | \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}{TJBatchExtractor.java}}{2}} 5 | \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.1.1}Outfile}{2}} 6 | \@writefile{toc}{\contentsline {section}{\numberline {3}{TJinformationExtractor}}{3}} 7 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}{Integer Tagger}}{4}} 8 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}{Phone Number Tagger}}{4}} 9 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}{ANNIE Gazetteer}}{5}} 10 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.4}{JAPE-Plus Transducer}}{5}} 11 | \@writefile{toc}{\contentsline {section}{\numberline {4}Performance}{5}} 12 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/Documentation/TJBatchExtractor.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/Documentation/TJBatchExtractor.pdf -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/TJInfoExtractor.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/TJInfoExtractor.zip -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/body_parts.lst: -------------------------------------------------------------------------------- 1 | hair 2 | head 3 | eyes 4 | eye 5 | eyed 6 | lips 7 | lip 8 | skin 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/color.list: -------------------------------------------------------------------------------- 1 | blonde 2 | brown 3 | caramel 4 | hazel 5 | amber 6 | copper 7 | ruby 8 | burgundy 9 | cherry 10 | black 11 | honey 12 | golden 13 | auburn 14 | blond 15 | red 16 | chestnut 17 | gray 18 | grey 19 | white 20 | pink 21 | purple 22 | yellow 23 | straw 24 | orange 25 | blue 26 | green 27 | violet 28 | brunette 29 | strawberry 30 | flaming 31 | ginger 32 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/color.lst: -------------------------------------------------------------------------------- 1 | brown 2 | caramel 3 | hazel 4 | amber 5 | copper 6 | ruby 7 | burgundy 8 | cherry 9 | black 10 | honey 11 
| golden 12 | auburn 13 | red 14 | chestnut 15 | gray 16 | grey 17 | white 18 | pink 19 | purple 20 | yellow 21 | straw 22 | orange 23 | blue 24 | green 25 | violet 26 | strawberry 27 | brwn 28 | grn 29 | blueish 30 | brownish 31 | greenish 32 | redish 33 | reddish 34 | burgandy 35 | fair 36 | ebony 37 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/color_pref.lst: -------------------------------------------------------------------------------- 1 | light 2 | dark 3 | lite 4 | sky 5 | sea 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/country_adj_plural.lst: -------------------------------------------------------------------------------- 1 | African Americans 2 | blacks 3 | AA 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/cup0.list: -------------------------------------------------------------------------------- 1 | double 2 | triple 3 | natural 4 | cup size 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/cup1.list: -------------------------------------------------------------------------------- 1 | A 2 | AA 3 | B 4 | C 5 | D 6 | DD 7 | DDD 8 | DDDD 9 | DEE 10 | As 11 | AAs 12 | Bs 13 | Cs 14 | Ds 15 | DDs 16 | DDDs 17 | DDDDs 18 | DEEs 19 | E 20 | F 21 | EE 22 | FF 23 | Es 24 | Fs 25 | G 26 | Gs 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/cup2.list: -------------------------------------------------------------------------------- 1 | cup 2 | cups 3 | 's 4 | breasts 5 | breast 6 | boob 7 | boobs 8 | tit 9 | tits 10 | boobies 11 | tities 12 | 
chest 13 | bust 14 | cupsize 15 | titties 16 | 'z 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/cup3.lst: -------------------------------------------------------------------------------- 1 | chest 2 | bust 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/cup4.lst: -------------------------------------------------------------------------------- 1 | waist 2 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/cup5.lst: -------------------------------------------------------------------------------- 1 | hip 2 | hips 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/currency: -------------------------------------------------------------------------------- 1 | dollar 2 | dollars 3 | dlr 4 | bucks 5 | dlrs 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/date_key.lst: -------------------------------------------------------------------------------- 1 | today 2 | yesterday 3 | tomorrow 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/date_unit.lst: -------------------------------------------------------------------------------- 1 | day 2 | days 3 | week 4 | weeks 5 | fortnight 6 | fortnights 7 | month 8 | months 9 | quarter 10 | quarters 11 | fiscal quarter 12 | fiscal quarters 13 | half 14 | first-half 15 | fiscal half 16 | year 17 | years 18 | financial year 19 | financial years 20 | fiscal year 21 | tax year 22 | decade 23 | decades 24 | century 25 | 
centuries 26 | half-year 27 | half year 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/day.lst: -------------------------------------------------------------------------------- 1 | Wednesday 2 | Wed. 3 | Wed 4 | Tuesday 5 | Tues. 6 | Tues 7 | Tue. 8 | Tue 9 | Thursday 10 | Thurs. 11 | Thurs 12 | Thur. 13 | Thur 14 | Thu. 15 | Thu 16 | Sunday 17 | Sun. 18 | Sun 19 | Saturday 20 | Sat. 21 | Sat 22 | Monday 23 | Mon. 24 | Mon 25 | Friday 26 | Fri. 27 | Fri 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/eye.lst: -------------------------------------------------------------------------------- 1 | eyes 2 | peepers 3 | eye 4 | -eyed 5 | eyed 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/facility_key.lst: -------------------------------------------------------------------------------- 1 | GARDENS 2 | Airbase 3 | Airport 4 | Avenue 5 | Barracks 6 | Base 7 | Building 8 | Cafe 9 | Camp 10 | Castle 11 | Center 12 | Centre 13 | Cinema 14 | Coliseum 15 | Common 16 | Forum 17 | Gardens 18 | GHQ 19 | General Head Quarters 20 | General Headquarters 21 | HQ 22 | Hall 23 | Head Quarters 24 | Headquarters 25 | Hotel 26 | House 27 | International Airport 28 | international airport 29 | Jail 30 | jail 31 | Museum 32 | Office 33 | Palace 34 | Park 35 | Pavilion 36 | Plant 37 | Playhouse 38 | playhouse 39 | Prison Camp 40 | Pub 41 | Ranch 42 | Refuge 43 | Refugee Camp 44 | Resort 45 | Restaurant 46 | School 47 | Ski Resort 48 | Square 49 | Station 50 | Stadium 51 | Street 52 | Theatre 53 | Theater 54 | Tower 55 | Trail 56 | trail 57 | Wall 58 | Zoo 59 | Federal Building 60 | federal building 61 | prison camp 62 | refugee camp 63 | school 64 | Hilton 65 | cemetery 66 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/firstpersonPronouns.lst: -------------------------------------------------------------------------------- 1 | I 2 | Im 3 | me 4 | my 5 | mine 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/greeting.lst: -------------------------------------------------------------------------------- 1 | Dear 2 | dear 3 | Dearest 4 | dearest 5 | Hello 6 | hello 7 | Hi 8 | hi 9 | Good morning 10 | good morning 11 | Hey 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hair.list: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hair.list -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hair.lst: -------------------------------------------------------------------------------- 1 | brunette 2 | blonde 3 | blond 4 | strawberry blonde 5 | stawberry blond 6 | platinum blond 7 | platinum blonde 8 | redhead 9 | blondee 10 | blondie 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hair2.list: -------------------------------------------------------------------------------- 1 | head 2 | hair 3 | colour hair 4 | color hair 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hair2.lst: 
-------------------------------------------------------------------------------- 1 | hair 2 | head 3 | color hair 4 | colour hair 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hair_pref.lst: -------------------------------------------------------------------------------- 1 | light 2 | dark 3 | dirty 4 | sandy 5 | lite 6 | flaming 7 | dirtty 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/height.lst: -------------------------------------------------------------------------------- 1 | foot 2 | feet 3 | ft 4 | ' 5 | fts 6 | foots 7 | feets 8 | ` 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/height2.lst: -------------------------------------------------------------------------------- 1 | " 2 | in 3 | inch 4 | inches 5 | inchs 6 | ins 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/hour.lst: -------------------------------------------------------------------------------- 1 | one 2 | two 3 | three 4 | four 5 | five 6 | six 7 | seven 8 | eight 9 | nine 10 | ten 11 | eleven 12 | twelve 13 | midnight 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/loc_generalkey.lst: -------------------------------------------------------------------------------- 1 | village 2 | city 3 | town 4 | province 5 | state 6 | island 7 | township 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/loc_key.lst: 
-------------------------------------------------------------------------------- 1 | sea 2 | ocean 3 | Wall 4 | Village 5 | Valley 6 | Trail 7 | Straits 8 | Strait 9 | Station 10 | Stadium 11 | Square 12 | Springs 13 | Sea 14 | Sands 15 | River 16 | Republic 17 | Province 18 | Plains 19 | Parkway 20 | Park 21 | Ocean 22 | Mountains 23 | Mountain 24 | Lakes 25 | Islands 26 | Island 27 | Hills 28 | Hill 29 | Heights 30 | Headquarters 31 | Head Quarters 32 | Harbor 33 | HQ 34 | General Headquarters 35 | General Head Quarters 36 | GHQ 37 | Fjord 38 | Everglades 39 | Estates 40 | District 41 | Desert 42 | Delta 43 | Creek 44 | County 45 | City 46 | Channel 47 | Castle 48 | Canal 49 | Boulevard 50 | Blvd. 51 | Belt 52 | Beach 53 | Bay Area 54 | Bay 55 | Basin 56 | Base 57 | Barracks 58 | Airbase 59 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/loc_prekey.lst: -------------------------------------------------------------------------------- 1 | Camp 2 | Cape 3 | Central 4 | East 5 | Eastern 6 | Fort 7 | Lake 8 | Mount 9 | North 10 | North-Eastern 11 | North-Western 12 | Northeast 13 | Northeastern 14 | Northern 15 | Northwest 16 | Northwestern 17 | Porto 18 | Puerto 19 | South 20 | South-Eastern 21 | South-Western 22 | Southeast 23 | Southeastern 24 | Southern 25 | Southwest 26 | Southwestern 27 | West 28 | Western 29 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/men.lst: -------------------------------------------------------------------------------- 1 | guy 2 | guys 3 | men 4 | man 5 | gentlemen 6 | gentleman 7 | boy 8 | boys 9 | male 10 | males 11 | gent 12 | gents 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/minutes.lst: 
-------------------------------------------------------------------------------- 1 | five 2 | ten 3 | fifteen 4 | twenty 5 | twenty five 6 | thirty 7 | thirty five 8 | forty 9 | forty five 10 | fifty 11 | fifty five 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/months.lst: -------------------------------------------------------------------------------- 1 | APR 2 | APR. 3 | APRIL 4 | AUG 5 | AUG. 6 | AUGUST 7 | AUTUMN 8 | Apr 9 | Apr. 10 | April 11 | Aug 12 | Aug. 13 | August 14 | Autumn 15 | DEC 16 | DEC. 17 | DECEMBER 18 | Dec 19 | Dec. 20 | December 21 | FEB 22 | FEB. 23 | FEBRUARY 24 | Feb 25 | Feb. 26 | February 27 | JAN 28 | JAN. 29 | JANUARY 30 | JUL 31 | JUL. 32 | JULY 33 | JUN 34 | JUN. 35 | JUNE 36 | Jan 37 | Jan. 38 | January 39 | Jul 40 | Jul. 41 | July 42 | Jun 43 | Jun. 44 | June 45 | MAR 46 | MAR. 47 | MARCH 48 | MAY 49 | Mar 50 | Mar. 51 | March 52 | May 53 | NOV 54 | NOV. 55 | NOVEMBER 56 | Nov 57 | Nov. 58 | November 59 | OCT 60 | OCT. 61 | OCTOBER 62 | Oct 63 | Oct. 64 | October 65 | SEP 66 | SEP. 67 | SEPT 68 | SEPT. 69 | SEPTEMBER 70 | Sep 71 | Sep. 72 | Sept 73 | Sept. 74 | September 75 | Summer 76 | Winter 77 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/name_pre.lst: -------------------------------------------------------------------------------- 1 | Ms 2 | Ms. 3 | Mrs. 4 | Mrs 5 | Miz 6 | Mz. 7 | Mz 8 | Miz. 
9 | name is 10 | name's 11 | they call me 12 | I'm called 13 | Im called 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/numbers2.lst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/numbers2.lst -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/percent.lst: -------------------------------------------------------------------------------- 1 | % 2 | percent 3 | per cent 4 | precent 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/person_ambig.lst: -------------------------------------------------------------------------------- 1 | Silver 2 | Dame 3 | Jan 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/person_ending.lst: -------------------------------------------------------------------------------- 1 | Jr 2 | Jr. 3 | jr 4 | jr. 5 | Sr 6 | Sr. 7 | sr 8 | sr. 9 | II 10 | III 11 | Q.C. 
12 | QC 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/person_full.lst: -------------------------------------------------------------------------------- 1 | Bach 2 | Brahms 3 | Carter 4 | Clinton 5 | Gaugin 6 | Gauguin 7 | George Bush 8 | Hillary Clinton 9 | Hillary Rodham Clinton 10 | John the Baptist 11 | Margaret Thatcher 12 | Messiaen 13 | Mozart 14 | Nixon 15 | Pope John Paul II 16 | Richard Nixon 17 | Ronald Reagan 18 | Saddam Hussain 19 | Saint-Saens 20 | Shostakovich 21 | Strauss 22 | Thatcher 23 | The Clintons 24 | The Queen 25 | the Queen 26 | Van Gogh 27 | Virgin Mary 28 | Vivaldi 29 | the Clintons 30 | van Gogh 31 | Barack Obama 32 | Obama 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/person_relig.lst: -------------------------------------------------------------------------------- 1 | God 2 | Jesus 3 | Jesus Christ 4 | Lord Jesus Christ 5 | Lord God 6 | the Holy Ghost 7 | the Holy Spirit 8 | Christ 9 | Nicademus 10 | Nicodemus 11 | Anzakias 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/phone_prefix.lst: -------------------------------------------------------------------------------- 1 | Phone 2 | phone 3 | Telephone 4 | telephone 5 | Tel. 6 | Tel 7 | tel. 8 | tel 9 | Fax 10 | fax 11 | no. 
12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/service_event.lst: -------------------------------------------------------------------------------- 1 | quickie 2 | quick 3 | stop by 4 | overnighter 5 | special 6 | specials 7 | spcl 8 | spl 9 | $pecial 10 | $pecials 11 | $pecial$ 12 | $pcl 13 | $pl 14 | blo 15 | blow 16 | incall 17 | incalls 18 | outcall 19 | outcalls 20 | specialz 21 | $pecialz 22 | in call 23 | in calls 24 | out call 25 | out calls 26 | spls 27 | spcls 28 | splz 29 | spclz 30 | $pls 31 | $plz 32 | $pcls 33 | short stay 34 | ss 35 | in-call 36 | in-calls 37 | out-call 38 | out-calls 39 | donation 40 | spe 41 | $pe 42 | spec 43 | $pec 44 | session 45 | overnights 46 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/service_massage.lst: -------------------------------------------------------------------------------- 1 | Nuru 2 | Massage 3 | tissue 4 | deep tissue 5 | deep-tissue 6 | shiatsu 7 | hot oil 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/skin.lst: -------------------------------------------------------------------------------- 1 | skin 2 | skinn 3 | skinned 4 | skined 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/skin2.lst: -------------------------------------------------------------------------------- 1 | redbone 2 | red bone 3 | white 4 | black 5 | ebony 6 | caramel 7 | brown 8 | brwn 9 | carmel 10 | blk 11 | caucasian 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/skin3.lst: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/skin3.lst -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/sports.lst: -------------------------------------------------------------------------------- 1 | tennis 2 | TENNIS 3 | skiing 4 | SKIING 5 | skating 6 | SKATING 7 | gymnastics 8 | GYMNASTICS 9 | swimming 10 | SWIMMING 11 | football 12 | FOOTBALL 13 | rugby 14 | RUGBY 15 | golf 16 | GOLF 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/stop.lst: -------------------------------------------------------------------------------- 1 | About 2 | Because 3 | By 4 | Can 5 | First 6 | For 7 | He 8 | Here 9 | I 10 | It 11 | May 12 | Next 13 | Of 14 | Perhaps 15 | Please 16 | Shall 17 | She 18 | So 19 | That 20 | Then 21 | They 22 | We 23 | Will 24 | Without 25 | You 26 | And 27 | From 28 | With 29 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/stop2.lst: -------------------------------------------------------------------------------- 1 | a 2 | again 3 | an 4 | any 5 | are 6 | aren't 7 | at 8 | be 9 | been 10 | being 11 | can't 12 | cannot 13 | do 14 | doing 15 | don't 16 | down 17 | during 18 | each 19 | few 20 | haven't 21 | having 22 | he 23 | he'd 24 | he'll 25 | her 26 | here 27 | hers 28 | him 29 | himself 30 | his 31 | how's 32 | i 33 | i'd 34 | i'll 35 | i've 36 | if 37 | in 38 | isn't 39 | it 40 | itself 41 | let's 42 | more 43 | most 44 | mustn't 45 | my 46 | no 47 | nor 48 | off 49 | other 50 | ought 51 | ours 52 | ourselves 53 | out 54 | shan't 55 | she 56 | she'd 
57 | she'll 58 | some 59 | such 60 | that 61 | the 62 | their 63 | them 64 | themselves 65 | there 66 | these 67 | they 68 | they'd 69 | they'll 70 | they're 71 | they've 72 | this 73 | those 74 | up 75 | very 76 | we 77 | we'd 78 | we'll 79 | we've 80 | what 81 | which 82 | who 83 | whom 84 | you 85 | you'd 86 | you'll 87 | you're 88 | you've 89 | your 90 | yours 91 | yourself 92 | yourselves 93 | 94 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/stop_identifier.lst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/stop_identifier.lst -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/street.lst: -------------------------------------------------------------------------------- 1 | Avenue 2 | Crescent 3 | Hill 4 | Lane 5 | Rise 6 | Road 7 | Row 8 | Street 9 | Way 10 | Ave 11 | St 12 | Rd 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/surname_prefix.lst: -------------------------------------------------------------------------------- 1 | de 2 | De 3 | del 4 | Del 5 | d' 6 | D' 7 | O' 8 | von 9 | Von 10 | van 11 | Van 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/thirdpersonPronouns.lst: -------------------------------------------------------------------------------- 1 | she 2 | her 3 | hers 4 | shes 5 | this girl 6 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time.lst: -------------------------------------------------------------------------------- 1 | midday 2 | midnight 3 | noon 4 | dawn 5 | dusk 6 | sunrise 7 | sunset 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_ampm.lst: -------------------------------------------------------------------------------- 1 | a.m. 2 | A M 3 | AM 4 | A.M. 5 | pm 6 | p.m. 7 | PM 8 | P.M. 9 | P M 10 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_day.lst: -------------------------------------------------------------------------------- 1 | afternoon 2 | evening 3 | morning 4 | night 5 | overnight 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_hr.list: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_hr.list -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_hr.lst: -------------------------------------------------------------------------------- 1 | fh 2 | full hr 3 | full h 4 | full hour 5 | h 6 | hr 7 | hour 8 | hourly 9 | hours 10 | hh 11 | hhr 12 | half hr 13 | half hour 14 | half h 15 | whole hour 16 | whole h 17 | whole hr 18 | wh 19 | whr 20 | full 21 | hlf 22 | half 23 | hrs 24 | hlf hr 25 | hf 26 | hf hr 27 | hf hour 28 | hfh 29 | hrly 30 | f 31 | q 32 | qrt 33 | quarter 34 | fullhour 35 | fullhr 36 | fullh 37 | halfh 38 | halfhr 39 | halfhour 40 | wholeh 41 | wholehr 42 | wholehour 43 | a 
half 44 | a half hour 45 | a half hr 46 | a hr 47 | a hour 48 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_min.list: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_min.list -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_min.lst: -------------------------------------------------------------------------------- 1 | m 2 | min 3 | mins 4 | minute 5 | minutes 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_modifier.lst: -------------------------------------------------------------------------------- 1 | coming 2 | last 3 | next 4 | past 5 | previous 6 | this 7 | Last 8 | Next 9 | This 10 | forthcoming 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_unit.lst: -------------------------------------------------------------------------------- 1 | hour 2 | hours 3 | minute 4 | minutes 5 | second 6 | seconds 7 | morning 8 | afternoon 9 | evening 10 | night 11 | hh 12 | hhr 13 | hr 14 | h 15 | wh 16 | whr 17 | overnight 18 | min 19 | hourly 20 | m 21 | fh 22 | half hour 23 | half hr 24 | full hour 25 | full hr 26 | half h 27 | full h 28 | mins 29 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/time_unithr.lst: -------------------------------------------------------------------------------- 1 | 1/2 hour 2 | 0.5 hour 3 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/title_female.lst: -------------------------------------------------------------------------------- 1 | Madam 2 | Madame 3 | Mademoiselle 4 | Miss 5 | Mlle 6 | Mme 7 | Mme. 8 | Mrs 9 | Mrs. 10 | Ms 11 | Ms. 12 | Sister 13 | Spokeswoman 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/title_male.lst: -------------------------------------------------------------------------------- 1 | Marquis 2 | Messr 3 | Messr. 4 | Monsieur 5 | Mr 6 | Mr. 7 | Prince 8 | Rabbi 9 | Sheik 10 | Sir 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/weight.lst: -------------------------------------------------------------------------------- 1 | pounds 2 | lbs 3 | lb 4 | pound 5 | Ibs 6 | Ib 7 | p 8 | pds 9 | pd 10 | wt 11 | weigh 12 | weight 13 | wht 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/year.lst: -------------------------------------------------------------------------------- 1 | 1970 2 | 1971 3 | 1972 4 | 1973 5 | 1974 6 | 1975 7 | 1976 8 | 1977 9 | 1978 10 | 1979 11 | 1980 12 | 1981 13 | 1982 14 | 1983 15 | 1984 16 | 1985 17 | 1986 18 | 1987 19 | 1988 20 | 1989 21 | 1990 22 | 1991 23 | 1992 24 | 1993 25 | 1994 26 | 1995 27 | 1996 28 | 1997 29 | 1998 30 | 1999 31 | 2000 32 | 2001 33 | 2002 34 | 2003 35 | 2004 36 | 2005 37 | 2006 38 | 2007 39 | 2008 40 | 2009 41 | 2010 42 | 2011 43 | 2012 44 | 2013 45 | 2014 46 | 2015 47 | 2016 48 | 2017 49 | 2018 50 | 2019 51 | 2020 52 | 2021 53 | 2022 54 | 2023 55 | 2024 56 | 2025 57 | 2026 58 | 2027 59 | 2028 60 | 2029 61 | 2030 62 | 2031 63 | 2032 64 | 2033 65 | 2034 66 | 2035 67 | 
2036 68 | 2037 69 | 2038 70 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/TJ_Gazetteer/yearsold.lst: -------------------------------------------------------------------------------- 1 | years 2 | year 3 | y.o. 4 | yo 5 | y.o 6 | yrs 7 | yr 8 | years old 9 | years of age 10 | yrs old 11 | yrs of age 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/.cost.jape.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/.cost.jape.swp -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/TJ_Annotation.jape: -------------------------------------------------------------------------------- 1 | /* 2 | File: TJ_Annotation.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Lesser General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | MultiPhase: TJIE 16 | Phases: 17 | restrictions 18 | age 19 | cleanNumbers 20 | cleanNumbers2 21 | percent 22 | time 23 | time2 24 | time3 25 | measurements 26 | heightweight 27 | heightweight2 28 | cost_duration 29 | cost_dollarsign 30 | cost_unitless 31 | haireyeskin 32 | ethnicity 33 | Names 34 | dotcom 35 | email 36 | url_pre 37 | url 38 | cleandups 39 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/TJ_Annotation.jape~: -------------------------------------------------------------------------------- 1 | /* 2 | File: TJ_Annotation.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Library General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | MultiPhase: TJIE 16 | Phases: 17 | restrictions 18 | age 19 | cleanNumbers 20 | cleanNumbers2 21 | percent 22 | time 23 | time2 24 | time3 25 | measurements 26 | heightweight 27 | heightweight2 28 | cost_duration 29 | cost_dollarsign 30 | cost_unitless 31 | haireyeskin 32 | ethnicity 33 | Names 34 | dotcom 35 | email 36 | url_pre 37 | url 38 | cleandups 39 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/ethnicity.jape: -------------------------------------------------------------------------------- 1 | /* 2 | File: ethnicity.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Lesser General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | 16 | Phase:ethnicity 17 | Input: Token Lookup 18 | Options: control = all negationGrouping = false 19 | 20 | Rule: ethnicity0 21 | ({Token, !Token.string ==~ "(?i)no", !Lookup.minorType == massageservice})[3] 22 | ( { Lookup.majorType == country_adj } ):eth 23 | ({Token, !Token.string ==~ "(?i)americans", !Lookup.majorType==men, !Lookup.minorType == massageservice}) 24 | 
({Token, !Lookup.majorType==men, !Lookup.minorType == massageservice})[3] 25 | --> 26 | :eth.Ethnicity = { rule = ethnicity0, ethnicity = :eth@string } 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/ethnicity.jape~: -------------------------------------------------------------------------------- 1 | /* 2 | File: ethnicity.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Library General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | 16 | Phase:ethnicity 17 | Input: Token Lookup 18 | Options: control = all negationGrouping = false 19 | 20 | Rule: ethnicity0 21 | ({Token, !Token.string ==~ "(?i)no", !Lookup.minorType == massageservice})[3] 22 | ( { Lookup.majorType == country_adj } ):eth 23 | ({Token, !Token.string ==~ "(?i)americans", !Lookup.majorType==men, !Lookup.minorType == massageservice}) 24 | ({Token, !Lookup.majorType==men, !Lookup.minorType == massageservice})[3] 25 | --> 26 | :eth.Ethnicity = { rule = ethnicity0, ethnicity = :eth@string } 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/percent.jape: -------------------------------------------------------------------------------- 1 | /* 2 | File: percent.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Lesser General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 
13 | * 14 | */ 15 | Phase:percent 16 | Input: Token Integer Lookup 17 | Options: control = appelt 18 | 19 | Rule: PercentClean 20 | ({ Integer.type == numbers } {Lookup.majorType==percent}):num 21 | --> 22 | :num.Percent = {rule = PercentClean} 23 | 24 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/percent.jape~: -------------------------------------------------------------------------------- 1 | /* 2 | File: percent.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Library General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | Phase:percent 16 | Input: Token Integer Lookup 17 | Options: control = appelt 18 | 19 | Rule: PercentClean 20 | ({ Integer.type == numbers } {Lookup.majorType==percent}):num 21 | --> 22 | :num.Percent = {rule = PercentClean} 23 | 24 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/time3.jape: -------------------------------------------------------------------------------- 1 | /* 2 | File: time3.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Lesser General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | Phase:timepass3 16 | Input: Lookup Integer 17 | Options: control = first 18 | 19 | Rule: cleanPhoneNumber 20 | ({Lookup.majorType == time_unit, !Lookup within Integer}):tm 21 | --> 22 | 
{ 23 | AnnotationSet tmSet = bindings.get("tm"); 24 | Long start = tmSet.firstNode().getOffset(); 25 | Long end = tmSet.lastNode().getOffset(); 26 | 27 | AnnotationSet toremove = inputAS.get(start,end).get("PhoneNumber"); 28 | if( toremove!=null) inputAS.removeAll(toremove); 29 | } 30 | 31 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/time3.jape~: -------------------------------------------------------------------------------- 1 | /* 2 | File: time3.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Library General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | Phase:timepass3 16 | Input: Lookup Integer 17 | Options: control = first 18 | 19 | Rule: cleanPhoneNumber 20 | ({Lookup.majorType == time_unit, !Lookup within Integer}):tm 21 | --> 22 | { 23 | AnnotationSet tmSet = bindings.get("tm"); 24 | Long start = tmSet.firstNode().getOffset(); 25 | Long end = tmSet.lastNode().getOffset(); 26 | 27 | AnnotationSet toremove = inputAS.get(start,end).get("PhoneNumber"); 28 | if( toremove!=null) inputAS.removeAll(toremove); 29 | } 30 | 31 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/url_pre.jape: -------------------------------------------------------------------------------- 1 | /* 2 | File: url_pre.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Lesser General Public 
License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | Phase: UrlPre 16 | Input: Token SpaceToken DotCom 17 | Options: control = appelt 18 | /* Urlpre: annotates a URL prefix as UrlPre (feature rule = "UrlPre"). The prefix is either a scheme Token (http, https, ftp or sftp, compared case-insensitively) followed by the Tokens ":", "/", "/", or a case-insensitive "www" Token followed by a "." Token or a DotCom annotation with kind == dot. */ 19 | Rule: Urlpre 20 | 21 | ( (({Token.string ==~ "(?i)http[s]?"} | 22 | {Token.string ==~ "(?i)[s]?ftp"}) 23 | {Token.string == ":"} 24 | {Token.string == "/"} 25 | {Token.string == "/"} 26 | ) | 27 | ({Token.string ==~ "(?i)www"} 28 | ({Token.string == "."} | {DotCom.kind == dot}) 29 | ) 30 | ):urlpre 31 | --> 32 | :urlpre.UrlPre = {rule = "UrlPre"} 33 | 34 | 35 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/application-resources/jape_transducers/url_pre.jape~: -------------------------------------------------------------------------------- 1 | /* 2 | File: url_pre.jape 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE JAPE file that attempts to annotate information in text 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | /* 10 | * This file is free software, 11 | * licenced under the GNU Library General Public License, Version 3, June 2007 12 | * (in the distribution as file licence.html) 13 | * 14 | */ 15 | Phase: UrlPre 16 | Input: Token SpaceToken DotCom 17 | Options: control = appelt 18 | /* Urlpre: annotates a URL prefix as UrlPre (feature rule = "UrlPre"). The prefix is either a scheme Token (http, https, ftp or sftp, compared case-insensitively) followed by the Tokens ":", "/", "/", or a case-insensitive "www" Token followed by a "." Token or a DotCom annotation with kind == dot. */ 19 | Rule: Urlpre 20 | 21 | ( (({Token.string ==~ "(?i)http[s]?"} | 22 | {Token.string ==~ "(?i)[s]?ftp"}) 23 | {Token.string == ":"} 24 | {Token.string == "/"} 25 | {Token.string == "/"} 26 | ) | 27 | ({Token.string ==~ "(?i)www"} 28 | ({Token.string == "."} | {DotCom.kind == dot}) 29 | ) 30 | ):urlpre 31 | --> 32 | :urlpre.UrlPre = {rule = "UrlPre"} 33 | 34 | 35 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/ann-job-annie.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/ann-job-annie.png -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/icon.png -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/long-desc.html: -------------------------------------------------------------------------------- 1 |

ANNIE is 2 | a named entity recognition pipeline that identifies basic entity types, such 3 | as Person, Location, Organization, Money 4 | amounts, Time and Date expressions.

5 | 6 |

It is the prototypical information extraction pipeline distributed 7 | with the GATE framework and forms the base of 8 | many more complex GATE-based IE applications.

9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | ANNIE Named Entity Recognizer 4 | :Address, :Date, :Location, :Organization, :Person 5 | 6 | English 7 | Named Entity 8 | 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/.annie-defaults-metadata/short-desc.html: -------------------------------------------------------------------------------- 1 | The ANNIE named entity recognition service. Identifies names of 2 | persons, locations, organizations, as well as 3 | money amounts, time and date expressions. -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/NE/jobtitle.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * jobtitle.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 10 | * 11 | * Diana Maynard, 10 Sep 2001 12 | * 13 | * $Id: jobtitle.jape 5921 2004-07-21 17:00:37Z akshay $ 14 | */ 15 | 16 | 17 | Phase: Jobtitle 18 | Input: Lookup Token 19 | Options: control = appelt 20 | /* Jobtitle1: one Lookup with majorType == jobtitle, optionally followed by a second such Lookup, is bound to :jobtitle and annotated as JobTitle with feature rule = "JobTitle1". */ 21 | Rule: Jobtitle1 22 | ( 23 | {Lookup.majorType == jobtitle} 24 | ( 25 | {Lookup.majorType == jobtitle} 26 | )? 
27 | ) 28 | :jobtitle 29 | --> 30 | :jobtitle.JobTitle = {rule = "JobTitle1"} 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/NE/main.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * main.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 10 | * 11 | * Diana Maynard, 02 Aug 2001 12 | * 13 | * $Id: main.jape 9233 2007-11-23 13:01:52Z dgmaynard $ 14 | */ 15 | /* Multiphase grammar TestTheGrammars: names the phase grammars that make it up, in the order listed below (first ... clean). */ 16 | MultiPhase: TestTheGrammars 17 | Phases: 18 | first 19 | firstname 20 | name 21 | name_post 22 | date_pre 23 | date 24 | reldate 25 | number 26 | address 27 | url_pre 28 | url 29 | email 30 | identifier 31 | jobtitle 32 | final 33 | unknown 34 | name_context 35 | org_context 36 | loc_context 37 | clean 38 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/NE/unknown.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * unknown.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Diana Maynard, 10 Sep 2001 12 | * 13 | * $Id: unknown.jape 7736 2006-10-21 23:29:08Z johann_p $ 14 | */ 15 | 16 | Phase: Unknown 17 | Input: Location Person Date Organization Address Money Percent Token JobTitle Lookup 18 | Options: control = appelt 19 | 20 | /* Known (priority 100): matches any Location, Person, Date, Organization, Address, Money, Percent, JobTitle or Lookup annotation, or a Token whose string is "Dear", and has an empty right-hand side, so it creates no annotation. NOTE(review): presumably this exists so that the lower-priority Unknown rule does not fire on text that is already annotated - confirm. */ 21 | Rule: Known 22 | Priority: 100 23 | ( 24 | {Location}| 25 | {Person}| 26 | {Date}| 27 | {Organization}| 28 | {Address}| 29 | {Money} | 30 | {Percent}| 31 | {Token.string == "Dear"}| 32 | {JobTitle}| 33 | {Lookup} 34 | ):known 35 | --> 36 | {} 37 | 38 | /* Unknown (priority 50): a Token with category == NNP is bound to :unknown and annotated as Unknown with features kind = "PN" and rule = Unknown. */ 39 | Rule:Unknown 40 | Priority: 50 41 | ( 42 | {Token.category == NNP} 43 | ) 44 | :unknown 45 | --> 46 | :unknown.Unknown = {kind = "PN", rule = Unknown} 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/NE/url_pre.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * url_pre.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Diana Maynard, 19 April 2001 12 | * 13 | * $Id: url_pre.jape 5921 2004-07-21 17:00:37Z akshay $ 14 | */ 15 | 16 | Phase: UrlPre 17 | Input: Token SpaceToken 18 | Options: control = appelt 19 | /* Urlpre: annotates a URL prefix as UrlPre (feature rule = "UrlPre"). The prefix is either an "http" or "ftp" Token followed by the Tokens ":", "/", "/", or a "www" Token followed by a "." Token. The comparisons use ==, so only these exact spellings match. */ 20 | Rule: Urlpre 21 | 22 | ( (({Token.string == "http"} | 23 | {Token.string == "ftp"}) 24 | {Token.string == ":"} 25 | {Token.string == "/"} 26 | {Token.string == "/"} 27 | ) | 28 | ({Token.string == "www"} 29 | {Token.string == "."} 30 | ) 31 | ):urlpre 32 | --> 33 | :urlpre.UrlPre = {rule = "UrlPre"} 34 | 35 | 36 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/abbreviations.lst: -------------------------------------------------------------------------------- 1 | MOT 2 | M.O.T. 3 | M O T 4 | M. O. T. -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/adbc.lst: -------------------------------------------------------------------------------- 1 | ad 2 | bc 3 | a.d. 4 | b.c. 5 | AD 6 | BC 7 | A.D. 8 | B.C. 
9 | A D 10 | B C 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/airport.lst: -------------------------------------------------------------------------------- 1 | Charles de Gaulle 2 | Heathrow 3 | London Heathrow 4 | Schipol 5 | JFK 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/airports.lst: -------------------------------------------------------------------------------- 1 | Heathrow 2 | Washington Dulles 3 | Paris Charles de Gaulle 4 | Luton 5 | Stansted 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/charities.lst: -------------------------------------------------------------------------------- 1 | ACET 2 | Acet 3 | AIDS Care Education and Training 4 | AIDS Care Education & Training 5 | BDA 6 | Body Positive 7 | Children In Need 8 | Children in Need 9 | CAH 10 | Christian Aids Help 11 | JDF 12 | JDF International 13 | Heartwatch 14 | IP-UK 15 | Insulin Pumpers UK 16 | Insulin Pumpers 17 | Diabetes UK 18 | PALS 19 | P.A.L.S. 20 | Portsmouth AIDS Link Support 21 | Q.U.I.T. 
22 | QUIT 23 | RNIB 24 | RNCB 25 | Salvation Army 26 | Scottish Aids Monitor 27 | the Link Project 28 | TEAR Fund 29 | YMCA 30 | YWCA 31 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/colours.lst: -------------------------------------------------------------------------------- 1 | aqua 2 | beige 3 | black 4 | blue 5 | brown 6 | crimson 7 | cyan 8 | gray 9 | green 10 | grey 11 | indigo 12 | khaki 13 | maroon 14 | pink 15 | purple 16 | red 17 | white 18 | yellow -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/country_abbrev.lst: -------------------------------------------------------------------------------- 1 | AUS -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/currency: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/currency -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/currency_prefix.lst: -------------------------------------------------------------------------------- 1 | US 2 | NZ 3 | NT 4 | AUS 5 | U.S. 6 | U.S. 7 | N.Z. 8 | N.Z. 
9 | nz 10 | aus 11 | FF 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/date.lst: -------------------------------------------------------------------------------- 1 | hier 2 | 3 | hiver 4 | 5 | demain 6 | 7 | aujourd'hui 8 | 9 | été 10 | 11 | printemps 12 | 13 | automne 14 | 15 | Hier 16 | 17 | Hiver 18 | 19 | Demain 20 | 21 | Aujourd'hui 22 | 23 | Été 24 | 25 | Eté 26 | 27 | Printemps 28 | 29 | Automne 30 | 31 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/date_key.lst: -------------------------------------------------------------------------------- 1 | today 2 | yesterday 3 | tomorrow -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/date_pre.lst: -------------------------------------------------------------------------------- 1 | past 2 | mid- 3 | mid - 4 | end of 5 | almost 6 | all of 7 | Past 8 | Almost 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/date_unit.lst: -------------------------------------------------------------------------------- 1 | day 2 | days 3 | week 4 | weeks 5 | fortnight 6 | fortnights 7 | month 8 | months 9 | quarter 10 | quarters 11 | fiscal quarter 12 | fiscal quarters 13 | half 14 | first-half 15 | fiscal half 16 | year 17 | years 18 | financial year 19 | financial years 20 | fiscal year 21 | tax year 22 | decade 23 | decades 24 | century 25 | centuries 26 | half-year 27 | half year 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/datespan.lst: -------------------------------------------------------------------------------- 
1 | wks 2 | weeks 3 | months 4 | milleniums 5 | millenia 6 | decades 7 | days 8 | centuries 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/day.lst: -------------------------------------------------------------------------------- 1 | Wednesday 2 | Wed. 3 | Wed 4 | Tuesday 5 | Tues. 6 | Tues 7 | Tue. 8 | Tue 9 | Thursday 10 | Thurs. 11 | Thurs 12 | Thur. 13 | Thur 14 | Thu. 15 | Thu 16 | Sunday 17 | Sun. 18 | Sun 19 | Saturday 20 | Sat. 21 | Sat 22 | Monday 23 | Mon. 24 | Mon 25 | Friday 26 | Fri. 27 | Fri -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/day_cap.lst: -------------------------------------------------------------------------------- 1 | FRI 2 | FRI. 3 | FRIDAY 4 | MON. 5 | MON 6 | MONDAY 7 | SAT. 8 | SAT 9 | SATURDAY 10 | SUN. 11 | SUN 12 | SUNDAY 13 | THU. 14 | THU 15 | THUR. 16 | THUR 17 | THURS. 18 | THURS 19 | THURSDAY 20 | TUE. 21 | TUE 22 | TUES. 23 | TUES 24 | TUESDAY 25 | WED. 
26 | WED 27 | WEDNESDAY 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/department.lst: -------------------------------------------------------------------------------- 1 | Treasury Department 2 | Treasury 3 | Transportation Department 4 | TREASURY 5 | State Department 6 | Labor Department 7 | Justice Department 8 | Interior Department 9 | Highway Department 10 | Department of the Interior 11 | Department of Treasury 12 | Department of Transportation 13 | Department of Trade and Industry 14 | Department of Labor 15 | Department of Justice 16 | Department of Interior 17 | Department of Housing and Community Development 18 | Department of Health and Human Services 19 | Department of Commerce 20 | Department of Agriculture 21 | Commerce Department 22 | Agriculture Department 23 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/determiner.lst: -------------------------------------------------------------------------------- 1 | A 2 | a 3 | The 4 | the 5 | One 6 | one -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/facility.lst: -------------------------------------------------------------------------------- 1 | Angkor Wat 2 | Broadway 3 | Watergate 4 | White House 5 | Notre Dame 6 | Kremlin 7 | Pentagon 8 | Auschwitz 9 | Radwaniyah 10 | McDonalds 11 | McDonald's 12 | New York Stock Exchange 13 | Mir 14 | Storgata 15 | Madison Square Garden 16 | Salman Pak 17 | Maple Leaf Gardens 18 | Saint-Germain-des-Pres Church 19 | Highgrove 20 | The Osborn 21 | East 21st Street 22 | 30th Precinct 23 | Cafe Centro 24 | Tropica 25 | Assembly Drive 26 | British Museum 27 | Cleveland Row 28 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/facility_key.lst: -------------------------------------------------------------------------------- 1 | GARDENS 2 | Airbase 3 | Airport 4 | Avenue 5 | Barracks 6 | Base 7 | Building 8 | Cafe 9 | Camp 10 | Castle 11 | Center 12 | Centre 13 | Cinema 14 | Coliseum 15 | Common 16 | Forum 17 | Gardens 18 | GHQ 19 | General Head Quarters 20 | General Headquarters 21 | HQ 22 | Hall 23 | Head Quarters 24 | Headquarters 25 | Hotel 26 | House 27 | International Airport 28 | international airport 29 | Jail 30 | jail 31 | Museum 32 | Office 33 | Palace 34 | Park 35 | Pavilion 36 | Plant 37 | Playhouse 38 | playhouse 39 | Prison Camp 40 | Pub 41 | Ranch 42 | Refuge 43 | Refugee Camp 44 | Resort 45 | Restaurant 46 | School 47 | Ski Resort 48 | Square 49 | Station 50 | Stadium 51 | Street 52 | Theatre 53 | Theater 54 | Tower 55 | Trail 56 | trail 57 | Wall 58 | Zoo 59 | Federal Building 60 | federal building 61 | prison camp 62 | refugee camp 63 | school 64 | Hilton 65 | cemetery 66 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/facility_key_ext.lst: -------------------------------------------------------------------------------- 1 | airport 2 | garden 3 | park 4 | ranch 5 | tower 6 | air base 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/govern_key.lst: -------------------------------------------------------------------------------- 1 | legislature 2 | embassy 3 | corps 4 | consulate 5 | circuit 6 | academy 7 | Project 8 | Operations 9 | National 10 | Legislature 11 | Fleet 12 | Federal 13 | Embassy 14 | Chamber 15 | Court of Appeals 16 | Court 17 | Corps 18 | Consulate 19 | Command 20 | Circuit Court 21 | Circuit 22 | Academy 23 | 24 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/greeting.lst: -------------------------------------------------------------------------------- 1 | Dear 2 | dear 3 | Dearest 4 | dearest 5 | Hello 6 | hello 7 | Hi 8 | hi 9 | Good morning 10 | good morning 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/hour.lst: -------------------------------------------------------------------------------- 1 | one 2 | two 3 | three 4 | four 5 | five 6 | six 7 | seven 8 | eight 9 | nine 10 | ten 11 | eleven 12 | twelve 13 | midnight 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/ident_prekey.lst: -------------------------------------------------------------------------------- 1 | id 2 | id 3 | i.d. 4 | ID 5 | I.D. 
6 | ref 7 | Ref 8 | REF 9 | reference 10 | reference no 11 | reference no 12 | Reference No 13 | Reference no 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/loc_generalkey.lst: -------------------------------------------------------------------------------- 1 | village 2 | city 3 | town 4 | province 5 | state 6 | island 7 | township -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/loc_key.lst: -------------------------------------------------------------------------------- 1 | sea 2 | ocean 3 | Wall 4 | Village 5 | Valley 6 | Trail 7 | Straits 8 | Strait 9 | Station 10 | Stadium 11 | Square 12 | Springs 13 | Sea 14 | Sands 15 | River 16 | Republic 17 | Province 18 | Plains 19 | Parkway 20 | Park 21 | Ocean 22 | Mountains 23 | Mountain 24 | Lakes 25 | Islands 26 | Island 27 | Hills 28 | Hill 29 | Heights 30 | Headquarters 31 | Head Quarters 32 | Harbor 33 | HQ 34 | General Headquarters 35 | General Head Quarters 36 | GHQ 37 | Fjord 38 | Everglades 39 | Estates 40 | District 41 | Desert 42 | Delta 43 | Creek 44 | County 45 | City 46 | Channel 47 | Castle 48 | Canal 49 | Boulevard 50 | Blvd. 
51 | Belt 52 | Beach 53 | Bay Area 54 | Bay 55 | Basin 56 | Base 57 | Barracks 58 | Airbase -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/loc_prekey.lst: -------------------------------------------------------------------------------- 1 | Camp 2 | Cape 3 | Central 4 | East 5 | Eastern 6 | Fort 7 | Lake 8 | Mount 9 | North 10 | North-Eastern 11 | North-Western 12 | Northeast 13 | Northeastern 14 | Northern 15 | Northwest 16 | Northwestern 17 | Porto 18 | Puerto 19 | South 20 | South-Eastern 21 | South-Western 22 | Southeast 23 | Southeastern 24 | Southern 25 | Southwest 26 | Southwestern 27 | West 28 | Western 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/loc_prekey_lower.lst: -------------------------------------------------------------------------------- 1 | camp 2 | cape 3 | central 4 | east 5 | eastern 6 | fort 7 | lake 8 | mount 9 | north 10 | north-eastern 11 | north-western 12 | northeast 13 | northeastern 14 | northern 15 | northwest 16 | northwestern 17 | porto 18 | puerto 19 | south 20 | south-eastern 21 | south-western 22 | southeast 23 | southeastern 24 | southern 25 | southwest 26 | southwestern 27 | west 28 | western -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/loc_relig.lst: -------------------------------------------------------------------------------- 1 | Eden 2 | Garden of Eden 3 | Nazareth 4 | Judea 5 | Bethlehem 6 | Israel 7 | Jericho 8 | Calvary -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/mapping.def: -------------------------------------------------------------------------------- 1 | 
person_male.lst:http://gate.ac.uk/tests/demo.owl:Person 2 | person_male_cap.lst:http://gate.ac.uk/tests/demo.owl:Person 3 | person_male_lower.lst:http://gate.ac.uk/tests/demo.owl:Person 4 | person_female.lst:http://gate.ac.uk/tests/demo.owl:Person 5 | person_female_cap.lst:http://gate.ac.uk/tests/demo.owl:Person 6 | person_female_lower.lst:http://gate.ac.uk/tests/demo.owl:Person -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/ministry.lst: -------------------------------------------------------------------------------- 1 | Trade and Industry Ministry 2 | Ministry of Radio 3 | Ministry of Posts and Telecommunications 4 | Ministry of Posts and Telecommunication 5 | Ministry of Oil Refining and Petrochemical 6 | Ministry of Oil Industry 7 | Ministry of Oil 8 | Ministry of Mineral Fertilizers 9 | Ministry of International Trade and Industry 10 | Ministry of Geology 11 | Ministry of Foreign Economic Relations 12 | Ministry of Finance 13 | Ministry of Electronics Industry 14 | Ministry of Defense 15 | Foreign Ministry 16 | Finance Ministry 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/minutes.lst: -------------------------------------------------------------------------------- 1 | five 2 | ten 3 | fifteen 4 | twenty 5 | twenty five 6 | thirty 7 | thirty five 8 | forty 9 | forty five 10 | fifty 11 | fifty five -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/months.lst: -------------------------------------------------------------------------------- 1 | APR 2 | APR. 3 | APRIL 4 | AUG 5 | AUG. 6 | AUGUST 7 | AUTUMN 8 | Apr 9 | Apr. 10 | April 11 | Aug 12 | Aug. 13 | August 14 | Autumn 15 | DEC 16 | DEC. 17 | DECEMBER 18 | Dec 19 | Dec. 20 | December 21 | FEB 22 | FEB. 
23 | FEBRUARY 24 | Feb 25 | Feb. 26 | February 27 | JAN 28 | JAN. 29 | JANUARY 30 | JUL 31 | JUL. 32 | JULY 33 | JUN 34 | JUN. 35 | JUNE 36 | Jan 37 | Jan. 38 | January 39 | Jul 40 | Jul. 41 | July 42 | Jun 43 | Jun. 44 | June 45 | MAR 46 | MAR. 47 | MARCH 48 | MAY 49 | Mar 50 | Mar. 51 | March 52 | May 53 | NOV 54 | NOV. 55 | NOVEMBER 56 | Nov 57 | Nov. 58 | November 59 | OCT 60 | OCT. 61 | OCTOBER 62 | Oct 63 | Oct. 64 | October 65 | SEP 66 | SEP. 67 | SEPT 68 | SEPT. 69 | SEPTEMBER 70 | Sep 71 | Sep. 72 | Sept 73 | Sept. 74 | September 75 | Summer 76 | Winter 77 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/months_lower.lst: -------------------------------------------------------------------------------- 1 | apr 2 | apr. 3 | april 4 | aug 5 | aug. 6 | august 7 | autumn 8 | dec 9 | dec. 10 | december 11 | feb 12 | feb. 13 | february 14 | jan 15 | jan. 16 | january 17 | jul 18 | jul. 19 | july 20 | jun 21 | jun. 22 | june 23 | mar 24 | mar. 25 | march 26 | nov 27 | nov. 28 | november 29 | oct 30 | oct. 31 | october 32 | sep 33 | sep. 34 | sept 35 | sept. 
36 | september 37 | summer 38 | winter 39 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/mountain.lst: -------------------------------------------------------------------------------- 1 | Alps 2 | Andes 3 | Himalayas 4 | Pyrenees 5 | Snowdonia -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/newspapers.lst: -------------------------------------------------------------------------------- 1 | independent 2 | Independent on Sunday 3 | Independent Digital 4 | Investor's Daily 5 | New York Daily News 6 | New York Post 7 | New York Times 8 | New York Times News Service 9 | New York Times Special Features 10 | New York Times Syndicate 11 | Newsweek 12 | Readers Digest 13 | The Boston Globe 14 | The Financial Times 15 | The Finanical Times Limited 16 | The Independent 17 | The Independent on Sunday 18 | The Phoenix Gazette 19 | Times Mirror 20 | Washington Post 21 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/non_company.lst: -------------------------------------------------------------------------------- 1 | United Nations 2 | UNESCO 3 | UK Atomic Energy Authority 4 | U.N. 
5 | Organization of Petroleum Exporting Countries 6 | Organization for Economic Cooperation and Development 7 | OPEC 8 | North Atlantic Treaty Organization 9 | NATO 10 | Museum of Flight 11 | Japanese Space Agency 12 | Japan International Development Organization 13 | Japan Industrial Development Organization 14 | Italian Space Agency 15 | Hezbollah 16 | General Agreement on Tariffs and Trade 17 | European Union 18 | European Space Agency 19 | European Community 20 | Electronics Research and Service Organization 21 | Electronics Research & Service Organization 22 | Electronics Research & Service Organization 23 | ESA 24 | Association of South East Asian Nations 25 | Association of Flight Attendants 26 | 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/nonspec_date.lst: -------------------------------------------------------------------------------- 1 | year 2 | weekend 3 | week 4 | month 5 | hour 6 | decade 7 | day 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/not_org.lst: -------------------------------------------------------------------------------- 1 | British National 2 | British National Corpus -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/number_fold.lst: -------------------------------------------------------------------------------- 1 | two-fold 2 | three-fold 3 | four-fold 4 | five-fold 5 | six-fold 6 | seven-fold 7 | eight-fold 8 | nine-fold 9 | ten-fold 10 | twenty-fold 11 | thirty-fold 12 | forty-fold 13 | fifty-fold 14 | hundred-fold 15 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/org_pre.lst: 
-------------------------------------------------------------------------------- 1 | Federal 2 | Royal 3 | National -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/org_spur.lst: -------------------------------------------------------------------------------- 1 | ABM 2 | Resolution 3 | War 4 | Olympics 5 | Super Bowl 6 | World Series 7 | Discovery 8 | Challenger 9 | FREE COLLEGE 10 | Free College 11 | Copyright 12 | Corriere della Sera 13 | Chanel No. 5 14 | Limitation 15 | Soul 16 | Service 17 | Start 18 | National League 19 | Series 20 | NMD 21 | TMD 22 | Treaty 23 | N.B. 24 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/other_people.lst: -------------------------------------------------------------------------------- 1 | shareholders 2 | investors 3 | insiders -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/othorg_key.lst: -------------------------------------------------------------------------------- 1 | Local 2 | District 3 | 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/percent.lst: -------------------------------------------------------------------------------- 1 | % 2 | percent 3 | per cent 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_ambig.lst: -------------------------------------------------------------------------------- 1 | Silver 2 | Dame 3 | Jan 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_ambig2.lst: 
-------------------------------------------------------------------------------- 1 | Brittany 2 | May 3 | June 4 | April 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_ending.lst: -------------------------------------------------------------------------------- 1 | Jr 2 | Jr. 3 | jr 4 | jr. 5 | Sr 6 | Sr. 7 | sr 8 | sr. 9 | II 10 | III 11 | Q.C. 12 | QC -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_full.lst: -------------------------------------------------------------------------------- 1 | Bach 2 | Brahms 3 | Carter 4 | Clinton 5 | Gaugin 6 | Gauguin 7 | George Bush 8 | Hillary Clinton 9 | Hillary Rodham Clinton 10 | John the Baptist 11 | Margaret Thatcher 12 | Messiaen 13 | Mozart 14 | Nixon 15 | Pope John Paul II 16 | Richard Nixon 17 | Ronald Reagan 18 | Saddam Hussain 19 | Saint-Saens 20 | Shostakovich 21 | Strauss 22 | Thatcher 23 | The Clintons 24 | The Queen 25 | the Queen 26 | Van Gogh 27 | Virgin Mary 28 | Vivaldi 29 | the Clintons 30 | van Gogh 31 | Barack Obama 32 | Obama 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_male_cap.lst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_male_cap.lst -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_relig.lst: -------------------------------------------------------------------------------- 1 | God 2 | Jesus 3 | Jesus Christ 4 | Lord Jesus Christ 5 | Lord God 6 | the Holy Ghost 7 | 
the Holy Spirit 8 | Christ 9 | Nicademus 10 | Nicodemus 11 | Anzakias -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_sci.lst: -------------------------------------------------------------------------------- 1 | Bloch 2 | Lorenz 3 | Maxwell-Bloch -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/person_spur.lst: -------------------------------------------------------------------------------- 1 | Er 2 | Colleague 3 | Colleagues 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/phone_prefix.lst: -------------------------------------------------------------------------------- 1 | Phone 2 | phone 3 | Telephone 4 | telephone 5 | Tel. 6 | Tel 7 | tel. 8 | tel 9 | Fax 10 | fax 11 | no. -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/planet.lst: -------------------------------------------------------------------------------- 1 | red planet 2 | moon 3 | lunar 4 | Venus 5 | Red Planet 6 | Moon 7 | Mars 8 | Io 9 | Ganymede 10 | Eros 11 | 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/racecourse.lst: -------------------------------------------------------------------------------- 1 | Doncaster 2 | Haydock Park 3 | Uttoxeter 4 | Epsom 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/rivers.lst: -------------------------------------------------------------------------------- 1 | Severn 2 | Thames 3 | Tyne 4 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/sports.lst: -------------------------------------------------------------------------------- 1 | tennis 2 | TENNIS 3 | skiing 4 | SKIING 5 | skating 6 | SKATING 7 | gymnastics 8 | GYMNASTICS 9 | swimming 10 | SWIMMING 11 | football 12 | FOOTBALL 13 | rugby 14 | RUGBY 15 | golf 16 | GOLF 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/spur_ident.lst: -------------------------------------------------------------------------------- 1 | H2O 2 | CO2 -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/stop.lst: -------------------------------------------------------------------------------- 1 | About 2 | And 3 | Because 4 | By 5 | Can 6 | First 7 | For 8 | From 9 | He 10 | Here 11 | I 12 | It 13 | May 14 | Next 15 | Of 16 | Perhaps 17 | Please 18 | Shall 19 | She 20 | So 21 | That 22 | Then 23 | They 24 | We 25 | Will 26 | With 27 | Without 28 | You 29 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/street.lst: -------------------------------------------------------------------------------- 1 | Avenue 2 | Crescent 3 | Hill 4 | Lane 5 | Rise 6 | Road 7 | Row 8 | Street 9 | Way 10 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/surname_prefix.lst: -------------------------------------------------------------------------------- 1 | de 2 | De 3 | del 4 | Del 5 | d' 6 | D' 7 | O' 8 | von 9 | Von 10 | van 11 | Van -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/team.lst: -------------------------------------------------------------------------------- 1 | Boston red sox 2 | Anaheim Angels 3 | red sox 4 | angels 5 | Miami Fusion 6 | Columbus Crew 7 | Copa Libertadores 8 | Toronto Maple Leafs 9 | Montreal Canadiens -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/time.lst: -------------------------------------------------------------------------------- 1 | midday 2 | midnight 3 | noon 4 | dawn 5 | dusk 6 | sunrise 7 | sunset 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/time_ampm.lst: -------------------------------------------------------------------------------- 1 | a.m. 2 | A M 3 | AM 4 | A.M. 5 | pm 6 | p.m. 7 | PM 8 | P.M. 9 | P M -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/time_key.lst: -------------------------------------------------------------------------------- 1 | today 2 | tomorrow 3 | yesterday 4 | this morning 5 | this afternoon 6 | this evening 7 | in the morning 8 | in the afternoon 9 | in the evening -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/time_modifier.lst: -------------------------------------------------------------------------------- 1 | coming 2 | last 3 | next 4 | past 5 | previous 6 | this 7 | Last 8 | Next 9 | This 10 | forthcoming 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/time_unit.lst: -------------------------------------------------------------------------------- 1 | hour 2 | hours 3 | minute 4 | minutes 5 
| second 6 | seconds 7 | morning 8 | afternoon 9 | evening 10 | night -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/times.lst: -------------------------------------------------------------------------------- 1 | witching hour 2 | tonight 3 | start of the day 4 | pre-dawn 5 | overnight 6 | noontide 7 | noonday 8 | noon 9 | nightfall 10 | morning 11 | midnight 12 | middle of the day 13 | midday 14 | midafternoon 15 | mid-day 16 | mid - day 17 | gloaming 18 | evening 19 | end of the day 20 | dusk 21 | daybreak 22 | dawn 23 | break of the day 24 | break of day 25 | afternoon 26 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/timespan.lst: -------------------------------------------------------------------------------- 1 | secs 2 | seconds 3 | minutes 4 | mins 5 | hours 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/timex_pre.lst: -------------------------------------------------------------------------------- 1 | previous 2 | pre- 3 | pre - 4 | next 5 | later 6 | late on 7 | late 8 | last 9 | early 10 | earlier 11 | during 12 | before 13 | after 14 | Previous 15 | Pre- 16 | Pre - 17 | Next 18 | Later 19 | Late on 20 | Late 21 | Last 22 | Early 23 | Earlier 24 | During 25 | Before 26 | After 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/title_female.lst: -------------------------------------------------------------------------------- 1 | Madam 2 | Madame 3 | Mademoiselle 4 | Miss 5 | Mlle 6 | Mme 7 | Mme. 8 | Mrs 9 | Mrs. 10 | Ms 11 | Ms. 
12 | Sister 13 | Spokeswoman -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/title_male.lst: -------------------------------------------------------------------------------- 1 | Marquis 2 | Messr 3 | Messr. 4 | Monsieur 5 | Mr 6 | Mr. 7 | Prince 8 | Rabbi 9 | Sheik 10 | Sir -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/tvcompany.lst: -------------------------------------------------------------------------------- 1 | ABC 2 | Fox 3 | BBC 4 | ITV 5 | Channel 4 6 | Channel 5 7 | Channel 50 8 | VOA 9 | WFTY-TV 10 | WHLL-TV 11 | katv 12 | KATV 13 | VH1 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/water.lst: -------------------------------------------------------------------------------- 1 | Pacific 2 | North Atlantic 3 | New York Harbor 4 | Indian Ocean 5 | Gulf of Mexico 6 | Great Lakes 7 | Everglades 8 | Caribbean 9 | Bay of Biscay 10 | Atlantic 11 | Arctic 12 | Antarctic 13 | Nile 14 | Amazon 15 | Thames -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/gazetteer/year.lst: -------------------------------------------------------------------------------- 1 | 1970 2 | 1971 3 | 1972 4 | 1973 5 | 1974 6 | 1975 7 | 1976 8 | 1977 9 | 1978 10 | 1979 11 | 1980 12 | 1981 13 | 1982 14 | 1983 15 | 1984 16 | 1985 17 | 1986 18 | 1987 19 | 1988 20 | 1989 21 | 1990 22 | 1991 23 | 1992 24 | 1993 25 | 1994 26 | 1995 27 | 1996 28 | 1997 29 | 1998 30 | 1999 31 | 2000 32 | 2001 33 | 2002 34 | 2003 35 | 2004 36 | 2005 37 | 2006 38 | 2007 39 | 2008 40 | 2009 41 | 2010 42 | 2011 43 | 2012 44 | 2013 45 | 2014 46 | 2015 47 | 2016 48 | 2017 49 | 2018 50 | 2019 51 | 2020 52 | 2021 53 | 2022 54 | 2023 
55 | 2024 56 | 2025 57 | 2026 58 | 2027 59 | 2028 60 | 2029 61 | 2030 62 | 2031 63 | 2032 64 | 2033 65 | 2034 66 | 2035 67 | 2036 68 | 2037 69 | 2038 70 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/acelists.def: -------------------------------------------------------------------------------- 1 | cdg.lst:cdg 2 | spur_match.lst:spur_match 3 | prepos.lst:prepos 4 | def_art.lst:def_art 5 | connnector.lst:connnector 6 | alias_ace.lst:alias 7 | nickname.txt:nickname -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/alias.lst: -------------------------------------------------------------------------------- 1 | National Aeronautics and Space Administration£1 2 | NASA£1 3 | New York Stock Exchange£2 4 | Big Board£2 5 | Aluminum Co.£3 6 | Aluminum Co£3 7 | Alcoa£3 8 | New York Times Inc.£4 9 | Times£4 10 | New York Times£4 11 | Coca-Cola Co.£5 12 | Coca-Cola Co£5 13 | Coca-Cola£5 14 | Coke£5 15 | IBM£6 16 | Big Blue£6 17 | New York£7 18 | Big Apple£7 19 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/connnector.lst: -------------------------------------------------------------------------------- 1 | of£con 2 | for£con 3 | de£con 4 | di£con 5 | von£con 6 | van£con 7 | &£con 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/def_art.lst: -------------------------------------------------------------------------------- 1 | The£def 2 | the£def 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/listsNM.def: 
-------------------------------------------------------------------------------- 1 | # cdg.lst:cdg 2 | ../gazetteer/org_ending.lst:cdg 3 | spur_match.lst:spur_match 4 | prepos.lst:prepos 5 | def_art.lst:def_art 6 | connnector.lst:connnector 7 | alias.lst:alias 8 | nickname.txt:nickname -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/prepos.lst: -------------------------------------------------------------------------------- 1 | of£prepos 2 | for£prepos 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/othomatcher/spur_match.lst: -------------------------------------------------------------------------------- 1 | Eastern Airways£1 2 | Eastern Air£1 3 | BT Cellnet£2 4 | BT Wireless£2 5 | BT£2 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/regex-splitter/external-split-patterns.txt: -------------------------------------------------------------------------------- 1 | //These are patterns for sentence splits 2 | // 3 | // Valentin Tablan, 24 Aug 2007 4 | // 5 | // 6 | // Lines starting with // are comments; empty lines are ignored 7 | 8 | //2 or more new lines, optionally separated by other whitespace 9 | (?:[\u00A0\u2007\u202F\p{javaWhitespace}&&[^\n\r]])*(\n\r|\r\n|\n|\r)(?:(?:[\u00A0\u2007\u202F\p{javaWhitespace}&&[^\n\r]])*\1)+ 10 | 11 | //the end of the document is also an external split, so that there is no 12 | //orphaned text 13 | \s*\z 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/regex-splitter/internal-split-patterns.txt: -------------------------------------------------------------------------------- 1 | //These are patterns for sentence splits 2 | // 3 | // Valentin Tablan, 24 Aug 2007 4 | // 5 | // 6 | // 
Lines starting with // are comments; empty lines are ignored 7 | 8 | //between 1 and 3 full stops 9 | \.{1,3}"? 10 | 11 | //up to 4 ! or ? in sequence 12 | (!|\?){1,4}"? -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/ANNIE-Schemas.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/AddressSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/DateSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/IdentifierSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/LocationSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/MoneySchema.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/OrganizationSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/PercentSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/PersonSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/PhoneSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/schema/SentenceSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/gazetteer/abbreviations.lst: -------------------------------------------------------------------------------- 1 | AG 2 | APR 3 | Apr 4 | AUG 5 | Aug 6 | Adm 7 | Brig 8 | CO 9 | CORP 10 | Capt 11 | Cmdr 12 | Co 13 | Col 14 | Comdr 15 | DEC 16 | Dec 17 | DR 18 | 
Dr 19 | FEB 20 | Feb 21 | Fig 22 | FRI 23 | GMBH 24 | Gen 25 | Gov 26 | INC 27 | JAN 28 | Jan 29 | JUL 30 | Jul 31 | JUN 32 | Jun 33 | LTD 34 | Lt 35 | Ltd 36 | MAR 37 | Mar 38 | MON 39 | Mon 40 | MP 41 | Maj 42 | Mr 43 | Mrs 44 | Ms 45 | NA 46 | NOV 47 | Nov 48 | NV 49 | OCT 50 | Oct 51 | Oy 52 | PLC 53 | Prof 54 | Rep 55 | SA 56 | SAT 57 | Sat 58 | SEP 59 | Sep 60 | SIR 61 | SR 62 | SUN 63 | Sun 64 | Sen 65 | Sgt 66 | SpA 67 | St 68 | THU 69 | Thu 70 | THUR 71 | Thur 72 | TUE 73 | Tue 74 | VP 75 | WED 76 | Wed 77 | ad 78 | al 79 | b 80 | ed 81 | eds 82 | eg 83 | e.g 84 | (e.g 85 | [e.g 86 | et 87 | etc 88 | fig 89 | i.e 90 | (i.e 91 | [i.e 92 | p 93 | usu 94 | vs 95 | yr 96 | yrs 97 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/gazetteer/lists.def: -------------------------------------------------------------------------------- 1 | abbreviations.lst:splitter_abbreviation 2 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/cleanup.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * cleanup.jape 3 | * 4 | * Copyright (c) 1998-2007, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Valentin Tablan, March 7th, 2007 12 | * 13 | * $Id$ 14 | */ 15 | 16 | Phase: cleanup 17 | Input: Token 18 | Options: control = once 19 | 20 | //Removes temporary data created by the sentence splitter: the "temp-last-sentence-end" document feature and every splitter_abbreviation Lookup; control = once, so the rule fires on the first Token match only 21 | Rule: cleanUp 22 | {Token} 23 | --> 24 | { 25 | //drop the temporary "temp-last-sentence-end" document feature (presumably set 26 | //when sentences were created) so that it does not leak into future runs 27 | doc.getFeatures().remove("temp-last-sentence-end"); 28 | //remove every Lookup whose majorType is "splitter_abbreviation" (the abbreviation lookups) 29 | FeatureMap constraints = Factory.newFeatureMap(); 30 | constraints.put("majorType", "splitter_abbreviation"); 31 | AnnotationSet toRemove = inputAS.get("Lookup", constraints); 32 | if(toRemove != null) inputAS.removeAll(toRemove); 33 | } 34 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/main-single-nl.jape: -------------------------------------------------------------------------------- 1 | // SplitMain 2 | // Valentin Tablan 17/05/2001 3 | 4 | 5 | //A sentence splitter: lists the phases prepare, find-single-nl, split and cleanup 6 | MultiPhase: main 7 | Phases: 8 | 9 | prepare 10 | find-single-nl 11 | split 12 | cleanup 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/main.jape: -------------------------------------------------------------------------------- 1 | // SplitMain 2 | // Valentin Tablan 17/05/2001 3 | 4 | 5 | //A sentence splitter: lists the phases prepare, find, split and cleanup 6 | MultiPhase: main 7 | Phases: 8 | 9 | prepare 10 | find 11 | split 12 | cleanup 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/no-splits.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * no-splits.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 
5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 10 | * 11 | * Valentin Tablan, 23 Jan 2007 12 | * 13 | * $Id$ 14 | */ 15 | 16 | //This grammar deals with documents that have no splits: if the input set holds no Split annotation, it adds a single TempNoSplitText annotation running from the first to the last node of the output set 17 | 18 | Phase:noSplits 19 | Input: Token Split 20 | Options: control = once 21 | 22 | Rule: blah 23 | {Token} 24 | --> 25 | { 26 | AnnotationSet splits = inputAS.get("Split"); 27 | if(splits == null || splits.isEmpty()){ /* no Split annotation anywhere in the input set */ 28 | outputAS.add(outputAS.firstNode(), outputAS.lastNode(), 29 | "TempNoSplitText", Factory.newFeatureMap()); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/prepare.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * prepare.jape 3 | * 4 | * Copyright (c) 1998-2007, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Valentin Tablan, June 22nd, 2007 12 | * 13 | * $Id$ 14 | */ 15 | 16 | Phase: prepare 17 | Input: Token 18 | Options: control = once 19 | 20 | //Makes sure there is no temporary data created by a previous run of the 21 | //sentence splitter; control = once, so the rule fires on the first Token match only 22 | Rule: cleanUp 23 | {Token} 24 | --> 25 | { 26 | //drop any "temp-last-sentence-end" document feature left behind by an earlier 27 | //run, so that this run starts without stale temporary data 28 | doc.getFeatures().remove("temp-last-sentence-end"); 29 | } 30 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | GATE-plugin-JAPE_Plus 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/creole.xml: -------------------------------------------------------------------------------- 1 | 2 | jape-plus.jar 3 | lib/colt-1.2.0-free.jar 4 | 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/doc/javadoc/gate/gui/jape/plus/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.gui.jape.plus (${plugin.name} JavaDoc) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | gate.gui.jape.plus 20 | 21 | 22 | 27 | 28 |
23 | Classes  24 | 25 |
26 | Viewer
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/doc/javadoc/gate/resources/img/svg/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.resources.img.svg (${plugin.name} JavaDoc) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | gate.resources.img.svg 20 | 21 | 22 | 27 | 28 |
23 | Classes  24 | 25 |
26 | JapePlusIcon
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/doc/javadoc/package-list: -------------------------------------------------------------------------------- 1 | com.ontotext.jape.automaton 2 | com.ontotext.jape.pda 3 | gate.gui.jape.plus 4 | gate.jape.plus 5 | gate.resources.img.svg 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/doc/javadoc/resources/inherit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/doc/javadoc/resources/inherit.gif -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/jape-plus.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/jape-plus.jar -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/lib/colt-1.2.0-free.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/lib/colt-1.2.0-free.jar -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/lib/licence-colt-1.2.0-free.txt: -------------------------------------------------------------------------------- 1 | Packages cern.colt* , cern.jet*, cern.clhep 2 | 3 | Copyright (c) 1999 CERN - European Organization for Nuclear Research. 
4 | 5 | Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose is hereby granted without fee, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation. CERN makes no representations about the suitability of this software for any purpose. It is provided "as is" without expressed or implied warranty. 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/lib/src/colt-1.2.0-free-src.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/lib/src/colt-1.2.0-free-src.jar -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/src/com/ontotext/jape/automaton/Constants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Constants.java 3 | * 4 | * Copyright (c) 2010-2011, Ontotext (www.ontotext.com). 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * 12 | * $Id$ 13 | */ 14 | package com.ontotext.jape.automaton; 15 | /* Holder for three public integer constants of the com.ontotext.jape.automaton package. */ 16 | public class Constants { 17 | public static final int NO = -1; /* NOTE(review): presumably a "none / not found" sentinel - confirm against callers */ 18 | public static final int hashStep = 107; /* NOTE(review): presumably a probe step for a hand-rolled hash table - confirm */ 19 | public static final int hashBase = 257; /* NOTE(review): presumably the multiplier/base of that hash - confirm */ 20 | } 21 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/src/gate/gui/jape/plus/Viewer.java: -------------------------------------------------------------------------------- 1 | package gate.gui.jape.plus; 2 | 3 | import gate.creole.metadata.CreoleResource; 4 | import gate.creole.metadata.GuiType; 5 | import gate.gui.jape.JapeViewer; 6 | 7 | @CreoleResource(name = "JAPE-Plus Viewer", comment = "A JAPE grammar file viewer", resourceDisplayed = "gate.jape.plus.Transducer", mainViewer = true, helpURL = "http://gate.ac.uk/userguide/chap:jape", guiType = GuiType.LARGE) 8 | public class Viewer extends JapeViewer { 9 | // intentionally empty subclass: it allows the same VR (JapeViewer) to be used for two different PRs without 10 | // a common superclass; here it is registered for gate.jape.plus.Transducer 11 | } -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test-contains.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | /* Test rule: matches an A annotation that contains a B annotation of length 3, adds an Out annotation over the binding and prints the ids of the bound annotations as "[id, id, ...]". */ 6 | Rule: A 7 | ({A contains {B@length==3}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test-contains.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.SourceURL 8 | 
created from String 9 | 10 | 11 | 12 | 13 | AAAABBBAAAA 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | feat 22 | val 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test-not-contains.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | /* Test rule: matches an A annotation that does not contain a C annotation, adds an Out annotation over the binding and prints the ids of the bound annotations as "[id, id, ...]". */ 6 | Rule: A 7 | ({A notContains {C}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test-not-within.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | /* Test rule: matches a B annotation that is not within a C annotation, adds an Out annotation over the binding and prints the ids of the bound annotations as "[id, id, ...]". */ 6 | Rule: A 7 | ({B notWithin {C}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test-within.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | /* Test rule: matches a B annotation that lies within an A annotation whose feature feat equals "val", adds an Out annotation over the binding and prints the ids of the bound annotations as "[id, id, ...]". */ 6 | Rule: A 7 | ({B within {A.feat=="val"}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | 
System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test-within.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.SourceURL 8 | created from String 9 | 10 | 11 | 12 | 13 | AAABBBAAA 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | feat 22 | val 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/test_actioncontext.jape: -------------------------------------------------------------------------------- 1 | 2 | Phase: phase1 3 | Options: control = once 4 | 5 | Rule: rule1 6 | ({Token}):label 7 | --> 8 | { 9 | System.out.println("Testing the action context:"); 10 | System.out.println("Corpus is "+ctx.getCorpus()); 11 | System.out.println("Features "+ctx.getPRFeatures()); 12 | } 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/testabc.jape: -------------------------------------------------------------------------------- 1 | Phase: ABC 2 | 3 | Input: A B C 4 | Options: control = appelt 5 | 6 | Rule: A 7 | ( 8 | {A} 9 | ):a 10 | --> 11 | :a.TempA={} 12 | 13 | Rule: AB 14 | ( 15 | {A}{B} 16 | ):a 17 | --> 18 | :a.TempAB={} 19 | 20 | Rule: ABC 21 | ( 22 | {A}{B}{C} 23 | ):a 24 | --> 25 | :a.TempABC={} 26 | 27 | Rule: APlus 28 | ( 29 | ({A})+ 30 | ):a 31 | --> 32 | :a.TempAPlus={} 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/testdoc.txt: -------------------------------------------------------------------------------- 1 | Just a little test document. 
2 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/testonto.jape: -------------------------------------------------------------------------------- 1 | 2 | Phase: testOnto 3 | Input: Token SpaceToken 4 | 5 | 6 | Rule: rule1 7 | ({Token.class == "Person"}):label 8 | --> 9 | :label.Match = {} 10 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/testonto.rdf.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/JAPE_Plus/test/zero-length-ann.jape: -------------------------------------------------------------------------------- 1 | Phase: Aaaa 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | 6 | Rule: B 7 | ({C})*:bind 8 | --> 9 | :bind.Bsingle = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | GATE-plugin-Tagger_Numbers 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/IntegerTagger.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/IntegerTagger.jar -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/README: -------------------------------------------------------------------------------- 1 | This plugin reads positive integer numbers in words & digits (& unicode symbols) and parses out their numerical value. 2 | 3 | To build 4 | 1. Place in GATE plugins directory 5 | 2. $ ant clean jar 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/gate/creole/integers/package-info.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/gate/creole/integers/package-info.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/gate/resources/img/numbers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/gate/resources/img/numbers.png -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/AnnotationConstants.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/AnnotationConstants.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$1.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$2.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$2.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Config$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Config$1.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Config.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Config.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Multiplier$Type.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Multiplier$Type.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Multiplier.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger$Multiplier.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/classes/integers/IntegerTagger.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/creole.xml: -------------------------------------------------------------------------------- 1 | 2 | IntegerTagger.jar 3 | 4 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/resources/languages/english_and_symbols.xml: -------------------------------------------------------------------------------- 1 | 2 | English and Symbols 3 | 4 | symbols.xml 5 | english.xml 6 | English_Dictionary.xml 7 | 8 | 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/src/gate/creole/integers/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | File: package-info.java 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE PR that attempts to annotate integers 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | 10 | /* 11 | * This file is free software, 12 | 13 | * licenced under the GNU Lesser General Public License, Version 3, June 2007 14 | 15 | * (in the distribution as file licence.html) 16 | * 17 | * This file was created by modifying the Tagger_Number plugin distributed with GATE (see http://gate.ac.uk/) under the same license (see https://gate.ac.uk/gate/licence.html as accessed July, 2014) 18 | * 19 | */ 20 | 21 | 22 | package integers; 23 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/src/gate/creole/integers/package-info.java~: -------------------------------------------------------------------------------- 1 | /* 2 | File: package-info.java 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE PR that attempts to annotate integers 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | 10 | /* 11 | * This file is free software, 12 | * licenced under the GNU Library General Public License, Version 3, June 2007 13 | * (in the distribution as file licence.html) 14 | * 15 | * This file was created by modifying the Tagger_Number plugin distributed with GATE (see http://gate.ac.uk/) 16 | * 17 | */ 18 | 
19 | package integers; 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/src/gate/resources/img/numbers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_Integer/src/gate/resources/img/numbers.png -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | GATE-plugin-Tagger_Numbers 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/PhoneNumberTagger.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/PhoneNumberTagger.jar -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/README: -------------------------------------------------------------------------------- 1 | This plugin tries to identify phone numbers in messy text. 2 | 3 | To build 4 | 1. Place in GATE plugins directory 5 | 2. 
$ ant clean jar 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/gate/creole/phonenumbers/package-info.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/gate/creole/phonenumbers/package-info.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/gate/resources/img/numbers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/gate/resources/img/numbers.png -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/AnnotationConstants.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/AnnotationConstants.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$1.class -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$AreaCodeDescription.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$AreaCodeDescription.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$Config$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$Config$1.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$Config.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$Config.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$DigitGroup.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger$DigitGroup.class -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/classes/phonenumbers/PhoneNumberTagger.class -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/creole.xml: -------------------------------------------------------------------------------- 1 | 2 | PhoneNumberTagger.jar 3 | 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/resources/AreaCode.xml: -------------------------------------------------------------------------------- 1 | 2 | Known Area Codes 3 | 4 | areacodes_2014-01-29.xml 5 | 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/src/gate/creole/phonenumbers/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | File: package-info.java 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE PR that attempts to annotate phone numbers 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | 10 | /* 11 | * This file is free software, 12 | 13 | * licenced under the GNU Lesser General Public License, Version 3, June 2007 14 | 15 | * (in the distribution as file licence.html) 16 | * 17 | * This file was created by modifying the Tagger_Number plugin distributed with GATE (see http://gate.ac.uk/) under the same license (see https://gate.ac.uk/gate/licence.html as accessed July, 2014) 18 | * 19 | */ 20 | 21 | 22 | package phonenumbers; 23 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/src/gate/creole/phonenumbers/package-info.java~: -------------------------------------------------------------------------------- 1 | /* 2 | File: package-info.java 3 | Author: Kyle Miller 4 | Created: July, 2014 5 | Description: a GATE PR that attempts to annotate phone numbers 6 | 7 | Copyright (C) 2014, Carnegie Mellon University 8 | */ 9 | 10 | /* 11 | * This file is free software, 12 | * licenced under the GNU Library General Public License, Version 3, June 2007 13 | * (in the distribution as file licence.html) 14 | * 15 | * This file was created by modifying the Tagger_Number plugin distributed with GATE (see http://gate.ac.uk/) 16 | * 17 | */ 18 | 19 | package phonenumbers; 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/src/gate/resources/img/numbers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/TJInfoExtractor/plugins/Tagger_PhoneNumber/src/gate/resources/img/numbers.png -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/ant-launcher.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/ant-launcher.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/ant.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/ant.jar -------------------------------------------------------------------------------- 
/TJBatchExtractor/dependencies/aopalliance.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/aopalliance.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/apache-mime4j-core.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/apache-mime4j-core.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/apache-mime4j-dom.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/apache-mime4j-dom.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/bcmail-jdk15.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/bcmail-jdk15.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/bcprov-jdk15.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/bcprov-jdk15.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/commons-codec.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/commons-codec.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/commons-compress.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/commons-compress.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/commons-io.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/commons-io.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/commons-lang.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/commons-lang.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/commons-logging.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/commons-logging.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/dom4j.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/dom4j.jar -------------------------------------------------------------------------------- 
/TJBatchExtractor/dependencies/flying-saucer-core.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/flying-saucer-core.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/fontbox.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/fontbox.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/gate-asm.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/gate-asm.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/gate-compiler-jdt.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/gate-compiler-jdt.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/gate.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/gate.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/ivy.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/ivy.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/jackson-annotations.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jackson-annotations.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/jackson-core.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jackson-core.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/jackson-databind.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jackson-databind.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/java-getopt.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/java-getopt.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/jaxen.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jaxen.jar -------------------------------------------------------------------------------- 
/TJBatchExtractor/dependencies/jdom.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jdom.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/jempbox.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jempbox.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/jsoup-1.7.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/jsoup-1.7.3.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/junit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/junit.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/log4j.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/log4j.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/lucene-core.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/lucene-core.jar 
-------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/metadata-extractor.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/metadata-extractor.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/nekohtml.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/nekohtml.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/pdfbox.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/pdfbox.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/poi-ooxml-schemas.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/poi-ooxml-schemas.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/poi-ooxml.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/poi-ooxml.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/poi-scratchpad.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/poi-scratchpad.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/poi.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/poi.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/spring-aop.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/spring-aop.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/spring-beans.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/spring-beans.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/spring-core.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/spring-core.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/stax2-api.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/stax2-api.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/tika-core.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/tika-core.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/tika-parsers.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/tika-parsers.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/woodstox-core-lgpl.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/woodstox-core-lgpl.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/xercesImpl.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/xercesImpl.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/xmlbeans.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/xmlbeans.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/xmlunit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/xmlunit.jar 
-------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/xpp3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/xpp3.jar -------------------------------------------------------------------------------- /TJBatchExtractor/dependencies/xstream.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/dependencies/xstream.jar -------------------------------------------------------------------------------- /TJBatchExtractor/gate.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/ann-job-annie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/ann-job-annie.png -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/icon.png -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/long-desc.html: -------------------------------------------------------------------------------- 1 |

ANNIE is 2 | a named entity recognition pipeline that identifies basic entity types, such 3 | as Person, Location, Organization, Money 4 | amounts, Time and Date expressions.

5 | 6 |

It is the prototypical information extraction pipeline distributed 7 | with the GATE framework and forms the base of 8 | many more complex GATE-based IE applications.

9 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | ANNIE Named Entity Recognizer 4 | :Address, :Date, :Location, :Organization, :Person 5 | 6 | English 7 | Named Entity 8 | 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/.annie-defaults-metadata/short-desc.html: -------------------------------------------------------------------------------- 1 | The ANNIE named entity recognition service. Identifies names of 2 | persons, locations, organizations, as well as 3 | money amounts, time and date expressions. -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/NE/clean.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * clean.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Diana Maynard, 10 Sep 2001 12 | * 13 | * $Id: clean.jape 11879 2009-11-03 13:24:55Z markagreenwood $ 14 | */ 15 | 16 | Phase: Clean 17 | Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs 18 | Options: control = appelt 19 | 20 | Rule:CleanTempAnnotations 21 | ( 22 | {TempPerson}| 23 | {TempLocation}| 24 | {TempOrganization}| 25 | {TempDate}| 26 | {TempTime}| 27 | {TempYear}| 28 | {TempZone}| 29 | {Street}| 30 | {Postcode}| 31 | {Email}| 32 | {Url}| 33 | {Phone}| 34 | {Ip}| 35 | {TempIdentifier}| 36 | {TempSpecs} 37 | ):temp 38 | --> 39 | { 40 | gate.AnnotationSet temp = (gate.AnnotationSet)bindings.get("temp"); 41 | outputAS.removeAll(temp); 42 | } 43 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/NE/jobtitle.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * jobtitle.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 10 | * 11 | * Diana Maynard, 10 Sep 2001 12 | * 13 | * $Id: jobtitle.jape 5921 2004-07-21 17:00:37Z akshay $ 14 | */ 15 | 16 | 17 | Phase: Jobtitle 18 | Input: Lookup Token 19 | Options: control = appelt 20 | 21 | Rule: Jobtitle1 22 | ( 23 | {Lookup.majorType == jobtitle} 24 | ( 25 | {Lookup.majorType == jobtitle} 26 | )? 
27 | ) 28 | :jobtitle 29 | --> 30 | :jobtitle.JobTitle = {rule = "JobTitle1"} 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/NE/main.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * main.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 10 | * 11 | * Diana Maynard, 02 Aug 2001 12 | * 13 | * $Id: main.jape 9233 2007-11-23 13:01:52Z dgmaynard $ 14 | */ 15 | 16 | MultiPhase: TestTheGrammars 17 | Phases: 18 | first 19 | firstname 20 | name 21 | name_post 22 | date_pre 23 | date 24 | reldate 25 | number 26 | address 27 | url_pre 28 | url 29 | email 30 | identifier 31 | jobtitle 32 | final 33 | unknown 34 | name_context 35 | org_context 36 | loc_context 37 | clean 38 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/NE/unknown.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * unknown.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Diana Maynard, 10 Sep 2001 12 | * 13 | * $Id: unknown.jape 7736 2006-10-21 23:29:08Z johann_p $ 14 | */ 15 | 16 | Phase: Unknown 17 | Input: Location Person Date Organization Address Money Percent Token JobTitle Lookup 18 | Options: control = appelt 19 | 20 | 21 | Rule: Known 22 | Priority: 100 23 | ( 24 | {Location}| 25 | {Person}| 26 | {Date}| 27 | {Organization}| 28 | {Address}| 29 | {Money} | 30 | {Percent}| 31 | {Token.string == "Dear"}| 32 | {JobTitle}| 33 | {Lookup} 34 | ):known 35 | --> 36 | {} 37 | 38 | 39 | Rule:Unknown 40 | Priority: 50 41 | ( 42 | {Token.category == NNP} 43 | ) 44 | :unknown 45 | --> 46 | :unknown.Unknown = {kind = "PN", rule = Unknown} 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/NE/url_pre.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * url_pre.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Diana Maynard, 19 April 2001 12 | * 13 | * $Id: url_pre.jape 5921 2004-07-21 17:00:37Z akshay $ 14 | */ 15 | 16 | Phase: UrlPre 17 | Input: Token SpaceToken 18 | Options: control = appelt 19 | 20 | Rule: Urlpre 21 | 22 | ( (({Token.string == "http"} | 23 | {Token.string == "ftp"}) 24 | {Token.string == ":"} 25 | {Token.string == "/"} 26 | {Token.string == "/"} 27 | ) | 28 | ({Token.string == "www"} 29 | {Token.string == "."} 30 | ) 31 | ):urlpre 32 | --> 33 | :urlpre.UrlPre = {rule = "UrlPre"} 34 | 35 | 36 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/abbreviations.lst: -------------------------------------------------------------------------------- 1 | MOT 2 | M.O.T. 3 | M O T 4 | M. O. T. -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/adbc.lst: -------------------------------------------------------------------------------- 1 | ad 2 | bc 3 | a.d. 4 | b.c. 5 | AD 6 | BC 7 | A.D. 8 | B.C. 
9 | A D 10 | B C 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/airport.lst: -------------------------------------------------------------------------------- 1 | Charles de Gaulle 2 | Heathrow 3 | London Heathrow 4 | Schipol 5 | JFK 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/airports.lst: -------------------------------------------------------------------------------- 1 | Heathrow 2 | Washington Dulles 3 | Paris Charles de Gaulle 4 | Luton 5 | Stansted 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/charities.lst: -------------------------------------------------------------------------------- 1 | ACET 2 | Acet 3 | AIDS Care Education and Training 4 | AIDS Care Education & Training 5 | BDA 6 | Body Positive 7 | Children In Need 8 | Children in Need 9 | CAH 10 | Christian Aids Help 11 | JDF 12 | JDF International 13 | Heartwatch 14 | IP-UK 15 | Insulin Pumpers UK 16 | Insulin Pumpers 17 | Diabetes UK 18 | PALS 19 | P.A.L.S. 20 | Portsmouth AIDS Link Support 21 | Q.U.I.T. 
22 | QUIT 23 | RNIB 24 | RNCB 25 | Salvation Army 26 | Scottish Aids Monitor 27 | the Link Project 28 | TEAR Fund 29 | YMCA 30 | YWCA 31 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/colours.lst: -------------------------------------------------------------------------------- 1 | aqua 2 | beige 3 | black 4 | blue 5 | brown 6 | crimson 7 | cyan 8 | gray 9 | green 10 | grey 11 | indigo 12 | khaki 13 | maroon 14 | pink 15 | purple 16 | red 17 | white 18 | yellow -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/country_abbrev.lst: -------------------------------------------------------------------------------- 1 | AUS -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/currency_prefix.lst: -------------------------------------------------------------------------------- 1 | US 2 | NZ 3 | NT 4 | AUS 5 | U.S. 6 | U.S. 7 | N.Z. 8 | N.Z. 
9 | nz 10 | aus 11 | FF 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/date.lst: -------------------------------------------------------------------------------- 1 | hier 2 | 3 | hiver 4 | 5 | demain 6 | 7 | aujourd'hui 8 | 9 | été 10 | 11 | printemps 12 | 13 | automne 14 | 15 | Hier 16 | 17 | Hiver 18 | 19 | Demain 20 | 21 | Aujourd'hui 22 | 23 | Été 24 | 25 | Eté 26 | 27 | Printemps 28 | 29 | Automne 30 | 31 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/date_key.lst: -------------------------------------------------------------------------------- 1 | today 2 | yesterday 3 | tomorrow -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/date_pre.lst: -------------------------------------------------------------------------------- 1 | past 2 | mid- 3 | mid - 4 | end of 5 | almost 6 | all of 7 | Past 8 | Almost 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/date_unit.lst: -------------------------------------------------------------------------------- 1 | day 2 | days 3 | week 4 | weeks 5 | fortnight 6 | fortnights 7 | month 8 | months 9 | quarter 10 | quarters 11 | fiscal quarter 12 | fiscal quarters 13 | half 14 | first-half 15 | fiscal half 16 | year 17 | years 18 | financial year 19 | financial years 20 | fiscal year 21 | tax year 22 | decade 23 | decades 24 | century 25 | centuries 26 | half-year 27 | half year 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/datespan.lst: -------------------------------------------------------------------------------- 1 | wks 2 | weeks 3 | months 4 | milleniums 5 | millenia 6 | decades 7 | days 8 
| centuries 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/day.lst: -------------------------------------------------------------------------------- 1 | Wednesday 2 | Wed. 3 | Wed 4 | Tuesday 5 | Tues. 6 | Tues 7 | Tue. 8 | Tue 9 | Thursday 10 | Thurs. 11 | Thurs 12 | Thur. 13 | Thur 14 | Thu. 15 | Thu 16 | Sunday 17 | Sun. 18 | Sun 19 | Saturday 20 | Sat. 21 | Sat 22 | Monday 23 | Mon. 24 | Mon 25 | Friday 26 | Fri. 27 | Fri -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/day_cap.lst: -------------------------------------------------------------------------------- 1 | FRI 2 | FRI. 3 | FRIDAY 4 | MON. 5 | MON 6 | MONDAY 7 | SAT. 8 | SAT 9 | SATURDAY 10 | SUN. 11 | SUN 12 | SUNDAY 13 | THU. 14 | THU 15 | THUR. 16 | THUR 17 | THURS. 18 | THURS 19 | THURSDAY 20 | TUE. 21 | TUE 22 | TUES. 23 | TUES 24 | TUESDAY 25 | WED. 
26 | WED 27 | WEDNESDAY 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/department.lst: -------------------------------------------------------------------------------- 1 | Treasury Department 2 | Treasury 3 | Transportation Department 4 | TREASURY 5 | State Department 6 | Labor Department 7 | Justice Department 8 | Interior Department 9 | Highway Department 10 | Department of the Interior 11 | Department of Treasury 12 | Department of Transportation 13 | Department of Trade and Industry 14 | Department of Labor 15 | Department of Justice 16 | Department of Interior 17 | Department of Housing and Community Development 18 | Department of Health and Human Services 19 | Department of Commerce 20 | Department of Agriculture 21 | Commerce Department 22 | Agriculture Department 23 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/determiner.lst: -------------------------------------------------------------------------------- 1 | A 2 | a 3 | The 4 | the 5 | One 6 | one -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/facility.lst: -------------------------------------------------------------------------------- 1 | Angkor Wat 2 | Broadway 3 | Watergate 4 | White House 5 | Notre Dame 6 | Kremlin 7 | Pentagon 8 | Auschwitz 9 | Radwaniyah 10 | McDonalds 11 | McDonald's 12 | New York Stock Exchange 13 | Mir 14 | Storgata 15 | Madison Square Garden 16 | Salman Pak 17 | Maple Leaf Gardens 18 | Saint-Germain-des-Pres Church 19 | Highgrove 20 | The Osborn 21 | East 21st Street 22 | 30th Precinct 23 | Cafe Centro 24 | Tropica 25 | Assembly Drive 26 | British Museum 27 | Cleveland Row 28 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/plugins/ANNIE/resources/gazetteer/facility_key.lst: -------------------------------------------------------------------------------- 1 | GARDENS 2 | Airbase 3 | Airport 4 | Avenue 5 | Barracks 6 | Base 7 | Building 8 | Cafe 9 | Camp 10 | Castle 11 | Center 12 | Centre 13 | Cinema 14 | Coliseum 15 | Common 16 | Forum 17 | Gardens 18 | GHQ 19 | General Head Quarters 20 | General Headquarters 21 | HQ 22 | Hall 23 | Head Quarters 24 | Headquarters 25 | Hotel 26 | House 27 | International Airport 28 | international airport 29 | Jail 30 | jail 31 | Museum 32 | Office 33 | Palace 34 | Park 35 | Pavilion 36 | Plant 37 | Playhouse 38 | playhouse 39 | Prison Camp 40 | Pub 41 | Ranch 42 | Refuge 43 | Refugee Camp 44 | Resort 45 | Restaurant 46 | School 47 | Ski Resort 48 | Square 49 | Station 50 | Stadium 51 | Street 52 | Theatre 53 | Theater 54 | Tower 55 | Trail 56 | trail 57 | Wall 58 | Zoo 59 | Federal Building 60 | federal building 61 | prison camp 62 | refugee camp 63 | school 64 | Hilton 65 | cemetery 66 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/facility_key_ext.lst: -------------------------------------------------------------------------------- 1 | airport 2 | garden 3 | park 4 | ranch 5 | tower 6 | air base 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/govern_key.lst: -------------------------------------------------------------------------------- 1 | legislature 2 | embassy 3 | corps 4 | consulate 5 | circuit 6 | academy 7 | Project 8 | Operations 9 | National 10 | Legislature 11 | Fleet 12 | Federal 13 | Embassy 14 | Chamber 15 | Court of Appeals 16 | Court 17 | Corps 18 | Consulate 19 | Command 20 | Circuit Court 21 | Circuit 22 | Academy 23 | 24 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/plugins/ANNIE/resources/gazetteer/greeting.lst: -------------------------------------------------------------------------------- 1 | Dear 2 | dear 3 | Dearest 4 | dearest 5 | Hello 6 | hello 7 | Hi 8 | hi 9 | Good morning 10 | good morning 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/hour.lst: -------------------------------------------------------------------------------- 1 | one 2 | two 3 | three 4 | four 5 | five 6 | six 7 | seven 8 | eight 9 | nine 10 | ten 11 | eleven 12 | twelve 13 | midnight 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/ident_prekey.lst: -------------------------------------------------------------------------------- 1 | id 2 | id 3 | i.d. 4 | ID 5 | I.D. 6 | ref 7 | Ref 8 | REF 9 | reference 10 | reference no 11 | reference no 12 | Reference No 13 | Reference no 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/loc_generalkey.lst: -------------------------------------------------------------------------------- 1 | village 2 | city 3 | town 4 | province 5 | state 6 | island 7 | township -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/loc_key.lst: -------------------------------------------------------------------------------- 1 | sea 2 | ocean 3 | Wall 4 | Village 5 | Valley 6 | Trail 7 | Straits 8 | Strait 9 | Station 10 | Stadium 11 | Square 12 | Springs 13 | Sea 14 | Sands 15 | River 16 | Republic 17 | Province 18 | Plains 19 | Parkway 20 | Park 21 | Ocean 22 | Mountains 23 | Mountain 24 | Lakes 25 | Islands 26 | Island 27 | Hills 28 | Hill 29 | Heights 30 | Headquarters 31 | Head Quarters 32 | Harbor 33 | HQ 34 | General Headquarters 35 | General Head Quarters 36 | GHQ 
37 | Fjord 38 | Everglades 39 | Estates 40 | District 41 | Desert 42 | Delta 43 | Creek 44 | County 45 | City 46 | Channel 47 | Castle 48 | Canal 49 | Boulevard 50 | Blvd. 51 | Belt 52 | Beach 53 | Bay Area 54 | Bay 55 | Basin 56 | Base 57 | Barracks 58 | Airbase -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/loc_prekey.lst: -------------------------------------------------------------------------------- 1 | Camp 2 | Cape 3 | Central 4 | East 5 | Eastern 6 | Fort 7 | Lake 8 | Mount 9 | North 10 | North-Eastern 11 | North-Western 12 | Northeast 13 | Northeastern 14 | Northern 15 | Northwest 16 | Northwestern 17 | Porto 18 | Puerto 19 | South 20 | South-Eastern 21 | South-Western 22 | Southeast 23 | Southeastern 24 | Southern 25 | Southwest 26 | Southwestern 27 | West 28 | Western 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/loc_prekey_lower.lst: -------------------------------------------------------------------------------- 1 | camp 2 | cape 3 | central 4 | east 5 | eastern 6 | fort 7 | lake 8 | mount 9 | north 10 | north-eastern 11 | north-western 12 | northeast 13 | northeastern 14 | northern 15 | northwest 16 | northwestern 17 | porto 18 | puerto 19 | south 20 | south-eastern 21 | south-western 22 | southeast 23 | southeastern 24 | southern 25 | southwest 26 | southwestern 27 | west 28 | western -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/loc_relig.lst: -------------------------------------------------------------------------------- 1 | Eden 2 | Garden of Eden 3 | Nazareth 4 | Judea 5 | Bethlehem 6 | Israel 7 | Jericho 8 | Calvary -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/mapping.def: 
-------------------------------------------------------------------------------- 1 | person_male.lst:http://gate.ac.uk/tests/demo.owl:Person 2 | person_male_cap.lst:http://gate.ac.uk/tests/demo.owl:Person 3 | person_male_lower.lst:http://gate.ac.uk/tests/demo.owl:Person 4 | person_female.lst:http://gate.ac.uk/tests/demo.owl:Person 5 | person_female_cap.lst:http://gate.ac.uk/tests/demo.owl:Person 6 | person_female_lower.lst:http://gate.ac.uk/tests/demo.owl:Person -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/ministry.lst: -------------------------------------------------------------------------------- 1 | Trade and Industry Ministry 2 | Ministry of Radio 3 | Ministry of Posts and Telecommunications 4 | Ministry of Posts and Telecommunication 5 | Ministry of Oil Refining and Petrochemical 6 | Ministry of Oil Industry 7 | Ministry of Oil 8 | Ministry of Mineral Fertilizers 9 | Ministry of International Trade and Industry 10 | Ministry of Geology 11 | Ministry of Foreign Economic Relations 12 | Ministry of Finance 13 | Ministry of Electronics Industry 14 | Ministry of Defense 15 | Foreign Ministry 16 | Finance Ministry 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/minutes.lst: -------------------------------------------------------------------------------- 1 | five 2 | ten 3 | fifteen 4 | twenty 5 | twenty five 6 | thirty 7 | thirty five 8 | forty 9 | forty five 10 | fifty 11 | fifty five -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/months.lst: -------------------------------------------------------------------------------- 1 | APR 2 | APR. 3 | APRIL 4 | AUG 5 | AUG. 6 | AUGUST 7 | AUTUMN 8 | Apr 9 | Apr. 10 | April 11 | Aug 12 | Aug. 13 | August 14 | Autumn 15 | DEC 16 | DEC. 
17 | DECEMBER 18 | Dec 19 | Dec. 20 | December 21 | FEB 22 | FEB. 23 | FEBRUARY 24 | Feb 25 | Feb. 26 | February 27 | JAN 28 | JAN. 29 | JANUARY 30 | JUL 31 | JUL. 32 | JULY 33 | JUN 34 | JUN. 35 | JUNE 36 | Jan 37 | Jan. 38 | January 39 | Jul 40 | Jul. 41 | July 42 | Jun 43 | Jun. 44 | June 45 | MAR 46 | MAR. 47 | MARCH 48 | MAY 49 | Mar 50 | Mar. 51 | March 52 | May 53 | NOV 54 | NOV. 55 | NOVEMBER 56 | Nov 57 | Nov. 58 | November 59 | OCT 60 | OCT. 61 | OCTOBER 62 | Oct 63 | Oct. 64 | October 65 | SEP 66 | SEP. 67 | SEPT 68 | SEPT. 69 | SEPTEMBER 70 | Sep 71 | Sep. 72 | Sept 73 | Sept. 74 | September 75 | Summer 76 | Winter 77 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/months_lower.lst: -------------------------------------------------------------------------------- 1 | apr 2 | apr. 3 | april 4 | aug 5 | aug. 6 | august 7 | autumn 8 | dec 9 | dec. 10 | december 11 | feb 12 | feb. 13 | february 14 | jan 15 | jan. 16 | january 17 | jul 18 | jul. 19 | july 20 | jun 21 | jun. 22 | june 23 | mar 24 | mar. 25 | march 26 | nov 27 | nov. 28 | november 29 | oct 30 | oct. 31 | october 32 | sep 33 | sep. 34 | sept 35 | sept. 
36 | september 37 | summer 38 | winter 39 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/mountain.lst: -------------------------------------------------------------------------------- 1 | Alps 2 | Andes 3 | Himalayas 4 | Pyrenees 5 | Snowdonia -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/newspapers.lst: -------------------------------------------------------------------------------- 1 | independent 2 | Independent on Sunday 3 | Independent Digital 4 | Investor's Daily 5 | New York Daily News 6 | New York Post 7 | New York Times 8 | New York Times News Service 9 | New York Times Special Features 10 | New York Times Syndicate 11 | Newsweek 12 | Readers Digest 13 | The Boston Globe 14 | The Financial Times 15 | The Finanical Times Limited 16 | The Independent 17 | The Independent on Sunday 18 | The Phoenix Gazette 19 | Times Mirror 20 | Washington Post 21 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/non_company.lst: -------------------------------------------------------------------------------- 1 | United Nations 2 | UNESCO 3 | UK Atomic Energy Authority 4 | U.N. 
5 | Organization of Petroleum Exporting Countries 6 | Organization for Economic Cooperation and Development 7 | OPEC 8 | North Atlantic Treaty Organization 9 | NATO 10 | Museum of Flight 11 | Japanese Space Agency 12 | Japan International Development Organization 13 | Japan Industrial Development Organization 14 | Italian Space Agency 15 | Hezbollah 16 | General Agreement on Tariffs and Trade 17 | European Union 18 | European Space Agency 19 | European Community 20 | Electronics Research and Service Organization 21 | Electronics Research & Service Organization 22 | Electronics Research & Service Organization 23 | ESA 24 | Association of South East Asian Nations 25 | Association of Flight Attendants 26 | 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/nonspec_date.lst: -------------------------------------------------------------------------------- 1 | year 2 | weekend 3 | week 4 | month 5 | hour 6 | decade 7 | day 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/not_org.lst: -------------------------------------------------------------------------------- 1 | British National 2 | British National Corpus -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/number_fold.lst: -------------------------------------------------------------------------------- 1 | two-fold 2 | three-fold 3 | four-fold 4 | five-fold 5 | six-fold 6 | seven-fold 7 | eight-fold 8 | nine-fold 9 | ten-fold 10 | twenty-fold 11 | thirty-fold 12 | forty-fold 13 | fifty-fold 14 | hundred-fold 15 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/org_pre.lst: -------------------------------------------------------------------------------- 1 | Federal 2 | Royal 3 | 
National -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/org_spur.lst: -------------------------------------------------------------------------------- 1 | ABM 2 | Resolution 3 | War 4 | Olympics 5 | Super Bowl 6 | World Series 7 | Discovery 8 | Challenger 9 | FREE COLLEGE 10 | Free College 11 | Copyright 12 | Corriere della Sera 13 | Chanel No. 5 14 | Limitation 15 | Soul 16 | Service 17 | Start 18 | National League 19 | Series 20 | NMD 21 | TMD 22 | Treaty 23 | N.B. 24 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/other_people.lst: -------------------------------------------------------------------------------- 1 | shareholders 2 | investors 3 | insiders -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/othorg_key.lst: -------------------------------------------------------------------------------- 1 | Local 2 | District 3 | 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/percent.lst: -------------------------------------------------------------------------------- 1 | % 2 | percent 3 | per cent 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_ambig.lst: -------------------------------------------------------------------------------- 1 | Silver 2 | Dame 3 | Jan 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_ambig2.lst: -------------------------------------------------------------------------------- 1 | Brittany 2 | May 3 | June 4 | April 5 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_ending.lst: -------------------------------------------------------------------------------- 1 | Jr 2 | Jr. 3 | jr 4 | jr. 5 | Sr 6 | Sr. 7 | sr 8 | sr. 9 | II 10 | III 11 | Q.C. 12 | QC -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_full.lst: -------------------------------------------------------------------------------- 1 | Bach 2 | Brahms 3 | Carter 4 | Clinton 5 | Gaugin 6 | Gauguin 7 | George Bush 8 | Hillary Clinton 9 | Hillary Rodham Clinton 10 | John the Baptist 11 | Margaret Thatcher 12 | Messiaen 13 | Mozart 14 | Nixon 15 | Pope John Paul II 16 | Richard Nixon 17 | Ronald Reagan 18 | Saddam Hussain 19 | Saint-Saens 20 | Shostakovich 21 | Strauss 22 | Thatcher 23 | The Clintons 24 | The Queen 25 | the Queen 26 | Van Gogh 27 | Virgin Mary 28 | Vivaldi 29 | the Clintons 30 | van Gogh 31 | Barack Obama 32 | Obama 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_male_cap.lst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_male_cap.lst -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_relig.lst: -------------------------------------------------------------------------------- 1 | God 2 | Jesus 3 | Jesus Christ 4 | Lord Jesus Christ 5 | Lord God 6 | the Holy Ghost 7 | the Holy Spirit 8 | Christ 9 | Nicademus 10 | Nicodemus 11 | Anzakias -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_sci.lst: 
-------------------------------------------------------------------------------- 1 | Bloch 2 | Lorenz 3 | Maxwell-Bloch -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/person_spur.lst: -------------------------------------------------------------------------------- 1 | Er 2 | Colleague 3 | Colleagues 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/phone_prefix.lst: -------------------------------------------------------------------------------- 1 | Phone 2 | phone 3 | Telephone 4 | telephone 5 | Tel. 6 | Tel 7 | tel. 8 | tel 9 | Fax 10 | fax 11 | no. -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/planet.lst: -------------------------------------------------------------------------------- 1 | red planet 2 | moon 3 | lunar 4 | Venus 5 | Red Planet 6 | Moon 7 | Mars 8 | Io 9 | Ganymede 10 | Eros 11 | 12 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/racecourse.lst: -------------------------------------------------------------------------------- 1 | Doncaster 2 | Haydock Park 3 | Uttoxeter 4 | Epsom 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/rivers.lst: -------------------------------------------------------------------------------- 1 | Severn 2 | Thames 3 | Tyne 4 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/sports.lst: -------------------------------------------------------------------------------- 1 | tennis 2 | TENNIS 3 | skiing 4 | SKIING 5 | skating 6 | SKATING 7 | gymnastics 8 | GYMNASTICS 9 | swimming 10 | SWIMMING 11 | football 12 | 
FOOTBALL 13 | rugby 14 | RUGBY 15 | golf 16 | GOLF 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/spur_ident.lst: -------------------------------------------------------------------------------- 1 | H2O 2 | CO2 -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/stop.lst: -------------------------------------------------------------------------------- 1 | About 2 | And 3 | Because 4 | By 5 | Can 6 | First 7 | For 8 | From 9 | He 10 | Here 11 | I 12 | It 13 | May 14 | Next 15 | Of 16 | Perhaps 17 | Please 18 | Shall 19 | She 20 | So 21 | That 22 | Then 23 | They 24 | We 25 | Will 26 | With 27 | Without 28 | You 29 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/street.lst: -------------------------------------------------------------------------------- 1 | Avenue 2 | Crescent 3 | Hill 4 | Lane 5 | Rise 6 | Road 7 | Row 8 | Street 9 | Way 10 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/surname_prefix.lst: -------------------------------------------------------------------------------- 1 | de 2 | De 3 | del 4 | Del 5 | d' 6 | D' 7 | O' 8 | von 9 | Von 10 | van 11 | Van -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/team.lst: -------------------------------------------------------------------------------- 1 | Boston red sox 2 | Anaheim Angels 3 | red sox 4 | angels 5 | Miami Fusion 6 | Columbus Crew 7 | Copa Libertadores 8 | Toronto Maple Leafs 9 | Montreal Canadiens -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/time.lst: 
-------------------------------------------------------------------------------- 1 | midday 2 | midnight 3 | noon 4 | dawn 5 | dusk 6 | sunrise 7 | sunset 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/time_ampm.lst: -------------------------------------------------------------------------------- 1 | a.m. 2 | A M 3 | AM 4 | A.M. 5 | pm 6 | p.m. 7 | PM 8 | P.M. 9 | P M -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/time_key.lst: -------------------------------------------------------------------------------- 1 | today 2 | tomorrow 3 | yesterday 4 | this morning 5 | this afternoon 6 | this evening 7 | in the morning 8 | in the afternoon 9 | in the evening -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/time_modifier.lst: -------------------------------------------------------------------------------- 1 | coming 2 | last 3 | next 4 | past 5 | previous 6 | this 7 | Last 8 | Next 9 | This 10 | forthcoming 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/time_unit.lst: -------------------------------------------------------------------------------- 1 | hour 2 | hours 3 | minute 4 | minutes 5 | second 6 | seconds 7 | morning 8 | afternoon 9 | evening 10 | night -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/times.lst: -------------------------------------------------------------------------------- 1 | witching hour 2 | tonight 3 | start of the day 4 | pre-dawn 5 | overnight 6 | noontide 7 | noonday 8 | noon 9 | nightfall 10 | morning 11 | midnight 12 | middle of the day 13 | midday 14 | midafternoon 15 | mid-day 16 | mid - day 17 | gloaming 18 
| evening 19 | end of the day 20 | dusk 21 | daybreak 22 | dawn 23 | break of the day 24 | break of day 25 | afternoon 26 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/timespan.lst: -------------------------------------------------------------------------------- 1 | secs 2 | seconds 3 | minutes 4 | mins 5 | hours 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/timex_pre.lst: -------------------------------------------------------------------------------- 1 | previous 2 | pre- 3 | pre - 4 | next 5 | later 6 | late on 7 | late 8 | last 9 | early 10 | earlier 11 | during 12 | before 13 | after 14 | Previous 15 | Pre- 16 | Pre - 17 | Next 18 | Later 19 | Late on 20 | Late 21 | Last 22 | Early 23 | Earlier 24 | During 25 | Before 26 | After 27 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/timezone.lst: -------------------------------------------------------------------------------- 1 | Zulu Standard Time 2 | Zone Standard Time 3 | ZST 4 | Z.S.T. 5 | Western time 6 | Western Time 7 | Western Standard Time 8 | Western Day Time 9 | Western Daily Time 10 | Western Coast Time 11 | West Day Time 12 | West Daily Time 13 | West Coast Time 14 | WST 15 | WDT 16 | WCT 17 | W.S.T. 18 | W.D.T. 19 | W.C.T. 20 | Southern time 21 | Southern Time 22 | Pacific time 23 | Pacific Time 24 | Pacific Day Time 25 | Pacific Daily Time 26 | PDT 27 | P.D.T. 28 | Northern time 29 | Northern Time 30 | Middle Eastern Time 31 | MET 32 | M.E.T. 33 | Greenwich Time 34 | Greenwich Meantime 35 | Greenwich MeanTime 36 | Greenwich Mean Time 37 | GMT 38 | G.M.T. 
39 | Eastern time 40 | Eastern Time 41 | Eastern Standard Time 42 | Eastern Day Time 43 | Eastern Daily Time 44 | Eastern Coast Time 45 | East Day Time 46 | East Daily Time 47 | East Coast Time 48 | ET 49 | EST 50 | EDT 51 | ECT 52 | E.T. 53 | E.S.T. 54 | E.D.T. 55 | E.C.T. 56 | Central Time 57 | Central Standard Time 58 | Central European Time 59 | Central Day Time 60 | Central Daily Time 61 | CST 62 | CET 63 | CDT 64 | C.S.T. 65 | C.E.T. 66 | C.D.T. 67 | British Summer Time 68 | BST 69 | B.S.T. 70 | 71 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/title_female.lst: -------------------------------------------------------------------------------- 1 | Madam 2 | Madame 3 | Mademoiselle 4 | Miss 5 | Mlle 6 | Mme 7 | Mme. 8 | Mrs 9 | Mrs. 10 | Ms 11 | Ms. 12 | Sister 13 | Spokeswoman -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/title_male.lst: -------------------------------------------------------------------------------- 1 | Marquis 2 | Messr 3 | Messr. 4 | Monsieur 5 | Mr 6 | Mr. 
7 | Prince 8 | Rabbi 9 | Sheik 10 | Sir -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/tvcompany.lst: -------------------------------------------------------------------------------- 1 | ABC 2 | Fox 3 | BBC 4 | ITV 5 | Channel 4 6 | Channel 5 7 | Channel 50 8 | VOA 9 | WFTY-TV 10 | WHLL-TV 11 | katv 12 | KATV 13 | VH1 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/water.lst: -------------------------------------------------------------------------------- 1 | Pacific 2 | North Atlantic 3 | New York Harbor 4 | Indian Ocean 5 | Gulf of Mexico 6 | Great Lakes 7 | Everglades 8 | Caribbean 9 | Bay of Biscay 10 | Atlantic 11 | Arctic 12 | Antarctic 13 | Nile 14 | Amazon 15 | Thames -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/gazetteer/year.lst: -------------------------------------------------------------------------------- 1 | 1970 2 | 1971 3 | 1972 4 | 1973 5 | 1974 6 | 1975 7 | 1976 8 | 1977 9 | 1978 10 | 1979 11 | 1980 12 | 1981 13 | 1982 14 | 1983 15 | 1984 16 | 1985 17 | 1986 18 | 1987 19 | 1988 20 | 1989 21 | 1990 22 | 1991 23 | 1992 24 | 1993 25 | 1994 26 | 1995 27 | 1996 28 | 1997 29 | 1998 30 | 1999 31 | 2000 32 | 2001 33 | 2002 34 | 2003 35 | 2004 36 | 2005 37 | 2006 38 | 2007 39 | 2008 40 | 2009 41 | 2010 42 | 2011 43 | 2012 44 | 2013 45 | 2014 46 | 2015 47 | 2016 48 | 2017 49 | 2018 50 | 2019 51 | 2020 52 | 2021 53 | 2022 54 | 2023 55 | 2024 56 | 2025 57 | 2026 58 | 2027 59 | 2028 60 | 2029 61 | 2030 62 | 2031 63 | 2032 64 | 2033 65 | 2034 66 | 2035 67 | 2036 68 | 2037 69 | 2038 70 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/acelists.def: 
-------------------------------------------------------------------------------- 1 | cdg.lst:cdg 2 | spur_match.lst:spur_match 3 | prepos.lst:prepos 4 | def_art.lst:def_art 5 | connnector.lst:connnector 6 | alias_ace.lst:alias 7 | nickname.txt:nickname -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/alias.lst: -------------------------------------------------------------------------------- 1 | National Aeronautics and Space Administration£1 2 | NASA£1 3 | New York Stock Exchange£2 4 | Big Board£2 5 | Aluminum Co.£3 6 | Aluminum Co£3 7 | Alcoa£3 8 | New York Times Inc.£4 9 | Times£4 10 | New York Times£4 11 | Coca-Cola Co.£5 12 | Coca-Cola Co£5 13 | Coca-Cola£5 14 | Coke£5 15 | IBM£6 16 | Big Blue£6 17 | New York£7 18 | Big Apple£7 19 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/connnector.lst: -------------------------------------------------------------------------------- 1 | of£con 2 | for£con 3 | de£con 4 | di£con 5 | von£con 6 | van£con 7 | &£con 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/def_art.lst: -------------------------------------------------------------------------------- 1 | The£def 2 | the£def 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/listsNM.def: -------------------------------------------------------------------------------- 1 | # cdg.lst:cdg 2 | ../gazetteer/org_ending.lst:cdg 3 | spur_match.lst:spur_match 4 | prepos.lst:prepos 5 | def_art.lst:def_art 6 | connnector.lst:connnector 7 | alias.lst:alias 8 | nickname.txt:nickname -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/prepos.lst: 
-------------------------------------------------------------------------------- 1 | of£prepos 2 | for£prepos 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/othomatcher/spur_match.lst: -------------------------------------------------------------------------------- 1 | Eastern Airways£1 2 | Eastern Air£1 3 | BT Cellnet£2 4 | BT Wireless£2 5 | BT£2 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/regex-splitter/external-split-patterns.txt: -------------------------------------------------------------------------------- 1 | //These are patterns for sentence splits 2 | // 3 | // Valentin Tablan, 24 Aug 2007 4 | // 5 | // 6 | // Lines starting with // are comments; empty lines are ignored 7 | 8 | //2 or more new lines (optionally separated by other whitespace) 9 | (?:[\u00A0\u2007\u202F\p{javaWhitespace}&&[^\n\r]])*(\n\r|\r\n|\n|\r)(?:(?:[\u00A0\u2007\u202F\p{javaWhitespace}&&[^\n\r]])*\1)+ 10 | 11 | //the end of the document is also an external split, so that there is no 12 | //orphaned text 13 | \s*\z 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/regex-splitter/internal-split-patterns.txt: -------------------------------------------------------------------------------- 1 | //These are patterns for sentence splits 2 | // 3 | // Valentin Tablan, 24 Aug 2007 4 | // 5 | // 6 | // Lines starting with // are comments; empty lines are ignored 7 | 8 | //between 1 and 3 full stops 9 | \.{1,3}"? 10 | 11 | //up to 4 ! or ? in sequence 12 | (!|\?){1,4}"? 
-------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/ANNIE-Schemas.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/AddressSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/DateSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/IdentifierSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/LocationSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/MoneySchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/OrganizationSchema.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/PercentSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/PersonSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/PhoneSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/schema/SentenceSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/gazetteer/abbreviations.lst: -------------------------------------------------------------------------------- 1 | AG 2 | APR 3 | Apr 4 | AUG 5 | Aug 6 | Adm 7 | Brig 8 | CO 9 | CORP 10 | Capt 11 | Cmdr 12 | Co 13 | Col 14 | Comdr 15 | DEC 16 | Dec 17 | DR 18 | Dr 19 | FEB 20 | Feb 21 | Fig 22 | FRI 23 | GMBH 24 | Gen 25 | Gov 26 | INC 27 | JAN 28 | Jan 29 | JUL 30 | Jul 31 | JUN 32 | Jun 33 | LTD 34 | Lt 35 | Ltd 36 | MAR 37 | Mar 38 | MON 39 | Mon 40 | MP 41 | Maj 42 | Mr 43 | Mrs 44 | Ms 45 | NA 46 | NOV 47 | Nov 48 | NV 49 | OCT 50 | Oct 51 | Oy 52 | PLC 53 | Prof 54 | Rep 55 | SA 56 | SAT 57 | Sat 58 | 
SEP 59 | Sep 60 | SIR 61 | SR 62 | SUN 63 | Sun 64 | Sen 65 | Sgt 66 | SpA 67 | St 68 | THU 69 | Thu 70 | THUR 71 | Thur 72 | TUE 73 | Tue 74 | VP 75 | WED 76 | Wed 77 | ad 78 | al 79 | b 80 | ed 81 | eds 82 | eg 83 | e.g 84 | (e.g 85 | [e.g 86 | et 87 | etc 88 | fig 89 | i.e 90 | (i.e 91 | [i.e 92 | p 93 | usu 94 | vs 95 | yr 96 | yrs 97 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/gazetteer/lists.def: -------------------------------------------------------------------------------- 1 | abbreviations.lst:splitter_abbreviation 2 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/cleanup.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * cleanup.jape 3 | * 4 | * Copyright (c) 1998-2007, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Valentin Tablan, March 7th, 2007 12 | * 13 | * $Id$ 14 | */ 15 | 16 | Phase: cleanup 17 | Input: Token 18 | Options: control = once 19 | 20 | //Removes temporary data created by the sentence splitter 21 | Rule: cleanUp 22 | {Token} 23 | --> 24 | { 25 | //if there were any sentences created, then we need to remove the document 26 | //feature -> useful for future runs 27 | doc.getFeatures().remove("temp-last-sentence-end"); 28 | //remove all lookups used for abbreviations 29 | FeatureMap constraints = Factory.newFeatureMap(); 30 | constraints.put("majorType", "splitter_abbreviation"); 31 | AnnotationSet toRemove = inputAS.get("Lookup", constraints); 32 | if(toRemove != null) inputAS.removeAll(toRemove); 33 | } 34 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/main-single-nl.jape: -------------------------------------------------------------------------------- 1 | // SplitMain 2 | // Valentin Tablan 17/05/2001 3 | 4 | 5 | //A sentence splitter 6 | MultiPhase: main 7 | Phases: 8 | 9 | prepare 10 | find-single-nl 11 | split 12 | cleanup 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/main.jape: -------------------------------------------------------------------------------- 1 | // SplitMain 2 | // Valentin Tablan 17/05/2001 3 | 4 | 5 | //A sentence splitter 6 | MultiPhase: main 7 | Phases: 8 | 9 | prepare 10 | find 11 | split 12 | cleanup 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/no-splits.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * no-splits.jape 3 | * 4 | * Copyright (c) 1998-2004, The University of Sheffield. 
5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 10 | * 11 | * Valentin Tablan, 23 Jan 2007 12 | * 13 | * $Id$ 14 | */ 15 | 16 | //This grammar deals with documents that have no splits 17 | 18 | Phase:noSplits 19 | Input: Token Split 20 | Options: control = once 21 | 22 | Rule: blah 23 | {Token} 24 | --> 25 | { 26 | AnnotationSet splits = inputAS.get("Split"); 27 | if(splits == null || splits.isEmpty()){ 28 | outputAS.add(outputAS.firstNode(), outputAS.lastNode(), 29 | "TempNoSplitText", Factory.newFeatureMap()); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/ANNIE/resources/sentenceSplitter/grammar/prepare.jape: -------------------------------------------------------------------------------- 1 | /* 2 | * prepare.jape 3 | * 4 | * Copyright (c) 1998-2007, The University of Sheffield. 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * Valentin Tablan, June 22nd, 2007 12 | * 13 | * $Id$ 14 | */ 15 | 16 | Phase: prepare 17 | Input: Token 18 | Options: control = once 19 | 20 | //Makes sure there is no temporary data created by a previous run of the 21 | //sentence splitter 22 | Rule: cleanUp 23 | {Token} 24 | --> 25 | { 26 | //if there were any sentences created, then we need to remove the document 27 | //feature -> useful for future runs 28 | doc.getFeatures().remove("temp-last-sentence-end"); 29 | } 30 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | GATE-plugin-JAPE_Plus 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/creole.xml: -------------------------------------------------------------------------------- 1 | 2 | jape-plus.jar 3 | lib/colt-1.2.0-free.jar 4 | 5 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/doc/javadoc/gate/gui/jape/plus/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.gui.jape.plus (${plugin.name} JavaDoc) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | gate.gui.jape.plus 20 | 21 | 22 | 27 | 28 |
23 | Classes  24 | 25 |
26 | Viewer
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/doc/javadoc/gate/resources/img/svg/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.resources.img.svg (${plugin.name} JavaDoc) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | gate.resources.img.svg 20 | 21 | 22 | 27 | 28 |
23 | Classes  24 | 25 |
26 | JapePlusIcon
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/doc/javadoc/package-list: -------------------------------------------------------------------------------- 1 | com.ontotext.jape.automaton 2 | com.ontotext.jape.pda 3 | gate.gui.jape.plus 4 | gate.jape.plus 5 | gate.resources.img.svg 6 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/doc/javadoc/resources/inherit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/JAPE_Plus/doc/javadoc/resources/inherit.gif -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/jape-plus.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/JAPE_Plus/jape-plus.jar -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/lib/colt-1.2.0-free.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/JAPE_Plus/lib/colt-1.2.0-free.jar -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/lib/licence-colt-1.2.0-free.txt: -------------------------------------------------------------------------------- 1 | Packages cern.colt* , cern.jet*, cern.clhep 2 | 3 | Copyright (c) 1999 CERN - European Organization for Nuclear Research. 
4 | 5 | Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose is hereby granted without fee, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation. CERN makes no representations about the suitability of this software for any purpose. It is provided "as is" without expressed or implied warranty. 6 | 7 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/lib/src/colt-1.2.0-free-src.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/JAPE_Plus/lib/src/colt-1.2.0-free-src.jar -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/src/com/ontotext/jape/automaton/Constants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Constants.java 3 | * 4 | * Copyright (c) 2010-2011, Ontotext (www.ontotext.com). 5 | * 6 | * This file is part of GATE (see http://gate.ac.uk/), and is free 7 | * software, licenced under the GNU Library General Public License, 8 | * Version 2, June 1991 (in the distribution as file licence.html, 9 | * and also available at http://gate.ac.uk/gate/licence.html). 
10 | * 11 | * 12 | * $Id$ 13 | */ 14 | package com.ontotext.jape.automaton; 15 | 16 | public class Constants { 17 | public static final int NO = -1; 18 | public static final int hashStep = 107; 19 | public static final int hashBase = 257; 20 | } 21 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/src/gate/gui/jape/plus/Viewer.java: -------------------------------------------------------------------------------- 1 | package gate.gui.jape.plus; 2 | 3 | import gate.creole.metadata.CreoleResource; 4 | import gate.creole.metadata.GuiType; 5 | import gate.gui.jape.JapeViewer; 6 | 7 | @CreoleResource(name = "JAPE-Plus Viewer", comment = "A JAPE grammar file viewer", resourceDisplayed = "gate.jape.plus.Transducer", mainViewer = true, helpURL = "http://gate.ac.uk/userguide/chap:jape", guiType = GuiType.LARGE) 8 | public class Viewer extends JapeViewer { 9 | // an empty file to allow the same VR to be used for two different PRs without 10 | // a common superclass 11 | } -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test-contains.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | 6 | Rule: A 7 | ({A contains {B@length==3}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test-contains.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.SourceURL 8 | created from String 9 | 10 | 11 | 12 | 13 | 
AAAABBBAAAA 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | feat 22 | val 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test-not-contains.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | 6 | Rule: A 7 | ({A notContains {C}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test-not-within.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | 6 | Rule: A 7 | ({B notWithin {C}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test-within.jape: -------------------------------------------------------------------------------- 1 | Phase: Contains 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | 6 | Rule: A 7 | ({B within {A.feat=="val"}}):bind 8 | --> 9 | :bind.Out = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | 
-------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test-within.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.SourceURL 8 | created from String 9 | 10 | 11 | 12 | 13 | AAABBBAAA 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | feat 22 | val 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/test_actioncontext.jape: -------------------------------------------------------------------------------- 1 | 2 | Phase: phase1 3 | Options: control = once 4 | 5 | Rule: rule1 6 | ({Token}):label 7 | --> 8 | { 9 | System.out.println("Testing the action context:"); 10 | System.out.println("Corpus is "+ctx.getCorpus()); 11 | System.out.println("Features "+ctx.getPRFeatures()); 12 | } 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/testabc.jape: -------------------------------------------------------------------------------- 1 | Phase: ABC 2 | 3 | Input: A B C 4 | Options: control = appelt 5 | 6 | Rule: A 7 | ( 8 | {A} 9 | ):a 10 | --> 11 | :a.TempA={} 12 | 13 | Rule: AB 14 | ( 15 | {A}{B} 16 | ):a 17 | --> 18 | :a.TempAB={} 19 | 20 | Rule: ABC 21 | ( 22 | {A}{B}{C} 23 | ):a 24 | --> 25 | :a.TempABC={} 26 | 27 | Rule: APlus 28 | ( 29 | ({A})+ 30 | ):a 31 | --> 32 | :a.TempAPlus={} 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/testdoc.txt: -------------------------------------------------------------------------------- 1 | Just a little test document. 
2 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/testonto.jape: -------------------------------------------------------------------------------- 1 | 2 | Phase: testOnto 3 | Input: Token SpaceToken 4 | 5 | 6 | Rule: rule1 7 | ({Token.class == "Person"}):label 8 | --> 9 | :label.Match = {} 10 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/testonto.rdf.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/zero-length-ann.jape: -------------------------------------------------------------------------------- 1 | Phase: Aaaa 2 | Input: A B C 3 | Options: control = appelt 4 | 5 | 6 | Rule: B 7 | ({C})*:bind 8 | --> 9 | :bind.Bsingle = {}, 10 | :bind { 11 | System.out.print("["); 12 | boolean first = true; 13 | for(Annotation ann: bindAnnots){ 14 | if(first) first = false; else System.out.print(", "); 15 | System.out.print(ann.getId()); 16 | } 17 | System.out.println("]"); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/JAPE_Plus/test/zero-length-ann.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.SourceURL 8 | created from String 9 | 10 | 11 | 12 | 13 | A A B A A 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- 
/TJBatchExtractor/plugins/Tools/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | GATE-plugin-Tools 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/doc/javadoc/gate/configurableexporter/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | gate.configurableexporter (Tools JavaDoc) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | gate.configurableexporter 20 | 21 | 22 | 27 | 28 |
23 | Classes  24 | 25 |
26 | ConfigurableExporter
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/doc/javadoc/package-list: -------------------------------------------------------------------------------- 1 | gate.configurableexporter 2 | gate.qa 3 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/doc/javadoc/resources/inherit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/Tools/doc/javadoc/resources/inherit.gif -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/resources/configurableexporter/example.conf: -------------------------------------------------------------------------------- 1 | "{Token.string}", "{Token.category}" 2 | 3 | Everything after the first line in this configuration file is treated as a 4 | comment. 5 | 6 | Using this configuration file with an instance type of Token will generate an 7 | output file containing one line for each Token annotation in the input 8 | document. 
-------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/resources/morph/adv.dat: -------------------------------------------------------------------------------- 1 | best ==> well 2 | better ==> well 3 | deeper ==> deeply 4 | farther ==> far 5 | further ==> far 6 | harder ==> hard 7 | hardest ==> hard 8 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/resources/schemas/SyntaxTreeNodeSchema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/src/gate/qa/Measure.java: -------------------------------------------------------------------------------- 1 | package gate.qa; 2 | 3 | /** 4 | * ENUM for different measure types. User can select one of these as a 5 | * parameter for the PRs such as QualityAssurance PR and QA for Teamware PR. 6 | * 7 | * @author niraj 8 | * 9 | */ 10 | public enum Measure { 11 | F1_STRICT, F1_AVERAGE, F1_LENIENT, F05_STRICT, F05_AVERAGE, F05_LENIENT 12 | } 13 | -------------------------------------------------------------------------------- /TJBatchExtractor/plugins/Tools/tools.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mille856/CMU_memex/149052058436c474f4a479de836e3dac528ef9c1/TJBatchExtractor/plugins/Tools/tools.jar --------------------------------------------------------------------------------