├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── build.sbt
├── data
├── experimentcorpus
│ ├── ReadMe
│ ├── Relnoun1.1.txt
│ ├── Relnoun1.1_plus_NNPrelationalNouns.txt
│ ├── Relnoun1.1_plus_NNPrelationalNouns_plus_ORGwords.txt
│ ├── Relnoun1.1_plus_NNPrelationalNouns_plus_ORGwords_plus_demonyms.txt
│ └── Relnoun2.2.txt
├── gold.txt
└── sentences.txt
├── project
├── Release.scala
├── build.properties
└── plugins.sbt
├── release
└── release_notes_relnoun.md
├── src
├── main
│ ├── resources
│ │ ├── edu
│ │ │ └── knowitall
│ │ │ │ └── chunkedextractor
│ │ │ │ ├── confidence
│ │ │ │ └── relnoun-confidence.txt
│ │ │ │ ├── demonyms.csv
│ │ │ │ ├── nouns.txt
│ │ │ │ ├── nouns_of.txt
│ │ │ │ ├── org_words.txt
│ │ │ │ ├── prp_mapping.csv
│ │ │ │ └── relnoun_prefixes.txt
│ │ └── logging.properties
│ └── scala
│ │ └── edu
│ │ └── knowitall
│ │ └── chunkedextractor
│ │ ├── Expressions.scala
│ │ ├── Extraction.scala
│ │ ├── Extractor.scala
│ │ ├── JavaChunkedExtractor.scala
│ │ ├── Nesty.scala
│ │ ├── PatternExtractor.scala
│ │ ├── R2A2.scala
│ │ ├── ReVerb.scala
│ │ ├── Relnoun.scala
│ │ └── confidence
│ │ ├── ChunkedExtractorConfidenceFunction.scala
│ │ ├── ChunkedExtractorFeatureSet.scala
│ │ └── TrainChunkedExtractor.scala
└── test
│ └── scala
│ └── edu
│ └── knowitall
│ └── chunkedextractor
│ ├── NestySpecTest.scala
│ ├── R2A2SpecTest.scala
│ └── RelnounSpecTest.scala
└── version.sbt
/.gitignore:
--------------------------------------------------------------------------------
1 | project/project
2 | project/target
3 | target
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | scala:
3 | - "2.10.3"
4 | jdk:
5 | - oraclejdk7
6 | - openjdk7
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | ReVerb Software License Agreement
2 |
3 | ReVerb Software
4 | (C) 2011-2012, University of Washington. All rights reserved.
5 | US patent number 7,877,343 and 12/970,155 patent pending
6 |
7 | The University of Washington (UW), Professor Oren Etzioni, Anthony Fader,
8 | Michael Schmitz, Robert Bart, Janara Christensen, and Niranjan Balasubramanian
9 | (Developers) give permission for you and your laboratory (University) to use
10 | ReVerb. ReVerb is a system that extracts relational triples from text. ReVerb
11 | is protected by a United States copyright and patents. The National Science
12 | Foundation supported work on ReVerb. Under University of Washington's
13 | patents 7,877,343 (issued) and 12/970,155 (patent pending), the UW grants to
14 | you the non-exclusive right to use patent claims practiced by the University of
15 | Washington's ReVerb software solely for non-commercial purposes and as long as
16 | you comply with the terms of this ReVerb Software License Agreement. UW and
17 | the Developers allow you to copy and modify ReVerb for non-commercial purposes,
18 | and to distribute modifications through GitHub or directly to the University of
19 | Washington, on the following conditions:
20 |
21 |
22 | 1. ReVerb is not used for any commercial purposes, or as part of a system
23 | which has commercial purposes.
24 |
25 |
26 | 2. Any software derived from ReVerb must carry prominent notices stating that
27 | you modified it along with the date modified. The derivative must also carry
28 | prominent notices stating that it is released under this ReVerb Software
29 | License Agreement
30 |
31 | If you wish to obtain ReVerb or to obtain any patent rights for any commercial
32 | purposes, you will need to contact the University of Washington to see if
33 | rights are available and to negotiate a commercial license and pay a fee. This
34 | includes, but is not limited to, using ReVerb to provide services to outside
35 | parties for a fee. In that case please contact:
36 |
37 | UW Center for Commercialization
38 | University of Washington
39 | 4311 11th Ave. NE,
40 | Suite 500 Seattle, WA 98105-4608
41 |
42 | Phone: (206) 543-3970
43 | Email: license@u.washington.edu
44 |
45 |
46 | 3. You retain in ReVerb and any modifications to ReVerb, the copyright,
47 | trademark, patent or other notices pertaining to ReVerb as provided by UW.
48 |
49 |
50 | 4. You provide the Developers with feedback on the use of the ReVerb software
51 | in your research, and that the Developers and UW are permitted to use any
52 | information you provide in making changes to the ReVerb software. All bug
53 | reports and technical questions shall be sent to: afader@cs.washington.edu.
54 | Modifications may be communicated through GitHub pull requests at:
55 |
56 | https://github.com/knowitall/
57 |
58 |
59 | 5. You acknowledge that the Developers, UW and its licensees may develop
60 | modifications to ReVerb that may be substantially similar to your modifications
61 | of ReVerb, and that the Developers, UW and its licensees shall not be
62 | constrained in any way by you in UW's or its licensees' use or management of
63 | such modifications. You acknowledge the right of the Developers and UW to
64 | prepare and publish modifications to ReVerb that may be substantially similar
65 | or functionally equivalent to your modifications and improvements, and if you
66 | obtain patent protection for any modification or improvement to ReVerb you
67 | agree not to allege or enjoin infringement of your patent by the Developers,
68 | the UW or by any of UW's licensees obtaining modifications or improvements to
69 | ReVerb from the University of Washington or the Developers.
70 |
71 |
72 | 6. If utilization of the ReVerb software results in outcomes which will be
73 | published, please specify the version of ReVerb you used and cite the UW
74 | Developers.
75 |
76 | @inproceedings{Fader11,
77 | author = {Anthony Fader and Stephen Soderland and Oren Etzioni},
78 | title = {Identifying Relations for Open Information Extraction},
79 | booktitle = {Proceedings of the Conference of Empirical Methods
80 | in Natural Language Processing ({EMNLP} '11)},
81 | year = {2011},
82 | month = {July 27-31},
83 | address = {Edinburgh, Scotland, UK}
84 | }
85 |
86 |
87 | 7. Any risk associated with using the ReVerb software at your organization is
88 | with you and your organization. ReVerb is experimental in nature and is made
89 | available as a research courtesy "AS IS," without obligation by UW to provide
90 | accompanying services or support.
91 |
92 |
93 | UW AND THE AUTHORS EXPRESSLY DISCLAIM ANY AND ALL WARRANTIES REGARDING THE
94 | SOFTWARE, WHETHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO WARRANTIES
95 | PERTAINING TO MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
96 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ** **DEPRECATED!** ** Please see https://github.com/dair-iitd/OpenIE-standalone, which has combined multiple projects into a single project and maintains the latest version of Open IE (Open IE 5). It is based on another repository https://github.com/allenai/openie-standalone, which has an older version of Open IE.
2 |
3 | # Chunked Extractors
4 |
5 | The chunked extractors project is a collection of three extractors.
6 |
7 | 1. [ReVerb](http://reverb.cs.washington.edu/)--an extractor for verb-mediated relations (`Sally sells sea shells`).
8 | 2. Relnoun--an extractor for noun-mediated relations (`United States president Barack Obama`).
9 | 3. Nesty--an extractor for nested relations (`Some people say that we never landed on the moon`).
10 |
11 | This project provides a common interface to the three extractors by using the nlptools project for chunking and lemmatization.
12 |
13 | ## Citing Relnoun
14 |
15 | Harinder Pal, Mausam. "Demonyms and Compound Relational Nouns in Nominal Open IE". Workshop on Automated Knowledge Base Construction (AKBC) at NAACL. San Diego, CA, USA. June 2016.
16 |
17 | ## Google Group
18 |
19 | * [knowitall_openie](https://groups.google.com/forum/#!forum/knowitall_openie)
20 |
21 | ## Notifications
22 |
23 | * [01/15/2016][Relnoun] Version 2.2.0 has been released ([release notes](https://github.com/knowitall/chunkedextractor/blob/master/release/release_notes_relnoun.md)).
24 | * [12/30/2015][Relnoun] Version 2.0.0 has been released ([release notes](https://github.com/knowitall/chunkedextractor/blob/master/release/release_notes_relnoun.md)).
25 |
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | ReleaseSettings.defaults // NOTE(review): presumably defined in project/Release.scala (not visible here) -- confirm
2 |
3 | organization := "edu.washington.cs.knowitall.chunkedextractor"
4 |
5 | name := "chunkedextractor"
6 |
7 | description := "Wrapper and implementation for extractors of chunked sentences."
8 |
9 | crossScalaVersions := Seq("2.10.3") // Scala versions this project is cross-built against
10 |
11 | scalaVersion <<= crossScalaVersions { (vs: Seq[String]) => vs.head } // default Scala version = first cross-build entry
12 |
13 | libraryDependencies ++= Seq( // compile-scope libraries first, then the "test"-scoped junit and specs2
14 | "edu.washington.cs.knowitall" %% "openregex-scala" % "1.1.2",
15 | "edu.washington.cs.knowitall" % "reverb-core" % "1.4.3",
16 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-conf-breeze" % "2.4.5",
17 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-core" % "2.4.5",
18 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-chunk-opennlp" % "2.4.5",
19 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-stem-morpha" % "2.4.5",
20 | // resource management
21 | "com.jsuereth" %% "scala-arm" % "1.3",
22 | "junit" % "junit" % "4.11" % "test",
23 | "org.specs2" % "specs2" % "1.12.3" % "test" cross CrossVersion.binaryMapped { // remaps the Scala version suffix used for the specs2 artifact
24 | case "2.9.3" => "2.9.2"
25 | case "2.10.2" => "2.10"
26 | case x => x // any other version string is used unchanged
27 | })
28 |
29 | scalacOptions ++= Seq("-unchecked", "-deprecation")
30 |
31 | licenses := Seq("Academic License" -> url("http://reverb.cs.washington.edu/LICENSE.txt"))
32 |
33 | homepage := Some(url("http://github.com/knowitall/chunkedextractor"))
34 |
35 | resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots" // additionally resolve from the Sonatype snapshots repository
36 |
37 | publishMavenStyle := true
38 |
39 | publishTo <<= version { (v: String) => // pick the Sonatype publish target from the version string
40 | val nexus = "https://oss.sonatype.org/"
41 | if (v.trim.endsWith("SNAPSHOT")) // versions ending in SNAPSHOT go to the snapshots repository
42 | Some("snapshots" at nexus + "content/repositories/snapshots")
43 | else // every other version goes to the release staging endpoint
44 | Some("releases" at nexus + "service/local/staging/deploy/maven2")
45 | }
46 |
47 | pomExtra := ( // extra XML appended to the generated POM: SCM location and developer list
48 |   <scm>
49 |     <url>https://github.com/knowitall/chunkedextractor</url>
50 |     <connection>scm:git://github.com/knowitall/chunkedextractor.git</connection>
51 |     <developerConnection>scm:git:git@github.com:knowitall/chunkedextractor.git</developerConnection>
52 |     <tag>HEAD</tag>
53 |   </scm>
54 |   <developers>
55 |     <developer>
56 |       <name>Michael Schmitz</name>
57 |     </developer>
58 |   </developers>)
59 |
--------------------------------------------------------------------------------
/data/experimentcorpus/ReadMe:
--------------------------------------------------------------------------------
1 | This folder contains the experiment corpus (along with manual annotations) used to compare Relnoun1.1 with Relnoun2.2, as described in the paper "Demonyms and Compound Relational Nouns in Nominal Open IE" (Harinder Pal, Mausam. Workshop on Automated Knowledge Base Construction (AKBC) at NAACL. San Diego, CA, USA. June 2016).
2 |
--------------------------------------------------------------------------------
/data/gold.txt:
--------------------------------------------------------------------------------
1 | 0 which are an essential presentation in a virtual studio
2 | 1 Wendy is the daughter of Eldon
3 | 1 Amphora is a publication of the American Philological Association
4 | 0 who was a guest on the show
5 | 0 who was a guest on the show
6 | 1 Testosterone is an important hormone for muscles
7 | 1 Jafrum is a cool site for motorcycle parts
8 | 1 Rock Island is a city in Rock Island County
9 | 1 Love is the opposite of greed
10 | 1 Alberta was a federal electoral district in the Northwest Territories
11 | 0 ASP.NET is the latest version of Microsoft
12 | 0 's Active Server Pages technology .|ASP.NET is the latest version of Microsoft
13 | 0 West Lothian is a county of Scotland .|Before 1975
14 | 1 West Lothian was a county of Scotland
15 | 1 April is a long-time supporter of DHCC
16 | 0 December , 1989 [is] Carlos Salinas became president of Mexico
17 | 1 The DFSG is our definition of free software
18 | 0 Scarce was every kind of food
19 | 1 Dan is a graduate of Emory University
20 | 0 Florida State .|Dan is a graduate of Emory University
21 | 1 Dried figs are an excellent source of iron
22 | 1 2006 was a great year for electronic music
23 | 1 Slashfood is a member of the Weblogs
24 | 0 Contamination is a married guy from Saitama
25 | 1 CODEINE is matter of miscommunication
26 | 1 Adam Sandler is a waste of film
27 | 1 Chester is part of the PoughkeepsieNewburghMiddletown
28 | 0 the March 6 , 2006 [is] issue of Oil
29 | 1 Aleppo is the oldest inhabited city in history
30 | 1 A social institution is a group of people
31 | 1 CR is the abbreviation of Card Reader
32 | 1 Carl Hoff [is] fan [of] jazz
33 | 1 Dr. Weil is a proponent of Integrative Medicine
34 | 1 James Wang is a photographer in San Diego
35 | 1 Saint Peters is a city in St . Charles County
36 | 1 Woolite is a trademark of Reckitt Benckiser
37 | 1 RADIO is the only source of info
38 | 1 Piracy is an integral part of exploration
39 | 1 Backdraft is an amazing piece of film work
40 | 1 Tya is the art director for Coalesce
41 | 1 Jane is the mother of Fielding
42 | 0 solutions are an university of principal buttons
43 | 1 Mr Pullicino is Minister of Resources
44 | 1 There were 14 deaths during the journey
45 | 0 which were children under 5 years
46 | 1 East Asia is a region of economic dynamism
47 | 1 Downtown Asheville is the cultural center of Western North Carolina
48 | 1 A lab test is the most reliable means of detection
49 | 1 Trondheim is a city of schools
50 | 1 Riven is a trademark of Cyan
51 | 1 Alkalines are an example of primary cells
52 | 1 Pat watt is the uncle of www.workbizfromhom.com
53 | 1 The TIN is one such piece of information
54 | 1 Key 12 is the law of reversal
55 | 1 Jack was a man of taste
56 | 1 Bossier was game in defeat
57 | 1 Clinton Township is a township in Butler County
58 | 1 Impatience is another attribute of selfishness
59 | 1 Abstract algebra is the study of operations
60 | 1 brevity is the soul of Twitter
61 | 0 Pass-Guaranteed is your source for the Cisco 350-024 exam
62 | 1 Steele is president of the Oglala Sioux Tribe
63 | 1 Destiny is just an excuse for bad management
64 | 0 affairs is simply the consequence of some natural order
65 | 1 the Social Media Club [is] an original member of the Media 2.0 Workgroup
66 | 1 Solis is co-founder of the Social Media Club
67 | 0 the Social Media Collective .|Solis is co-founder of the Social Media Club
68 | 0 the Social Media Collective .|Solis is co-founder of the Social Media Club
69 | 0 .org .|Solis is co-founder of the Social Media Club
70 | 0 Archie Donahue [is] ace [of] segments highlight Marine
71 | 0 LASP is one of a few
72 | 1 The Hurricanes is a story of grit
73 | 1 Maroua is the capital city of the Far North Province
74 | 1 A tribe is any group of people
75 | 0 o...|A tribe is any group of people
76 | 1 Mikael is a married guy from California
77 | 1 A child is a gift from the Creator
78 | 1 Cholesterol is a major component of myelin
79 | 1 Guy is the co-creator of Nancy
80 | 1 Bacteria are a diverse group of single-celled organisms
81 | 1 Play is an intergral part of childhood
82 | 1 82 is a freelance writer in San Francisco
83 | 1 Felix was the first Bishop of the East Angles
84 | 1 Dr. Herman is a fellow of the American College
85 | 0 uploads are an shred of short buttons
86 | 0 three were veterans of World War II
87 | 1 Harold Pinter is a master of language
88 | 1 David was a king of wars
89 | 1 Dr. David Oliver [is] a connection of the Spencer family
90 | 1 Vampire Weekend is an album by Vampire Weekend
91 | 1 Night Watch was a huge hit in Russia
92 | 1 Hair Design is a hair salon in Wilmington
93 | 0 FIG. 2B is a side elevational view of FIG.
94 | 0 Kristina is a Champion for 1 Cause
95 | 1 Beings are owners of kammas
96 | 0 their homing-place .|Beings are owners of kamma
97 | 1 Unknown tongues are a sign of God
98 | 1 summer vacation season is the prime time for home burglaries
99 | 1 they are a good source of fiber
100 | 1 Attachments are a gateway for spammers
101 |
--------------------------------------------------------------------------------
/data/sentences.txt:
--------------------------------------------------------------------------------
1 | 31 were licenced in 1662 , as Hackney carriages .
2 | Race order will rotate on a weekly basis .
3 | Length is contained within three fields .
4 | In the mid 7th century , Islam was introduced to China .
5 | The EMR consists of four components .
6 | In 2000 , Lisbon had a median family income of $ 47,566 .
7 | BMI , mentioned above , is a trigger of IC.
8 | OPEN HOUSE relies on donations , large and small , to carry out its work .
9 | 7. Eve will invest in properties .
10 | Freedom is not the absence of commitments , but the ability to choose--and commit myself to--what is best for me .|Freedom is not the absence of commitment , but the ability to commit to whatever is right for you .|Freedom is not the absence of commitments , but the ...
11 | Complete support is provided for video walls , which are an essential presentation in a virtual studio .
12 | Guests can opt for in-room , poolside , or beachside massages , and more than 60 skin and body therapy treatments .
13 | Perl is predominantly used for data conversion , data management , and site or page creation .
14 | Wendy is the daughter of Eldon and Pam Korinek .
15 | Amphora is a publication of the American Philological Association .
16 | A cure should last for 2-3 weeks .
17 | Crocker is not a bad guy .
18 | Jay said to Miley , who was a guest on the show along with dad Billy Ray .|You also drink ketchup , which seems odd , Jay said to Miley , who was a guest on the show along with dad Billy Ray .
19 | Testosterone is an important hormone for muscles .
20 | Jafrum is a cool site for motorcycle parts , accessories , and clothing for the motorcyclist .
21 | 7:04 AM MST on Mon. , Apr .
22 | 3. Verse 11 can be understood as a prayer .
23 | Life was difficult in pioneer times .
24 | Ryan Adams needs to develop single mindedness and focus .
25 | BIG FISH would take an hour or more .
26 | The South Carolina Aquarium features thousands of marine animals , including sharks , loggerhead turtles , otters , eels , and seahorses .
27 | Godwin certainly has a plan .
28 | In fact , teams are composed of players and fans .
29 | Uaz has been withdrawn by icon .
30 | Wedding Camera with Flash .
31 | Friends are on a seesaw .
32 | Jamie uses a multitude of symbols , more than an observer can respond to upon single observation - light versus dark , green versus brown , landscapes abounding in water versus barren expanses of rock and dirt - but the unity of theme relies on the use of reflections .
33 | Flowers will vary by availability .
34 | Rock Island is a city in Rock Island County , Illinois , United States .
35 | However , WER grows faster than DER .
36 | International students must have a minimum TOEFL score of 500 .
37 | The Compromise of 1850 see Martin , pp .
38 | The Washington , D.C. , native is in the College Honors Program .
39 | Denise is available to speak on dog behavior , behavior problems , learning theory , becoming a dog trainer and other topics relating to dogs .
40 | Abstracts are due by February 28 , 2001 .
41 | If we are , Bradyn does n't circulates .
42 | Ice skating is also very popular in London , with some of the most famous outdoor rinks being at Alexandra Palace , and Somerset House .
43 | Pembroke is located in Pembrokeshire , Wales .
44 | Robin battled Mr. Freeze .
45 | In general , a growing population means increased demand for real estate .
46 | Naphtali is yet seven years .
47 | Mama was now a ghost .
48 | Glass bottles are not permitted on Park beaches .
49 | Digital storage media have a long history of fragility , as well as obsolescence .
50 | 2009 is going to rock balls .
51 | Purity paves the way to intimacy .
52 | SAFAR is committed to harness , strengthen and empower mainly the women , minorities and other marginalized sections of the society towards their rights through capacity building process .
53 | A memorial may help Literacy .
54 | BGMC provided money for a stove , refrigerator , freezer , dishes , and pans .
55 | Love is the opposite of greed .
56 | Ironbridge Gorge is on the River Severn , 5 miles south of Telford in Shropshire .
57 | Majandra Delfino was born in Caracas , Venezuela , but moved with her family to Miami at age 3.
58 | When he wishes , Heinlein speaks the language of the Saints , to the glory of Man .
59 | Licensed fluids are discouraged to accessible technical support .
60 | Alberta was a federal electoral district in the Northwest Territories , Canada , that was represented in the Canadian House of Commons from 1887 to 1908 .
61 | 883 million is also a record .
62 | And regular people would say , 'you should be on TV . '
63 | Engineering is not just about technology .
64 | Since four-way meetings begin right after the initial client-counsel meeting , counsel need not wait for a triggering event .
65 | Health preformers are pragmatic for ten days .
66 | 70 % of the staff holds advanced degrees .
67 | In 1996 , Fowler took the helm of Worlds , Inc. , in an effort to revitalize and sell the company .
68 | After They make the Plan , the Plan is projected to Initiates , then to Disciples , then to Aspirants and then to humanity .
69 | Firstly , ASP.NET is the latest version of Microsoft 's Active Server Pages technology .|ASP.NET is the latest version of Microsoft 's Active Server Pages technology .
70 | Conditions were last updated on March 29 , 2008 .
71 | Participants should wear a bike helmet .
72 | Evil is not inherent in human nature , it is learned . . . .
73 | The CPS sample consisted of 179 women , and the criminal justice system sample consisted of 57 women .
74 | Chelsea is in a difficult position , and she is holding up very well .
75 | Old game hear of sides .
76 | Middle school is really fun .
77 | 42 was committed 2 weeks , 2 days ago .
78 | Finally , implementation must be set in motion .
79 | Part of them is printed by state support .
80 | Care is provided by an outside agency .
81 | Opportunities include focusing on academic enrichment , sports , arts , and much more .See the guidance bulletin board for details .
82 | Separate consideration is given to fiscal policies , family allowances , family and work , leave-taking and caretaking policies , housing , policies concerning family violence , social aid and poverty , and education and culture .
83 | Full-text is available for 2006+ .
84 | Southwestern improved to 12-4 and 1-0 .
85 | Fellows will receive up to five years of support .
86 | Elena will be living in Siberia , studying management of natural resources and the relationship between Siberia 's natural riches and its people .
87 | Also , Zimbabwe finally has a new Prime Minister .
88 | Gary49er , JHK is not selling fear .
89 | A couple things bring back memories .
90 | Rip Hamilton returned for the Pistons but struggled .
91 | Postnatal depression is extremely common illness .
92 | Anarchism is glorified thumb-sucking .
93 | Neptune is associated with alcohol and drugs .
94 | Wee talked to Tooth and Nail , Fuel by Ramen , and Universal and it 's something we are committed to .
95 | Beck comes to a similar interpretation .
96 | Protectionism has also been a concern .
97 | Cow urine is basically ammonia .
98 | Item is made with metal and plastic .
99 | West Lothian is a county of Scotland .|Before 1975 , West Lothian was a county of Scotland .
100 | These Aircraft Operating Rules constitute part of Club Policy , violations of which will result in fines , membership review , or expulsion from the Club as set forth elsewhere in Club Policy .
101 | Communication is essential between teachers and families .
102 | PRE-REGISTRATION IS REQUIRED FOR THE MAT.
103 | Video ruin controversy comes in true forms , and future is a relevant subject .
104 | For the first time , the Tampa Bay Rays are headed to the World Series .|The Tampa Bay Rays are headed to the World Series .
105 | Healthy families and communities practice the art of compromise .
106 | Rugrat is reported by F-Secure .
107 | However , this is not the case and , in fact , BTP contains a number of optimizations .
108 | April is a long-time supporter of DHCC , serving on the Board since its inception in 1972 .
109 | Page processed in 0.930238 sec.
110 | TKD grows in Malaysia , Singapore and Brunei .
111 | Transportation would be a really good thing .
112 | The CD is titled Traffic and Weather .
113 | The Cathedral was built between 1846 and 1864 .
114 | The City Hall is located in Via Roma 15 , phone ++39 0765 872025 , fax ++39 0765 872025 .
115 | Hikaru let out a relieved breath .
116 | ERP vendors provide professional services in consultancy , customization , and support .
117 | Il Nido di Gatto Panceri .
118 | Seasonal hunting is permitted for partridge , quail , deer , pronghorn , and bighorn sheep.
119 | Downsizing may be worth exploring .
120 | Craigslist may be in for tough sledding .
121 | Before borrowing , students should exercise caution .
122 | Food irradiation is banned in Europe , largely due to the above concerns .
123 | In December , 1989 , Carlos Salinas became president of Mexico .
124 | Man Utd have a game in hand .
125 | The DFSG is our definition of free software .
126 | Universal energy is experienced through Tai Chi , Acupuncture , Yoga , and Qigong .
127 | Sainte-Colombe was indeed a musical genius .
128 | BLOOD AND GOLD will be published in the Fall .
129 | Maintenance fees are approximately $ 630 .
130 | Harry was no stranger to pain , his uncle had seen to that , but there was nothing that could have ever prepared him to feel anything like this .
131 | Apart from the owner , the Trust also became the centre of controversy .
132 | Lisa has a Bachelor 's Degree from the University of Pittsburgh .
133 | The Chief Justice of the Supreme Court is sending a message to the U.S. Congress , and if they are not listening , there must be a massive petition drive to recall them .
134 | Generic Acyclovir is contraindicated for patients , who develop hypersensitivity or chemical intolerance to the components of this formulation .
135 | I have to say , MobileMe got off to a rocky start , but seems to be OK now .|MobileMe got off to a rocky start .
136 | An example is seen in Phosphorus , which is a very cold patient , but his stomach symptoms are better from cold drinks .
137 | SQL keywords fall into a number of groups .
138 | Fortune has come full circle for Masque , which began as one of the 80 's hottest underground bands and , some twenty years later , released its successful CD entitled Face First in 2008 featuring songs recorded during their Hollywood Sunset Strip heyday .
139 | GE Fanuc Spindle Servo A06B-6055-H106 unit are available for exchange , surplus + rebuild refurbishment and full repairs are also available if time is not critical .
140 | Four of these were installed in June , 1998 .
141 | Scarce was every kind of food .
142 | 92 was committed 2 weeks , 2 days ago .
143 | Cuba has a well developed system of direct democracy , through workplaces and neighbourhood committees , and most Cubans are well aware of the options available to them .
144 | The Internet Assigned Numbers Authority will maintain a registry of URL schemes .
145 | Martorana lined out to rf .
146 | Nagi did fall from grace .
147 | Jezebel walks over to Luli .
148 | The judge had scarcely finished , when , with a growl like a wild beast , a dozen men sprang on Maxwell .
149 | Dan is a graduate of Emory University , Atlanta GA and has completed his post graduate studies in International Business at Florida State .|Dan is a graduate of Emory University .
150 | Bankruptcies are also increasing in Canada .
151 | Dried figs are an excellent source of iron .
152 | Good list , 2006 was a great year for electronic music .
153 | Isabella was far from a small woman .
154 | Slashfood is a member of the Weblogs , Inc. Network .
155 | Cologne can look back on a long and eventful history , and you can see evidence of this all over the city .
156 | MySQL AB has started an affiliate program .
157 | Contamination is a married guy from Saitama , Japan .
158 | Still , CODEINE is matter of miscommunication , or lack of sexual drive .
159 | Watson enlisted in July 1942 .
160 | Day is Dying in the West .
161 | Orlando Bloom was in a car accident , but he was not banging Jessica Simpson at the time .
162 | Therefore , dampening is compromised on concrete , forcing you to alter your stride mechanics .
163 | The Electoral College is an affront to basic democracy , warping competition and subverting political equality -- even when it works .
164 | Ronnie James Dio was destined for greatness .
165 | Adam Sandler is a waste of film .
166 | Well one has to follow the Pope .
167 | Tony loved President Bush , Perino said .|Tony loved President Bush .
168 | Bathrooms comprises of showers , bidets and additionaly toiletries .
169 | Chester is part of the PoughkeepsieNewburghMiddletown , NY Metropolitan Statistical Area as well as the larger New YorkNewarkBridgeport , NY-NJ-CT-PA Combined Statistical Area .
170 | Outsiders have the wrong idea about homecomings .
171 | The Impact stole the show in L.A.
172 | Tin comes in Gold .
173 | Site is in English and Finnish .
174 | Neff writes in the March 6 , 2006 , issue of Oil and Gas Journal .
175 | Fans tend to be of two minds .
176 | Recitation of the rosary will be at 11 a.m.
177 | Reportedly , Clarence was very good with children .|Clarence was very good with children .
178 | Aleppo is the oldest inhabited city in history .
179 | A social institution is a group of people , organized by status .
180 | Page created in 1.410816192627 seconds .
181 | Fields 1-4 are available for manure application .
182 | At its most basic , community stands for common interest .
183 | An elevated level can indicate the presence of congestive heart failure .
184 | Next stop was at the Dead Sea , near Ein Bokek .
185 | Paul glanced over to Alice , his accordion teacher , and they exchanged thumbs-up signs .
186 | Schedules can be customized for various types of businesses .
187 | The Body Mass Index is considered a good indicator of nutritional status .
188 | Monopolies are blind to politics , except when politics can be manipulated to establish or extend the monopoly .
189 | Louis loved all kinds of music .
190 | Governments respond aggressively to global downturn .
191 | Canape also met at Ruby Skye .
192 | John B. died on 01 Mar 1945 .
193 | Sequels can be a bitch .
194 | Phone calls are welcome at 603 527 1490 .
195 | CR is the abbreviation of Card Reader .
196 | Leandro, F advanced to third .
197 | Protectionism amounts to armed robbery .
198 | Additional meal tickets may also be purchased for family and friends .
199 | Children had not gone to school .|Because this was a holiday week , children had not gone to school .
200 | Uz is distinguished from Edom .
201 | O'Day married again in 1942 , this time to golf pro and jazz fan Carl Hoff .
202 | Joe and Mike are still on board .
203 | Lets start with coffee .
204 | Benjamin joined the Army in 1815 , Galway .
205 | In 1988 , Lawn Darts were officially banned in the United States .
206 | Ideas can make a difference .
207 | Thousands have been sent to mental hospitals .
208 | Dr. Weil is a proponent of Integrative Medicine , which combines the best ideas and practices of alternative and conventional medicine in order to maximize the body 's natural healing mechanisms .
209 | James Wang is a photographer in San Diego .
210 | Tommy is on a roll , riding a streak of political good luck .
211 | The Windows version is in development .
212 | Moss and lichen cannot grow on copper , keeping gutters clear and making them excellent for rainwater harvesting .
213 | Schools remain closed in Jabalia , Beit Hanoun and Beit Lahia affecting 30,000 UNRWA students and 8,000 students from the state sector .
214 | Alderbrook appears on the Bethlehem U.S. Geological Survey Map .
215 | Bids do not include shipping and handling charges .
216 | Kermit is slang for Road .
217 | PCI slots are used for expansion .
218 | The Illini will have to wait until morning .
219 | Orientals start at $ 500 .
220 | Saint Peters is a city in St . Charles County , Missouri , United States .
221 | Rehabilitation consists of counselling , examination of the need for rehabilitation , different therapies , rehabilitation periods in an institution , adaptation training , rehabilitation counselling and aids .
222 | Degrees do n't guarantee competence .
223 | OpalSoft is headquartered in San Jose , California with consultants providing a full range of consulting services throughout North America .
224 | CNN 's Elaine Quijano is live in Chicago .
225 | Christian Bale talks about Harsh Times .
226 | 3. The Titanic set sail on April 10 1912 .
227 | The Moon is opposed to the Sun , and rises about sunset .
228 | St Julien les Villas .
229 | Caddyshack Looks GOOD on HD DVD ! ! ! !
230 | Woolite is a trademark of Reckitt Benckiser .
231 | A memorandum is submitted to Dr. Manmohan Singh , Honorable Prime Minister of India with demands of providing reservation in private sectors , enacting reservation bill and fulfill the vacant SC seats in Government jobs signed by national executive members and state presidents and secretaries of SC Confederation .
232 | Turn left on to Tremont Street .
233 | Sustainable development cannot be imposed from above .
234 | Shimon Peres is back in office , a minister with responsibility for developing the Galilee and the Negev.
235 | Kida believes in statistics , whereas people evolved to believe in anecdotes .
236 | RADIO is the only source of info .
237 | The Budgerigar can function as an example .
238 | Helping people live there best life .
239 | Methadone is used for chronic pain .
240 | Discussion related to video gaming .
241 | This is terrible , Flash is only good for video and games , anyone making websites that heavily rely on flash need to start thinking about using plain html , css , and javascript to build their websites .
242 | Piracy is an integral part of exploration .
243 | Lillian D. Windmeier , 79 passed away in Venice , Florida November 7 , 2008 .
244 | Backdraft is an amazing piece of film work .
245 | C is for Centaur , the gallant half men steeds .
246 | Spirit matters more than point of view .
247 | After the lunch break , a second exam booklet , ursing , part II is given to test-takers .
248 | The Colonel paused for a moment .
249 | Stevens may be part of a trend , where an increasing number of men are becoming later-in-life dads .
250 | Panels covered a variety of topics , from Jewish-Berber relations in Morocco to Jewish philanthropists in Russia , Canada and the United States .
251 | John McCain projected winner in South Carolina , 80 per cent of the vote giving McCain a 3 per cent lead .
252 | Ananda Yoga was founded by Paramahansa Yogananda .
253 | Postures are called asanas .
254 | Number 4 came in dead last .
255 | Billy moved in front of her , blocking her view of the invention .
256 | Tya is the art director for Coalesce , and has been with the agency for three years .
257 | Jane is the mother of Fielding , Gordon , Jr. and Kimbrell Smith Teegarden .
258 | A sign here says La Florida .
259 | Depp is perfect for Barnabas , but Im not sure how I feel about Burton .
260 | GT5 will also be a big event .
261 | The Bull Pen expands to two hours , 1-3 p.m. , with the arrival of football .
262 | Sharing culture is a service to humankind .
263 | Mbeki was wrong on AIDS , and he 's wrong on this .
264 | Titles are arranged alphabetically by author last names .
265 | Between 1949 and 1960 , The Company introduced to Ontario , Speed Queen washers and dryers , Emerson and Motorola electronic products , Gibson home appliances and to Canada , Jenn-Air cooking equipment , Sub-Zero refrigerators , Deep Freeze refrigerators and freezers , Weber barbecues and the Panasonic line of electronics .
266 | Phrase searches can be made within quotation marks .
267 | The KDE web site also lists current job openings .
268 | Physically , solutions are an university of principal buttons , or keys .
269 | Report data was collected through two steps .
270 | Bear shot on Hinchinbrook Island , October 14 , 2001 .
271 | Bluehost excels in customer service .
272 | Blogs are not personal homepages .
273 | Mark has been invited to speak to organizations , businesses , at major project management conferences including Project World , and PMI chapters .
274 | Advent Children belongs to Square Enix .
275 | Mr Pullicino is Minister of Resources and Rural Affairs .
276 | Terrorist is first in fleet .
277 | Specific rates apply to on-site testing .
278 | There were 14 deaths during the journey , nine of which were children under 5 years of age .
279 | East Asia is a region of economic dynamism .
280 | To Schuller , sin is merely the lack of self-esteem .
281 | As of the 2000 census , the CDP had a total population of 6,188 .
282 | He felt in his very bones that for the Melanesian , Westernisation was the road to death .
283 | Hurricane Ivan has been a catastrophe for Grenada .
284 | H was also showing ZED , its new series of small format analog mixers for live performance or recording .
285 | Products passed the certificates of ISO9001 , HALAL , ...
286 | Top-notch caught masturbating stories porn .
287 | On Thursday , November 23 , Havel spoke to a crowd of more than 300,000 .
288 | Histoplasmosis is common in the Northeast .
289 | Requests are processed on a first-come first-served basis .
290 | On July 15 , 1993 , Gordon commenced an action in Quebec , and on July 16 in Ontario .
291 | Witch is advantageous in two respects .
292 | Space limited to 10 pairs .
293 | Natural progesterone is not only for women .
294 | Science magazine has a special issue on coherence .
295 | Torchwood is also up for an award .
296 | Big companies always need sacrificial lambs , said Penelope .
297 | Of these , therefor ipv6 is solely in use .
298 | Guests enjoyed a selection of Big Band music , toured the exhibit , and had the opportunity to interact with Milton Greenberg and the other speakers from the symposium .
299 | First , estrogen has a protective effect on bone , and reduced levels of the hormone trigger bone loss .|Estrogen has a protective effect on bone .
300 | DRDA provides opportunities for education , peace building , income generation and hope .
301 | Delivery should be made during weekdays .
302 | Fur went down three musketeers .
303 | AJ also starred in The Virgin Suicides .
304 | An order was entered on August 1 , 1992 .
305 | Operating expenses have remained under control .
306 | Hearings are held in Philadelphia , Pittsburgh , and Harrisburg .
307 | 2 Wells , Creek runs thru property .
308 | Downtown Asheville is the cultural center of Western North Carolina .
309 | Blackjack was originated in France , and is one of the most popular casino games ...
310 | Complaints were sent by post and email .
311 | Devon may not be left by approbation .
312 | 36 spoke alloy rims .
313 | The Free Shipping applies only to the Lower 48 states .
314 | Page loaded in 0.0714721679688 seconds .
315 | DVBT is used by 152 users of Software Informer .
316 | Robert Gates is still secretary of defense .
317 | Similarly , surveys must show empathy .
318 | Rick is hiring a designer .
319 | Pressure might work for ice skaters , but it 's not much help for sleds , skis , snowboards , or any other device that slides on a large , flat surface .
320 | Canned peas are often used as a garnish .
321 | Headquarters are located in Vienna , Austria in Central Europe .
322 | Shane is in town , making tables for Graceland in our garage .
323 | 576 CORPORATION is currently located in DE .
324 | Amino acids can have a powerful impact on depression , with minimal side effects .
325 | A lab test is the most reliable means of detection .
326 | Research is also being conducted on meningiomas , a more benign tumor that nonetheless is capable of invading into the normal brain .
327 | Student membership provides a range of dedicated services , which enable students to borrow books and gain access to information that is n't available in the public domain .
328 | The Maharaja requested Pt .
329 | Staff are available by appointment .
330 | God 's World is full of opportunities .
331 | Nugan turns to Dalineous .
332 | O'Brien advanced to third .
333 | Regional differences must be taken into account .
334 | Tabor was born in Holland , Vermont .
335 | Deployment means traveling to Antarctica .
336 | Like Ronald Reagan in 1976 , Mitt Romney is not part of the Washington establishment .
337 | Cap rate is watchful for capitalization rate .
338 | Restaurants give food to food banks .
339 | Dave grew up in Kitchener , Ontario .
340 | LANXESS does not conduct fundamental research .
341 | Didnt think that was in question .
342 | Mark Young was born in New Zealand , but has lived for more than half his life in Australia .
343 | The CIPROFLOXACIN could be a logical suspect .
344 | Beginning in 1951 , Axelrod conducted research on codeine , ephedrine , methamphetamine , and morphine .
345 | Grand Final is held at the Melbourne Cricket Ground .
346 | In the absence of the Chairman , the Vice-Chairman will chair meetings of the Board of Directors .
347 | 2003 Jamie acted in Threat Matrix .
348 | Anger seethed in Steven 's chest , obscuring his vision and drowning out the last of court .
349 | 0146.0005 is now available for Ad-Aware Anniversary Edition .
350 | Collective bargaining is governed by the National Labor Relations Act .
351 | Dolby 2.0 stated on cover , but only the 2.0 is available .
352 | Mr. Rogers specializes in renewable energy , political intelligence and cross border transactions .
353 | The VCAA makes a recommendation to the Chancellor .
354 | Salinity can also be affected by urbanization .
355 | 7. Keep out of reach of children .
356 | The Hummingbird Centre features plenty of musical entertainment .
357 | A newspaper was published here in 1851 , and in 1869 the first railroad reached town .
358 | Military service is often a family tradition .
359 | After all , Gimp is best learned through curiosity .
360 | Smokey Bones is located in Fort Myers .
361 | Cover and pages are in great condition .
362 | Garland is a town located in Tipton County , Tennessee .
363 | 3i has acquired Civica , a market leading provider of services to the public sector .
364 | Trondheim is a city of schools , with many educational institutions .
365 | Instruments includes articles on early instruments , the development and form of the orchestra and of course the instruments themselves .
366 | Curently , production is running 20-25 days .
367 | Dr. Erwin joins Drs .
368 | Marlin was rushed to Hermann Hospital , but to no avail .
369 | Weve always killed rattlesnakes .
370 | The Open-air Museum is closed on 24 and 31 December .
371 | Living bone consists of three layers .
372 | LOL just had an impulse .
373 | The Finance Committee will look at options .
374 | Public transport consists of the Metro Bus , and these form important means of transport for non-drivers and tourists .
375 | Weiskopf utilizes elements of jazz and classical music , skillfully integrating improvisation and written themes .
376 | Flow is obviously very important in poetry .
377 | Jorge works for Marriott , and Dea gets all the love she can from her parents , grandparents and great grandparent .
378 | Page loaded in 0.158607006073 seconds .
379 | Riven is a trademark of Cyan , Inc.
380 | Mini skirts are not acceptable attire .
381 | World Congress hosted in Beijing , China .
382 | Alkalines are an example of primary cells .
383 | A state is known for Ayurveda , a traditional patterns of medicine which has found the fresh market in the growing holidaymaker industry .
384 | Fujian claims that in doing so , Customs made an error of law .
385 | Stanhope has an extensive background in accounting , commercial property management and commercial development .
386 | A receding hairline is also very common among men .
387 | China 's internet prototypes connect to internetcafe .
388 | Ava was being flown by midsummer .
389 | Lucia was established in 2001 , destiny by jim brickmann destiny church ok with destiny capital destiny capital management inc.
390 | Epinions has the best comparison shopping information on Royal Air Maroc. .
391 | Trendy look with three-quarter sleeves .
392 | Two other brothers work for low wages .
393 | The Lord Jesus says in Matthew 16 :18 , on this Rock I will build My church .
394 | The FAQ was last updated on July 6 , 2004 .
395 | Keyword-dense webpages are useful for Web presence .
396 | Pat watt is the uncle of www.workbizfromhom.com .
397 | Maxi did all kinds of flowers .
398 | Licensed orders are connected to realistic technical support .
399 | HOODIA did n't get along with Sunni extremists .
400 | Husband and wife lived in HARRISON Twp .
401 | Guy Kawasaki has a great post up about Kiva .
402 | Form 53 will no longer be accepted for enrollment .
403 | Classes are also available in Kennewick , Bellingham , Yakima , Spokane , Poulsbo , Tacoma and Alaska through Children 's video and teleconferencing program .
404 | Cipro can also cause gas , etc .
405 | Macrame is great for hemp necklaces .
406 | A good fit is essential for children 's eyeglasses .
407 | The TIN is one such piece of information .
408 | Alex danced with Mrs. Lambert .
409 | Livulpi flied out to cf .
410 | Price reduced to $ 459,900 .
411 | Monique is n't being busted by representative .
412 | Beginning with this version , KateOS addresses the needs of multiple user groups .
413 | Jay-Z is currently CEO of Def Jam , a label held by Universal Music while Beyonc is in contract with Sony .
414 | Candles are n't just for birthdays .
415 | Kasih means love in the Indonesian language .
416 | Streams swell in springtime , carrying water from snowmelt to valleys .
417 | Cross Purposes will normally consist of 6-8 members , a mixture of experienced members and young people engaged in peer ministry who have received appropriate training .
418 | St . Joan of Arc has developed a reputation for outstanding music , both during Masses and in special events .
419 | Key 12 is the law of reversal .
420 | Anna originally auditioned for Good Morning , Who Are You ? but very wisely turned the part down after reading the script .
421 | Laser printed for best quality .
422 | In a two-year time frame , his body has changed , Burke said of Bernier .
423 | Augusta is situated on M-96 , about five miles northeast of Galesburg , and about seven miles west of downtown Battle Creek .
424 | Journalists do n't accept terms .
425 | Workshops are delivered to community members , college students , and young children .
426 | Proper attire consists of closed toe shoes , shirt , and shorts .
427 | Miniature trees usually reach a height of 6 to 8 feet , but still produce full size fruit .
428 | Discussion includes effects of wetland destruction .
429 | Joe Gibbs autographed Football Mini Helmet .
430 | Things have gone kaput .
431 | Women workers are particularly active in social education .
432 | VAT charged at 17.5 % .
433 | Reservation deposits are non-refundable upon cancellation .
434 | Jack was a man of taste , who never forgot his roots as a studio animator .
435 | Logistics can be difficult in India .
436 | Eastern part will be absorbed by the USA , Japan and China .
437 | Lacy might have done a better job .
438 | Bossier was game in defeat , dressing roughly 30 players .
439 | Clinton Township is a township in Butler County , Pennsylvania , United States .
440 | Impatience is another attribute of selfishness .
441 | Residents can expect to plays games , watch movies , grab coffee and other treats , and just sit around and talk - all in Spanish of course .
442 | Grades vary enormously with ore bodies .
443 | Digesters are now running on forage crops , in some cases requiring no slurry component at all .
444 | Yoga poses for abs .
445 | Cuvee is located in Boston .
446 | The River Adur has its source in the South Downs , southern England and cuts its way through the soft chalk to its mouth at the port town of Shoreham-by-Sea .
447 | Free parking is available in Lot A , next to University Hall , for all performances .
448 | Domestic prices fell in response , as did domestic production .
449 | Karl is also a current member of the Screen Actors Guild .
450 | In 2003 , Savio filed for divorce .
451 | COD accepted for postage .
452 | The NowCast is not currently available for Islip , New York .
453 | Domain advance are reviewing younger and cheaper .
454 | Action is possible in 2007 .
455 | Quin has a number of different moods , all of which are adorable .
456 | The ETV Web Site has additional information on ETV , including general program topics , as well as detailed information on the ETV verification centers .
457 | Mike Knox was eliminated by HBK.
458 | Dr. Anthony came to Lauderdale County , April 4 , 1843 , and engaged in the practice of medicine , which he still successfully continues .
459 | John Gerrard Oil Stick Work closes in 34 days .
460 | First , hair loss can be a side effect of diabetes .
461 | Abstract algebra is the study of operations , their properties , and the structures that support them .
462 | Silence fell on the Congregation .
463 | The ACC has just 7 .
464 | British media circulate in Ireland .
465 | Hensley pinch hit for Sevilla .
466 | Early experiences also play a role .
467 | Google maps is definitely better than Yahoo , MapQuest , and MSN.
468 | Registrations will be allocated on a first-come , first served basis , so register early !
469 | Almost all of them are for online poker , or online casinos .
470 | Bond singled to right field .
471 | Interment will be made in Middletown Cemetery .|Interment will be made in Middletown Cemetery , Middletown .
472 | Instead of long-winded posts like this one , brevity is the soul of Twitter .
473 | Born about 1887 , Doyle died in 1945 .
474 | France is in civil war .
475 | Frankly , Michael Johnson is talking garbage .
476 | Price goes up in April .
477 | Bargaining is acceptable in Israel , but not everywhere .
478 | Pass-Guaranteed is your source for the Cisco 350-024 exam .
479 | Nectar is provided for hummingbirds .
480 | Steele is president of the Oglala Sioux Tribe .
481 | Destiny is just an excuse for bad management , Foege said in deploring those who believe the worlds current state of affairs is simply the consequence of some natural order .
482 | Process improvement is about business value .
483 | 25 learn to make tamales .
484 | Solis is co-founder of the Social Media Club , is an original member of the Media 2.0 Workgroup , and also is a contributor to the Social Media Collective .|Solis is co-founder of the Social Media Club , is an original member of the Media 2.0 Workgroup , and also contributes to the Social Media Collective .|Solis is co-founder of the Social Media Club , an original member of the Media 2.0 Workgroup , and a contributor to the Social Media Collective and http : .org .|Solis is co-founder of the Social Media Club , is an original member of the Media 2.0 Workgroup , and also is a contributor to the Social Media Collective and ConversationalMedia .org .
485 | MICHAEL typed in obsession , and it , too , failed .
486 | Heart , Hibbard told the Boston Globe in April .
487 | Wines and refreshments were placed on a table , on which the writings were displayed , ready for signature .
488 | Mice live far from water .
489 | Homes is more like mansions .
490 | After receiving the district 's determination , petitioner commenced this appeal on June 6 , 1991 .
491 | Cliff Robertson plays host to Jimmy Stewart , Bob Hope , Robert Stack and George Bush , and segments highlight Marine ace Archie Donahue and General Jimmy H. Doolittle .
492 | Duncan had not been mechanizing a wheatgerm .
493 | Pets welcome only by previous arrangement .
494 | Fosrenol is in the FDA pregnancy category C.
495 | A third lesson was to trust people .
496 | Chapter 3 has entries for Major Generals .
497 | A background provides a number of benefits , including skills , contacts and possibly equipment .
498 | Moats were also used for moat spans .
499 | Impeachment has been presented to Congress .
500 | John wrote in Ephesus .
501 | In 1920 , Gidlow moved to New York City , where she worked for Pearson 's , a progressive magazine .
502 | Thousands of children die every year in pools , and it simple does not have to be that way .
503 | Polls can be added to posts .
504 | Fees can be paid in three equal instalments .
505 | Interior living space ranges from approximately 1,500-2,300 sq .
506 | Cheyenne is also the largest city in the Equality State , supporting a population of around 53,000 people .
507 | LASP is one of a few .
508 | The Hurricanes is a story of grit , strength , racial boundaries and sheer determination as this ragtag team bands together in the face of adversity .
509 | Beach plums grow in sandy soil , even dunes , from New Jersey to eastern Canada .
510 | Roles can be used to define access control .
511 | STIC will provide access to a wide range of materials .
512 | The Start button will turn into a Stop button .
513 | Bugsy Siegel did not reside in Las Vegas , broadway shows in las vegas cannot annex an unincorporated township .|Bugsy Siegel did not reside in Las Vegas .
514 | 3. Stay out of yellow zones .
515 | Steve was named Chairman of the Board instead .
516 | Programs flooded for compressed advertising , current as pen or cookies , centralize similarly a worth mess of forcing wages which are rarely represented spyware , and the robots they are avoided for are the most innocuous , i should say .
517 | Data analysis was performed using GraphPad Prism .
518 | MSF provided support to health facilities , trained staff and donated drugs and diagnostic tests .
519 | Trade organizations are comprised of corporations , private companies and branches .
520 | A non refundable deposit is required upon initial registration .
521 | Prosecco is Italian for bubbly .
522 | 1991 ROYN 0.0.20 hatched during 1991 .
523 | Vashti moved to Muskogee , Oklahoma by 1920 and by 1930 she was living in West Palm Beach , Florida with her son .
524 | Place left hand on waist , remembering to keep the back straight .|Place left hand on waist .
525 | Final restoration is completed before project completion .
526 | The Admiral was of 600 .
527 | This is primarily because Apple , unlike Microsoft , is not talking about this next generation platform publicly , Rob Enderle writes for MacNews World .
528 | States is more sympathetic to the Armenians .
529 | 235 have paid 1996 dues .
530 | And , luckily , friends stepped in as supporters .
531 | Buggy wins the match in 2 sets .
532 | God 's eternal purpose is being carried out in Christ Jesus .
533 | John Hawkins is just a good guy .
534 | ChangeLog removed from CVS.
535 | Ron was born in Richmond , Ellen was born in Richmond .
536 | Ron was born in Richmond , Ellen was born in Richmond .
537 | Listings are based on self-reported information .
538 | Joey opened with ACETONE , then played TOPLINeS , then SHEWERs .
539 | Vanunu made a statement in English .
540 | Choice Hotels offers the best in rooms , no matter where you are traveling to .
541 | A third issue is due in 2008 .
542 | Maroua is the capital city of the Far North Province of Cameroon .
543 | Unemployment rose to half a million .
544 | Video representations are enough meant for entertainment .
545 | A tribe is any group of people , large or small , who are connected to o...|A tribe is any group of people , large or small , who are connected to one another , a leader , and an idea .
546 | Deahl lives in Hamilton , Ontario .
547 | Mikael is a married guy from California , USA .
548 | 6 entries found for engineer .
549 | A child is a gift from the Creator .
550 | Page generated in 0.011743 seconds .
551 | Cholesterol is a major component of myelin , and glia-produced cholesterol serves as a glial growth factor in synaptogenesis .
552 | On Monday , the Globes will likely follow suit .
553 | Baumatic offer an extensive range of dishwashers , in order to suit the tastes and requirements of each and every individual .
554 | The Zohar rose has 13 petals .
555 | Guy is the co-creator of Nancy .
556 | Ling was decided in 1985 .
557 | As an independently funded institution governed by a council , Gresham does not charge fees for online study , and it can thus judge the medium purely on its merits .
558 | FLEM ! is hosted on Keenspot .
559 | Even money will be paid for a blackjack .
560 | Costco is opening its outlet in Pocatello , Idaho .
561 | Hunt begins promptly at 1 p.m.
562 | Detail is of utmost importance .|Details are of utmost importance .|Detail is of utmost importance , and the specific context each word and sentence of will carefully considered in the translation process .
563 | Designers should also be aware that at the moment , Red Bubble only prints on white shirts .
564 | Bacteria are a diverse group of single-celled organisms , most of which are microscopic .
565 | Digital asset management is n't just for media companies .
566 | Attendance policy is established by individual faculty .
567 | Situated just 350m , across a road , from sandy Nabq Bay , a bus runs to Na'ama Bay , 30 minutes away* .
568 | Play is an intergral part of childhood .
569 | 82 is a freelance writer in San Francisco .
570 | Lauren does n't slings , when you sell .
571 | Anglian Kings , Felix was the first Bishop of the East Angles .
572 | Oxygen supplementation may be advised in severe cases .
573 | KTLA will probably only get a 47 share .
574 | 6million move the Mets .
575 | Neil is kidnapped by the Shredder , the turtle guys have to rescue her .
576 | Fines can range from $ 25 to $ 500 .
577 | A transliteration would read G .
578 | Dr. Herman is a fellow of the American College of Surgeons , and is certified by the American Board of Plastic Surgery .
579 | A full UK tour will follow later in 2009 .
580 | 67 season rolled to a close , super-heroes were falling by the wayside .
581 | Amateur astronomers must check out the Lowell Observatory , made famous by the discovery of Pluto in 1930 .
582 | Search engines provide a multitude of results , but may not provide enough distinction between relevant and irrelevant articles .|Search engines provide a multitude of results .
583 | Despair can quickly turn into doubt .
584 | Furthermore , documentation must be produced to archival standards .
585 | Dean does n't own the Press Democrat .
586 | A PR person specializes in Communication , but must also be well-educated in diverse fields .
587 | Great men were found only in great nations .
588 | Physically , uploads are an shred of short buttons , or keys .
589 | Terry drew in a sharp breath , then trotted off to find one of the paramedics .
590 | Courtesy cars are available on request .
591 | Golf is run by morons .
592 | Beauford moved to Boston , Massachusetts when he was a teenager .
593 | He served as a professor in archaeology to Colombia University from October , 1921 to April , 1922 whereupon be became Associate Curator at the Freer Gallery of Art , Smithsonian Institute , Washington , D.C. for the next twenty years , nine of which were spent in China , until his death on June 18 , 1942 .
594 | For nearly three decades , Borla has pushing the boundaries of performance exhaust systems .
595 | A ballot is usually conducted for 30 days , however this period is flexible .
596 | Registration is now open for Session # 2 .
597 | The Homes are inferior in Davie , Cabarrus , Iredell , Transylvania , Wilson , Yadkin , Yancey .
598 | Landsat 7 was successfully launched in April 1999 .
599 | Punta Prima beach is about a 20-25 minute walk .
600 | According to the same Census Bureau statistics , of the 23 who served in Korea , three were veterans of World War II .
601 | The Transition Program features a wide variety of classroom , campus , and community activities .
602 | Energy can be saved in a number of ways .
603 | Image hosting can be quite a lucrative market .
604 | Harold Pinter is a master of language , but the play was apparently inspired by a very brief and silent scene .
605 | Automated films are installed bots .
606 | A 30 day supply is only $ 2.57 .
607 | CYPROHEPTADINE is on for IBD.
608 | Albert also has 12 magazines .
609 | WikiAnswers What - are the requirements for Stanford .
610 | Crappies have been biting on Julia .
611 | Commercial reproduction requires written permission .
612 | Inns were scattered in various corners , noisy with the din of man and beast on market days .
613 | Depositions can also be taken from 3rd parties .
614 | Video discs are judiciously meant for entertainment .
615 | Greg heads up Business Development , Sales and Client Service efforts at Become .com .
616 | Doctors also look for a teachable moment .
617 | Here in Texas , Poe had a reputation for being a bit of a racists .
618 | The AZITHROMYCIN was for a bladder infection .
619 | Cells can be selected in a variety of ways .
620 | Communication is seeming for sharp results .
621 | David was a king of wars , but at the moment , the LORD had given him a while of rest .
622 | So no , Zambrano was not a given .
623 | Men are not lost souls .
624 | 25 minutes drive to Sarasota .
625 | Depths range from 10 to 60 feet .
626 | Greece is not a very beautiful place .
627 | Cross listed as BIOL 3525 .
628 | In this study , electro-acupuncture was used instead of traditional acupuncture .
629 | Brownfield properties vary in size , location , age , and past use .
630 | A memorial fund will support summer research .
631 | Of the island 's 900 plant species , almost a third is endemic to Mauritius .|Of the island 's 900 plant species , almost a third are endemic to Mauritius .
632 | Credit Unions category provides listings of relevant and useful websites of Tuvalu .
633 | At the age of thirteen , Dr. Lord went to Brookville , Indiana , under the patronage of Dr. David Oliver , a connection of the Spencer family , well known in the pioneer history of Ohio .
634 | Dehydration can significantly contribute to heat cramps , heat exhaustion , and eventually heat stroke .
635 | Cameron dies in prison , leaving Tessa seven million dollars and a Sydney mansion .
636 | Bleach made from urine .
637 | Required items are shown in blue .
638 | Reilly did with Michael .
639 | Audio-visual equipment can be supplied on request .
640 | Satire can be a lucrative business .
641 | Cooperative Education opportunities are available in the Marketing program .
642 | Sullivan has found a 12cm .
643 | Knowledge is not power , Knowledge applied is power .
644 | Billy West is also a vegan .
645 | Vampire Weekend is an album by Vampire Weekend .
646 | Washington DC is on Eastern Standard Time zone .
647 | Wartburg is reviled by thunderstorm .
648 | Ordering information can be obtained from Cambridge University Press .
649 | Nearly all foods contain a mixture of vitamins .
650 | Brent is around $ 72 .
651 | Lily is also training to be a private investigator .
652 | Condition is seeing lift .
653 | And even in uncomplicated open-heart surgeries , recovery times are weeks to months .
654 | Night Watch was a huge hit in Russia , out grossing even Lord of the Rings : The Return of the King in that country .
655 | Silence of the Lambs was sort of scary .
656 | Kim tries to remember Rebecca .
657 | Kansas pictured in 11 panels .
658 | Vanuatu is weaker than Xanax .
659 | Close encounters in India .
660 | Mrs. Quinn was of Lutheran faith .
661 | Image File history File links Savoie_flag .
662 | Hair Design is a hair salon in Wilmington .
663 | At some point , Edna had started pole dancing .
664 | FIG. 2B is a side elevational view of FIG.
665 | Sri Radha used to experience Mahabhava .
666 | As a freelance journalist , Cathi has contributed to Mojo , Uncut , Kerrang ! , GQ and many more .
667 | A It will be VERY different .
668 | Two were found dead at Camp Taji , Iraq , 11 days apart .
669 | Bible contains a lot of history , specially about civilizations in which present-day archaeology is also interested .
670 | Abaza is related to Abkhaz , Adyghian , Kabardian , and Ubykh , which constitute the Abkhazo-Adyghian , or Northwest Caucasian , language group .|Abaza is related to Abkhaz , ...|Abaza is related to Abkhaz , Adyghian , Kabardian , and Ubykh , which constitute the Abkhazo-Adyghian , or ...
671 | Cenobites should 've been closer to number one .
672 | The Society has a large archive of information , photographs and photographic slides pertaining to the town , and also a record of burials in the West Road Cemetary from 1922 - 2000 .
673 | Two funds have been raised to date .
674 | OHIO DEBT CONSOLIDATION is not a god .
675 | Animals are being programmed for disaster , for extinction .
676 | Western blot was carried out using standard techniques .
677 | The WTO sets the rules for trade , but has no rules about procedures for its meetings about how they should be run and organised .
678 | During those four magical nights , Donny had quite a bit of fun .
679 | Quayle now lives in Phoenix , Arizona with his wife Marilyn .
680 | Summons ordered for Geo .
681 | Death has changed dramatically in recent decades .
682 | Graham singled to third base .
683 | El Salvador is traditionally an agricultural country .
684 | Mrs. Graham was full of thoughts , she knew Mr. Stebbins would not understand , and Mr. Stebbins felt it was an unpleasant subject .
685 | Posts : 59 . Cyrus is on a distinguished road .
686 | Kristina is a Champion for 1 Cause .
687 | Elevators are located in Grand Court , next to Dillard 's & Nordstrom and in Bay Street .
688 | Intertrigo have been eroded by generics .
689 | Close Realty provides the following for GRESHAM , OR real estate agents , GRESHAM , OR teams of realtors , GRESHAM , OR realty offices : real estate websites , idx , real estate email marketing , internet marketing for realtors .
690 | Social movements have occurred throughout history .
691 | Beings are owners of kammas , student , heirs of kammas , they have kammas as their progenitor , kammas as their kin , kammas as their homing-place .|Beings are owners of kamma , heir to kamma , born of kamma , related through kamma , and have kamma as their arbitrator .
692 | Yamaguchi is originally from Miyazaki , Japan and lives in Kettering , Ohio with his wife Miki .
693 | ZIP files are there for a reason .
694 | Tom has a list of suspects , but he 's not ready to name names on the air .
695 | Style may differ slightly from picture above .
696 | Two of us lived in West Bend , me and the German guy and three guys lived not far from Milwaukee .
697 | AJAX does have downsides .
698 | Audio is played through electro-acoustics .
699 | Wimax is now commercially available in Malaysia .
700 | Self-talk is also a form of self-therapy .
701 | Unknown tongues are a sign of God 's anger .
702 | Lilah crawls on top of him . .
703 | Rica has to offer in nightlife .
704 | Gabrielle spoke with great admiration .
705 | Four were forced back to Indonesia , and three sank .
706 | Gold coins are of 999 standard , with 3.11 grams of pure gold .
707 | Ahlberg was free on bond .
708 | Parking is available under Building 12 .
709 | Statistical analysis was done using SYSTAT software .
710 | Well , summer vacation season is the prime time for home burglaries .
711 | Apartment buildings gave way to Victorians .
712 | Tomatoes have high levels of antioxidants , they are a good source of fiber and they are known to help flush out waste and fat from your body .
713 | In her role , Stephanie focuses on sales development , opportunity development , administration and resource management .
714 | Calibration is made by Kalman filtering .
715 | Attachments are a gateway for spammers , and large attachments in bulk numbers can cause problems for the Internet Service Provider .
716 |
--------------------------------------------------------------------------------
/project/Release.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import Keys._
3 |
4 | import sbtrelease._
5 | import ReleasePlugin._
6 | import ReleaseKeys._
7 | import ReleaseStateTransformations._
8 | import Utilities._
9 |
10 | import com.typesafe.sbt.SbtPgp.PgpKeys._
11 |
/**
 * Release configuration for this build: the stock sbt-release settings plus
 * an explicit release process whose publish step uses signed publishing.
 */
object ReleaseSettings {

  /**
   * Action substituted into the `publishArtifacts` release step.
   *
   * Looks up the current project reference from the given state and runs
   * `publishSigned` (scoped to `Global` in that reference) via
   * `runAggregated`, returning whatever state that call yields.
   */
  lazy val publishSignedAction = (state: State) => {
    val project = state.extract
    val currentRef = project.get(thisProjectRef)
    project.runAggregated(publishSigned in Global in currentRef, state)
  }

  /** `releaseSettings` followed by the ordered list of release steps. */
  val defaults = releaseSettings ++ Seq(
    releaseProcess := Seq[ReleaseStep](
      checkSnapshotDependencies,
      inquireVersions,
      runTest,
      setReleaseVersion,
      commitReleaseVersion,
      tagRelease,
      // Same step as publishArtifacts, with its action swapped for the signed publish.
      publishArtifacts.copy(action = publishSignedAction),
      setNextVersion,
      commitNextVersion
    )
  )
}
32 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.0
2 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.1")
2 |
3 | addSbtPlugin("com.github.gseitz" % "sbt-release" % "0.8")
4 |
--------------------------------------------------------------------------------
/release/release_notes_relnoun.md:
--------------------------------------------------------------------------------
1 | # Release Notes
2 |
3 | ## Version 2.2.0 (01/15/2016)
4 |
5 | * Extended patterns for pronouns
6 | * "His father John, => (John; [is] father [of]; Him)"
7 | * Updated relnoun_prefixes list
8 | * (Selective prefix check) Less dependency on relnoun_prefixes: better extractions in sentences with relational prefixes
9 | * "Iranian film director Jafar Panahi => (Jafar Panahi; [is] film director [of]; Iran)"
10 |
11 | ## Version 2.0.0 (12/30/2015)
12 |
13 | * Allows nnp relation words (previous version only allowed nn relation words)
14 | * Works well with Demonyms
15 | * "Indian president Mukherjee => Mukherjee; [is] president [of]; India"
16 | * Modified the patterns to allow relnoun_prefixes (500+ prefixes list as of now)
17 | * "West Bengali Chief Minister Mamata Banerjee => (Mamata Banerjee; [is] Chief Minister [of]; West Bengal)"
18 | * Title Extractor (configurable)
19 | * "President Obama was born in Hawaii on August 4, 1961 => (Obama; [is] President [of]; [UNKNOWN])"
20 | * AppositiveExtractor2
21 | * "Lauren Faust, a cartoonist, => (Lauren Faust; [is]; a cartoonist)"
22 | * OfCommaExtractor
23 | * "The father of Michael, John, => (John; [is] The father of; Michael)"
24 | * Distinguishes the [from] & [of] extractions
25 | * "Indian player Sachin Tendulkar received the Arjuna Award in 1994. => (Sachin Tendulkar, [is] player [from], India)"
26 | * "United States President Barack Obama gave a speech today. => (Barack Obama, [is] President [of], United States)"
27 | * Includes a File Mode
28 |
--------------------------------------------------------------------------------
/src/main/resources/edu/knowitall/chunkedextractor/confidence/relnoun-confidence.txt:
--------------------------------------------------------------------------------
1 | Intercept -0.0
2 | arg1 contains PRP$ -5.002240337280573E-5
3 | arg1 contains pronoun -0.0
4 | arg1 is proper 0.0
5 | arg2 contains PRP$ -5.002240337280573E-5
6 | arg2 contains pronoun -0.0
7 | arg2 is proper -0.009364550346926247
8 | conj before rel 0.0
9 | extr covers sent -8.773456214182152E-5
10 | np after extr -0.0
11 | np before extr -0.23351207210461408
12 | prep after extr 0.0
13 | prep before extr 0.0
14 | sent ends w/ extr -8.773456214182152E-5
15 | sent starts w/ extr 1.028710313064414
16 | verb after extr 0.0
17 | which|who|that before rel -0.2586414256565373
18 |
--------------------------------------------------------------------------------
/src/main/resources/edu/knowitall/chunkedextractor/demonyms.csv:
--------------------------------------------------------------------------------
1 | Aalborgenser,Aalborg
2 | Aberdonian,Aberdeen
3 | Abkhaz,Abkhazia
4 | Abkhazian,Abkhazia
5 | Abrenian,Abra
6 | Abruzzese,Abruzzo
7 | Abyssinian,Abyssinia
8 | Acadian,Acadia
9 | Acadien,Acadie
10 | Acadienne,Acadie
11 | Acarnanian,Acarnania
12 | Acehnese,Aceh
13 | Achaean,Achaea
14 | Acreano,Acre
15 | Adelaidean,Adelaide
16 | Adelaidian,Adelaide
17 | Aeginetan,Aegina
18 | Aethaean,Aethaea
19 | Aetolian,Aetolia
20 | Afghan,Afghanistan
21 | African,Africa
22 | Afro-Eurasian,Afro-Eurasia
23 | Agote,Navarra
24 | Aguascalentense,Aguascalientes
25 | Akkadian,Akkadia
26 | Aklan,Aklan
27 | Aklanese,Aklan
28 | Aklanon,Aklan
29 | Akronite,Akron
30 | Alabamian,Alabama
31 | Alagoano,Alagoas
32 | Åland Island,Åland Islands
33 | Ålandish,Åland
34 | Alaskan,Alaska
35 | Alavense,Álava
36 | Alavés,Álava
37 | Albaceteño,Albacete
38 | Albacetense,Albacete
39 | Albanense,Albacete
40 | Albanian,Albania
41 | Albasitense,Albacete
42 | Albay,Albay
43 | Albayan,Albay
44 | Albayano,Albay
45 | Albertan,Alberta
46 | Alcarreño,Guadalajara
47 | Aleppine,Aleppo
48 | Aleut,Aleutian Islands
49 | Aleutian,Aleutian Islands
50 | Alexandrian,Alexandria
51 | Algerian,Algeria
52 | Alicantino,Alicante
53 | Allahabadi,Allahabad
54 | Almerian,Almería
55 | Almeriense,Almería
56 | Alorn,Aloria
57 | Alpine,Alps
58 | Alsatia,Alsace
59 | Alsatian,Alsace
60 | Amalfitan,Amalfi
61 | Amapaense,Amapá
62 | Amazonense,Amazonas
63 | Amazonian,Amazon River And Region
64 | American,United States
65 | American Samoan,American Samoa
66 | Americana,Americas
67 | Amsterdammer,Amsterdam
68 | Ancashino,Ancash
69 | Andalusian,Andalusia
70 | Andaman,Andaman And Nicobar Islands
71 | Andamanese,Andaman And Nicobar Islands
72 | Andhraite,Andhra
73 | Andhrudu,Andhra Pradesh
74 | Andorran,Andorra
75 | Andorrano,Andorra
76 | Angeleno,Los Angeles
77 | Angelino,Angeles
78 | Angelopolitano,Puebla
79 | Angolan,Angola
80 | Anguillan,Anguilla
81 | Ann Arborite,Ann Arbor
82 | Annapolitan,Annapolis
83 | Antarctic,Antarctica
84 | Antarctican,Antarctica
85 | Antiguans,Antigua And Barbuda
86 | Antillean,Antilles
87 | Antioquenian,Antioquia
88 | Antipodean,Antipodes
89 | Antiquenian,Antique
90 | Antiqueño,Antique
91 | Aostain,Aosta
92 | Aostan,Aosta
93 | Apayao,Apayao
94 | Apayaonian,Apayao
95 | Apennine,Apennines
96 | Appalachian,Appalachia
97 | Apulian,Apulia
98 | Aquilonemtexian,University Of North Texas
99 | Aquitanian,Aquitania
100 | Arab,Haugesund
101 | Araber,Haugesund
102 | Aracajuense,Aracaju
103 | Aragonese,Aragon
104 | Arcadian,Arcadia
105 | Arequipeño,Arequipa
106 | Argentine,Argentina
107 | Argentinean,Argentina
108 | Argentinian,Argentina
109 | Argentino,Argentina
110 | Argive,Argos
111 | Århusianer,Århus
112 | Arizonian,Arizona
113 | Arkansawyer,Arkansas
114 | Arkie,Arkansas
115 | Armachian,Armagh
116 | Armenian,Armenia
117 | Armidilian,Armidale
118 | Arretine,Arretium
119 | Arriacense,Guadalajara
120 | Aruban,Aruba
121 | Arunachali,Arunachal Pradesh
122 | Arundense,Ronda
123 | Ashburnian,Ashbourne
124 | Ashevillain,Asheville
125 | Asian,Asia
126 | Assamese,Assam
127 | Asturian,Asturias
128 | Asturiano,Asturias
129 | Athenian,Athens
130 | Atlantan,Atlanta
131 | Atlantean,Atlantis
132 | Atlantine,Atlantis
133 | Auckland,Auckland
134 | Aucklander,Auckland
135 | Aurgitano,Jaén
136 | Auroran,Aurora
137 | Ausense,Vich
138 | Ausentano,Vich
139 | Ausonense,Vich
140 | Aussie,Australia
141 | Aust-Agding,Aust-Agder
142 | Austinite,Austin
143 | Austonian,Austin
144 | Australasian,Australasia
145 | Australian,Australia
146 | Australianb,Australia
147 | Australien,Australia
148 | Austrian,Austria
149 | Austro-Hungarian,Austria–Hungary
150 | Avileño,Ávila
151 | Ayachunano,Ayacucho
152 | Ayacuchano,Ayacucho
153 | Azawadi,Azawad
154 | Azerbaijani,Azerbaijan
155 | Azeri,Azerbaijan
156 | Azeri'S,Azerbaijan
157 | Azragi,Hazara
158 | Babylonian,Babylonia
159 | Bactrian,Bactria
160 | Badajoceño,Badajoz
161 | Badajocense,Badajoz
162 | Badger,Wisconsin
163 | Bæring,Bærum
164 | Bahamian,The Bahamas
165 | Bahraini,Bahrain
166 | Baiano,Bahia
167 | Bajacaliforniano,Baja California
168 | Bajan,Barbados
169 | Bajoran,Bajor
170 | Bajun,Barbados
171 | Bajuns,Barbados
172 | Balear,Illes Balears
173 | Balinese,Bali
174 | Balkan,Balkans
175 | Ballaratian,Ballarat
176 | Baloch,Balochistan
177 | Balochi,Balochistan
178 | Balochistani,Balochistan
179 | Baltic,Baltic Region
180 | Baltimorean,Baltimore
181 | Baltimoron,Baltimore
182 | Baluch,Balochistan
183 | Banana Bender,Queensland
184 | Bananabender,Queensland
185 | Bandungite,Bandung
186 | Bañezano,La Bañeza
187 | Bangalorean,Bangalore
188 | Bangkokian,Bangkok
189 | Bangla,West Bengal
190 | Bangladeshi,Bangladesh
191 | Barbadian,Barbados
192 | Barbudan,Antigua And Barbuda
193 | Barcelonense,Barcelona
194 | Barcelonés,Barcelona
195 | Barcelonian,Barcelona
196 | Barranquino,Barranco
197 | Barriga-Verde,Santa Catarina
198 | Barrovian,Barrow-In-Furness
199 | Barthélemois,Saint Barthélemy
200 | Bashkir,Bashkortostan
201 | Basilan,Basilan
202 | Basilanese,Basilan
203 | Basileño,Basilan
204 | Basotho,Lesotho
205 | Basque,Basque Country
206 | Bataan,Bataan
207 | Bataanese,Bataan
208 | Bataeño,Bataan
209 | Batangan,Batangas
210 | Batangueño,Batangas
211 | Batanic,Batanes
212 | Bathonian,Bath
213 | Baton Rougean,Baton Rouge
214 | Batswana,Botswana
215 | Bavarian,Bavaria
216 | Bay Of Plenty,Bay Of Plenty Region
217 | Bay Stater,Massachusetts
218 | Bedfordian,Bedford
219 | Beijinger,Beijing
220 | Beiruti,Beirut
221 | Belarusian,Belarus
222 | Belenense,Belém
223 | Belfast,Belfast
224 | Belfastian,Belfast
225 | Belgian,Belgium
226 | Belgrader,Belgrade
227 | Belgradian,Belgrade
228 | Belizean,Belize
229 | Bellifontain,Fontainebleau
230 | Belo-Horizontino,Belo Horizonte
231 | Bendigonian,Bendigo
232 | Beneventan,Benevento
233 | Bengalese,West Bengal
234 | Bengali,Bengal
235 | Benguet,Benguet
236 | Benguetian,Benguet
237 | Beninois,Benin
238 | Beotian,Boeotia
239 | Bergamasque,Bergamo
240 | Bergenser,Bergen
241 | Berkeleyan,Berkeley
242 | Berliner,Berlin
243 | Bermudan,Bermuda
244 | Bermudian,Bermuda
245 | Bernese,Bern
246 | Bessarabian,Bessarabia
247 | Beturiense,Badajoz
248 | Bharati,Bharat
249 | Bhutanese,Bhutan
250 | Biafran,Biafra
251 | Big Bender,Tennessee
252 | Bihari,Bihar
253 | Bilbilitano,Calatayud
254 | Biliran,Biliran
255 | Biliranian,Biliran
256 | Biot,British Indian Ocean Territory
257 | Birminghamian,Birmingham
258 | Bissau-Guinean,Guinea-Bissau
259 | Blefuscudian,Blefuscu
260 | Blue Hen'S Chicken,Delaware
261 | Bluenoser,Nova Scotia
262 | Boa-Vistense,Boa Vista
263 | Boeotian,Boeotia
264 | Bogotan,Bogotá
265 | Bohemian,Bohemia
266 | Boholan,Bohol
267 | Boholano,Bohol
268 | Boius,Boiohaemum
269 | Bolivian,Bolivia
270 | Bolognese,Bologna
271 | Boltonian,Bolton
272 | Bonaerense,Buenos Aires
273 | Bonaire,Bonaire
274 | Bonin Islander,Bonin Islands
275 | Bordelais,Bordeaux
276 | Boricua,Puerto Rico
277 | Bornean,Borneo
278 | Bornholmer,Bornholm
279 | Bosnian,Bosnia And Herzegovina
280 | Bosnian-Herzegovinian,Bosnia And Herzegovina
281 | Bosphoran,Bosporus
282 | Bosporan,Bosporus
283 | Bostonian,Boston
284 | Botswanan,Botswana
285 | Boulderite,Boulder
286 | Bouvet Island,Bouvet Island
287 | Brasileiro,Brazil
288 | Brasiliense,Brasilia
289 | Bratislavan,Bratislava
290 | Brazilian,Brazil
291 | Brescian,Brescia
292 | Breton,Brittany
293 | Brigantino,A Coruña
294 | Brightonian,Brighton
295 | Brisbanite,Brisbane
296 | Brisso,Brisbane
297 | Bristolian,Bristol
298 | Brisvegan,Brisbane
299 | British,United Kingdom
300 | British Columbian,British Columbia
301 | British Virgin Island,British Virgin Islands
302 | British Virgin Islander,British Virgin Islands
303 | Briton,Britain
304 | Brobdingnagian,Brobdingnag
305 | Bronx,The Bronx
306 | Brooklyn,Brooklyn
307 | Brooklynite,Brooklyn
308 | Brummie,Birmingham
309 | Bruneian,Brunei
310 | Bruttian,Bruttium
311 | Bruxellois,Brussels
312 | Bucharestian,Bucharest
313 | Buckeye,Ohio
314 | Budapester,Budapest
315 | Budapesti,Budapest
316 | Buddie,Paisley
317 | Buffalonian,Buffalo
318 | Bukidnon,Bukidnon
319 | Bulacan,Bulacan
320 | Bulacanese,Bulacan
321 | Bulaqueño,Bulacan
322 | Bulgarian,Bulgaria
323 | Burcés,Burgos
324 | Burgalés,Burgos
325 | Burgense,Burgos
326 | Burgués,Burgos
327 | Burgundian,Burgundy
328 | Burkinabe,Burkina Faso
329 | Burkinabé,Burkina Faso
330 | Burkinabè,Burkina Faso
331 | Burmese,Burma
332 | Burqueño,Albuquerque
333 | Burundian,Burundi
334 | Butternut,Tennessee
335 | Buzzard,Georgia
336 | Byzantine,Byzantium
337 | Cabo Verdean,Cabo Verde
338 | Cacereño,Cáceres
339 | Cadiceño,Cádiz
340 | Cadicense,Cádiz
341 | Caditano,Cádiz
342 | Cagayan,Cagayan
343 | Cagayanese,Cagayan
344 | Cagayano,Cagayan
345 | Cagayanon,Cagayan De Oro
346 | Cairene,Cairo
347 | Cairnsitte,Cairns
348 | Cajamarquino,Cajamarca
349 | Cajun,Louisiana
350 | Calabrese,Calabria
351 | Calabrian,Calabria
352 | Calcuttan,Kolkata
353 | Calgarian,Calgary
354 | Californian,California
355 | Californio,California
356 | Camarinense,Camarines Sur
357 | Cambodian,Cambodia
358 | Cameroonian,Cameroon
359 | Camiguin,Camiguin
360 | Campanian,Campania
361 | Campechan,Campeche
362 | Campechano,Campeche
363 | Campineiro,Campinas
364 | Canadian,Canada
365 | Canadien,Canada
366 | Canadienne,Canada
367 | Canarese,Karnataka
368 | Canarian,Canary Islands
369 | Canberran,Canberra
370 | Candelariero,Santa Cruz De Tenerife
371 | Cantab,Cambridge
372 | Cantabrian,Cantabria
373 | Cantabrigian,Cambridge
374 | Cantabrio,Cantabria
375 | Cántabro,Cantabria
376 | Canterbury,Canterbury Region
377 | Cantonese,Canton
378 | Cantuarian,Canterbury
379 | Canuck,Canada
380 | Cape Verdian,Cape Verde
381 | Caper,Cape Breton
382 | Capetonian,Cape Town
383 | Capitalino,Mexico City
384 | Capixaba,Espírito Santo
385 | Capizeño,Capiz
386 | Capizian,Capiz
387 | Caracense,Guadalajara
388 | Caraquenian,Caracas
389 | Cardassian,Cardassia Prime
390 | Cardi,Aberteifi
391 | Cardiffian,Cardiff
392 | Carian,Caria
393 | Caribbean,Caribbean
394 | Carinthian,Carinthia
395 | Carioca,Rio De Janeiro
396 | Carlislian,Carlisle
397 | Carlopolitano,São Carlos
398 | Carriacense,Guadalajara
399 | Carsonite,Carson City
400 | Carthaginian,Carthage
401 | Carystian,Carystus
402 | Castellano,Castilla-La Mancha
403 | Castellanomanchego,Castilla-La Mancha
404 | Castellonense,Castellón
405 | Castellonero,Castellón
406 | Castilian,Castile
407 | Catalá,Catalonia
408 | Catalan,Catalonia
409 | Catalán,Catalonia
410 | Catalonian,Catalonia
411 | Catamarqueño,Catamarca
412 | Catanduanean,Catanduanes
413 | Catandueño,Catanduanes
414 | Catarinense,Santa Catarina
415 | Caucasian,Caucasus
416 | Caucasic,Caucasus
417 | Cavitenian,Cavite
418 | Caviteño,Cavite
419 | Caymanian,Cayman Islands
420 | Cearense,Ceará
421 | Cebu,Cebu
422 | Cebuan,Cebu
423 | Cebuano,Cebu
424 | Central African,Central African Republic
425 | Central American,Central America
426 | Cephalonian,Cephalonia
427 | Cesaraugustano,Zaragoza
428 | Cesetano,Tarragona
429 | Ceutí,Ceuta
430 | Ceylonese,Ceylon
431 | Chachapoyano,Chachapoyas
432 | Chadian,Chad
433 | Chalaco,Callao
434 | Chalcidian,Chalcis
435 | Chandigarhi,Chandigarh
436 | Channel Island,Guernsey
437 | Channel Islander,Guernsey
438 | Chaqueño,Chaco
439 | Charlottean,Charlotte
440 | Charro,Salamanca
441 | Chechen,Chechnya
442 | Chelmsfordian,Chelmsford
443 | Chennaite,Chennai
444 | Chetumaleño,Chetumal
445 | Chhattisgarhi,Chhattisgarh
446 | Chian,Chios
447 | Chiapan,Chiapas
448 | Chiapaneco,Chiapas
449 | Chicagoan,Chicago
450 | Chihuahuan,Chihuahua
451 | Chihuahuense,Chihuahua
452 | Chilango,Mexico City
453 | Chilean,Chile
454 | Chilpancingueño,Chilpancingo
455 | Chimbotano,Chimbote
456 | Chinese,China
457 | Chino,China
458 | Chorrillano,Chorrillos
459 | Christmas Island,Christmas Island
460 | Christmas Islander,Christmas Island
461 | Chubutense,Chubut
462 | Cincinnatian,Cincinnati
463 | Citizen Of The Holy See,Vatican City
464 | Ciudadrealeño,Ciudad Real
465 | Ciudadrealino,Ciudad Real
466 | Clevelander,Cleveland
467 | Cluniense,Ciudad Real
468 | Coahuilan,Coahuila
469 | Coahuilense,Coahuila
470 | Cockney,London
471 | Cockroach,New South Wales
472 | Cocos Island,Cocos Islands
473 | Cocos Islander,Cocos Islands
474 | Colcestrian,Colchester
475 | Colchian,Colchis
476 | Colimeño,Colima
477 | Colimense,Colima
478 | Colombian,Colombia
479 | Coloradoan,Colorado
480 | Colossian,Colossae
481 | Comeperros,Huancayo
482 | Comoran,Comoros
483 | Comorian,Comoros
484 | Compostelan,Compostela Valley
485 | Comval,Compostela Valley
486 | Comvaleño,Compostela Valley
487 | Conch,Key West
488 | Confederate,Confederate States Of America
489 | Congolese,Republic Of The Congo
490 | Connecticotian,Connecticut
491 | Connecticutensian,Connecticut
492 | Connecticuter,Connecticut
493 | Connecticutian,Connecticut
494 | Connetian,Connecticut
495 | Conquense,Cuenca
496 | Consentian,Consentia
497 | Cook Island,Cook Islands
498 | Cook Islander,Cook Islands
499 | Copenhagener,Copenhagen
500 | Corcyrean,Corcyra
501 | Cordobense,Córdoba
502 | Cordobés,Córdoba
503 | Corfiot,Corfu
504 | Corinthian,Corinth
505 | Cork,Cork
506 | Corkonian,County Cork
507 | Corncracker,Kentucky
508 | Cornhusker,Nebraska
509 | Cornichon,Reims
510 | Cornish,Cornwall
511 | Correntino,Corrientes
512 | Corsican,Corsica
513 | Cortubí,Córdoba
514 | Coruñés,A Coruña
515 | Cosetano,Tarragona
516 | Costa Rican,Costa Rica
517 | Cotabatan,Cotabato
518 | Cotabateño,Cotabato
519 | Cotabato,Cotabato
520 | Couronian,Courland
521 | Coventrian,Coventry
522 | Cracker,Florida
523 | Cracovian,Kraków
524 | Cretan,Crete
525 | Crimean,Crimea
526 | Croat,Croatia
527 | Croatian,Croatia
528 | Crotonian,Croton
529 | Crow Eater,South Australia
530 | Croweater,South Australia
531 | Cruceño,Santa Cruz De La Sierra
532 | Crucian,St. Croix
533 | Cruzan,St. Croix
534 | CSA,Confederate States Of America
535 | Cuban,Cuba
536 | Cuencano,Cuenca
537 | Cuernavaqueño,Cuernavaca
538 | Cuernavaquense,Cuernavaca
539 | Culiacanense,Culiacán
540 | Cumbrian,Cumbria
541 | Curaçaoan,Curaçao
542 | Curitibano,Curitiba
543 | Curitibense,Curitiba
544 | Curonian,Courland
545 | Cuzqueño,Cuzco
546 | Cycladian,Cyclades
547 | Cypriot,Cyprus
548 | Cypriote,Cyprus
549 | Cyrenaic,Cyrenaica
550 | Cyrene,Cyrenaica
551 | Cyrenian,Cyrene
552 | Czech,Czech Republic
553 | Czechb,Czechoslovakia
554 | Czechoslovak,Czechoslovakia
555 | Czechoslovakian,Czechoslovakia
556 | Dacian,Dacia
557 | Dagestani,Dagestan
558 | Dahomeyan,Dahomey
559 | Dalatese,Da Lat
560 | Dallasite,Dallas
561 | Dalmatian,Dalmatia
562 | Damanese,Daman And Diu
563 | Damascene,Damascus
564 | Dane,Denmark
565 | Danish,Denmark
566 | Dansker,Denmark
567 | Danubian,Danube River
568 | Darwinian,Darwin
569 | Davaoeño,Davao
570 | Dawsonite,Dawson City
571 | Daytonian,Dayton
572 | Defeño,Federal District
573 | Delawarean,Delaware
574 | Delhian,Delhi
575 | Delhiite,Delhi
576 | Delhite,Delhi
577 | Delian,Delos
578 | Denver,Denver
579 | Denverite,Denver
580 | Derry,Derry
581 | Detroiter,Detroit
582 | Devonian,Devon
583 | Devonportian,Devonport
584 | Dinagat,Dinagat Islands
585 | Dinagatan,Dinagat Islands
586 | Diuese,Daman And Diu
587 | Djibouti,Djibouti
588 | Djiboutian,Djibouti
589 | Dodecanesian,Dodecanese
590 | Dominican,Dominica
591 | Down Easter,Maine
592 | Downeaster,Maine
593 | Drammenser,Drammen
594 | Dresdener,Dresden
595 | Dublin,Dublin
596 | Dubliner,Dublin
597 | Duluthian,Duluth
598 | Dundonian,Dundee
599 | Dunedin,Dunedin
600 | Dunedinite,Dunedin
601 | Dunelmensis,University Of Durham
602 | Duopontino,Pontevedra
603 | Duosicilian,Two Sicilies
604 | Durangoan,Durango
605 | Duranguense,Durango
606 | Durban,Durban
607 | Durbanite,Durban
608 | Dutch,Netherlands
609 | East Coast,Gisborne Region
610 | East Malaysian,East Malaysia
611 | East Mindorenian,Oriental Mindoro
612 | East Misamis,Misamis Oriental
613 | East Negros,Negros Oriental
614 | East Timorese,East Timor
615 | East Trojan,East Troy
616 | Eastern Davao,Davao Oriental
617 | Eastern Samarian,Eastern Samar
618 | Ebjergenser,Esbjerg
619 | Ecuadorian,Ecuador
620 | Edinbourgeois,Edinburgh
621 | Edinburgensian,Edinburgh
622 | Edinburgher,Edinburgh
623 | Edmontonian,Edmonton
624 | Edonian,Edonia
625 | Egabrense,Cabra
626 | Egestan,Egesta
627 | Egyptian,Egypt
628 | El Paseño,El Paso
629 | El Pasoan,El Paso
630 | Elamite,Elam
631 | Elamitic,Elam
632 | Elamitish,Elam
633 | Eleusian,Eleusina
634 | Eleusinian,Eleusina
635 | Elian,Elis
636 | Emirati,United Arab Emirates
637 | Emirian,United Arab Emirates
638 | English,England
639 | Englishman,England
640 | Englishwoman,England
641 | Entrerriano,Entre Rios
642 | Ephesian,Ephesus
643 | Epidamnian,Epidamnus
644 | Epidaurian,Epidaurus
645 | Epirote,Epirus
646 | Equatoguinean,Equatorial Guinea
647 | Equatorial Guinean,Equatorial Guinea
648 | Eretrian,Eretria
649 | Erieite,Erie
650 | Eritrean,Eritrea
651 | Espírito-Santense,Espírito Santo
652 | Essiburn,Ashbourne
653 | Estonian,Estonia
654 | Ethiopian,Ethiopia
655 | Etruscan,Etruria
656 | Euboean,Euboea
657 | Eurasian,Eurasia
658 | European,Europe
659 | Exonian,Exeter
660 | Extremaduran,Extremadura
661 | Falkland Island,Falkland Islands
662 | Falkland Islander,Falkland Islands
663 | Falstring,Falster
664 | Faridabadi,Faridabad
665 | Faroese,Faroe Islands
666 | Faroite,Faro
667 | Ferengi,Ferenginar
668 | Fife,Fife
669 | Fifer,Fife
670 | Fijian,Fiji
671 | Filipino,Philippines
672 | Finn,Finland
673 | Finnic,Finland
674 | Finnish,Finland
675 | Finnmarking,Finnmark
676 | Flatlander,Michigan
677 | Fleming,Flanders
678 | Flemish,Flanders
679 | Florentia,Florence
680 | Florentine,Florence
681 | Florianopolitano,Florianópolis
682 | Florida Cracker,Florida
683 | Floridan,Florida
684 | Floridian,Florida
685 | Fluminense,Rio De Janeiro
686 | Formoseño,Formosa
687 | Fort Worthian,Fort Worth
688 | Fortalezense,Fortaleza
689 | Fox,Maine
690 | Franco-Albertan,Alberta
691 | Franco-Manitobain,Manitoba
692 | Franconian,Franconia
693 | Franco-Ontarian,Ontario
694 | Franco-Saskatchewanian,Saskatchewan
695 | Frankfurter,Frankfurt
696 | Frederictonian,Fredericton
697 | French,France
698 | French Guianese,French Guiana
699 | French Polynesian,French Polynesia
700 | French Southern Territories,French Southern Territories
701 | Frenchman,France
702 | Frenchwoman,France
703 | Friesian,Friesland
704 | Frisian,Friesland
705 | Fueguino,Tierra Del Fuego
706 | Fukuokan,Fukuoka
707 | Futunan,Wallis And Futuna
708 | Fynbo,Funen
709 | Gabonese,Gabon
710 | Gaderita,Cádiz
711 | Gaditano,Cádiz
712 | Galatian,Galatia
713 | Galician,Galicia
714 | Galilean,Galilee
715 | Gallaecus,Gallaecia
716 | Galleguense,Río Gallegos
717 | Gallifreyan,Gallifrey
718 | Gallovidian,Galloway
719 | Galway,Galway
720 | Galwegian,Galway
721 | Gambian,The Gambia
722 | Ganadí,Granada
723 | Garnatí,Granada
724 | Gascon,Gascony
725 | Gaúcho,Rio Grande Do Sul
726 | Gaulish,Gallia
727 | Geat,Geatland
728 | Genevan,Geneva
729 | Genevese,Geneva
730 | Genoese,Genoa
731 | Genoshan,Genosha
732 | Genovese,Genoa
733 | Geordie,Newcastle
734 | Georgian,Georgia
735 | German,Germany
736 | Germanian,Germania
737 | Germanus,Germania
738 | Germistonian,Germiston
739 | Gerundense,Girona
740 | Gerundí,Girona
741 | Ghanaian,Ghana
742 | Gibraltar,Gibraltar
743 | Gibraltarian,Gibraltar
744 | Gimnesiense,Illes Balears
745 | Gironés,Girona
746 | Gisborne,Gisborne Region
747 | Glaswegian,Glasgow
748 | Goan,Goa
749 | Goanese,Goa
750 | Goenkar,Goa
751 | Gold Coast,Gold Coast
752 | Goober-Grabber,Georgia
753 | Gozitan,Gozo
754 | Granadés,Granada
755 | Granadí,Granada
756 | Granadino,Granada
757 | Grand Rapidian,Grand Rapids
758 | Grasshopper,Kansas
759 | Grazer,Graz
760 | Greek,Greece
761 | Greenlandic,Greenland
762 | Greenvillian,Greenville
763 | Grenadian,Grenada
764 | Guadalajareño,Guadalajara
765 | Guadalajarense,Guadalajara
766 | Guadalupense,Guadalupe
767 | Guadeloupe,Guadeloupe
768 | Guadeloupean,Guadeloupe
769 | Guamanian,Guam
770 | Guambat,Guam
771 | Guanajuateño,Guanajuato
772 | Guanajuatense,Guanajuato
773 | Guatemalan,Guatemala
774 | Guernsey,Guernsey
775 | Guerreran,Guerrero
776 | Guerrerense,Guerrero
777 | Guianan,Guiana
778 | Guimarasian,Guimaras
779 | Guinea-Bissauan,Guinea-Bissau
780 | Guinean,Guinea
781 | Guipuzcoano,Guipúzcoa
782 | Gujarati,Gujarat
783 | Gurkhas,Nepal
784 | Guyanese,Guyana
785 | Hagenaar,The Hague
786 | Hagenees,The Hague
787 | Haitian,Haiti
788 | Haldenser,Halden
789 | Halifaxian,Halifax
790 | Haligonian,Halifax
791 | Halling,Hallingdal
792 | Hamburger,Hamburg
793 | Hamiltonian,Hamilton
794 | Hanoian,Hanoi
795 | Hanoverian,Hanover
796 | Harcourtian,Port Harcourt
797 | Harrovian,Harrow
798 | Hartlepudlian,Hartlepool
799 | Haryanvi,Haryana
800 | Hastingite,Hastings
801 | Haugesunder,Haugesund
802 | Hawaii Resident,Hawaii
803 | Hawaiian,Hawaii
804 | Hawke'S Bay,Hawke'S Bay Region
805 | Hawkeye,Iowa
806 | Hayastani,Hayastan
807 | Hazaragi,Hazara
808 | Heard Island,Heard Island And Mcdonald Islands
809 | Hebridean,Hebrides
810 | Hedmarking,Hedmark
811 | Hellene,Greece
812 | Hellenic,Greece
813 | Helsingoraner,Elsinore
814 | Helsinkian,Helsinki
815 | Hermosillense,Hermosillo
816 | Herzegovinian,Bosnia And Herzegovina
817 | Hessian,Hesse
818 | Hidalguense,Hidalgo
819 | Hidrocálido,Aguascalientes
820 | Himachali,Himachal Pradesh
821 | Himmerlænding,Himmerland
822 | Hiroshiman,Hiroshima
823 | Hispalense,Seville
824 | Hispanic,Hispania
825 | Hobartian,Hobart
826 | Hokkaidoan,Hokkaido
827 | Hokkaidoite,Hokkaido
828 | Hollander,Netherlands
829 | Holsteinian,Holstein
830 | Honduran,Honduras
831 | Hong Kong,Hong Kong
832 | Hong Kong Chinese,Hong Kong
833 | Hong Konger,Hong Kong
834 | Hongkonger,Hong Kong
835 | Hongkongese,Hong Kong
836 | Hongkongish,Hong Kong
837 | Honolulan,Honolulu
838 | Honshuan,Honshu
839 | Hoosier,Indiana
840 | Hordalending,Hordaland
841 | Houstonian,Houston
842 | Huanca,Huancayo
843 | Huancaíno,Huancayo
844 | Huancavelicano,Huancavelica
845 | Huanuqueño,Huanuco
846 | Huaracino,Huaraz
847 | Huelveño,Huelva
848 | Hunanese,Hunan
849 | Hungarian,Hungary
850 | Hyderabadi,Hyderabad
851 | Iberian,Iberia
852 | Icelandic,Iceland
853 | Idahoan,Idaho
854 | Ifugao,Ifugao
855 | Iguaçuense,Foz Do Iguaçu
856 | I-Kiribati,Kiribati
857 | Iliberitano,Granada
858 | Illinian,Illinois
859 | Illinoian,Illinois
860 | Illinoisan,Illinois
861 | Illinoisian,Illinois
862 | Illyrian,Illyria
863 | Ilocan,Ilocos Sur
864 | Ilocano,Ilocos Sur
865 | Iloilo,Iloilo
866 | Iloiloan,Iloilo
867 | Ilonggo,Iloilo
868 | Indian,India
869 | Indianian,Indiana
870 | Indochinese,Indochina
871 | Indonesian,Indonesia
872 | Ingush,Ingushetia
873 | Innuit,Alaska
874 | Invercargill,Invercargill
875 | Invernessian,Inverness
876 | Ionian,Ionia
877 | Iowa Citian,Iowa City
878 | Iowan,Iowa
879 | Iowegian,Iowa
880 | Iqueño,Ica
881 | Irani,Iran
882 | Iranian,Iran
883 | Iraqi,Iraq
884 | Irish,Ireland
885 | Irishman,Ireland
886 | Irishwoman,Ireland
887 | Irmite,Irmo
888 | Isabelan,Isabela
889 | Islamabadi,Islamabad
890 | Islander,Hawaii
891 | Israeli,Israel
892 | Israelite,Israel
893 | Istanbulite,Istanbul
894 | Itabirano,Itabira
895 | Italian,Italy
896 | Ivatan,Batanes
897 | Ivatanic,Batanes
898 | Ivorian,Ivory Coast
899 | Ivory Coast,Ivory Coast
900 | Iwo Jiman,Iwo Jima
901 | Izmirian,Izmir
902 | Jack,Swansea
903 | Jacksonvillian,Jacksonville
904 | Jaenés,Jaén
905 | Jakartan,Jakarta
906 | Jaliscan,Jalisco
907 | Jalisciense,Jalisco
908 | Jamaican,Jamaica
909 | Jammu,Jammu And Kashmir
910 | Jan Mayen,Jan Mayen
911 | Janner,Cornwall
912 | Japanese,Japan
913 | Jarocho,Veracruz
914 | Jaujano,Jauja
915 | Javan,Java
916 | Javanese,Java
917 | Jaxon,Jacksonville
918 | Jerseyan,Jersey
919 | Jerseyite,New Jersey
920 | Jerusalemite,Jerusalem
921 | Jharkhandi,Jharkhand
922 | Jienense,Jaén
923 | Jock,Scotland
924 | Johannesburg,Johannesburg
925 | Johorean,Johor
926 | Jordanian,Jordan
927 | Judaean,Judea
928 | Judean,Judea
929 | Juiz-Forano,Juiz De Fora
930 | Jujeño,Jujuy
931 | Jute,Jutland
932 | Jyde,Jutland
933 | Kagoshiman,Kagoshima
934 | Kalamazooan,Kalamazoo
935 | Kalingan,Kalinga
936 | Kalundborgenser,Kalundborg
937 | Kaluzhanin,Kaluga
938 | Kalymnian,Kalymnos
939 | Kamaaina,Hawaii
940 | Kamaʻāina,Hawaii
941 | Kannadiga,Karnataka
942 | Kansan,Kansas
943 | Kansas Citian,Kansas City
944 | Karachiite,Karachi
945 | Karelian,Karelia
946 | Karnatakan,Karnataka
947 | Kashmiran,Kashmir
948 | Kashmiri,Jammu And Kashmir
949 | Kastorian,Kastoria
950 | Katangese,Katanga
951 | Kaulonian,Kaulonia
952 | Kazakhstani,Kazakhstan
953 | Kedahan,Kedah
954 | Kelantanese,Kelantan
955 | Kenoshan,Kenosha
956 | Kentuckian,Kentucky
957 | Kenyan,Kenya
958 | Keralite,Kerala
959 | Kernewek,Cornwall
960 | Khmer,Cambodia
961 | Kievan,Kiev
962 | Kingstonian,Kingston
963 | Kirghiz,Kyrgyz Republic
964 | Kitchenerite,Kitchener
965 | Kittian,Saint Kitts And Nevis
966 | Kittian And Nevisian,Saint Kitts And Nevis
967 | Kittitian,Saint Kitts And Nevis
968 | Kiwi,New Zealand
969 | Klingon,Qo'Nos
970 | Knickerbocker,New York
971 | Knossian,Knossos
972 | Knoxvillian,Knoxville
973 | Københavner,Copenhagen
974 | Koldingenser,Kolding
975 | Kosovan,Kosovo And Metohija
976 | Krakovian,Kraków
977 | Kuala Lumpurian,Kuala Lumpur
978 | Kuchingite,Kuching
979 | Kurdish,Kurdistan
980 | Kurilian,Kuril Islands
981 | Kuwaiti,Kuwait
982 | Kyotoite,Kyoto
983 | Kyrgyz,Kyrgyzstan
984 | Kyushuan,Kyushu
985 | La Ueño,La Union
986 | La Unionian,La Union
987 | Labradorian,Newfoundland And Labrador
988 | Labuanese,Labuan
989 | Laccadivian,Lakshadweep
990 | Lagosian,Lagos
991 | Laguneño,Laguna
992 | Lagunense,Laguna
993 | Lagunian,Laguna
994 | Lahori,Lahore
995 | Lakedaemonian,Lakedaimon
996 | Lakedaimonian,Lakedaimon
997 | Lambayequino,Lambayeque
998 | Lancastrian,Lancaster
999 | Lao,Laos
1000 | Laotian,Laos
1001 | Lapp,Lapland
1002 | Laputan,Laputa
1003 | Larissan,Larissa
1004 | Las Vegan,Las Vegas
1005 | Latin,Latium
1006 | Latverian,Latveria
1007 | Latvian,Latvia
1008 | Laurentian,St. Lawrence River
1009 | Lavalois,Laval
1010 | Lavaloise,Laval
1011 | Lebanese,Lebanon
1012 | Leghornese,Livorno
1013 | Legionense,León
1014 | Leipziger,Leipzig
1015 | Leodensian,Leeds
1016 | Leonés,León
1017 | Leonese,León
1018 | Leontinian,Leontini
1019 | Leopolitan,Lviv
1020 | Lerense,Pontevedra
1021 | Lesbian,Lesbos
1022 | Lethbridgian,Lethbridge
1023 | Levantine,The Levant
1024 | Leytenian,Leyte
1025 | Liberian,Liberia
1026 | Liberteño,La Libertad
1027 | Libyan,Libya
1028 | Liechtenstein,Liechtenstein
1029 | Liechtensteiner,Liechtenstein
1030 | Ligurian,Liguria
1031 | Lilliputian,Lilliput
1032 | Limenian,Lima
1033 | Limeño,Lima
1034 | Linarense,Linares
1035 | Linzer,Linz
1036 | Lisboeta,Lisbon
1037 | Lithuanian,Lithuania
1038 | Liverpudlian,Liverpool
1039 | Livonian,Livonia
1040 | Livornese,Livorno
1041 | Locrian,Locris
1042 | Loiner,Leeds
1043 | Lollik,Lolland
1044 | Lombard,Lombardy
1045 | Lombardic,Lombardy
1046 | London,London
1047 | Londoner,London
1048 | Londrinense,Londrina
1049 | Long Island,Long Island
1050 | Long Islander,Long Island
1051 | Loretano,Loreto
1052 | Lorrainer,Lorraine
1053 | Lorrainian,Lorraine
1054 | Los Angelean,Los Angeles
1055 | Los Angeleno,Los Angeles
1056 | Los Angelino,Los Angeles
1057 | Louisianian,Louisiana
1058 | Louisvillian,Louisville
1059 | Louperivois,Rivière-Du-Loup
1060 | Lousianan,Louisiana
1061 | Lousianian,Louisiana
1062 | Lower Californian,Baja California
1063 | Lucanian,Lucania
1064 | Lucchese,Lucca
1065 | Lucense,Lugo
1066 | Ludovicense,São Luís
1067 | Luqués,Lugo
1068 | Lusatian,Lusatia
1069 | Luxembourg,Luxembourg
1070 | Luxembourger,Luxembourg
1071 | Luxembourgish,Luxembourg
1072 | Lydian,Lydia
1073 | Lyonese,Lyon
1074 | Lyonnais,Lyon
1075 | Macanese,Macau
1076 | Macapense,Macapá
1077 | Maccam,Sunderland
1078 | Macedonian,Macedonia
1079 | Maceioense,Maceió
1080 | Madeiran,Madeira
1081 | Madhya Pradeshi,Madhya Pradesh
1082 | Madredediosense,Madre De Dios
1083 | Madrideño,Madrid
1084 | Madrileño,Madrid
1085 | Maeonian,Maeonia
1086 | Mageritense,Madrid
1087 | Maguindanaoan,Maguindanao
1088 | Magyar,Hungary
1089 | Maharashtrian,Maharashtra
1090 | Mahorais,Mayotte
1091 | Mahoran,Mayotte
1092 | Maineiac,Maine
1093 | Mainer,Maine
1094 | Mainiac,Maine
1095 | Mainland,South Island
1096 | Majorcan,Majorca
1097 | Malaccan,Malacca
1098 | Malacitano,Málaga
1099 | Malagasy,Madagascar
1100 | Malagenean,Málaga
1101 | Malagueño,Málaga
1102 | Malawian,Malawi
1103 | Malay,Malaya
1104 | Malayan,Malaya
1105 | Malaysian,Malaysia
1106 | Maldivian,Maldives
1107 | Maldonadino,Puerto Maldonado
1108 | Malian,Mali
1109 | Malihini,Hawaii
1110 | Malinese,Mali
1111 | Maltese,Malta
1112 | Man Of Kent,Kent
1113 | Manauara,Manaus
1114 | Manauense,Manaus
1115 | Manchego,Ciudad Real
1116 | Manchurian,Manchuria
1117 | Mancunian,Manchester
1118 | Manhattan,Manhattan
1119 | Manhattanite,Manhattan
1120 | Manilan,Metro Manila
1121 | Manileño,Manila
1122 | Manipuri,Manipur
1123 | Manitobain,Manitoba
1124 | Manitoban,Manitoba
1125 | Mantinean,Mantineia
1126 | Mantuan,Mantua
1127 | Manxwoman,Isle Of Man
1128 | Maqueta,Castilla-La Mancha
1129 | Maranao,Lanao Del Sur
1130 | Maranhense,Maranhão
1131 | Marathonian,Marathon
1132 | Marinduquenian,Marinduque
1133 | Marinduqueño,Marinduque
1134 | Maritime,Maritimes
1135 | Maritimer,Maritime Region
1136 | Marseillais,Marseille
1137 | Marshallese,Marshall Islands
1138 | Martinican,Martinique
1139 | Martiniquais,Martinique
1140 | Marylander,Maryland
1141 | Masbatenian,Masbate
1142 | Masbateño,Masbate
1143 | Mashugana,Michigan
1144 | Massachusettsan,Massachusetts
1145 | Massachusite,Massachusetts
1146 | Masshole,Massachusetts
1147 | Masurian,Masuria
1148 | Matritense,Madrid
1149 | Mauritanian,Mauritania
1150 | Mauritian,Mauritius
1151 | Mayrití,Madrid
1152 | Mazatleco,Mazatlán
1153 | Mcdonald Islands,Heard Island And Mcdonald Islands
1154 | Mckinnian,Mckinney
1155 | Median,Media
1156 | Mediterranean,Mediterranean
1157 | Megaran,Megara
1158 | Megarian,Megara
1159 | Meghalayan,Meghalaya
1160 | Melanesian,Melanesia
1161 | Melbournian,Melbourne
1162 | Melbournite,Melbourne
1163 | Melburnian,Melbourne
1164 | Melian,Melos
1165 | Melillense,Melilla
1166 | Melitian,Melite
1167 | Memphian,Memphis
1168 | Mendocino,Mendoza
1169 | Mentonasque,Menton
1170 | Merian,Meru
1171 | Meridano,Mérida
1172 | Mesopotamian,Mesopotamia
1173 | Messenian,Messenia
1174 | Messinese,Messina
1175 | Methymnian,Mithymna
1176 | Mexicalense,Mexicali
1177 | Mexican,Mexico
1178 | Mexican,Mexico
1179 | Mexicano,Mexico
1180 | Mexiquense,México
1181 | Miamian,Miami
1182 | Michigander,Michigan
1183 | Michiganer,Michigan
1184 | Michiganese,Michigan
1185 | Michiganian,Michigan
1186 | Michiganite,Michigan
1187 | Michigine,Michigan
1188 | Michoacano,Michoacán
1189 | Micronesian,Micronesia
1190 | Middle Eastern,Middle East
1191 | Middle Easterner,Middle East
1192 | Milanese,Milan
1193 | Milesian,Miletus
1194 | Milwaukeean,Milwaukee
1195 | Mindoreño,Oriental Mindoro
1196 | Mineiro,Minas Gerais
1197 | Minneapolitan,Minneapolis
1198 | Minnesotan,Minnesota
1199 | Minorcan,Minorca
1200 | Minotian,Minot
1201 | Miquelonnais,Saint Pierre And Miquelon
1202 | Miraflorino,Miraflores
1203 | Misionero,Misiones
1204 | Misratan,Misrata
1205 | Mississippian,Mississippi
1206 | Missourian,Missouri
1207 | Mizo,Mizoram
1208 | Modenese,Modena
1209 | Moldavian,Moldavia
1210 | Moldenser,Molde
1211 | Moldovan,Moldova
1212 | Moluccan,Moluccas
1213 | Monacan,Monaco
1214 | Monégasque,Monaco
1215 | Mongol,Mongolia
1216 | Mongolian,Mongolia
1217 | Montanan,Montana
1218 | Montenegrin,Montenegro
1219 | Montreal,Montreal
1220 | Montréalais,Montreal
1221 | Montrealer,Montreal
1222 | Montréaler,Montreal
1223 | Montserratian,Montserrat
1224 | Moose Javian,Moose Jaw
1225 | Moosejavian,Moose Jaw
1226 | Moqueguano,Moquegua
1227 | Moravian,Moravia
1228 | Morelense,Morelos
1229 | Moreliano,Morelia
1230 | Møring,Møre Og Romsdal
1231 | Moroccan,Morocco
1232 | Moroleonés,Moroleón
1233 | Morsing,Mors
1234 | Mosotho,Lesotho
1235 | Motswana,Botswana
1236 | Mountaineers,Mountain Province
1237 | Mozambican,Mozambique
1238 | Mumbaikar,Mumbai
1239 | Mumbaikars,Mumbai
1240 | Münchner,Munich
1241 | Munsonian,Muncie
1242 | Murcí,Murcia
1243 | Murcian,Murcia
1244 | Murciano,Murcia
1245 | Muscovia,Moscow
1246 | Muscovite,Moscow
1247 | Muskrat,Delaware
1248 | Myanmar,Burma
1249 | Myanmarese,Myanmar
1250 | Mycenaean,Mycenae
1251 | Mytilenean,Mytilene
1252 | Naga,Nagaland
1253 | Nagalandese,Nagaland
1254 | Nagasakian,Nagasaki
1255 | Nagoyan,Nagoya
1256 | Namibian,Namibia
1257 | Napieran,Napier
1258 | Napoletano,Naples
1259 | Narnian,Narnia
1260 | Nashvillian,Nashville
1261 | Nassuvian,Nassau
1262 | Natalense,Natal
1263 | Naupactian,Naupactus
1264 | Nauruan,Nauru
1265 | Navarrese,Navarre
1266 | Navarro,Navarra
1267 | Naxian,Naxos
1268 | Nayarita,Nayarit
1269 | Nazarene,Nazareth
1270 | Neapolitan,Naples
1271 | Nebraskan,Nebraska
1272 | Negeri Sembilanese,Negeri Sembilan
1273 | Negrense,Negros Oriental
1274 | Nelson,Nelson
1275 | Nelsonian,Nelson Region
1276 | Neolonés,Nuevo León
1277 | Nepali,Nepal
1278 | Netherlander,The Netherlands
1279 | Netherlandic,Netherlands
1280 | Neuquino,Neuquen
1281 | Nevadian,Nevada
1282 | Nevisian,Saint Kitts And Nevis
1283 | New Brunswick,New Brunswick
1284 | New Brunswickan,New Brunswick
1285 | New Brunswickian,New Brunswick
1286 | New Caledonian,New Caledonia
1287 | New England,New England
1288 | New Englander,New England
1289 | New Guinian,New Guinea
1290 | New Hampshireman,New Hampshire
1291 | New Hampshirewoman,New Hampshire
1292 | New Hampshirite,New Hampshire
1293 | New Jerseyite,New Jersey
1294 | New Mexican,New Mexico
1295 | New Orleanian,New Orleans
1296 | New South Walesian,New South Wales
1297 | New South Welsh,New South Wales
1298 | New York,New York City
1299 | New Yorker,New York
1300 | New Zealand,New Zealand
1301 | New Zealander,New Zealand
1302 | Newf,Newfoundland And Labrador
1303 | Newfie,Newfoundland And Labrador
1304 | Newfoundland And Labrador,Newfoundland And Labrador
1305 | Newfoundlander,Newfoundland And Labrador
1306 | Newfy,Newfoundland And Labrador
1307 | Newportonian,Newport
1308 | Nicaraguan,Nicaragua
1309 | Nicobar,Andaman And Nicobar Islands
1310 | Nicobarese,Andaman And Nicobar Islands
1311 | Niçois,Nice
1312 | Nicolaíta,San Nicolás De Los Garza
1313 | Nicosian,Nicosia
1314 | Nicoya,Nicaragua
1315 | Nigerian,Nigeria
1316 | Nigerien,Niger
1317 | Nihonjin,Nihon
1318 | Nilotic,Nile River
1319 | Nipponjin,Nihon
1320 | Nisyrian,Nisyros
1321 | Niteroiense,Niterói
1322 | Niuean,Niue
1323 | Ni-Vanuatu,Vanuatu
1324 | None,Vatican City
1325 | Norbiense,Cáceres
1326 | Nordlending,Nordland
1327 | Nordmann,Norway
1328 | Nordtrønder,Nord-Trøndelag
1329 | Norfolk Island,Norfolk Island
1330 | Norfolk Islander,Norfolk Island
1331 | Norman,Normandy
1332 | Norte-Rio-Grandense,Rio Grande Do Norte
1333 | North Agusanian,Agusan Del Norte
1334 | North American,North America
1335 | North Camarinean,Camarines Norte
1336 | North Carolinian,North Carolina
1337 | North Dakotan,North Dakota
1338 | North Davao,Davao Del Norte
1339 | North Ilocos,Ilocos Norte
1340 | North Island,North Island
1341 | North Korean,North Korea
1342 | North Lanao,Lanao Del Norte
1343 | North Samarian,Northern Samar
1344 | North Surigaoan,Surigao Del Norte
1345 | North Zamboangan,Zamboanga Del Norte
1346 | Northamptonian,Northampton
1347 | Northern Irish,Northern Ireland
1348 | Northern Irishman,Northern Ireland
1349 | Northern Irishwoman,Northern Ireland
1350 | Northern Marianan,Northern Mariana Islands
1351 | Northern Territory,Northern Territory
1352 | Northland,Northland Region
1353 | Northumbrian,Northumberland
1354 | Northwest Territorian,Northwest Territories
1355 | Northwesterner,Northwest Territories
1356 | Norwegian,Norway
1357 | Nottinghamian,Nottingham
1358 | Nova Scotian,Nova Scotia
1359 | Novo Ecijano,Nueva Ecija
1360 | Novo Vizcayano,Nueva Vizcaya
1361 | Novocastrian,Newcastle
1362 | Nubian,Nubia
1363 | Nueva Ecijan,Nueva Ecija
1364 | Nueva Vizcayan,Nueva Vizcaya
1365 | Nuevoleonés,Nuevo León
1366 | Numedøl,Numedal
1367 | Nunavitian,Nunavut Territory
1368 | Nunavummiuq,Nunavut
1369 | Nunavut,Nunavut
1370 | Nutmegger,Connecticut
1371 | Nz,New Zealand
1372 | Oakbankian,Oakbank
1373 | Oaklander,Oakland
1374 | Oamaru,Oamaru
1375 | Oamaruvian,Oamaru
1376 | Oaxacan,Oaxaca
1377 | Oaxaqueño,Oaxaca
1378 | Obanite,Oban
1379 | Oceanian,Oceania
1380 | Odenseaner,Odense
1381 | Odessit,Odessa
1382 | Odessite,Odessa
1383 | Odia,Odisha
1384 | Odishan,Odisha
1385 | Odissi,Odisha
1386 | Oean,Oea
1387 | Ohioan,Ohio
1388 | Oiniadan,Oiniades
1389 | Okie,Oklahoma
1390 | Okinawan,Okinawa
1391 | Oklahoman,Oklahoma
1392 | Olvisino,Huelva
1393 | Olympian,Olympia
1394 | Omakian,Omak
1395 | Omani,Oman
1396 | Ontarian,Ontario
1397 | Ontario,Ontario
1398 | Onubense,Huelva
1399 | Opplending,Oppland
1400 | Orcadian,Orkney Islands
1401 | Orcelitano,Orihuela
1402 | Oregonian,Oregon
1403 | Oriolano,Orihuela
1404 | Oriya,Odisha
1405 | Osakan,Osaka
1406 | Oscense,Huesca
1407 | Oslofolk,Oslo
1408 | Oslovian,Oslo
1409 | Østfolding,Østfold
1410 | Otago,Otago Region
1411 | Ottawan,Ottawa
1412 | Oxonian,Oxford
1413 | Ozian,Oz
1414 | Ozzie,Australia
1415 | Paceño,La Paz
1416 | Pacense,Badajoz
1417 | Pachuqueño,Pachuca
1418 | Padane,Po River
1419 | Paddock Laker,Paddock Lake
1420 | Paduan,Padua
1421 | Paeonian,Paeonia
1422 | Pahangite,Pahang
1423 | Paisa,Antioquia
1424 | Pakistani,Pakistan
1425 | Palatine,Palatinate
1426 | Palauan,Palau
1427 | Palawan,Palawan
1428 | Palaweño,Palawan
1429 | Palentino,Palencia
1430 | Palermitan,Palermo
1431 | Palestinian,Palestine
1432 | Palmense,Palmas
1433 | Palmerstonian,Palmerston North
1434 | Paludian,Slough
1435 | Pampangan,Pampanga
1436 | Pampangueño,Pampanga
1437 | Pampeaño,La Pampa
1438 | Pamphylian,Pamphylia
1439 | Panamanian,Panama
1440 | Pangasinan,Pangasinan
1441 | Pangasinense,Pangasinan
1442 | Papal,Papal States
1443 | Papua New Guinean,Papua New Guinea
1444 | Papuan,Papua New Guinea
1445 | Paraense,Pará
1446 | Paraguayan,Paraguay
1447 | Paraibano,Paraíba
1448 | Paranaense,Paraná
1449 | Paranense,Paraná
1450 | Parian,Paros
1451 | Parisian,Paris
1452 | Parisienne,Paris
1453 | Parmesan,Parma
1454 | Parmigiano,Parma
1455 | Pasqueño,Pasco
1456 | Patagonian,Patagonia
1457 | Patmian,Patmos
1458 | Patriciense,Córdoba
1459 | Paulista,São Paulo
1460 | Paulistano,São Paulo
1461 | Pegswardian,Pegswood
1462 | Pekinese,Beijing
1463 | Peloponnesian,Peloponnese
1464 | Penangite,Penang
1465 | Peninsular,Peninsular Malaysia
1466 | Pennamite,Pennsylvania
1467 | Pennine,Pennines
1468 | Pennsylvanian,Pennsylvania
1469 | Perakian,Perak
1470 | Pergamian,Pergamum
1471 | Perlisian,Perlis
1472 | Pernambucano,Pernambuco
1473 | Persian,Persia
1474 | Perth,Perth
1475 | Perthian,Perth
1476 | Perthite,Perth
1477 | Perthling,Perth
1478 | Perugian,Perugia
1479 | Peruvian,Peru
1480 | Pessoense,João Pessoa
1481 | Peterborian,Peterborough
1482 | Peterbourian,Peterborough
1483 | Phanariote,Phanar
1484 | Philadelphian,Philadelphia
1485 | Philippian,Philippi
1486 | Philippine,Philippines
1487 | Philistine,Philistia
1488 | Phliasian,Phlius
1489 | Phocian,Phocis
1490 | Phoenician,Phoenix
1491 | Phoenixer,Phoenix
1492 | Phrygian,Phrygia
1493 | Piauiense,Piauí
1494 | Picard,Picardy
1495 | Piedmontese,Piedmont
1496 | Pinay,Philippines
1497 | Pinciano,Valladolid
1498 | Pine Tree,Maine
1499 | Pinoy,Philippines
1500 | Pisan,Pisa
1501 | Pisidian,Pisidia
1502 | Pisqueño,Pisco
1503 | Pitcairn Island,Pitcairn Islands
1504 | Pitcairn Islander,Pitcairn Islands
1505 | Pittsburgher,Pittsburgh
1506 | Pittsburghese,Pittsburgh
1507 | Piurano,Piura
1508 | Platense,La Plata
1509 | Plymothian,Plymouth
1510 | Plymouthian,Plymouth
1511 | Poblano,Puebla
1512 | Polaco,Catalonia
1513 | Pole,Poland
1514 | Polish,Poland
1515 | Polmontarian,Polmont
1516 | Polynesian,Polynesia
1517 | Pomeranian,Pomerania
1518 | Pompeian,Pompeii
1519 | Pompeiian,Pompeii
1520 | Pondicherrian,Puducherry
1521 | Ponferradian,Ponferrada
1522 | Pontevedrés,Pontevedra
1523 | Pontian,Pontus
1524 | Porteño,Buenos Aires
1525 | Portlander,Portland
1526 | Porto-Alegrense,Porto Alegre
1527 | Porto-Velhense,Porto Velho
1528 | Portuense,Porto
1529 | Portuguese,Portugal
1530 | Posadeño,Posadas
1531 | Potiguar,Rio Grande Do Norte
1532 | Potosino,San Luis Potosí
1533 | Praguer,Prague
1534 | Prince Edward Island,Prince Edward Island
1535 | Prince Edward Islander,Prince Edward Island
1536 | Provençal,Provence
1537 | Providentian,Providence
1538 | Prussian,Prussia
1539 | Pucelano,Valladolid
1540 | Pueblan,Puebla
1541 | Puerto Rican,Puerto Rico
1542 | Punekar,Pune
1543 | Punekars,Pune
1544 | Puneño,Puno
1545 | Punjabi,Punjab
1546 | Puntlander,Puntland
1547 | Putrajayan,Putrajaya
1548 | Pylian,Pylos
1549 | Pylosian,Pylos
1550 | Qatari,Qatar
1551 | Quebec,Quebec
1552 | Quebecan,Quebec
1553 | Quebecer,Quebec
1554 | Quebecers,Quebec
1555 | Quebecian,Quebec
1556 | Quebecker,Quebec
1557 | Quebeckers,Quebec
1558 | Quebecois,Quebec
1559 | Québécois,Quebec
1560 | Queens,Queens
1561 | Queensite,Queens
1562 | Queensland,Queensland
1563 | Queenslander,Queensland
1564 | Queretan,Querétaro
1565 | Queretano,Querétaro
1566 | Quezonian,Quezon
1567 | Quintanaroan,Quintana Roo
1568 | Quintanarroense,Quintana Roo
1569 | Quirinian,Quirino
1570 | Rajasthani,Rajasthan
1571 | Randallite,Randall
1572 | Randrusianer,Randers
1573 | Rawsense,Rawson
1574 | Readingite,Reading
1575 | Recifense,Recife
1576 | Regina,Regina
1577 | Reginaian,Regina
1578 | Reginan,Regina
1579 | Regiomontano,Monterrey
1580 | Reinero,Nuevo León
1581 | Reman,Remus
1582 | Rémois,Reims
1583 | Renoite,Reno
1584 | Resistenciano,Resistencia
1585 | Rethymnian,Rethymno
1586 | Reunionese,Reunion
1587 | Réunionese,Réunion
1588 | Réunionnais,Réunion
1589 | Rhegian,Rhegion
1590 | Rhegine,Rhegion
1591 | Rhenish,Rhineland
1592 | Rhinelander,Rhineland
1593 | Rhithymnian,Rhithymna
1594 | Rhode Islander,Rhode Island
1595 | Rhodean,Rhode Island
1596 | Rhodesian,Rhodesia
1597 | Rhodian,Rhodes
1598 | Richmonder,Richmond
1599 | Rigan,Riga
1600 | Rio-Branquense,Rio Branco
1601 | Rio-Grandense-Do-Norte,Rio Grande Do Norte
1602 | Rio-Grandense-Do-Sul,Rio Grande Do Sul
1603 | Riojano,La Rioja
1604 | Rionegrino,Río Negro
1605 | Riverian,Rivers State
1606 | Rizaleño,Rizal
1607 | Rizalian,Rizal
1608 | Rochesterian,Rochester
1609 | Rogalending,Rogaland
1610 | Rohirric,Rohan
1611 | Roman,Rome
1612 | Romanian,Romania
1613 | Romblonian,Romblon
1614 | Romsdøl,Møre Og Romsdal
1615 | Romulan,Romulus
1616 | Rondonense,Rondônia
1617 | Roraimense,Roraima
1618 | Roskildenser,Roskilde
1619 | Rotoruan,Rotorua
1620 | Rotterdammer,Rotterdam
1621 | Ruskinite,Ruskin
1622 | Russian,Russia
1623 | Russianc,Soviet Union
1624 | Rwandan,Rwanda
1625 | Rwandese,Rwanda
1626 | Ryukyuan,Ryukyu Islands
1627 | Saami,Lapland
1628 | Saba,Saba
1629 | Sabahan,Sabah
1630 | Sabine,Sabinium
1631 | Sacramentan,Sacramento
1632 | Særping,Sarpsborg
1633 | Saharan,Sahara
1634 | Sahraoui,Western Sahara
1635 | Sahrawi,Western Sahara
1636 | Sahrawian,Western Sahara
1637 | Saigoner,Saigon
1638 | Saigonese,Ho Chi Minh City
1639 | Saint Helenian,Saint Helena
1640 | Saint Lucian,Saint Lucia
1641 | Saint Petersburgian,Saint Petersburg
1642 | Saint Vincentian,Saint Vincent And Grenadines
1643 | Saint-Martinoise,Saint Martin
1644 | Saint-Pierrais,Saint Pierre And Miquelon
1645 | Salamanqués,Salamanca
1646 | Salamanquino,Salamanca
1647 | Salaminian,Salamis
1648 | Saldubense,Zaragoza
1649 | Salemander,Salem
1650 | Salernitan,Salerno
1651 | Salmanticense,Salamanca
1652 | Salmantino,Salamanca
1653 | Salopian,Shropshire
1654 | Salt Lake,Salt Lake City
1655 | Salteño,Salta
1656 | Saltillense,Saltillo
1657 | Salvadoran,El Salvador
1658 | Salvadoreño,El Salvador
1659 | Salvadorense,Salvador
1660 | Samareño,Samar
1661 | Samaritan,Samaria
1662 | Samian,Samos
1663 | Sammarinese,San Marino
1664 | Samnite,Samnium
1665 | Samoan,Samoa
1666 | Samogitian,Samogitia
1667 | Sampetrino,San Pedro Garza García
1668 | Samsing,Samsø
1669 | San Antonian,San Antonio
1670 | San Diegan,San Diego
1671 | San Franciscan,San Francisco
1672 | San Josean,San Jose
1673 | San Marinese,San Marino
1674 | Sand Cutter,Arizona
1675 | Sandgroper,Western Australia
1676 | Sand-Hiller,Georgia
1677 | Sandlapper,South Carolina
1678 | Sanjuanino,San Juan
1679 | Sanluiseño,San Luis
1680 | Sanmartinense,San Martín
1681 | Santa Claritan,Santa Clarita
1682 | Santa Cruzan,Santa Cruz
1683 | Santacruceño,Santa Cruz
1684 | Santafecino,Santa Fe
1685 | Santarroseño,Santa Rosa
1686 | Santiagueño,Santiago Del Estero
1687 | Sao Tomean,Sao Tome And Principe
1688 | São Toméan,São Tomé And Príncipe
1689 | Sãocarlense,São Carlos
1690 | São-Luisense,São Luís
1691 | Sarajevan,Sarajevo
1692 | Sarangan,Sarangani
1693 | Sarawakian,Sarawak
1694 | Sardi,Sardinia
1695 | Sardian,Sardis
1696 | Sardianus,Sardis
1697 | Sardinian,Sardinia
1698 | Sarkese,Sark
1699 | Sarmata,Sarmatia
1700 | Sarmatian,Sarmatia
1701 | Sarnian,Guernsey
1702 | Sarping,Sarpsborg
1703 | Saskatchewan,Saskatchewan
1704 | Saskatchewanan,Saskatchewan
1705 | Saskatchewanian,Saskatchewan
1706 | Saskatonian,Saskatoon
1707 | Saskatoon,Saskatoon
1708 | Saudi,Saudi Arabia
1709 | Saudi Arabian,Saudi Arabia
1710 | Sauromata,Sarmatia
1711 | Savannaian,Savannah
1712 | Savoyard,Savoy
1713 | Saxon,Saxony
1714 | Scandinavian,Scandinavia
1715 | Scillonian,Isles Of Scilly
1716 | Sconnie,Wisconsin
1717 | Scot,Scotland
1718 | Scots,Scotland
1719 | Scottish,Scotland
1720 | Scouse,Liverpool
1721 | Scouser,Liverpool
1722 | Scytha,Scythia
1723 | Scythian,Scythia
1724 | Seattleite,Seattle
1725 | Segobricense,Segorbe
1726 | Segobrigense,Segorbe
1727 | Segorbino,Segorbe
1728 | Segovian,Segovia
1729 | Segoviano,Segovia
1730 | Selangorean,Selangor
1731 | Semitic,Semite
1732 | Senegalese,Senegal
1733 | Seoulite,Seoul
1734 | Septense,Ceuta
1735 | Serb,Serbia
1736 | Serbian,Serbia
1737 | Sergipano,Sergipe
1738 | Serrean,Serrae
1739 | Sevillano,Sevilla
1740 | Sevillian,Seville
1741 | Seychellois,Seychelles
1742 | Shanghainese,Shanghai
1743 | Shikokuan,Shikoku
1744 | Shkodran,Shkoder
1745 | Siamese,Siam
1746 | Siberian,Siberia
1747 | Sibugay,Zamboanga Sibugay
1748 | Sicilian,Sicily
1749 | Sicyonese,Sicyon
1750 | Siddis,Stavanger
1751 | Sidonian,Sidon
1752 | Sienese,Siena
1753 | Siennese,Siena
1754 | Sierra Leonean,Sierra Leone
1755 | Sikkimese,Sikkim
1756 | Silesian,Silesia
1757 | Silhillian,Solihull
1758 | Silver Laker,Silver Lake
1759 | Sinaloan,Sinaloa
1760 | Sinaloense,Sinaloa
1761 | Sindhi,Sindh
1762 | Singapore,Singapore
1763 | Singaporean,Singapore
1764 | Sint Eustatius,Sint Eustatius
1765 | Sint Maarten,Sint Maarten
1766 | Sioux Cityan,Sioux City
1767 | Siquijorian,Siquijor
1768 | Sjællænder,Zealand
1769 | Skagbo,Skagen
1770 | Skaroene,Skaro
1771 | Skarosian,Skaro
1772 | Skopelitan,Skopelos
1773 | Skopjan,Skopje
1774 | Sligonian,Sligo
1775 | Slovak,Slovakia
1776 | Slovakian,Slovakia
1777 | Slovene,Slovenia
1778 | Slovenian,Slovenia
1779 | Sluff,Slough
1780 | Sofian,Sofia
1781 | Sogning,Sogn Og Fjordane
1782 | Solomon Island,Solomon Islands
1783 | Solomon Islander,Solomon Islands
1784 | Somali,Somalia
1785 | Somalian,Somalia
1786 | Somalilander,Somaliland
1787 | Somersite,Somers
1788 | Sonoran,Sonora
1789 | Sonorense,Sonora
1790 | Sontaran,Sontar
1791 | Sooner,Oklahoma
1792 | Soriano,Soria
1793 | Sorrentine,Sorrento
1794 | Sorsogonian,Sorsogon
1795 | Sorsogueño,Sorsogon
1796 | Sørtrønder,Sør-Trøndelag
1797 | Soteropolitano,Salvador
1798 | South African,South Africa
1799 | South Agusanian,Agusan Del Sur
1800 | South American,South America
1801 | South Australian,South Australia
1802 | South Camarinean,Camarines Sur
1803 | South Carolinian,South Carolina
1804 | South Cotabatan,South Cotabato
1805 | South Cotabateño,South Cotabato
1806 | South Dakotan,South Dakota
1807 | South Davao,Davao Del Sur
1808 | South Georgia,South Georgia And The South Sandwich Islands
1809 | South Ilocos,Ilocos Sur
1810 | South Island,South Island
1811 | South Korean,South Korea
1812 | South Lanao,Lanao Del Sur
1813 | South Lower Californian,Baja California Sur
1814 | South Ossetian,South Ossetia
1815 | South Sandwich Islands,South Georgia And The South Sandwich Islands
1816 | South Sudanese,South Sudan
1817 | South Surigaoan,Surigao Del Sur
1818 | South Zamboangan,Zamboanga Del Sur
1819 | Southern Australian,South Australia
1820 | Southern Leytenian,Southern Leyte
1821 | Southland,Southland Region
1822 | Soviet,Soviet Union
1823 | Spaniard,Spain
1824 | Spanish,Spain
1825 | Spartan,Sparta
1826 | Spokanite,Spokane
1827 | Spoletian,Spoleto
1828 | Springfieldianite,Springfield
1829 | Sri Lankan,Sri Lanka
1830 | St. Crucian,St. Croix
1831 | St. Louisan,St. Louis
1832 | St.Tomian,St. Thomas
1833 | Staten Island,Staten Island
1834 | Statian,Sint Eustatius
1835 | Stephanois,St Etienne
1836 | Stewart Island,Stewart Island
1837 | Stockholmer,Stockholm
1838 | Stopfordian,Stockport
1839 | Strasbourgeois,Strasbourg
1840 | Stril,Bergen
1841 | Stuttgarter,Stuttgart
1842 | Styrian,Styria
1843 | Sudanese,Sudan
1844 | Sudetes,Sudetenland
1845 | Suebius,Suebia
1846 | Sul-Rio-Grandense,Rio Grande Do Sul
1847 | Sultan Kudaratenian,Sultan Kudarat
1848 | Sultan Kudarateño,Sultan Kudarat
1849 | Sulu,Sulu
1850 | Suluan,Sulu
1851 | Suluano,Sulu
1852 | Sumatran,Sumatra
1853 | Sumerian,Sumer
1854 | Sundanese,Java
1855 | Sunflower,Kansas
1856 | Sunnmøring,Sunnmøre
1857 | Surcano,Surco
1858 | Surinamer,Suriname
1859 | Surinamese,Suriname
1860 | Svalbard,Svalbard
1861 | Swamp Yankee,Rhode Island
1862 | Swansean,Swansea
1863 | Swazi,Swaziland
1864 | Swede,Sweden
1865 | Swedish,Sweden
1866 | Swindonian,Swindon
1867 | Swiss,Switzerland
1868 | Sydney,Sydney
1869 | Sydneysider,Sydney
1870 | Sydney-Sider,Sydney
1871 | Symian,Symi
1872 | Syracusan,Syracuse
1873 | Syracusian,Syracuse
1874 | Syrian,Syria
1875 | Tab,Cambridge
1876 | Tabasqueño,Tabasco
1877 | Tacneño,Tacna
1878 | Tacomian,Tacoma
1879 | Tadzhik,Tajikistan
1880 | Taff,Wales
1881 | Tahitian,Tahiti
1882 | Taiwanese,Taiwan
1883 | Tajik,Tajikistan
1884 | Tajikistani,Tajikistan
1885 | Talabricense,Talavera De La Reina
1886 | Talaverano,Talavera De La Reina
1887 | Tallinner,Tallinn
1888 | Tamaulipan,Tamaulipas
1889 | Tamaulipeco,Tamaulipas
1890 | Tamil,Tamil Nadu
1891 | Tamilian,Tamil Nadu
1892 | Tampanian,Tampa
1893 | Tanzanian,Tanzania
1894 | Tapatío,Guadalajara
1895 | Tar Boiler,North Carolina
1896 | Tar Heel,North Carolina
1897 | Taranaki,Taranaki Region
1898 | Tarantine,Taranto
1899 | Tarapotino,Tarapoto
1900 | Tarentine,Taranto
1901 | Tarentumian,Taras
1902 | Tarlaquenian,Tarlac
1903 | Tarlaqueño,Tarlac
1904 | Tarraconense,Tarragona
1905 | Tarraconista,Tarragona
1906 | Tasman,Tasman Region
1907 | Tasmaniac,Tasmania
1908 | Tasmanian,Tasmania
1909 | Tassie,Tasmania
1910 | Taswegian,Tasmania
1911 | Tatar,Tatarstan
1912 | Tawi-Tawian,Tawi-Tawi
1913 | Tbilisian,Tbilisi
1914 | Tegean,Tegea
1915 | Tejano,Texas
1916 | Tel Avivi,Tel Aviv
1917 | Telanganite,Telangana
1918 | Telemarking,Telemark
1919 | Tenedian,Tenedos
1920 | Tenesseean,Tennessee
1921 | Tennesseean,Tennessee
1922 | Tepiqueño,Tepic
1923 | Tepiquense,Tepic
1924 | Terengganuan,Terengganu
1925 | Teresinense,Teresina
1926 | Territorian,Northern Territory
1927 | Texian,Texas
1928 | Texican,Texas
1929 | Thai,Thailand
1930 | Thasian,Thasos
1931 | Theban,Thebes
1932 | Thespian,Thespis
1933 | Thessalian,Thessaly
1934 | Thessalonian,Thessaloniki
1935 | Thracian,Thrace
1936 | Thriasian,Thria
1937 | Thurian,Thurii
1938 | Thuriat,Thuria
1939 | Thuringian,Thuringia
1940 | Thybo,Thy
1941 | Tibetan,Tibet
1942 | Timorese,Timor-Leste
1943 | Tinerfeño,Santa Cruz De Tenerife
1944 | Tiranas,Tirana
1945 | Titletowner,Green Bay
1946 | Tlaxcalteca,Tlaxcala
1947 | Tobagonian,Trinidad And Tobago
1948 | Toboesco,El Toboso
1949 | Toboseño,El Toboso
1950 | Tobosino,El Toboso
1951 | Tocantinense,Tocantins
1952 | Togolese,Togo
1953 | Tokelauan,Tokelau
1954 | Tokyoite,Tokyo
1955 | Toledano,Toledo
1956 | Toledoan,Toledo
1957 | Toluqueño,Toluca
1958 | Tongan,Tonga
1959 | Toowoomban,Toowoomba
1960 | Toresano,Toro
1961 | Torinese,Turin
1962 | Torontonian,Toronto
1963 | Tournaisian,Tournai
1964 | Towcestrian,Towcester
1965 | Townsvillian,Townsville
1966 | Transylvanian,Transylvania
1967 | Trevorite,Trevor
1968 | Trichonian,Trichonos
1969 | Tridentine,Trent
1970 | Triestine,Trieste
1971 | Trifluvian,Trois-Rivières
1972 | Trifluvien,Trois-Rivières
1973 | Trifluvienne,Trois-Rivières
1974 | Trinibagonians,Trinidad And Tobago
1975 | Trinidadian,Trinidad And Tobago
1976 | Trinis,Trinidad And Tobago
1977 | Tripolitan,Tripoli
1978 | Tripuran,Tripura
1979 | Tripuri,Tripura
1980 | Troezenian,Troezen
1981 | Trois-Rivieran,Trois-Rivières
1982 | Trojan,Troy
1983 | Troll,Michigan
1984 | Tromsøværing,Tromsø
1985 | Tromsværing,Troms
1986 | Trønder,Trondheim
1987 | Trujillano,Trujillo
1988 | Tucsonan,Tucson
1989 | Tucumano,Tucumán
1990 | Tukker,Twente
1991 | Tulsan,Tulsa
1992 | Tumbecino,Tumbes
1993 | Tunisian,Tunisia
1994 | Turboleta,Teruel
1995 | Turinese,Turin
1996 | Turk,Turkey
1997 | Turkic,Turkey
1998 | Turkish,Turkey
1999 | Turkmen,Turkmenistan
2000 | Turkmenistani,Turkmenistan
2001 | Turks And Caicos Island,Turks And Caicos Islands
2002 | Turolense,Teruel
2003 | Tuscan,Tuscany
2004 | Tuvaluan,Tuvalu
2005 | Tuxtleco,Tuxtla Gutiérrez
2006 | Twin Laker,Twin Lakes
2007 | Tyke,Yorkshire
2008 | Tyrolean,Tyrol
2009 | Tyrolese,Tyrol
2010 | U.S. Virgin Island,United States Virgin Islands
2011 | Uberlandense,Uberlândia
2012 | Ugandan,Uganda
2013 | Uk,United Kingdom
2014 | Ukrainian,Ukraine
2015 | Ulsterman,Northern Ireland
2016 | Umbrian,Umbria
2017 | United States,United States
2018 | Urcitano,Almería
2019 | Uruguayan,Uruguay
2020 | US,United States
2021 | U.S.,United States
2022 | Ushuaiense,Ushuaia
2023 | Utahn,Utah
2024 | Utopian,Utopia
2025 | Uttar Pradeshi,Uttar Pradesh
2026 | Uttarakhandi,Uttarakhand
2027 | Uzbek,Uzbekistan
2028 | Uzbekistani,Uzbekistan
2029 | Valencian,Valencia
2030 | Valenciano,Valencia
2031 | Valisoletano,Valladolid
2032 | Vallisoletano,Valladolid
2033 | Vancouverite,Vancouver
2034 | Vanuatuan,Vanuatu
2035 | Varsovian,Warsaw
2036 | Vashonite,Vashon Island
2037 | Vatican,Vatican City State
2038 | Venetian,Venice
2039 | Venezuelan,Venezuela
2040 | Veracruzano,Veracruz
2041 | Vermonter,Vermont
2042 | Veronese,Verona
2043 | Verulamian,St Albans
2044 | Vest-Agding,Vest-Agder
2045 | Vestfolding,Vestfold
2046 | Vicense,Vich
2047 | Vicentine,Vicenza
2048 | Vichyssois,Vichy
2049 | Victorian,Victoria
2050 | Viedmense,Viedma
2051 | Viennese,Vienna
2052 | Vietnamese,Vietnam
2053 | Vigitano,Vich
2054 | Villahermosino,Villahermosa
2055 | Villermosino,Villahermosa
2056 | Vilnian,Vilnius
2057 | Vincentian,Saint Vincent And Grenadines
2058 | Virgin Islander,Virgin Islands
2059 | Virginian,Virginia
2060 | Virginians,Virginia
2061 | Vitoriense,Vitória
2062 | Vizcayense,Vizcaya
2063 | Volcano Islander,Volcano Islands
2064 | Vulcan,Vulcan
2065 | Vulcanian,Vulcan
2066 | Waikato,Waikato Region
2067 | Wakandan,Wakanda
2068 | Wakean,Wake Island
2069 | Wallachian,Wallachia
2070 | Wallis And Futuna,Wallis And Futuna
2071 | Wallisian,Wallis And Futuna
2072 | Walloon,Wallonia
2073 | Washingtonian,Washington
2074 | Waterfordian,Waterford
2075 | Waterluvian,Waterloo
2076 | Weegie,Glasgow
2077 | Wellington,Wellington
2078 | Welsh,Wales
2079 | Welshman,Wales
2080 | Welshwoman,Wales
2081 | Wenhamite,Wenham
2082 | West Coast,West Coast Region
2083 | West Indian,West Indies
2084 | West Malaysian,Peninsular Malaysia
2085 | West Mindorenian,Occidental Mindoro
2086 | West Misamis,Misamis Occidental
2087 | West Negros,Negros Occidental
2088 | West Papuan,West Papua
2089 | West Samarian,Samar
2090 | West Virginian,West Virginia
2091 | Western Australian,Western Australia
2092 | Western Davao,Davao Occidental
2093 | Westphalian,Westphalia
2094 | Westralian,Western Australia
2095 | Wheatlander,Wheatland
2096 | Whittierite,Whittier
2097 | Wichitan,Wichita
2098 | Wiener,Vienna
2099 | Wilmotter,Wilmot
2100 | Winnipegger,Winnipeg
2101 | Wisconsinite,Wisconsin
2102 | Wolverine,Michigan
2103 | Wrexhamite,Wrexham
2104 | Wulfrunian,Wolverhampton
2105 | Wyomingite,Wyoming
2106 | Xalapeño,Xalapa
2107 | Xanthian,Xanthi
2108 | Yakut,Yakutia
2109 | Yellowknifer,Yellowknife
2110 | Yemeni,Yemen
2111 | Yemenite,Yemen
2112 | Yerevantsi,Yerevan
2113 | Yerushalmi,Jerusalem
2114 | Yokohaman,Yokohama
2115 | Yooper,Michigan
2116 | Yorkie,Yorkshire
2117 | Yorkshireman,Yorkshire
2118 | Yorkshirewoman,Yorkshire
2119 | Yucateco,Yucatán
2120 | Yugoslav,Yugoslavia
2121 | Yukon,Yukon
2122 | Yukoner,Yukon Territory
2123 | Yukonian,Yukon Territory
2124 | Yukonite,Yukon Territory
2125 | Yupper,Upper Peninsula
2126 | Zacatecan,Zacatecas
2127 | Zacatecano,Zacatecas
2128 | Zagrebian,Zagreb
2129 | Zairean,Zaire
2130 | Zakyntian,Zakynthos
2131 | Zambalean,Zambales
2132 | Zambaleño,Zambales
2133 | Zambian,Zambia
2134 | Zamboangueño,Zamboanga Del Norte
2135 | Zamorano,Zamora
2136 | Zamorense,Zamora
2137 | Zaragocés,Zaragoza
2138 | Zaragozano,Zaragoza
2139 | Zealander,Zealand
2140 | Zimbabwean,Zimbabwe
2141 | Zintani,Zintan
2142 | Zulu,Zululand
2143 | Zuricher,Zurich
2144 | Ecuadoran,Ecuador
2145 |
--------------------------------------------------------------------------------
/src/main/resources/edu/knowitall/chunkedextractor/nouns.txt:
--------------------------------------------------------------------------------
1 | ...*ist
2 | abbot
3 | abomination
4 | accessory
5 | accompanist
6 | accomplice
7 | accountant
8 | accuser
9 | ace
10 | acquaintance
11 | active
12 | activist
13 | adherent
14 | adjunct
15 | administrator
16 | admiral
17 | admirer
18 | adopter
19 | adult
20 | adversary
21 | advertiser
22 | adviser
23 | advisor
24 | advocate
25 | affiliate
26 | aficionado
27 | agent
28 | aggressor
29 | agonist
30 | aide
31 | alien
32 | ally
33 | alternate
34 | alum
35 | alumna
36 | alumnus
37 | ambassador
38 | anachronism
39 | analyst
40 | anathema
41 | ancestor
42 | anchor
43 | ancient
44 | angel
45 | announcer
46 | annoyance
47 | anomaly
48 | antagonist
49 | apologist
50 | apostle
51 | apotheosis
52 | applicant
53 | appointment
54 | apprentice
55 | arbiter
56 | arbitrator
57 | archbishop
58 | arrival
59 | artist
60 | ass
61 | asshole
62 | assignee
63 | assistant
64 | associate
65 | atheist
66 | attendant
67 | attendee
68 | attorney
69 | attraction
70 | auditor
71 | aunt
72 | authority
73 | avatar
74 | babe
75 | baby
76 | bachelor
77 | back
78 | backer
79 | backup
80 | bag
81 | banker
82 | barber
83 | barrister
84 | bartender
85 | bassist
86 | batsman
87 | bear
88 | bearer
89 | beast
90 | beat
91 | beauty
92 | beginner
93 | believer
94 | belle
95 | bellwether
96 | beloved
97 | benefactor
98 | beneficiary
99 | better
100 | bidder
101 | bird
102 | birth
103 | bishop
104 | bitch
105 | blacksmith
106 | blade
107 | blogger
108 | blonde
109 | blood
110 | bomb-expert
111 | bomb-maker
112 | bomber
113 | bookkeeper
114 | booster
115 | bore
116 | borrower
117 | boss
118 | bouncer
119 | bowler
120 | boy
121 | boyfriend
122 | brain
123 | breadwinner
124 | breaker
125 | breeder
126 | bride
127 | bridesmaid
128 | broadcaster
129 | broker
130 | brother
131 | brother-in-law
132 | browser
133 | brunette
134 | buddy
135 | buff
136 | builder
137 | bull
138 | bully
139 | businessman
140 | butcher
141 | butt
142 | buyer
143 | cadet
144 | calculator
145 | camper
146 | canary
147 | candidate
148 | canon
149 | captain
150 | captive
151 | card
152 | caregiver
153 | caretaker
154 | carpenter
155 | carrier
156 | case
157 | cashier
158 | casualty
159 | cat
160 | catch
161 | catcher
162 | caterer
163 | celebrity
164 | center
165 | CEO
166 | CFO
167 | chair
168 | chairman
169 | chairperson
170 | chairwoman
171 | champ
172 | champion
173 | chancellor
174 | chaplain
175 | character
176 | charge
177 | cheerleader
178 | chick
179 | chicken
180 | chief
181 | chieftain
182 | child
183 | chiropractor
184 | chorister
185 | christ
186 | cinematographer
187 | cipher
188 | citizen
189 | classic
190 | classmate
191 | cleaner
192 | cleric
193 | clerk
194 | client
195 | clone
196 | closer
197 | co-conspirator
198 | co-founder
199 | co-worker
200 | coach
201 | coaster
202 | coauthor
203 | cofounder
204 | cog
205 | collaborator
206 | colleague
207 | collector
208 | colonel
209 | columnist
210 | comer
211 | commandant
212 | commander
213 | commentator
214 | commissioner
215 | communicant
216 | communicator
217 | commuter
218 | companion
219 | company
220 | competition
221 | competitor
222 | compiler
223 | complainant
224 | computer
225 | conductor
226 | confidant
227 | congressman
228 | connection
229 | connoisseur
230 | conservative
231 | consort
232 | conspirator
233 | constituent
234 | constructor
235 | consultant
236 | consumer
237 | contact
238 | contemporary
239 | contender
240 | contestant
241 | contractor
242 | contributor
243 | controller
244 | convener
245 | convert
246 | convict
247 | cook
248 | coordinator
249 | cop
250 | corporal
251 | correspondent
252 | cosmopolitan
253 | councillor
254 | councilman
255 | counsel
256 | counselor
257 | count
258 | counter
259 | cousin
260 | cow
261 | coward
262 | cowboy
263 | cracker
264 | crazy
265 | creator
266 | creature
267 | creditor
268 | critic
269 | crossover
270 | crusader
271 | culprit
272 | cultist
273 | curator
274 | custodian
275 | customer
276 | czar
277 | dad
278 | daddy
279 | dame
280 | dancer
281 | darling
282 | date
283 | daughter
284 | daughter-in-law
285 | deacon
286 | dealer
287 | dean
288 | dearest
289 | debtor
290 | defendant
291 | defender
292 | delegate
293 | democrat
294 | demon
295 | denizen
296 | dependent
297 | descendant
298 | descendent
299 | designer
300 | destroyer
301 | detective
302 | developer
303 | deviant
304 | devil
305 | devotee
306 | dick
307 | dictator
308 | differentiator
309 | diplomat
310 | diplomate
311 | disciple
312 | discoverer
313 | dish
314 | dissenter
315 | distributor
316 | diver
317 | dj
318 | doer
319 | dog
320 | donor
321 | double
322 | doyen
323 | dragon
324 | draw
325 | driver
326 | drummer
327 | dry
328 | duchess
329 | dud
330 | dude
331 | duke
332 | earl
333 | editor
334 | educator
335 | elder
336 | eldest
337 | elector
338 | electrician
339 | embodiment
340 | emcee
341 | emeritus
342 | employee
343 | employer
344 | end
345 | enemy
346 | enthusiast
347 | entrant
348 | entrepreneur
349 | envoy
350 | equal
351 | escapee
352 | evangelist
353 | ex-wife
354 | examiner
355 | executive
356 | executor
357 | exhibitor
358 | expert
359 | exponent
360 | exporter
361 | extra
362 | extremist
363 | eyewitness
364 | face
365 | facilitator
366 | factor
367 | failure
368 | faller
369 | familiar
370 | family
371 | fan
372 | farmer
373 | father
374 | father-in-law
375 | favorite
376 | favourite
377 | fellow
378 | female
379 | fighter
380 | figure
381 | figurehead
382 | filmmaker
383 | finalist
384 | finder
385 | finisher
386 | firefighter
387 | fireman
388 | firstborn
389 | fisherman
390 | fixture
391 | flop
392 | florist
393 | flyer
394 | fodder
395 | follower
396 | fool
397 | foot
398 | forefather
399 | foreigner
400 | foreman
401 | forerunner
402 | forward
403 | founder
404 | fraud
405 | freak
406 | freshman
407 | friend
408 | front
409 | front-runner
410 | fugitive
411 | fundamentalist
412 | fundraiser
413 | gainer
414 | gatekeeper
415 | geek
416 | gem
417 | general
418 | generator
419 | genius
420 | gentleman
421 | ghost
422 | giant
423 | girl
424 | girlfriend
425 | giver
426 | glutton
427 | goalie
428 | goalkeeper
429 | god
430 | godmother
431 | governor
432 | grader
433 | graduate
434 | granddaddy
435 | granddaughter
436 | grandfather
437 | grandmother
438 | grandson
439 | great
440 | grind
441 | groomsman
442 | grower
443 | guarantor
444 | guard
445 | guardian
446 | guest
447 | guide
448 | guitarist
449 | gunman
450 | gunner
451 | guru
452 | guy
453 | half-brother
454 | half-sister
455 | hand
456 | handler
457 | handmaid
458 | handmaiden
459 | hangover
460 | head
461 | headliner
462 | headmaster
463 | healer
464 | heartbreaker
465 | heavy
466 | heel
467 | heir
468 | heiress
469 | help
470 | herald
471 | hero
472 | heroine
473 | hijacker
474 | hire
475 | hitter
476 | holder
477 | holdover
478 | homemaker
479 | homeowner
480 | hope
481 | host
482 | hostage
483 | housewife
484 | hunk
485 | hunter
486 | husband
487 | hypocrite
488 | ideal
489 | ideologist
490 | idiot
491 | idol
492 | image
493 | imam
494 | immigrant
495 | import
496 | importer
497 | incarnation
498 | indexer
499 | individual
500 | inducer
501 | inductee
502 | industrialist
503 | infant
504 | informant
505 | inhabitant
506 | inheritor
507 | initiate
508 | initiator
509 | inmate
510 | innovator
511 | inpatient
512 | insider
513 | inspector
514 | instigator
515 | instructor
516 | instrument
517 | insurgent
518 | intermediary
519 | intern
520 | interpreter
521 | intimate
522 | investigator
523 | investor
524 | issue
525 | jack
526 | janitor
527 | jerk
528 | jewel
529 | jihadist
530 | joker
531 | junior
532 | justice
533 | keeper
534 | keyboardist
535 | kicker
536 | kid
537 | killer
538 | king
539 | kingpin
540 | knight
541 | knower
542 | lad
543 | lady
544 | lamb
545 | landlord
546 | landowner
547 | latecomer
548 | laughingstock
549 | laureate
550 | lawmaker
551 | lawyer
552 | lead
553 | leader
554 | learner
555 | lecturer
556 | lender
557 | lesbian
558 | lessee
559 | lessor
560 | letter
561 | liar
562 | liberal
563 | licensee
564 | lieutenant
565 | life
566 | lifesaver
567 | light
568 | linebacker
569 | lion
570 | lobbyist
571 | locator
572 | loner
573 | longer
574 | lord
575 | loser
576 | love
577 | lover
578 | loyalist
579 | lump
580 | machine
581 | machinist
582 | maid
583 | mainstay
584 | maintainer
585 | major
586 | maker
587 | male
588 | man
589 | manager
590 | manufacturer
591 | marine
592 | mark
593 | marketer
594 | marshal
595 | martyr
596 | mason
597 | master
598 | mastermind
599 | match
600 | mate
601 | mater
602 | material
603 | matriarch
604 | matron
605 | md
606 | mechanic
607 | medalist
608 | mediator
609 | medium
610 | member
611 | mentor
612 | merchant
613 | messenger
614 | messiah
615 | middleman
616 | midwife
617 | militant
618 | millionaire
619 | mind
620 | minister
621 | minor
622 | miss
623 | mistress
624 | mod
625 | model
626 | moderator
627 | modern
628 | mole
629 | mom
630 | monarch
631 | monitor
632 | monk
633 | monster
634 | moron
635 | mother
636 | mouse
637 | mouth
638 | mouthpiece
639 | mover
640 | mp
641 | murderer
642 | muscle
643 | mvp
644 | name
645 | namesake
646 | nanny
647 | narrator
648 | national
649 | nationalist
650 | native
651 | natural
652 | neighbor
653 | neighbour
654 | nephew
655 | nerd
656 | newbie
657 | newcomer
658 | niece
659 | nigger
660 | no-show
661 | nobody
662 | nominee
663 | nonresident
664 | notable
665 | novice
666 | nuisance
667 | nut
668 | observer
669 | occupant
670 | offender
671 | officer
672 | official
673 | offspring
674 | ombudsman
675 | opener
676 | operative
677 | operator
678 | opponent
679 | opposite
680 | opposition
681 | oracle
682 | ordinary
683 | organiser
684 | organist
685 | organizer
686 | originator
687 | outcast
688 | outfielder
689 | outsider
690 | overseer
691 | owner
692 | pain
693 | pallbearer
694 | panelist
695 | paragon
696 | paralegal
697 | paranoid
698 | parasite
699 | paratrooper
700 | parent
701 | pariah
702 | parishioner
703 | parliamentarian
704 | part-owner
705 | participant
706 | partner
707 | party
708 | passenger
709 | passer
710 | pastor
711 | patient
712 | patriarch
713 | patriot
714 | patron
715 | patroness
716 | pawn
717 | payer
718 | paymaster
719 | pediatrician
720 | peer
721 | perfectionist
722 | performer
723 | perpetrator
724 | person
725 | personality
726 | personification
727 | pest
728 | pet
729 | petitioner
730 | pig
731 | pill
732 | pillar
733 | pimp
734 | pioneer
735 | pitcher
736 | pivot
737 | placeholder
738 | plaintiff
739 | planet
740 | planner
741 | plant
742 | pledge
743 | policeman
744 | pop
745 | pope
746 | possessor
747 | postdoc
748 | poster
749 | pow
750 | power
751 | powerhouse
752 | practitioner
753 | prayer
754 | precursor
755 | predator
756 | predecessor
757 | predictor
758 | premier
759 | presenter
760 | prey
761 | priest
762 | priestess
763 | primitive
764 | prince
765 | principal
766 | prior
767 | prisoner
768 | processor
769 | producer
770 | professor
771 | progenitor
772 | progeny
773 | progressive
774 | promoter
775 | proofreader
776 | prophet
777 | proponent
778 | proprietor
779 | prosecutor
780 | prospect
781 | prostitute
782 | protagonist
783 | protector
784 | protege
785 | provider
786 | proxy
787 | psychiatrist
788 | psychotherapist
789 | publisher
790 | punk
791 | pupil
792 | puppet
793 | purchaser
794 | purveyor
795 | qualifier
796 | quarter
797 | quarterback
798 | queen
799 | rabbi
800 | racist
801 | radical
802 | raiser
803 | rapper
804 | rat
805 | reader
806 | rebel
807 | receiver
808 | receptionist
809 | recipient
810 | recruiter
811 | rector
812 | redeemer
813 | referee
814 | referral
815 | refugee
816 | registrant
817 | registrar
818 | regular
819 | regulator
820 | reincarnation
821 | relation
822 | relative
823 | relief
824 | religious
825 | reminder
826 | remover
827 | rep
828 | replacement
829 | reporter
830 | repository
831 | representative
832 | republican
833 | researcher
834 | reservist
835 | resident
836 | respondent
837 | retailer
838 | revenue
839 | reviewer
840 | rider
841 | ringer
842 | ringleader
843 | rip
844 | rival
845 | rn
846 | rock
847 | romantic
848 | rookie
849 | roommate
850 | root
851 | ruler
852 | runner-up
853 | runt
854 | sage
855 | saint
856 | salesman
857 | sampler
858 | satellite
859 | saver
860 | savior
861 | saviour
862 | scanner
863 | scapegoat
864 | scholar
865 | schoolteacher
866 | scion
867 | scorer
868 | scourge
869 | scout
870 | scratch
871 | screw
872 | second
873 | secretary
874 | seed
875 | seeker
876 | self
877 | self-starter
878 | seller
879 | semifinalist
880 | senator
881 | sender
882 | senior
883 | sensation
884 | sensitive
885 | seperatist
886 | sergeant
887 | servant
888 | server
889 | settler
890 | shadow
891 | sham
892 | shareholder
893 | sharper
894 | sheep
895 | shepherd
896 | sheriff
897 | shill
898 | shit
899 | shocker
900 | shoemaker
901 | shooter
902 | shortstop
903 | sibling
904 | signatory
905 | signer
906 | silly
907 | simple
908 | sinner
909 | sire
910 | sister
911 | sister-in-law
912 | skipper
913 | slave
914 | slayer
915 | sleeper
916 | slip
917 | smoker
918 | snake
919 | sneak
920 | sniper
921 | soldier
922 | solicitor
923 | soloist
924 | someone
925 | son
926 | son-in-law
927 | sophisticate
928 | sophomore
929 | sort
930 | soul
931 | source
932 | sovereign
933 | speaker
934 | spearhead
935 | specialist
936 | spectator
937 | speechwriter
938 | spoiler
939 | spokesman
940 | spokesperson
941 | spokeswoman
942 | sponsor
943 | spouse
944 | square
945 | staffer
946 | stakeholder
947 | stalwart
948 | stand-in
949 | standard-bearer
950 | star
951 | starter
952 | stepdaughter
953 | stepfather
954 | stepson
955 | steward
956 | stickler
957 | stiff
958 | stockholder
959 | stranger
960 | strategist
961 | stroke
962 | strongman
963 | stud
964 | student
965 | study
966 | subcontractor
967 | subject
968 | subscriber
969 | subsidiary
970 | success
971 | successor
972 | sufferer
973 | suit
974 | sultan
975 | sun
976 | superintendent
977 | superior
978 | superstar
979 | supervisor
980 | supplier
981 | supporter
982 | suppressor
983 | supremacist
984 | surgeon
985 | surrogate
986 | survivor
987 | suspect
988 | sustainer
989 | sweep
990 | sweetheart
991 | swell
992 | tail
993 | tailor
994 | talent
995 | target
996 | taxpayer
997 | teacher
998 | teammate
999 | teaser
1000 | technician
1001 | technologist
1002 | teen
1003 | teenager
1004 | tenant
1005 | tender
1006 | terror
1007 | terrorist
1008 | tester
1009 | therapist
1010 | thief
1011 | threat
1012 | tiger
1013 | tiller
1014 | timekeeper
1015 | titan
1016 | toast
1017 | tool
1018 | tough
1019 | tourist
1020 | trader
1021 | trailblazer
1022 | trailer
1023 | trainer
1024 | traitor
1025 | transfer
1026 | translator
1027 | treasurer
1028 | trick
1029 | trier
1030 | triggerman
1031 | trooper
1032 | trustee
1033 | tutor
1034 | twin
1035 | type
1036 | uncle
1037 | underdog
1038 | undergrad
1039 | undergraduate
1040 | understudy
1041 | underwriter
1042 | user
1043 | usher
1044 | vagabond
1045 | valedictorian
1046 | vassal
1047 | vendor
1048 | veteran
1049 | vicar
1050 | victim
1051 | victor
1052 | viewer
1053 | villain
1054 | violinist
1055 | virgin
1056 | virtuoso
1057 | visitor
1058 | vocalist
1059 | voice
1060 | volunteer
1061 | voter
1062 | waiter
1063 | waitress
1064 | ward
1065 | warden
1066 | warlord
1067 | warrior
1068 | watch
1069 | watchdog
1070 | webmaster
1071 | whale
1072 | whiz
1073 | wholesaler
1074 | whore
1075 | widow
1076 | widower
1077 | wife
1078 | winemaker
1079 | wing
1080 | winner
1081 | witch
1082 | witness
1083 | wizard
1084 | woman
1085 | worker
1086 | worm
1087 | worshipper
1088 | worthy
1089 | wrestler
1090 | writer
1091 | youngster
1092 | youth
1093 | ------------------------------------
1094 | accordionist
1095 | actor
1096 | actress
1097 | anthropologist
1098 | archaeologist
1099 | architect
1100 | archivist
1101 | assassin
1102 | astronaut
1103 | astronomer
1104 | astrophysicist
1105 | author
1106 | aviator
1107 | biochemist
1108 | biographer
1109 | biologist
1110 | botanist
1111 | boxer
1112 | bullfighter
1113 | caricaturist
1114 | cartographer
1115 | cartoonist
1116 | censor
1117 | chef
1118 | chemist
1119 | choreographer
1120 | climber
1121 | clown
1122 | coleopterist
1123 | comedian
1124 | composer
1125 | conquistadore
1126 | cricketer
1127 | dentist
1128 | dermatologist
1129 | director
1130 | doctor
1131 | economist
1132 | engineer
1133 | entertainer
1134 | entomologist
1135 | explorer
1136 | feminist
1137 | footballer
1138 | geneticist
1139 | geographer
1140 | geologist
1141 | geometer
1142 | geophysicist
1143 | gerontologist
1144 | godfather
1145 | golfer
1146 | guerrilla
1147 | gymnast
1148 | hacker
1149 | herpetologist
1150 | historian
1151 | illustrator
1152 | incumbent
1153 | inventor
1154 | journalist
1155 | judge
1156 | jurist
1157 | kickboxer
1158 | lexicographer
1159 | librarian
1160 | lifeguard
1161 | linguist
1162 | logician
1163 | magician
1164 | mathematician
1165 | meteorologist
1166 | mineralogist
1167 | missionary
1168 | musician
1169 | myrmecologist
1170 | neurochemist
1171 | neurologist
1172 | neuroscientist
1173 | nurse
1174 | oenologist
1175 | ornithologist
1176 | painter
1177 | paleontologist
1178 | pathologist
1179 | pharmacist
1180 | philatelist
1181 | philosopher
1182 | photochemist
1183 | photographer
1184 | photojournalist
1185 | physician
1186 | physicist
1187 | pianist
1188 | pilot
1189 | pirate
1190 | player
1191 | playwright
1192 | poet
1193 | politician
1194 | preacher
1195 | programmer
1196 | psephologist
1197 | psychologist
1198 | rheologist
1199 | runologist
1200 | sailor
1201 | scientist
1202 | screenwriter
1203 | singer
1204 | sociologist
1205 | songwriter
1206 | songwriter
1207 | sportsman
1208 | sportswoman
1209 | sportsperson
1210 | spy
1211 | statistician
1212 | stripper
1213 | swimmer
1214 | televangelist
1215 | theologian
1216 | theorist
1217 | ufologist
1218 | ventriloquist
1219 | veterinarian
1220 | vexillologist
1221 | acrobat
1222 | amateur
1223 | athlete
1224 | ballplayer
1225 | basketeer
1226 | cager
1227 | climber
1228 | cricketer
1229 | footballer
1230 | gymnast
1231 | hooker
1232 | hurdler
1233 | jock
1234 | jumper
1235 | letterman
1236 | lifter
1237 | Olympian
1238 | pentathlete
1239 | pro
1240 | professional
1241 | reserve
1242 | runner
1243 | sharpshooter
1244 | skater
1245 | skier
1246 | sledder
1247 | sport
1248 | sportswoman
1249 | striker
1250 | substitute
1251 | swimmer
1252 | swinger
1253 | vaulter
1254 | weightlifter
1255 | winger
1256 |
--------------------------------------------------------------------------------
/src/main/resources/edu/knowitall/chunkedextractor/nouns_of.txt:
--------------------------------------------------------------------------------
1 | emperor
2 | mayor
3 | president
4 | minister
5 | chancellor
6 | prince
7 | king
8 | queen
9 | governor
10 | premier
11 |
--------------------------------------------------------------------------------
/src/main/resources/edu/knowitall/chunkedextractor/org_words.txt:
--------------------------------------------------------------------------------
1 | Party
2 | Corporation
3 | Ltd.
4 | Board
5 | Council
6 | Inc.
7 | Committee
8 | Inc
9 | Ltd
10 | Group
11 | Limited
12 | Incorporation
13 | Association
14 | Company
15 | Co.
16 | Corp.
17 | Corp
18 | Club
19 | Foundation
20 | Fund
21 | Incorporated
22 | Institute
23 | Society
24 | Syndicate
25 | Union
26 | Authority
27 | Agency
28 | Transit
29 | Group
30 | Corporations
31 | Industries
32 | Industry
33 | Industriale
34 | System
35 | Exchange
36 | Center
37 | Enterprise
38 | Holdings
39 | Holding
40 | Bank
41 | Banks
42 | Post
43 | Productions
44 | Centre
45 | Services
46 | Service
47 | Post
48 | S.A.
49 | C.V.
50 | Ltda.
51 | S.L.
52 | B.V.
53 | Office
54 | Utilities
55 | Nigam
56 | Organisation
57 | Organisations
58 | Commission
59 | Federation
60 | Undertaking
61 | Division
62 | Department
63 | Academy
64 | Bureau
65 | Force
66 | Corps
67 | Administration
68 | Institutions
69 | Cell
70 | Works
71 | Factory
72 | Workshop
73 | Workshops
74 | AB
75 | League
76 | Labs
77 | Lab
78 | Studio
79 | Studios
80 | Cooperation
81 | S.P.A.
82 | ÇİMENTO
83 | Scheme
84 | Express
85 | Establishment
86 | Airlines
87 | Airways
88 | Railways
89 | Air
90 | Research
91 | Programme
92 | Network
93 | Project
94 | Angeles
95 | Indies
96 | Nations
97 | Africa
98 | Lanka
99 | Francisco
100 | Guinea
101 | Carolina
102 | City
103 | Norte
104 | Territory
105 | Peninsula
106 | Region
107 | Island
108 | Islands
109 | Region
110 | Reina
111 | Kudarat
112 | Sul
113 | Croix
114 | Louis
115 | Maarten
116 | Lake
117 | Leone
118 | Sibugay
119 | Eustatius
120 | Scilly
121 | Samoa
122 | Arbor
123 | Barbuda
124 | Pradesh
125 | Bahamas
126 | California
127 | Balears
128 | Bengal
129 | Bañeza
130 | Catarina
131 | Barthélemy
132 | Country
133 | Horizonte
134 | Scotia
135 | Vista
136 | Bender
137 | Herzegovina
138 | Rico
139 | Islander
140 | Aires
141 | Kingdom
142 | Columbia
143 | Coruña
144 | Bronx
145 | Faso
146 | Fasoa
147 | Oro
148 | Verdean
149 | Verde
150 | Tenerife
151 | Town
152 | Janeiro
153 | Carlos
154 | Mancha
155 | Wales
156 | Valley
157 | Rica
158 | Worth
159 | Cracker
160 | Futuna
161 | Fuego
162 | Gallegos
163 | Coast
164 | Rapids
165 | Hague
166 | Kong
167 | Konger
168 | Nevis
169 | Lumpur
170 | Vegas
171 | Province
172 | Hampshire
173 | Lake
174 | Laker
175 | said
176 | met
177 | united
178 | Lankan
179 |
--------------------------------------------------------------------------------
/src/main/resources/edu/knowitall/chunkedextractor/prp_mapping.csv:
--------------------------------------------------------------------------------
1 | my,mine
2 | your,yours
3 | his,him
4 | her,hers
5 | our,ours
6 | their,theirs
7 | My,Mine
8 | Your,Yours
9 | His,Him
10 | Her,Hers
11 | Our,Ours
12 | Their,Theirs
13 |
--------------------------------------------------------------------------------
/src/main/resources/logging.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knowitall/chunkedextractor/33e63d66d723ddbf1d4234f8500183310454d888/src/main/resources/logging.properties
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/Expressions.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall
2 | package chunkedextractor
3 |
4 | import java.util.regex.Pattern
5 | import edu.knowitall.tool.stem.Lemmatized
6 | import edu.knowitall.tool.chunk.ChunkedToken
7 |
8 | object Expressions {
9 | type Token = Lemmatized[ChunkedToken]
10 |
11 |   /**
12 |    * Predicate that matches a regular expression against the string portion
13 |    * of a token; the whole string has to match.
14 |    *
15 |    * The one-argument constructor compiles with no flags, so this comparison
16 |    * is case-sensitive.
17 |    *
18 |    * @author schmmd
19 |    */
20 |   class CaseSensitiveStringExpression(val pattern: Pattern) extends Function[Token, Boolean] {
21 |     /** Compiles `string` with the given `java.util.regex.Pattern` flags. */
22 |     def this(string: String, flags: Int) = this(Pattern.compile(string, flags))
23 |
24 |     /** Compiles `string` with no flags. */
25 |     def this(string: String) = this(string, 0)
26 |
27 |     /** True iff the pattern matches the token's entire string. */
28 |     override def apply(token: Token): Boolean =
29 |       pattern.matcher(token.token.string).matches()
30 |   }
31 |
32 |   /**
33 |    * Tests the string portion of a token against a regular expression; the
34 |    * entire string has to match.
35 |    *
36 |    * Unless explicit flags are supplied, the pattern is case-insensitive.
37 |    *
38 |    * @author schmmd
39 |    */
40 |   class StringExpression(val pattern: Pattern) extends Function[Token, Boolean] {
41 |     /** Builds the expression from a regex source and explicit `Pattern` flags. */
42 |     def this(string: String, flags: Int) = this(Pattern.compile(string, flags))
43 |
44 |     /** Builds the expression from a regex source using `Pattern.CASE_INSENSITIVE`. */
45 |     def this(string: String) = this(string, Pattern.CASE_INSENSITIVE)
46 |
47 |     override def apply(token: Token): Boolean = {
48 |       val surface = token.token.string // the token's string, as opposed to its lemma
49 |       pattern.matcher(surface).matches()
50 |     }
51 |   }
52 |
53 |   /**
54 |    * Tests the lemma of a token (`token.lemma`) against a regular expression;
55 |    * patterns given as a bare string are compiled case-insensitively.
56 |    * @author schmmd
57 |    */
58 |   class LemmaExpression(val pattern: Pattern) extends Function[Token, Boolean] {
59 |     /** Compiles `string` with the supplied `Pattern` flags. */
60 |     def this(string: String, flags: Int) = this(Pattern.compile(string, flags))
61 |
62 |     /** Compiles `string` with `Pattern.CASE_INSENSITIVE`. */
63 |     def this(string: String) = this(string, Pattern.CASE_INSENSITIVE)
64 |
65 |     override def apply(token: Token): Boolean = {
66 |       val lemma = token.lemma // read from the Lemmatized wrapper, not the inner token
67 |       pattern.matcher(lemma).matches()
68 |     }
69 |   }
70 |
71 |   /**
72 |    * Tests the POS tag of a token (`token.token.postag`) against a regular
73 |    * expression; bare-string patterns are compiled case-insensitively.
74 |    * @author schmmd
75 |    */
76 |   class PostagExpression(val pattern: Pattern) extends Function[Token, Boolean] {
77 |     /** Uses `string` as the regex source together with explicit `Pattern` flags. */
78 |     def this(string: String, flags: Int) = this(Pattern.compile(string, flags))
79 |
80 |     /** Uses `string` as the regex source with `Pattern.CASE_INSENSITIVE`. */
81 |     def this(string: String) = this(string, Pattern.CASE_INSENSITIVE)
82 |
83 |     override def apply(token: Token): Boolean = {
84 |       val tagMatcher = pattern.matcher(token.token.postag)
85 |       tagMatcher.matches() // the whole tag must match, not just a prefix
86 |     }
87 |   }
88 |
89 |   /**
90 |    * Tests the chunk tag of a token (`token.token.chunk`) against a regular
91 |    * expression; bare-string patterns are compiled case-insensitively.
92 |    * @author schmmd
93 |    */
94 |   class ChunkExpression(val pattern: Pattern) extends Function[Token, Boolean] {
95 |     /** Regex source plus explicit `java.util.regex.Pattern` flags. */
96 |     def this(string: String, flags: Int) = this(Pattern.compile(string, flags))
97 |
98 |     /** Regex source only; compiled with `Pattern.CASE_INSENSITIVE`. */
99 |     def this(string: String) = this(string, Pattern.CASE_INSENSITIVE)
100 |
101 |     override def apply(token: Token): Boolean = {
102 |       val chunkTag = token.token.chunk
103 |       pattern.matcher(chunkTag).matches() // full match against the chunk tag
104 |     }
105 |   }
106 | }
107 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/Extraction.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall
2 | package chunkedextractor
3 |
4 | import edu.knowitall.collection.immutable.Interval
5 | import edu.knowitall.tool.stem.Lemmatized
6 | import edu.knowitall.tool.chunk.ChunkedToken
7 | import edu.knowitall.tool.tokenize.Token
8 |
9 | /** One part of an extraction: its display text, the tokens it covers, and their token interval. */
10 | case class ExtractionPart[+T <% Token](text: String, tokens: Seq[T], tokenInterval: Interval) {
11 |   override def toString = text
12 |
13 |   /** Interval.open over the first token's `offsets.start` and the last token's `offsets.end`; uses `head`/`last`, so `tokens` must be non-empty. */
14 |   def offsetInterval = Interval.open(tokens.head.offsets.start, tokens.last.offsets.end)
15 |   @deprecated("Use tokenInterval instead.", "1.0.3") def interval = tokenInterval // arguments are (message, since)
16 | }
17 |
18 | object ExtractionPart { // factories that cut an ExtractionPart out of a whole sentence's tokens
19 | def fromSentenceTokens[T <% Token](sentenceTokens: Seq[T], tokenInterval: Interval, text: String) = // caller supplies `text`; tokens are a view over [tokenInterval.start, tokenInterval.end)
20 | new ExtractionPart[T](text, sentenceTokens.view(tokenInterval.start, tokenInterval.end), tokenInterval)
21 | // Overload that derives the text itself: the strings of the covered tokens joined by single spaces.
22 | def fromSentenceTokens[T <% Token](sentenceTokens: Seq[T], tokenInterval: Interval) =
23 | new ExtractionPart(sentenceTokens.view(tokenInterval.start, tokenInterval.end).iterator.map(_.string).mkString(" "), sentenceTokens.view(tokenInterval.start, tokenInterval.end), tokenInterval)
24 | }
25 |
26 | /** A binary extraction made of three parts: first argument, relation and second argument. */
27 | case class BinaryExtraction[+T <% Token](arg1: ExtractionPart[T], rel: ExtractionPart[T], arg2: ExtractionPart[T]) {
28 |   override def toString = Iterable(arg1, rel, arg2).mkString("(", "; ", ")") // renders as "(arg1; rel; arg2)"
29 |
30 |   def text = Iterable(arg1.text, rel.text, arg2.text).mkString(" ") // part texts joined by single spaces
31 |   def tokenInterval = Interval.span(Iterable(arg1.tokenInterval, rel.tokenInterval, arg2.tokenInterval))
32 |   def offsetInterval = Interval.span(Iterable(arg1.offsetInterval, rel.offsetInterval, arg2.offsetInterval))
33 |   def tokens = arg1.tokens ++ rel.tokens ++ arg2.tokens // concatenated in arg1, rel, arg2 order
34 |
35 |   @deprecated("Use tokenInterval instead.", "1.0.3") def interval = tokenInterval // arguments are (message, since)
36 | }
37 |
38 | class BinaryExtractionInstance[+T <% Token](val extr: BinaryExtraction[T], val sent: Seq[T]) { // an extraction together with the sentence tokens it came from
39 |   override def toString = extr.toString + sent.map(_.string).mkString(" <- \"", " ", "\"") // extraction, then the quoted space-joined sentence
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/Extractor.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | abstract class Extractor[A, B] extends Function[A, Iterable[B]] { // a function from one input to a collection of results
4 |   def extract(a: A) = apply(a) // named alias for `apply`
5 | }
6 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/JavaChunkedExtractor.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import edu.knowitall.tool.chunk.ChunkedToken
4 |
5 | trait JavaChunkedExtractor { // extractor interface stated over plain ChunkedToken sequences
6 | def apply(tokens: Seq[ChunkedToken]): Seq[BinaryExtractionInstance[ChunkedToken]] // extraction instances for one sequence of chunked tokens
7 | def extractWithConfidence(tokens: Seq[ChunkedToken]): Seq[(Double, BinaryExtractionInstance[ChunkedToken])] // each instance paired with a Double (presumably its confidence - confirm with implementors)
8 | }
9 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/Nesty.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall
2 | package chunkedextractor
3 |
4 | import edu.knowitall.tool.chunk.ChunkedToken
5 | import edu.knowitall.tool.chunk.OpenNlpChunker
6 | import edu.knowitall.collection.immutable.Interval
7 | import edu.knowitall.tool.stem.MorphaStemmer
8 | import edu.knowitall.tool.stem.Lemmatized
9 | import edu.knowitall.openregex
10 |
11 | import scala.collection.JavaConverters._
12 |
13 | /** Pattern extractor built on `Nesty.pattern`; before matching, each token's chunk tag is suffixed with labels derived from ReVerb extractions. */
14 | class Nesty extends BinaryPatternExtractor[Nesty.ExtractionInstance](Nesty.pattern) {
15 |   lazy val reverb = new ReVerb // constructed on first use by the single-argument apply
16 |
17 |   /** Runs ReVerb over the underlying chunked tokens and delegates to the two-argument overload. */
18 |   override def apply(tokens: Seq[PatternExtractor.Token]): Iterable[Nesty.ExtractionInstance] = {
19 |     val reverbExtractions = reverb.extract(tokens.map(_.token))
20 |     this.apply(tokens, reverbExtractions.map(_.extr))
21 |   }
22 |
23 |   /** Annotates every token's chunk tag with the labels of the given extractions, then applies the pattern to the annotated tokens. */
24 |   def apply(tokens: Seq[PatternExtractor.Token], reverbExtractions: Iterable[BinaryExtraction[ChunkedToken]]): Iterable[Nesty.ExtractionInstance] = {
25 |     val transformed =
26 |       tokens.iterator.zipWithIndex.map { case (t, i) =>
27 |         // Each extraction contributes at most one label: the first matching case wins, so every B- test precedes its I- test.
28 |         val ext = reverbExtractions.flatMap {
29 |           case extr if (extr.arg1.tokenInterval.start == i) =>
30 |             Some("B-ARG1")
31 |           case extr if (extr.arg1.tokenInterval superset Interval.singleton(i)) =>
32 |             Some("I-ARG1")
33 |           case extr if (extr.rel.tokenInterval.start == i) =>
34 |             Some("B-REL")
35 |           case extr if (extr.rel.tokenInterval superset Interval.singleton(i)) =>
36 |             Some("I-REL")
37 |           case extr if (extr.arg2.tokenInterval.start == i) =>
38 |             Some("B-ARG2")
39 |           case extr if (extr.arg2.tokenInterval superset Interval.singleton(i)) =>
40 |             Some("I-ARG2")
41 |           case _ => None
42 |         }.mkString(":")
43 |         // New chunk tag = old tag + ":" + labels joined by ":"; it ends in a bare ":" when no extraction covers token i.
44 |         t.copy(token= ChunkedToken(t.token.chunk + ":" + ext, t.token.postag, t.token.string, t.token.offset))
45 |       }.toSeq
46 |
47 |     super.apply(transformed)
48 |   }
49 |
50 |   /** Builds the nested extraction from the match's named groups; each group is unwrapped with `.get`, and the result is wrapped in `Some`. */
51 |   override def buildExtraction(tokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
52 |     implicit def patternTokenAsToken2(lemmatized: PatternExtractor.Token): edu.knowitall.tool.tokenize.Token = lemmatized.token
53 |     val sentence = tokens.map(_.token) // mapped once; shared by every part and by the returned instance
54 |     def part(group: String) = ExtractionPart.fromSentenceTokens[Nesty.Token](sentence, PatternExtractor.intervalFromGroup(m.group(group).get))
55 |     val relation = part("baseRelation") // looked up first, as before, so group-lookup order is unchanged
56 |     val extr = new Nesty.NestedExtraction(
57 |       part("arg1"),
58 |       part("nestedRelation"),
59 |       new BinaryExtraction[Nesty.Token](part("baseArg1"), relation, part("baseArg2")))
60 |     Some(new Nesty.ExtractionInstance(extr, sentence))
61 |   }
62 | }
63 |
64 | /** Companion holding the types, word list, pattern text and command-line entry point for [[Nesty]]. */
65 | object Nesty {
66 |   type Token = ChunkedToken
67 |
68 |   /** A nested extraction together with the sentence it was found in. */
69 |   class ExtractionInstance(override val extr: Nesty.NestedExtraction, sent: Seq[Nesty.Token])
70 |     extends BinaryExtractionInstance(extr, sent)
71 |
72 |   /**
73 |    * Binary extraction whose arg2 is built from the text, tokens and interval
74 |    * of another binary extraction; that inner extraction stays available as `nested`.
75 |    */
76 |   class NestedExtraction(arg1: ExtractionPart[Token], rel: ExtractionPart[Token], val nested: BinaryExtraction[Nesty.Token])
77 |     extends BinaryExtraction(arg1, rel, new ExtractionPart[Token](nested.text, nested.tokens, nested.tokenInterval)) {
78 |   }
79 |
80 |   // Word list; entries are kept exactly as found.
81 |   val verbs = List("be", "say", "have", "believe",
82 |     "tell", "suggest", "argue", "indicate", "claim", "note", "know",
83 |     "show", "state", "find", "conclude", "report", "means", "announce",
84 |     "think", "warn", "write", "add", "demonstrate", "appear", "reveal",
85 |     "agree", "assert", "acknowledge", "hope", "realize", "fear",
86 |     "suspect", "mean", "feel", "see", "explain", "confirm", "mention",
87 |     "ask", "seem", "observe", "estimate", "admit", "recognize",
88 |     "allege", "insist", "require", "discover", "declare", "imply",
89 |     "give", "deny", "understand", "express", "recommend", "worry",
90 |     "point", "maintain", "contend", "stress", "prove", "demand",
91 |     "learn", "hear", "assume", "predict", "inform", "complain",
92 |     "reflect", "provide", "hold", "testify", "request", "notice",
93 |     "assess", "remind", "wish", "speculate", "raise", "forget",
94 |     "decide", "promise", "pray", "expect", "ensure", "challenge",
95 |     "recall", "present", "determine", "doubt", "concede", "reply",
96 |     "reject", "propose", "make", "include", "emphasize", "concern",
97 |     "charge", "take", "remains", "receive", "exist", "assure", "teach",
98 |     "rule", "respond", "remember", "reinforce", "happen", "do",
99 |     "caution", "underscore", "turn", "threaten", "signal", "repeat",
100 |     "release", "relate", "presume", "discuss", "confess", "bear",
101 |     "advise", "trust", "reiterate", "disclose", "dictate", "convince",
102 |     "consider", "anticipate", "answer", "accept", "will", "swear",
103 |     "regret", "order", "issue", "increase", "illustrate", "ignore",
104 |     "establish", "continue", "certify", "bemoan", "affirm", "surface",
105 |     "support", "seize", "sa", "refute", "prompt", "posit", "offer",
106 |     "live", "leave", "lack", "invite", "imagine", "highlight",
107 |     "follow", "fail", "examine", "build", "boast", "begin", "urge",
108 |     "theorize", "represent", "remain", "reason", "proclaim", "pretend",
109 |     "postulate", "perceive", "outline", "mark", "list", "judge",
110 |     "guarantee", "grasp", "go", "face", "emerge", "echo", "dispute",
111 |     "deserve", "describe", "decree", "control", "contain", "cite",
112 |     "cause", "brag", "bolster", "aver", "alert", "wwa", "vow", "voice",
113 |     "use", "underline", "thank", "survey", "strengthen", "spread",
114 |     "specify", "speak", "signify", "sense", "send", "rebuff", "read",
115 |     "reach", "put", "protest", "prescribe", "postpone", "pledge",
116 |     "pick", "persuade", "operate", "object", "need", "nag", "muslim",
117 |     "mirror", "lose", "lessen", "keep", "get", "generate", "form",
118 |     "explore", "eply", "embody", "dismiss", "disagree", "detonate",
119 |     "denounce", "come", "claus", "clarify", "circulate", "call",
120 |     "brush", "bring", "alarm")
121 |
122 |   // Sub-pattern for the nested relation phrase; spliced into `pattern` below.
123 |   final val nestedRelationPatternString =
124 |     "(?:(?:(?:? ? (?:) ? ?)+" +
125 |     "(?:(?: *)*)) |" +
126 |     "(?:(?:? ? (?:) ? ?)+" +
127 |     "(?:(?: *)* )))"
128 |
129 |   // Full nested-relation pattern, assembled piece by piece:
130 |   final val pattern =
131 |     // a noun phrase, with optional PP attachment (don't allow because)
132 |     "(: * (?: *)* ?)" +
133 |     // then a nested relation phrase (negative lookahead keeps the next
134 |     // noun phrase from "absorbing" part of the nested relation)
135 |     "(:" + nestedRelationPatternString + "+)" +
136 |     // then another noun phrase; OpenNLP sometimes classifies "that" as the
137 |     // start of the chunk, so the chunk is allowed to start with I-NP
138 |     "(: *)" +
139 |     // then a base relation phrase (again with negative lookahead)
140 |     "(: *)" +
141 |     // then a final noun phrase (possibly starting from inside)
142 |     "(: *)"
143 |
144 |   /**
145 |    * Command-line entry point. With `--pattern` as the first argument the
146 |    * pattern text is printed; otherwise sentences are read from stdin, one per
147 |    * line, and the nested extractions of each are printed. Any exception while
148 |    * processing input is printed and the process exits with status 2.
149 |    */
150 |   def main(args: Array[String]): Unit = {
151 |     System.out.println("Creating the nested relation extractor... ")
152 |     val extractor = new Nesty()
153 |
154 |     val printPatternOnly = args.length > 0 && args(0) == "--pattern"
155 |     if (printPatternOnly) {
156 |       System.out.println(Nesty.pattern)
157 |     } else {
158 |       System.out.println("Creating the sentence chunker... ")
159 |       val chunker = new OpenNlpChunker()
160 |       val stemmer = new MorphaStemmer()
161 |       System.out.println("Please enter a sentence:")
162 |
163 |       try {
164 |         scala.io.Source.stdin.getLines.foreach { line =>
165 |           val lemmatized = chunker.chunk(line) map stemmer.lemmatizeToken
166 |
167 |           extractor(lemmatized).foreach { extraction =>
168 |             println(extraction.extr)
169 |           }
170 |
171 |           System.out.println()
172 |         }
173 |       } catch {
174 |         case e: Exception =>
175 |           e.printStackTrace()
176 |           System.exit(2)
177 |       }
178 |     }
179 |   }
180 | }
169 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/PatternExtractor.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import java.util.regex.Pattern
4 |
5 | import scala.collection.JavaConverters.asScalaBufferConverter
6 | import scala.collection.JavaConverters.seqAsJavaListConverter
7 |
8 | import com.google.common.base.{Function => GuavaFunction}
9 |
10 | import edu.knowitall.collection.immutable.Interval
11 | import edu.knowitall.tool.chunk.ChunkedToken
12 | import edu.knowitall.tool.stem.Lemmatized
13 |
14 | import edu.knowitall.openregex
15 | import edu.washington.cs.knowitall.logic.{Expression => LExpression}
16 | import edu.washington.cs.knowitall.logic.LogicExpression
17 | import edu.washington.cs.knowitall.regex.Expression
18 | import edu.washington.cs.knowitall.regex.Match
19 | import edu.washington.cs.knowitall.regex.RegularExpression
20 |
21 | /** Helpers for compiling token-level patterns and reading intervals out of their matches. */
22 | object PatternExtractor {
23 |   type Token = Lemmatized[ChunkedToken]
24 |   object Token {
25 |     implicit def patternTokenAsToken(lemmatized: PatternExtractor.Token): edu.knowitall.tool.tokenize.Token = lemmatized.token
26 |   }
27 |
28 |   /** Adapts a Scala function to a Guava `Function`. */
29 |   implicit def guavaFromFunction[A, B](f: A => B) = new GuavaFunction[A, B] {
30 |     override def apply(a: A) = f(a)
31 |   }
32 |
33 |   /** Adapts a Scala predicate to a logic-expression argument. */
34 |   implicit def logicArgFromFunction[T](f: T => Boolean) = new LExpression.Arg[T] {
35 |     override def apply(token: T) = f(token)
36 |   }
37 |
38 |   // A value wrapped in matching single or double quotes; group 2 is the unquoted content.
39 |   // Compiled once here rather than once per token expression.
40 |   private val valuePattern = Pattern.compile("([\"'])(.*)\\1")
41 |
42 |   /**
43 |    * Compiles `pattern` into a pattern over [[Token]]s. Each token expression is
44 |    * a logic expression whose atoms have the form `attribute="value"` (single
45 |    * quotes also accepted), with attribute one of `string`, `lemma`, `pos` or `chunk`.
46 |    *
47 |    * @throws IllegalArgumentException if an atom has no `=`, its value is not
48 |    *         quoted, or its attribute name is not one of the four above
49 |    */
50 |   def compile(pattern: String) =
51 |     openregex.Pattern.compile(pattern, (expression: String) => {
52 |       val deserializeToken: String => (Token => Boolean) = (argument: String) => {
53 |         // Split on the first '=' only, so the quoted value may itself contain '='.
54 |         val (base, value) = argument.split("=", 2) match {
55 |           case Array(attribute, quoted) => (attribute, quoted)
56 |           case _ =>
57 |             throw new IllegalArgumentException("Expected attribute=\"value\" but found: " + argument)
58 |         }
59 |
60 |         val matcher = valuePattern.matcher(value)
61 |         if (!matcher.matches()) {
62 |           throw new IllegalArgumentException("Value not enclosed in quote (\") or ('): " + argument)
63 |         }
64 |
65 |         val string = matcher.group(2)
66 |
67 |         base match {
68 |           case "string" => new Expressions.StringExpression(string)
69 |           case "lemma" => new Expressions.LemmaExpression(string)
70 |           case "pos" => new Expressions.PostagExpression(string)
71 |           case "chunk" => new Expressions.ChunkExpression(string)
72 |           // Report an unsupported attribute the same way as the other input errors, not as a MatchError.
73 |           case _ =>
74 |             throw new IllegalArgumentException("Unknown token attribute '" + base + "' in: " + argument)
75 |         }
76 |       }
77 |
78 |       val logic: LogicExpression[Token] =
79 |         LogicExpression.compile(expression, deserializeToken andThen logicArgFromFunction[Token])
80 |
81 |       (token: Token) => {
82 |         logic.apply(token)
83 |       }
84 |     })
85 |
86 |   /** The group's interval, or the empty interval when either end of it is -1. */
87 |   def intervalFromGroup(group: openregex.Pattern.Group[_]): Interval = {
88 |     val interval = group.interval
89 |
90 |     if (interval.start == -1 || interval.end == -1) {
91 |       Interval.empty
92 |     } else {
93 |       interval
94 |     }
95 |   }
96 | }
75 |
76 | /**
77 |  * Base class for extractors driven by a compiled token pattern: every match
78 |  * of `expression` is handed to `buildExtraction`, and results rejected by
79 |  * `filterExtraction` are dropped.
80 |  */
81 | abstract class BinaryPatternExtractor[B](val expression: openregex.Pattern[PatternExtractor.Token])
82 |   extends Extractor[Seq[PatternExtractor.Token], B] {
83 |   def this(pattern: String) = this(PatternExtractor.compile(pattern))
84 |
85 |   /** All extractions built from the pattern's matches in `tokens` that survive filtering. */
86 |   def apply(tokens: Seq[PatternExtractor.Token]): Iterable[B] = {
87 |     val candidates = expression.findAll(tokens.toList).map(found => buildExtraction(tokens, found))
88 |
89 |     candidates
90 |       .withFilter(candidate => !filterExtraction(candidate))
91 |       .map(_.get)
92 |   }
93 |
94 |   /** True when the candidate must be discarded; by default only empty candidates are. */
95 |   protected def filterExtraction(extraction: Option[B]): Boolean =
96 |     extraction.isEmpty
97 |
98 |   /** Turns one pattern match into an extraction, or None when the match should yield nothing. */
99 |   protected def buildExtraction(tokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]): Option[B]
100 | }
99 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/R2A2.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import edu.knowitall.tool.chunk.ChunkedToken
4 | import edu.knowitall.collection.immutable.Interval
5 |
6 | import edu.washington.cs.knowitall.extractor.ReVerbExtractor
7 | import edu.washington.cs.knowitall.commonlib.Range
8 | import edu.washington.cs.knowitall.nlp.ChunkedSentence
9 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction
10 | import edu.washington.cs.knowitall.extractor.conf.ConfidenceFunction
11 | import edu.washington.cs.knowitall.util.DefaultObjects
12 | import edu.washington.cs.knowitall.extractor.conf.ReVerbOpenNlpConfFunction
13 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction
14 | import edu.washington.cs.knowitall.extractor
15 | import edu.washington.cs.knowitall.argumentidentifier.ConfidenceMetric
16 |
17 | /**
18 |  * Wrapper exposing the Java R2A2 extractor through this package's extraction
19 |  * types. The no-argument constructor supplies a default extractor and confidence metric.
20 |  */
21 | class R2A2(val r2a2: extractor.R2A2, val conf: Option[ConfidenceMetric] = None) extends Extractor[Seq[ChunkedToken], BinaryExtractionInstance[ChunkedToken]] with JavaChunkedExtractor {
22 |   def this() = this(new extractor.R2A2, Some(new ConfidenceMetric))
23 |
24 |   /** Confidence of `extr`; throws IllegalArgumentException when no metric was supplied. */
25 |   private def confidence(extr: ChunkedBinaryExtraction): Double =
26 |     conf match {
27 |       case Some(metric) => metric getConf extr
28 |       case None => throw new IllegalArgumentException("No confidence function defined.")
29 |     }
30 |
31 |   /** Packs the tokens into a ChunkedSentence and runs the wrapped extractor on it. */
32 |   private def reverbExtract(tokens: Seq[ChunkedToken]) = {
33 |     import collection.JavaConverters._
34 |
35 |     val offsets = tokens.map(t => Range.fromInterval(t.offset, t.offset + t.string.length)).toArray
36 |     val strings = tokens.map(_.string).toArray
37 |     val postags = tokens.map(_.postag).toArray
38 |     val chunks = tokens.map(_.chunk).toArray
39 |
40 |     r2a2.extract(new ChunkedSentence(offsets, strings, postags, chunks)).asScala
41 |   }
42 |
43 |   /** Converts a Java extraction into a BinaryExtraction whose parts are views over `tokens`. */
44 |   private def convertExtraction(tokens: Seq[ChunkedToken])(extr: ChunkedBinaryExtraction) = {
45 |     def convertPart(ce: ChunkedExtraction) = {
46 |       val range = ce.getRange
47 |       val interval = Interval.open(range.getStart, range.getEnd)
48 |       new ExtractionPart(ce.getText, tokens.view(interval.start, interval.end), interval)
49 |     }
50 |
51 |     val arg1 = convertPart(extr.getArgument1)
52 |     val rel = convertPart(extr.getRelation)
53 |     val arg2 = convertPart(extr.getArgument2)
54 |     new BinaryExtraction(arg1, rel, arg2)
55 |   }
56 |
57 |   /** Extractions for the sentence, without confidence scores. */
58 |   def apply(tokens: Seq[ChunkedToken]): Seq[BinaryExtractionInstance[ChunkedToken]] = {
59 |     reverbExtract(tokens).map { extr =>
60 |       new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens)
61 |     }(scala.collection.breakOut)
62 |   }
63 |
64 |   /** Extractions paired with an optional confidence: None when no metric is configured. */
65 |   @deprecated("Use extractWithConfidence", "2.4.1")
66 |   def extractWithConf(tokens: Seq[ChunkedToken]): Seq[(Option[Double], BinaryExtractionInstance[ChunkedToken])] = {
67 |     val raw = reverbExtract(tokens)
68 |     val scores = raw.map(extr => conf.map(metric => metric.getConf(extr)))
69 |     val instances = raw.map(extr => new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens))
70 |
71 |     scores.zip(instances).toList
72 |   }
73 |
74 |   /** Extractions paired with their confidence; throws IllegalArgumentException when no metric is configured and there is at least one extraction. */
75 |   def extractWithConfidence(tokens: Seq[ChunkedToken]): Seq[(Double, BinaryExtractionInstance[ChunkedToken])] = {
76 |     val raw = reverbExtract(tokens)
77 |     // Scores are computed for every extraction before any conversion takes place.
78 |     val scores = raw.map(extr => confidence(extr))
79 |     val instances = raw.map(extr => new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens))
80 |
81 |     scores.zip(instances).toList
82 |   }
83 | }
71 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/ReVerb.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import edu.knowitall.tool.chunk.ChunkedToken
4 | import edu.knowitall.collection.immutable.Interval
5 |
6 | import edu.washington.cs.knowitall.extractor.ReVerbExtractor
7 | import edu.washington.cs.knowitall.nlp.ChunkedSentence
8 | import edu.washington.cs.knowitall.commonlib.Range
9 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction
10 | import edu.washington.cs.knowitall.extractor.conf.ConfidenceFunction
11 | import edu.washington.cs.knowitall.util.DefaultObjects
12 | import edu.washington.cs.knowitall.extractor.conf.ReVerbOpenNlpConfFunction
13 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction
14 |
15 | /**
16 |  * Wrapper exposing the Java ReVerb extractor through this package's extraction
17 |  * types. The no-argument constructor supplies a default extractor and confidence function.
18 |  */
19 | class ReVerb(val reverb: ReVerbExtractor, val conf: Option[ConfidenceFunction] = None) extends Extractor[Seq[ChunkedToken], BinaryExtractionInstance[ChunkedToken]] with JavaChunkedExtractor {
20 |   def this() = this(new ReVerbExtractor, Some(new ReVerbOpenNlpConfFunction))
21 |
22 |   /** Confidence of `extr`; throws IllegalArgumentException when no function was supplied. */
23 |   private def confidence(extr: ChunkedBinaryExtraction): Double =
24 |     conf match {
25 |       case Some(function) => function getConf extr
26 |       case None => throw new IllegalArgumentException("No confidence function defined.")
27 |     }
28 |
29 |   /** Packs the tokens into a ChunkedSentence and runs the wrapped extractor on it. */
30 |   private def reverbExtract(tokens: Seq[ChunkedToken]) = {
31 |     import collection.JavaConverters._
32 |
33 |     val offsets = tokens.map(t => Range.fromInterval(t.offset, t.offset + t.string.length)).toArray
34 |     val strings = tokens.map(_.string).toArray
35 |     val postags = tokens.map(_.postag).toArray
36 |     val chunks = tokens.map(_.chunk).toArray
37 |
38 |     reverb.extract(new ChunkedSentence(offsets, strings, postags, chunks)).asScala
39 |   }
40 |
41 |   /** Converts a Java extraction into a BinaryExtraction whose parts are views over `tokens`. */
42 |   private def convertExtraction(tokens: Seq[ChunkedToken])(extr: ChunkedBinaryExtraction) = {
43 |     def convertPart(ce: ChunkedExtraction) = {
44 |       val range = ce.getRange
45 |       val interval = Interval.open(range.getStart, range.getEnd)
46 |       new ExtractionPart(ce.getText, tokens.view(interval.start, interval.end), interval)
47 |     }
48 |
49 |     val arg1 = convertPart(extr.getArgument1)
50 |     val rel = convertPart(extr.getRelation)
51 |     val arg2 = convertPart(extr.getArgument2)
52 |     new BinaryExtraction(arg1, rel, arg2)
53 |   }
54 |
55 |   /** Extractions for the sentence, without confidence scores. */
56 |   def apply(tokens: Seq[ChunkedToken]): Seq[BinaryExtractionInstance[ChunkedToken]] = {
57 |     reverbExtract(tokens).map { extr =>
58 |       new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens)
59 |     }(scala.collection.breakOut)
60 |   }
61 |
62 |   /** Extractions paired with their confidence; throws IllegalArgumentException when no function is configured and there is at least one extraction. */
63 |   def extractWithConfidence(tokens: Seq[ChunkedToken]): Seq[(Double, BinaryExtractionInstance[ChunkedToken])] = {
64 |     val raw = reverbExtract(tokens)
65 |     // Scores are computed for every extraction before any conversion takes place.
66 |     val scores = raw.map(extr => confidence(extr))
67 |     val instances = raw.map(extr => new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens))
68 |
69 |     scores.zip(instances).toList
70 |   }
71 | }
58 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/Relnoun.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall
2 | package chunkedextractor
3 |
4 | import resource._
5 |
6 | import edu.knowitall.tool.chunk.ChunkedToken
7 | import edu.knowitall.collection.immutable.Interval
8 | import edu.knowitall.tool.chunk.OpenNlpChunker
9 | import edu.knowitall.tool.stem.MorphaStemmer
10 | import edu.knowitall.tool.stem.Lemmatized
11 | import scala.io.Source
12 | import edu.knowitall.common.Timing
13 | import scala.collection.JavaConverters._
14 | import edu.knowitall.openregex
15 | import edu.washington.cs.knowitall.regex.Match
16 | import edu.washington.cs.knowitall.regex.RegularExpression
17 | import Relnoun._
18 | import java.io.PrintStream
19 | import java.io.PrintWriter
20 | import java.io.File
21 | import java.nio.charset.MalformedInputException
22 |
23 | /**
24 |  * Relational-noun extractor: runs a fixed list of pattern-based sub-extractors
25 |  * over a sentence and merges their results.
26 |  *
27 |  * @param encloseInferredWords  passed to every sub-extractor; controls whether inferred words are written as "[is]" or "is"
28 |  * @param includeReverbRelnouns when true the verb-based sub-extractor is added as well
29 |  * @param includeUnknownArg2    passed to every sub-extractor; allows extractions whose arg2 is the [UNKNOWN] placeholder
30 |  */
31 | class Relnoun(val encloseInferredWords: Boolean = true, val includeReverbRelnouns: Boolean = true, val includeUnknownArg2: Boolean = false)
32 |   extends Extractor[Seq[PatternExtractor.Token], BinaryExtractionInstance[Relnoun.Token]] {
33 |
34 |   val subextractors: Seq[BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]]] = Seq(
35 |     new AppositiveExtractor(this.encloseInferredWords, this.includeUnknownArg2),
36 |     new AppositiveExtractor2(this.encloseInferredWords, this.includeUnknownArg2),
37 |     new AdjectiveDescriptorExtractor(this.encloseInferredWords, this.includeUnknownArg2),
38 |     new PossessiveExtractor(this.encloseInferredWords, this.includeUnknownArg2),
39 |     new PossessiveAppositiveExtractor(this.encloseInferredWords, this.includeUnknownArg2),
40 |     new PossessiveIsExtractor(this.encloseInferredWords, this.includeUnknownArg2),
41 |     new IsPossessiveExtractor(this.encloseInferredWords, this.includeUnknownArg2),
42 |     new OfIsExtractor(this.encloseInferredWords, this.includeUnknownArg2),
43 |     new OfCommaExtractor(this.encloseInferredWords, this.includeUnknownArg2),
44 |     new PossessiveReverseExtractor(this.encloseInferredWords, this.includeUnknownArg2),
45 |     new ProperNounAdjectiveExtractor(this.encloseInferredWords, this.includeUnknownArg2)) ++
46 |     (if (includeReverbRelnouns) Seq(new VerbBasedExtractor(this.encloseInferredWords, this.includeUnknownArg2)) else Seq.empty)
47 |
48 |   /**
49 |    * Runs every sub-extractor over `tokens` and returns their extractions in
50 |    * order, minus redundant [UNKNOWN] ones: an extraction whose arg2 is
51 |    * [UNKNOWN] is dropped when any extraction with the same arg1 text has a
52 |    * concrete arg2. All other extractions are kept as they are.
53 |    */
54 |   def apply(tokens: Seq[Lemmatized[ChunkedToken]]): Seq[BinaryExtractionInstance[Relnoun.Token]] = {
55 |     val extrs = for (
56 |       sub <- subextractors;
57 |       extr <- sub(tokens)
58 |     ) yield extr
59 |
60 |     // arg1 texts that already have at least one extraction with a concrete arg2;
61 |     // collected in one pass so that filtering below needs no nested scan.
62 |     val arg1sWithKnownArg2: Set[String] =
63 |       extrs.collect {
64 |         case instance if instance.extr.arg2.text != UNKNOWN => instance.extr.arg1.text
65 |       }.toSet
66 |
67 |     extrs.filterNot { instance =>
68 |       instance.extr.arg2.text == UNKNOWN && arg1sWithKnownArg2.contains(instance.extr.arg1.text)
69 |     }
70 |   }
71 | }
75 |
76 | object Relnoun {
77 |
78 | type Token = ChunkedToken
79 |
80 |   /** URL of a resource located relative to this object's class; throws IllegalArgumentException when it is missing. */
81 |   private def requiredResource(name: String): java.net.URL =
82 |     Option(this.getClass.getResource(name)).getOrElse {
83 |       throw new IllegalArgumentException("Could not load " + name)
84 |     }
85 |
86 |   val demonyms_url = requiredResource("demonyms.csv")
87 |
88 |   // One Array of comma-separated fields per line; a one-shot iterator consumed further down.
89 |   val demonyms_iter = Source.fromInputStream(demonyms_url.openStream(),"UTF-8").getLines().map(line => line.split(","))
90 |
91 |   val prp_mapping_url = requiredResource("prp_mapping.csv")
92 |
93 |   // One Array of comma-separated fields per line; a one-shot iterator consumed further down.
94 |   val prp_mapping_iter = Source.fromInputStream(prp_mapping_url.openStream(),"UTF-8").getLines().map(line => line.split(","))
95 |
96 |   val nouns_url = requiredResource("nouns.txt")
97 |
98 |   val ofNouns_url = requiredResource("nouns_of.txt")
99 |
100 |   val orgsWords_url = requiredResource("org_words.txt")
101 |
102 |   val relnoun_prefixes_url = requiredResource("relnoun_prefixes.txt")
107 |
108 |   // First CSV field -> second CSV field, for every line of prp_mapping.csv.
109 |   var prp_mapping_map = scala.collection.mutable.Map[String, String]()
110 |   for (fields <- prp_mapping_iter) {
111 |     prp_mapping_map += fields(0) -> fields(1)
112 |   }
113 |
114 |   // Prefixes under which every demonyms.csv entry is additionally registered, in insertion order.
115 |   private val directionPrefixes =
116 |     Seq("South", "North", "East", "West", "Southern", "Northern", "Eastern", "Western", "Central")
117 |
118 |   // First CSV field -> second CSV field for every line of demonyms.csv, plus the
119 |   // same pair with each direction prefix put in front of both key and value.
120 |   var demonyms_map = scala.collection.mutable.Map[String, String]()
121 |   for (fields <- demonyms_iter) {
122 |     val (key, value) = (fields(0), fields(1))
123 |     demonyms_map += key -> value
124 |     for (prefix <- directionPrefixes) {
125 |       demonyms_map += (prefix + " " + key) -> (prefix + " " + value)
126 |     }
127 |   }
128 |
129 |   // Every key followed by every value of demonyms_map.
130 |   val (demonyms_key, demonyms_val) = demonyms_map.toSeq.unzip
131 |   val locations = demonyms_key ++ demonyms_val
131 |
132 | val nounChunk = "(?: *)" // NOTE(review): the token constraints of this fragment look missing in this copy of the file - confirm against the repository.
133 | val properNounChunk = "(?: *) | (?: * *)"; // NOTE(review): same caveat as nounChunk; two alternatives separated by '|'.
134 | val properRelnounChunk = "(: +) | (: +)" // NOTE(review): same caveat as nounChunk.
135 |
136 | val pronoun = "" // NOTE(review): empty here; presumably a pronoun token pattern - confirm.
137 | val pronoun_possessive = "" // NOTE(review): empty here; presumably a possessive-pronoun token pattern - confirm.
138 |
139 | val relnoun = "(string='${relnoun}' | string='${ofNoun}')"; // Token whose string matches the ${relnoun} or ${ofNoun} placeholder; placeholders are filled in by patternReplace.
140 | val relnoun_prefix = "string=\"${relnoun_prefixes}\"" // Token whose string matches the ${relnoun_prefixes} placeholder.
141 | val relnoun_prefix_noPrefixCheck = "!(string=\"${demonyms}\")" // Token whose string does not match the ${demonyms} placeholder.
142 |
143 | val relnoun_prefix_pos = " & pos=\"JJS?|VBDS?|VBNS?|NNS?|NNPS?|RBS?\" & !(string=\"${orgwords}\")" // Extra constraints appended to a prefix token: allowed POS tags, and string must not match ${orgwords}.
144 |
145 | val relnoun_prefix_tagged = "<" + relnoun_prefix + relnoun_prefix_pos + ">*" // Zero or more tokens satisfying relnoun_prefix and relnoun_prefix_pos.
146 | val relnoun_prefix_tagged_noPrefixCheck = "<" + relnoun_prefix_noPrefixCheck + relnoun_prefix_pos + ">*" // Zero or more tokens satisfying relnoun_prefix_noPrefixCheck and relnoun_prefix_pos.
147 |
148 |
149 |   /** Reads every line of a UTF-8 resource, trimmed, and closes the underlying stream afterwards. */
150 |   private def readTrimmedLines(url: java.net.URL): Array[String] = {
151 |     val source = Source.fromInputStream(url.openStream(), "UTF-8")
152 |     // toArray forces the whole file to be read before the stream is closed.
153 |     try source.getLines().map(_.trim()).toArray
154 |     finally source.close()
155 |   }
156 |
157 |   // Relational nouns from nouns.txt, plus an "ex-" prefixed variant of each.
158 |   val input_nouns = readTrimmedLines(nouns_url)
159 |   val ex_nouns = input_nouns.map("ex-" + _)
160 |   val nouns = input_nouns ++ ex_nouns
161 |
162 |   // Word lists substituted into the pattern placeholders by patternReplace.
163 |   private final val orgs = readTrimmedLines(orgsWords_url)
164 |   private final val ofNouns = readTrimmedLines(ofNouns_url)
165 |   private final val adjs = readTrimmedLines(relnoun_prefixes_url)
156 |
157 | val UNKNOWN = "[UNKNOWN]" // Placeholder arg2 text; finalizeExtraction substitutes it for an empty or "its" arg2 when includeUnknownArg2 is set.
158 | val arg1_notAllowed = List("Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday", // Day and month names; finalizeExtraction rejects an extraction whose arg1 text or final arg2 text equals one of these.
159 | "January","February","March","April","May","June","July","August","September","October","November","December")
160 |
161 | abstract class BaseExtractor { // Base of the extractor companion objects; carries the pattern template that patternReplace expands.
162 | val pattern: String // Pattern text, possibly containing ${...} placeholders.
163 | }
164 |
165 |   /**
166 |    * Expands the placeholders of an extractor's pattern template: each
167 |    * placeholder is replaced by the '|'-joined entries of its word list.
168 |    * Replacements are applied in the order listed.
169 |    */
170 |   def patternReplace(extractor: BaseExtractor) = {
171 |     val substitutions = Seq(
172 |       "${relnoun}" -> nouns.mkString("|"),
173 |       "${ofNoun}" -> ofNouns.mkString("|"),
174 |       "${orgwords}" -> orgs.mkString("|"),
175 |       "${relnoun_prefixes}" -> adjs.mkString("|"),
176 |       "${demonyms}" -> locations.mkString("|"))
177 |
178 |     substitutions.foldLeft(extractor.pattern) {
179 |       case (text, (placeholder, alternatives)) => text.replace(placeholder, alternatives)
180 |     }
181 |   }
172 |
173 |   /**
174 |    * Shared post-processing for the sub-extractors: rewrites arg2, validates
175 |    * the extraction and decorates the relation text.
176 |    *
177 |    * arg2 is rewritten in three steps, each keyed on the ORIGINAL arg2 text:
178 |    * a prp_mapping_map replacement, then the [UNKNOWN] placeholder (only when
179 |    * `includeUnknownArg2` is set and arg2 is empty or "its"), then a
180 |    * demonyms_map replacement. The extraction is rejected (None) when arg1 is
181 |    * "it", when arg2 is "its" after the second step, when the final arg2 text
182 |    * is empty, or when arg1 or the final arg2 is a day or month name.
183 |    *
184 |    * @param includeIs   prefix the relation text with the inferred "is"
185 |    * @param includePost append the word chosen by `inferred_post` to the relation text
186 |    */
187 |   protected def finalizeExtraction[B](m: openregex.Pattern.Match[PatternExtractor.Token], encloseInferredWords: Boolean, patternTokens: Seq[PatternExtractor.Token],
188 |     arg1: ExtractionPart[ChunkedToken], relation: ExtractionPart[ChunkedToken], arg2: ExtractionPart[ChunkedToken],
189 |     includeUnknownArg2:Boolean, includeIs:Boolean, includePost: Boolean): Option[BinaryExtractionInstance[Relnoun.Token]] = {
190 |
191 |     val sentence = patternTokens.map(_.token)
192 |
193 |     // Step 1: replace a mapped pronoun.
194 |     val pronounResolved: ExtractionPart[ChunkedToken] = Relnoun.prp_mapping_map.get(arg2.text) match {
195 |       case Some(replacement) => ExtractionPart.fromSentenceTokens(sentence, arg2.tokenInterval, replacement)
196 |       case None => arg2
197 |     }
198 |
199 |     // Step 2: fall back to [UNKNOWN] when arg2 is absent or "its" (possibly produced by AdjectiveDescriptorExtractor).
200 |     val arg2IsMissing = arg2.text == "" || arg2.text == "its"
201 |     val withUnknown: ExtractionPart[ChunkedToken] =
202 |       if (arg2IsMissing && includeUnknownArg2) ExtractionPart.fromSentenceTokens(sentence, relation.tokenInterval, UNKNOWN)
203 |       else pronounResolved
204 |
205 |     val danglingPronoun = arg1.text == "it" || withUnknown.text == "its"
206 |
207 |     // Step 3: replace a demonym, keeping the interval chosen by the previous steps.
208 |     val finalArg2: ExtractionPart[ChunkedToken] = Relnoun.demonyms_map.get(arg2.text) match {
209 |       case Some(replacement) => ExtractionPart.fromSentenceTokens(sentence, withUnknown.tokenInterval, replacement)
210 |       case None => withUnknown
211 |     }
212 |
213 |     val rejected =
214 |       danglingPronoun ||
215 |       finalArg2.text == "" ||
216 |       arg1_notAllowed.contains(arg1.text) ||
217 |       arg1_notAllowed.contains(finalArg2.text)
218 |
219 |     if (rejected) {
220 |       None
221 |     } else {
222 |       val isWord = if (encloseInferredWords) "[is]" else "is"
223 |
224 |       val withIs = if (includeIs) isWord + " " + relation.text else relation.text
225 |       val relationText =
226 |         if (includePost) withIs + " " + inferred_post(m, encloseInferredWords, finalArg2.text)
227 |         else withIs
228 |       val finalRelation = ExtractionPart.fromSentenceTokens(sentence, relation.tokenInterval, relationText)
229 |
230 |       Some(new BinaryExtractionInstance[Relnoun.Token](new BinaryExtraction(arg1, finalRelation, finalArg2), sentence))
231 |     }
232 |   }
221 |
222 |
223 |   /**
224 |    * Word appended after the relation text: "from" when `arg2_text` is one of
225 |    * `locations` and the match has a "relnoun" group, otherwise "of". The word
226 |    * is wrapped in square brackets when `encloseInferredWords` is set.
227 |    */
228 |   def inferred_post(m: openregex.Pattern.Match[PatternExtractor.Token], encloseInferredWords: Boolean, arg2_text: String): String = {
229 |     def inferred(word: String) = if (encloseInferredWords) "[" + word + "]" else word
230 |
231 |     // The group lookup only happens when arg2 is a known location or demonym.
232 |     val useFrom = locations.contains(arg2_text) && m.group("relnoun").isDefined
233 |
234 |     if (useFrom) inferred("from") else inferred("of")
235 |   }
235 |
236 |   /**
237 |    * Handles relational nouns introduced by a verb, for example:
238 |    * "Barack Obama is the president of the United States."
239 |    * (Barack Obama, is the president of, the United States)
240 |    *
241 |    * Match groups 1, 2 and 3 provide arg1, the relation and arg2.
242 |    *
243 |    * @author schmmd
244 |    */
245 |   class VerbBasedExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
246 |     extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
247 |       patternReplace(VerbBasedExtractor)) {
248 |
249 |     override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
250 |       val sentence = patternTokens.map(_.token)
251 |       def span(groupIndex: Int) = PatternExtractor.intervalFromGroup(m.groups(groupIndex))
252 |
253 |       // The relation text is the surface strings of group 2 joined by single spaces.
254 |       val relationText = m.groups(2).tokens.map(_.token.string).mkString(" ")
255 |       val relation = ExtractionPart.fromSentenceTokens(sentence, span(2), relationText)
256 |
257 |       val arg1 = ExtractionPart.fromSentenceTokens(sentence, span(1))
258 |       val arg2 = ExtractionPart.fromSentenceTokens(sentence, span(3))
259 |
260 |       // Neither an inferred "is" nor a trailing of/from is added: the relation is used as matched.
261 |       finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, false)
262 |     }
263 |   }
257 |
258 | object VerbBasedExtractor extends BaseExtractor { // Pattern template for the VerbBasedExtractor class; placeholders are expanded by patternReplace.
259 | val pattern =
260 | // group 1, read as arg1 by buildExtraction: {nouns} (no preposition)
261 | "(" + nounChunk + ")" +
262 | // group 2, read as the relation: {be} {adverb} {adjective} {relnoun} {prep}
263 | "( ? <" + relnoun + "> )" + // NOTE(review): only the relnoun token constraint is visible in this copy - confirm the other tokens against the repository.
264 | // group 3, read as arg2: {proper np chunk}
265 | "(" + nounChunk + ")";
266 | }
267 |
268 |   /**
269 |    * Handles appositive phrases, for example:
270 |    * "Chris Curran, a lawyer for Al-Rajhi Banking."
271 |    * (Chris Curran, [is] a lawyer for, Al-Rajhi Banking)
272 |    *
273 |    * Match groups 1, 2 and 3 provide arg1, the relation and arg2; the inferred
274 |    * "is" is put in front of the relation text.
275 |    *
276 |    * @author schmmd
277 |    */
278 |   class AppositiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
279 |     extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
280 |       patternReplace(AppositiveExtractor)) {
281 |
282 |     override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
283 |       val sentence = patternTokens.map(_.token)
284 |       def span(groupIndex: Int) = PatternExtractor.intervalFromGroup(m.groups(groupIndex))
285 |
286 |       // The relation text is the surface strings of group 2 joined by single spaces.
287 |       val relationText = m.groups(2).tokens.map(_.token.string).mkString(" ")
288 |       val relation = ExtractionPart.fromSentenceTokens(sentence, span(2), relationText)
289 |
290 |       val arg1 = ExtractionPart.fromSentenceTokens(sentence, span(1))
291 |       val arg2 = ExtractionPart.fromSentenceTokens(sentence, span(3))
292 |
293 |       // includeIs = true, includePost = false
294 |       finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, false)
295 |     }
296 |   }
291 |
292 | object AppositiveExtractor extends BaseExtractor { // Pattern template for the AppositiveExtractor class; placeholders are expanded by patternReplace.
293 | val pattern: String =
294 | // group 1, read as arg1 by buildExtraction: {proper noun} or pronoun
295 | "(" + properNounChunk + "|" + pronoun + ")" +
296 | // {comma}
297 | "" + // NOTE(review): empty in this copy; presumably the comma token pattern - confirm.
298 | // group 2, read as the relation, opens here: {article}
299 | "(*" +
300 | // {adjective or noun}
301 | "*" +
302 | // {relnoun} restricted to POS NN or NNP, then {preposition}; group 2 closes here
303 | "<" + relnoun + "& pos=\"NN|NNP\"> )" +
304 | "( *)" // group 3, read as arg2
305 | }
306 |
307 |   /**
308 |    * Handles appositives with no second argument after the noun, for example:
309 |    * "Lauren Faust, a cartoonist,"
310 |    * (Lauren Faust; [is]; a cartoonist)
311 |    *
312 |    * Group 1 provides arg1 and group 2 provides arg2. The relation consists of
313 |    * the inferred "is" only and is anchored on group 1's interval.
314 |    */
315 |   class AppositiveExtractor2(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
316 |     extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
317 |       patternReplace(AppositiveExtractor2)) {
318 |
319 |     private val inferredIs = if (this.encloseInferredWords) "[is]" else "is"
320 |
321 |     override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
322 |       val sentence = patternTokens.map(_.token)
323 |       def span(groupIndex: Int) = PatternExtractor.intervalFromGroup(m.groups(groupIndex))
324 |
325 |       val relation = ExtractionPart.fromSentenceTokens(sentence, span(1), this.inferredIs)
326 |       val arg1 = ExtractionPart.fromSentenceTokens(sentence, span(1))
327 |       val arg2 = ExtractionPart.fromSentenceTokens(sentence, span(2))
328 |
329 |       // The relation text already is the inferred "is", so nothing more is added.
330 |       finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, false)
331 |     }
332 |   }
328 |
329 | object AppositiveExtractor2 extends BaseExtractor { // Pattern template for the AppositiveExtractor2 class; placeholders are expanded by patternReplace.
330 | val pattern: String =
331 | // group 1, read as arg1 by buildExtraction: {proper noun} or pronoun
332 | "(" + properNounChunk + "|" + pronoun + ")" +
333 | // {comma}
334 | "" + // NOTE(review): empty in this copy; presumably the comma token pattern - confirm.
335 | // optional adverb
336 | "?" +
337 | // group 2, read as arg2, opens here: {article}
338 | "(*" +
339 | // {adjective or noun}
340 | "*" +
341 | // optional prefix tokens (demonym strings excluded), then {relnoun} restricted to POS NN or NNP; group 2 closes here
342 | relnoun_prefix_tagged_noPrefixCheck + " <" + relnoun + "& pos=\"NN|NNP\">)" +
343 | "" // NOTE(review): empty in this copy; the example in the class documentation ends with a comma, so this may be a trailing comma token - confirm.
344 | }
345 |
346 |
347 | /***
348 | * Extracts relations from phrases such as:
349 | * "United States President Barack Obama"
350 | * (Barack Obama; [is] President [of]; United States)
351 | *
352 | * "Indian player Sachin Tendulkar"
353 | * (Sachin Tendulkar; [is] player [from]; India)
354 | *
355 | * @author schmmd
356 | *
357 | */
358 | class AdjectiveDescriptorExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
359 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
360 |     patternReplace(AdjectiveDescriptorExtractor)) {
361 |   // Relation prefix used for every match; bracketed when inferred words are to be enclosed.
362 |   private val inferredIs = if (this.encloseInferredWords) "[is]" else "is"
363 |   // Builds one extraction from the named groups "adj", "pred", "arg1" and "arg2" of a match.
364 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
365 |     val tokens = patternTokens.map(_.token)
366 |     // An "adj" group whose text is empty is treated as absent.
367 |     val adjectiveGroup = m.group("adj").get match {
368 |       case g if g.text.isEmpty => None
369 |       case g => Some(g)
370 |     }
371 |
372 |     val adjective = adjectiveGroup map { adj =>
373 |       adj.tokens.map(_.token.string).mkString(" ")
374 |     }
375 |     // Relation text: the inferred "is", then the adjective (when present), then the "pred" tokens.
376 |     val relation = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.group("pred").get), inferredIs + adjective.map(" " + _ + " ").getOrElse(" ") +
377 |       m.group("pred").get.tokens.map(_.token.string).mkString(" "))
378 |
379 |     val arg1 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.group("arg1").get))
380 |     val arg2 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.group("arg2").get)) // never reassigned, so a val
381 |
382 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, true)
383 |   }
384 | }
385 |
386 | //arg1: shall contain at least one nnp that is not a {orgword}
387 | //arg2: allow “relnoun_prefixes” followed by NNP ("Foreign Ministry spokesman Qin Gang.")
388 | //arg2: allow pos=JJ only if the word is in the list of demonyms ("outgoing Chairperson Bonnie Peng.")
389 | object AdjectiveDescriptorExtractor extends BaseExtractor { // Pattern text for class AdjectiveDescriptorExtractor, whose buildExtraction reads the named groups "adj", "pred", "arg1" and "arg2".
390 | val pattern =
391 | // {adjective}
392 | "(: *)" +
393 | "(((: (* " + "+) | (" + pronoun_possessive + ")? )" +
394 | "(: " + relnoun_prefix_tagged + properRelnounChunk + "))" + "|" +
395 | "((: (+) )" +
396 | "(: " + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")))" +
397 | "?" + // {comma}
398 | "(: * + *)";
399 | }
400 |
401 | /**
402 | * *
403 | * Extracts relations from phrases such as:
404 | * "Hakani's nephew John"
405 | * (John, [is] nephew [of], Hakani)
406 | *
407 | * "India's player Tendulkar"
408 | * (Tendulkar; [is] player [from]; India)
409 | *
410 | * @author schmmd
411 | *
412 | */
413 | class PossessiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
414 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
415 |     patternReplace(PossessiveExtractor)) {
416 |   // Group 2 supplies the relation, the named group "arg1" supplies arg1 and group 1 supplies arg2.
417 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
418 |     val sentence = patternTokens.map(_.token)
419 |     val relnounGroup = m.groups(2)
420 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), relnounGroup.tokens.map(_.token.string).mkString(" "))
421 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.group("arg1").get))
422 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(1)))
423 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
424 |   }
425 | }
426 |
427 | object PossessiveExtractor extends BaseExtractor { // Pattern text for class PossessiveExtractor, whose buildExtraction reads m.groups(1) as arg2, m.groups(2) as the relation and the named group "arg1" as arg1.
428 | val pattern =
429 | // {proper noun} (no preposition)
430 | "(* + *)" +
431 | // {possessive}
432 | "" +
433 | // {adverb} {adjective} {relnoun}
434 | "(*" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
435 | // {proper noun} (no preposition)
436 | "(: * + *)";
437 | }
438 |
439 | /**
440 | * Extracts relations from phrases such as:
441 | * "AUC's leader, Carlos Castano"
442 | * (Carlos Castano, [is] leader [of], AUC)
443 | *
444 | * "India's player, Tendulkar"
445 | * (Tendulkar; [is] player [from]; India)
446 | *
447 | * @author schmmd
448 | *
449 | */
450 | class PossessiveAppositiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
451 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
452 |     patternReplace(PossessiveAppositiveExtractor)) {
453 |   // Group 2 supplies the relation, the named group "arg1" supplies arg1 and group 1 supplies arg2.
454 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
455 |     val sentence = patternTokens.map(_.token)
456 |     val relnounGroup = m.groups(2)
457 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), relnounGroup.tokens.map(_.token.string).mkString(" "))
458 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.group("arg1").get))
459 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(1)))
460 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
461 |   }
462 | }
463 |
464 | object PossessiveAppositiveExtractor extends BaseExtractor { // Pattern text for class PossessiveAppositiveExtractor, whose buildExtraction reads m.groups(1) as arg2, m.groups(2) as the relation and the named group "arg1" as arg1.
465 | val pattern: String =
466 | // {nouns} (no preposition)
467 | "(+)" +
468 | // {possessive}
469 | "" +
470 | // {adverb} {adjective} {relnoun}
471 | "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
472 | // {comma}
473 | "" +
474 | // {proper np chunk}
475 | "(:" + properNounChunk + ")";
476 | }
477 |
478 | /**
479 | * Extracts relations from phrases such as:
480 | * "AUC's leader is Carlos Castano"
481 | * (Carlos Castano, is leader [of], AUC)
482 | *
483 | * "India's Player is Sachin."
484 | * (Sachin; is Player [from]; India)
485 | *
486 | * @author schmmd
487 | */
488 | class PossessiveIsExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
489 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
490 |     patternReplace(PossessiveIsExtractor)) {
491 |   // Relation text is the "lemma_be" tokens followed by the group 2 tokens; "arg1" supplies arg1 and group 1 supplies arg2.
492 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
493 |     val sentence = patternTokens.map(_.token)
494 |     val relnounGroup = m.groups(2)
495 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), m.group("lemma_be").get.tokens.map(_.token.string).mkString(" ") + " " + relnounGroup.tokens.map(_.token.string).mkString(" "))
496 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.group("arg1").get))
497 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(1)))
498 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, true)
499 |   }
500 | }
501 |
502 | object PossessiveIsExtractor extends BaseExtractor { // Pattern text for class PossessiveIsExtractor, whose buildExtraction reads m.groups(1) as arg2, m.groups(2) plus the named group "lemma_be" for the relation, and the named group "arg1" as arg1.
503 | val pattern =
504 | // {nouns} (no preposition)
505 | "(? * * +)" +
506 | // {possessive}
507 | "" +
508 | // {adverb} {adjective} {relnoun}
509 | "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
510 | // be
511 | "(: )" +
512 | // {proper np chunk}
513 | "(:" + properNounChunk + ")";
514 | }
515 |
516 | /**
517 | * Extracts relations from phrases such as:
518 | * "Barack Obama is America's President"
519 | * (Barack Obama; is President [of]; America)
520 | *
521 | * "Tendulkar is India's player."
522 | * (Tendulkar; is player [from]; India)
523 | *
524 | * @author schmmd
525 | */
526 | class IsPossessiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
527 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
528 |     patternReplace(IsPossessiveExtractor)) {
529 |   // Relation text is the group 2 tokens followed by the group 4 tokens; group 1 supplies arg1 and group 3 supplies arg2.
530 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
531 |     val sentence = patternTokens.map(_.token)
532 |     val relnounGroup = m.groups(4)
533 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), m.groups(2).tokens.map(_.token.string).mkString(" ") + " " + relnounGroup.tokens.map(_.token.string).mkString(" "))
534 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(1)))
535 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(3)))
536 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, true)
537 |   }
538 | }
539 |
540 | object IsPossessiveExtractor extends BaseExtractor { // Pattern text for class IsPossessiveExtractor, whose buildExtraction reads m.groups(1) as arg1, m.groups(3) as arg2 and joins m.groups(2) with m.groups(4) for the relation.
541 | val pattern =
542 | // {proper noun chunk} or {pronoun}
543 | "(" + properNounChunk + "|" + pronoun + ")" +
544 | "()" +
545 | "(+)" +
546 | "" +
547 | "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")";
548 | }
549 |
550 | /**
551 | * Extracts relations from phrases such as:
552 | * "the father of Michael is John"
553 | * (John; is the father of; Michael)
554 | * @author schmmd
555 | */
556 | class OfIsExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
557 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
558 |     patternReplace(OfIsExtractor)) {
559 |   // Relation text is the group 3 tokens followed by the group 1 tokens; "arg1" supplies arg1 and group 2 supplies arg2.
560 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
561 |     val sentence = patternTokens.map(_.token)
562 |     val relnounGroup = m.groups(1)
563 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), m.groups(3).tokens.map(_.token.string).mkString(" ") + " " + relnounGroup.tokens.map(_.token.string).mkString(" "))
564 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.group("arg1").get))
565 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(2)))
566 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, false)
567 |   }
568 | }
569 |
570 | object OfIsExtractor extends BaseExtractor { // Pattern text for class OfIsExtractor, whose buildExtraction reads m.groups(2) as arg2, the named group "arg1" as arg1 and joins m.groups(3) with m.groups(1) for the relation.
571 | val pattern =
572 | "( * <" + relnoun + "& pos='NN|NNP' & chunk='I-NP'> " + // relnoun token: pos NN or NNP, chunk I-NP
573 | ") " +
574 | "( * ? ? *) " +
575 | "() " +
576 | "(: ( *) |" + pronoun + ")";
577 | }
578 |
579 | /**
580 | * Extracts relations from phrases such as:
581 | * "the father of Michael, John,"
582 | * (John; [is] the father of; Michael)
583 | *
584 | * @author harinder
585 | */
586 | class OfCommaExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
587 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
588 |     patternReplace(OfCommaExtractor)) {
589 |   // Group 1 supplies the relation, group 4 supplies arg1 and group 2 supplies arg2.
590 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
591 |     val sentence = patternTokens.map(_.token)
592 |     val relnounGroup = m.groups(1)
593 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), relnounGroup.tokens.map(_.token.string).mkString(" "))
594 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(4)))
595 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(2)))
596 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, false)
597 |   }
598 | }
599 |
600 | object OfCommaExtractor extends BaseExtractor { // Pattern text for class OfCommaExtractor, whose buildExtraction reads m.groups(1) as the relation, m.groups(2) as arg2 and m.groups(4) as arg1.
601 | val pattern =
602 | "( * <" + relnoun + "& pos='NN|NNP' & chunk='I-NP'> " + // relnoun token: pos NN or NNP, chunk I-NP
603 | ") " +
604 | "( * ? ? *) " +
605 | "() " +
606 | // {proper np chunk} or {pronoun}
607 | "(" + properNounChunk + "|" + pronoun + ")" +
608 | "() " ;
609 | }
610 |
611 |
612 | /**
613 | * Extracts relations from phrases such as:
614 | * "Mohammed Jamal, bin Laden's brother"
615 | * (Mohammed Jamal, [is] brother [of], bin Laden)
616 | *
617 | * "Tendulkar, India's player"
618 | * (Tendulkar; [is] player [from]; India)
619 | *
620 | * @author schmmd
621 | */
622 | class PossessiveReverseExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
623 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
624 |     patternReplace(PossessiveReverseExtractor)) {
625 |   // Group 3 supplies the relation, group 1 supplies arg1 and group 2 supplies arg2.
626 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
627 |     val sentence = patternTokens.map(_.token)
628 |     val relnounGroup = m.groups(3)
629 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(relnounGroup), relnounGroup.tokens.map(_.token.string).mkString(" "))
630 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(1)))
631 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.groups(2)))
632 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
633 |   }
634 | }
635 |
636 | object PossessiveReverseExtractor extends BaseExtractor { // Pattern text for class PossessiveReverseExtractor, whose buildExtraction reads m.groups(1) as arg1, m.groups(2) as arg2 and m.groups(3) as the relation.
637 | val pattern =
638 | // {proper noun chunk} or {pronoun}
639 | "(" + properNounChunk + "|" + pronoun + ")" +
640 | // comma
641 | "" +
642 | // {np chunk}
643 | "( *)" +
644 | // {possessive}
645 | "" +
646 | "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
647 | "(?:|$)";
648 | }
649 |
650 | /**
651 | * Extracts relations from phrases such as:
652 | * "Obama, the US president."
653 | * (Obama, [is] president [of], United States)
654 | *
655 | * "Tendulkar, the Indian player."
656 | * (Tendulkar; [is] the player [from]; India)
657 | *
658 | * @author schmmd
659 | */
660 | class ProperNounAdjectiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
661 |   extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
662 |     patternReplace(ProperNounAdjectiveExtractor)) {
663 |   // Relation text is the group 2 tokens followed by the "pred" tokens; "arg1" and "arg2" supply the arguments.
664 |   override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
665 |     val sentence = patternTokens.map(_.token)
666 |     val predGroup = m.group("pred").get
667 |     val relation = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(predGroup),
668 |       (m.groups(2).tokens.map(_.token.string) ++ predGroup.tokens.map(_.token.string)).mkString(" "))
669 |     val arg1 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.group("arg1").get))
670 |     val arg2 = ExtractionPart.fromSentenceTokens(sentence, PatternExtractor.intervalFromGroup(m.group("arg2").get))
671 |     finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
672 |   }
673 | }
674 |
675 | object ProperNounAdjectiveExtractor extends BaseExtractor { // Pattern text for class ProperNounAdjectiveExtractor, whose buildExtraction reads the named groups "pred", "arg1" and "arg2" and prepends the m.groups(2) tokens to the relation text.
676 | val pattern =
677 | "(: " + properNounChunk + "|" + pronoun + ")" + // {proper noun chunk} or {pronoun}
678 | "" +
679 | "( *)" +
680 | "(((: (* " + "+) )" +
681 | "(: " + relnoun_prefix_tagged + properRelnounChunk + "))" + "|" +
682 | "((: (+) )" +
683 | "(: " + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")))"
684 | }
685 |
686 |
687 | /***
688 | * A class that represents the command line configuration
689 | * of the application.
690 | *
691 | * @param inputFile The file to use as input
692 | * @param outputFile The file to use as output
693 | */
694 | case class Config(inputFile: Option[File] = None,
695 |                   outputFile: Option[File] = None,
696 |                   encoding: String = "UTF-8",
697 |                   printPatterns: Boolean = false) {
698 |
699 |   /***
700 |    * Opens the input: the configured file when one is set, otherwise stdin; both are decoded with `encoding`.
701 |    */
702 |   def source() = {
703 |     if (inputFile.isDefined)
704 |       Source.fromFile(inputFile.get, encoding)
705 |     else
706 |       Source.fromInputStream(System.in, encoding)
707 |   }
708 |
709 |   /***
710 |    * Opens the output: the configured file when one is set, otherwise stdout; both are encoded with `encoding`.
711 |    */
712 |   def writer() = {
713 |     if (outputFile.isEmpty)
714 |       new PrintWriter(new PrintStream(System.out, true, encoding))
715 |     else
716 |       new PrintWriter(outputFile.get, encoding)
717 |   }
718 | }
719 |
720 | // Command-line entry point: parses the arguments into a Config and hands it to run.
721 | // A MalformedInputException escaping from run is reported with a hint about '--encoding'.
722 | def main(args: Array[String]): Unit = {
723 |   // definition for command-line argument parser
724 |   val argumentParser = new scopt.immutable.OptionParser[Config]("openie") {
725 |     def options = Seq(
726 |       argOpt("input-file", "input file") { (string, config) =>
727 |         val file = new File(string)
728 |         require(file.exists, "input file does not exist: " + file)
729 |         config.copy(inputFile = Some(file))
730 |       },
731 |       argOpt("output-file", "output file") { (string, config) => // was misspelled "ouput-file"
732 |         val file = new File(string)
733 |         config.copy(outputFile = Some(file))
734 |       },
735 |       opt("encoding", "Character encoding") { (string, config) =>
736 |         config.copy(encoding = string)
737 |       },
738 |       flag("p", "pattern", "Prints the patterns") { config =>
739 |         config.copy(printPatterns = true)
740 |       })
741 |   }
742 |
743 |   argumentParser.parse(args, Config()) match {
744 |     case Some(config) =>
745 |       try {
746 |         run(config)
747 |       }
748 |       catch {
749 |         case e: MalformedInputException =>
750 |           System.err.println(
751 |             "\nError: a MalformedInputException was thrown.\n" +
752 |             "This usually means there is a mismatch between what is expected and the input file.\n" +
753 |             "Try changing the input file's character encoding to UTF-8 or specifying the correct character encoding for the input file with '--encoding'.\n")
754 |           e.printStackTrace()
755 |       }
756 |     case None => // usage will be shown
757 |   }
758 | }
759 | // Runs the tool for one Config: prints the sub-extractor patterns, or writes scored extractions for every input line.
760 | def run(config: Config): Unit = {
761 |   System.err.println("Creating the relational noun extractor... ") // status goes to stderr; stdout may carry the results
762 |   val relnoun = new Relnoun(true, true, true)
763 |   val conf = confidence.RelnounConfidenceFunction.loadDefaultClassifier()
764 |
765 |   config.inputFile.foreach { file =>
766 |     System.err.println("Processing file: " + file)
767 |   }
768 |
769 |   if(config.printPatterns) {
770 |     for (extractor <- relnoun.subextractors) {
771 |       System.out.println(extractor.expression);
772 |     }
773 |   } else {
774 |
775 |     System.err.println("Creating the sentence chunker... ")
776 |     val chunker = new OpenNlpChunker()
777 |     val stemmer = new MorphaStemmer()
778 |
779 |     Timing.timeThen {
780 |       // source and writer are both acquired through managed(...) for the duration of the loop
781 |       for {
782 |         source <- managed(config.source())
783 |         writer <- managed(config.writer())
784 |       } {
785 |         try {
786 |           for (line <- source.getLines) {
787 |             val chunked = chunker.chunk(line);
788 |             val tokens = chunked map stemmer.lemmatizeToken
789 |             // echo the sentence, then one "<confidence>: <extraction>" line per extraction, then a blank line
790 |             writer.println(line)
791 |             for (inst <- relnoun(tokens)) {
792 |               writer.println(("%1.2f" format conf(inst)) + ": " + inst.extr);
793 |             }
794 |
795 |             writer.println();
796 |             writer.flush();
797 |           }
798 |         } catch {
799 |           case e: Exception => // NOTE(review): also catches MalformedInputException raised while reading, so main's handler is bypassed
800 |             e.printStackTrace()
801 |             System.exit(2)
802 |         }
803 |       }
804 |     }{ ns =>
805 |       System.err.println("extraction completed in: " + Timing.Seconds.format(ns))
806 |     }
807 |
808 |     config.outputFile.foreach { file =>
809 |       System.err.println("Output written to file: " + file)
810 |     }
811 |   }
812 | }
813 | }
814 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/confidence/ChunkedExtractorConfidenceFunction.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor.confidence
2 |
3 | import org.slf4j.LoggerFactory
4 | import edu.knowitall.tool.conf.impl.LogisticRegression
5 | import edu.knowitall.tool.conf.FeatureSet
6 | import java.net.URL
7 | import edu.knowitall.chunkedextractor.BinaryExtractionInstance
8 | import edu.knowitall.tool.chunk.ChunkedToken
9 |
10 | object ChunkedExtractorConfidenceFunction { // Helper for loading a confidence function over chunked-extractor instances.
11 | val logger = LoggerFactory.getLogger(this.getClass)
12 | // Loads a model from `url` by delegating to LogisticRegression.fromUrl with the given feature set.
13 | def fromUrl(featureSet: FeatureSet[BinaryExtractionInstance[ChunkedToken], Double], url: URL) = {
14 | LogisticRegression.fromUrl(featureSet, url)
15 | }
16 | }
17 |
18 | object RelnounConfidenceFunction {
19 |   val defaultModelUrl = Option(this.getClass.getResource("relnoun-confidence.txt")) match {
20 |     case Some(url) => url
21 |     case None => throw new IllegalArgumentException("Could not load confidence function resource.")
22 |   }
23 |   // Builds the classifier from the default model resource using ChunkedExtractorFeatureSet.
24 |   def loadDefaultClassifier(): LogisticRegression[BinaryExtractionInstance[ChunkedToken]] =
25 |     ChunkedExtractorConfidenceFunction.fromUrl(ChunkedExtractorFeatureSet, defaultModelUrl)
26 | }
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/confidence/ChunkedExtractorFeatureSet.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor.confidence
2 |
3 | import edu.knowitall.tool.conf.FeatureSet
4 | import edu.knowitall.tool.conf.Feature
5 | import scala.collection.immutable.SortedMap
6 | import edu.knowitall.tool.srl.FrameHierarchy
7 | import java.util.regex.Pattern
8 | import java.util.regex.Pattern
9 | import edu.knowitall.chunkedextractor.BinaryExtractionInstance
10 | import edu.knowitall.tool.chunk.ChunkedToken
11 |
12 | object ChunkedExtractorFeatureSet extends FeatureSet[BinaryExtractionInstance[ChunkedToken], Double](ChunkedExtractorFeatures.featureMap) // feature set built from the name -> feature map defined in ChunkedExtractorFeatures
13 |
14 | /** Features computed over chunked-extractor instances; retrain the stored model after changing any of them. */
15 | object ChunkedExtractorFeatures {
16 |   type ChunkedExtractorFeature = Feature[BinaryExtractionInstance[ChunkedToken], Double]
17 |   // Lets the boolean tests below be returned as 1.0 / 0.0 feature values.
18 |   implicit def boolToDouble(bool: Boolean): Double = if (bool) 1.0 else 0.0
19 |
20 |   object startExtr extends ChunkedExtractorFeature("sent starts w/ extr") {
21 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
22 |       inst.extr.arg1.tokenInterval.start == 0 ||
23 |         inst.extr.arg2.tokenInterval.start == 0
24 |     }
25 |   }
26 |
27 |   object endArg2 extends ChunkedExtractorFeature("sent ends w/ extr") {
28 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
29 |       inst.extr.arg1.tokenInterval.end == inst.sent.size ||
30 |         inst.extr.arg2.tokenInterval.end == inst.sent.size
31 |     }
32 |   }
33 |
34 |   object pronounBeforeRel extends ChunkedExtractorFeature("which|who|that before rel") {
35 |     val targets = Set("which", "who", "that")
36 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
37 |       val res = inst.sent.take(inst.extr.rel.tokenInterval.start).lastOption.map { prev =>
38 |         targets contains prev.string
39 |       }.getOrElse(false)
40 |
41 |       res
42 |     }
43 |   }
44 |
45 |   object arg1Proper extends ChunkedExtractorFeature("arg1 is proper") {
46 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
47 |       inst.extr.arg1.tokens.exists(_.isProperNoun)
48 |     }
49 |   }
50 |
51 |   object arg2Proper extends ChunkedExtractorFeature("arg2 is proper") {
52 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
53 |       inst.extr.arg2.tokens.exists(_.isProperNoun)
54 |     }
55 |   }
56 |
57 |   object extrCoversSentence extends ChunkedExtractorFeature("extr covers sent") {
58 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
59 |       inst.extr.tokenInterval.start == 0 && inst.extr.tokenInterval.end == inst.sent.size
60 |     }
61 |   }
62 |
63 |   object npBeforeExtr extends ChunkedExtractorFeature("np before extr") {
64 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
65 |       inst.sent.take(inst.extr.tokenInterval.start).exists(_.chunk == "B-NP")
66 |     }
67 |   }
68 |
69 |   object npAfterExtr extends ChunkedExtractorFeature("np after extr") {
70 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
71 |       val next = inst.sent.drop(inst.extr.rel.tokenInterval.end).headOption
72 |       val res = next.map(next => next.chunk == "B-NP" || next.chunk == "I-NP").getOrElse(false)
73 |       res
74 |     }
75 |   }
76 |   // "CC" is a part-of-speech tag, so it is compared against postag (the chunk tag can never equal it).
77 |   object conjBeforeRel extends ChunkedExtractorFeature("conj before rel") {
78 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
79 |       val res = inst.sent.take(inst.extr.rel.tokenInterval.start).lastOption.map(_.postag == "CC").getOrElse(false)
80 |       res
81 |     }
82 |   }
83 |   // "IN" is a part-of-speech tag, so it is compared against postag (the chunk tag can never equal it).
84 |   object prepBeforeExtr extends ChunkedExtractorFeature("prep before extr") {
85 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
86 |       val res = inst.sent.take(inst.extr.rel.tokenInterval.start).lastOption.map(_.postag == "IN").getOrElse(false)
87 |       res
88 |     }
89 |   }
90 |
91 |   object verbAfterExtr extends ChunkedExtractorFeature("verb after extr") {
92 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
93 |       val res = inst.sent.drop(inst.extr.rel.tokenInterval.end).headOption.map(_.isVerb).getOrElse(false)
94 |       res
95 |     }
96 |   }
97 |   // Tests for a preposition (POS tag "IN"); it previously tested isVerb and so duplicated verbAfterExtr.
98 |   object prepAfterExtr extends ChunkedExtractorFeature("prep after extr") {
99 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
100 |       val res = inst.sent.drop(inst.extr.rel.tokenInterval.end).headOption.map(_.postag == "IN").getOrElse(false)
101 |       res
102 |     }
103 |   }
104 |
105 |   object arg1ContainsPronoun extends ChunkedExtractorFeature("arg1 contains pronoun") {
106 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
107 |       inst.extr.arg1.tokens.exists(_.isPronoun)
108 |     }
109 |   }
110 |   // Inspects arg2; it previously inspected arg1 and so duplicated arg1ContainsPronoun.
111 |   object arg2ContainsPronoun extends ChunkedExtractorFeature("arg2 contains pronoun") {
112 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
113 |       inst.extr.arg2.tokens.exists(_.isPronoun)
114 |     }
115 |   }
116 |
117 |   object arg1ContainsPosPronoun extends ChunkedExtractorFeature("arg1 contains PRP$") {
118 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
119 |       inst.extr.arg1.tokens.exists(_.isPossessivePronoun)
120 |     }
121 |   }
122 |   // Inspects arg2; it previously inspected arg1 and so duplicated arg1ContainsPosPronoun.
123 |   object arg2ContainsPosPronoun extends ChunkedExtractorFeature("arg2 contains PRP$") {
124 |     override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
125 |       inst.extr.arg2.tokens.exists(_.isPossessivePronoun)
126 |     }
127 |   }
128 |   // Every feature defined above, in the order used to build featureMap.
129 |   def features: Seq[ChunkedExtractorFeature] = Seq(
130 |     startExtr,
131 |     endArg2,
132 |     pronounBeforeRel,
133 |     arg1Proper,
134 |     arg2Proper,
135 |     extrCoversSentence,
136 |     npBeforeExtr,
137 |     npAfterExtr,
138 |     conjBeforeRel,
139 |     prepBeforeExtr,
140 |     verbAfterExtr,
141 |     prepAfterExtr,
142 |     arg1ContainsPronoun,
143 |     arg2ContainsPronoun,
144 |     arg1ContainsPosPronoun,
145 |     arg2ContainsPosPronoun
146 |   )
147 |   // Name -> feature map, sorted by feature name.
148 |   def featureMap: SortedMap[String, ChunkedExtractorFeature] = {
149 |     (for (f <- features) yield (f.name -> Feature.from(f.name, f.apply _)))(scala.collection.breakOut)
150 |   }
151 | }
152 |
--------------------------------------------------------------------------------
/src/main/scala/edu/knowitall/chunkedextractor/confidence/TrainChunkedExtractor.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor.confidence
2 |
3 | import java.io.File
4 | import edu.knowitall.common.Resource
5 | import scala.io.Source
6 | import edu.knowitall.chunkedextractor.Relnoun
7 | import edu.knowitall.tool.chunk.OpenNlpChunker
8 | import edu.knowitall.tool.stem.MorphaStemmer
9 | import edu.knowitall.common.Analysis
10 | import edu.knowitall.tool.conf.BreezeLogisticRegressionTrainer
11 | import edu.knowitall.chunkedextractor.BinaryExtractionInstance
12 | import edu.knowitall.tool.conf.Labelled
13 |
14 | object TrainChunkedExtractor extends App { // Trains a logistic-regression confidence model from a sentences file and a gold label file.
15 |   case class Config(
16 |     inputFile: File = null,
17 |     outputFile: File = null,
18 |     goldFile: File = null) {
19 |   }
20 |   // Three positional arguments: sentences file, gold file, output file (the output must not exist yet).
21 |   val parser = new scopt.immutable.OptionParser[Config]("trainer") {
22 |     def options = Seq(
23 |       arg("", "sentences") { (path: String, config: Config) =>
24 |         val file = new File(path)
25 |         require(file.exists(), "file does not exist: " + path)
26 |         config.copy(inputFile = file)
27 |       },
28 |       arg("", "gold") { (path: String, config: Config) =>
29 |         val file = new File(path)
30 |         require(file.exists(), "file does not exist: " + path)
31 |         config.copy(goldFile = file)
32 |       },
33 |       arg("", "output") { (path: String, config: Config) =>
34 |         val file = new File(path)
35 |         require(!file.exists(), "file already exist: " + path)
36 |         config.copy(outputFile = file)
37 |       })
38 |   }
39 |
40 |   parser.parse(args, Config()) match {
41 |     case Some(config) => run(config)
42 |     case None =>
43 |   }
44 |   // Extracts from every input sentence, labels the extractions found in the gold file, trains and saves the model.
45 |   def run(config: Config) = {
46 |     val relnoun = new Relnoun()
47 |
48 |     val chunker = new OpenNlpChunker()
49 |     // Gold lines are "label<TAB>arg1<TAB>rel<TAB>arg2"; a label of "1" marks a correct extraction.
50 |     val gold = Resource.using(Source.fromFile(config.goldFile)) { goldSource =>
51 |       goldSource.getLines.map(_.split("\t") match {
52 |         case Array(label, arg1, rel, arg2) => (arg1, rel, arg2) -> (label == "1")
53 |       }).toMap
54 |     }
55 |     val examples =
56 |       Resource.using(Source.fromFile(config.inputFile)) { source =>
57 |         for {
58 |           line <- source.getLines.toList
59 |           chunked = chunker(line) map MorphaStemmer.lemmatizePostaggedToken
60 |
61 |           inst <- relnoun.extract(chunked)
62 |
63 |           extr = inst.extr
64 |           label <- gold.get((extr.arg1.text, extr.rel.text, extr.arg2.text)).toList // skip extractions with no gold label instead of throwing
65 |         } yield {
66 |           new Labelled(label, inst)
67 |         }
68 |       }
69 |
70 |     val trainer = new BreezeLogisticRegressionTrainer(ChunkedExtractorFeatureSet)
71 |     val trained = trainer.train(examples)
72 |
73 |     trained.saveFile(config.outputFile)
74 |   }
75 | }
--------------------------------------------------------------------------------
/src/test/scala/edu/knowitall/chunkedextractor/NestySpecTest.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import org.junit.runner.RunWith
4 | import org.specs2.mutable.Specification
5 | import org.specs2.runner.JUnitRunner
6 | import edu.knowitall.tool.chunk.OpenNlpChunker
7 | import edu.knowitall.tool.stem.MorphaStemmer
8 |
9 | @RunWith(classOf[JUnitRunner])
10 | object NestySpecTest extends Specification {
11 |   def extract(sentence: String) = {
12 |     val sentenceChunker = new OpenNlpChunker
13 |     val extractor = new Nesty
14 |     val lemmas = sentenceChunker.chunk(sentence).map(MorphaStemmer.lemmatizeToken)
15 |     // run Nesty over the chunked, lemmatized sentence
16 |     extractor(lemmas)
17 |   }
18 |   // sentence with an explicit "that"
19 |   "nesty" should {
20 |     val results = extract("Michael said that nesty extends reverb.")
21 |     "have a single extraction" in {
22 |       results.size must_== 1
23 |     }
24 |     "have the correct extraction" in {
25 |       results.head.extr.toString must_== "(Michael; said that; nesty extends reverb)"
26 |     }
27 |   }
28 |   // same sentence with "that" omitted
29 |   "nesty without that" should {
30 |     val results = extract("Michael said nesty extends reverb.")
31 |     "have a single extraction" in {
32 |       results.size must_== 1
33 |     }
34 |     "have the correct extraction" in {
35 |       results.head.extr.toString must_== "(Michael; said; nesty extends reverb)"
36 |     }
37 |   }
38 | }
39 |
--------------------------------------------------------------------------------
/src/test/scala/edu/knowitall/chunkedextractor/R2A2SpecTest.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import org.junit.runner.RunWith
4 | import org.specs2.mutable.Specification
5 | import org.specs2.runner.JUnitRunner
6 | import edu.knowitall.tool.chunk.OpenNlpChunker
7 | import edu.knowitall.tool.stem.MorphaStemmer
8 |
9 | @RunWith(classOf[JUnitRunner])
10 | object R2A2SpecTest extends Specification {
11 |   def extract(sentence: String) = {
12 |     val sentenceChunker = new OpenNlpChunker
13 |     val extractor = new R2A2
14 |     // R2A2 is applied to the chunked sentence directly
15 |     extractor(sentenceChunker.chunk(sentence))
16 |   }
17 |
18 |   "r2a2" should {
19 |     val results = extract("Michael ate at the best restaurant in London")
20 |     "have a single extraction" in {
21 |       results.size must_== 1
22 |     }
23 |     "have the correct extraction" in {
24 |       results.head.extr.toString must_== "(Michael; ate at; the best restaurant in London)"
25 |     }
26 |   }
27 | }
28 |
--------------------------------------------------------------------------------
/src/test/scala/edu/knowitall/chunkedextractor/RelnounSpecTest.scala:
--------------------------------------------------------------------------------
1 | package edu.knowitall.chunkedextractor
2 |
3 | import org.junit.runner.RunWith
4 | import org.specs2.mutable.Specification
5 | import org.specs2.runner.JUnitRunner
6 | import edu.knowitall.tool.chunk.OpenNlpChunker
7 | import edu.knowitall.tool.stem.MorphaStemmer
8 |
9 | /**
10 |  * Specs for the Relnoun extractor. Each `test(...)` call below registers one sentence
11 |  * together with the single (arg1, rel, arg2) extraction it is expected to produce.
12 |  */
13 | @RunWith(classOf[JUnitRunner])
14 | object RelnounSpecTest extends Specification {
15 |   // The chunker and the extractor are built once and reused for every sentence rather
16 |   // than being re-created on each `extract` call (one call per `test(...)` below).
17 |   // NOTE(review): assumes both objects are safe to reuse across sequential calls - confirm.
18 |   private lazy val chunker = new OpenNlpChunker
19 |   private lazy val relnoun = new Relnoun(true, true, true)
20 |
21 |   /**
22 |    * Runs the pipeline on one sentence: chunk it, lemmatize every chunked token,
23 |    * then apply the Relnoun extractor.
24 |    *
25 |    * @param sentence raw sentence text
26 |    * @return whatever the Relnoun extractor yields for the lemmatized tokens
27 |    */
28 |   def extract(sentence: String) = {
29 |     val chunked = chunker.chunk(sentence)
30 |     val lemmatized = chunked.map(MorphaStemmer.lemmatizeToken)
31 |     relnoun(lemmatized)
32 |   }
33 |
34 |   /**
35 |    * Registers two examples for `sentence`: exactly one extraction is produced, and the
36 |    * string forms of its rel, arg1 and arg2 match the expected triple (checked in that order).
37 |    *
38 |    * @param name       label of the example group
39 |    * @param sentence   sentence passed to `extract`
40 |    * @param extraction expected extraction as (arg1, rel, arg2)
41 |    */
42 |   def test(name: String, sentence: String, extraction: (String, String, String)) = {
43 |     val (expectedArg1, expectedRel, expectedArg2) = extraction
44 |     name should {
45 |       val extrs = extract(sentence)
46 |       "have a single extraction" in {
47 |         extrs.size must_== 1
48 |       }
49 |       "have the correct extraction" in {
50 |         extrs.head.extr.rel.toString must_== expectedRel
51 |         extrs.head.extr.arg1.toString must_== expectedArg1
52 |         extrs.head.extr.arg2.toString must_== expectedArg2
53 |       }
54 |     }
55 |   }
56 |
57 |   test("VerbBasedExtractor",
58 |     "Barack Obama is the president of the United States.",
59 |     ("Barack Obama", "is the president of", "the United States"))
60 |
61 |
62 |   test("AppositiveExtractor",
63 |     "Barack Obama, the President of the U.S.",
64 |     ("Barack Obama", "[is] the President of", "the U.S."))
65 |
66 |   test("AppositiveExtractor_pronoun",
67 |     "He, the President of the U.S.",
68 |     ("He", "[is] the President of", "the U.S."))
69 |
70 |   test("AppositiveExtractor2",
71 |     "Lauren Faust, a cartoonist,",
72 |     ("Lauren Faust", "[is]", "a cartoonist"))
73 |
74 |   test("AppositiveExtractor2_pronoun",
75 |     "He, a cartoonist,",
76 |     ("He", "[is]", "a cartoonist"))
77 |
78 |   test("AdjectiveDescriptorExtractor_[of]",
79 |     "United States President Barack Obama gave a speech today.",
80 |     ("Barack Obama", "[is] President [of]", "United States"))
81 |
82 |   test("AdjectiveDescriptorExtractor__[from]",
83 |     "Indian player Sachin Tendulkar received the Arjuna Award in 1994.",
84 |     ("Sachin Tendulkar", "[is] player [from]", "India"))
85 |
86 |   test("AdjectiveDescriptorExtractor_title",
87 |     "President Barack Obama gave a speech today.",
88 |     ("Barack Obama", "[is] President [of]", "[UNKNOWN]"))
89 |
90 |   test("AdjectiveDescriptorExtractor_title_more_1",
91 |     "Prime Minister Narendra Modi gave a speech today.",
92 |     ("Narendra Modi", "[is] Prime Minister [of]", "[UNKNOWN]"))
93 |
94 |   test("AdjectiveDescriptorExtractor_prefix",
95 |     "Indian Vice President Modi.",
96 |     ("Modi", "[is] Vice President [of]", "India"))
97 |
98 |   test("AdjectiveDescriptorExtractor_pronoun",
99 |     "His father John,",
100 |     ("John", "[is] father [of]", "Him"))
101 |
102 |   test("AdjectiveDescriptorExtractor_more_1",
103 |     "Foreign Ministry spokesman Qin Gang.",
104 |     ("Qin Gang", "[is] spokesman [of]", "Foreign Ministry"))
105 |
106 |   test("AdjectiveDescriptorExtractor_more_2",
107 |     "New Yorker's best staff writer Adam.",
108 |     ("Adam", "[is] best staff writer [from]", "New York"))
109 |
110 |   test("AdjectiveDescriptorExtractor_more_3",
111 |     "General Motors interim chief executive Ed Whitacre.",
112 |     ("Ed Whitacre", "[is] interim chief executive [of]", "General Motors"))
113 |
114 |   test("AdjectiveDescriptorExtractor_more_4",
115 |     "foreign Indian spokesman Qin Gang.",
116 |     ("Qin Gang", "[is] foreign spokesman [from]", "India"))
117 |
118 |   test("AdjectiveDescriptorExtractor_more_5",
119 |     "first Indian spokesman Qin Gang.",
120 |     ("Qin Gang", "[is] first spokesman [from]", "India"))
121 |
122 |   test("AdjectiveDescriptorExtractor_more_6",
123 |     "New Zealand coach Steve Hansen.",
124 |     ("Steve Hansen", "[is] coach [from]", "New Zealand"))
125 |
126 |   /*test("AdjectiveDescriptorExtractor_more_7",
127 |     "Costa Rican President Luis Guillermo.",
128 |     ("Luis Guillermo", "[is] President [of]", "Costa Rica"))*/
129 |
130 |   test("AdjectiveDescriptorExtractor_more_8",
131 |     "New Zealand President Luis Guillermo.",
132 |     ("Luis Guillermo", "[is] President [of]", "New Zealand"))
133 |
134 |   /*test("AdjectiveDescriptorExtractor_more_9",
135 |     "North Korean President Obama.",
136 |     ("Obama", "[is] President [of]", "North Korea"))*/
137 |
138 |   test("AdjectiveDescriptorExtractor_more_10",
139 |     "New York governor Eliot Spitzer.",
140 |     ("Eliot Spitzer", "[is] governor [of]", "New York City"))
141 |
142 |   test("AdjectiveDescriptorExtractor_more_11",
143 |     "Seattle Badminton Player Michael.",
144 |     ("Michael", "[is] Badminton Player [from]", "Seattle"))
145 |
146 |   /*test("AdjectiveDescriptorExtractor_more_12",
147 |     "Badminton Player Michael.",
148 |     ("Michael", "[is] Player [of]", "Badminton"))*/
149 |
150 |   test("AdjectiveDescriptorExtractor_more_13",
151 |     "West Bengali chief minister Mamata Banerjee.",
152 |     ("Mamata Banerjee", "[is] chief minister [of]", "West Bengal"))
153 |
154 |   test("AdjectiveDescriptorExtractor_demonym",
155 |     "Indian President Pranab Mukherjee gave a speech today.",
156 |     ("Pranab Mukherjee", "[is] President [of]", "India"))
157 |
158 |
159 |   test("PossessiveExtractor_[of]",
160 |     "United States' President Barack Obama was in a debate on Wednesday.",
161 |     ("Barack Obama", "[is] President [of]", "United States"))
162 |
163 |   test("PossessiveExtractor_[from]",
164 |     "India's player Tendulkar received the Arjuna Award in 1994.",
165 |     ("Tendulkar", "[is] player [from]", "India"))
166 |
167 |   test("PossessiveExtractor_more_1",
168 |     "New Zealand's President Luis Guillermo.",
169 |     ("Luis Guillermo", "[is] President [of]", "New Zealand"))
170 |
171 |
172 |   test("PossessiveAppositiveExtractor_[of]",
173 |     "United States' President, Barack Obama, was in a debate on Wednesday.",
174 |     ("Barack Obama", "[is] President [of]", "United States"))
175 |
176 |   test("PossessiveAppositiveExtractor_[from]",
177 |     "India's player, Tendulkar, received the Arjuna Award in 1994.",
178 |     ("Tendulkar", "[is] player [from]", "India"))
179 |
180 |
181 |   test("PossessiveIsExtractor_[of]",
182 |     "America's President is Barack Obama.",
183 |     ("Barack Obama", "is President [of]", "America"))
184 |
185 |   test("PossessiveIsExtractor_[from]",
186 |     "India's Player is Sachin.",
187 |     ("Sachin", "is Player [from]", "India"))
188 |
189 |
190 |   test("IsPossessiveExtractor_[of]",
191 |     "Barack Obama is America's President.",
192 |     ("Barack Obama", "is President [of]", "America"))
193 |
194 |   test("IsPossessiveExtractor_[from]",
195 |     "Tendulkar is India's player.",
196 |     ("Tendulkar", "is player [from]", "India"))
197 |
198 |   test("IsPossessiveExtractor_more_1",
199 |     "Luis Guillermo is New Zealand's President.",
200 |     ("Luis Guillermo", "is President [of]", "New Zealand"))
201 |
202 |   test("IsPossessiveExtractor_pronoun",
203 |     "He is America's President.",
204 |     ("He", "is President [of]", "America"))
205 |
206 |
207 |   test("OfIsExtractor",
208 |     "The President of the United States is Barack Obama.",
209 |     ("Barack Obama", "is The President of", "the United States"))
210 |
211 |   test("OfIsExtractor_pronoun",
212 |     "The President of the United States is he.",
213 |     ("he", "is The President of", "the United States"))
214 |
215 |
216 |   test("OfCommaExtractor",
217 |     "The Chairperson of the Commission of the African Union, Jean Ping, on Tuesday...",
218 |     ("Jean Ping", "[is] The Chairperson of", "the Commission of the African Union"))
219 |
220 |   test("OfCommaExtractor_pronoun",
221 |     "The Chairperson of the Commission of the African Union, he, on Tuesday...",
222 |     ("he", "[is] The Chairperson of", "the Commission of the African Union"))
223 |
224 |
225 |   test("PossessiveReverseExtractor_[of]",
226 |     "Barack Obama, America's President, gave a debate on Wednesday.",
227 |     ("Barack Obama", "[is] President [of]", "America"))
228 |
229 |   test("PossessiveReverseExtractor_[from]",
230 |     "Tendulkar, India's player, received the Arjuna Award in 1994.",
231 |     ("Tendulkar", "[is] player [from]", "India"))
232 |
233 |   test("PossessiveReverseExtractor_pronoun",
234 |     "He, America's President, gave a debate on Wednesday.",
235 |     ("He", "[is] President [of]", "America"))
236 |
237 |
238 |   test("ProperNounAdjectiveExtractor_[of]",
239 |     "Barack Obama, the US President, gave a debate on Wednesday.",
240 |     ("Barack Obama", "[is] the President [of]", "United States"))
241 |
242 |   test("ProperNounAdjectiveExtractor_[from]",
243 |     "Tendulkar, the Indian player, received the Arjuna Award in 1994.",
244 |     ("Tendulkar", "[is] the player [from]", "India"))
245 |
246 |   test("ProperNounAdjectiveExtractor_pronoun",
247 |     "He, the Indian player, received the Arjuna Award in 1994.",
248 |     ("He", "[is] the player [from]", "India"))
249 | }
250 |
251 |
--------------------------------------------------------------------------------
/version.sbt:
--------------------------------------------------------------------------------
1 | version in ThisBuild := "2.2.2-SNAPSHOT" // version setting, scoped to the whole build via ThisBuild
--------------------------------------------------------------------------------