├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── build.sbt ├── data ├── experimentcorpus │ ├── ReadMe │ ├── Relnoun1.1.txt │ ├── Relnoun1.1_plus_NNPrelationalNouns.txt │ ├── Relnoun1.1_plus_NNPrelationalNouns_plus_ORGwords.txt │ ├── Relnoun1.1_plus_NNPrelationalNouns_plus_ORGwords_plus_demonyms.txt │ └── Relnoun2.2.txt ├── gold.txt └── sentences.txt ├── project ├── Release.scala ├── build.properties └── plugins.sbt ├── release └── release_notes_relnoun.md ├── src ├── main │ ├── resources │ │ ├── edu │ │ │ └── knowitall │ │ │ │ └── chunkedextractor │ │ │ │ ├── confidence │ │ │ │ └── relnoun-confidence.txt │ │ │ │ ├── demonyms.csv │ │ │ │ ├── nouns.txt │ │ │ │ ├── nouns_of.txt │ │ │ │ ├── org_words.txt │ │ │ │ ├── prp_mapping.csv │ │ │ │ └── relnoun_prefixes.txt │ │ └── logging.properties │ └── scala │ │ └── edu │ │ └── knowitall │ │ └── chunkedextractor │ │ ├── Expressions.scala │ │ ├── Extraction.scala │ │ ├── Extractor.scala │ │ ├── JavaChunkedExtractor.scala │ │ ├── Nesty.scala │ │ ├── PatternExtractor.scala │ │ ├── R2A2.scala │ │ ├── ReVerb.scala │ │ ├── Relnoun.scala │ │ └── confidence │ │ ├── ChunkedExtractorConfidenceFunction.scala │ │ ├── ChunkedExtractorFeatureSet.scala │ │ └── TrainChunkedExtractor.scala └── test │ └── scala │ └── edu │ └── knowitall │ └── chunkedextractor │ ├── NestySpecTest.scala │ ├── R2A2SpecTest.scala │ └── RelnounSpecTest.scala └── version.sbt /.gitignore: -------------------------------------------------------------------------------- 1 | project/project 2 | project/target 3 | target 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - "2.10.3" 4 | jdk: 5 | - oraclejdk7 6 | - openjdk7 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 
| ReVerb Software License Agreement 2 | 3 | ReVerb Software 4 | (C) 2011-2012, University of Washington. All rights reserved. 5 | US patent number 7,877,343 and 12/970,155 patent pending 6 | 7 | The University of Washington (UW), Professor Oren Etzioni, Anthony Fader, 8 | Michael Schmitz, Robert Bart, Janara Christensen, and Niranjan Balasubramanian 9 | (Developers) give permission for you and your laboratory (University) to use 10 | ReVerb. ReVerb is a system that extracts relational triples from text. ReVerb 11 | is protected by a United States copyright and patents. The National Science 12 | Foundation supported work on ReVerb. Under University of Washington's 13 | patents 7,877,343 (issued) and 12/970,155 (patent pending), the UW grants to 14 | you the non-exclusive right to use patent claims practiced by the University of 15 | Washington's ReVerb software solely for non-commercial purposes and as long as 16 | you comply with the terms of this ReVerb Software License Agreement. UW and 17 | the Developers allow you to copy and modify ReVerb for non-commercial purposes, 18 | and to distribute modifications through GitHub or directly to the University of 19 | Washington, on the following conditions: 20 | 21 | 22 | 1. ReVerb is not used for any commercial purposes, or as part of a system 23 | which has commercial purposes. 24 | 25 | 26 | 2. Any software derived from ReVerb must carry prominent notices stating that 27 | you modified it along with the date modified. The derivative must also carry 28 | prominent notices stating that it is released under this ReVerb Software 29 | License Agreement 30 | 31 | If you wish to obtain ReVerb or to obtain any patent rights for any commercial 32 | purposes, you will need to contact the University of Washington to see if 33 | rights are available and to negotiate a commercial license and pay a fee. This 34 | includes, but is not limited to, using ReVerb to provide services to outside 35 | parties for a fee. 
In that case please contact: 36 | 37 | UW Center for Commercialization 38 | University of Washington 39 | 4311 11th Ave. NE, 40 | Suite 500 Seattle, WA 98105-4608 41 | 42 | Phone: (206) 543-3970 43 | Email: license@u.washington.edu 44 | 45 | 46 | 3. You retain in ReVerb and any modifications to ReVerb, the copyright, 47 | trademark, patent or other notices pertaining to ReVerb as provided by UW. 48 | 49 | 50 | 4. You provide the Developers with feedback on the use of the ReVerb software 51 | in your research, and that the Developers and UW are permitted to use any 52 | information you provide in making changes to the ReVerb software. All bug 53 | reports and technical questions shall be sent to: afader@cs.washington.edu. 54 | Modifications may be communicated through GitHub pull requests at: 55 | 56 | https://github.com/knowitall/ 57 | 58 | 59 | 5. You acknowledge that the Developers, UW and its licensees may develop 60 | modifications to ReVerb that may be substantially similar to your modifications 61 | of ReVerb, and that the Developers, UW and its licensees shall not be 62 | constrained in any way by you in UW's or its licensees' use or management of 63 | such modifications. You acknowledge the right of the Developers and UW to 64 | prepare and publish modifications to ReVerb that may be substantially similar 65 | or functionally equivalent to your modifications and improvements, and if you 66 | obtain patent protection for any modification or improvement to ReVerb you 67 | agree not to allege or enjoin infringement of your patent by the Developers, 68 | the UW or by any of UW's licensees obtaining modifications or improvements to 69 | ReVerb from the University of Washington or the Developers. 70 | 71 | 72 | 6. If utilization of the ReVerb software results in outcomes which will be 73 | published, please specify the version of ReVerb you used and cite the UW 74 | Developers. 
75 | 76 | @inproceedings{Fader11, 77 | author = {Anthony Fader and Stephen Soderland and Oren Etzioni}, 78 | title = {Identifying Relations for Open Information Extraction}, 79 | booktitle = {Proceedings of the Conference of Empirical Methods 80 | in Natural Language Processing ({EMNLP} '11)}, 81 | year = {2011}, 82 | month = {July 27-31}, 83 | address = {Edinburgh, Scotland, UK} 84 | } 85 | 86 | 87 | 7. Any risk associated with using the ReVerb software at your organization is 88 | with you and your organization. ReVerb is experimental in nature and is made 89 | available as a research courtesy "AS IS," without obligation by UW to provide 90 | accompanying services or support. 91 | 92 | 93 | UW AND THE AUTHORS EXPRESSLY DISCLAIM ANY AND ALL WARRANTIES REGARDING THE 94 | SOFTWARE, WHETHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO WARRANTIES 95 | PERTAINING TO MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ** **DEPRECATED!** ** Please see https://github.com/dair-iitd/OpenIE-standalone, which has combined multiple projects into a single project and maintains the latest version of Open IE (Open IE 5). It is based on another repository https://github.com/allenai/openie-standalone, which has an older version of Open IE. 2 | 3 | # Chunked Extractors 4 | 5 | The chunked extractors project is a collection of three extractors. 6 | 7 | 1. [ReVerb](http://reverb.cs.washington.edu/)--an extractor for verb-mediated relations (`Sally sells sea shells`). 8 | 2. Relnoun--an extractor for noun-mediated relations (`United States president Barack Obama`). 9 | 3. Nesty--an extractor for nested relations (`Some people say that we never landed on the moon`). 10 | 11 | This project provides a common interface to the three extractors by using the nlptools project for chunking and lemmatization. 
12 | 13 | ## Citing Relnoun 14 | 15 | Harinder Pal, Mausam. "Demonyms and Compound Relational Nouns in Nominal Open IE". Workshop on Automated Knowledge Base Construction (AKBC) at NAACL. San Diego, CA, USA. June 2016. 16 | 17 | ## Google Group 18 | 19 | * [knowitall_openie](https://groups.google.com/forum/#!forum/knowitall_openie) 20 | 21 | ## Notifications 22 | 23 | * [01/15/2016][Relnoun] The version 2.2.0 is released ([release notes](https://github.com/knowitall/chunkedextractor/blob/master/release/release_notes_relnoun.md)). 24 | * [12/30/2015][Relnoun] The version 2.0.0 is released ([release notes](https://github.com/knowitall/chunkedextractor/blob/master/release/release_notes_relnoun.md)). 25 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | ReleaseSettings.defaults 2 | 3 | organization := "edu.washington.cs.knowitall.chunkedextractor" 4 | 5 | name := "chunkedextractor" 6 | 7 | description := "Wrapper and implementation for extractors of chunked sentences." 
8 | 9 | crossScalaVersions := Seq("2.10.3") 10 | 11 | scalaVersion <<= crossScalaVersions { (vs: Seq[String]) => vs.head } 12 | 13 | libraryDependencies ++= Seq( 14 | "edu.washington.cs.knowitall" %% "openregex-scala" % "1.1.2", 15 | "edu.washington.cs.knowitall" % "reverb-core" % "1.4.3", 16 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-conf-breeze" % "2.4.5", 17 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-core" % "2.4.5", 18 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-chunk-opennlp" % "2.4.5", 19 | "edu.washington.cs.knowitall.nlptools" %% "nlptools-stem-morpha" % "2.4.5", 20 | // resource management 21 | "com.jsuereth" %% "scala-arm" % "1.3", 22 | "junit" % "junit" % "4.11" % "test", 23 | "org.specs2" % "specs2" % "1.12.3" % "test" cross CrossVersion.binaryMapped { 24 | case "2.9.3" => "2.9.2" 25 | case "2.10.2" => "2.10" 26 | case x => x 27 | }) 28 | 29 | scalacOptions ++= Seq("-unchecked", "-deprecation") 30 | 31 | licenses := Seq("Academic License" -> url("http://reverb.cs.washington.edu/LICENSE.txt")) 32 | 33 | homepage := Some(url("http://github.com/knowitall/chunkedextractor")) 34 | 35 | resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots" 36 | 37 | publishMavenStyle := true 38 | 39 | publishTo <<= version { (v: String) => 40 | val nexus = "https://oss.sonatype.org/" 41 | if (v.trim.endsWith("SNAPSHOT")) 42 | Some("snapshots" at nexus + "content/repositories/snapshots") 43 | else 44 | Some("releases" at nexus + "service/local/staging/deploy/maven2") 45 | } 46 | 47 | pomExtra := ( 48 | <scm> 49 | <url>https://github.com/knowitall/chunkedextractor</url> 50 | <connection>scm:git://github.com/knowitall/chunkedextractor.git</connection> 51 | <developerConnection>scm:git:git@github.com:knowitall/chunkedextractor.git</developerConnection> 52 | <tag>HEAD</tag> 53 | </scm> 54 | <developers> 55 | <developer> 56 | <name>Michael Schmitz</name> 57 | </developer> 58 | </developers>) 59 | -------------------------------------------------------------------------------- /data/experimentcorpus/ReadMe: 
-------------------------------------------------------------------------------- 1 | This folder contains the experiment corpus (along with manual annotations) used to evaluate Relnoun1.1 with Relnoun2.2 as mentioned in the paper entitled "Demonyms and Compound Relational Nouns in Nominal Open IE" (Harinder Pal, Mausam. Workshop on Automated Knowledge Base Construction (AKBC) at NAACL. San Diego, CA, USA. June 2016) 2 | -------------------------------------------------------------------------------- /data/gold.txt: -------------------------------------------------------------------------------- 1 | 0 which are an essential presentation in a virtual studio 2 | 1 Wendy is the daughter of Eldon 3 | 1 Amphora is a publication of the American Philological Association 4 | 0 who was a guest on the show 5 | 0 who was a guest on the show 6 | 1 Testosterone is an important hormone for muscles 7 | 1 Jafrum is a cool site for motorcycle parts 8 | 1 Rock Island is a city in Rock Island County 9 | 1 Love is the opposite of greed 10 | 1 Alberta was a federal electoral district in the Northwest Territories 11 | 0 ASP.NET is the latest version of Microsoft 12 | 0 's Active Server Pages technology .|ASP.NET is the latest version of Microsoft 13 | 0 West Lothian is a county of Scotland .|Before 1975 14 | 1 West Lothian was a county of Scotland 15 | 1 April is a long-time supporter of DHCC 16 | 0 December , 1989 [is] Carlos Salinas became president of Mexico 17 | 1 The DFSG is our definition of free software 18 | 0 Scarce was every kind of food 19 | 1 Dan is a graduate of Emory University 20 | 0 Florida State .|Dan is a graduate of Emory University 21 | 1 Dried figs are an excellent source of iron 22 | 1 2006 was a great year for electronic music 23 | 1 Slashfood is a member of the Weblogs 24 | 0 Contamination is a married guy from Saitama 25 | 1 CODEINE is matter of miscommunication 26 | 1 Adam Sandler is a waste of film 27 | 1 Chester is part of the PoughkeepsieNewburghMiddletown 28 
| 0 the March 6 , 2006 [is] issue of Oil 29 | 1 Aleppo is the oldest inhabited city in history 30 | 1 A social institution is a group of people 31 | 1 CR is the abbreviation of Card Reader 32 | 1 Carl Hoff [is] fan [of] jazz 33 | 1 Dr. Weil is a proponent of Integrative Medicine 34 | 1 James Wang is a photographer in San Diego 35 | 1 Saint Peters is a city in St . Charles County 36 | 1 Woolite is a trademark of Reckitt Benckiser 37 | 1 RADIO is the only source of info 38 | 1 Piracy is an integral part of exploration 39 | 1 Backdraft is an amazing piece of film work 40 | 1 Tya is the art director for Coalesce 41 | 1 Jane is the mother of Fielding 42 | 0 solutions are an university of principal buttons 43 | 1 Mr Pullicino is Minister of Resources 44 | 1 There were 14 deaths during the journey 45 | 0 which were children under 5 years 46 | 1 East Asia is a region of economic dynamism 47 | 1 Downtown Asheville is the cultural center of Western North Carolina 48 | 1 A lab test is the most reliable means of detection 49 | 1 Trondheim is a city of schools 50 | 1 Riven is a trademark of Cyan 51 | 1 Alkalines are an example of primary cells 52 | 1 Pat watt is the uncle of www.workbizfromhom.com 53 | 1 The TIN is one such piece of information 54 | 1 Key 12 is the law of reversal 55 | 1 Jack was a man of taste 56 | 1 Bossier was game in defeat 57 | 1 Clinton Township is a township in Butler County 58 | 1 Impatience is another attribute of selfishness 59 | 1 Abstract algebra is the study of operations 60 | 1 brevity is the soul of Twitter 61 | 0 Pass-Guaranteed is your source for the Cisco 350-024 exam 62 | 1 Steele is president of the Oglala Sioux Tribe 63 | 1 Destiny is just an excuse for bad management 64 | 0 affairs is simply the consequence of some natural order 65 | 1 the Social Media Club [is] an original member of the Media 2.0 Workgroup 66 | 1 Solis is co-founder of the Social Media Club 67 | 0 the Social Media Collective .|Solis is co-founder of the Social Media Club 
68 | 0 the Social Media Collective .|Solis is co-founder of the Social Media Club 69 | 0 .org .|Solis is co-founder of the Social Media Club 70 | 0 Archie Donahue [is] ace [of] segments highlight Marine 71 | 0 LASP is one of a few 72 | 1 The Hurricanes is a story of grit 73 | 1 Maroua is the capital city of the Far North Province 74 | 1 A tribe is any group of people 75 | 0 o...|A tribe is any group of people 76 | 1 Mikael is a married guy from California 77 | 1 A child is a gift from the Creator 78 | 1 Cholesterol is a major component of myelin 79 | 1 Guy is the co-creator of Nancy 80 | 1 Bacteria are a diverse group of single-celled organisms 81 | 1 Play is an intergral part of childhood 82 | 1 82 is a freelance writer in San Francisco 83 | 1 Felix was the first Bishop of the East Angles 84 | 1 Dr. Herman is a fellow of the American College 85 | 0 uploads are an shred of short buttons 86 | 0 three were veterans of World War II 87 | 1 Harold Pinter is a master of language 88 | 1 David was a king of wars 89 | 1 Dr. David Oliver [is] a connection of the Spencer family 90 | 1 Vampire Weekend is an album by Vampire Weekend 91 | 1 Night Watch was a huge hit in Russia 92 | 1 Hair Design is a hair salon in Wilmington 93 | 0 FIG. 2B is a side elevational view of FIG. 94 | 0 Kristina is a Champion for 1 Cause 95 | 1 Beings are owners of kammas 96 | 0 their homing-place .|Beings are owners of kamma 97 | 1 Unknown tongues are a sign of God 98 | 1 summer vacation season is the prime time for home burglaries 99 | 1 they are a good source of fiber 100 | 1 Attachments are a gateway for spammers 101 | -------------------------------------------------------------------------------- /data/sentences.txt: -------------------------------------------------------------------------------- 1 | 31 were licenced in 1662 , as Hackney carriages . 2 | Race order will rotate on a weekly basis . 3 | Length is contained within three fields . 
4 | In the mid 7th century , Islam was introduced to China . 5 | The EMR consists of four components . 6 | In 2000 , Lisbon had a median family income of $ 47,566 . 7 | BMI , mentioned above , is a trigger of IC. 8 | OPEN HOUSE relies on donations , large and small , to carry out its work . 9 | 7. Eve will invest in properties . 10 | Freedom is not the absence of commitments , but the ability to choose--and commit myself to--what is best for me .|Freedom is not the absence of commitment , but the ability to commit to whatever is right for you .|Freedom is not the absence of commitments , but the ... 11 | Complete support is provided for video walls , which are an essential presentation in a virtual studio . 12 | Guests can opt for in-room , poolside , or beachside massages , and more than 60 skin and body therapy treatments . 13 | Perl is predominantly used for data conversion , data management , and site or page creation . 14 | Wendy is the daughter of Eldon and Pam Korinek . 15 | Amphora is a publication of the American Philological Association . 16 | A cure should last for 2-3 weeks . 17 | Crocker is not a bad guy . 18 | Jay said to Miley , who was a guest on the show along with dad Billy Ray .|You also drink ketchup , which seems odd , Jay said to Miley , who was a guest on the show along with dad Billy Ray . 19 | Testosterone is an important hormone for muscles . 20 | Jafrum is a cool site for motorcycle parts , accessories , and clothing for the motorcyclist . 21 | 7:04 AM MST on Mon. , Apr . 22 | 3. Verse 11 can be understood as a prayer . 23 | Life was difficult in pioneer times . 24 | Ryan Adams needs to develop single mindedness and focus . 25 | BIG FISH would take an hour or more . 26 | The South Carolina Aquarium features thousands of marine animals , including sharks , loggerhead turtles , otters , eels , and seahorses . 27 | Godwin certainly has a plan . 28 | In fact , teams are composed of players and fans . 29 | Uaz has been withdrawn by icon . 
30 | Wedding Camera with Flash . 31 | Friends are on a seesaw . 32 | Jamie uses a multitude of symbols , more than an observer can respond to upon single observation - light versus dark , green versus brown , landscapes abounding in water versus barren expanses of rock and dirt - but the unity of theme relies on the use of reflections . 33 | Flowers will vary by availability . 34 | Rock Island is a city in Rock Island County , Illinois , United States . 35 | However , WER grows faster than DER . 36 | International students must have a minimum TOEFL score of 500 . 37 | The Compromise of 1850 see Martin , pp . 38 | The Washington , D.C. , native is in the College Honors Program . 39 | Denise is available to speak on dog behavior , behavior problems , learning theory , becoming a dog trainer and other topics relating to dogs . 40 | Abstracts are due by February 28 , 2001 . 41 | If we are , Bradyn does n't circulates . 42 | Ice skating is also very popular in London , with some of the most famous outdoor rinks being at Alexandra Palace , and Somerset House . 43 | Pembroke is located in Pembrokeshire , Wales . 44 | Robin battled Mr. Freeze . 45 | In general , a growing population means increased demand for real estate . 46 | Naphtali is yet seven years . 47 | Mama was now a ghost . 48 | Glass bottles are not permitted on Park beaches . 49 | Digital storage media have a long history of fragility , as well as obsolescence . 50 | 2009 is going to rock balls . 51 | Purity paves the way to intimacy . 52 | SAFAR is committed to harness , strengthen and empower mainly the women , minorities and other marginalized sections of the society towards their rights through capacity building process . 53 | A memorial may help Literacy . 54 | BGMC provided money for a stove , refrigerator , freezer , dishes , and pans . 55 | Love is the opposite of greed . 56 | Ironbridge Gorge is on the River Severn , 5 miles south of Telford in Shropshire . 
57 | Majandra Delfino was born in Caracas , Venezuela , but moved with her family to Miami at age 3. 58 | When he wishes , Heinlein speaks the language of the Saints , to the glory of Man . 59 | Licensed fluids are discouraged to accessible technical support . 60 | Alberta was a federal electoral district in the Northwest Territories , Canada , that was represented in the Canadian House of Commons from 1887 to 1908 . 61 | 883 million is also a record . 62 | And regular people would say , 'you should be on TV . ' 63 | Engineering is not just about technology . 64 | Since four-way meetings begin right after the initial client-counsel meeting , counsel need not wait for a triggering event . 65 | Health preformers are pragmatic for ten days . 66 | 70 % of the staff holds advanced degrees . 67 | In 1996 , Fowler took the helm of Worlds , Inc. , in an effort to revitalize and sell the company . 68 | After They make the Plan , the Plan is projected to Initiates , then to Disciples , then to Aspirants and then to humanity . 69 | Firstly , ASP.NET is the latest version of Microsoft 's Active Server Pages technology .|ASP.NET is the latest version of Microsoft 's Active Server Pages technology . 70 | Conditions were last updated on March 29 , 2008 . 71 | Participants should wear a bike helmet . 72 | Evil is not inherent in human nature , it is learned . . . . 73 | The CPS sample consisted of 179 women , and the criminal justice system sample consisted of 57 women . 74 | Chelsea is in a difficult position , and she is holding up very well . 75 | Old game hear of sides . 76 | Middle school is really fun . 77 | 42 was committed 2 weeks , 2 days ago . 78 | Finally , implementation must be set in motion . 79 | Part of them is printed by state support . 80 | Care is provided by an outside agency . 81 | Opportunities include focusing on academic enrichment , sports , arts , and much more .See the guidance bulletin board for details . 
82 | Separate consideration is given to fiscal policies , family allowances , family and work , leave-taking and caretaking policies , housing , policies concerning family violence , social aid and poverty , and education and culture . 83 | Full-text is available for 2006+ . 84 | Southwestern improved to 12-4 and 1-0 . 85 | Fellows will receive up to five years of support . 86 | Elena will be living in Siberia , studying management of natural resources and the relationship between Siberia 's natural riches and its people . 87 | Also , Zimbabwe finally has a new Prime Minister . 88 | Gary49er , JHK is not selling fear . 89 | A couple things bring back memories . 90 | Rip Hamilton returned for the Pistons but struggled . 91 | Postnatal depression is extremely common illness . 92 | Anarchism is glorified thumb-sucking . 93 | Neptune is associated with alcohol and drugs . 94 | Wee talked to Tooth and Nail , Fuel by Ramen , and Universal and it 's something we are committed to . 95 | Beck comes to a similar interpretation . 96 | Protectionism has also been a concern . 97 | Cow urine is basically ammonia . 98 | Item is made with metal and plastic . 99 | West Lothian is a county of Scotland .|Before 1975 , West Lothian was a county of Scotland . 100 | These Aircraft Operating Rules constitute part of Club Policy , violations of which will result in fines , membership review , or expulsion from the Club as set forth elsewhere in Club Policy . 101 | Communication is essential between teachers and families . 102 | PRE-REGISTRATION IS REQUIRED FOR THE MAT. 103 | Video ruin controversy comes in true forms , and future is a relevant subject . 104 | For the first time , the Tampa Bay Rays are headed to the World Series .|The Tampa Bay Rays are headed to the World Series . 105 | Healthy families and communities practice the art of compromise . 106 | Rugrat is reported by F-Secure . 107 | However , this is not the case and , in fact , BTP contains a number of optimizations . 
108 | April is a long-time supporter of DHCC , serving on the Board since its inception in 1972 . 109 | Page processed in 0.930238 sec. 110 | TKD grows in Malaysia , Singapore and Brunei . 111 | Transportation would be a really good thing . 112 | The CD is titled Traffic and Weather . 113 | The Cathedral was built between 1846 and 1864 . 114 | The City Hall is located in Via Roma 15 , phone ++39 0765 872025 , fax ++39 0765 872025 . 115 | Hikaru let out a relieved breath . 116 | ERP vendors provide professional services in consultancy , customization , and support . 117 | Il Nido di Gatto Panceri . 118 | Seasonal hunting is permitted for partridge , quail , deer , pronghorn , and bighorn sheep. 119 | Downsizing may be worth exploring . 120 | Craigslist may be in for tough sledding . 121 | Before borrowing , students should exercise caution . 122 | Food irradiation is banned in Europe , largely due to the above concerns . 123 | In December , 1989 , Carlos Salinas became president of Mexico . 124 | Man Utd have a game in hand . 125 | The DFSG is our definition of free software . 126 | Universal energy is experienced through Tai Chi , Acupuncture , Yoga , and Qigong . 127 | Sainte-Colombe was indeed a musical genius . 128 | BLOOD AND GOLD will be published in the Fall . 129 | Maintenance fees are approximately $ 630 . 130 | Harry was no stranger to pain , his uncle had seen to that , but there was nothing that could have ever prepared him to feel anything like this . 131 | Apart from the owner , the Trust also became the centre of controversy . 132 | Lisa has a Bachelor 's Degree from the University of Pittsburgh . 133 | The Chief Justice of the Supreme Court is sending a message to the U.S. Congress , and if they are not listening , there must be a massive petition drive to recall them . 134 | Generic Acyclovir is contraindicated for patients , who develop hypersensitivity or chemical intolerance to the components of this formulation . 
135 | I have to say , MobileMe got off to a rocky start , but seems to be OK now .|MobileMe got off to a rocky start . 136 | An example is seen in Phosphorus , which is a very cold patient , but his stomach symptoms are better from cold drinks . 137 | SQL keywords fall into a number of groups . 138 | Fortune has come full circle for Masque , which began as one of the 80 's hottest underground bands and , some twenty years later , released its successful CD entitled Face First in 2008 featuring songs recorded during their Hollywood Sunset Strip heyday . 139 | GE Fanuc Spindle Servo A06B-6055-H106 unit are available for exchange , surplus + rebuild refurbishment and full repairs are also available if time is not critical . 140 | Four of these were installed in June , 1998 . 141 | Scarce was every kind of food . 142 | 92 was committed 2 weeks , 2 days ago . 143 | Cuba has a well developed system of direct democracy , through workplaces and neighbourhood committees , and most Cubans are well aware of the options available to them . 144 | The Internet Assigned Numbers Authority will maintain a registry of URL schemes . 145 | Martorana lined out to rf . 146 | Nagi did fall from grace . 147 | Jezebel walks over to Luli . 148 | The judge had scarcely finished , when , with a growl like a wild beast , a dozen men sprang on Maxwell . 149 | Dan is a graduate of Emory University , Atlanta GA and has completed his post graduate studies in International Business at Florida State .|Dan is a graduate of Emory University . 150 | Bankruptcies are also increasing in Canada . 151 | Dried figs are an excellent source of iron . 152 | Good list , 2006 was a great year for electronic music . 153 | Isabella was far from a small woman . 154 | Slashfood is a member of the Weblogs , Inc. Network . 155 | Cologne can look back on a long and eventful history , and you can see evidence of this all over the city . 156 | MySQL AB has started an affiliate program . 
157 | Contamination is a married guy from Saitama , Japan . 158 | Still , CODEINE is matter of miscommunication , or lack of sexual drive . 159 | Watson enlisted in July 1942 . 160 | Day is Dying in the West . 161 | Orlando Bloom was in a car accident , but he was not banging Jessica Simpson at the time . 162 | Therefore , dampening is compromised on concrete , forcing you to alter your stride mechanics . 163 | The Electoral College is an affront to basic democracy , warping competition and subverting political equality -- even when it works . 164 | Ronnie James Dio was destined for greatness . 165 | Adam Sandler is a waste of film . 166 | Well one has to follow the Pope . 167 | Tony loved President Bush , Perino said .|Tony loved President Bush . 168 | Bathrooms comprises of showers , bidets and additionaly toiletries . 169 | Chester is part of the PoughkeepsieNewburghMiddletown , NY Metropolitan Statistical Area as well as the larger New YorkNewarkBridgeport , NY-NJ-CT-PA Combined Statistical Area . 170 | Outsiders have the wrong idea about homecomings . 171 | The Impact stole the show in L.A. 172 | Tin comes in Gold . 173 | Site is in English and Finnish . 174 | Neff writes in the March 6 , 2006 , issue of Oil and Gas Journal . 175 | Fans tend to be of two minds . 176 | Recitation of the rosary will be at 11 a.m. 177 | Reportedly , Clarence was very good with children .|Clarence was very good with children . 178 | Aleppo is the oldest inhabited city in history . 179 | A social institution is a group of people , organized by status . 180 | Page created in 1.410816192627 seconds . 181 | Fields 1-4 are available for manure application . 182 | At its most basic , community stands for common interest . 183 | An elevated level can indicate the presence of congestive heart failure . 184 | Next stop was at the Dead Sea , near Ein Bokek . 185 | Paul glanced over to Alice , his accordion teacher , and they exchanged thumbs-up signs . 
186 | Schedules can be customized for various types of businesses . 187 | The Body Mass Index is considered a good indicator of nutritional status . 188 | Monopolies are blind to politics , except when politics can be manipulated to establish or extend the monopoly . 189 | Louis loved all kinds of music . 190 | Governments respond aggressively to global downturn . 191 | Canape also met at Ruby Skye . 192 | John B. died on 01 Mar 1945 . 193 | Sequels can be a bitch . 194 | Phone calls are welcome at 603 527 1490 . 195 | CR is the abbreviation of Card Reader . 196 | Leandro, F advanced to third . 197 | Protectionism amounts to armed robbery . 198 | Additional meal tickets may also be purchased for family and friends . 199 | Children had not gone to school .|Because this was a holiday week , children had not gone to school . 200 | Uz is distinguished from Edom . 201 | O'Day married again in 1942 , this time to golf pro and jazz fan Carl Hoff . 202 | Joe and Mike are still on board . 203 | Lets start with coffee . 204 | Benjamin joined the Army in 1815 , Galway . 205 | In 1988 , Lawn Darts were officially banned in the United States . 206 | Ideas can make a difference . 207 | Thousands have been sent to mental hospitals . 208 | Dr. Weil is a proponent of Integrative Medicine , which combines the best ideas and practices of alternative and conventional medicine in order to maximize the body 's natural healing mechanisms . 209 | James Wang is a photographer in San Diego . 210 | Tommy is on a roll , riding a streak of political good luck . 211 | The Windows version is in development . 212 | Moss and lichen cannot grow on copper , keeping gutters clear and making them excellent for rainwater harvesting . 213 | Schools remain closed in Jabalia , Beit Hanoun and Beit Lahia affecting 30,000 UNRWA students and 8,000 students from the state sector . 214 | Alderbrook appears on the Bethlehem U.S. Geological Survey Map . 215 | Bids do not include shipping and handling charges . 
216 | Kermit is slang for Road . 217 | PCI slots are used for expansion . 218 | The Illini will have to wait until morning . 219 | Orientals start at $ 500 . 220 | Saint Peters is a city in St . Charles County , Missouri , United States . 221 | Rehabilitation consists of counselling , examination of the need for rehabilitation , different therapies , rehabilitation periods in an institution , adaptation training , rehabilitation counselling and aids . 222 | Degrees do n't guarantee competence . 223 | OpalSoft is headquartered in San Jose , California with consultants providing a full range of consulting services throughout North America . 224 | CNN 's Elaine Quijano is live in Chicago . 225 | Christian Bale talks about Harsh Times . 226 | 3. The Titanic set sail on April 10 1912 . 227 | The Moon is opposed to the Sun , and rises about sunset . 228 | St Julien les Villas . 229 | Caddyshack Looks GOOD on HD DVD ! ! ! ! 230 | Woolite is a trademark of Reckitt Benckiser . 231 | A memorandum is submitted to Dr. Manmohan Singh , Honorable Prime Minister of India with demands of providing reservation in private sectors , enacting reservation bill and fulfill the vacant SC seats in Government jobs signed by national executive members and state presidents and secretaries of SC Confederation . 232 | Turn left on to Tremont Street . 233 | Sustainable development cannot be imposed from above . 234 | Shimon Peres is back in office , a minister with responsibility for developing the Galilee and the Negev. 235 | Kida believes in statistics , whereas people evolved to believe in anecdotes . 236 | RADIO is the only source of info . 237 | The Budgerigar can function as an example . 238 | Helping people live there best life . 239 | Methadone is used for chronic pain . 240 | Discussion related to video gaming . 
241 | This is terrible , Flash is only good for video and games , anyone making websites that heavily rely on flash need to start thinking about using plain html , css , and javascript to build their websites . 242 | Piracy is an integral part of exploration . 243 | Lillian D. Windmeier , 79 passed away in Venice , Florida November 7 , 2008 . 244 | Backdraft is an amazing piece of film work . 245 | C is for Centaur , the gallant half men steeds . 246 | Spirit matters more than point of view . 247 | After the lunch break , a second exam booklet , ursing , part II is given to test-takers . 248 | The Colonel paused for a moment . 249 | Stevens may be part of a trend , where an increasing number of men are becoming later-in-life dads . 250 | Panels covered a variety of topics , from Jewish-Berber relations in Morocco to Jewish philanthropists in Russia , Canada and the United States . 251 | John McCain projected winner in South Carolina , 80 per cent of the vote giving McCain a 3 per cent lead . 252 | Ananda Yoga was founded by Paramahansa Yogananda . 253 | Postures are called asanas . 254 | Number 4 came in dead last . 255 | Billy moved in front of her , blocking her view of the invention . 256 | Tya is the art director for Coalesce , and has been with the agency for three years . 257 | Jane is the mother of Fielding , Gordon , Jr. and Kimbrell Smith Teegarden . 258 | A sign here says La Florida . 259 | Depp is perfect for Barnabas , but Im not sure how I feel about Burton . 260 | GT5 will also be a big event . 261 | The Bull Pen expands to two hours , 1-3 p.m. , with the arrival of football . 262 | Sharing culture is a service to humankind . 263 | Mbeki was wrong on AIDS , and he 's wrong on this . 264 | Titles are arranged alphabetically by author last names . 
265 | Between 1949 and 1960 , The Company introduced to Ontario , Speed Queen washers and dryers , Emerson and Motorola electronic products , Gibson home appliances and to Canada , Jenn-Air cooking equipment , Sub-Zero refrigerators , Deep Freeze refrigerators and freezers , Weber barbecues and the Panasonic line of electronics . 266 | Phrase searches can be made within quotation marks . 267 | The KDE web site also lists current job openings . 268 | Physically , solutions are an university of principal buttons , or keys . 269 | Report data was collected through two steps . 270 | Bear shot on Hinchinbrook Island , October 14 , 2001 . 271 | Bluehost excels in customer service . 272 | Blogs are not personal homepages . 273 | Mark has been invited to speak to organizations , businesses , at major project management conferences including Project World , and PMI chapters . 274 | Advent Children belongs to Square Enix . 275 | Mr Pullicino is Minister of Resources and Rural Affairs . 276 | Terrorist is first in fleet . 277 | Specific rates apply to on-site testing . 278 | There were 14 deaths during the journey , nine of which were children under 5 years of age . 279 | East Asia is a region of economic dynamism . 280 | To Schuller , sin is merely the lack of self-esteem . 281 | As of the 2000 census , the CDP had a total population of 6,188 . 282 | He felt in his very bones that for the Melanesian , Westernisation was the road to death . 283 | Hurricane Ivan has been a catastrophe for Grenada . 284 | H was also showing ZED , its new series of small format analog mixers for live performance or recording . 285 | Products passed the certificates of ISO9001 , HALAL , ... 286 | Top-notch caught masturbating stories porn . 287 | On Thursday , November 23 , Havel spoke to a crowd of more than 300,000 . 288 | Histoplasmosis is common in the Northeast . 289 | Requests are processed on a first-come first-served basis . 
290 | On July 15 , 1993 , Gordon commenced an action in Quebec , and on July 16 in Ontario . 291 | Witch is advantageous in two respects . 292 | Space limited to 10 pairs . 293 | Natural progesterone is not only for women . 294 | Science magazine has a special issue on coherence . 295 | Torchwood is also up for an award . 296 | Big companies always need sacrificial lambs , said Penelope . 297 | Of these , therefor ipv6 is solely in use . 298 | Guests enjoyed a selection of Big Band music , toured the exhibit , and had the opportunity to interact with Milton Greenberg and the other speakers from the symposium . 299 | First , estrogen has a protective effect on bone , and reduced levels of the hormone trigger bone loss .|Estrogen has a protective effect on bone . 300 | DRDA provides opportunities for education , peace building , income generation and hope . 301 | Delivery should be made during weekdays . 302 | Fur went down three musketeers . 303 | AJ also starred in The Virgin Suicides . 304 | An order was entered on August 1 , 1992 . 305 | Operating expenses have remained under control . 306 | Hearings are held in Philadelphia , Pittsburgh , and Harrisburg . 307 | 2 Wells , Creek runs thru property . 308 | Downtown Asheville is the cultural center of Western North Carolina . 309 | Blackjack was originated in France , and is one of the most popular casino games ... 310 | Complaints were sent by post and email . 311 | Devon may not be left by approbation . 312 | 36 spoke alloy rims . 313 | The Free Shipping applies only to the Lower 48 states . 314 | Page loaded in 0.0714721679688 seconds . 315 | DVBT is used by 152 users of Software Informer . 316 | Robert Gates is still secretary of defense . 317 | Similarly , surveys must show empathy . 318 | Rick is hiring a designer . 319 | Pressure might work for ice skaters , but it 's not much help for sleds , skis , snowboards , or any other device that slides on a large , flat surface . 
320 | Canned peas are often used as a garnish . 321 | Headquarters are located in Vienna , Austria in Central Europe . 322 | Shane is in town , making tables for Graceland in our garage . 323 | 576 CORPORATION is currently located in DE . 324 | Amino acids can have a powerful impact on depression , with minimal side effects . 325 | A lab test is the most reliable means of detection . 326 | Research is also being conducted on meningiomas , a more benign tumor that nonetheless is capable of invading into the normal brain . 327 | Student membership provides a range of dedicated services , which enable students to borrow books and gain access to information that is n't available in the public domain . 328 | The Maharaja requested Pt . 329 | Staff are available by appointment . 330 | God 's World is full of opportunities . 331 | Nugan turns to Dalineous . 332 | O'Brien advanced to third . 333 | Regional differences must be taken into account . 334 | Tabor was born in Holland , Vermont . 335 | Deployment means traveling to Antarctica . 336 | Like Ronald Reagan in 1976 , Mitt Romney is not part of the Washington establishment . 337 | Cap rate is watchful for capitalization rate . 338 | Restaurants give food to food banks . 339 | Dave grew up in Kitchener , Ontario . 340 | LANXESS does not conduct fundamental research . 341 | Didnt think that was in question . 342 | Mark Young was born in New Zealand , but has lived for more than half his life in Australia . 343 | The CIPROFLOXACIN could be a logical suspect . 344 | Beginning in 1951 , Axelrod conducted research on codeine , ephedrine , methamphetamine , and morphine . 345 | Grand Final is held at the Melbourne Cricket Ground . 346 | In the absence of the Chairman , the Vice-Chairman will chair meetings of the Board of Directors . 347 | 2003 Jamie acted in Threat Matrix . 348 | Anger seethed in Steven 's chest , obscuring his vision and drowning out the last of court . 
349 | 0146.0005 is now available for Ad-Aware Anniversary Edition . 350 | Collective bargaining is governed by the National Labor Relations Act . 351 | Dolby 2.0 stated on cover , but only the 2.0 is available . 352 | Mr. Rogers specializes in renewable energy , political intelligence and cross border transactions . 353 | The VCAA makes a recommendation to the Chancellor . 354 | Salinity can also be affected by urbanization . 355 | 7. Keep out of reach of children . 356 | The Hummingbird Centre features plenty of musical entertainment . 357 | A newspaper was published here in 1851 , and in 1869 the first railroad reached town . 358 | Military service is often a family tradition . 359 | After all , Gimp is best learned through curiosity . 360 | Smokey Bones is located in Fort Myers . 361 | Cover and pages are in great condition . 362 | Garland is a town located in Tipton County , Tennessee . 363 | 3i has acquired Civica , a market leading provider of services to the public sector . 364 | Trondheim is a city of schools , with many educational institutions . 365 | Instruments includes articles on early instruments , the development and form of the orchestra and of course the instruments themselves . 366 | Curently , production is running 20-25 days . 367 | Dr. Erwin joins Drs . 368 | Marlin was rushed to Hermann Hospital , but to no avail . 369 | Weve always killed rattlesnakes . 370 | The Open-air Museum is closed on 24 and 31 December . 371 | Living bone consists of three layers . 372 | LOL just had an impulse . 373 | The Finance Committee will look at options . 374 | Public transport consists of the Metro Bus , and these form important means of transport for non-drivers and tourists . 375 | Weiskopf utilizes elements of jazz and classical music , skillfully integrating improvisation and written themes . 376 | Flow is obviously very important in poetry . 
377 | Jorge works for Marriott , and Dea gets all the love she can from her parents , grandparents and great grandparent . 378 | Page loaded in 0.158607006073 seconds . 379 | Riven is a trademark of Cyan , Inc. 380 | Mini skirts are not acceptable attire . 381 | World Congress hosted in Beijing , China . 382 | Alkalines are an example of primary cells . 383 | A state is known for Ayurveda , a traditional patterns of medicine which has found the fresh market in the growing holidaymaker industry . 384 | Fujian claims that in doing so , Customs made an error of law . 385 | Stanhope has an extensive background in accounting , commercial property management and commercial development . 386 | A receding hairline is also very common among men . 387 | China 's internet prototypes connect to internetcafe . 388 | Ava was being flown by midsummer . 389 | Lucia was established in 2001 , destiny by jim brickmann destiny church ok with destiny capital destiny capital management inc. 390 | Epinions has the best comparison shopping information on Royal Air Maroc. . 391 | Trendy look with three-quarter sleeves . 392 | Two other brothers work for low wages . 393 | The Lord Jesus says in Matthew 16 :18 , on this Rock I will build My church . 394 | The FAQ was last updated on July 6 , 2004 . 395 | Keyword-dense webpages are useful for Web presence . 396 | Pat watt is the uncle of www.workbizfromhom.com . 397 | Maxi did all kinds of flowers . 398 | Licensed orders are connected to realistic technical support . 399 | HOODIA did n't get along with Sunni extremists . 400 | Husband and wife lived in HARRISON Twp . 401 | Guy Kawasaki has a great post up about Kiva . 402 | Form 53 will no longer be accepted for enrollment . 403 | Classes are also available in Kennewick , Bellingham , Yakima , Spokane , Poulsbo , Tacoma and Alaska through Children 's video and teleconferencing program . 404 | Cipro can also cause gas , etc . 405 | Macrame is great for hemp necklaces . 
406 | A good fit is essential for children 's eyeglasses . 407 | The TIN is one such piece of information . 408 | Alex danced with Mrs. Lambert . 409 | Livulpi flied out to cf . 410 | Price reduced to $ 459,900 . 411 | Monique is n't being busted by representative . 412 | Beginning with this version , KateOS addresses the needs of multiple user groups . 413 | Jay-Z is currently CEO of Def Jam , a label held by Universal Music while Beyonc is in contract with Sony . 414 | Candles are n't just for birthdays . 415 | Kasih means love in the Indonesian language . 416 | Streams swell in springtime , carrying water from snowmelt to valleys . 417 | Cross Purposes will normally consist of 6-8 members , a mixture of experienced members and young people engaged in peer ministry who have received appropriate training . 418 | St . Joan of Arc has developed a reputation for outstanding music , both during Masses and in special events . 419 | Key 12 is the law of reversal . 420 | Anna originally auditioned for Good Morning , Who Are You ? but very wisely turned the part down after reading the script . 421 | Laser printed for best quality . 422 | In a two-year time frame , his body has changed , Burke said of Bernier . 423 | Augusta is situated on M-96 , about five miles northeast of Galesburg , and about seven miles west of downtown Battle Creek . 424 | Journalists do n't accept terms . 425 | Workshops are delivered to community members , college students , and young children . 426 | Proper attire consists of closed toe shoes , shirt , and shorts . 427 | Miniature trees usually reach a height of 6 to 8 feet , but still produce full size fruit . 428 | Discussion includes effects of wetland destruction . 429 | Joe Gibbs autographed Football Mini Helmet . 430 | Things have gone kaput . 431 | Women workers are particularly active in social education . 432 | VAT charged at 17.5 % . 433 | Reservation deposits are non-refundable upon cancellation . 
434 | Jack was a man of taste , who never forgot his roots as a studio animator . 435 | Logistics can be difficult in India . 436 | Eastern part will be absorbed by the USA , Japan and China . 437 | Lacy might have done a better job . 438 | Bossier was game in defeat , dressing roughly 30 players . 439 | Clinton Township is a township in Butler County , Pennsylvania , United States . 440 | Impatience is another attribute of selfishness . 441 | Residents can expect to plays games , watch movies , grab coffee and other treats , and just sit around and talk - all in Spanish of course . 442 | Grades vary enormously with ore bodies . 443 | Digesters are now running on forage crops , in some cases requiring no slurry component at all . 444 | Yoga poses for abs . 445 | Cuvee is located in Boston . 446 | The River Adur has its source in the South Downs , southern England and cuts its way through the soft chalk to its mouth at the port town of Shoreham-by-Sea . 447 | Free parking is available in Lot A , next to University Hall , for all performances . 448 | Domestic prices fell in response , as did domestic production . 449 | Karl is also a current member of the Screen Actors Guild . 450 | In 2003 , Savio filed for divorce . 451 | COD accepted for postage . 452 | The NowCast is not currently available for Islip , New York . 453 | Domain advance are reviewing younger and cheaper . 454 | Action is possible in 2007 . 455 | Quin has a number of different moods , all of which are adorable . 456 | The ETV Web Site has additional information on ETV , including general program topics , as well as detailed information on the ETV verification centers . 457 | Mike Knox was eliminated by HBK. 458 | Dr. Anthony came to Lauderdale County , April 4 , 1843 , and engaged in the practice of medicine , which he still successfully continues . 459 | John Gerrard Oil Stick Work closes in 34 days . 460 | First , hair loss can be a side effect of diabetes . 
461 | Abstract algebra is the study of operations , their properties , and the structures that support them . 462 | Silence fell on the Congregation . 463 | The ACC has just 7 . 464 | British media circulate in Ireland . 465 | Hensley pinch hit for Sevilla . 466 | Early experiences also play a role . 467 | Google maps is definitely better than Yahoo , MapQuest , and MSN. 468 | Registrations will be allocated on a first-come , first served basis , so register early ! 469 | Almost all of them are for online poker , or online casinos . 470 | Bond singled to right field . 471 | Interment will be made in Middletown Cemetery .|Interment will be made in Middletown Cemetery , Middletown . 472 | Instead of long-winded posts like this one , brevity is the soul of Twitter . 473 | Born about 1887 , Doyle died in 1945 . 474 | France is in civil war . 475 | Frankly , Michael Johnson is talking garbage . 476 | Price goes up in April . 477 | Bargaining is acceptable in Israel , but not everywhere . 478 | Pass-Guaranteed is your source for the Cisco 350-024 exam . 479 | Nectar is provided for hummingbirds . 480 | Steele is president of the Oglala Sioux Tribe . 481 | Destiny is just an excuse for bad management , Foege said in deploring those who believe the worlds current state of affairs is simply the consequence of some natural order . 482 | Process improvement is about business value . 483 | 25 learn to make tamales . 
484 | Solis is co-founder of the Social Media Club , is an original member of the Media 2.0 Workgroup , and also is a contributor to the Social Media Collective .|Solis is co-founder of the Social Media Club , is an original member of the Media 2.0 Workgroup , and also contributes to the Social Media Collective .|Solis is co-founder of the Social Media Club , an original member of the Media 2.0 Workgroup , and a contributor to the Social Media Collective and http : .org .|Solis is co-founder of the Social Media Club , is an original member of the Media 2.0 Workgroup , and also is a contributor to the Social Media Collective and ConversationalMedia .org . 485 | MICHAEL typed in obsession , and it , too , failed . 486 | Heart , Hibbard told the Boston Globe in April . 487 | Wines and refreshments were placed on a table , on which the writings were displayed , ready for signature . 488 | Mice live far from water . 489 | Homes is more like mansions . 490 | After receiving the district 's determination , petitioner commenced this appeal on June 6 , 1991 . 491 | Cliff Robertson plays host to Jimmy Stewart , Bob Hope , Robert Stack and George Bush , and segments highlight Marine ace Archie Donahue and General Jimmy H. Doolittle . 492 | Duncan had not been mechanizing a wheatgerm . 493 | Pets welcome only by previous arrangement . 494 | Fosrenol is in the FDA pregnancy category C. 495 | A third lesson was to trust people . 496 | Chapter 3 has entries for Major Generals . 497 | A background provides a number of benefits , including skills , contacts and possibly equipment . 498 | Moats were also used for moat spans . 499 | Impeachment has been presented to Congress . 500 | John wrote in Ephesus . 501 | In 1920 , Gidlow moved to New York City , where she worked for Pearson 's , a progressive magazine . 502 | Thousands of children die every year in pools , and it simple does not have to be that way . 503 | Polls can be added to posts . 
504 | Fees can be paid in three equal instalments . 505 | Interior living space ranges from approximately 1,500-2,300 sq . 506 | Cheyenne is also the largest city in the Equality State , supporting a population of around 53,000 people . 507 | LASP is one of a few . 508 | The Hurricanes is a story of grit , strength , racial boundaries and sheer determination as this ragtag team bands together in the face of adversity . 509 | Beach plums grow in sandy soil , even dunes , from New Jersey to eastern Canada . 510 | Roles can be used to define access control . 511 | STIC will provide access to a wide range of materials . 512 | The Start button will turn into a Stop button . 513 | Bugsy Siegel did not reside in Las Vegas , broadway shows in las vegas cannot annex an unincorporated township .|Bugsy Siegel did not reside in Las Vegas . 514 | 3. Stay out of yellow zones . 515 | Steve was named Chairman of the Board instead . 516 | Programs flooded for compressed advertising , current as pen or cookies , centralize similarly a worth mess of forcing wages which are rarely represented spyware , and the robots they are avoided for are the most innocuous , i should say . 517 | Data analysis was performed using GraphPad Prism . 518 | MSF provided support to health facilities , trained staff and donated drugs and diagnostic tests . 519 | Trade organizations are comprised of corporations , private companies and branches . 520 | A non refundable deposit is required upon initial registration . 521 | Prosecco is Italian for bubbly . 522 | 1991 ROYN 0.0.20 hatched during 1991 . 523 | Vashti moved to Muskogee , Oklahoma by 1920 and by 1930 she was living in West Palm Beach , Florida with her son . 524 | Place left hand on waist , remembering to keep the back straight .|Place left hand on waist . 525 | Final restoration is completed before project completion . 526 | The Admiral was of 600 . 
527 | This is primarily because Apple , unlike Microsoft , is not talking about this next generation platform publicly , Rob Enderle writes for MacNews World . 528 | States is more sympathetic to the Armenians . 529 | 235 have paid 1996 dues . 530 | And , luckily , friends stepped in as supporters . 531 | Buggy wins the match in 2 sets . 532 | God 's eternal purpose is being carried out in Christ Jesus . 533 | John Hawkins is just a good guy . 534 | ChangeLog removed from CVS. 535 | Ron was born in Richmond , Ellen was born in Richmond . 536 | Ron was born in Richmond , Ellen was born in Richmond . 537 | Listings are based on self-reported information . 538 | Joey opened with ACETONE , then played TOPLINeS , then SHEWERs . 539 | Vanunu made a statement in English . 540 | Choice Hotels offers the best in rooms , no matter where you are traveling to . 541 | A third issue is due in 2008 . 542 | Maroua is the capital city of the Far North Province of Cameroon . 543 | Unemployment rose to half a million . 544 | Video representations are enough meant for entertainment . 545 | A tribe is any group of people , large or small , who are connected to o...|A tribe is any group of people , large or small , who are connected to one another , a leader , and an idea . 546 | Deahl lives in Hamilton , Ontario . 547 | Mikael is a married guy from California , USA . 548 | 6 entries found for engineer . 549 | A child is a gift from the Creator . 550 | Page generated in 0.011743 seconds . 551 | Cholesterol is a major component of myelin , and glia-produced cholesterol serves as a glial growth factor in synaptogenesis . 552 | On Monday , the Globes will likely follow suit . 553 | Baumatic offer an extensive range of dishwashers , in order to suit the tastes and requirements of each and every individual . 554 | The Zohar rose has 13 petals . 555 | Guy is the co-creator of Nancy . 556 | Ling was decided in 1985 . 
557 | As an independently funded institution governed by a council , Gresham does not charge fees for online study , and it can thus judge the medium purely on its merits . 558 | FLEM ! is hosted on Keenspot . 559 | Even money will be paid for a blackjack . 560 | Costco is opening its outlet in Pocatello , Idaho . 561 | Hunt begins promptly at 1 p.m. 562 | Detail is of utmost importance .|Details are of utmost importance .|Detail is of utmost importance , and the specific context each word and sentence of will carefully considered in the translation process . 563 | Designers should also be aware that at the moment , Red Bubble only prints on white shirts . 564 | Bacteria are a diverse group of single-celled organisms , most of which are microscopic . 565 | Digital asset management is n't just for media companies . 566 | Attendance policy is established by individual faculty . 567 | Situated just 350m , across a road , from sandy Nabq Bay , a bus runs to Na'ama Bay , 30 minutes away* . 568 | Play is an intergral part of childhood . 569 | 82 is a freelance writer in San Francisco . 570 | Lauren does n't slings , when you sell . 571 | Anglian Kings , Felix was the first Bishop of the East Angles . 572 | Oxygen supplementation may be advised in severe cases . 573 | KTLA will probably only get a 47 share . 574 | 6million move the Mets . 575 | Neil is kidnapped by the Shredder , the turtle guys have to rescue her . 576 | Fines can range from $ 25 to $ 500 . 577 | A transliteration would read G . 578 | Dr. Herman is a fellow of the American College of Surgeons , and is certified by the American Board of Plastic Surgery . 579 | A full UK tour will follow later in 2009 . 580 | 67 season rolled to a close , super-heroes were falling by the wayside . 581 | Amateur astronomers must check out the Lowell Observatory , made famous by the discovery of Pluto in 1930 . 
582 | Search engines provide a multitude of results , but may not provide enough distinction between relevant and irrelevant articles .|Search engines provide a multitude of results . 583 | Despair can quickly turn into doubt . 584 | Furthermore , documentation must be produced to archival standards . 585 | Dean does n't own the Press Democrat . 586 | A PR person specializes in Communication , but must also be well-educated in diverse fields . 587 | Great men were found only in great nations . 588 | Physically , uploads are an shred of short buttons , or keys . 589 | Terry drew in a sharp breath , then trotted off to find one of the paramedics . 590 | Courtesy cars are available on request . 591 | Golf is run by morons . 592 | Beauford moved to Boston , Massachusetts when he was a teenager . 593 | He served as a professor in archaeology to Colombia University from October , 1921 to April , 1922 whereupon be became Associate Curator at the Freer Gallery of Art , Smithsonian Institute , Washington , D.C. for the next twenty years , nine of which were spent in China , until his death on June 18 , 1942 . 594 | For nearly three decades , Borla has pushing the boundaries of performance exhaust systems . 595 | A ballot is usually conducted for 30 days , however this period is flexible . 596 | Registration is now open for Session # 2 . 597 | The Homes are inferior in Davie , Cabarrus , Iredell , Transylvania , Wilson , Yadkin , Yancey . 598 | Landsat 7 was successfully launched in April 1999 . 599 | Punta Prima beach is about a 20-25 minute walk . 600 | According to the same Census Bureau statistics , of the 23 who served in Korea , three were veterans of World War II . 601 | The Transition Program features a wide variety of classroom , campus , and community activities . 602 | Energy can be saved in a number of ways . 603 | Image hosting can be quite a lucrative market . 
604 | Harold Pinter is a master of language , but the play was apparently inspired by a very brief and silent scene . 605 | Automated films are installed bots . 606 | A 30 day supply is only $ 2.57 . 607 | CYPROHEPTADINE is on for IBD. 608 | Albert also has 12 magazines . 609 | WikiAnswers What - are the requirements for Stanford . 610 | Crappies have been biting on Julia . 611 | Commercial reproduction requires written permission . 612 | Inns were scattered in various corners , noisy with the din of man and beast on market days . 613 | Depositions can also be taken from 3rd parties . 614 | Video discs are judiciously meant for entertainment . 615 | Greg heads up Business Development , Sales and Client Service efforts at Become .com . 616 | Doctors also look for a teachable moment . 617 | Here in Texas , Poe had a reputation for being a bit of a racists . 618 | The AZITHROMYCIN was for a bladder infection . 619 | Cells can be selected in a variety of ways . 620 | Communication is seeming for sharp results . 621 | David was a king of wars , but at the moment , the LORD had given him a while of rest . 622 | So no , Zambrano was not a given . 623 | Men are not lost souls . 624 | 25 minutes drive to Sarasota . 625 | Depths range from 10 to 60 feet . 626 | Greece is not a very beautiful place . 627 | Cross listed as BIOL 3525 . 628 | In this study , electro-acupuncture was used instead of traditional acupuncture . 629 | Brownfield properties vary in size , location , age , and past use . 630 | A memorial fund will support summer research . 631 | Of the island 's 900 plant species , almost a third is endemic to Mauritius .|Of the island 's 900 plant species , almost a third are endemic to Mauritius . 632 | Credit Unions category provides listings of relevant and useful websites of Tuvalu . 633 | At the age of thirteen , Dr. Lord went to Brookville , Indiana , under the patronage of Dr. 
David Oliver , a connection of the Spencer family , well known in the pioneer history of Ohio . 634 | Dehydration can significantly contribute to heat cramps , heat exhaustion , and eventually heat stroke . 635 | Cameron dies in prison , leaving Tessa seven million dollars and a Sydney mansion . 636 | Bleach made from urine . 637 | Required items are shown in blue . 638 | Reilly did with Michael . 639 | Audio-visual equipment can be supplied on request . 640 | Satire can be a lucrative business . 641 | Cooperative Education opportunities are available in the Marketing program . 642 | Sullivan has found a 12cm . 643 | Knowledge is not power , Knowledge applied is power . 644 | Billy West is also a vegan . 645 | Vampire Weekend is an album by Vampire Weekend . 646 | Washington DC is on Eastern Standard Time zone . 647 | Wartburg is reviled by thunderstorm . 648 | Ordering information can be obtained from Cambridge University Press . 649 | Nearly all foods contain a mixture of vitamins . 650 | Brent is around $ 72 . 651 | Lily is also training to be a private investigator . 652 | Condition is seeing lift . 653 | And even in uncomplicated open-heart surgeries , recovery times are weeks to months . 654 | Night Watch was a huge hit in Russia , out grossing even Lord of the Rings : The Return of the King in that country . 655 | Silence of the Lambs was sort of scary . 656 | Kim tries to remember Rebecca . 657 | Kansas pictured in 11 panels . 658 | Vanuatu is weaker than Xanax . 659 | Close encounters in India . 660 | Mrs. Quinn was of Lutheran faith . 661 | Image File history File links Savoie_flag . 662 | Hair Design is a hair salon in Wilmington . 663 | At some point , Edna had started pole dancing . 664 | FIG. 2B is a side elevational view of FIG. 665 | Sri Radha used to experience Mahabhava . 666 | As a freelance journalist , Cathi has contributed to Mojo , Uncut , Kerrang ! , GQ and many more . 667 | A It will be VERY different . 
668 | Two were found dead at Camp Taji , Iraq , 11 days apart . 669 | Bible contains a lot of history , specially about civilizations in which present-day archaeology is also interested . 670 | Abaza is related to Abkhaz , Adyghian , Kabardian , and Ubykh , which constitute the Abkhazo-Adyghian , or Northwest Caucasian , language group .|Abaza is related to Abkhaz , ...|Abaza is related to Abkhaz , Adyghian , Kabardian , and Ubykh , which constitute the Abkhazo-Adyghian , or ... 671 | Cenobites should 've been closer to number one . 672 | The Society has a large archive of information , photographs and photographic slides pertaining to the town , and also a record of burials in the West Road Cemetary from 1922 - 2000 . 673 | Two funds have been raised to date . 674 | OHIO DEBT CONSOLIDATION is not a god . 675 | Animals are being programmed for disaster , for extinction . 676 | Western blot was carried out using standard techniques . 677 | The WTO sets the rules for trade , but has no rules about procedures for its meetings about how they should be run and organised . 678 | During those four magical nights , Donny had quite a bit of fun . 679 | Quayle now lives in Phoenix , Arizona with his wife Marilyn . 680 | Summons ordered for Geo . 681 | Death has changed dramatically in recent decades . 682 | Graham singled to third base . 683 | El Salvador is traditionally an agricultural country . 684 | Mrs. Graham was full of thoughts , she knew Mr. Stebbins would not understand , and Mr. Stebbins felt it was an unpleasant subject . 685 | Posts : 59 . Cyrus is on a distinguished road . 686 | Kristina is a Champion for 1 Cause . 687 | Elevators are located in Grand Court , next to Dillard 's & Nordstrom and in Bay Street . 688 | Intertrigo have been eroded by generics . 
689 | Close Realty provides the following for GRESHAM , OR real estate agents , GRESHAM , OR teams of realtors , GRESHAM , OR realty offices : real estate websites , idx , real estate email marketing , internet marketing for realtors . 690 | Social movements have occurred throughout history . 691 | Beings are owners of kammas , student , heirs of kammas , they have kammas as their progenitor , kammas as their kin , kammas as their homing-place .|Beings are owners of kamma , heir to kamma , born of kamma , related through kamma , and have kamma as their arbitrator . 692 | Yamaguchi is originally from Miyazaki , Japan and lives in Kettering , Ohio with his wife Miki . 693 | ZIP files are there for a reason . 694 | Tom has a list of suspects , but he 's not ready to name names on the air . 695 | Style may differ slightly from picture above . 696 | Two of us lived in West Bend , me and the German guy and three guys lived not far from Milwaukee . 697 | AJAX does have downsides . 698 | Audio is played through electro-acoustics . 699 | Wimax is now commercially available in Malaysia . 700 | Self-talk is also a form of self-therapy . 701 | Unknown tongues are a sign of God 's anger . 702 | Lilah crawls on top of him . . 703 | Rica has to offer in nightlife . 704 | Gabrielle spoke with great admiration . 705 | Four were forced back to Indonesia , and three sank . 706 | Gold coins are of 999 standard , with 3.11 grams of pure gold . 707 | Ahlberg was free on bond . 708 | Parking is available under Building 12 . 709 | Statistical analysis was done using SYSTAT software . 710 | Well , summer vacation season is the prime time for home burglaries . 711 | Apartment buildings gave way to Victorians . 712 | Tomatoes have high levels of antioxidants , they are a good source of fiber and they are known to help flush out waste and fat from your body . 713 | In her role , Stephanie focuses on sales development , opportunity development , administration and resource management . 
714 | Calibration is made by Kalman filtering . 715 | Attachments are a gateway for spammers , and large attachments in bulk numbers can cause problems for the Internet Service Provider . 716 | -------------------------------------------------------------------------------- /project/Release.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Keys._ 3 | 4 | import sbtrelease._ 5 | import ReleasePlugin._ 6 | import ReleaseKeys._ 7 | import ReleaseStateTransformations._ 8 | import Utilities._ 9 | 10 | import com.typesafe.sbt.SbtPgp.PgpKeys._ 11 | 12 | /** Release settings for the build: `defaults` is `releaseSettings` plus a `releaseProcess` step list in which the `publishArtifacts` step has its action replaced by `publishSignedAction`. */ object ReleaseSettings { 13 | val defaults = releaseSettings ++ Seq( 14 | releaseProcess := Seq[ReleaseStep]( 15 | checkSnapshotDependencies, 16 | inquireVersions, 17 | runTest, 18 | setReleaseVersion, 19 | commitReleaseVersion, 20 | tagRelease, 21 | publishArtifacts.copy(action = publishSignedAction), 22 | setNextVersion, 23 | commitNextVersion 24 | )) 25 | 26 | /** State => State action: extracts the build from the state, looks up `thisProjectRef`, and runs `publishSigned in Global in ref` through `runAggregated`. */ lazy val publishSignedAction = { st: State => 27 | val extracted = st.extract 28 | val ref = extracted.get(thisProjectRef) 29 | extracted.runAggregated(publishSigned in Global in ref, st) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.0 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.1") 2 | 3 | addSbtPlugin("com.github.gseitz" % "sbt-release" % "0.8") 4 | -------------------------------------------------------------------------------- /release/release_notes_relnoun.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## Version 2.2.0 (01/15/2016) 4 | 5 | * Extended
patterns for pronouns 6 | * "His father John, => (John; [is] father [of]; Him)" 7 | * Updated relnoun_prefixes list 8 | * (Selective Prefix check) Less dependency on relnoun_prefixes : Better extractions in the sentences with relational prefixes 9 | * "Iranian film director Jafar Panahi => (Jafar Panahi; [is] film director [of]; Iran)" 10 | 11 | ## Version 2.0.0 (12/30/2015) 12 | 13 | * Allows nnp relation words (previous version only allowed nn relation words) 14 | * Works well with Demonyms 15 | * "Indian president Mukherjee => (Mukherjee; [is] president [of]; India)" 16 | * Modified the patterns to allow relnoun_prefixes (500+ prefixes list as of now) 17 | * "West Bengali Chief Minister Mamata Banerjee => (Mamata Banerjee; [is] Chief Minister [of]; West Bengal)" 18 | * Title Extractor (configurable) 19 | * "President Obama was born in Hawaii on August 4, 1961 => (Obama; [is] President [of]; [UNKNOWN])" 20 | * AppositiveExtractor2 21 | * "Lauren Faust, a cartoonist, => (Lauren Faust; [is]; a cartoonist)" 22 | * OfCommaExtractor 23 | * "The father of Michael, John, => (John; [is] The father of; Michael)" 24 | * Distinguishes the [from] & [of] extractions 25 | * "Indian player Sachin Tendulkar received the Arjuna Award in 1994. => (Sachin Tendulkar, [is] player [from], India)" 26 | * "United States President Barack Obama gave a speech today.
=> (Barack Obama, [is] President [of], United States)" 27 | * Includes a File Mode 28 | -------------------------------------------------------------------------------- /src/main/resources/edu/knowitall/chunkedextractor/confidence/relnoun-confidence.txt: -------------------------------------------------------------------------------- 1 | Intercept -0.0 2 | arg1 contains PRP$ -5.002240337280573E-5 3 | arg1 contains pronoun -0.0 4 | arg1 is proper 0.0 5 | arg2 contains PRP$ -5.002240337280573E-5 6 | arg2 contains pronoun -0.0 7 | arg2 is proper -0.009364550346926247 8 | conj before rel 0.0 9 | extr covers sent -8.773456214182152E-5 10 | np after extr -0.0 11 | np before extr -0.23351207210461408 12 | prep after extr 0.0 13 | prep before extr 0.0 14 | sent ends w/ extr -8.773456214182152E-5 15 | sent starts w/ extr 1.028710313064414 16 | verb after extr 0.0 17 | which|who|that before rel -0.2586414256565373 18 | -------------------------------------------------------------------------------- /src/main/resources/edu/knowitall/chunkedextractor/demonyms.csv: -------------------------------------------------------------------------------- 1 | Aalborgenser,Aalborg 2 | Aberdonian,Aberdeen 3 | Abkhaz,Abkhazia 4 | Abkhazian,Abkhazia 5 | Abrenian,Abra 6 | Abruzzese,Abruzzo 7 | Abyssinian,Abyssinia 8 | Acadian,Acadia 9 | Acadien,Acadie 10 | Acadienne,Acadie 11 | Acarnanian,Acarnania 12 | Acehnese,Aceh 13 | Achaean,Achaea 14 | Acreano,Acre 15 | Adelaidean,Adelaide 16 | Adelaidian,Adelaide 17 | Aeginetan,Aegina 18 | Aethaean,Aethaea 19 | Aetolian,Aetolia 20 | Afghan,Afghanistan 21 | African,Africa 22 | Afro-Eurasian,Afro-Eurasia 23 | Agote,Navarra 24 | Aguascalentense,Aguascalientes 25 | Akkadian,Akkadia 26 | Aklan,Aklan 27 | Aklanese,Aklan 28 | Aklanon,Aklan 29 | Akronite,Akron 30 | Alabamian,Alabama 31 | Alagoano,Alagoas 32 | Åland Island,Åland Islands 33 | Ålandish,Åland 34 | Alaskan,Alaska 35 | Alavense,Álava 36 | Alavés,Álava 37 | Albaceteño,Albacete 38 | 
Albacetense,Albacete 39 | Albanense,Albacete 40 | Albanian,Albania 41 | Albasitense,Albacete 42 | Albay,Albay 43 | Albayan,Albay 44 | Albayano,Albay 45 | Albertan,Alberta 46 | Alcarreño,Guadalajara 47 | Aleppine,Aleppo 48 | Aleut,Aleutian Islands 49 | Aleutian,Aleutian Islands 50 | Alexandrian,Alexandria 51 | Algerian,Algeria 52 | Alicantino,Alicante 53 | Allahabadi,Allahabad 54 | Almerian,Almería 55 | Almeriense,Almería 56 | Alorn,Aloria 57 | Alpine,Alps 58 | Alsatia,Alsace 59 | Alsatian,Alsace 60 | Amalfitan,Amalfi 61 | Amapaense,Amapá 62 | Amazonense,Amazonas 63 | Amazonian,Amazon River And Region 64 | American,United States 65 | American Samoan,American Samoa 66 | Americana,Americas 67 | Amsterdammer,Amsterdam 68 | Ancashino,Ancash 69 | Andalusian,Andalusia 70 | Andaman,Andaman And Nicobar Islands 71 | Andamanese,Andaman And Nicobar Islands 72 | Andhraite,Andhra 73 | Andhrudu,Andhra Pradesh 74 | Andorran,Andorra 75 | Andorrano,Andorra 76 | Angeleno,Los Angeles 77 | Angelino,Angeles 78 | Angelopolitano,Puebla 79 | Angolan,Angola 80 | Anguillan,Anguilla 81 | Ann Arborite,Ann Arbor 82 | Annapolitan,Annapolis 83 | Antarctic,Antarctica 84 | Antarctican,Antarctica 85 | Antiguans,Antigua And Barbuda 86 | Antillean,Antilles 87 | Antioquenian,Antioquia 88 | Antipodean,Antipodes 89 | Antiquenian,Antique 90 | Antiqueño,Antique 91 | Aostain,Aosta 92 | Aostan,Aosta 93 | Apayao,Apayao 94 | Apayaonian,Apayao 95 | Apennine,Apennines 96 | Appalachian,Appalachia 97 | Apulian,Apulia 98 | Aquilonemtexian,University Of North Texas 99 | Aquitanian,Aquitania 100 | Arab,Haugesund 101 | Araber,Haugesund 102 | Aracajuense,Aracaju 103 | Aragonese,Aragon 104 | Arcadian,Arcadia 105 | Arequipeño,Arequipa 106 | Argentine,Argentina 107 | Argentinean,Argentina 108 | Argentinian,Argentina 109 | Argentino,Argentina 110 | Argive,Argos 111 | Århusianer,Århus 112 | Arizonian,Arizona 113 | Arkansawyer,Arkansas 114 | Arkie,Arkansas 115 | Armachian,Armagh 116 | Armenian,Armenia 117 |
Armidilian,Armidale 118 | Arretine,Arretium 119 | Arriacense,Guadalajara 120 | Aruban,Aruba 121 | Arunachali,Arunachal Pradesh 122 | Arundense,Ronda 123 | Ashburnian,Ashbourne 124 | Ashevillain,Asheville 125 | Asian,Asia 126 | Assamese,Assam 127 | Asturian,Asturias 128 | Asturiano,Asturias 129 | Athenian,Athens 130 | Atlantan,Atlanta 131 | Atlantean,Atlantis 132 | Atlantine,Atlantis 133 | Auckland,Auckland 134 | Aucklander,Auckland 135 | Aurgitano,Jaén 136 | Auroran,Aurora 137 | Ausense,Vich 138 | Ausentano,Vich 139 | Ausonense,Vich 140 | Aussie,Australia 141 | Aust-Agding,Aust-Agder 142 | Austinite,Austin 143 | Austonian,Austin 144 | Australasian,Australasia 145 | Australian,Australia 146 | Australianb,Australia 147 | Australien,Australia 148 | Austrian,Austria 149 | Austro-Hungarian,Austria–Hungary 150 | Avileño,Ávila 151 | Ayachunano,Ayacucho 152 | Ayacuchano,Ayacucho 153 | Azawadi,Azawad 154 | Azerbaijani,Azerbaijan 155 | Azeri,Azerbaijan 156 | Azeri'S,Azerbaijan 157 | Azragi,Hazara 158 | Babylonian,Babylonia 159 | Bactrian,Bactria 160 | Badajoceño,Badajoz 161 | Badajocense,Badajoz 162 | Badger,Wisconsin 163 | Bæring,Bærum 164 | Bahamian,The Bahamas 165 | Bahraini,Bahrain 166 | Baiano,Bahia 167 | Bajacaliforniano,Baja California 168 | Bajan,Barbados 169 | Bajoran,Bajor 170 | Bajun,Barbados 171 | Bajuns,Barbados 172 | Balear,Illes Balears 173 | Balinese,Bali 174 | Balkan,Balkans 175 | Ballaratian,Ballarat 176 | Baloch,Balochistan 177 | Balochi,Balochistan 178 | Balochistani,Balochistan 179 | Baltic,Baltic Region 180 | Baltimorean,Baltimore 181 | Baltimoron,Baltimore 182 | Baluch,Balochistan 183 | Banana Bender,Queensland 184 | Bananabender,Queensland 185 | Bandungite,Bandung 186 | Bañezano,La Bañeza 187 | Bangalorean,Bangalore 188 | Bangkokian,Bangkok 189 | Bangla,West Bengal 190 | Bangladeshi,Bangladesh 191 | Barbadian,Barbados 192 | Barbudan,Antigua And Barbuda 193 | Barcelonense,Barcelona 194 | Barcelonés,Barcelona 195 | Barcelonian,Barcelona 196 | 
Barranquino,Barranco 197 | Barriga-Verde,Santa Catarina 198 | Barrovian,Barrow-In-Furness 199 | Barthélemois,Saint Barthélemy 200 | Bashkir,Bashkortostan 201 | Basilan,Basilan 202 | Basilanese,Basilan 203 | Basileño,Basilan 204 | Basotho,Lesotho 205 | Basque,Basque Country 206 | Bataan,Bataan 207 | Bataanese,Bataan 208 | Bataeño,Bataan 209 | Batangan,Batangas 210 | Batangueño,Batangas 211 | Batanic,Batanes 212 | Bathonian,Bath 213 | Baton Rougean,Baton Rouge 214 | Batswana,Botswana 215 | Bavarian,Bavaria 216 | Bay Of Plenty,Bay Of Plenty Region 217 | Bay Stater,Massachusetts 218 | Bedfordian,Bedford 219 | Beijinger,Beijing 220 | Beiruti,Beirut 221 | Belarusian,Belarus 222 | Belenense,Belém 223 | Belfast,Belfast 224 | Belfastian,Belfast 225 | Belgian,Belgium 226 | Belgrader,Belgrade 227 | Belgradian,Belgrade 228 | Belizean,Belize 229 | Bellifontain,Fontainebleau 230 | Belo-Horizontino,Belo Horizonte 231 | Bendigonian,Bendigo 232 | Beneventan,Benevento 233 | Bengalese,West Bengal 234 | Bengali,Bengal 235 | Benguet,Benguet 236 | Benguetian,Benguet 237 | Beninois,Benin 238 | Beotian,Boeotia 239 | Bergamasque,Bergamo 240 | Bergenser,Bergen 241 | Berkeleyan,Berkeley 242 | Berliner,Berlin 243 | Bermudan,Bermuda 244 | Bermudian,Bermuda 245 | Bernese,Bern 246 | Bessarabian,Bessarabia 247 | Beturiense,Badajoz 248 | Bharati,Bharat 249 | Bhutanese,Bhutan 250 | Biafran,Biafra 251 | Big Bender,Tennessee 252 | Bihari,Bihar 253 | Bilbilitano,Calatayud 254 | Biliran,Biliran 255 | Biliranian,Biliran 256 | Biot,British Indian Ocean Territory 257 | Birminghamian,Birmingham 258 | Bissau-Guinean,Guinea-Bissau 259 | Blefuscudian,Blefuscu 260 | Blue Hen'S Chicken,Delaware 261 | Bluenoser,Nova Scotia 262 | Boa-Vistense,Boa Vista 263 | Boeotian,Boeotia 264 | Bogotan,Bogotá 265 | Bohemian,Bohemia 266 | Boholan,Bohol 267 | Boholano,Bohol 268 | Boius,Boiohaemum 269 | Bolivian,Bolivia 270 | Bolognese,Bologna 271 | Boltonian,Bolton 272 | Bonaerense,Buenos Aires 273 | Bonaire,Bonaire 274 | Bonin 
Islander,Bonin Islands 275 | Bordelais,Bordeaux 276 | Boricua,Puerto Rico 277 | Bornean,Borneo 278 | Bornholmer,Bornholm 279 | Bosnian,Bosnia And Herzegovina 280 | Bosnian-Herzegovinian,Bosnia And Herzegovina 281 | Bosphoran,Bosporus 282 | Bosporan,Bosporus 283 | Bostonian,Boston 284 | Botswanan,Botswana 285 | Boulderite,Boulder 286 | Bouvet Island,Bouvet Island 287 | Brasileiro,Brazil 288 | Brasiliense,Brasilia 289 | Bratislavan,Bratislava 290 | Brazilian,Brazil 291 | Brescian,Brescia 292 | Breton,Brittany 293 | Brigantino,A Coruña 294 | Brightonian,Brighton 295 | Brisbanite,Brisbane 296 | Brisso,Brisbane 297 | Bristolian,Bristol 298 | Brisvegan,Brisbane 299 | British,United Kingdom 300 | British Columbian,British Columbia 301 | British Virgin Island,British Virgin Islands 302 | British Virgin Islander,British Virgin Islands 303 | Briton,Britain 304 | Brobdingnagian,Brobdingnag 305 | Bronx,The Bronx 306 | Brooklyn,Brooklyn 307 | Brooklynite,Brooklyn 308 | Brummie,Birmingham 309 | Bruneian,Brunei 310 | Bruttian,Bruttium 311 | Bruxellois,Brussels 312 | Bucharestian,Bucharest 313 | Buckeye,Ohio 314 | Budapester,Budapest 315 | Budapesti,Budapest 316 | Buddie,Paisley 317 | Buffalonian,Buffalo 318 | Bukidnon,Bukidnon 319 | Bulacan,Bulacan 320 | Bulacanese,Bulacan 321 | Bulaqueño,Bulacan 322 | Bulgarian,Bulgaria 323 | Burcés,Burgos 324 | Burgalés,Burgos 325 | Burgense,Burgos 326 | Burgués,Burgos 327 | Burgundian,Burgundy 328 | Burkinabe,Burkina Faso 329 | Burkinabé,Burkina Faso 330 | Burkinabè,Burkina Faso 331 | Burmese,Burma 332 | Burqueño,Albuquerque 333 | Burundian,Burundi 334 | Butternut,Tennessee 335 | Buzzard,Georgia 336 | Byzantine,Byzantium 337 | Cabo Verdean,Cabo Verde 338 | Cacereño,Cáceres 339 | Cadiceño,Cádiz 340 | Cadicense,Cádiz 341 | Caditano,Cádiz 342 | Cagayan,Cagayan 343 | Cagayanese,Cagayan 344 | Cagayano,Cagayan 345 | Cagayanon,Cagayan De Oro 346 | Cairene,Cairo 347 | Cairnsitte,Cairns 348 | Cajamarquino,Cajamarca 349 | Cajun,Louisiana 350 |
Calabrese,Calabria 351 | Calabrian,Calabria 352 | Calcuttan,Kolkata 353 | Calgarian,Calgary 354 | Californian,California 355 | Californio,California 356 | Camarinense,Camarines Sur 357 | Cambodian,Cambodia 358 | Cameroonian,Cameroon 359 | Camiguin,Camiguin 360 | Campanian,Campania 361 | Campechan,Campeche 362 | Campechano,Campeche 363 | Campineiro,Campinas 364 | Canadian,Canada 365 | Canadien,Canada 366 | Canadienne,Canada 367 | Canarese,Karnataka 368 | Canarian,Canary Islands 369 | Canberran,Canberra 370 | Candelariero,Santa Cruz De Tenerife 371 | Cantab,Cambridge 372 | Cantabrian,Cantabria 373 | Cantabrigian,Cambridge 374 | Cantabrio,Cantabria 375 | Cántabro,Cantabria 376 | Canterbury,Canterbury Region 377 | Cantonese,Canton 378 | Cantuarian,Canterbury 379 | Canuck,Canada 380 | Cape Verdian,Cape Verde 381 | Caper,Cape Breton 382 | Capetonian,Cape Town 383 | Capitalino,Mexico City 384 | Capixaba,Espírito Santo 385 | Capizeño,Capiz 386 | Capizian,Capiz 387 | Caracense,Guadalajara 388 | Caraquenian,Caracas 389 | Cardassian,Cardassia Prime 390 | Cardi,Aberteifi 391 | Cardiffian,Cardiff 392 | Carian,Caria 393 | Caribbean,Caribbean 394 | Carinthian,Carinthia 395 | Carioca,Rio De Janeiro 396 | Carlislian,Carlisle 397 | Carlopolitano,São Carlos 398 | Carriacense,Guadalajara 399 | Carsonite,Carson City 400 | Carthaginian,Carthage 401 | Carystian,Carystus 402 | Castellano,Castilla-La Mancha 403 | Castellanomanchego,Castilla-La Mancha 404 | Castellonense,Castellón 405 | Castellonero,Castellón 406 | Castilian,Castile 407 | Catalá,Catalonia 408 | Catalan,Catalonia 409 | Catalán,Catalonia 410 | Catalonian,Catalonia 411 | Catamarqueño,Catamarca 412 | Catanduanean,Catanduanes 413 | Catandueño,Catanduanes 414 | Catarinense,Santa Catarina 415 | Caucasian,Caucasus 416 | Caucasic,Caucasus 417 | Cavitenian,Cavite 418 | Caviteño,Cavite 419 | Caymanian,Cayman Islands 420 | Cearense,Ceará 421 | Cebu,Cebu 422 | Cebuan,Cebu 423 | Cebuano,Cebu 424 | Central African,Central African Republic 
425 | Central American,Central America 426 | Cephalonian,Cephalonia 427 | Cesaraugustano,Zaragoza 428 | Cesetano,Tarragona 429 | Ceutí,Ceuta 430 | Ceylonese,Ceylon 431 | Chachapoyano,Chachapoyas 432 | Chadian,Chad 433 | Chalaco,Callao 434 | Chalcidian,Chalcis 435 | Chandigarhi,Chandigarh 436 | Channel Island,Guernsey 437 | Channel Islander,Guernsey 438 | Chaqueño,Chaco 439 | Charlottean,Charlotte 440 | Charro,Salamanca 441 | Chechen,Chechnya 442 | Chelmsfordian,Chelmsford 443 | Chennaite,Chennai 444 | Chetumaleño,Chetumal 445 | Chhattisgarhi,Chhattisgarh 446 | Chian,Chios 447 | Chiapan,Chiapas 448 | Chiapaneco,Chiapas 449 | Chicagoan,Chicago 450 | Chihuahuan,Chihuahua 451 | Chihuahuense,Chihuahua 452 | Chilango,Mexico City 453 | Chilean,Chile 454 | Chilpancingueño,Chilpancingo 455 | Chimbotano,Chimbote 456 | Chinese,China 457 | Chino,China 458 | Chorrillano,Chorrillos 459 | Christmas Island,Christmas Island 460 | Christmas Islander,Christmas Island 461 | Chubutense,Chubut 462 | Cincinnatian,Cincinnati 463 | Citizen Of The Holy See,Vatican City 464 | Ciudadrealeño,Ciudad Real 465 | Ciudadrealino,Ciudad Real 466 | Clevelander,Cleveland 467 | Cluniense,Ciudad Real 468 | Coahuilan,Coahuila 469 | Coahuilense,Coahuila 470 | Cockney,London 471 | Cockroach,New South Wales 472 | Cocos Island,Cocos Islands 473 | Cocos Islander,Cocos Islands 474 | Colcestrian,Colchester 475 | Colchian,Colchis 476 | Colimeño,Colima 477 | Colimense,Colima 478 | Colombian,Colombia 479 | Coloradoan,Colorado 480 | Colossian,Colossae 481 | Comeperros,Huancayo 482 | Comoran,Comoros 483 | Comorian,Comoros 484 | Compostelan,Compostela Valley 485 | Comval,Compostela Valley 486 | Comvaleño,Compostela Valley 487 | Conch,Key West 488 | Confederate,Confederate States Of America 489 | Congolese,Republic Of The Congo 490 | Connecticotian,Connecticut 491 | Connecticutensian,Connecticut 492 | Connecticuter,Connecticut 493 | Connecticutian,Connecticut 494 | Connetian,Connecticut 495 | Conquense,Cuenca 496 | 
Consentian,Consentia 497 | Cook Island,Cook Islands 498 | Cook Islander,Cook Islands 499 | Copenhagener,Copenhagen 500 | Corcyrean,Corcyra 501 | Cordobense,Córdoba 502 | Cordobés,Córdoba 503 | Corfiot,Corfu 504 | Corinthian,Corinth 505 | Cork,Cork 506 | Corkonian,County Cork 507 | Corncracker,Kentucky 508 | Cornhusker,Nebraska 509 | Cornichon,Reims 510 | Cornish,Cornwall 511 | Correntino,Corrientes 512 | Corsican,Corsica 513 | Cortubí,Córdoba 514 | Coruñés,A Coruña 515 | Cosetano,Tarragona 516 | Costa Rican,Costa Rica 517 | Cotabatan,Cotabato 518 | Cotabateño,Cotabato 519 | Cotabato,Cotabato 520 | Couronian,Courland 521 | Coventrian,Coventry 522 | Cracker,Florida 523 | Cracovian,Kraków 524 | Cretan,Crete 525 | Crimean,Crimea 526 | Croat,Croatia 527 | Croatian,Croatia 528 | Crotonian,Croton 529 | Crow Eater,South Australia 530 | Croweater,South Australia 531 | Cruceño,Santa Cruz De La Sierra 532 | Crucian,St. Croix 533 | Cruzan,St. Croix 534 | CSA,Confederate States Of America 535 | Cuban,Cuba 536 | Cuencano,Cuenca 537 | Cuernavaqueño,Cuernavaca 538 | Cuernavaquense,Cuernavaca 539 | Culiacanense,Culiacán 540 | Cumbrian,Cumbria 541 | Curaçaoan,Curaçao 542 | Curitibano,Curitiba 543 | Curitibense,Curitiba 544 | Curonian,Courland 545 | Cuzqueño,Cuzco 546 | Cycladian,Cyclades 547 | Cypriot,Cyprus 548 | Cypriote,Cyprus 549 | Cyrenaic,Cyrenaica 550 | Cyrene,Cyrenaica 551 | Cyrenian,Cyrene 552 | Czech,Czech Republic 553 | Czechb,Czechoslovakia 554 | Czechoslovak,Czechoslovakia 555 | Czechoslovakian,Czechoslovakia 556 | Dacian,Dacia 557 | Dagestani,Dagestan 558 | Dahomeyan,Dahomey 559 | Dalatese,Da Lat 560 | Dallasite,Dallas 561 | Dalmatian,Dalmatia 562 | Damanese,Daman And Diu 563 | Damascene,Damascus 564 | Dane,Denmark 565 | Danish,Denmark 566 | Dansker,Denmark 567 | Danubian,Danube River 568 | Darwinian,Darwin 569 | Davaoeño,Davao 570 | Dawsonite,Dawson City 571 | Daytonian,Dayton 572 | Defeño,Federal District 573 | Delawarean,Delaware 574 | Delhian,Delhi 575 | 
Delhiite,Delhi 576 | Delhite,Delhi 577 | Delian,Delos 578 | Denver,Denver 579 | Denverite,Denver 580 | Derry,Derry 581 | Detroiter,Detroit 582 | Devonian,Devon 583 | Devonportian,Devonport 584 | Dinagat,Dinagat Islands 585 | Dinagatan,Dinagat Islands 586 | Diuese,Daman And Diu 587 | Djibouti,Djibouti 588 | Djiboutian,Djibouti 589 | Dodecanesian,Dodecanese 590 | Dominican,Dominica 591 | Down Easter,Maine 592 | Downeaster,Maine 593 | Drammenser,Drammen 594 | Dresdener,Dresden 595 | Dublin,Dublin 596 | Dubliner,Dublin 597 | Duluthian,Duluth 598 | Dundonian,Dundee 599 | Dunedin,Dunedin 600 | Dunedinite,Dunedin 601 | Dunelmensis,University Of Durham 602 | Duopontino,Pontevedra 603 | Duosicilian,Two Sicilies 604 | Durangoan,Durango 605 | Duranguense,Durango 606 | Durban,Durban 607 | Durbanite,Durban 608 | Dutch,Netherlands 609 | East Coast,Gisborne Region 610 | East Malaysian,East Malaysia 611 | East Mindorenian,Oriental Mindoro 612 | East Misamis,Misamis Oriental 613 | East Negros,Negros Oriental 614 | East Timorese,East Timor 615 | East Trojan,East Troy 616 | Eastern Davao,Davao Oriental 617 | Eastern Samarian,Eastern Samar 618 | Ebjergenser,Esbjerg 619 | Ecuadorian,Ecuador 620 | Edinbourgeois,Edinburgh 621 | Edinburgensian,Edinburgh 622 | Edinburgher,Edinburgh 623 | Edmontonian,Edmonton 624 | Edonian,Edonia 625 | Egabrense,Cabra 626 | Egestan,Egesta 627 | Egyptian,Egypt 628 | El Paseño,El Paso 629 | El Pasoan,El Paso 630 | Elamite,Elam 631 | Elamitic,Elam 632 | Elamitish,Elam 633 | Eleusian,Eleusina 634 | Eleusinian,Eleusina 635 | Elian,Elis 636 | Emirati,United Arab Emirates 637 | Emirian,United Arab Emirates 638 | English,England 639 | Englishman,England 640 | Englishwoman,England 641 | Entrerriano,Entre Rios 642 | Ephesian,Ephesus 643 | Epidamnian,Epidamnus 644 | Epidaurian,Epidaurus 645 | Epirote,Epirus 646 | Equatoguinean,Equatorial Guinea 647 | Equatorial Guinean,Equatorial Guinea 648 | Eretrian,Eretria 649 | Erieite,Erie 650 | Eritrean,Eritrea 651 | 
Espírito-Santense,Espírito Santo 652 | Essiburn,Ashbourne 653 | Estonian,Estonia 654 | Ethiopian,Ethiopia 655 | Etruscan,Etruria 656 | Euboean,Euboea 657 | Eurasian,Eurasia 658 | European,Europe 659 | Exonian,Exeter 660 | Extremaduran,Extremadura 661 | Falkland Island,Falkland Islands 662 | Falkland Islander,Falkland Islands 663 | Falstring,Falster 664 | Faridabadi,Faridabad 665 | Faroese,Faroe Islands 666 | Faroite,Faro 667 | Ferengi,Ferenginar 668 | Fife,Fife 669 | Fifer,Fife 670 | Fijian,Fiji 671 | Filipino,Philippines 672 | Finn,Finland 673 | Finnic,Finland 674 | Finnish,Finland 675 | Finnmarking,Finnmark 676 | Flatlander,Michigan 677 | Fleming,Flanders 678 | Flemish,Flanders 679 | Florentia,Florence 680 | Florentine,Florence 681 | Florianopolitano,Florianópolis 682 | Florida Cracker,Florida 683 | Floridan,Florida 684 | Floridian,Florida 685 | Fluminense,Rio De Janeiro 686 | Formoseño,Formosa 687 | Fort Worthian,Fort Worth 688 | Fortalezense,Fortaleza 689 | Fox,Maine 690 | Franco-Albertan,Alberta 691 | Franco-Manitobain,Manitoba 692 | Franconian,Franconia 693 | Franco-Ontarian,Ontario 694 | Franco-Saskatchewanian,Saskatchewan 695 | Frankfurter,Frankfurt 696 | Frederictonian,Fredericton 697 | French,France 698 | French Guianese,French Guiana 699 | French Polynesian,French Polynesia 700 | French Southern Territories,French Southern Territories 701 | Frenchman,France 702 | Frenchwoman,France 703 | Friesian,Friesland 704 | Frisian,Friesland 705 | Fueguino,Tierra Del Fuego 706 | Fukuokan,Fukuoka 707 | Futunan,Wallis And Futuna 708 | Fynbo,Funen 709 | Gabonese,Gabon 710 | Gaderita,Cádiz 711 | Gaditano,Cádiz 712 | Galatian,Galatia 713 | Galician,Galicia 714 | Galilean,Galilee 715 | Gallaecus,Gallaecia 716 | Galleguense,Río Gallegos 717 | Gallifreyan,Gallifrey 718 | Gallovidian,Galloway 719 | Galway,Galway 720 | Galwegian,Galway 721 | Gambian,The Gambia 722 | Ganadí,Granada 723 | Garnatí,Granada 724 | Gascon,Gascony 725 | Gaúcho,Rio Grande Do Sul 726 | Gaulish,Gallia 
727 | Geat,Geatland 728 | Genevan,Geneva 729 | Genevese,Geneva 730 | Genoese,Genoa 731 | Genoshan,Genosha 732 | Genovese,Genoa 733 | Geordie,Newcastle 734 | Georgian,Georgia 735 | German,Germany 736 | Germanian,Germania 737 | Germanus,Germania 738 | Germistonian,Germiston 739 | Gerundense,Girona 740 | Gerundí,Girona 741 | Ghanaian,Ghana 742 | Gibraltar,Gibraltar 743 | Gibraltarian,Gibraltar 744 | Gimnesiense,Illes Balears 745 | Gironés,Girona 746 | Gisborne,Gisborne Region 747 | Glaswegian,Glasgow 748 | Goan,Goa 749 | Goanese,Goa 750 | Goenkar,Goa 751 | Gold Coast,Gold Coast 752 | Goober-Grabber,Georgia 753 | Gozitan,Gozo 754 | Granadés,Granada 755 | Granadí,Granada 756 | Granadino,Granada 757 | Grand Rapidian,Grand Rapids 758 | Grasshopper,Kansas 759 | Grazer,Graz 760 | Greek,Greece 761 | Greenlandic,Greenland 762 | Greenvillian,Greenville 763 | Grenadian,Grenada 764 | Guadalajareño,Guadalajara 765 | Guadalajarense,Guadalajara 766 | Guadalupense,Guadalupe 767 | Guadeloupe,Guadeloupe 768 | Guadeloupean,Guadeloupe 769 | Guamanian,Guam 770 | Guambat,Guam 771 | Guanajuateño,Guanajuato 772 | Guanajuatense,Guanajuato 773 | Guatemalan,Guatemala 774 | Guernsey,Guernsey 775 | Guerreran,Guerrero 776 | Guerrerense,Guerrero 777 | Guianan,Guiana 778 | Guimarasian,Guimaras 779 | Guinea-Bissauan,Guinea-Bissau 780 | Guinean,Guinea 781 | Guipuzcoano,Guipúzcoa 782 | Gujarati,Gujarat 783 | Gurkhas,Nepal 784 | Guyanese,Guyana 785 | Hagenaar,The Hague 786 | Hagenees,The Hague 787 | Haitian,Haiti 788 | Haldenser,Halden 789 | Halifaxian,Halifax 790 | Haligonian,Halifax 791 | Halling,Hallingdal 792 | Hamburger,Hamburg 793 | Hamiltonian,Hamilton 794 | Hanoian,Hanoi 795 | Hanoverian,Hanover 796 | Harcourtian,Port Harcourt 797 | Harrovian,Harrow 798 | Hartlepudlian,Hartlepool 799 | Haryanvi,Haryana 800 | Hastingite,Hastings 801 | Haugesunder,Haugesund 802 | Hawaii Resident,Hawaii 803 | Hawaiian,Hawaii 804 | Hawke'S Bay,Hawke'S Bay Region 805 | Hawkeye,Iowa 806 | Hayastani,Hayastan 807 | 
Hazaragi,Hazara 808 | Heard Island,Heard Island And Mcdonald Islands 809 | Hebridean,Hebrides 810 | Hedmarking,Hedmark 811 | Hellene,Greece 812 | Hellenic,Greece 813 | Helsingoraner,Elsinore 814 | Helsinkian,Helsinki 815 | Hermosillense,Hermosillo 816 | Herzegovinian,Bosnia And Herzegovina 817 | Hessian,Hesse 818 | Hidalguense,Hidalgo 819 | Hidrocálido,Aguascalientes 820 | Himachali,Himachal Pradesh 821 | Himmerlænding,Himmerland 822 | Hiroshiman,Hiroshima 823 | Hispalense,Seville 824 | Hispanic,Hispania 825 | Hobartian,Hobart 826 | Hokkaidoan,Hokkaido 827 | Hokkaidoite,Hokkaido 828 | Hollander,Netherlands 829 | Holsteinian,Holstein 830 | Honduran,Honduras 831 | Hong Kong,Hong Kong 832 | Hong Kong Chinese,Hong Kong 833 | Hong Konger,Hong Kong 834 | Hongkonger,Hong Kong 835 | Hongkongese,Hong Kong 836 | Hongkongish,Hong Kong 837 | Honolulan,Honolulu 838 | Honshuan,Honshu 839 | Hoosier,Indiana 840 | Hordalending,Hordaland 841 | Houstonian,Houston 842 | Huanca,Huancayo 843 | Huancaíno,Huancayo 844 | Huancavelicano,Huancavelica 845 | Huanuqueño,Huanuco 846 | Huaracino,Huaraz 847 | Huelveño,Huelva 848 | Hunanese,Hunan 849 | Hungarian,Hungary 850 | Hyderabadi,Hyderabad 851 | Iberian,Iberia 852 | Icelandic,Iceland 853 | Idahoan,Idaho 854 | Ifugao,Ifugao 855 | Iguaçuense,Foz Do Iguaçu 856 | I-Kiribati,Kiribati 857 | Iliberitano,Granada 858 | Illinian,Illinois 859 | Illinoian,Illinois 860 | Illinoisan,Illinois 861 | Illinoisian,Illinois 862 | Illyrian,Illyria 863 | Ilocan,Ilocos Sur 864 | Ilocano,Ilocos Sur 865 | Iloilo,Iloilo 866 | Iloiloan,Iloilo 867 | Ilonggo,Iloilo 868 | Indian,India 869 | Indianian,Indiana 870 | Indochinese,Indochina 871 | Indonesian,Indonesia 872 | Ingush,Ingushetia 873 | Innuit,Alaska 874 | Invercargill,Invercargill 875 | Invernessian,Inverness 876 | Ionian,Ionia 877 | Iowa Citian,Iowa City 878 | Iowan,Iowa 879 | Iowegian,Iowa 880 | Iqueño,Ica 881 | Irani,Iran 882 | Iranian,Iran 883 | Iraqi,Iraq 884 | Irish,Ireland 885 | Irishman,Ireland 886 | 
Irishwoman,Ireland 887 | Irmite,Irmo 888 | Isabelan,Isabela 889 | Islamabadi,Islamabad 890 | Islander,Hawaii 891 | Israeli,Israel 892 | Israelite,Israel 893 | Istanbulite,Istanbul 894 | Itabirano,Itabira 895 | Italian,Italy 896 | Ivatan,Batanes 897 | Ivatanic,Batanes 898 | Ivorian,Ivory Coast 899 | Ivory Coast, Ivory Coast 900 | Iwo Jiman,Iwo Jima 901 | Izmirian,Izmir 902 | Jack,Swansea 903 | Jacksonvillian,Jacksonville 904 | Jaenés,Jaén 905 | Jakartan,Jakarta 906 | Jaliscan,Jalisco 907 | Jalisciense,Jalisco 908 | Jamaican,Jamaica 909 | Jammu,Jammu And Kashmir 910 | Jan Mayen,Jan Mayen 911 | Janner,Cornwall 912 | Japanese,Japan 913 | Jarocho,Veracruz 914 | Jaujano,Jauja 915 | Javan,Java 916 | Javanese,Java 917 | Jaxon,Jacksonville 918 | Jerseyan,Jersey 919 | Jerseyite,New Jersey 920 | Jerusalemite,Jerusalem 921 | Jharkhandi,Jharkhand 922 | Jienense,Jaén 923 | Jock,Scotland 924 | Johannesburg,Johannesburg 925 | Johorean,Johor 926 | Jordanian,Jordan 927 | Judaean,Judea 928 | Judean,Judea 929 | Juiz-Forano,Juiz De Fora 930 | Jujeño,Jujuy 931 | Jute,Jutland 932 | Jyde,Jutland 933 | Kagoshiman,Kagoshima 934 | Kalamazooan,Kalamazoo 935 | Kalingan,Kalinga 936 | Kalundborgenser,Kalundborg 937 | Kaluzhanin,Kaluga 938 | Kalymnian,Kalymnos 939 | Kamaaina,Hawaii 940 | Kamaʻāina,Hawaii 941 | Kannadiga,Karnataka 942 | Kansan,Kansas 943 | Kansas Citian,Kansas City 944 | Karachiite,Karachi 945 | Karelian,Karelia 946 | Karnatakan,Karnataka 947 | Kashmiran,Kashmir 948 | Kashmiri,Jammu And Kashmir 949 | Kastorian,Kastoria 950 | Katangese,Katanga 951 | Kaulonian,Kaulonia 952 | Kazakhstani,Kazakhstan 953 | Kedahan,Kedah 954 | Kelantanese,Kelantan 955 | Kenoshan,Kenosha 956 | Kentuckian,Kentucky 957 | Kenyan,Kenya 958 | Keralite,Kerala 959 | Kernewek,Cornwall 960 | Khmer,Cambodia 961 | Kievan,Kiev 962 | Kingstonian,Kingston 963 | Kirghiz,Kyrgyz Republic 964 | Kitchenerite,Kitchener 965 | Kittian,Saint Kitts And Nevis 966 | Kittian And Nevisian,Saint Kitts And Nevis 967 | Kittitian,Saint 
Kitts And Nevis 968 | Kiwi,New Zealand 969 | Klingon,Qo'Nos 970 | Knickerbocker,New York 971 | Knossian,Knossos 972 | Knoxvillian,Knoxville 973 | Københavner,Copenhagen 974 | Koldingenser,Kolding 975 | Kosovan,Kosovo And Methohija 976 | Krakovian,Kraków 977 | Kuala Lumpurian,Kuala Lumpur 978 | Kuchingite,Kuching 979 | Kurdish,Kurdistan 980 | Kurilian,Kuril Islands 981 | Kuwaiti,Kuwait 982 | Kyotoite,Kyoto 983 | Kyrgyz,Kyrgyzstan 984 | Kyushuan,Kyushu 985 | La Ueño,La Union 986 | La Unionian,La Union 987 | Labradorian,Newfoundland And Labrador 988 | Labuanese,Labuan 989 | Laccadivian,Lakshadweep 990 | Lagosian,Lagos 991 | Laguneño,Laguna 992 | Lagunense,Laguna 993 | Lagunian,Laguna 994 | Lahori,Lahore 995 | Lakedaemonian,Lakedaimon 996 | Lakedaimonian,Lakedaimon 997 | Lambayequino,Lambayeque 998 | Lancastrian,Lancaster 999 | Lao,Laos 1000 | Laotian,Laos 1001 | Lapp,Lapland 1002 | Laputan,Laputa 1003 | Larissan,Larissa 1004 | Las Vegan,Las Vegas 1005 | Latin,Latium 1006 | Latverian,Latveria 1007 | Latvian,Latvia 1008 | Laurentian,St. 
Lawrence River 1009 | Lavalois,Laval 1010 | Lavaloise,Laval 1011 | Lebanese,Lebanon 1012 | Leghornese,Livorno 1013 | Legionense,León 1014 | Leipziger,Leipzig 1015 | Leodensian,Leeds 1016 | Leonés,León 1017 | Leonese,León 1018 | Leontinian,Leontini 1019 | Leopolitan,Lviv 1020 | Lerense,Pontevedra 1021 | Lesbian,Lesbos 1022 | Lethbridgian,Lethbridge 1023 | Levantine,The Levant 1024 | Leytenian,Leyte 1025 | Liberian,Liberia 1026 | Liberteño,La Libertad 1027 | Libyan,Libya 1028 | Liechtenstein,Liechtenstein 1029 | Liechtensteiner,Liechtenstein 1030 | Ligurian,Liguria 1031 | Lilliputian,Lilliput 1032 | Limenian,Lima 1033 | Limeño,Lima 1034 | Linarense,Linares 1035 | Linzer,Linz 1036 | Lisboeta,Lisbon 1037 | Lithuanian,Lithuania 1038 | Liverpudlian,Liverpool 1039 | Livonian,Livonia 1040 | Livornese,Livorno 1041 | Locrian,Locris 1042 | Loiner,Leeds 1043 | Lollik,Lolland 1044 | Lombard,Lombardy 1045 | Lombardic,Lombardy 1046 | London,London 1047 | Londoner,London 1048 | Londrinense,Londrina 1049 | Long Island,Long Island 1050 | Long Islander,Long Island 1051 | Loretano,Loreto 1052 | Lorrainer,Lorraine 1053 | Lorrainian,Lorraine 1054 | Los Angelean,Los Angeles 1055 | Los Angeleno,Los Angeles 1056 | Los Angelino,Los Angeles 1057 | Louisianian,Louisiana 1058 | Louisvillian,Louisville 1059 | Louperivois,Rivière-Du-Loup 1060 | Lousianan,Lousiana 1061 | Lousianian,Lousiana 1062 | Lower Californian,Baja California 1063 | Lucanian,Lucania 1064 | Lucchese,Lucca 1065 | Lucense,Lugo 1066 | Ludovicense,São Luís 1067 | Luqués,Lugo 1068 | Lusatian,Lusatia 1069 | Luxembourg,Luxembourg 1070 | Luxembourger,Luxembourg 1071 | Luxembourgish,Luxembourg 1072 | Lydian,Lydia 1073 | Lyonese,Lyon 1074 | Lyonnais,Lyon 1075 | Macanese,Macau 1076 | Macapense,Macapá 1077 | Maccam,Sunderland 1078 | Macedonian,Macedonia 1079 | Maceioense,Maceió 1080 | Madeiran,Madeira 1081 | Madhya Pradeshi,Madhya Pradesh 1082 | Madredediosense,Madre De Dios 1083 | Madrideño,Madrid 1084 | Madrileño,Madrid 1085 | 
Maeonian,Maeonia 1086 | Mageritense,Madrid 1087 | Maguindanaoan,Maguindanao 1088 | Magyar,Hungary 1089 | Maharashtrian,Maharashtra 1090 | Mahorais,Mayotte 1091 | Mahoran,Mayotte 1092 | Maineiac,Maine 1093 | Mainer,Maine 1094 | Mainiac,Maine 1095 | Mainland,South Island 1096 | Majorcan,Majorca 1097 | Malaccan,Malacca 1098 | Malacitano,Málaga 1099 | Malagasy,Madagascar 1100 | Malagenean,Málaga 1101 | Malagueño,Málaga 1102 | Malawian,Malawi 1103 | Malay,Malaya 1104 | Malayan,Malaya 1105 | Malaysian,Malaysia 1106 | Maldivian,Maldives 1107 | Maldonadino,Puerto Maldonado 1108 | Malian,Mali 1109 | Malihini,Hawaii 1110 | Malinese,Mali 1111 | Maltese,Malta 1112 | Man Of Kent,Kent 1113 | Manauara,Manaus 1114 | Manauense,Manaus 1115 | Manchego,Ciudad Real 1116 | Manchurian,Manchuria 1117 | Mancunian,Manchester 1118 | Manhattan,Manhattan 1119 | Manhattanite,Manhattan 1120 | Manilan,Metro Manila 1121 | Manileño,Manila 1122 | Manipuri,Manipur 1123 | Manitobain,Manitoba 1124 | Manitoban,Manitoba 1125 | Mantinean,Mantineia 1126 | Mantuan,Mantua 1127 | Manxwoman,Isle Of Man 1128 | Maqueta,Castilla-La Mancha 1129 | Maranao,Lanao Del Sur 1130 | Maranhense,Maranhão 1131 | Marathonian,Marathon 1132 | Marinduquenian,Marinduque 1133 | Marinduqueño,Marinduque 1134 | Maritime,Maritimes 1135 | Maritimer,Maritime Region 1136 | Marseillais,Marseille 1137 | Marshallese,Marshall Islands 1138 | Martinican,Martinique 1139 | Martiniquais,Martinique 1140 | Marylander,Maryland 1141 | Masbatenian,Masbate 1142 | Masbateño,Masbate 1143 | Mashugana,Michigan 1144 | Massachusettsan,Massachusetts 1145 | Massachusite,Massachusetts 1146 | Masshole,Massachusetts 1147 | Masurian,Masuria 1148 | Matritense,Madrid 1149 | Mauritanian,Mauritania 1150 | Mauritian,Mauritius 1151 | Mayrití,Madrid 1152 | Mazatleco,Mazatlán 1153 | Mcdonald Islands,Heard Island And Mcdonald Islands 1154 | Mckinnian,Mckinney 1155 | Median,Media 1156 | Mediterranean,Mediterranean 1157 | Megaran,Megara 1158 | Megarian,Megara 1159 | 
Meghalayan,Meghalaya 1160 | Melanesian,Melanesia 1161 | Melbournian,Melbourne 1162 | Melbournite,Melbourne 1163 | Melburnian,Melbourne 1164 | Melian,Melos 1165 | Melillense,Melilla 1166 | Melitian,Melite 1167 | Memphian,Memphis 1168 | Mendocino,Mendoza 1169 | Mentonasque,Menton 1170 | Merian,Meru 1171 | Meridano,Mérida 1172 | Mesopotamian,Mesopotamia 1173 | Messenian,Messenia 1174 | Messinese,Messina 1175 | Methymnian,Mithymna 1176 | Mexicalense,Mexicali 1177 | Mexican,Mexico 1178 | Mexican,Mexico 1179 | Mexicano,Mexico 1180 | Mexiquense,México 1181 | Miamian,Miami 1182 | Michigander,Michigan 1183 | Michiganer,Michigan 1184 | Michiganese,Michigan 1185 | Michiganian,Michigan 1186 | Michiganite,Michigan 1187 | Michigine,Michigan 1188 | Michoacano,Michoacán 1189 | Micronesian,Micronesia 1190 | Middle Eastern,Middle East 1191 | Middle Easterner,Middle East 1192 | Milanese,Milan 1193 | Milesian,Miletus 1194 | Milwaukeean,Milwaukee 1195 | Mindoreño,Oriental Mindoro 1196 | Mineiro,Minas Gerais 1197 | Minneapolitan,Minneapolis 1198 | Minnesotan,Minnesota 1199 | Minorcan,Minorca 1200 | Minotian,Minot 1201 | Miquelonnais,Saint Pierre And Miquelon 1202 | Miraflorino,Miraflores 1203 | Misionero,Misiones 1204 | Misratan,Misrata 1205 | Mississippian,Mississippi 1206 | Missourian,Missouri 1207 | Mizo,Mizoram 1208 | Modenese,Modena 1209 | Moldavian,Moldavia 1210 | Moldenser,Molde 1211 | Moldovan,Moldova 1212 | Moluccan,Moluccas 1213 | Monacan,Monaco 1214 | Monégasque,Monaco 1215 | Mongol,Mongolia 1216 | Mongolian,Mongolia 1217 | Montanan,Montana 1218 | Montenegrin,Montenegro 1219 | Montreal,Montreal 1220 | Montréalais,Montreal 1221 | Montrealer,Montreal 1222 | Montréaler,Montreal 1223 | Montserratian,Montserrat 1224 | Moose Javian,Moose Jaw 1225 | Moosejavian,Moose Jaw 1226 | Moqueguano,Moquegua 1227 | Moravian,Moravia 1228 | Morelense,Morelos 1229 | Moreliano,Morelia 1230 | Møring,Møre Og Romsdal 1231 | Moroccan,Morocco 1232 | Moroleonés,Moroleón 1233 | Morsing,Mors 1234 | 
Mosotho,Lesotho 1235 | Motswana,Botswana 1236 | Mountaineers,Mountain Province 1237 | Mozambican,Mozambique 1238 | Mumbaikar,Mumbai 1239 | Mumbaikars,Mumbai 1240 | Münchner,Munich 1241 | Munsonian,Muncie 1242 | Murcí,Murcia 1243 | Murcian,Murcia 1244 | Murciano,Murcia 1245 | Muscovia,Moscow 1246 | Muscovite,Moscow 1247 | Muskrat,Delaware 1248 | Myanmar,Burma 1249 | Myanmarese,Myanmar 1250 | Mycenaean,Mycenae 1251 | Mytilenean,Mytilene 1252 | Naga,Nagaland 1253 | Nagalandese,Nagaland 1254 | Nagasakian,Nagasaki 1255 | Nagoyan,Nagoya 1256 | Namibian,Namibia 1257 | Napieran,Napier 1258 | Napoletano,Naples 1259 | Narnian,Narnia 1260 | Nashvillian,Nashville 1261 | Nassuvian,Nassau 1262 | Natalense,Natal 1263 | Naupactian,Naupactus 1264 | Nauruan,Nauru 1265 | Navarrese,Navarre 1266 | Navarro,Navarra 1267 | Naxian,Naxos 1268 | Nayarita,Nayarit 1269 | Nazarene,Nazareth 1270 | Neapolitan,Naples 1271 | Nebraskan,Nebraska 1272 | Negeri Sembilanese,Negeri Sembilan 1273 | Negrense,Negros Oriental 1274 | Nelson,Nelson 1275 | Nelsonian,Nelson Region 1276 | Neolonés,Nuevo León 1277 | Nepali,Nepal 1278 | Netherlander,The Netherlands 1279 | Netherlandic,Netherlands 1280 | Neuquino,Neuquen 1281 | Nevadian,Nevada 1282 | Nevisian,Saint Kitts And Nevis 1283 | New Brunswick,New Brunswick 1284 | New Brunswickan,New Brunswick 1285 | New Brunswickian,New Brunswick 1286 | New Caledonian,New Caledonia 1287 | New England,New England 1288 | New Englander,New England 1289 | New Guinian,New Guinea 1290 | New Hampshireman,New Hampshire 1291 | New Hampshirewoman,New Hampshire 1292 | New Hampshirite,New Hampshire 1293 | New Jerseyite,New Jersey 1294 | New Mexican,New Mexico 1295 | New Orleanian,New Orleans 1296 | New South Walesian,New South Wales 1297 | New South Welsh,New South Wales 1298 | New York,New York City 1299 | New Yorker,New York 1300 | New Zealand,New Zealand 1301 | New Zealander,New Zealand 1302 | Newf,Newfoundland And Labrador 1303 | Newfie,Newfoundland And Labrador 1304 | Newfoundland 
And Labrador,Newfoundland And Labrador 1305 | Newfoundlander,Newfoundland And Labrador 1306 | Newfy,Newfoundland And Labrador 1307 | Newportonian,Newport 1308 | Nicaraguan,Nicaragua 1309 | Nicobar,Andaman And Nicobar Islands 1310 | Nicobarese,Andaman And Nicobar Islands 1311 | Niçois,Nice 1312 | Nicolaíta,San Nicolás De Los Garza 1313 | Nicosian,Nicosia 1314 | Nicoya,Nicaragua 1315 | Nigerian,Nigeria 1316 | Nigerien,Niger 1317 | Nihonjin,Nihon 1318 | Nilotic,Nile River 1319 | Nipponjin,Nihon 1320 | Nisyrian,Nisyros 1321 | Niteroiense,Niterói 1322 | Niuean,Niue 1323 | Ni-Vanuatu,Vanuatu 1324 | None,Vatican City 1325 | Norbiense,Cáceres 1326 | Nordlending,Nordland 1327 | Nordmann,Norway 1328 | Nordtrønder,Nord-Trøndelag 1329 | Norfolk Island,Norfolk Island 1330 | Norfolk Islander,Norfolk Island 1331 | Norman,Normandy 1332 | Norte-Rio-Grandense,Rio Grande Do Norte 1333 | North Agusanian,Agusan Del Norte 1334 | North American,North America 1335 | North Camarinean,Camarines Norte 1336 | North Carolinian,North Carolina 1337 | North Dakotan,North Dakota 1338 | North Davao,Davao Del Norte 1339 | North Ilocos,Ilocos Norte 1340 | North Island,North Island 1341 | North Korean,North Korea 1342 | North Lanao,Lanao Del Norte 1343 | North Samarian,Northern Samar 1344 | North Surigaoan,Surigao Del Norte 1345 | North Zamboangan,Zamboanga Del Norte 1346 | Northamptonian,Northampton 1347 | Northern Irish,Northern Ireland 1348 | Northern Irishman,Northern Ireland 1349 | Northern Irishwoman,Northern Ireland 1350 | Northern Marianan,Northern Mariana Islands 1351 | Northern Territory,Northern Territory 1352 | Northland,Northland Region 1353 | Northumbrian,Northumberland 1354 | Northwest Territorian,Northwest Territories 1355 | Northwesterner,Northwest Territories 1356 | Norwegian,Norway 1357 | Nottinghamian,Nottingham 1358 | Nova Scotian,Nova Scotia 1359 | Novo Ecijano,Nueva Ecija 1360 | Novo Vizcayano,Nueva Vizcaya 1361 | Novocastrian,Newcastle 1362 | Nubian,Nubia 1363 | Nueva 
Ecijan,Nueva Ecija 1364 | Nueva Vizcayan,Nueva Vizcaya 1365 | Nuevoleonés,Nuevo León 1366 | Numedøl,Numedal 1367 | Nunavitian,Nunavut Territory 1368 | Nunavummiuq,Nunavut 1369 | Nunavut,Nunavut 1370 | Nutmegger,Connecticut 1371 | Nz,New Zealand 1372 | Oakbankian,Oakbank 1373 | Oaklander,Oakland 1374 | Oamaru,Oamaru 1375 | Oamaruvian,Oamaru 1376 | Oaxacan,Oaxaca 1377 | Oaxaqueño,Oaxaca 1378 | Obanite,Oban 1379 | Oceanian,Oceania 1380 | Odenseaner,Odense 1381 | Odessit,Odessa 1382 | Odessite,Odessa 1383 | Odia,Odisha 1384 | Odishan,Odisha 1385 | Odissi,Odisha 1386 | Oean,Oea 1387 | Ohioan,Ohio 1388 | Oiniadan,Oiniades 1389 | Okie,Oklahoma 1390 | Okinawan,Okinawa 1391 | Oklahoman,Oklahoma 1392 | Olvisino,Huelva 1393 | Olympian,Olympia 1394 | Omakian,Omak 1395 | Omani,Oman 1396 | Ontarian,Ontario 1397 | Ontario,Ontario 1398 | Onubense,Huelva 1399 | Opplending,Oppland 1400 | Orcadian,Orkney Islands 1401 | Orcelitano,Orihuela 1402 | Oregonian,Oregon 1403 | Oriolano,Orihuela 1404 | Oriya,Odisha 1405 | Osakan,Osaka 1406 | Oscense,Huesca 1407 | Oslofolk,Oslo 1408 | Oslovian,Oslo 1409 | Østfolding,Østfold 1410 | Otago,Otago Region 1411 | Ottawan,Ottawa 1412 | Oxonian,Oxford 1413 | Ozian,Oz 1414 | Ozzie,Australia 1415 | Paceño,La Paz 1416 | Pacense,Badajoz 1417 | Pachuqueño,Pachuca 1418 | Padane,Po River 1419 | Paddock Laker,Paddock Lake 1420 | Paduan,Padua 1421 | Paeonian,Paeonia 1422 | Pahangite,Pahang 1423 | Paisa,Antioquia 1424 | Pakistani,Pakistan 1425 | Palatine,Palatinate 1426 | Palauan,Palau 1427 | Palawan,Palawan 1428 | Palaweño,Palawan 1429 | Palentino,Palencia 1430 | Palermitan,Palermo 1431 | Palestinian,Palestine 1432 | Palmense,Palmas 1433 | Palmerstonian,Palmerston North 1434 | Paludian,Slough 1435 | Pampangan,Pampanga 1436 | Pampangueño,Pampanga 1437 | Pampeaño,La Pampa 1438 | Pamphylian,Pamphylia 1439 | Panamanian,Panama 1440 | Pangasinan,Pangasinan 1441 | Pangasinense,Pangasinan 1442 | Papal,Papal States 1443 | Papua New Guinean,Papua New Guinea 1444 | 
Papuan,Papua New Guinea 1445 | Paraense,Pará 1446 | Paraguayan,Paraguay 1447 | Paraibano,Paraíba 1448 | Paranaense,Paraná 1449 | Paranense,Paraná 1450 | Parian,Paros 1451 | Parisian,Paris 1452 | Parisienne,Paris 1453 | Parmesan,Parma 1454 | Parmigiano,Parma 1455 | Pasqueño,Pasco 1456 | Patagonian,Patagonia 1457 | Patmian,Patmos 1458 | Patriciense,Córdoba 1459 | Paulista,São Paulo 1460 | Paulistano,São Paulo 1461 | Pegswardian,Pegswood 1462 | Pekinese,Beijing 1463 | Peloponnesian,Peloponnese 1464 | Penangite,Penang 1465 | Peninsular,Peninsular Malaysia 1466 | Pennamite,Pennsylvania 1467 | Pennine,Pennines 1468 | Pennsylvanian,Pennsylvania 1469 | Perakian,Perak 1470 | Pergamian,Pergamum 1471 | Perlisian,Perlis 1472 | Pernambucano,Pernambuco 1473 | Persian,Persia 1474 | Perth,Perth 1475 | Perthian,Perth 1476 | Perthite,Perth 1477 | Perthling,Perth 1478 | Perugian,Perugia 1479 | Peruvian,Peru 1480 | Pessoense,João Pessoa 1481 | Peterborian,Peterborough 1482 | Peterbourian,Peterborough 1483 | Phanariote,Phanar 1484 | Philadelphian,Philladelphia 1485 | Philippian,Philippi 1486 | Philippine,Philippines 1487 | Philistine,Philistia 1488 | Phliasian,Phlius 1489 | Phocian,Phocis 1490 | Phoenician,Phoenix 1491 | Phoenixer,Phoenix 1492 | Phrygian,Phrygia 1493 | Piauiense,Piauí 1494 | Picard,Picardy 1495 | Piedmontese,Piedmont 1496 | Pinay,Philippines 1497 | Pinciano,Valladolid 1498 | Pine Tree,Maine 1499 | Pinoy,Philippines 1500 | Pisan,Pisa 1501 | Pisidian,Pisidia 1502 | Pisqueño,Pisco 1503 | Pitcairn Island,Pitcairn Islands 1504 | Pitcairn Islander,Pitcairn Islands 1505 | Pittsburgher,Pittsburgh 1506 | Pittsburghese,Pittsburgh 1507 | Piurano,Piura 1508 | Platense,La Plata 1509 | Plymothian,Plymouth 1510 | Plymouthian,Plymouth 1511 | Poblano,Puebla 1512 | Polaco,Catalonia 1513 | Pole,Poland 1514 | Polish,Poland 1515 | Polmontarian,Polmont 1516 | Polynesian,Polynesia 1517 | Pomeranian,Pomerania 1518 | Pompeian,Pompeii 1519 | Pompeiian,Pompeii 1520 | Pondicherrian,Puducherry 
1521 | Ponferradian,Ponferrada 1522 | Pontevedrés,Pontevedra 1523 | Pontian,Pontus 1524 | Porteño,Buenos Aires 1525 | Portlander,Portland 1526 | Porto-Alegrense,Porto Alegre 1527 | Porto-Velhense,Porto Velho 1528 | Portuense,Porto 1529 | Portuguese,Portugal 1530 | Posadeño,Posadas 1531 | Potiguar,Rio Grande Do Norte 1532 | Potosino,San Luis Potosí 1533 | Praguer,Prague 1534 | Prince Edward Island,Prince Edward Island 1535 | Prince Edward Islander,Prince Edward Island 1536 | Provençal,Provence 1537 | Providentian,Providence 1538 | Prussian,Prussia 1539 | Pucelano,Valladolid 1540 | Pueblan,Puebla 1541 | Puerto Rican,Puerto Rico 1542 | Punekar,Pune 1543 | Punekars,Pune 1544 | Puneño,Puno 1545 | Punjabi,Punjab 1546 | Puntlander,Puntland 1547 | Putrajayan,Putrajaya 1548 | Pylian,Pylos 1549 | Pylosian,Pylos 1550 | Qatari,Qatar 1551 | Quebec,Quebec 1552 | Quebecan,Quebec 1553 | Quebecer,Quebec 1554 | Quebecers,Quebec 1555 | Quebecian,Quebec 1556 | Quebecker,Quebec 1557 | Quebeckers,Quebec 1558 | Quebecois,Quebec 1559 | Québécois,Quebec 1560 | Queens,Queens 1561 | Queensite,Queens 1562 | Queensland,Queensland 1563 | Queenslander,Queensland 1564 | Queretan,Querétaro 1565 | Queretano,Querétaro 1566 | Quezonian,Quezon 1567 | Quintanaroan,Quintana Roo 1568 | Quintanarroense,Quintana Roo 1569 | Quirinian,Quirino 1570 | Rajasthani,Rajasthan 1571 | Randallite,Randall 1572 | Randrusianer,Randers 1573 | Rawsense,Rawson 1574 | Readingite,Reading 1575 | Recifense,Recife 1576 | Regina,Regina 1577 | Reginaian,Regina 1578 | Reginan,Regina 1579 | Regiomontano,Monterrey 1580 | Reinero,Nuevo León 1581 | Reman,Remus 1582 | Rémois,Reims 1583 | Renoite,Reno 1584 | Resistenciano,Resistencia 1585 | Rethymnian,Rethymno 1586 | Reunionese,Reunion 1587 | Réunionese,Réunion 1588 | Réunionnais,Réunion 1589 | Rhegian,Rhegion 1590 | Rhegine,Rhegion 1591 | Rhenish,Rhineland 1592 | Rhinelander,Rhineland 1593 | Rhithymnian,Rhithymna 1594 | Rhode Islander,Rhode Island 1595 | Rhodean,Rhode Island 1596 | 
Rhodesian,Rhodesia 1597 | Rhodian,Rhodes 1598 | Richmonder,Richmond 1599 | Rigan,Riga 1600 | Rio-Branquense,Rio Branco 1601 | Rio-Grandense-Do-Norte,Rio Grande Do Norte 1602 | Rio-Grandense-Do-Sul,Rio Grande Do Sul 1603 | Riojano,La Rioja 1604 | Rionegrino,Río Negro 1605 | Riverian,Rivers State 1606 | Rizaleño,Rizal 1607 | Rizalian,Rizal 1608 | Rochesterian,Rochester 1609 | Rogalending,Rogaland 1610 | Rohirric,Rohan 1611 | Roman,Rome 1612 | Romanian,Romania 1613 | Romblonian,Romblon 1614 | Romsdøl,Møre Og Romsdal 1615 | Romulan,Romulus 1616 | Rondonense,Rondônia 1617 | Roraimense,Roraima 1618 | Roskildenser,Roskilde 1619 | Rotoruan,Rotorua 1620 | Rotterdammer,Rotterdam 1621 | Ruskinite,Ruskin 1622 | Russian,Russia 1623 | Russianc,Soviet Union 1624 | Rwandan,Rwanda 1625 | Rwandese,Rwanda 1626 | Ryukyuan,Ryukyu Islands 1627 | Saami,Lapland 1628 | Saba,Saba 1629 | Sabahan,Sabah 1630 | Sabine,Sabinium 1631 | Sacramentan,Sacramento 1632 | Særping,Sarpsborg 1633 | Saharan,Sahara 1634 | Sahraoui,Western Sahara 1635 | Sahrawi,Western Sahara 1636 | Sahrawian,Western Sahara 1637 | Saigoner,Saigon 1638 | Saigonese,Ho Chi Minh City 1639 | Saint Helenian,Saint Helena 1640 | Saint Lucian,Saint Lucia 1641 | Saint Petersburgian,Saint Petersburg 1642 | Saint Vincentian,Saint Vincent And Grenadines 1643 | Saint-Martinoise,Saint Martin 1644 | Saint-Pierrais,Saint Pierre And Miquelon 1645 | Salamanqués,Salamanca 1646 | Salamanquino,Salamanca 1647 | Salaminian,Salamis 1648 | Saldubense,Zaragoza 1649 | Salemander,Salem 1650 | Salernitan,Salerno 1651 | Salmanticense,Salamanca 1652 | Salmantino,Salamanca 1653 | Salopian,Shropshire 1654 | Salt Lake,Salt Lake City 1655 | Salteño,Salta 1656 | Saltillense,Saltillo 1657 | Salvadoran,El Salvador 1658 | Salvadoreño,El Salvador 1659 | Salvadorense,Salvador 1660 | Samareño,Samar 1661 | Samaritan,Samaria 1662 | Samian,Samos 1663 | Sammarinese,San Marino 1664 | Samnite,Samnium 1665 | Samoan,Samoa 1666 | Samogitian,Samogitia 1667 | Sampetrino,San 
Pedro Garza García 1668 | Samsing,Samsø 1669 | San Antonian,San Antonio 1670 | San Diegan,San Diego 1671 | San Franciscan,San Francisco 1672 | San Josean,San Jose 1673 | San Marinese,San Marino 1674 | Sand Cutter,Arizona 1675 | Sandgroper,Western Australia 1676 | Sand-Hiller,Georgia 1677 | Sandlapper,South Carolina 1678 | Sanjuanino,San Juan 1679 | Sanluiseño,San Luis 1680 | Sanmartinense,San Martín 1681 | Santa Claritan,Santa Clarita 1682 | Santa Cruzan,Santa Cruz 1683 | Santacruceño,Santa Cruz 1684 | Santafecino,Santa Fe 1685 | Santarroseño,Santa Rosa 1686 | Santiagueño,Santiago Del Estero 1687 | Sao Tomean,Sao Tome And Principe 1688 | São Toméan,São Tomé And Príncipe 1689 | Sãocarlense,São Carlos 1690 | São-Luisense,São Luís 1691 | Sarajevan,Sarajevo 1692 | Sarangan,Sarangani 1693 | Sarawakian,Sarawak 1694 | Sardi,Sardinia 1695 | Sardian,Sardis 1696 | Sardianus,Sardis 1697 | Sardinian,Sardinia 1698 | Sarkese,Sark 1699 | Sarmata,Sarmatia 1700 | Sarmatian,Sarmatia 1701 | Sarnian,Guernsey 1702 | Sarping,Sarpsborg 1703 | Saskatchewan,Saskatchewan 1704 | Saskatchewanan,Saskatchewan 1705 | Saskatchewanian,Saskatchewan 1706 | Saskatonian,Saskatoon 1707 | Saskatoon,Saskatoon 1708 | Saudi,Saudi Arabia 1709 | Saudi Arabian,Saudi Arabia 1710 | Sauromata,Sarmatia 1711 | Savannaian,Savannah 1712 | Savoyard,Savoy 1713 | Saxon,Saxony 1714 | Scandinavian,Scandinavia 1715 | Scillonian,Isles Of Scilly 1716 | Sconnie,Wisconsin 1717 | Scot,Scotland 1718 | Scots,Scotland 1719 | Scottish,Scotland 1720 | Scouse,Liverpool 1721 | Scouser,Liverpool 1722 | Scytha,Scythia 1723 | Scythian,Scythia 1724 | Seattleite,Seattle 1725 | Segobricense,Segorbe 1726 | Segobrigense,Segorbe 1727 | Segorbino,Segorbe 1728 | Segovian,Segovia 1729 | Segoviano,Segovia 1730 | Selangorean,Selangor 1731 | Semitic,Semite 1732 | Senegalese,Senegal 1733 | Seoulite,Seoul 1734 | Septense,Ceuta 1735 | Serb,Serbia 1736 | Serbian,Serbia 1737 | Sergipano,Sergipe 1738 | Serrean,Serrae 1739 | Sevillano,Sevilla 1740 | 
Sevillian,Seville 1741 | Seychellois,Seychelles 1742 | Shanghainese,Shanghai 1743 | Shikokuan,Shikoku 1744 | Shkodran,Shkoder 1745 | Siamese,Siam 1746 | Siberian,Siberia 1747 | Sibugay,Zamboanga Sibugay 1748 | Sicilian,Sicily 1749 | Sicyonese,Sicyon 1750 | Siddis,Stavanger 1751 | Sidonian,Sidon 1752 | Sienese,Siena 1753 | Siennese,Siena 1754 | Sierra Leonean,Sierra Leone 1755 | Sikkimese,Sikkim 1756 | Silesian,Silesia 1757 | Silhillian,Solihull 1758 | Silver Laker,Silver Lake 1759 | Sinaloan,Sinaloa 1760 | Sinaloense,Sinaloa 1761 | Sindhi,Sindh 1762 | Singapore,Singapore 1763 | Singaporean,Singapore 1764 | Sint Eustatius,Sint Eustatius 1765 | Sint Maarten,Sint Maarten 1766 | Sioux Cityan,Sioux City 1767 | Siquijorian,Siquijor 1768 | Sjællænder,Zealand 1769 | Skagbo,Skagen 1770 | Skaroene,Skaro 1771 | Skarosian,Skaro 1772 | Skopelitan,Skopelos 1773 | Skopjan,Skopje 1774 | Sligonian,Sligo 1775 | Slovak,Slovakia 1776 | Slovakian,Slovakia 1777 | Slovene,Slovenia 1778 | Slovenian,Slovenia 1779 | Sluff,Slough 1780 | Sofian,Sofia 1781 | Sogning,Sogn Og Fjordane 1782 | Solomon Island,Solomon Islands 1783 | Solomon Islander,Solomon Islands 1784 | Somali,Somalia 1785 | Somalian,Somalia 1786 | Somalilander,Somaliland 1787 | Somersite,Somers 1788 | Sonoran,Sonora 1789 | Sonorense,Sonora 1790 | Sontaran,Sontar 1791 | Sooner,Oklahoma 1792 | Soriano,Soria 1793 | Sorrentine,Sorrento 1794 | Sorsogonian,Sorsogon 1795 | Sorsogueño,Sorsogon 1796 | Sørtrønder,Sør-Trøndelag 1797 | Soteropolitano,Salvador 1798 | South African,South Africa 1799 | South Agusanian,Agusan Del Sur 1800 | South American,South America 1801 | South Australian,South Australia 1802 | South Camarinean,Camarines Sur 1803 | South Carolinian,South Carolina 1804 | South Cotabatan,South Cotabato 1805 | South Cotabateño,South Cotabato 1806 | South Dakotan,South Dakota 1807 | South Davao,Davao Del Sur 1808 | South Georgia,South Georgia And The South Sandwich Islands 1809 | South Ilocos,Ilocos Sur 1810 | South Island,South 
Island 1811 | South Korean,South Korea 1812 | South Lanao,Lanao Del Sur 1813 | South Lower Californian,Baja California Sur 1814 | South Ossetian,South Ossetia 1815 | South Sandwich Islands,South Georgia And The South Sandwich Islands 1816 | South Sudanese,South Sudan 1817 | South Surigaoan,Surigao Del Sur 1818 | South Zamboangan,Zamboanga Del Sur 1819 | Southern Australian,South Australia 1820 | Southern Leytenian,Southern Leyte 1821 | Southland,Southland Region 1822 | Soviet,Soviet Union 1823 | Spaniard,Spain 1824 | Spanish,Spain 1825 | Spartan,Sparta 1826 | Spokanite,Spokane 1827 | Spoletian,Spoleto 1828 | Springfieldianite,Springfield 1829 | Sri Lankan,Sri Lanka 1830 | St. Crucian,St. Croix 1831 | St. Louisan,St. Louis 1832 | St.Tomian,St. Thomas 1833 | Staten Island,Staten Island 1834 | Statian,Sint Eustatius 1835 | Stephanois,St Etienne 1836 | Stewart Island,Stewart Island 1837 | Stockholmer,Stockholm 1838 | Stopfordian,Stockport 1839 | Strasbourgeois,Strasbourg 1840 | Stril,Bergen 1841 | Stuttgarter,Stuttgart 1842 | Styrian,Styria 1843 | Sudanese,Sudan 1844 | Sudetes,Sudetenland 1845 | Suebius,Suebia 1846 | Sul-Rio-Grandense,Rio Grande Do Sul 1847 | Sultan Kudaratenian,Sultan Kudarat 1848 | Sultan Kudarateño,Sultan Kudarat 1849 | Sulu,Sulu 1850 | Suluan,Sulu 1851 | Suluano,Sulu 1852 | Sumatran,Sumatra 1853 | Sumerian,Sumer 1854 | Sundanese,Java 1855 | Sunflower,Kansas 1856 | Sunnmøring,Sunnmøre 1857 | Surcano,Surco 1858 | Surinamer,Suriname 1859 | Surinamese,Suriname 1860 | Svalbard,Svalbard 1861 | Swamp Yankee,Rhode Island 1862 | Swansean,Swansea 1863 | Swazi,Swaziland 1864 | Swede,Sweden 1865 | Swedish,Sweden 1866 | Swindonian,Swindon 1867 | Swiss,Switzerland 1868 | Sydney,Sydney 1869 | Sydneysider,Sydney 1870 | Sydney-Sider,Sydney 1871 | Symian,Symi 1872 | Syracusan,Syracuse 1873 | Syracusian,Syracuse 1874 | Syrian,Syria 1875 | Tab,Cambridge 1876 | Tabasqueño,Tabasco 1877 | Tacneño,Tacna 1878 | Tacomian,Tacoma 1879 | Tadzhik,Tajikistan 1880 | Taff,Wales 
1881 | Tahitian,Tahiti 1882 | Taiwanese,Taiwan 1883 | Tajik,Tajikistan 1884 | Tajikistani,Tajikistan 1885 | Talabricense,Talavera De La Reina 1886 | Talaverano,Talavera De La Reina 1887 | Tallinner,Tallinn 1888 | Tamaulipan,Tamaulipas 1889 | Tamaulipeco,Tamaulipas 1890 | Tamil,Tamil Nadu 1891 | Tamilian,Tamil Nadu 1892 | Tampanian,Tampa 1893 | Tanzanian,Tanzania 1894 | Tapatío,Guadalajara 1895 | Tar Boiler,North Carolina 1896 | Tar Heel,North Carolina 1897 | Taranaki,Taranaki Region 1898 | Tarantine,Taranto 1899 | Tarapotino,Tarapoto 1900 | Tarentine,Taranto 1901 | Tarentumian,Taras 1902 | Tarlaquenian,Tarlac 1903 | Tarlaqueño,Tarlac 1904 | Tarraconense,Tarragona 1905 | Tarraconista,Tarragona 1906 | Tasman,Tasman Region 1907 | Tasmaniac,Tasmania 1908 | Tasmanian,Tasmania 1909 | Tassie,Tasmania 1910 | Taswegian,Tasmania 1911 | Tatar,Tatarstan 1912 | Tawi-Tawian,Tawi-Tawi 1913 | Tbilisian,Tbilisi 1914 | Tegean,Tegea 1915 | Tejano,Texas 1916 | Tel Avivi,Tel Aviv 1917 | Telanganite,Telangana 1918 | Telemarking,Telemark 1919 | Tenedian,Tenedos 1920 | Tenesseean,Tennessee 1921 | Tennesseean,Tennessee 1922 | Tepiqueño,Tepic 1923 | Tepiquense,Tepic 1924 | Terengganuan,Terengganu 1925 | Teresinense,Teresina 1926 | Territorian,Northern Territory 1927 | Texian,Texas 1928 | Texican,Texas 1929 | Thai,Thailand 1930 | Thasian,Thasos 1931 | Theban,Thebes 1932 | Thespian,Thespis 1933 | Thessalian,Thessaly 1934 | Thessalonian,Thessaloniki 1935 | Thracian,Thrace 1936 | Thriasian,Thria 1937 | Thurian,Thurii 1938 | Thuriat,Thuria 1939 | Thuringian,Thuringia 1940 | Thybo,Thy 1941 | Tibetan,Tibet 1942 | Timorese,Timor-Leste 1943 | Tinerfeño,Santa Cruz De Tenerife 1944 | Tiranas,Tirana 1945 | Titletowner,Green Bay 1946 | Tlaxcalteca,Tlaxcala 1947 | Tobagonian,Trinidad And Tobago 1948 | Toboesco,El Toboso 1949 | Toboseño,El Toboso 1950 | Tobosino,El Toboso 1951 | Tocantinense,Tocatins 1952 | Togolese,Togo 1953 | Tokelauan,Tokelau 1954 | Tokyoite,Tokyo 1955 | Toledano,Toledo 1956 | 
Toledoan,Toledo 1957 | Toluqueño,Toluca 1958 | Tongan,Tonga 1959 | Toowoomban,Toowoomba 1960 | Toresano,Toro 1961 | Torinese,Turin 1962 | Torontonian,Toronto 1963 | Tournaisian,Tournai 1964 | Towcestrian,Towcester 1965 | Townsvillian,Townsville 1966 | Transylvanian,Transylvania 1967 | Trevorite,Trevor 1968 | Trichonian,Trichonos 1969 | Tridentine,Trent 1970 | Triestine,Trieste 1971 | Trifluvian,Trois-Rivières 1972 | Trifluvien,Trois-Rivières 1973 | Trifluvienne,Trois-Rivières 1974 | Trinibagonians,Trinidad And Tobago 1975 | Trinidadian,Trinidad And Tobago 1976 | Trinis,Trinidad And Tobago 1977 | Tripolitan,Tripoli 1978 | Tripuran,Tripura 1979 | Tripuri,Tripura 1980 | Troezenian,Troezen 1981 | Trois-Rivieran,Trois-Rivières 1982 | Trojan,Troy 1983 | Troll,Michigan 1984 | Tromsøværing,Tromsø 1985 | Tromsværing,Troms 1986 | Trønder,Trondheim 1987 | Trujillano,Trujillo 1988 | Tucsonan,Tucson 1989 | Tucumano,Tucumán 1990 | Tukker,Twente 1991 | Tulsan,Tulsa 1992 | Tumbecino,Tumbes 1993 | Tunisian,Tunisia 1994 | Turboleta,Teruel 1995 | Turinese,Turin 1996 | Turk,Turkey 1997 | Turkic,Turkey 1998 | Turkish,Turkey 1999 | Turkmen,Turkmenistan 2000 | Turkmenistani,Turkmenistan 2001 | Turks And Caicos Island,Turks And Caicos Islands 2002 | Turolense,Teruel 2003 | Tuscan,Tuscany 2004 | Tuvaluan,Tuvalu 2005 | Tuxtleco,Tuxtla Gutiérrez 2006 | Twin Laker,Twin Lakes 2007 | Tyke,Yorkshire 2008 | Tyrolean,Tyrol 2009 | Tyrolese,Tyrol 2010 | U.S. 
Virgin Island,United States Virgin Islands 2011 | Uberlandense,Uberlândia 2012 | Ugandan,Uganda 2013 | Uk,United Kingdom 2014 | Ukrainian,Ukraine 2015 | Ulsterman,Northern Ireland 2016 | Umbrian,Umbria 2017 | United States,United States 2018 | Urcitano,Almería 2019 | Uruguayan,Uruguay 2020 | US,United States 2021 | U.S.,United States 2022 | Ushuaiense,Ushuaia 2023 | Utahn,Utah 2024 | Utopian,Utopia 2025 | Uttar Pradeshi,Uttar Pradesh 2026 | Uttarakhandi,Uttarakhand 2027 | Uzbek,Uzbekistan 2028 | Uzbekistani,Uzbekistan 2029 | Valencian,Valencia 2030 | Valenciano,Valencia 2031 | Valisoletano,Valladolid 2032 | Vallisoletano,Valladolid 2033 | Vancouverite,Vancouver 2034 | Vanuatuan,Vanuatu 2035 | Varsovian,Warsaw 2036 | Vashonite,Vashon Island 2037 | Vatican,Vatican City State 2038 | Venetian,Venice 2039 | Venezuelan,Venezuela 2040 | Veracruzano,Veracruz 2041 | Vermonter,Vermont 2042 | Veronese,Verona 2043 | Verulamian,St Albans 2044 | Vest-Agding,Vest-Agder 2045 | Vestfolding,Vestfold 2046 | Vicense,Vich 2047 | Vicentine,Vicenza 2048 | Vichyssois,Vichy 2049 | Victorian,Victoria 2050 | Viedmense,Viedma 2051 | Viennese,Vienna 2052 | Vietnamese,Vietnam 2053 | Vigitano,Vich 2054 | Villahermosino,Villahermosa 2055 | Villermosino,Villahermosa 2056 | Vilnian,Vilnius 2057 | Vincentian,Saint Vincent And Grenadines 2058 | Virgin Islander,Virgin Islands 2059 | Virginian,Virginia 2060 | Virginians,Virginia 2061 | Vitoriense,Vitória 2062 | Vizcayense,Vizcaya 2063 | Volcano Islander,Volcano Islands 2064 | Vulcan,Vulcan 2065 | Vulcanian,Vulcan 2066 | Waikato,Waikato Region 2067 | Wakandan,Wakanda 2068 | Wakean,Wake Island 2069 | Wallachian,Wallachia 2070 | Wallis And Futuna,Wallis And Futuna 2071 | Wallisian,Wallis And Futuna 2072 | Walloon,Wallonia 2073 | Washingtonian,Washington 2074 | Waterfordian,Waterford 2075 | Waterluvian,Waterloo 2076 | Weegie,Glasgow 2077 | Wellington,Wellington 2078 | Welsh,Wales 2079 | Welshman,Wales 2080 | Welshwoman,Wales 2081 | Wenhamite,Wenham 2082 | 
West Coast,West Coast Region 2083 | West Indian,West Indies 2084 | West Malaysian,Peninsular Malaysia 2085 | West Mindorenian,Occidental Mindoro 2086 | West Misamis,Misamis Occidental 2087 | West Negros,Negros Occidental 2088 | West Papuan,West Papua 2089 | West Samarian,Samar 2090 | West Virginian,West Virginia 2091 | Western Australian,Western Australia 2092 | Western Davao,Davao Occidental 2093 | Westphalian,Westphalia 2094 | Westralian,Western Australia 2095 | Wheatlander,Wheatland 2096 | Whittierite,Whittier 2097 | Wichitan,Wichita 2098 | Wiener,Vienna 2099 | Wilmotter,Wilmot 2100 | Winnipegger,Winnipeg 2101 | Wisconsinite,Wisconsin 2102 | Wolverine,Michigan 2103 | Wrexhamite,Wrexham 2104 | Wulfrunian,Wolverhampton 2105 | Wyomingite,Wyoming 2106 | Xalapeño,Xalapa 2107 | Xanthian,Xanthi 2108 | Yakut,Yakutia 2109 | Yellowknifer,Yellowknife 2110 | Yemeni,Yemen 2111 | Yemenite,Yemen 2112 | Yerevantsi,Yerevan 2113 | Yerushalmi,Jerusalem 2114 | Yokohaman,Yokohama 2115 | Yooper,Michigan 2116 | Yorkie,Yorkshire 2117 | Yorkshireman,Yorkshire 2118 | Yorkshirewoman,Yorkshire 2119 | Yucateco,Yucatán 2120 | Yugoslav,Yugoslavia 2121 | Yukon,Yukon 2122 | Yukoner,Yukon Territory 2123 | Yukonian,Yukon Territory 2124 | Yukonite,Yukon Territory 2125 | Yupper,Upper Peninsula 2126 | Zacatecan,Zacatecas 2127 | Zacatecano,Zacatecas 2128 | Zagrebian,Zagreb 2129 | Zairean,Zaire 2130 | Zakyntian,Zakynthos 2131 | Zambalean,Zambales 2132 | Zambaleño,Zambales 2133 | Zambian,Zambia 2134 | Zamboangueño,Zamboanga Del Norte 2135 | Zamorano,Zamora 2136 | Zamorense,Zamora 2137 | Zaragocés,Zaragoza 2138 | Zaragozano,Zaragoza 2139 | Zealander,Zealand 2140 | Zimbabwean,Zimbabwe 2141 | Zintani,Zintan 2142 | Zulu,Zululand 2143 | Zuricher,Zurich 2144 | Ecuadoran,Ecuador 2145 | -------------------------------------------------------------------------------- /src/main/resources/edu/knowitall/chunkedextractor/nouns.txt: -------------------------------------------------------------------------------- 1 | 
...*ist 2 | abbot 3 | abomination 4 | accessory 5 | accompanist 6 | accomplice 7 | accountant 8 | accuser 9 | ace 10 | acquaintance 11 | active 12 | activist 13 | adherent 14 | adjunct 15 | administrator 16 | admiral 17 | admirer 18 | adopter 19 | adult 20 | adversary 21 | advertiser 22 | adviser 23 | advisor 24 | advocate 25 | affiliate 26 | aficionado 27 | agent 28 | aggressor 29 | agonist 30 | aide 31 | alien 32 | ally 33 | alternate 34 | alum 35 | alumna 36 | alumnus 37 | ambassador 38 | anachronism 39 | analyst 40 | anathema 41 | ancestor 42 | anchor 43 | ancient 44 | angel 45 | announcer 46 | annoyance 47 | anomaly 48 | antagonist 49 | apologist 50 | apostle 51 | apotheosis 52 | applicant 53 | appointment 54 | apprentice 55 | arbiter 56 | arbitrator 57 | archbishop 58 | arrival 59 | artist 60 | ass 61 | asshole 62 | assignee 63 | assistant 64 | associate 65 | atheist 66 | attendant 67 | attendee 68 | attorney 69 | attraction 70 | auditor 71 | aunt 72 | authority 73 | avatar 74 | babe 75 | baby 76 | bachelor 77 | back 78 | backer 79 | backup 80 | bag 81 | banker 82 | barber 83 | barrister 84 | bartender 85 | bassist 86 | batsman 87 | bear 88 | bearer 89 | beast 90 | beat 91 | beauty 92 | beginner 93 | believer 94 | belle 95 | bellwether 96 | beloved 97 | benefactor 98 | beneficiary 99 | better 100 | bidder 101 | bird 102 | birth 103 | bishop 104 | bitch 105 | blacksmith 106 | blade 107 | blogger 108 | blonde 109 | blood 110 | bomb-expert 111 | bomb-maker 112 | bomber 113 | bookkeeper 114 | booster 115 | bore 116 | borrower 117 | boss 118 | bouncer 119 | bowler 120 | boy 121 | boyfriend 122 | brain 123 | breadwinner 124 | breaker 125 | breeder 126 | bride 127 | bridesmaid 128 | broadcaster 129 | broker 130 | brother 131 | brother-in-law 132 | browser 133 | brunette 134 | buddy 135 | buff 136 | builder 137 | bull 138 | bully 139 | businessman 140 | butcher 141 | butt 142 | buyer 143 | cadet 144 | calculator 145 | camper 146 | canary 147 | candidate 148 | canon 
149 | captain 150 | captive 151 | card 152 | caregiver 153 | caretaker 154 | carpenter 155 | carrier 156 | case 157 | cashier 158 | casualty 159 | cat 160 | catch 161 | catcher 162 | caterer 163 | celebrity 164 | center 165 | CEO 166 | CFO 167 | chair 168 | chairman 169 | chairperson 170 | chairwoman 171 | champ 172 | champion 173 | chancellor 174 | chaplain 175 | character 176 | charge 177 | cheerleader 178 | chick 179 | chicken 180 | chief 181 | chieftain 182 | child 183 | chiropractor 184 | chorister 185 | christ 186 | cinematographer 187 | cipher 188 | citizen 189 | classic 190 | classmate 191 | cleaner 192 | cleric 193 | clerk 194 | client 195 | clone 196 | closer 197 | co-conspirator 198 | co-founder 199 | co-worker 200 | coach 201 | coaster 202 | coauthor 203 | cofounder 204 | cog 205 | collaborator 206 | colleague 207 | collector 208 | colonel 209 | columnist 210 | comer 211 | commandant 212 | commander 213 | commentator 214 | commissioner 215 | communicant 216 | communicator 217 | commuter 218 | companion 219 | company 220 | competition 221 | competitor 222 | compiler 223 | complainant 224 | computer 225 | conductor 226 | confidant 227 | congressman 228 | connection 229 | connoisseur 230 | conservative 231 | consort 232 | conspirator 233 | constituent 234 | constructor 235 | consultant 236 | consumer 237 | contact 238 | contemporary 239 | contender 240 | contestant 241 | contractor 242 | contributor 243 | controller 244 | convener 245 | convert 246 | convict 247 | cook 248 | coordinator 249 | cop 250 | corporal 251 | correspondent 252 | cosmopolitan 253 | councillor 254 | councilman 255 | counsel 256 | counselor 257 | count 258 | counter 259 | cousin 260 | cow 261 | coward 262 | cowboy 263 | cracker 264 | crazy 265 | creator 266 | creature 267 | creditor 268 | critic 269 | crossover 270 | crusader 271 | culprit 272 | cultist 273 | curator 274 | custodian 275 | customer 276 | czar 277 | dad 278 | daddy 279 | dame 280 | dancer 281 | darling 282 | date 283 | 
daughter 284 | daughter-in-law 285 | deacon 286 | dealer 287 | dean 288 | dearest 289 | debtor 290 | defendant 291 | defender 292 | delegate 293 | democrat 294 | demon 295 | denizen 296 | dependent 297 | descendant 298 | descendent 299 | designer 300 | destroyer 301 | detective 302 | developer 303 | deviant 304 | devil 305 | devotee 306 | dick 307 | dictator 308 | differentiator 309 | diplomat 310 | diplomate 311 | disciple 312 | discoverer 313 | dish 314 | dissenter 315 | distributor 316 | diver 317 | dj 318 | doer 319 | dog 320 | donor 321 | double 322 | doyen 323 | dragon 324 | draw 325 | driver 326 | drummer 327 | dry 328 | duchess 329 | dud 330 | dude 331 | duke 332 | earl 333 | editor 334 | educator 335 | elder 336 | eldest 337 | elector 338 | electrician 339 | embodiment 340 | emcee 341 | emeritus 342 | employee 343 | employer 344 | end 345 | enemy 346 | enthusiast 347 | entrant 348 | entrepreneur 349 | envoy 350 | equal 351 | escapee 352 | evangelist 353 | ex-wife 354 | examiner 355 | executive 356 | executor 357 | exhibitor 358 | expert 359 | exponent 360 | exporter 361 | extra 362 | extremist 363 | eyewitness 364 | face 365 | facilitator 366 | factor 367 | failure 368 | faller 369 | familiar 370 | family 371 | fan 372 | farmer 373 | father 374 | father-in-law 375 | favorite 376 | favourite 377 | fellow 378 | female 379 | fighter 380 | figure 381 | figurehead 382 | filmmaker 383 | finalist 384 | finder 385 | finisher 386 | firefighter 387 | fireman 388 | firstborn 389 | fisherman 390 | fixture 391 | flop 392 | florist 393 | flyer 394 | fodder 395 | follower 396 | fool 397 | foot 398 | forefather 399 | foreigner 400 | foreman 401 | forerunner 402 | forward 403 | founder 404 | fraud 405 | freak 406 | freshman 407 | friend 408 | front 409 | front-runner 410 | fugitive 411 | fundamentalist 412 | fundraiser 413 | gainer 414 | gatekeeper 415 | geek 416 | gem 417 | general 418 | generator 419 | genius 420 | gentleman 421 | ghost 422 | giant 423 | girl 424 | 
girlfriend 425 | giver 426 | glutton 427 | goalie 428 | goalkeeper 429 | god 430 | godmother 431 | governor 432 | grader 433 | graduate 434 | granddaddy 435 | granddaughter 436 | grandfather 437 | grandmother 438 | grandson 439 | great 440 | grind 441 | groomsman 442 | grower 443 | guarantor 444 | guard 445 | guardian 446 | guest 447 | guide 448 | guitarist 449 | gunman 450 | gunner 451 | guru 452 | guy 453 | half-brother 454 | half-sister 455 | hand 456 | handler 457 | handmaid 458 | handmaiden 459 | hangover 460 | head 461 | headliner 462 | headmaster 463 | healer 464 | heartbreaker 465 | heavy 466 | heel 467 | heir 468 | heiress 469 | help 470 | herald 471 | hero 472 | heroine 473 | hijacker 474 | hire 475 | hitter 476 | holder 477 | holdover 478 | homemaker 479 | homeowner 480 | hope 481 | host 482 | hostage 483 | housewife 484 | hunk 485 | hunter 486 | husband 487 | hypocrite 488 | ideal 489 | ideologist 490 | idiot 491 | idol 492 | image 493 | imam 494 | immigrant 495 | import 496 | importer 497 | incarnation 498 | indexer 499 | individual 500 | inducer 501 | inductee 502 | industrialist 503 | infant 504 | informant 505 | inhabitant 506 | inheritor 507 | initiate 508 | initiator 509 | inmate 510 | innovator 511 | inpatient 512 | insider 513 | inspector 514 | instigator 515 | instructor 516 | instrument 517 | insurgent 518 | intermediary 519 | intern 520 | interpreter 521 | intimate 522 | investigator 523 | investor 524 | issue 525 | jack 526 | janitor 527 | jerk 528 | jewel 529 | jihadist 530 | joker 531 | junior 532 | justice 533 | keeper 534 | keyboardist 535 | kicker 536 | kid 537 | killer 538 | king 539 | kingpin 540 | knight 541 | knower 542 | lad 543 | lady 544 | lamb 545 | landlord 546 | landowner 547 | latecomer 548 | laughingstock 549 | laureate 550 | lawmaker 551 | lawyer 552 | lead 553 | leader 554 | learner 555 | lecturer 556 | lender 557 | lesbian 558 | lessee 559 | lessor 560 | letter 561 | liar 562 | liberal 563 | licensee 564 | lieutenant 565 
| life 566 | lifesaver 567 | light 568 | linebacker 569 | lion 570 | lobbyist 571 | locator 572 | loner 573 | longer 574 | lord 575 | loser 576 | love 577 | lover 578 | loyalist 579 | lump 580 | machine 581 | machinist 582 | maid 583 | mainstay 584 | maintainer 585 | major 586 | maker 587 | male 588 | man 589 | manager 590 | manufacturer 591 | marine 592 | mark 593 | marketer 594 | marshal 595 | martyr 596 | mason 597 | master 598 | mastermind 599 | match 600 | mate 601 | mater 602 | material 603 | matriarch 604 | matron 605 | md 606 | mechanic 607 | medalist 608 | mediator 609 | medium 610 | member 611 | mentor 612 | merchant 613 | messenger 614 | messiah 615 | middleman 616 | midwife 617 | militant 618 | millionaire 619 | mind 620 | minister 621 | minor 622 | miss 623 | mistress 624 | mod 625 | model 626 | moderator 627 | modern 628 | mole 629 | mom 630 | monarch 631 | monitor 632 | monk 633 | monster 634 | moron 635 | mother 636 | mouse 637 | mouth 638 | mouthpiece 639 | mover 640 | mp 641 | murderer 642 | muscle 643 | mvp 644 | name 645 | namesake 646 | nanny 647 | narrator 648 | national 649 | nationalist 650 | native 651 | natural 652 | neighbor 653 | neighbour 654 | nephew 655 | nerd 656 | newbie 657 | newcomer 658 | niece 659 | nigger 660 | no-show 661 | nobody 662 | nominee 663 | nonresident 664 | notable 665 | novice 666 | nuisance 667 | nut 668 | observer 669 | occupant 670 | offender 671 | officer 672 | official 673 | offspring 674 | ombudsman 675 | opener 676 | operative 677 | operator 678 | opponent 679 | opposite 680 | opposition 681 | oracle 682 | ordinary 683 | organiser 684 | organist 685 | organizer 686 | originator 687 | outcast 688 | outfielder 689 | outsider 690 | overseer 691 | owner 692 | pain 693 | pallbearer 694 | panelist 695 | paragon 696 | paralegal 697 | paranoid 698 | parasite 699 | paratrooper 700 | parent 701 | pariah 702 | parishioner 703 | parliamentarian 704 | part-owner 705 | participant 706 | partner 707 | party 708 | passenger 
709 | passer 710 | pastor 711 | patient 712 | patriarch 713 | patriot 714 | patron 715 | patroness 716 | pawn 717 | payer 718 | paymaster 719 | pediatrician 720 | peer 721 | perfectionist 722 | performer 723 | perpetrator 724 | person 725 | personality 726 | personification 727 | pest 728 | pet 729 | petitioner 730 | pig 731 | pill 732 | pillar 733 | pimp 734 | pioneer 735 | pitcher 736 | pivot 737 | placeholder 738 | plaintiff 739 | planet 740 | planner 741 | plant 742 | pledge 743 | policeman 744 | pop 745 | pope 746 | possessor 747 | postdoc 748 | poster 749 | pow 750 | power 751 | powerhouse 752 | practitioner 753 | prayer 754 | precursor 755 | predator 756 | predecessor 757 | predictor 758 | premier 759 | presenter 760 | prey 761 | priest 762 | priestess 763 | primitive 764 | prince 765 | principal 766 | prior 767 | prisoner 768 | processor 769 | producer 770 | professor 771 | progenitor 772 | progeny 773 | progressive 774 | promoter 775 | proofreader 776 | prophet 777 | proponent 778 | proprietor 779 | prosecutor 780 | prospect 781 | prostitute 782 | protagonist 783 | protector 784 | protege 785 | provider 786 | proxy 787 | psychiatrist 788 | psychotherapist 789 | publisher 790 | punk 791 | pupil 792 | puppet 793 | purchaser 794 | purveyor 795 | qualifier 796 | quarter 797 | quarterback 798 | queen 799 | rabbi 800 | racist 801 | radical 802 | raiser 803 | rapper 804 | rat 805 | reader 806 | rebel 807 | receiver 808 | receptionist 809 | recipient 810 | recruiter 811 | rector 812 | redeemer 813 | referee 814 | referral 815 | refugee 816 | registrant 817 | registrar 818 | regular 819 | regulator 820 | reincarnation 821 | relation 822 | relative 823 | relief 824 | religious 825 | reminder 826 | remover 827 | rep 828 | replacement 829 | reporter 830 | repository 831 | representative 832 | republican 833 | researcher 834 | reservist 835 | resident 836 | respondent 837 | retailer 838 | revenue 839 | reviewer 840 | rider 841 | ringer 842 | ringleader 843 | rip 844 | 
rival 845 | rn 846 | rock 847 | romantic 848 | rookie 849 | roommate 850 | root 851 | ruler 852 | runner-up 853 | runt 854 | sage 855 | saint 856 | salesman 857 | sampler 858 | satellite 859 | saver 860 | savior 861 | saviour 862 | scanner 863 | scapegoat 864 | scholar 865 | schoolteacher 866 | scion 867 | scorer 868 | scourge 869 | scout 870 | scratch 871 | screw 872 | second 873 | secretary 874 | seed 875 | seeker 876 | self 877 | self-starter 878 | seller 879 | semifinalist 880 | senator 881 | sender 882 | senior 883 | sensation 884 | sensitive 885 | seperatist 886 | sergeant 887 | servant 888 | server 889 | settler 890 | shadow 891 | sham 892 | shareholder 893 | sharper 894 | sheep 895 | shepherd 896 | sheriff 897 | shill 898 | shit 899 | shocker 900 | shoemaker 901 | shooter 902 | shortstop 903 | sibling 904 | signatory 905 | signer 906 | silly 907 | simple 908 | sinner 909 | sire 910 | sister 911 | sister-in-law 912 | skipper 913 | slave 914 | slayer 915 | sleeper 916 | slip 917 | smoker 918 | snake 919 | sneak 920 | sniper 921 | soldier 922 | solicitor 923 | soloist 924 | someone 925 | son 926 | son-in-law 927 | sophisticate 928 | sophomore 929 | sort 930 | soul 931 | source 932 | sovereign 933 | speaker 934 | spearhead 935 | specialist 936 | spectator 937 | speechwriter 938 | spoiler 939 | spokesman 940 | spokesperson 941 | spokeswoman 942 | sponsor 943 | spouse 944 | square 945 | staffer 946 | stakeholder 947 | stalwart 948 | stand-in 949 | standard-bearer 950 | star 951 | starter 952 | stepdaughter 953 | stepfather 954 | stepson 955 | steward 956 | stickler 957 | stiff 958 | stockholder 959 | stranger 960 | strategist 961 | stroke 962 | strongman 963 | stud 964 | student 965 | study 966 | subcontractor 967 | subject 968 | subscriber 969 | subsidiary 970 | success 971 | successor 972 | sufferer 973 | suit 974 | sultan 975 | sun 976 | superintendent 977 | superior 978 | superstar 979 | supervisor 980 | supplier 981 | supporter 982 | suppressor 983 | 
supremacist 984 | surgeon 985 | surrogate 986 | survivor 987 | suspect 988 | sustainer 989 | sweep 990 | sweetheart 991 | swell 992 | tail 993 | tailor 994 | talent 995 | target 996 | taxpayer 997 | teacher 998 | teammate 999 | teaser 1000 | technician 1001 | technologist 1002 | teen 1003 | teenager 1004 | tenant 1005 | tender 1006 | terror 1007 | terrorist 1008 | tester 1009 | therapist 1010 | thief 1011 | threat 1012 | tiger 1013 | tiller 1014 | timekeeper 1015 | titan 1016 | toast 1017 | tool 1018 | tough 1019 | tourist 1020 | trader 1021 | trailblazer 1022 | trailer 1023 | trainer 1024 | traitor 1025 | transfer 1026 | translator 1027 | treasurer 1028 | trick 1029 | trier 1030 | triggerman 1031 | trooper 1032 | trustee 1033 | tutor 1034 | twin 1035 | type 1036 | uncle 1037 | underdog 1038 | undergrad 1039 | undergraduate 1040 | understudy 1041 | underwriter 1042 | user 1043 | usher 1044 | vagabond 1045 | valedictorian 1046 | vassal 1047 | vendor 1048 | veteran 1049 | vicar 1050 | victim 1051 | victor 1052 | viewer 1053 | villain 1054 | violinist 1055 | virgin 1056 | virtuoso 1057 | visitor 1058 | vocalist 1059 | voice 1060 | volunteer 1061 | voter 1062 | waiter 1063 | waitress 1064 | ward 1065 | warden 1066 | warlord 1067 | warrior 1068 | watch 1069 | watchdog 1070 | webmaster 1071 | whale 1072 | whiz 1073 | wholesaler 1074 | whore 1075 | widow 1076 | widower 1077 | wife 1078 | winemaker 1079 | wing 1080 | winner 1081 | witch 1082 | witness 1083 | wizard 1084 | woman 1085 | worker 1086 | worm 1087 | worshipper 1088 | worthy 1089 | wrestler 1090 | writer 1091 | youngster 1092 | youth 1093 | ------------------------------------ 1094 | accordionist 1095 | actor 1096 | actress 1097 | anthropologist 1098 | archaeologist 1099 | architect 1100 | archivist 1101 | assassin 1102 | astronaut 1103 | astronomer 1104 | astrophysicist 1105 | author 1106 | aviator 1107 | biochemist 1108 | biographer 1109 | biologist 1110 | botanist 1111 | boxer 1112 | bullfighter 1113 | 
caricaturist 1114 | cartographer 1115 | cartoonist 1116 | censor 1117 | chef 1118 | chemist 1119 | choreographer 1120 | climber 1121 | clown 1122 | coleopterist 1123 | comedian 1124 | composer 1125 | conquistadore 1126 | cricketer 1127 | dentist 1128 | dermatologist 1129 | director 1130 | doctor 1131 | economist 1132 | engineer 1133 | entertainer 1134 | entomologist 1135 | explorer 1136 | feminist 1137 | footballer 1138 | geneticist 1139 | geographer 1140 | geologist 1141 | geometer 1142 | geophysicist 1143 | gerontologist 1144 | godfather 1145 | golfer 1146 | guerrilla 1147 | gymnast 1148 | hacker 1149 | herpetologist 1150 | historian 1151 | illustrator 1152 | incumbent 1153 | inventor 1154 | journalist 1155 | judge 1156 | jurist 1157 | kickboxer 1158 | lexicographer 1159 | librarian 1160 | lifeguard 1161 | linguist 1162 | logician 1163 | magician 1164 | mathematician 1165 | meteorologist 1166 | mineralogist 1167 | missionary 1168 | musician 1169 | myrmecologist 1170 | neurochemist 1171 | neurologist 1172 | neuroscientist 1173 | nurse 1174 | oenologist 1175 | ornithologist 1176 | painter 1177 | paleontologist 1178 | pathologist 1179 | pharmacist 1180 | philatelist 1181 | philosopher 1182 | photochemist 1183 | photographer 1184 | photojournalist 1185 | physician 1186 | physicist 1187 | pianist 1188 | pilot 1189 | pirate 1190 | player 1191 | playwright 1192 | poet 1193 | politician 1194 | preacher 1195 | programmer 1196 | psephologist 1197 | psychologist 1198 | rheologist 1199 | runologist 1200 | sailor 1201 | scientist 1202 | screenwriter 1203 | singer 1204 | sociologist 1205 | songwriter 1206 | songwriter 1207 | sportsman 1208 | sportswoman 1209 | sportsperson 1210 | spy 1211 | statistician 1212 | stripper 1213 | swimmer 1214 | televangelist 1215 | theologian 1216 | theorist 1217 | ufologist 1218 | ventriloquist 1219 | veterinarian 1220 | vexillologist 1221 | acrobat 1222 | amateur 1223 | athlete 1224 | ballplayer 1225 | basketeer 1226 | cager 1227 | climber 1228 
| cricketer 1229 | footballer 1230 | gymnast 1231 | hooker 1232 | hurdler 1233 | jock 1234 | jumper 1235 | letterman 1236 | lifter 1237 | Olympian 1238 | pentathlete 1239 | pro 1240 | professional 1241 | reserve 1242 | runner 1243 | sharpshooter 1244 | skater 1245 | skier 1246 | sledder 1247 | sport 1248 | sportswoman 1249 | striker 1250 | substitute 1251 | swimmer 1252 | swinger 1253 | vaulter 1254 | weightlifter 1255 | winger 1256 | -------------------------------------------------------------------------------- /src/main/resources/edu/knowitall/chunkedextractor/nouns_of.txt: -------------------------------------------------------------------------------- 1 | emperor 2 | mayor 3 | president 4 | minister 5 | chancellor 6 | prince 7 | king 8 | queen 9 | governor 10 | premier 11 | -------------------------------------------------------------------------------- /src/main/resources/edu/knowitall/chunkedextractor/org_words.txt: -------------------------------------------------------------------------------- 1 | Party 2 | Corporation 3 | Ltd. 4 | Board 5 | Council 6 | Inc. 7 | Committee 8 | Inc 9 | Ltd 10 | Group 11 | Limited 12 | Incorporation 13 | Association 14 | Company 15 | Co. 16 | Corp. 17 | Corp 18 | Club 19 | Foundation 20 | Fund 21 | Incorporated 22 | Institute 23 | Society 24 | Syndicate 25 | Union 26 | Authority 27 | Agency 28 | Transit 29 | Group 30 | Corporations 31 | Industries 32 | Industry 33 | Industriale 34 | System 35 | Exchange 36 | Center 37 | Enterprise 38 | Holdings 39 | Holding 40 | Bank 41 | Banks 42 | Post 43 | Productions 44 | Centre 45 | Services 46 | Service 47 | Post 48 | S.A. 49 | C.V. 50 | Ltda. 51 | S.L. 52 | B.V. 
53 | Office 54 | Utilities 55 | Nigam 56 | Organisation 57 | Organisations 58 | Commission 59 | Federation 60 | Undertaking 61 | Division 62 | Department 63 | Academy 64 | Bureau 65 | Force 66 | Corps 67 | Administration 68 | Institutions 69 | Cell 70 | Works 71 | Factory 72 | Workshop 73 | Workshops 74 | AB 75 | League 76 | Labs 77 | Lab 78 | Studio 79 | Studios 80 | Cooperation 81 | S.P.A. 82 | ÇİMENTO 83 | Scheme 84 | Express 85 | Establishment 86 | Airlines 87 | Airways 88 | Railways 89 | Air 90 | Research 91 | Programme 92 | Network 93 | Project 94 | Angeles 95 | Indies 96 | Nations 97 | Africa 98 | Lanka 99 | Francisco 100 | Guinea 101 | Carolina 102 | City 103 | Norte 104 | Territory 105 | Peninsula 106 | Region 107 | Island 108 | Islands 109 | Region 110 | Reina 111 | Kudarat 112 | Sul 113 | Croix 114 | Louis 115 | Maarten 116 | Lake 117 | Leone 118 | Sibugay 119 | Eustatius 120 | Scilly 121 | Samoa 122 | Arbor 123 | Barbuda 124 | Pradesh 125 | Bahamas 126 | California 127 | Balears 128 | Bengal 129 | Bañeza 130 | Catarina 131 | Barthélemy 132 | Country 133 | Horizonte 134 | Scotia 135 | Vista 136 | Bender 137 | Herzegovina 138 | Rico 139 | Islander 140 | Aires 141 | Kingdom 142 | Columbia 143 | Coruña 144 | Bronx 145 | Faso 146 | Fasoa 147 | Oro 148 | Verdean 149 | Verde 150 | Tenerife 151 | Town 152 | Janeiro 153 | Carlos 154 | Mancha 155 | Wales 156 | Valley 157 | Rica 158 | Worth 159 | Cracker 160 | Futuna 161 | Fuego 162 | Gallegos 163 | Coast 164 | Rapids 165 | Hague 166 | Kong 167 | Konger 168 | Nevis 169 | Lumpur 170 | Vegas 171 | Province 172 | Hampshire 173 | Lake 174 | Laker 175 | said 176 | met 177 | united 178 | Lankan 179 | -------------------------------------------------------------------------------- /src/main/resources/edu/knowitall/chunkedextractor/prp_mapping.csv: -------------------------------------------------------------------------------- 1 | my,mine 2 | your,yours 3 | his,him 4 | her,hers 5 | our,ours 6 | their,theirs 7 | My,Mine 8 
| Your,Yours 9 | His,Him 10 | Her,Hers 11 | Our,Ours 12 | Their,Theirs 13 | -------------------------------------------------------------------------------- /src/main/resources/logging.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knowitall/chunkedextractor/33e63d66d723ddbf1d4234f8500183310454d888/src/main/resources/logging.properties -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/Expressions.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall 2 | package chunkedextractor 3 | 4 | import java.util.regex.Pattern 5 | import edu.knowitall.tool.stem.Lemmatized 6 | import edu.knowitall.tool.chunk.ChunkedToken 7 | /** Token predicates backed by java.util.regex patterns. */ 8 | object Expressions { 9 | type Token = Lemmatized[ChunkedToken] 10 | 11 | /** 12 | * A regular expression that is evaluated against the string portion of a 13 | * token. 14 | * 15 | * This comparison is case-sensitive. 16 | * 17 | * @author schmmd 18 | */ 19 | class CaseSensitiveStringExpression(val pattern: Pattern) extends Function[Token, Boolean] { 20 | def this(string: String, flags: Int) { 21 | this(Pattern.compile(string, flags)) 22 | } 23 | 24 | def this(string: String) { 25 | this(string, 0) 26 | } 27 | // True only when the entire token string matches the pattern (Matcher.matches). 28 | override def apply(token: Token): Boolean = 29 | pattern.matcher(token.token.string).matches() 30 | } 31 | 32 | /** 33 | * A regular expression that is evaluated against the string portion of a 34 | * token. 35 | * 36 | * This comparison is case-insensitive.
37 | * The whole token string must match; only the single-string constructor defaults to case-insensitive. 38 | * @author schmmd 39 | */ 40 | class StringExpression(val pattern: Pattern) extends Function[Token, Boolean] { 41 | def this(string: String, flags: Int) { 42 | this(Pattern.compile(string, flags)) 43 | } 44 | 45 | def this(string: String) { 46 | this(string, Pattern.CASE_INSENSITIVE) 47 | } 48 | 49 | override def apply(token: Token): Boolean = 50 | pattern.matcher(token.token.string).matches() 51 | } 52 | 53 | /** 54 | * A regular expression that is evaluated against the lemma portion of a 55 | * token. The whole lemma must match; the single-string constructor compiles the pattern case-insensitively. 56 | * @author schmmd 57 | */ 58 | class LemmaExpression(val pattern: Pattern) extends Function[Token, Boolean] { 59 | def this(string: String, flags: Int) { 60 | this(Pattern.compile(string, flags)) 61 | } 62 | 63 | def this(string: String) { 64 | this(string, Pattern.CASE_INSENSITIVE) 65 | } 66 | 67 | override def apply(token: Token): Boolean = 68 | pattern.matcher(token.lemma).matches() 69 | } 70 | 71 | /** 72 | * A regular expression that is evaluated against the POS tag portion of a 73 | * token. The whole tag must match; the single-string constructor compiles the pattern case-insensitively. 74 | * @author schmmd 75 | */ 76 | class PostagExpression(val pattern: Pattern) extends Function[Token, Boolean] { 77 | def this(string: String, flags: Int) { 78 | this(Pattern.compile(string, flags)) 79 | } 80 | 81 | def this(string: String) { 82 | this(string, Pattern.CASE_INSENSITIVE) 83 | } 84 | 85 | override def apply(token: Token): Boolean = 86 | pattern.matcher(token.token.postag).matches() 87 | } 88 | 89 | /** 90 | * A regular expression that is evaluated against the chunk tag portion of a 91 | * token. The whole tag must match; the single-string constructor compiles the pattern case-insensitively.
92 | * @author schmmd 93 | */ 94 | class ChunkExpression(val pattern: Pattern) extends Function[Token, Boolean] { 95 | def this(string: String, flags: Int) { 96 | this(Pattern.compile(string, flags)) 97 | } 98 | 99 | def this(string: String) { 100 | this(string, Pattern.CASE_INSENSITIVE) 101 | } 102 | 103 | override def apply(token: Token): Boolean = 104 | pattern.matcher(token.token.chunk).matches() 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/Extraction.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall 2 | package chunkedextractor 3 | 4 | import edu.knowitall.collection.immutable.Interval 5 | import edu.knowitall.tool.stem.Lemmatized 6 | import edu.knowitall.tool.chunk.ChunkedToken 7 | import edu.knowitall.tool.tokenize.Token 8 | /** One part of an extraction: its text, its tokens, and the token interval it is associated with. */ 9 | case class ExtractionPart[+T <% Token](text: String, tokens: Seq[T], tokenInterval: Interval) { 10 | override def toString = text 11 | /** Interval built from the first token's start offset and the last token's end offset; `tokens` must be non-empty (head/last are used). */ 12 | def offsetInterval = Interval.open(tokens.head.offsets.start, tokens.last.offsets.end) 13 | // scala.deprecated takes (message, since) in that order. 14 | @deprecated("Use tokenInterval instead.", "1.0.3") 15 | def interval = tokenInterval 16 | } 17 | /** Factory methods that slice an ExtractionPart out of a token sequence by token interval. */ 18 | object ExtractionPart { 19 | def fromSentenceTokens[T <% Token](sentenceTokens: Seq[T], tokenInterval: Interval, text: String) = 20 | new ExtractionPart[T](text, sentenceTokens.view(tokenInterval.start, tokenInterval.end), tokenInterval) 21 | /** Variant that derives the text by joining the selected tokens' strings with single spaces. */ 22 | def fromSentenceTokens[T <% Token](sentenceTokens: Seq[T], tokenInterval: Interval) = 23 | new ExtractionPart(sentenceTokens.view(tokenInterval.start, tokenInterval.end).iterator.map(_.string).mkString(" "), sentenceTokens.view(tokenInterval.start, tokenInterval.end), tokenInterval) 24 | } 25 | /** A binary extraction made of three parts; toString renders it as "(arg1; rel; arg2)". */ 26 | case class BinaryExtraction[+T <% Token](arg1: ExtractionPart[T], rel: ExtractionPart[T], arg2: ExtractionPart[T]) { 27 | override def toString = Iterable(arg1, rel, arg2).mkString("(", "; ", ")")
28 | // Derived from the three parts: space-joined text, Interval.span over their token/offset intervals, and concatenated tokens. 29 | def text = Iterable(arg1.text, rel.text, arg2.text).mkString(" ") 30 | def tokenInterval = Interval.span(Iterable(arg1.tokenInterval, rel.tokenInterval, arg2.tokenInterval)) 31 | def offsetInterval = Interval.span(Iterable(arg1.offsetInterval, rel.offsetInterval, arg2.offsetInterval)) 32 | def tokens = arg1.tokens ++ rel.tokens ++ arg2.tokens 33 | // scala.deprecated takes (message, since) in that order. 34 | @deprecated("Use tokenInterval instead.", "1.0.3") 35 | def interval = tokenInterval 36 | } 37 | /** Pairs an extraction with a token sequence `sent`; toString renders `extr <- "space-joined sent strings"`. */ 38 | class BinaryExtractionInstance[+T <% Token](val extr: BinaryExtraction[T], val sent: Seq[T]) { 39 | override def toString = extr.toString + " <- \"" + sent.map(_.string).mkString(" ") + "\"" 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/Extractor.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall.chunkedextractor 2 | /** A function from A to an Iterable of B; `extract` is an alias for `apply`. */ 3 | abstract class Extractor[A, B] extends Function[A, Iterable[B]] { 4 | def extract(a: A) = this.apply(a) 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/JavaChunkedExtractor.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall.chunkedextractor 2 | 3 | import edu.knowitall.tool.chunk.ChunkedToken 4 | /** Extraction interface over plain ChunkedToken sequences; extractWithConfidence pairs each instance with a Double (presumably its confidence score). */ 5 | trait JavaChunkedExtractor { 6 | def apply(tokens: Seq[ChunkedToken]): Seq[BinaryExtractionInstance[ChunkedToken]] 7 | def extractWithConfidence(tokens: Seq[ChunkedToken]): Seq[(Double, BinaryExtractionInstance[ChunkedToken])] 8 | } 9 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/Nesty.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall 2 | package chunkedextractor 3 | 4 | import edu.knowitall.tool.chunk.ChunkedToken 5 | import
edu.knowitall.tool.chunk.OpenNlpChunker 6 | import edu.knowitall.collection.immutable.Interval 7 | import edu.knowitall.tool.stem.MorphaStemmer 8 | import edu.knowitall.tool.stem.Lemmatized 9 | import edu.knowitall.openregex 10 | 11 | import scala.collection.JavaConverters._ 12 | /** Pattern extractor (driven by Nesty.pattern) that uses ReVerb extractions as the inner extraction of a nested extraction. */ 13 | class Nesty 14 | extends BinaryPatternExtractor[Nesty.ExtractionInstance](Nesty.pattern) { 15 | 16 | lazy val reverb = new ReVerb 17 | /** Runs ReVerb on the underlying chunked tokens, then delegates to the two-argument apply. */ 18 | override def apply(tokens: Seq[PatternExtractor.Token]): Iterable[Nesty.ExtractionInstance] = { 19 | val reverbExtractions = reverb.extract(tokens.map(_.token)) 20 | this.apply(tokens, reverbExtractions.map(_.extr)) 21 | } 22 | /** Appends to each token's chunk tag the ":"-joined B-/I- ARG1/REL/ARG2 roles it has in the given extractions (at most one role per extraction, first matching case wins), then applies the pattern. */ 23 | def apply(tokens: Seq[PatternExtractor.Token], reverbExtractions: Iterable[BinaryExtraction[ChunkedToken]]): Iterable[Nesty.ExtractionInstance] = { 24 | val transformed = 25 | tokens.iterator.zipWithIndex.map { case (t, i) => 26 | val ext = reverbExtractions.flatMap { 27 | case extr if (extr.arg1.tokenInterval.start == i) => 28 | Some("B-ARG1") 29 | case extr if (extr.arg1.tokenInterval superset Interval.singleton(i)) => 30 | Some("I-ARG1") 31 | case extr if (extr.rel.tokenInterval.start == i) => 32 | Some("B-REL") 33 | case extr if (extr.rel.tokenInterval superset Interval.singleton(i)) => 34 | Some("I-REL") 35 | case extr if (extr.arg2.tokenInterval.start == i) => 36 | Some("B-ARG2") 37 | case extr if (extr.arg2.tokenInterval superset Interval.singleton(i)) => 38 | Some("I-ARG2") 39 | case _ => None 40 | }.mkString(":") 41 | // Rebuild the token with the augmented chunk tag; the ":" separator is appended even when no role matched. 42 | t.copy(token= ChunkedToken(t.token.chunk + ":" + ext, t.token.postag, t.token.string, t.token.offset)) 43 | }.toSeq 44 | 45 | super.apply(transformed) 46 | } 47 | /** Builds a NestedExtraction from the match's named groups (arg1, nestedRelation, baseArg1, baseRelation, baseArg2); `.get` assumes every group is present. */ 48 | override def buildExtraction(tokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = { 49 | implicit def patternTokenAsToken2(lemmatized: PatternExtractor.Token): edu.knowitall.tool.tokenize.Token = lemmatized.token 50 | val relation =
ExtractionPart.fromSentenceTokens[Nesty.Token](tokens.map(_.token), PatternExtractor.intervalFromGroup(m.group("baseRelation").get)) 51 | 52 | val extr = new Nesty.NestedExtraction( 53 | ExtractionPart.fromSentenceTokens[Nesty.Token](tokens.map(_.token), PatternExtractor.intervalFromGroup(m.group("arg1").get)), 54 | ExtractionPart.fromSentenceTokens[Nesty.Token](tokens.map(_.token), PatternExtractor.intervalFromGroup(m.group("nestedRelation").get)), 55 | new BinaryExtraction[Nesty.Token]( 56 | ExtractionPart.fromSentenceTokens[Nesty.Token](tokens.map(_.token), PatternExtractor.intervalFromGroup(m.group("baseArg1").get)), 57 | relation, 58 | ExtractionPart.fromSentenceTokens[Nesty.Token](tokens.map(_.token), PatternExtractor.intervalFromGroup(m.group("baseArg2").get)))) 59 | 60 | Some(new Nesty.ExtractionInstance(extr, tokens.map(_.token))) 61 | } 62 | } 63 | 64 | object Nesty { 65 | type Token = ChunkedToken 66 | 67 | class ExtractionInstance(override val extr: Nesty.NestedExtraction, sent: Seq[Nesty.Token]) 68 | extends BinaryExtractionInstance(extr, sent) 69 | 70 | class NestedExtraction(arg1: ExtractionPart[Token], rel: ExtractionPart[Token], val nested: BinaryExtraction[Nesty.Token]) 71 | extends BinaryExtraction(arg1, rel, new ExtractionPart[Token](nested.text, nested.tokens, nested.tokenInterval)) { 72 | } 73 | 74 | val verbs = List("be", "say", "have", "believe", 75 | "tell", "suggest", "argue", "indicate", "claim", "note", "know", 76 | "show", "state", "find", "conclude", "report", "means", "announce", 77 | "think", "warn", "write", "add", "demonstrate", "appear", "reveal", 78 | "agree", "assert", "acknowledge", "hope", "realize", "fear", 79 | "suspect", "mean", "feel", "see", "explain", "confirm", "mention", 80 | "ask", "seem", "observe", "estimate", "admit", "recognize", 81 | "allege", "insist", "require", "discover", "declare", "imply", 82 | "give", "deny", "understand", "express", "recommend", "worry", 83 | "point", "maintain", "contend", "stress", 
"prove", "demand", 84 | "learn", "hear", "assume", "predict", "inform", "complain", 85 | "reflect", "provide", "hold", "testify", "request", "notice", 86 | "assess", "remind", "wish", "speculate", "raise", "forget", 87 | "decide", "promise", "pray", "expect", "ensure", "challenge", 88 | "recall", "present", "determine", "doubt", "concede", "reply", 89 | "reject", "propose", "make", "include", "emphasize", "concern", 90 | "charge", "take", "remains", "receive", "exist", "assure", "teach", 91 | "rule", "respond", "remember", "reinforce", "happen", "do", 92 | "caution", "underscore", "turn", "threaten", "signal", "repeat", 93 | "release", "relate", "presume", "discuss", "confess", "bear", 94 | "advise", "trust", "reiterate", "disclose", "dictate", "convince", 95 | "consider", "anticipate", "answer", "accept", "will", "swear", 96 | "regret", "order", "issue", "increase", "illustrate", "ignore", 97 | "establish", "continue", "certify", "bemoan", "affirm", "surface", 98 | "support", "seize", "sa", "refute", "prompt", "posit", "offer", 99 | "live", "leave", "lack", "invite", "imagine", "highlight", 100 | "follow", "fail", "examine", "build", "boast", "begin", "urge", 101 | "theorize", "represent", "remain", "reason", "proclaim", "pretend", 102 | "postulate", "perceive", "outline", "mark", "list", "judge", 103 | "guarantee", "grasp", "go", "face", "emerge", "echo", "dispute", 104 | "deserve", "describe", "decree", "control", "contain", "cite", 105 | "cause", "brag", "bolster", "aver", "alert", "wwa", "vow", "voice", 106 | "use", "underline", "thank", "survey", "strengthen", "spread", 107 | "specify", "speak", "signify", "sense", "send", "rebuff", "read", 108 | "reach", "put", "protest", "prescribe", "postpone", "pledge", 109 | "pick", "persuade", "operate", "object", "need", "nag", "muslim", 110 | "mirror", "lose", "lessen", "keep", "get", "generate", "form", 111 | "explore", "eply", "embody", "dismiss", "disagree", "detonate", 112 | "denounce", "come", "claus", "clarify", 
"circulate", "call", 113 | "brush", "bring", "alarm") 114 | 115 | final val nestedRelationPatternString = 116 | "(?:(?:(?:? ? (?:) ? ?)+" + 117 | "(?:(?: *)*)) |" + 118 | "(?:(?:? ? (?:) ? ?)+" + 119 | "(?:(?: *)* )))" 120 | 121 | // The pattern for a nested relation is defined as... 122 | final val pattern = 123 | // A noun phrase, with optional PP attachment (don't allow because) 124 | "(: * (?: *)* ?)" + 125 | // Followed by a nested relation phrase (with negative lookahead to 126 | // prevent the next noun phrase from "absorbing" part of the nested 127 | // relation) 128 | "(:" + nestedRelationPatternString + "+)" + 129 | // Followed by another noun phrase 130 | // Sometimes OpenNLP classifies "that" as the start of the chunk so we 131 | // allow the chunk to start with I-NP. 132 | "(: *)" + 133 | // Followed by a base relation phrase (again with negative lookahead) 134 | "(: *)" + 135 | // Followed by another noun phrase (possibly starting from inside) 136 | "(: *)" 137 | 138 | def main(args: Array[String]) { 139 | System.out.println("Creating the nested relation extractor... ") 140 | val nesty = new Nesty() 141 | 142 | if (args.length > 0 && (args(0) equals "--pattern")) { 143 | System.out.println(Nesty.pattern) 144 | } else { 145 | System.out.println("Creating the sentence chunker... 
") 146 | val chunker = new OpenNlpChunker() 147 | val stemmer = new MorphaStemmer() 148 | System.out.println("Please enter a sentence:") 149 | 150 | try { 151 | for (line <- scala.io.Source.stdin.getLines) { 152 | val chunked = chunker.chunk(line) 153 | val tokens = chunked map stemmer.lemmatizeToken 154 | 155 | for (extraction <- nesty(tokens)) { 156 | println(extraction.extr) 157 | } 158 | 159 | System.out.println() 160 | } 161 | } catch { 162 | case e: Exception => 163 | e.printStackTrace() 164 | System.exit(2) 165 | } 166 | } 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/PatternExtractor.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall.chunkedextractor 2 | 3 | import java.util.regex.Pattern 4 | 5 | import scala.collection.JavaConverters.asScalaBufferConverter 6 | import scala.collection.JavaConverters.seqAsJavaListConverter 7 | 8 | import com.google.common.base.{Function => GuavaFunction} 9 | 10 | import edu.knowitall.collection.immutable.Interval 11 | import edu.knowitall.tool.chunk.ChunkedToken 12 | import edu.knowitall.tool.stem.Lemmatized 13 | 14 | import edu.knowitall.openregex 15 | import edu.washington.cs.knowitall.logic.{Expression => LExpression} 16 | import edu.washington.cs.knowitall.logic.LogicExpression 17 | import edu.washington.cs.knowitall.regex.Expression 18 | import edu.washington.cs.knowitall.regex.Match 19 | import edu.washington.cs.knowitall.regex.RegularExpression 20 | 21 | object PatternExtractor { 22 | type Token = Lemmatized[ChunkedToken] 23 | object Token { 24 | implicit def patternTokenAsToken(lemmatized: PatternExtractor.Token): edu.knowitall.tool.tokenize.Token = lemmatized.token 25 | } 26 | 27 | implicit def guavaFromFunction[A, B](f: A => B) = new GuavaFunction[A, B] { 28 | override def apply(a: A) = f(a) 29 | } 30 | 31 | implicit def logicArgFromFunction[T](f: T 
=> Boolean) = new LExpression.Arg[T] { 32 | override def apply(token: T) = f(token) 33 | } 34 | 35 | /** Compiles a token pattern string into an openregex pattern over Token. Every token expression is handed to LogicExpression.compile; each of its arguments must look like attribute="value" or attribute='value', where attribute is one of string, lemma, pos or chunk. The argument deserializer throws IllegalArgumentException for an argument without '=', an unquoted value, or an unknown attribute. */ def compile(pattern: String) = 36 | openregex.Pattern.compile(pattern, (expression: String) => { 37 | val valuePattern = Pattern.compile("([\"'])(.*)\\1") 38 | 39 | val deserializeToken: String => (Token => Boolean) = (argument: String) => { 40 | /* limit 2: split on the first '=' only, so a quoted value may itself contain '=' */ val (base, value) = argument.split("=", 2) match { case Array(b, v) => (b, v) case _ => throw new IllegalArgumentException("Expected attribute=\"value\" but found: " + argument) } 41 | 42 | val matcher = valuePattern.matcher(value) 43 | if (!matcher.matches()) { 44 | throw new IllegalArgumentException("Value not enclosed in quote (\") or ('): " + argument) 45 | } 46 | 47 | val string = matcher.group(2) 48 | 49 | base match { 50 | case "string" => new Expressions.StringExpression(string) 51 | case "lemma" => new Expressions.LemmaExpression(string) 52 | case "pos" => new Expressions.PostagExpression(string) 53 | case "chunk" => new Expressions.ChunkExpression(string) case other => throw new IllegalArgumentException("Unknown token attribute '" + other + "' in: " + argument) 54 | } 55 | } 56 | 57 | val logic: LogicExpression[Token] = 58 | LogicExpression.compile(expression, deserializeToken andThen logicArgFromFunction[Token]) 59 | 60 | (token: Token) => { 61 | logic.apply(token) 62 | } 63 | }) 64 | 65 | def intervalFromGroup(group: openregex.Pattern.Group[_]): Interval = { 66 | val interval = group.interval 67 | 68 | if (interval.start == -1 || interval.end == -1) { 69 | Interval.empty 70 | } else { 71 | interval 72 | } 73 | } 74 | } 75 | 76 | abstract class BinaryPatternExtractor[B](val expression: openregex.Pattern[PatternExtractor.Token]) 77 | extends Extractor[Seq[PatternExtractor.Token], B] { 78 | def this(pattern: String) = this(PatternExtractor.compile(pattern)) 79 | 80 | def apply(tokens: Seq[PatternExtractor.Token]): Iterable[B] = { 81 | val matches = expression.findAll(tokens.toList); 82 | 83 | for ( 84 | m <- matches; 85 | extraction = buildExtraction(tokens, m); 86 | 87 | if !filterExtraction(extraction) 88 | ) yield extraction.get 89 | } 90 | 91 | protected def filterExtraction(extraction: Option[B]): Boolean = 92 | extraction match { 93 | case None => 
true 94 | case _ => false 95 | } 96 | 97 | protected def buildExtraction(tokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]): Option[B] 98 | } 99 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/R2A2.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall.chunkedextractor 2 | 3 | import edu.knowitall.tool.chunk.ChunkedToken 4 | import edu.knowitall.collection.immutable.Interval 5 | 6 | import edu.washington.cs.knowitall.extractor.ReVerbExtractor 7 | import edu.washington.cs.knowitall.commonlib.Range 8 | import edu.washington.cs.knowitall.nlp.ChunkedSentence 9 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction 10 | import edu.washington.cs.knowitall.extractor.conf.ConfidenceFunction 11 | import edu.washington.cs.knowitall.util.DefaultObjects 12 | import edu.washington.cs.knowitall.extractor.conf.ReVerbOpenNlpConfFunction 13 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction 14 | import edu.washington.cs.knowitall.extractor 15 | import edu.washington.cs.knowitall.argumentidentifier.ConfidenceMetric 16 | 17 | class R2A2(val r2a2: extractor.R2A2, val conf: Option[ConfidenceMetric] = None) extends Extractor[Seq[ChunkedToken], BinaryExtractionInstance[ChunkedToken]] with JavaChunkedExtractor { 18 | def this() = this(new extractor.R2A2, Some(new ConfidenceMetric)) 19 | 20 | private def confidence(extr: ChunkedBinaryExtraction): Double = 21 | (conf map (_ getConf extr)).getOrElse { 22 | throw new IllegalArgumentException("No confidence function defined.") 23 | } 24 | 25 | private def reverbExtract(tokens: Seq[ChunkedToken]) = { 26 | import collection.JavaConverters._ 27 | 28 | val chunkedSentence = new ChunkedSentence( 29 | tokens.map(token => Range.fromInterval(token.offset, token.offset + token.string.length)).toArray, 30 | tokens.map(_.string).toArray, 
31 | tokens.map(_.postag).toArray, 32 | tokens.map(_.chunk).toArray) 33 | 34 | val extrs = r2a2.extract(chunkedSentence) 35 | extrs.asScala 36 | } 37 | 38 | private def convertExtraction(tokens: Seq[ChunkedToken])(extr: ChunkedBinaryExtraction) = { 39 | def convertPart(ce: ChunkedExtraction) = { 40 | val interval = Interval.open(ce.getRange.getStart, ce.getRange.getEnd) 41 | new ExtractionPart(ce.getText, tokens.view(interval.start, interval.end), interval) 42 | } 43 | 44 | new BinaryExtraction(convertPart(extr.getArgument1), convertPart(extr.getRelation), convertPart(extr.getArgument2)) 45 | } 46 | 47 | def apply(tokens: Seq[ChunkedToken]): Seq[BinaryExtractionInstance[ChunkedToken]] = { 48 | (reverbExtract(tokens) map convertExtraction(tokens) map (extr => new BinaryExtractionInstance(extr, tokens)))( 49 | scala.collection.breakOut) 50 | } 51 | 52 | @deprecated("Use extractWithConfidence", "2.4.1") 53 | def extractWithConf(tokens: Seq[ChunkedToken]): Seq[(Option[Double], BinaryExtractionInstance[ChunkedToken])] = { 54 | val extrs = reverbExtract(tokens) 55 | val confs = extrs map { extr => 56 | conf.map(_.getConf(extr)) 57 | } 58 | 59 | val converted = extrs map (extr => new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens)) 60 | (confs.iterator zip converted.iterator).toList 61 | } 62 | 63 | def extractWithConfidence(tokens: Seq[ChunkedToken]): Seq[(Double, BinaryExtractionInstance[ChunkedToken])] = { 64 | val extrs = reverbExtract(tokens) 65 | val confs = extrs map this.confidence 66 | 67 | val converted = extrs map (extr => new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens)) 68 | (confs.iterator zip converted.iterator).toList 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/ReVerb.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall.chunkedextractor 2 | 3 | import 
edu.knowitall.tool.chunk.ChunkedToken 4 | import edu.knowitall.collection.immutable.Interval 5 | 6 | import edu.washington.cs.knowitall.extractor.ReVerbExtractor 7 | import edu.washington.cs.knowitall.nlp.ChunkedSentence 8 | import edu.washington.cs.knowitall.commonlib.Range 9 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction 10 | import edu.washington.cs.knowitall.extractor.conf.ConfidenceFunction 11 | import edu.washington.cs.knowitall.util.DefaultObjects 12 | import edu.washington.cs.knowitall.extractor.conf.ReVerbOpenNlpConfFunction 13 | import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction 14 | 15 | class ReVerb(val reverb: ReVerbExtractor, val conf: Option[ConfidenceFunction] = None) extends Extractor[Seq[ChunkedToken], BinaryExtractionInstance[ChunkedToken]] with JavaChunkedExtractor { 16 | def this() = this(new ReVerbExtractor, Some(new ReVerbOpenNlpConfFunction)) 17 | 18 | private def confidence(extr: ChunkedBinaryExtraction): Double = 19 | (conf map (_ getConf extr)).getOrElse { 20 | throw new IllegalArgumentException("No confidence function defined.") 21 | } 22 | 23 | private def reverbExtract(tokens: Seq[ChunkedToken]) = { 24 | import collection.JavaConverters._ 25 | 26 | val chunkedSentence = new ChunkedSentence( 27 | tokens.map(token => Range.fromInterval(token.offset, token.offset + token.string.length)).toArray, 28 | tokens.map(_.string).toArray, 29 | tokens.map(_.postag).toArray, 30 | tokens.map(_.chunk).toArray) 31 | 32 | val extrs = reverb.extract(chunkedSentence) 33 | extrs.asScala 34 | } 35 | 36 | private def convertExtraction(tokens: Seq[ChunkedToken])(extr: ChunkedBinaryExtraction) = { 37 | def convertPart(ce: ChunkedExtraction) = { 38 | val interval = Interval.open(ce.getRange.getStart, ce.getRange.getEnd) 39 | new ExtractionPart(ce.getText, tokens.view(interval.start, interval.end), interval) 40 | } 41 | 42 | new BinaryExtraction(convertPart(extr.getArgument1), convertPart(extr.getRelation), 
convertPart(extr.getArgument2)) 43 | } 44 | 45 | def apply(tokens: Seq[ChunkedToken]): Seq[BinaryExtractionInstance[ChunkedToken]] = { 46 | (reverbExtract(tokens) map convertExtraction(tokens) map (extr => new BinaryExtractionInstance(extr, tokens)))( 47 | scala.collection.breakOut) 48 | } 49 | 50 | def extractWithConfidence(tokens: Seq[ChunkedToken]): Seq[(Double, BinaryExtractionInstance[ChunkedToken])] = { 51 | val extrs = reverbExtract(tokens) 52 | val confs = extrs map this.confidence 53 | 54 | val converted = extrs map (extr => new BinaryExtractionInstance(convertExtraction(tokens)(extr), tokens)) 55 | (confs.iterator zip converted.iterator).toList 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/scala/edu/knowitall/chunkedextractor/Relnoun.scala: -------------------------------------------------------------------------------- 1 | package edu.knowitall 2 | package chunkedextractor 3 | 4 | import resource._ 5 | 6 | import edu.knowitall.tool.chunk.ChunkedToken 7 | import edu.knowitall.collection.immutable.Interval 8 | import edu.knowitall.tool.chunk.OpenNlpChunker 9 | import edu.knowitall.tool.stem.MorphaStemmer 10 | import edu.knowitall.tool.stem.Lemmatized 11 | import scala.io.Source 12 | import edu.knowitall.common.Timing 13 | import scala.collection.JavaConverters._ 14 | import edu.knowitall.openregex 15 | import edu.washington.cs.knowitall.regex.Match 16 | import edu.washington.cs.knowitall.regex.RegularExpression 17 | import Relnoun._ 18 | import java.io.PrintStream 19 | import java.io.PrintWriter 20 | import java.io.File 21 | import java.nio.charset.MalformedInputException 22 | 23 | class Relnoun(val encloseInferredWords: Boolean = true, val includeReverbRelnouns: Boolean = true, val includeUnknownArg2: Boolean = false) 24 | extends Extractor[Seq[PatternExtractor.Token], BinaryExtractionInstance[Relnoun.Token]] { 25 | 26 | val subextractors: 
Seq[BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]]] = Seq( 27 | new AppositiveExtractor(this.encloseInferredWords, this.includeUnknownArg2), 28 | new AppositiveExtractor2(this.encloseInferredWords, this.includeUnknownArg2), 29 | new AdjectiveDescriptorExtractor(this.encloseInferredWords, this.includeUnknownArg2), 30 | new PossessiveExtractor(this.encloseInferredWords, this.includeUnknownArg2), 31 | new PossessiveAppositiveExtractor(this.encloseInferredWords, this.includeUnknownArg2), 32 | new PossessiveIsExtractor(this.encloseInferredWords, this.includeUnknownArg2), 33 | new IsPossessiveExtractor(this.encloseInferredWords, this.includeUnknownArg2), 34 | new OfIsExtractor(this.encloseInferredWords, this.includeUnknownArg2), 35 | new OfCommaExtractor(this.encloseInferredWords, this.includeUnknownArg2), 36 | new PossessiveReverseExtractor(this.encloseInferredWords, this.includeUnknownArg2), 37 | new ProperNounAdjectiveExtractor(this.encloseInferredWords, this.includeUnknownArg2)) ++ 38 | (if (includeReverbRelnouns) Seq(new VerbBasedExtractor(this.encloseInferredWords, this.includeUnknownArg2)) else Seq.empty) 39 | 40 | /** Runs every sub-extractor over the sentence and returns their extractions in sub-extractor order. An extraction whose arg2 text is [UNKNOWN] is dropped when some extraction with the same arg1 text has a known arg2; every other extraction is kept. */ def apply(tokens: Seq[Lemmatized[ChunkedToken]]): Seq[BinaryExtractionInstance[Relnoun.Token]] = { 41 | val extrs = for ( 42 | sub <- subextractors; 43 | extr <- sub(tokens) 44 | ) yield extr 45 | 46 | /* arg1 texts that already have an extraction with a known arg2; collected once instead of rescanning extrs for every [UNKNOWN] extraction */ 47 | val arg1sWithKnownArg2 = extrs.iterator.filter(e => !e.extr.arg2.text.equals(UNKNOWN)).map(_.extr.arg1.text).toSet 48 | 49 | extrs.filter { extr => 50 | !extr.extr.arg2.text.equals(UNKNOWN) || !arg1sWithKnownArg2.contains(extr.extr.arg1.text) 51 | } 73 | 
} 74 | } 75 | 76 | object Relnoun { 77 | 78 | type Token = ChunkedToken 79 | 80 | val demonyms_url = Option(this.getClass.getResource("demonyms.csv")).getOrElse { 81 | throw new IllegalArgumentException("Could not load demonyms.csv") 82 | } 83 | 84 | val demonyms_iter = Source.fromInputStream(demonyms_url.openStream(),"UTF-8").getLines().map(_.split(",")) 85 | 86 | val prp_mapping_url = Option(this.getClass.getResource("prp_mapping.csv")).getOrElse { 87 | throw new IllegalArgumentException("Could not load prp_mapping.csv") 88 | } 89 | 90 | val prp_mapping_iter = Source.fromInputStream(prp_mapping_url.openStream(),"UTF-8").getLines().map(_.split(",")) 91 | 92 | val nouns_url = Option(this.getClass.getResource("nouns.txt")).getOrElse { 93 | throw new IllegalArgumentException("Could not load nouns.txt") 94 | } 95 | 96 | val ofNouns_url = Option(this.getClass.getResource("nouns_of.txt")).getOrElse { 97 | throw new IllegalArgumentException("Could not load nouns_of.txt") 98 | } 99 | 100 | val orgsWords_url = Option(this.getClass.getResource("org_words.txt")).getOrElse { 101 | throw new IllegalArgumentException("Could not load org_words.txt") 102 | } 103 | 104 | val relnoun_prefixes_url = Option(this.getClass.getResource("relnoun_prefixes.txt")).getOrElse { 105 | throw new IllegalArgumentException("Could not load relnoun_prefixes.txt") 106 | } 107 | 108 | var prp_mapping_map = scala.collection.mutable.Map[String, String]() 109 | while(prp_mapping_iter.hasNext) { 110 | val arr = prp_mapping_iter.next 111 | prp_mapping_map += arr(0) -> arr(1) 112 | } 113 | 114 | var demonyms_map = scala.collection.mutable.Map[String, String]() 115 | while(demonyms_iter.hasNext) { 116 | val arr = demonyms_iter.next 117 | demonyms_map += arr(0) -> arr(1) 118 | demonyms_map += ("South" + " " + arr(0)) -> ("South" + " " + arr(1)) 119 | demonyms_map += ("North" + " " + arr(0)) -> ("North" + " " + arr(1)) 120 | 
demonyms_map += ("East" + " " + arr(0)) -> ("East" + " " + arr(1)) 121 | demonyms_map += ("West" + " " + arr(0)) -> ("West" + " " + arr(1)) 122 | demonyms_map += ("Southern" + " " + arr(0)) -> ("Southern" + " " + arr(1)) 123 | demonyms_map += ("Northern" + " " + arr(0)) -> ("Northern" + " " + arr(1)) 124 | demonyms_map += ("Eastern" + " " + arr(0)) -> ("Eastern" + " " + arr(1)) 125 | demonyms_map += ("Western" + " " + arr(0)) -> ("Western" + " " + arr(1)) 126 | demonyms_map += ("Central" + " " + arr(0)) -> ("Central" + " " + arr(1)) 127 | } 128 | 129 | val (demonyms_key, demonyms_val) = demonyms_map.toSeq.unzip 130 | val locations = (demonyms_key ++ demonyms_val) 131 | 132 | val nounChunk = "(?: *)" 133 | val properNounChunk = "(?: *) | (?: * *)"; 134 | val properRelnounChunk = "(: +) | (: +)" 135 | 136 | val pronoun = "" 137 | val pronoun_possessive = "" 138 | 139 | val relnoun = "(string='${relnoun}' | string='${ofNoun}')"; 140 | val relnoun_prefix = "string=\"${relnoun_prefixes}\"" 141 | val relnoun_prefix_noPrefixCheck = "!(string=\"${demonyms}\")" 142 | 143 | val relnoun_prefix_pos = " & pos=\"JJS?|VBDS?|VBNS?|NNS?|NNPS?|RBS?\" & !(string=\"${orgwords}\")" 144 | 145 | val relnoun_prefix_tagged = "<" + relnoun_prefix + relnoun_prefix_pos + ">*" 146 | val relnoun_prefix_tagged_noPrefixCheck = "<" + relnoun_prefix_noPrefixCheck + relnoun_prefix_pos + ">*" 147 | 148 | 149 | val input_nouns = Source.fromInputStream(nouns_url.openStream(),"UTF-8").getLines().map(_.trim()).toArray 150 | val ex_nouns = input_nouns.map { x => "ex-"+x } 151 | val nouns = input_nouns ++ ex_nouns 152 | 153 | private final val orgs = Source.fromInputStream(orgsWords_url.openStream(),"UTF-8").getLines().map(_.trim()).toArray 154 | private final val ofNouns = Source.fromInputStream(ofNouns_url.openStream(),"UTF-8").getLines().map(_.trim()).toArray 155 | private final val adjs = Source.fromInputStream(relnoun_prefixes_url.openStream(),"UTF-8").getLines().map(_.trim()).toArray 156 | 157 | val 
UNKNOWN = "[UNKNOWN]" 158 | val arg1_notAllowed = List("Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday", 159 | "January","February","March","April","May","June","July","August","September","October","November","December") 160 | 161 | abstract class BaseExtractor { 162 | val pattern: String 163 | } 164 | 165 | def patternReplace(extractor: BaseExtractor) = 166 | extractor.pattern 167 | .replace("${relnoun}", nouns.mkString("|")) 168 | .replace("${ofNoun}", ofNouns.mkString("|")) 169 | .replace("${orgwords}", orgs.mkString("|")) 170 | .replace("${relnoun_prefixes}", adjs.mkString("|")) 171 | .replace("${demonyms}", locations.mkString("|")) 172 | 173 | protected def finalizeExtraction[B](m: openregex.Pattern.Match[PatternExtractor.Token], encloseInferredWords: Boolean, patternTokens: Seq[PatternExtractor.Token], 174 | arg1: ExtractionPart[ChunkedToken], relation: ExtractionPart[ChunkedToken], arg2: ExtractionPart[ChunkedToken], 175 | includeUnknownArg2:Boolean, includeIs:Boolean, includePost: Boolean): Option[BinaryExtractionInstance[Relnoun.Token]] = { 176 | 177 | val tokens = patternTokens.map(_.token) 178 | 179 | var isValidExtraction = true 180 | var arg2_modified = arg2 181 | 182 | //replacing prp 183 | val prpMappingVal = Relnoun.prp_mapping_map.get(arg2.text) 184 | arg2_modified = prpMappingVal match { 185 | case Some(s) => ExtractionPart.fromSentenceTokens(tokens, arg2_modified.tokenInterval, s) 186 | case None => arg2_modified 187 | } 188 | 189 | //Setting arg2 as [UNKNOWN] if not present or if "its"(possible came by AdjectiveDescriptorExtractor(prp)) 190 | if((arg2.text == "" || arg2.text == "its") && includeUnknownArg2) arg2_modified = ExtractionPart.fromSentenceTokens(tokens, relation.tokenInterval, UNKNOWN) 191 | if(arg1.text=="it" || arg2_modified.text == "its") isValidExtraction = false 192 | 193 | //replacing demonyms 194 | 195 | val demonymVal = Relnoun.demonyms_map.get(arg2.text) 196 | arg2_modified = demonymVal match { 197 | 
case Some(s) => ExtractionPart.fromSentenceTokens(tokens, arg2_modified.tokenInterval, s) 198 | case None => arg2_modified 199 | } 200 | 201 | if(arg2_modified.text == "") isValidExtraction = false 202 | 203 | //remove extractions with arg1 as Sunday,Monday..,January, February... 204 | if(arg1_notAllowed.contains(arg1.text)) isValidExtraction = false 205 | if(arg1_notAllowed.contains(arg2_modified.text)) isValidExtraction = false 206 | 207 | if(!isValidExtraction) None 208 | 209 | else { 210 | val inferredIs = if (encloseInferredWords) "[is]" else "is" 211 | 212 | var rel_text = relation.text 213 | if(includeIs) rel_text = inferredIs + " " + rel_text 214 | if(includePost) rel_text = rel_text + " " + inferred_post(m, encloseInferredWords, arg2_modified.text) 215 | val relation_modified = ExtractionPart.fromSentenceTokens(tokens, relation.tokenInterval, rel_text) 216 | 217 | val extr = new BinaryExtraction(arg1, relation_modified, arg2_modified) 218 | Some(new BinaryExtractionInstance[Relnoun.Token](extr, tokens)) 219 | } 220 | } 221 | 222 | 223 | def inferred_post(m: openregex.Pattern.Match[PatternExtractor.Token], encloseInferredWords: Boolean, arg2_text: String): String = { 224 | val inferredOf = if (encloseInferredWords) "[of]" else "of" 225 | val inferredFrom = if (encloseInferredWords) "[from]" else "from" 226 | 227 | if(!locations.contains(arg2_text)) inferredOf //if arg2 is not a demonym, use inferredOf 228 | else { 229 | m.group("relnoun") match { 230 | case None => inferredOf 231 | case _ => inferredFrom 232 | } 233 | } 234 | } 235 | 236 | /** 237 | * Extracts relations from phrases such as: 238 | * "Barack Obama is the president of the United States." 
239 | * (Barack Obama, is the president of, the United States) 240 | * 241 | * @author schmmd 242 | */ 243 | class VerbBasedExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean) 244 | extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]]( 245 | patternReplace(VerbBasedExtractor)) { 246 | 247 | override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = { 248 | val tokens = patternTokens.map(_.token) 249 | val relation = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(2)), m.groups(2).tokens.map(_.token.string).mkString(" ")) 250 | 251 | val arg1 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(1))) 252 | val arg2 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(3))) 253 | 254 | finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, false) 255 | } 256 | } 257 | 258 | object VerbBasedExtractor extends BaseExtractor { 259 | val pattern = 260 | // {nouns} (no preposition) 261 | "(" + nounChunk + ")" + 262 | // {be} {adverb} {adjective} {relnoun} {prep} 263 | "( ? <" + relnoun + "> )" + 264 | // {proper np chunk} 265 | "(" + nounChunk + ")"; 266 | } 267 | 268 | /** 269 | * * 270 | * Extracts relations from phrases such as: 271 | * "Chris Curran, a lawyer for Al-Rajhi Banking." 
272 | * (Chris Curran, [is] a lawyer for, Al-Rajhi Banking) 273 | * 274 | * @author schmmd 275 | * 276 | */ 277 | class AppositiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean) 278 | extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]]( 279 | patternReplace(AppositiveExtractor)) { 280 | 281 | override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = { 282 | val tokens = patternTokens.map(_.token) 283 | val relation = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(2)), m.groups(2).tokens.map(_.token.string).mkString(" ")) 284 | 285 | val arg1 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(1))) 286 | val arg2 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(3))) 287 | 288 | finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, false) 289 | } 290 | } 291 | 292 | object AppositiveExtractor extends BaseExtractor { 293 | val pattern: String = 294 | // {proper noun} 295 | "(" + properNounChunk + "|" + pronoun + ")" + 296 | // {comma} 297 | "" + 298 | // {article} 299 | "(*" + 300 | // {adjective or noun} 301 | "*" + 302 | // {relnoun} {preposition} 303 | "<" + relnoun + "& pos=\"NN|NNP\"> )" + 304 | "( *)" 305 | } 306 | 307 | /*** 308 | * Extracts relations from phrases such as: 309 | * "Lauren Faust, a cartoonist," 310 | * (Lauren Faust; [is]; a cartoonist) 311 | */ 312 | class AppositiveExtractor2(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean) 313 | extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]]( 314 | patternReplace(AppositiveExtractor2)) { 315 | 316 | private val inferredIs = if (this.encloseInferredWords) "[is]" else "is" 317 | 318 | override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], 
m: openregex.Pattern.Match[PatternExtractor.Token]) = { 319 | val tokens = patternTokens.map(_.token) 320 | val relation = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(1)), this.inferredIs) 321 | 322 | val arg1 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(1))) 323 | val arg2 = ExtractionPart.fromSentenceTokens(tokens, PatternExtractor.intervalFromGroup(m.groups(2))) 324 | 325 | finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, false) 326 | } 327 | } 328 | 329 | object AppositiveExtractor2 extends BaseExtractor { 330 | val pattern: String = 331 | // {proper noun} 332 | "(" + properNounChunk + "|" + pronoun + ")" + 333 | // {comma} 334 | "" + 335 | // adverb 336 | "?" + 337 | // {article} 338 | "(*" + 339 | // {adjective or noun} 340 | "*" + 341 | // {relnoun} {preposition} 342 | relnoun_prefix_tagged_noPrefixCheck + " <" + relnoun + "& pos=\"NN|NNP\">)" + 343 | "" 344 | } 345 | 346 | 347 | /*** 348 | * Extracts relations from phrases such as: 349 | * "United States President Barack Obama" 350 | * (Barack Obama; [is] President [of]; United States) 351 | * 352 | * "Indian player Sachin Tendulkar" 353 | * (Sachin Tendulkar; [is] player [from]; India) 354 | * 355 | * @author schmmd 356 | * 357 | */ 358 | class AdjectiveDescriptorExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean) 359 | extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]]( 360 | patternReplace(AdjectiveDescriptorExtractor)) { 361 | 362 | private val inferredIs = if (this.encloseInferredWords) "[is]" else "is" 363 | 364 | override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = { 365 | val tokens = patternTokens.map(_.token) 366 | 367 | val adjectiveGroup = m.group("adj").get match { 368 | case g if g.text.isEmpty => None 369 | case g => 
// Constraints encoded by the AdjectiveDescriptorExtractor pattern:
//   arg1: must contain at least one NNP that is not an {orgword}
//   arg2: "relnoun_prefixes" followed by an NNP are allowed ("Foreign Ministry spokesman Qin Gang.")
//   arg2: pos=JJ is allowed only for words in the demonym list ("outgoing Chairperson Bonnie Peng.")
object AdjectiveDescriptorExtractor extends BaseExtractor {
  val pattern =
    // {adjective}
    "(: *)" +
    "(((: (* " + "+) | (" + pronoun_possessive + ")? )" +
    "(: " + relnoun_prefix_tagged + properRelnounChunk + "))" + "|" +
    "((: (+) )" +
    "(: " + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")))" +
    "?" + // {comma}
    "(: * + *)"
}

/**
 * Possessive construction directly followed by a proper noun, e.g.
 *
 *   "Hakani's nephew John"      => (John, [is] nephew [of], Hakani)
 *   "India's player Tendulkar"  => (Tendulkar; [is] player [from]; India)
 *
 * @author schmmd
 */
class PossessiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(PossessiveExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: capture group 2, rendered as its space-joined surface strings
    val relGroup = m.groups(2)
    val relInterval = PatternExtractor.intervalFromGroup(relGroup)
    val relText = relGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: the named group "arg1"; arg2: capture group 1
    val arg1Interval = PatternExtractor.intervalFromGroup(m.group("arg1").get)
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(1))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
  }
}

object PossessiveExtractor extends BaseExtractor {
  val pattern =
    // {proper noun} (no preposition)
    "(* + *)" +
    // {possessive}
    "" +
    // {adverb} {adjective} {relnoun}
    "(*" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
    // {proper noun} (no preposition)
    "(: * + *)"
}

/**
 * Possessive construction followed by a comma and a proper noun phrase, e.g.
 *
 *   "AUC's leader, Carlos Castano"  => (Carlos Castano, [is] leader [of], AUC)
 *   "India's player, Tendulkar"     => (Tendulkar; [is] player [from]; India)
 *
 * @author schmmd
 */
class PossessiveAppositiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(PossessiveAppositiveExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: capture group 2, rendered as its space-joined surface strings
    val relGroup = m.groups(2)
    val relInterval = PatternExtractor.intervalFromGroup(relGroup)
    val relText = relGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: the named group "arg1"; arg2: capture group 1
    val arg1Interval = PatternExtractor.intervalFromGroup(m.group("arg1").get)
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(1))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
  }
}

object PossessiveAppositiveExtractor extends BaseExtractor {
  val pattern: String =
    // {nouns} (no preposition)
    "(+)" +
    // {possessive}
    "" +
    // {adverb} {adjective} {relnoun}
    "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
    // {comma}
    "" +
    // {proper np chunk}
    "(:" + properNounChunk + ")"
}
/**
 * Possessive construction linked to a proper noun phrase by a form of "be", e.g.
 *
 *   "AUC's leader is Carlos Castano"  => (Carlos Castano, is leader [of], AUC)
 *   "India's Player is Sachin."       => (Sachin; is Player [from]; India)
 *
 * @author schmmd
 */
class PossessiveIsExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(PossessiveIsExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: interval of capture group 2; its text is the matched "lemma_be"
    // words followed by the words of capture group 2
    val relGroup = m.groups(2)
    val relInterval = PatternExtractor.intervalFromGroup(relGroup)
    val beText = m.group("lemma_be").get.tokens.map(_.token.string).mkString(" ")
    val relText = beText + " " + relGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: the named group "arg1"; arg2: capture group 1
    val arg1Interval = PatternExtractor.intervalFromGroup(m.group("arg1").get)
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(1))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, true)
  }
}

object PossessiveIsExtractor extends BaseExtractor {
  val pattern =
    // {nouns} (no preposition)
    "(? * * +)" +
    // {possessive}
    "" +
    // {adverb} {adjective} {relnoun}
    "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
    // be
    "(: )" +
    // {proper np chunk}
    "(:" + properNounChunk + ")"
}
/**
 * Proper noun (or pronoun) linked by a form of "be" to a possessive construction, e.g.
 *
 *   "Barack Obama is America's President"  => (Barack Obama; is President [of]; America)
 *   "Tendulkar is India's player."         => (Tendulkar; is player [from]; India)
 *
 * @author schmmd
 */
class IsPossessiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(IsPossessiveExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: interval of capture group 4; its text is the words of
    // capture group 2 followed by the words of capture group 4
    val predGroup = m.groups(4)
    val relInterval = PatternExtractor.intervalFromGroup(predGroup)
    val leadText = m.groups(2).tokens.map(_.token.string).mkString(" ")
    val relText = leadText + " " + predGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: capture group 1; arg2: capture group 3
    val arg1Interval = PatternExtractor.intervalFromGroup(m.groups(1))
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(3))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, true)
  }
}

object IsPossessiveExtractor extends BaseExtractor {
  val pattern =
    // {nouns} (no preposition)
    "(" + properNounChunk + "|" + pronoun + ")" +
    "()" +
    "(+)" +
    "" +
    "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")"
}

/**
 * "<relnoun> of <np> is <proper noun>" construction, e.g.
 *
 *   "the father of Michael is John"  => (John; is the father of; Michael)
 *
 * @author schmmd
 */
class OfIsExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(OfIsExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: interval of capture group 1; its text is the words of
    // capture group 3 followed by the words of capture group 1
    val predGroup = m.groups(1)
    val relInterval = PatternExtractor.intervalFromGroup(predGroup)
    val leadText = m.groups(3).tokens.map(_.token.string).mkString(" ")
    val relText = leadText + " " + predGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: the named group "arg1"; arg2: capture group 2
    val arg1Interval = PatternExtractor.intervalFromGroup(m.group("arg1").get)
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(2))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, false, false)
  }
}

object OfIsExtractor extends BaseExtractor {
  val pattern =
    "( * <" + relnoun + "& pos='NN|NNP' & chunk='I-NP'> " +
      ") " +
      "( * ? ? *) " +
      "() " +
      "(: ( *) |" + pronoun + ")"
}

/**
 * "<relnoun> of <np>, <proper noun>," construction, e.g.
 *
 *   "the father of Michael, John,"  => (John; [is] the father of; Michael)
 *
 * @author harinder
 */
class OfCommaExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(OfCommaExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: capture group 1, rendered as its space-joined surface strings
    val relGroup = m.groups(1)
    val relInterval = PatternExtractor.intervalFromGroup(relGroup)
    val relText = relGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: capture group 4; arg2: capture group 2
    val arg1Interval = PatternExtractor.intervalFromGroup(m.groups(4))
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(2))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, false)
  }
}

object OfCommaExtractor extends BaseExtractor {
  val pattern =
    "( * <" + relnoun + "& pos='NN|NNP' & chunk='I-NP'> " +
      ") " +
      "( * ? ? *) " +
      "() " +
      // {proper np chunk}
      "(" + properNounChunk + "|" + pronoun + ")" +
      "() "
}

/**
 * Proper noun (or pronoun), comma, then a possessive construction, e.g.
 *
 *   "Mohammed Jamal, bin Laden's brother"  => (Mohammed Jamal, [is] brother [of], bin Laden)
 *   "Tendulkar, India's player"            => (Tendulkar; [is] player [from]; India)
 *
 * @author schmmd
 */
class PossessiveReverseExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(PossessiveReverseExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: capture group 3, rendered as its space-joined surface strings
    val relGroup = m.groups(3)
    val relInterval = PatternExtractor.intervalFromGroup(relGroup)
    val relText = relGroup.tokens.map(_.token.string).mkString(" ")
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, relText)

    // arg1: capture group 1; arg2: capture group 2
    val arg1Interval = PatternExtractor.intervalFromGroup(m.groups(1))
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.groups(2))
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
  }
}

object PossessiveReverseExtractor extends BaseExtractor {
  val pattern =
    // {proper noun} (no preposition)
    "(" + properNounChunk + "|" + pronoun + ")" +
    // comma
    "" +
    // {np chunk}
    "( *)" +
    // {possessive}
    "" +
    "(* *" + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")" +
    "(?:|$)"
}
/**
 * Proper noun (or pronoun), comma, then a descriptor phrase ending in a relational noun, e.g.
 *
 *   "Obama, the US president."        => (Obama, [is] president [of], United States)
 *   "Tendulkar, the Indian player."   => (Tendulkar; [is] the player [from]; India)
 *
 * @author schmmd
 */
class ProperNounAdjectiveExtractor(private val encloseInferredWords: Boolean, private val includeUnknownArg2: Boolean)
  extends BinaryPatternExtractor[BinaryExtractionInstance[Relnoun.Token]](
    patternReplace(ProperNounAdjectiveExtractor)) {

  override def buildExtraction(patternTokens: Seq[PatternExtractor.Token], m: openregex.Pattern.Match[PatternExtractor.Token]) = {
    val sentence = patternTokens.map(_.token)

    // relation: interval of the named group "pred"; its text is the words of
    // capture group 2 followed by the words of "pred", joined by single spaces
    val predGroup = m.group("pred").get
    val relInterval = PatternExtractor.intervalFromGroup(predGroup)
    val leadWords = m.groups(2).tokens.map(_.token.string)
    val predWords = predGroup.tokens.map(_.token.string)
    val relation = ExtractionPart.fromSentenceTokens(sentence, relInterval, (leadWords ++ predWords).mkString(" "))

    // both arguments come from named groups
    val arg1Interval = PatternExtractor.intervalFromGroup(m.group("arg1").get)
    val arg1 = ExtractionPart.fromSentenceTokens(sentence, arg1Interval)
    val arg2Interval = PatternExtractor.intervalFromGroup(m.group("arg2").get)
    val arg2 = ExtractionPart.fromSentenceTokens(sentence, arg2Interval)

    finalizeExtraction(m, encloseInferredWords, patternTokens, arg1, relation, arg2, includeUnknownArg2, true, true)
  }
}

object ProperNounAdjectiveExtractor extends BaseExtractor {
  val pattern =
    "(: " + properNounChunk + "|" + pronoun + ")" +
    "" +
    "( *)" +
    "(((: (* " + "+) )" +
    "(: " + relnoun_prefix_tagged + properRelnounChunk + "))" + "|" +
    "((: (+) )" +
    "(: " + relnoun_prefix_tagged_noPrefixCheck + properRelnounChunk + ")))"
}
/**
 * Command-line configuration of the application.
 *
 * @param inputFile     the file to read sentences from; stdin is used when absent
 * @param outputFile    the file to write extractions to; stdout is used when absent
 * @param encoding      character encoding applied to both the input and the output
 * @param printPatterns when true, print the sub-extractor patterns instead of processing input
 */
case class Config(inputFile: Option[File] = None,
  outputFile: Option[File] = None,
  encoding: String = "UTF-8",
  printPatterns: Boolean = false) {

  /** Create the input source from the input file, or from stdin when none was given. */
  def source() = {
    inputFile match {
      case Some(file) => Source.fromFile(file, encoding)
      case None => Source.fromInputStream(System.in, encoding)
    }
  }

  /** Create a writer to the output file, or to stdout when none was given. */
  def writer() = {
    outputFile match {
      case Some(file) => new PrintWriter(file, encoding)
      case None => new PrintWriter(new PrintStream(System.out, true, encoding))
    }
  }
}

/**
 * Command-line entry point: parses the arguments into a [[Config]] and runs the extractor.
 *
 * A MalformedInputException raised while reading the input is reported with a hint
 * about the character encoding, and the process exits with status 2.
 */
def main(args: Array[String]): Unit = {
  // definition for command-line argument parser
  val argumentParser = new scopt.immutable.OptionParser[Config]("openie") {
    def options = Seq(
      argOpt("input-file", "input file") { (string, config) =>
        val file = new File(string)
        require(file.exists, "input file does not exist: " + file)
        config.copy(inputFile = Some(file))
      },
      // the usage text previously misspelled this argument as "ouput-file"
      argOpt("output-file", "output file") { (string, config) =>
        val file = new File(string)
        config.copy(outputFile = Some(file))
      },
      opt("encoding", "Character encoding") { (string, config) =>
        config.copy(encoding = string)
      },
      flag("p", "pattern", "Prints the patterns") { config =>
        config.copy(printPatterns = true)
      })
  }

  argumentParser.parse(args, Config()) match {
    case Some(config) =>
      try {
        run(config)
      }
      catch {
        case e: MalformedInputException =>
          System.err.println(
            "\nError: a MalformedInputException was thrown.\n" +
              "This usually means there is a mismatch between what is expected and the input file.\n" +
              "Try changing the input file's character encoding to UTF-8 or specifying the correct character encoding for the input file with '--encoding'.\n")
          e.printStackTrace()
          // keep the non-zero exit status this failure had when run() handled it itself
          System.exit(2)
      }
    case None => // usage will be shown
  }
}

/**
 * Runs the relational noun extractor as described by `config`.
 *
 * With `printPatterns` set, the expression of every sub-extractor is printed to stdout.
 * Otherwise each input line is chunked, lemmatized and run through the extractor; the
 * line is echoed to the writer, followed by one "confidence: extraction" line per
 * extraction and a blank line.
 *
 * All progress messages go to stderr so that stdout carries only results.
 */
def run(config: Config): Unit = {
  // progress goes to stderr: stdout may be the destination of the extractions/patterns
  System.err.println("Creating the relational noun extractor... ")
  val relnoun = new Relnoun(true, true, true)
  val conf = confidence.RelnounConfidenceFunction.loadDefaultClassifier()

  config.inputFile.foreach { file =>
    System.err.println("Processing file: " + file)
  }

  if (config.printPatterns) {
    for (extractor <- relnoun.subextractors) {
      System.out.println(extractor.expression)
    }
  } else {

    System.err.println("Creating the sentence chunker... ")
    val chunker = new OpenNlpChunker()
    val stemmer = new MorphaStemmer()

    Timing.timeThen {

      for {
        source <- managed(config.source())
        writer <- managed(config.writer())
      } {
        try {
          for (line <- source.getLines) {
            val chunked = chunker.chunk(line)
            val tokens = chunked map stemmer.lemmatizeToken

            writer.println(line)
            for (inst <- relnoun(tokens)) {
              writer.println(("%1.2f" format conf(inst)) + ": " + inst.extr)
            }

            writer.println()
            writer.flush()
          }
        } catch {
          // let decoding failures reach main(), which prints the encoding hint;
          // the catch-all below used to swallow them, leaving that handler dead
          case e: MalformedInputException =>
            throw e
          case e: Exception =>
            e.printStackTrace()
            System.exit(2)
        }
      }
    } { ns =>
      System.err.println("extraction completed in: " + Timing.Seconds.format(ns))
    }

    config.outputFile.foreach { file =>
      System.err.println("Output written to file: " + file)
    }
  }
}
/** Factory for confidence functions over chunked binary extractions. */
object ChunkedExtractorConfidenceFunction {
  val logger = LoggerFactory.getLogger(getClass)

  /** Load a logistic regression over `featureSet` from the model stored at `url`. */
  def fromUrl(featureSet: FeatureSet[BinaryExtractionInstance[ChunkedToken], Double], url: URL) =
    LogisticRegression.fromUrl(featureSet, url)
}

/** Access to the confidence model bundled as the classpath resource "relnoun-confidence.txt". */
object RelnounConfidenceFunction {
  // getResource yields null when the resource is missing; fail during initialization in that case
  val defaultModelUrl = this.getClass.getResource("relnoun-confidence.txt") match {
    case null => throw new IllegalArgumentException("Could not load confidence function resource.")
    case found => found
  }

  /** Build the classifier from the bundled model, using ChunkedExtractorFeatureSet. */
  def loadDefaultClassifier(): LogisticRegression[BinaryExtractionInstance[ChunkedToken]] =
    ChunkedExtractorConfidenceFunction.fromUrl(ChunkedExtractorFeatureSet, defaultModelUrl)
}

/** The feature set built from ChunkedExtractorFeatures.featureMap. */
object ChunkedExtractorFeatureSet extends FeatureSet[BinaryExtractionInstance[ChunkedToken], Double](ChunkedExtractorFeatures.featureMap)
/**
 * Features defined for BinaryExtractionInstance[ChunkedToken].
 *
 * Every feature is boolean, encoded as 1.0 (true) or 0.0 (false) through `boolToDouble`.
 * The feature names are the keys of `featureMap` and are left unchanged.
 *
 * NOTE(review): several features were corrected here (see the comments on
 * conjBeforeRel, prepBeforeExtr, prepAfterExtr, arg2ContainsPronoun and
 * arg2ContainsPosPronoun). The bundled model was presumably trained against the
 * earlier definitions -- retrain or re-validate it after taking this change.
 */
object ChunkedExtractorFeatures {
  type ChunkedExtractorFeature = Feature[BinaryExtractionInstance[ChunkedToken], Double]

  /** Encodes a boolean feature value as 1.0 or 0.0. */
  implicit def boolToDouble(bool: Boolean): Double = if (bool) 1.0 else 0.0

  /** arg1 or arg2 starts at the first token of the sentence. */
  object startExtr extends ChunkedExtractorFeature("sent starts w/ extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.arg1.tokenInterval.start == 0 ||
        inst.extr.arg2.tokenInterval.start == 0
    }
  }

  /** arg1 or arg2 ends at the last token of the sentence. */
  object endArg2 extends ChunkedExtractorFeature("sent ends w/ extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.arg1.tokenInterval.end == inst.sent.size ||
        inst.extr.arg2.tokenInterval.end == inst.sent.size
    }
  }

  /** The token right before the relation is "which", "who" or "that". */
  object pronounBeforeRel extends ChunkedExtractorFeature("which|who|that before rel") {
    val targets = Set("which", "who", "that")
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.sent.take(inst.extr.rel.tokenInterval.start).lastOption.exists { prev =>
        targets contains prev.string
      }
    }
  }

  /** arg1 contains a proper noun. */
  object arg1Proper extends ChunkedExtractorFeature("arg1 is proper") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.arg1.tokens.exists(_.isProperNoun)
    }
  }

  /** arg2 contains a proper noun. */
  object arg2Proper extends ChunkedExtractorFeature("arg2 is proper") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.arg2.tokens.exists(_.isProperNoun)
    }
  }

  /** The extraction's token interval spans the whole sentence. */
  object extrCoversSentence extends ChunkedExtractorFeature("extr covers sent") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.tokenInterval.start == 0 && inst.extr.tokenInterval.end == inst.sent.size
    }
  }

  /** Some token before the extraction opens a noun phrase chunk. */
  object npBeforeExtr extends ChunkedExtractorFeature("np before extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.sent.take(inst.extr.tokenInterval.start).exists(_.chunk == "B-NP")
    }
  }

  /** The token right after the relation is part of a noun phrase chunk. */
  object npAfterExtr extends ChunkedExtractorFeature("np after extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.sent.drop(inst.extr.rel.tokenInterval.end).headOption.exists { next =>
        next.chunk == "B-NP" || next.chunk == "I-NP"
      }
    }
  }

  /** The token right before the relation is tagged CC. */
  object conjBeforeRel extends ChunkedExtractorFeature("conj before rel") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      // "CC" is a part-of-speech tag; comparing it against the chunk tag
      // (values such as "B-NP"/"I-NP") could never match
      inst.sent.take(inst.extr.rel.tokenInterval.start).lastOption.exists(_.postag == "CC")
    }
  }

  /** The token right before the relation is tagged IN. */
  object prepBeforeExtr extends ChunkedExtractorFeature("prep before extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      // "IN" is a part-of-speech tag, not a chunk tag (same fix as conjBeforeRel)
      inst.sent.take(inst.extr.rel.tokenInterval.start).lastOption.exists(_.postag == "IN")
    }
  }

  /** The token right after the relation is a verb. */
  object verbAfterExtr extends ChunkedExtractorFeature("verb after extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.sent.drop(inst.extr.rel.tokenInterval.end).headOption.exists(_.isVerb)
    }
  }

  /** The token right after the relation is tagged IN. */
  object prepAfterExtr extends ChunkedExtractorFeature("prep after extr") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      // previously tested isVerb, which made this an exact duplicate of verbAfterExtr
      inst.sent.drop(inst.extr.rel.tokenInterval.end).headOption.exists(_.postag == "IN")
    }
  }

  /** arg1 contains a pronoun. */
  object arg1ContainsPronoun extends ChunkedExtractorFeature("arg1 contains pronoun") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.arg1.tokens.exists(_.isPronoun)
    }
  }

  /** arg2 contains a pronoun. */
  object arg2ContainsPronoun extends ChunkedExtractorFeature("arg2 contains pronoun") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      // previously inspected arg1, duplicating arg1ContainsPronoun
      inst.extr.arg2.tokens.exists(_.isPronoun)
    }
  }

  /** arg1 contains a possessive pronoun. */
  object arg1ContainsPosPronoun extends ChunkedExtractorFeature("arg1 contains PRP$") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      inst.extr.arg1.tokens.exists(_.isPossessivePronoun)
    }
  }

  /** arg2 contains a possessive pronoun. */
  object arg2ContainsPosPronoun extends ChunkedExtractorFeature("arg2 contains PRP$") {
    override def apply(inst: BinaryExtractionInstance[ChunkedToken]): Double = {
      // previously inspected arg1, duplicating arg1ContainsPosPronoun
      inst.extr.arg2.tokens.exists(_.isPossessivePronoun)
    }
  }

  /** All features, in the order they are registered. */
  def features: Seq[ChunkedExtractorFeature] = Seq(
    startExtr,
    endArg2,
    pronounBeforeRel,
    arg1Proper,
    arg2Proper,
    extrCoversSentence,
    npBeforeExtr,
    npAfterExtr,
    conjBeforeRel,
    prepBeforeExtr,
    verbAfterExtr,
    prepAfterExtr,
    arg1ContainsPronoun,
    arg2ContainsPronoun,
    arg1ContainsPosPronoun,
    arg2ContainsPosPronoun
  )

  /** The features keyed (and sorted) by feature name. */
  def featureMap: SortedMap[String, ChunkedExtractorFeature] = {
    (for (f <- features) yield (f.name -> Feature.from(f.name, f.apply _)))(scala.collection.breakOut)
  }
}
/**
 * Trains the logistic regression confidence function for Relnoun extractions.
 *
 * Takes three positional arguments: the sentences file, the gold file and the
 * output file (which must not exist yet). Each gold line holds four tab-separated
 * fields -- label, arg1, rel, arg2 -- where a label of "1" marks a correct extraction.
 */
object TrainChunkedExtractor extends App {
  case class Config(
    inputFile: File = null,
    outputFile: File = null,
    goldFile: File = null) {
  }

  val parser = new scopt.immutable.OptionParser[Config]("trainer") {
    def options = Seq(
      arg("", "sentences") { (path: String, config: Config) =>
        val file = new File(path)
        require(file.exists(), "file does not exist: " + path)
        config.copy(inputFile = file)
      },
      arg("", "gold") { (path: String, config: Config) =>
        val file = new File(path)
        require(file.exists(), "file does not exist: " + path)
        config.copy(goldFile = file)
      },
      arg("", "output") { (path: String, config: Config) =>
        val file = new File(path)
        require(!file.exists(), "file already exist: " + path)
        config.copy(outputFile = file)
      })
  }

  parser.parse(args, Config()) match {
    case Some(config) => run(config)
    case None =>
  }

  /**
   * Extracts from every sentence of the input file, labels each extraction from the
   * gold file, trains the classifier and saves it to the output file.
   *
   * @throws IllegalArgumentException if a non-blank gold line does not have exactly four tab-separated fields
   * @throws NoSuchElementException if an extraction has no label in the gold file
   */
  def run(config: Config) = {
    val relnoun = new Relnoun()

    val chunker = new OpenNlpChunker()

    val gold = Resource.using(Source.fromFile(config.goldFile)) { goldSource =>
      goldSource.getLines
        // tolerate blank lines (e.g. a trailing newline) instead of dying with a MatchError
        .filter(_.trim.nonEmpty)
        .map { goldLine =>
          goldLine.split("\t") match {
            case Array(label, arg1, rel, arg2) => (arg1, rel, arg2) -> (label == "1")
            case _ =>
              throw new IllegalArgumentException(
                "malformed gold line (expected label, arg1, rel, arg2 separated by tabs): " + goldLine)
          }
        }.toMap
    }
    val examples =
      Resource.using(Source.fromFile(config.inputFile)) { source =>
        for {
          line <- source.getLines.toList
          chunked = chunker(line) map MorphaStemmer.lemmatizePostaggedToken

          inst <- relnoun.extract(chunked)

          extr = inst.extr
          key = (extr.arg1.text, extr.rel.text, extr.arg2.text)
          // name the unlabelled extraction rather than failing with a bare "key not found"
          label = gold.getOrElse(key, throw new NoSuchElementException("no gold label for extraction: " + key))
        } yield {
          new Labelled(label, inst)
        }
      }

    val trainer = new BreezeLogisticRegressionTrainer(ChunkedExtractorFeatureSet)
    val trained = trainer.train(examples)

    trained.saveFile(config.outputFile)
  }
}
/** Specification checking R2A2 on a single example sentence. */
@RunWith(classOf[JUnitRunner])
object R2A2SpecTest extends Specification {
  /** Chunk `sentence` with a fresh OpenNlpChunker and run a fresh R2A2 over the result. */
  def extract(sentence: String) = {
    val sentenceChunker = new OpenNlpChunker
    val extractor = new R2A2
    extractor(sentenceChunker.chunk(sentence))
  }

  "r2a2" should {
    val found = extract("Michael ate at the best restaurant in London")

    "have a single extraction" in {
      found.size must_== 1
    }

    "have the correct extraction" in {
      found.head.extr.toString must_== "(Michael; ate at; the best restaurant in London)"
    }
  }
}
| } 30 | } 31 | } 32 | 33 | test("VerbBasedExtractor", 34 | "Barack Obama is the president of the United States.", 35 | ("Barack Obama", "is the president of", "the United States")) 36 | 37 | 38 | test("AppositiveExtractor", 39 | "Barack Obama, the President of the U.S.", 40 | ("Barack Obama", "[is] the President of", "the U.S.")) 41 | 42 | test("AppositiveExtractor_pronoun", 43 | "He, the President of the U.S.", 44 | ("He", "[is] the President of", "the U.S.")) 45 | 46 | test("AppositiveExtractor2", 47 | "Lauren Faust, a cartoonist,", 48 | ("Lauren Faust", "[is]", "a cartoonist")) 49 | 50 | test("AppositiveExtractor2_pronoun", 51 | "He, a cartoonist,", 52 | ("He", "[is]", "a cartoonist")) 53 | 54 | test("AdjectiveDescriptorExtractor_[of]", 55 | "United States President Barack Obama gave a speech today.", 56 | ("Barack Obama", "[is] President [of]", "United States")) 57 | 58 | test("AdjectiveDescriptorExtractor__[from]", 59 | "Indian player Sachin Tendulkar received the Arjuna Award in 1994.", 60 | ("Sachin Tendulkar", "[is] player [from]", "India")) 61 | 62 | test("AdjectiveDescriptorExtractor_title", 63 | "President Barack Obama gave a speech today.", 64 | ("Barack Obama", "[is] President [of]", "[UNKNOWN]")) 65 | 66 | test("AdjectiveDescriptorExtractor_title_more_1", 67 | "Prime Minister Narendra Modi gave a speech today.", 68 | ("Narendra Modi", "[is] Prime Minister [of]", "[UNKNOWN]")) 69 | 70 | test("AdjectiveDescriptorExtractor_prefix", 71 | "Indian Vice President Modi.", 72 | ("Modi", "[is] Vice President [of]", "India")) 73 | 74 | test("AdjectiveDescriptorExtractor_pronoun", 75 | "His father John,", 76 | ("John", "[is] father [of]", "Him")) 77 | 78 | test("AdjectiveDescriptorExtractor_more_1", 79 | "Foreign Ministry spokesman Qin Gang.", 80 | ("Qin Gang", "[is] spokesman [of]", "Foreign Ministry")) 81 | 82 | test("AdjectiveDescriptorExtractor_more_2", 83 | "New Yorker's best staff writer Adam.", 84 | ("Adam", "[is] best staff writer [from]", "New York")) 
85 | 86 | test("AdjectiveDescriptorExtractor_more_3", 87 | "General Motors interim chief executive Ed Whitacre.", 88 | ("Ed Whitacre", "[is] interim chief executive [of]", "General Motors")) 89 | 90 | test("AdjectiveDescriptorExtractor_more_4", 91 | "foreign Indian spokesman Qin Gang.", 92 | ("Qin Gang", "[is] foreign spokesman [from]", "India")) 93 | 94 | test("AdjectiveDescriptorExtractor_more_5", 95 | "first Indian spokesman Qin Gang.", 96 | ("Qin Gang", "[is] first spokesman [from]", "India")) 97 | 98 | test("AdjectiveDescriptorExtractor_more_6", 99 | "New Zealand coach Steve Hansen.", 100 | ("Steve Hansen", "[is] coach [from]", "New Zealand")) 101 | 102 | /*test("AdjectiveDescriptorExtractor_more_7", 103 | "Costa Rican President Luis Guillermo.", 104 | ("Luis Guillermo", "[is] President [of]", "Costa Rica"))*/ 105 | 106 | test("AdjectiveDescriptorExtractor_more_8", 107 | "New Zealand President Luis Guillermo.", 108 | ("Luis Guillermo", "[is] President [of]", "New Zealand")) 109 | 110 | /*test("AdjectiveDescriptorExtractor_more_9", 111 | "North Korean President Obama.", 112 | ("Obama", "[is] President [of]", "North Korea"))*/ 113 | 114 | test("AdjectiveDescriptorExtractor_more_10", 115 | "New York governor Eliot Spitzer.", 116 | ("Eliot Spitzer", "[is] governor [of]", "New York City")) 117 | 118 | test("AdjectiveDescriptorExtractor_more_11", 119 | "Seattle Badminton Player Michael.", 120 | ("Michael", "[is] Badminton Player [from]", "Seattle")) 121 | 122 | /*test("AdjectiveDescriptorExtractor_more_12", 123 | "Badminton Player Michael.", 124 | ("Michael", "[is] Player [of]", "Badminton"))*/ 125 | 126 | test("AdjectiveDescriptorExtractor_more_13", 127 | "West Bengali chief minister Mamata Banerjee.", 128 | ("Mamata Banerjee", "[is] chief minister [of]", "West Bengal")) 129 | 130 | test("AdjectiveDescriptorExtractor_demonym", 131 | "Indian President Pranab Mukherjee gave a speech today.", 132 | ("Pranab Mukherjee", "[is] President [of]", "India")) 133 | 134 | 135 
| /* PossessiveExtractor cases: possessive phrase directly followed by a title and a name. Each test(...) call passes a case name, an input sentence and a 3-tuple of strings; presumably the tuple is the expected (arg1, relation, arg2) extraction -- the test helper is defined outside this view, TODO confirm. */ test("PossessiveExtractor_[of]", 136 | "United States' President Barack Obama was in a debate on Wednesday.", 137 | ("Barack Obama", "[is] President [of]", "United States")) 138 | 139 | test("PossessiveExtractor_[from]", 140 | "India's player Tendulkar received the Arjuna Award in 1994.", 141 | ("Tendulkar", "[is] player [from]", "India")) 142 | 143 | test("PossessiveExtractor_more_1", 144 | "New Zealand's President Luis Guillermo.", 145 | ("Luis Guillermo", "[is] President [of]", "New Zealand")) 146 | 147 | 148 | /* PossessiveAppositiveExtractor cases: same sentences as above but with the name set off by commas; the expected tuples are unchanged. */ test("PossessiveAppositiveExtractor_[of]", 149 | "United States' President, Barack Obama, was in a debate on Wednesday.", 150 | ("Barack Obama", "[is] President [of]", "United States")) 151 | 152 | test("PossessiveAppositiveExtractor_[from]", 153 | "India's player, Tendulkar, received the Arjuna Award in 1994.", 154 | ("Tendulkar", "[is] player [from]", "India")) 155 | 156 | 157 | /* PossessiveIsExtractor cases: the sentence contains an explicit is, and the expected relation carries is without brackets. */ test("PossessiveIsExtractor_[of]", 158 | "America's President is Barack Obama.", 159 | ("Barack Obama", "is President [of]", "America")) 160 | 161 | test("PossessiveIsExtractor_[from]", 162 | "India's Player is Sachin.", 163 | ("Sachin", "is Player [from]", "India")) 164 | 165 | 166 | /* IsPossessiveExtractor cases: name (or pronoun) first, then is, then the possessive phrase. */ test("IsPossessiveExtractor_[of]", 167 | "Barack Obama is America's President.", 168 | ("Barack Obama", "is President [of]", "America")) 169 | 170 | test("IsPossessiveExtractor_[from]", 171 | "Tendulkar is India's player.", 172 | ("Tendulkar", "is player [from]", "India")) 173 | 174 | test("IsPossessiveExtractor_more_1", 175 | "Luis Guillermo is New Zealand's President.", 176 | ("Luis Guillermo", "is President [of]", "New Zealand")) 177 | 178 | test("IsPossessiveExtractor_pronoun", 179 | "He is America's President.", 180 | ("He", "is President [of]", "America")) 181 | 182 | 183 | /* OfIsExtractor cases: here of is present in the sentence, so the expected relation has it unbracketed and keeps the leading The; the expected third element keeps its article (the United States). */ test("OfIsExtractor", 184 | "The President of the United States is Barack Obama.", 185 | ("Barack Obama", "is The President of", "the United States")) 186 | 187 | test("OfIsExtractor_pronoun", 188 | "The President of the 
United States is he.", 189 | ("he", "is The President of", "the United States")) 190 | 191 | 192 | /* OfCommaExtractor cases: the expected third element is the full nested of-phrase (the Commission of the African Union), and [is] is bracketed because the sentence has no is. */ test("OfCommaExtractor", 193 | "The Chairperson of the Commission of the African Union, Jean Ping, on Tuesday...", 194 | ("Jean Ping", "[is] The Chairperson of", "the Commission of the African Union")) 195 | 196 | test("OfCommaExtractor_pronoun", 197 | "The Chairperson of the Commission of the African Union, he, on Tuesday...", 198 | ("he", "[is] The Chairperson of", "the Commission of the African Union")) 199 | 200 | 201 | /* PossessiveReverseExtractor cases: name (or pronoun) first, followed by a comma-delimited possessive phrase. */ test("PossessiveReverseExtractor_[of]", 202 | "Barack Obama, America's President, gave a debate on Wednesday.", 203 | ("Barack Obama", "[is] President [of]", "America")) 204 | 205 | test("PossessiveReverseExtractor_[from]", 206 | "Tendulkar, India's player, received the Arjuna Award in 1994.", 207 | ("Tendulkar", "[is] player [from]", "India")) 208 | 209 | test("PossessiveReverseExtractor_pronoun", 210 | "He, America's President, gave a debate on Wednesday.", 211 | ("He", "[is] President [of]", "America")) 212 | 213 | 214 | /* ProperNounAdjectiveExtractor cases: the expected relation keeps the article the. NOTE(review): in the [of] case the sentence has US while the expected third element is United States; presumably an abbreviation or demonym mapping defined elsewhere -- confirm. */ test("ProperNounAdjectiveExtractor_[of]", 215 | "Barack Obama, the US President, gave a debate on Wednesday.", 216 | ("Barack Obama", "[is] the President [of]", "United States")) 217 | 218 | test("ProperNounAdjectiveExtractor_[from]", 219 | "Tendulkar, the Indian player, received the Arjuna Award in 1994.", 220 | ("Tendulkar", "[is] the player [from]", "India")) 221 | 222 | test("ProperNounAdjectiveExtractor_pronoun", 223 | "He, the Indian player, received the Arjuna Award in 1994.", 224 | ("He", "[is] the player [from]", "India")) 225 | } 226 | 227 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "2.2.2-SNAPSHOT" --------------------------------------------------------------------------------