├── .gitignore ├── LICENSE ├── README.md ├── activator ├── activator-launch-1.3.2.jar ├── activator.bat ├── app ├── controllers │ └── Application.java ├── edu │ └── psu │ │ └── ist │ │ └── vaccine │ │ ├── analyzers │ │ ├── BasicSolrAnalyzer.java │ │ ├── ClavinAnalyzer.java │ │ ├── GeoCoderAnalyzer.java │ │ ├── GeoNamesOrgAnalyzer.java │ │ ├── HierarchyAnalyzer.java │ │ ├── LandmarkGeoCoderAnalyzer.java │ │ ├── NerAnalyzer.java │ │ ├── SimpleHierarchyAnalyzer.java │ │ └── proximityAnalyzer.java │ │ ├── corpusbuilding │ │ ├── CorpusExposerApi.java │ │ ├── DatabaseConnection.java │ │ └── ResultSubmitter.java │ │ ├── geocoder │ │ ├── GeonamesGeoCoder.java │ │ ├── SolrBasedGeoCoder.java │ │ ├── SolrModifiedGeoCoder.java │ │ └── solr │ │ │ ├── SolrAddDoc5.java │ │ │ ├── SolrQuerying.java │ │ │ └── SolrQuerying5.java │ │ └── geotxt │ │ ├── annie │ │ ├── ANNIEAdvExtractor.java │ │ └── GATEExtractor.java │ │ ├── api │ │ └── GeoTxtApi.java │ │ ├── batchprocessing │ │ ├── BatchProcessor.java │ │ └── GeoTxtBatch.java │ │ ├── benchmark │ │ ├── Benchmark.java │ │ ├── Place.java │ │ ├── ProblemInstance.java │ │ ├── TestResult.java │ │ ├── instancereading │ │ │ ├── DirectoryInstanceReader.java │ │ │ ├── InstanceReader.java │ │ │ └── ZipInstanceReader.java │ │ └── tester │ │ │ ├── GateTester.java │ │ │ ├── GenericAnalyzerTester.java │ │ │ ├── StanfordTester.java │ │ │ └── Tester.java │ │ ├── entities │ │ ├── Hashtag.java │ │ ├── Location.java │ │ ├── Organization.java │ │ ├── OtherEntity.java │ │ └── Person.java │ │ ├── hierarchy │ │ ├── MapConseqPlaces.java │ │ ├── MapHierarchyPlaces.java │ │ └── MinimizeProximity.java │ │ ├── ner │ │ ├── AbstractNer.java │ │ ├── CogCompNer.java │ │ ├── GateNer.java │ │ ├── InlineAnnotatedNer.java │ │ ├── LingPipeNer.java │ │ ├── MitNer.java │ │ ├── NamedEntities.java │ │ ├── NerEngines.java │ │ ├── OpenNlpNer.java │ │ └── StanfordNer.java │ │ ├── test │ │ └── EntityExtractionTest.java │ │ └── utils │ │ ├── Analyzer.java │ │ ├── BBox.java │ │ ├── 
Config.java │ │ ├── FileWriter.java │ │ ├── GeoJsonWriter.java │ │ ├── GeocodingUtils.java │ │ ├── HashtagProcessor.java │ │ ├── LocationWrapper.java │ │ ├── LocationWrapperGeonamesToponym.java │ │ ├── LocationWrapperSolrDoc.java │ │ ├── PointGeometry.java │ │ ├── SortedAnnotationList.java │ │ ├── StripStrings.java │ │ ├── TextPreprocessing.java │ │ └── TwitterStreamCollection.java └── views │ ├── addLocationUi.scala.html │ ├── codingHistoryUi.scala.html │ ├── corpusBuildingUi.scala.html │ ├── document.scala.html │ ├── geoVistaUsers.scala.html │ ├── head.scala.html │ ├── index.scala.html │ └── main.scala.html ├── bin ├── application.conf └── views │ ├── addLocationUi.scala.html │ ├── codingHistoryUi.scala.html │ ├── corpusBuildingUi.scala.html │ ├── document.scala.html │ ├── geoVistaUsers.scala.html │ ├── head.scala.html │ ├── index.scala.html │ └── main.scala.html ├── build.sbt ├── conf ├── application.conf └── routes ├── lib └── geonames-1.1.13.jar ├── project ├── build.properties └── plugins.sbt ├── public ├── images │ ├── GeoTxtLogo.png │ ├── favicon.png │ └── header30px.gif ├── javascripts │ ├── L.Control.Zoomslider.js │ ├── bootstrap.js │ ├── codingHistory.js │ ├── corpusBuilding.js │ ├── geoCodingEval.js │ ├── he.js │ ├── images │ │ ├── marker-icon-2x.png │ │ ├── marker-icon.png │ │ └── marker-shadow.png │ ├── jquery-3.2.1.min.js │ ├── jquery-3.2.1.min.map │ ├── jquery-ui.min.js │ ├── leaflet-history-src.js │ ├── leaflet.js │ ├── leaflet.label.js │ ├── mapManip.js │ ├── oms.min.js │ ├── rangy-classapplier.js │ ├── rangy-core.js │ ├── textManip.js │ └── utils.js └── stylesheets │ ├── L.Control.Zoomslider.css │ ├── L.Control.Zoomslider.ie.css │ ├── bootstrap.css │ ├── bootstrap.css.map │ ├── codingHistory.css │ ├── images │ ├── ui-bg_diagonals-thick_90_eeeeee_40x40.png │ ├── ui-bg_glass_100_e4f1fb_1x400.png │ ├── ui-bg_glass_50_3baae3_1x400.png │ ├── ui-bg_glass_80_d7ebf9_1x400.png │ ├── ui-bg_highlight-hard_100_f2f5f7_1x100.png │ ├── 
ui-bg_highlight-hard_70_000000_1x100.png │ ├── ui-bg_highlight-soft_100_deedf7_1x100.png │ ├── ui-bg_highlight-soft_25_ffef8f_1x100.png │ ├── ui-icons_2694e8_256x240.png │ ├── ui-icons_2e83ff_256x240.png │ ├── ui-icons_3d80b3_256x240.png │ ├── ui-icons_72a7cf_256x240.png │ └── ui-icons_ffffff_256x240.png │ ├── jquery-ui.min.css │ ├── leaflet-history-src.css │ ├── leaflet.labelModified.css │ ├── leafletModified.css │ ├── loading.gif │ ├── main.css │ └── theme.css ├── test ├── ApplicationTest.java └── IntegrationTest.java └── tweets.txt /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.class 3 | app/evaluation/Evaluation.java 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # GeoTxt 3 | GeoTxt is a scalable geoparsing system for the recognition and geolocation of place names in unstructured text. GeoTxt offers six named entity recognition (NER) algorithms for place name recognition, and utilizes Apache Solr for the indexing, ranking, and retrieval of toponyms, enabling scalable geoparsing for streaming text. GeoTxt offers a flexible application programming interface (API), generating a GeoJSON FeatureCollection as output. 4 | 5 | ### Citation 6 | GeoTxt is described in the following publication. Please use this citation to refer to the system: 7 | 8 | Karimzadeh, M., Pezanowski, S., MacEachren, A. M., & Wallgrün, J. O. (2019). GeoTxt: A scalable geoparsing system for unstructured text geolocation. Transactions in GIS, 23(1), 118–136. https://doi.org/10.1111/tgis.12510 9 | 10 | ### Tutorial 11 | 12 | The code in this project includes GeoTxt as well as the GeoAnnotator Web API / Java Core code. 13 | 14 | This file will be packaged with your application when using `activator dist`. 
15 | 16 | We plan on enriching the instructions for building the system from source soon. 17 | 18 | -------------------------------------------------------------------------------- /activator-launch-1.3.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/activator-launch-1.3.2.jar -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/analyzers/GeoCoderAnalyzer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package edu.psu.ist.vaccine.analyzers; 6 | 7 | import edu.psu.ist.vaccine.geotxt.entities.Location; 8 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 9 | import edu.psu.ist.vaccine.geotxt.ner.NerEngines; 10 | import edu.psu.ist.vaccine.geocoder.solr.SolrQuerying; 11 | import edu.psu.ist.vaccine.geotxt.utils.Analyzer; 12 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapper; 13 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 14 | 15 | import java.io.IOException; 16 | import java.math.BigInteger; 17 | import java.net.URISyntaxException; 18 | import java.util.HashMap; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.logging.Level; 22 | 23 | /** 24 | * 25 | * @author MortezaKarimzadeh 26 | * 27 | * This class GeoCodes the input text without passing it through any NER 28 | */ 29 | 30 | public class GeoCoderAnalyzer implements Analyzer { 31 | 32 | protected int maxRows = 100; 33 | 34 | // 35 | public NamedEntities analyze(String QueryText, NerEngines ner, Map context) throws IllegalArgumentException, URISyntaxException, URISyntaxException, IOException { 36 | 37 | // String strippedQuery = StripStrings.strip(QueryText); 38 | String strippedQuery = QueryText; 39 | 40 | Location loc = new 
Location(QueryText); 41 | NamedEntities doc = new NamedEntities(); 42 | doc.locs.add(loc); 43 | 44 | SolrQuerying sq = new SolrQuerying(); 45 | 46 | for (Location l : doc.locs) { 47 | 48 | // go over all identified place names 49 | List searchResult = null; 50 | 51 | // get best matches from solr / web service 52 | searchResult = sq.getToponymsFromSolr(l.getName(), maxRows, "General"); 53 | 54 | // filter based on relative score 55 | if (searchResult.size() > 0) { 56 | double bestScorePercentage = searchResult.get(0).getScore() * 0.1; // could 57 | // make 58 | // this 59 | // a 60 | // parameter 61 | // / 62 | // constant 63 | double bestScorePercentage2 = searchResult.get(0).getScore() * 0.2; // could 64 | // make 65 | // this 66 | // a 67 | // parameter 68 | // / 69 | // constant 70 | 71 | // for (int i = 1; i < searchResult.size(); i++) { 72 | // double score = searchResult.get(i).getScore(); 73 | // if (score < bestScorePercentage 74 | // || (score < bestScorePercentage2 && i > 5)) { 75 | // for (int j = searchResult.size() - 1; j >= i; j--) { 76 | // searchResult.remove(j); 77 | // } 78 | // break; 79 | // } 80 | // } 81 | } 82 | 83 | l.setCandidates(searchResult); 84 | 85 | // set initial geometry based on best match 86 | LocationWrapper t = null; 87 | if (searchResult.size() > 0) 88 | t = searchResult.get(0); 89 | 90 | if (t != null) { 91 | try { 92 | l.setGeometry(new PointGeometry(t.getName(), t.getLongitude(), t.getLatitude(), BigInteger.valueOf(t.getGeoNameId()))); 93 | l.setHierarchy(t.getHierarchy()); 94 | l.setCountryCode(t.getCountryCode()); 95 | l.setFeatureClass(t.getFeatureClass()); 96 | l.setFeatureCode(t.getFeatureCode()); 97 | l.setAlternateNames(t.getAlternateNames()); 98 | } catch (Exception ex) { 99 | java.util.logging.Logger.getLogger(HierarchyAnalyzer.class.getName()).log(Level.SEVERE, null, ex); 100 | } 101 | } 102 | 103 | /* 104 | * try { LocationWrapper t = SolrModifiedGeoCoder.geoCode(l.getName()); if (t != null) { l.setGeometry(new 
PointGeometry(t.getName(), t .getLongitude(), t.getLatitude(), BigInteger .valueOf(t.getGeoNameId()))); l.setHierarchy(t.getHierarchy()); l.setCountryCode(t.getCountryCode()); l.setFeatureClass(t.getFeatureClass()); 105 | * l.setFeatureCode(t.getFeatureCode()); 106 | * 107 | * } } catch (Exception ex) { java.util.logging.Logger.getLogger( BasicSolrStanfordAnalyzer.class.getName()).log( Level.SEVERE, null, ex); } 108 | */ 109 | 110 | } 111 | 112 | sq.closeConnection(); 113 | 114 | // GeoCogind Organizations is phased out for now. 115 | /* 116 | * for (Organization o : doc.orgs) { o.setGeometry(GeoCoder.geoCodeOrg(o.getName())); } 117 | */ 118 | 119 | return doc; 120 | } 121 | 122 | // 123 | 124 | public GeoCoderAnalyzer(int maxRows) { 125 | this.maxRows = maxRows; 126 | } 127 | 128 | public GeoCoderAnalyzer() { 129 | 130 | this.maxRows = 10; 131 | } 132 | 133 | public static void main(String args[]) throws IllegalArgumentException, IllegalArgumentException, URISyntaxException, IOException { 134 | GeoCoderAnalyzer basicStanfordAnalyizer = new GeoCoderAnalyzer(30); 135 | 136 | // temporarily pass an empty Map 137 | // object------------------------------------------------------ 138 | Map context = new HashMap(); 139 | // -------------------------------------------------------------------------------------- 140 | 141 | NamedEntities results = basicStanfordAnalyizer.analyze("Iran",NerEngines.NONE, context); 142 | 143 | System.out.println(results); 144 | 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/analyzers/GeoNamesOrgAnalyzer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package edu.psu.ist.vaccine.analyzers; 6 | 7 | import edu.psu.ist.vaccine.geotxt.entities.Location; 8 | import edu.psu.ist.vaccine.geotxt.ner.AbstractNer; 9 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 10 | import edu.psu.ist.vaccine.geotxt.ner.NerEngines; 11 | import edu.psu.ist.vaccine.geotxt.ner.StanfordNer; 12 | import edu.psu.ist.vaccine.geocoder.GeonamesGeoCoder; 13 | import edu.psu.ist.vaccine.geotxt.utils.Analyzer; 14 | import java.io.IOException; 15 | import java.net.URISyntaxException; 16 | import java.util.HashMap; 17 | import java.util.Map; 18 | import java.util.logging.Level; 19 | 20 | /** 21 | * 22 | * @author MortezaKarimzadeh 23 | */ 24 | // This class uses GeoNames.org web service API to query places. 25 | public class GeoNamesOrgAnalyzer implements Analyzer { 26 | 27 | public static Map nerMap = null; 28 | 29 | // 30 | public NamedEntities analyze(String QueryText, NerEngines ner, Map context) throws IllegalArgumentException, URISyntaxException, URISyntaxException, IOException { 31 | 32 | // String strippedQuery = StripStrings.strip(QueryText); 33 | String strippedQuery = QueryText; 34 | 35 | NamedEntities doc = null; 36 | 37 | doc = ((AbstractNer) nerMap.get(ner)).tagAlltoDoc(strippedQuery); 38 | 39 | if (doc == null) { 40 | return null; 41 | } 42 | 43 | for (Location l : doc.locs) { 44 | try { 45 | l.setGeometry(GeonamesGeoCoder.geoCode(l.getName())); 46 | } catch (Exception ex) { 47 | java.util.logging.Logger.getLogger(GeoNamesOrgAnalyzer.class.getName()).log(Level.SEVERE, null, ex); 48 | } 49 | } 50 | 51 | // GeoCogind Organizations is phased out for now. 
52 | /* 53 | * for (Organization o : doc.orgs) { o.setGeometry(GeoCoder.geoCodeOrg(o.getName())); } 54 | */ 55 | 56 | return doc; 57 | } 58 | 59 | 60 | 61 | public GeoNamesOrgAnalyzer(String stanfordAddress) { 62 | 63 | StanfordNer st = new StanfordNer(stanfordAddress); 64 | GeoNamesOrgAnalyzer.nerMap = new HashMap(); 65 | GeoNamesOrgAnalyzer.nerMap.put(NerEngines.STANFORD, st); 66 | 67 | } 68 | 69 | public GeoNamesOrgAnalyzer(Map nerMap) { 70 | 71 | GeoNamesOrgAnalyzer.nerMap = nerMap; 72 | 73 | } 74 | 75 | public static void main(String args[]) throws IllegalArgumentException, URISyntaxException, IOException { 76 | GeoNamesOrgAnalyzer geoNamesOrg = new GeoNamesOrgAnalyzer("C:/Programs/Stanford/english.all.3class.distsim.crf.ser.gz"); 77 | 78 | // temporarily pass an empty Map object------------------------------------------------------ 79 | Map context = new HashMap(); 80 | // -------------------------------------------------------------------------------------- 81 | 82 | NamedEntities results = geoNamesOrg.analyze("I live in London, Ontario", NerEngines.STANFORD, context); 83 | 84 | System.out.println(results); 85 | 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/analyzers/LandmarkGeoCoderAnalyzer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package edu.psu.ist.vaccine.analyzers; 6 | 7 | import edu.psu.ist.vaccine.geotxt.entities.Location; 8 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 9 | import edu.psu.ist.vaccine.geotxt.ner.NerEngines; 10 | import edu.psu.ist.vaccine.geocoder.solr.SolrQuerying; 11 | import edu.psu.ist.vaccine.geotxt.utils.Analyzer; 12 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapper; 13 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 14 | 15 | import java.io.IOException; 16 | import java.math.BigInteger; 17 | import java.net.URISyntaxException; 18 | import java.util.HashMap; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.logging.Level; 22 | 23 | /** 24 | * 25 | * @author MortezaKarimzadeh 26 | * 27 | * This class GeoCodes the input text without passing it through any NER At the moment, the goal os this class is to support the Spatial Relations Expressions project. The Solr Boosting on this is meant to give better boosting to landmarks. 28 | */ 29 | 30 | public class LandmarkGeoCoderAnalyzer implements Analyzer { 31 | 32 | protected int maxRows = 100; 33 | 34 | // 35 | public NamedEntities analyze(String QueryText, NerEngines ner, Map context) throws IllegalArgumentException, URISyntaxException, URISyntaxException, IOException { 36 | 37 | // String strippedQuery = StripStrings.strip(QueryText); 38 | String strippedQuery = QueryText; 39 | 40 | Location loc = new Location(QueryText); 41 | NamedEntities doc = new NamedEntities(); 42 | doc.locs.add(loc); 43 | 44 | SolrQuerying sq = new SolrQuerying(); 45 | 46 | for (Location l : doc.locs) { 47 | 48 | // go over all identified place names 49 | List searchResult = null; 50 | 51 | // get best matches from solr / web service 52 | searchResult = sq.getToponymsFromSolr(l.getName(), maxRows, "landmarks"); 53 | 54 | // filter based on relative score 55 | if (searchResult.size() > 0) { 56 | double bestScorePercentage = searchResult.get(0).getScore() * 0.01; // could 57 | // make 
58 | // this 59 | // a 60 | // parameter 61 | // / 62 | // constant 63 | double bestScorePercentage2 = searchResult.get(0).getScore() * 0.1; // could 64 | // make 65 | // this 66 | // a 67 | // parameter 68 | // / 69 | // constant 70 | 71 | for (int i = 1; i < searchResult.size(); i++) { 72 | double score = searchResult.get(i).getScore(); 73 | if (score < bestScorePercentage || (score < bestScorePercentage2 && i > 5)) { 74 | for (int j = searchResult.size() - 1; j >= i; j--) { 75 | searchResult.remove(j); 76 | } 77 | break; 78 | } 79 | } 80 | 81 | // for (int i = 1; i < searchResult.size(); i++) { 82 | // double score = searchResult.get(i).getScore(); 83 | // 84 | // System.out.print(searchResult.get(i).getName() ); 85 | // System.out.format("%.15f%n",score); 86 | // } 87 | // 88 | 89 | } 90 | 91 | l.setCandidates(searchResult); 92 | 93 | // set initial geometry based on best match 94 | LocationWrapper t = null; 95 | if (searchResult.size() > 0) 96 | t = searchResult.get(0); 97 | 98 | if (t != null) { 99 | try { 100 | l.setGeometry(new PointGeometry(t.getName(), t.getLongitude(), t.getLatitude(), BigInteger.valueOf(t.getGeoNameId()))); 101 | l.setHierarchy(t.getHierarchy()); 102 | l.setCountryCode(t.getCountryCode()); 103 | l.setFeatureClass(t.getFeatureClass()); 104 | l.setFeatureCode(t.getFeatureCode()); 105 | l.setAlternateNames(t.getAlternateNames()); 106 | } catch (Exception ex) { 107 | java.util.logging.Logger.getLogger(HierarchyAnalyzer.class.getName()).log(Level.SEVERE, null, ex); 108 | } 109 | } 110 | 111 | /* 112 | * try { LocationWrapper t = SolrModifiedGeoCoder.geoCode(l.getName()); if (t != null) { l.setGeometry(new PointGeometry(t.getName(), t .getLongitude(), t.getLatitude(), BigInteger .valueOf(t.getGeoNameId()))); l.setHierarchy(t.getHierarchy()); l.setCountryCode(t.getCountryCode()); l.setFeatureClass(t.getFeatureClass()); 113 | * l.setFeatureCode(t.getFeatureCode()); 114 | * 115 | * } } catch (Exception ex) { java.util.logging.Logger.getLogger( 
BasicSolrStanfordAnalyzer.class.getName()).log( Level.SEVERE, null, ex); } 116 | */ 117 | 118 | } 119 | 120 | sq.closeConnection(); 121 | 122 | // GeoCogind Organizations is phased out for now. 123 | /* 124 | * for (Organization o : doc.orgs) { o.setGeometry(GeoCoder.geoCodeOrg(o.getName())); } 125 | */ 126 | 127 | return doc; 128 | } 129 | 130 | // 131 | 132 | public LandmarkGeoCoderAnalyzer(int maxRows) { 133 | this.maxRows = maxRows; 134 | } 135 | 136 | public LandmarkGeoCoderAnalyzer() { 137 | 138 | this.maxRows = 10; 139 | } 140 | 141 | public static void main(String args[]) throws IllegalArgumentException, IllegalArgumentException, URISyntaxException, IOException { 142 | LandmarkGeoCoderAnalyzer basicStanfordAnalyizer = new LandmarkGeoCoderAnalyzer(30); 143 | 144 | // temporarily pass an empty Map 145 | // object------------------------------------------------------ 146 | Map context = new HashMap(); 147 | // -------------------------------------------------------------------------------------- 148 | 149 | NamedEntities results = basicStanfordAnalyizer.analyze("Iran",NerEngines.NONE, context); 150 | 151 | System.out.println(results); 152 | 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/analyzers/NerAnalyzer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package edu.psu.ist.vaccine.analyzers; 6 | 7 | import edu.psu.ist.vaccine.geotxt.api.GeoTxtApi; 8 | import edu.psu.ist.vaccine.geotxt.entities.Location; 9 | import edu.psu.ist.vaccine.geotxt.ner.AbstractNer; 10 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 11 | import edu.psu.ist.vaccine.geotxt.ner.NerEngines; 12 | import edu.psu.ist.vaccine.geotxt.ner.StanfordNer; 13 | import edu.psu.ist.vaccine.geocoder.SolrModifiedGeoCoder; 14 | import edu.psu.ist.vaccine.geocoder.solr.SolrQuerying; 15 | import edu.psu.ist.vaccine.geotxt.utils.Analyzer; 16 | import edu.psu.ist.vaccine.geotxt.utils.BBox; 17 | import edu.psu.ist.vaccine.geotxt.utils.HashtagProcessor; 18 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapper; 19 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 20 | 21 | import java.io.IOException; 22 | import java.math.BigInteger; 23 | import java.net.URISyntaxException; 24 | import java.util.HashMap; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.logging.Level; 28 | 29 | import org.apache.commons.lang3.StringEscapeUtils; 30 | 31 | /** 32 | * 33 | * @author MortezaKarimzadeh 34 | */ 35 | // This analyzer just does NER, with no geocoding. 
36 | public class NerAnalyzer implements Analyzer { 37 | 38 | public static Map nerMap = null; 39 | 40 | // 41 | public NamedEntities analyze(String QueryText, NerEngines ner, Map context) throws IllegalArgumentException, URISyntaxException, URISyntaxException, IOException { 42 | 43 | // String strippedQuery = StripStrings.strip(QueryText); 44 | String strippedQuery = QueryText; 45 | 46 | HashtagProcessor processed = new HashtagProcessor(); 47 | processed = HashtagProcessor.processHashTags(StringEscapeUtils.unescapeHtml4(strippedQuery)); 48 | strippedQuery = processed.getHashtagRemoved(); 49 | 50 | NamedEntities doc = null; 51 | 52 | // if (nerMap.get(ner)==null){ 53 | // return doc; 54 | // } 55 | 56 | doc = ((AbstractNer) nerMap.get(ner)).tagAlltoDoc(strippedQuery); 57 | 58 | if (doc == null) { 59 | return null; 60 | } 61 | 62 | doc.adjustCharIndexesForHashtags(processed); 63 | 64 | return doc; 65 | } 66 | 67 | // 68 | 69 | public NerAnalyzer(String stanfordAddress) { 70 | StanfordNer st = new StanfordNer(stanfordAddress); 71 | NerAnalyzer.nerMap = new HashMap(); 72 | NerAnalyzer.nerMap.put(NerEngines.STANFORD, st); 73 | 74 | } 75 | 76 | public NerAnalyzer(Map nerMap) { 77 | NerAnalyzer.nerMap = nerMap; 78 | } 79 | 80 | public static void main(String args[]) throws IllegalArgumentException, IllegalArgumentException, URISyntaxException, IOException { 81 | NerAnalyzer nerAnalyizer = new NerAnalyzer("C:/Programs/Stanford/english.all.3class.distsim.crf.ser.gz"); 82 | 83 | // temporarily pass an empty Map 84 | // object------------------------------------------------------ 85 | Map context = new HashMap(); 86 | // -------------------------------------------------------------------------------------- 87 | 88 | NamedEntities results = nerAnalyizer.analyze("I live in London, Ontario", NerEngines.STANFORD, context); 89 | 90 | System.out.println(results); 91 | 92 | } 93 | } 94 | -------------------------------------------------------------------------------- 
/**
 * Holds the single shared JDBC connection used by the corpus-building code.
 *
 * Connection settings default to the local development database but can be
 * overridden with JVM system properties ({@code -Dgeotxt.db.url=...},
 * {@code -Dgeotxt.db.name=...}, {@code -Dgeotxt.db.username=...},
 * {@code -Dgeotxt.db.password=...}) so that real credentials do not have to
 * be committed to source control.
 */
public class DatabaseConnection {

	/** The shared connection; {@code null} until {@link #connect()} succeeds. */
	public static Connection c;

	private static String url = System.getProperty("geotxt.db.url", "jdbc:postgresql://localhost:5432");
	private static String db = System.getProperty("geotxt.db.name", "NewCorpusTestC");
	private static String username = System.getProperty("geotxt.db.username", "postgres");
	private static String password = System.getProperty("geotxt.db.password", "123456");

	public static final String TWEET_TABLE_NAME = "tweets_final";
	public static final String GC_TABLE_NAME = "GCResults";

	/**
	 * Opens the shared connection with auto-commit disabled.
	 *
	 * @throws SQLException if the connection cannot be opened or configured
	 */
	static void connect() throws SQLException {
		Connection opened = DriverManager.getConnection(url + "/" + db, username, password);
		try {
			opened.setAutoCommit(false);
		} catch (SQLException e) {
			// Do not leak a connection that could not be configured.
			try {
				opened.close();
			} catch (SQLException ignored) {
				// The configuration failure is the error worth reporting.
			}
			throw e;
		}
		c = opened;
	}

	/**
	 * Closes the shared connection; does nothing if it was never opened.
	 *
	 * @throws SQLException if closing the connection fails
	 */
	static void close() throws SQLException {
		if (c != null) {
			c.close();
		}
	}
}
4 | */ 5 | package edu.psu.ist.vaccine.geocoder; 6 | 7 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 8 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 9 | import java.math.BigInteger; 10 | import org.apache.log4j.Logger; 11 | import org.geonames.Style; 12 | import org.geonames.Toponym; 13 | import org.geonames.ToponymSearchCriteria; 14 | import org.geonames.ToponymSearchResult; 15 | import org.geonames.WebService; 16 | 17 | /** 18 | * 19 | * @author Morteza KZ 20 | * This class supports geocoding using new geonames API (api.geonames.org). It is used in basic analyzers, and uses the modified 21 | * ranking scheme based on population and string closeness. 22 | * 23 | */ 24 | public class GeonamesGeoCoder { 25 | 26 | final static int maxRows = 5; 27 | public static String username = "geopa"; 28 | public static Logger log = Logger.getLogger(GeonamesGeoCoder.class); 29 | 30 | //GeoCodes location names using the new GeoNames WS API. Fetches topmost maxRows results, and assigns the one with the most population. 31 | public static PointGeometry geoCode(String locationName) throws Exception { 32 | 33 | WebService.setUserName(username); 34 | Toponym toponym = null; 35 | // Long[] pop = new Long[maxRows]; 36 | Long maxPop = Long.MIN_VALUE; 37 | int maxPopIndex = -1; 38 | Long maxPopGoodFit = Long.MIN_VALUE; 39 | int maxPopGoodFitIndex = -1; 40 | 41 | ToponymSearchCriteria searchCriteria = new ToponymSearchCriteria(); 42 | searchCriteria.setMaxRows(maxRows); 43 | searchCriteria.setQ(locationName); 44 | searchCriteria.setStyle(Style.FULL); 45 | ToponymSearchResult searchResult = WebService.search(searchCriteria); 46 | 47 | if (searchResult.getToponyms().size() <= 1) { 48 | System.out.println("WARNING: geonames did not return any results!"); 49 | } 50 | 51 | for (int c = 0; c < searchResult.getToponyms().size(); c++) { 52 | toponym = searchResult.getToponyms().get(c); 53 | 54 | Long pop = toponym.getPopulation() != null ? 
toponym.getPopulation() : 0; // may cause problems with entries that dont have a population specified 55 | 56 | boolean found = false; 57 | if (toponym.getName().equalsIgnoreCase(locationName)) { 58 | found = true; 59 | } 60 | if (!found) { 61 | String[] names = toponym.getAlternateNames().split(","); 62 | for (String n : names) { 63 | if (n.equalsIgnoreCase(locationName)) { 64 | found = true; 65 | break; 66 | } 67 | } 68 | 69 | } 70 | if (found) { 71 | if (pop > maxPopGoodFit) { 72 | maxPopGoodFit = pop; 73 | maxPopGoodFitIndex = c; 74 | } 75 | } else { 76 | if (pop > maxPop) { 77 | maxPop = pop; 78 | maxPopIndex = c; 79 | } 80 | } 81 | 82 | //log.info("toponym for " + locationName + ":" + toponym + " pop=" + toponym.getPopulation()); 83 | } 84 | 85 | if (maxPopGoodFitIndex >= 0) { 86 | toponym = searchResult.getToponyms().get(maxPopGoodFitIndex); 87 | } else if (maxPopIndex >= 0) { 88 | toponym = searchResult.getToponyms().get(maxPopIndex); 89 | } else { 90 | return null; 91 | } 92 | 93 | PointGeometry point = new PointGeometry(toponym.getName(), toponym.getLongitude(), toponym.getLatitude(), BigInteger.valueOf(toponym.getGeoNameId())); 94 | 95 | return point; 96 | } 97 | 98 | 99 | public static String geoCodetoGeoJson(String locationName) throws Exception { 100 | PointGeometry geometry = geoCode(locationName); 101 | return GeoJsonWriter.pointGeometryToGeoJson(geometry, locationName); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geocoder/SolrBasedGeoCoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package edu.psu.ist.vaccine.geocoder; 6 | 7 | import edu.psu.ist.vaccine.analyzers.HierarchyAnalyzer; 8 | import edu.psu.ist.vaccine.geocoder.solr.SolrQuerying; 9 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapper; 10 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 11 | import java.math.BigInteger; 12 | import java.util.List; 13 | import java.util.logging.Level; 14 | 15 | /** 16 | * 17 | * @author Morteza KZ This class is built to support profile location extraction 18 | * in SP2. It has a GeoCode Method which accepts text and returns a (single) 19 | * best result. 20 | */ 21 | public class SolrBasedGeoCoder { 22 | 23 | public static SolrQuerying sq = new SolrQuerying(); 24 | 25 | public static PointGeometry geoCode(String locationName) { 26 | 27 | PointGeometry geoCodedPoint = new PointGeometry(0D, 0D, BigInteger.valueOf(0)); 28 | double[] coord = {0, 0}; 29 | 30 | System.out.println("Searching for " + locationName); 31 | List searchResult = null; 32 | 33 | // get best matches from solr / web service 34 | 35 | searchResult = sq.getToponymsFromSolr(locationName, 1, "General"); 36 | 37 | 38 | // set initial geometry based on best match 39 | LocationWrapper t = null; 40 | if (searchResult.size() > 0) { 41 | t = searchResult.get(0); 42 | } 43 | 44 | if (t != null) { 45 | try { 46 | coord[0] = t.getLongitude(); 47 | coord[1] = t.getLatitude(); 48 | geoCodedPoint.setCoordinates(coord); 49 | geoCodedPoint.setGeoNameId(BigInteger.valueOf(t.getGeoNameId())); 50 | geoCodedPoint.setToponym(t.getName()); 51 | } catch (Exception ex) { 52 | java.util.logging.Logger.getLogger(HierarchyAnalyzer.class.getName()).log(Level.SEVERE, null, ex); 53 | } 54 | } 55 | 56 | return geoCodedPoint; 57 | } 58 | 59 | public static void main(String[] args) { 60 | 61 | PointGeometry p = SolrBasedGeoCoder.geoCode("New Jersey"); 62 | System.out.print(p); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- 
/app/edu/psu/ist/vaccine/geocoder/solr/SolrAddDoc5.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geocoder.solr; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.Iterator; 6 | import java.util.Map; 7 | import java.util.Random; 8 | import org.apache.solr.client.solrj.SolrClient; 9 | import org.apache.solr.client.solrj.SolrServerException; 10 | import org.apache.solr.client.solrj.impl.HttpSolrClient; 11 | import org.apache.solr.client.solrj.response.UpdateResponse; 12 | import org.apache.solr.common.SolrInputDocument; 13 | 14 | public class SolrAddDoc5 { 15 | 16 | protected SolrClient solr; 17 | private ArrayList solrFields; 18 | private static final long LOWER_RANGE = 0; // assign lower range value 19 | private static final long UPPER_RANGE = Long.MAX_VALUE; // assign upper 20 | // range value 21 | private static Random random = new Random(); 22 | static String commitUrl = "http://zeus.geog.psu.edu:8988/solr/geotxt_geonames"; 23 | 24 | public SolrAddDoc5() { 25 | solr = new HttpSolrClient(SolrAddDoc5.commitUrl); 26 | if (solr == null) 27 | System.out.println("WARNING: could not connect to server"); 28 | } 29 | 30 | /** 31 | * Queries SOLR database to get up to MAX_ROWS toponyms for name 32 | * 33 | * @throws IOException 34 | * @throws SolrServerException 35 | */ 36 | public long addSolrDoc(Map props) throws SolrServerException, IOException { 37 | long id = LOWER_RANGE + (long) (random.nextDouble() * (UPPER_RANGE - LOWER_RANGE)); 38 | if (props.size() > 0) { 39 | Iterator iter = props.keySet().iterator(); 40 | SolrInputDocument doc = new SolrInputDocument(); 41 | doc.addField("id", id); 42 | String latitude = ""; 43 | String longitude = ""; 44 | while (iter.hasNext()) { 45 | String key = iter.next(); 46 | String value = props.get(key); 47 | doc.addField(key, value); 48 | if (key.equals("latitude")) { 49 | latitude = value; 50 | } 51 | if 
(key.equals("longitude")) { 52 | longitude = value; 53 | } 54 | } 55 | 56 | String point = latitude + "," + longitude; 57 | String geom = "POINT(" + longitude + " " + latitude + ")"; 58 | doc.addField("point", point); 59 | doc.addField("geom", geom); 60 | 61 | try { 62 | UpdateResponse rsp = solr.add(doc); 63 | } catch (Exception e) { 64 | e.printStackTrace(); 65 | } 66 | solr.commit(); 67 | } 68 | return id; 69 | } 70 | } -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geocoder/solr/SolrQuerying5.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geocoder.solr; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.solr.client.solrj.SolrClient; 7 | import org.apache.solr.client.solrj.SolrQuery; 8 | import org.apache.solr.client.solrj.impl.HttpSolrClient; 9 | import org.apache.solr.client.solrj.response.QueryResponse; 10 | import org.apache.solr.common.SolrDocument; 11 | import org.apache.solr.common.SolrDocumentList; 12 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapper; 13 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapperSolrDoc; 14 | 15 | public class SolrQuerying5 { 16 | 17 | /** url of our solr geonames database */ 18 | static String url = "http://www.zeus.geovista.psu.edu/geotxtsolr"; 19 | 20 | protected SolrClient solr; 21 | 22 | public SolrQuerying5() { 23 | solr = new HttpSolrClient(url); 24 | if (solr == null) System.out.println("WARNING: could not connect to server"); 25 | } 26 | 27 | /** Queries SOLR database to get up to MAX_ROWS toponyms for name */ 28 | public List getToponymsFromSolr (String name, int MAX_ROWS) { 29 | //System.out.println("Querying for "+name); 30 | SolrDocumentList docs = null; 31 | ArrayList matches = new ArrayList(); 32 | 33 | SolrQuery query = new SolrQuery(); 34 | query.set("q", "\""+name+"\""); 35 | query.set("qf","name^0.001 alternatenamesStr^10000 
alternatenames^0.0001"); 36 | 37 | // "name:\""+name+"\"^0.001 OR alternatenamesStr:\""+name+"\"^10000 OR alternatenames:\""+name+"\"^0.0001" ); 38 | 39 | //query.set("q", "name:\""+name+"\"^0.001" ); 40 | /*String qu = "nameStr:\""+name+"\"^10000 OR (alternatenamesStr:\""+name+"\"^10000 NOT nameStr:\""+name+"\") "+ 41 | " OR (name:\""+name+"\"^0.01 NOT nameStr:\""+name+"\" NOT alternatenamesStr:\""+name+"\") "+ 42 | " OR (alternatenames:\""+name+"\"^0.0001 NOT nameStr:\""+name+"\" NOT alternatenamesStr:\""+name+"\" NOT name:\""+name+"\")"; 43 | System.out.println(qu); 44 | query.set("q", qu); */ 45 | 46 | 47 | query.set("defType","edismax"); 48 | query.set("bf", "population^0.005"); 49 | query.set("rows",""+MAX_ROWS); 50 | query.set("fl", "*,score"); 51 | 52 | try { 53 | //System.out.println("Server: "+server); 54 | QueryResponse rsp = solr.query(query); 55 | docs = rsp.getResults(); 56 | 57 | // System.out.println("Results:"); 58 | for (int i = 0; i < docs.size(); i++) { 59 | SolrDocument d = docs.get(i); 60 | // System.out.println(d); 61 | matches.add(new LocationWrapperSolrDoc(d)); 62 | // toponyms.add(t); 63 | // //System.out.println(t.getName()); 64 | } 65 | 66 | } catch (Exception e) { 67 | e.printStackTrace(); 68 | } 69 | 70 | return matches; 71 | } 72 | 73 | /** Queries SOLR database to get up to MAX_ROWS toponyms for name 74 | * The Bounding box is determined previously and gives us an estimate of where the place is and our confidence. 
75 | * */ 76 | public List getToponymsFromSolr (String name, int MAX_ROWS, double minx, double miny, double maxx, double maxy) { 77 | //System.out.println("Querying for "+name); 78 | SolrDocumentList docs = null; 79 | ArrayList matches = new ArrayList(); 80 | 81 | SolrQuery query = new SolrQuery(); 82 | query.set("q", "\""+name+"\""); 83 | query.set("qf","name^0.001 alternatenamesStr^10000 alternatenames^0.0001"); 84 | 85 | // "name:\""+name+"\"^0.001 OR alternatenamesStr:\""+name+"\"^10000 OR alternatenames:\""+name+"\"^0.0001" ); 86 | 87 | //query.set("q", "name:\""+name+"\"^0.001" ); 88 | /*String qu = "nameStr:\""+name+"\"^10000 OR (alternatenamesStr:\""+name+"\"^10000 NOT nameStr:\""+name+"\") "+ 89 | " OR (name:\""+name+"\"^0.01 NOT nameStr:\""+name+"\" NOT alternatenamesStr:\""+name+"\") "+ 90 | " OR (alternatenames:\""+name+"\"^0.0001 NOT nameStr:\""+name+"\" NOT alternatenamesStr:\""+name+"\" NOT name:\""+name+"\")"; 91 | System.out.println(qu); 92 | query.set("q", qu); */ 93 | 94 | String centroid = ((miny + maxy) / 2) + "," + ((minx + maxx) / 2); 95 | // About 110 kilometers in a degree at the Equator. Use this as a rough estimation for the d parameter. 96 | // Find the width of the bbox, multiply it by 1.1 to expand it slightly, then multiply by 110 to get meters. 
97 | double distance = (maxx - ((minx + maxx) / 2)) * 2 * 1.1 * 110; 98 | 99 | query.set("defType","edismax"); 100 | query.set("bf", "population^0.005"); 101 | query.set("sfield", "latlng_geo"); 102 | query.set("pt", centroid); 103 | query.set("d", ""+distance); 104 | query.set("bf", "recip(geodist(),2,200,20)"); 105 | query.set("rows",""+MAX_ROWS); 106 | query.set("fl", "*,score"); 107 | 108 | try { 109 | //System.out.println("Server: "+server); 110 | QueryResponse rsp = solr.query(query); 111 | docs = rsp.getResults(); 112 | 113 | // System.out.println("Results:"); 114 | for (int i = 0; i < docs.size(); i++) { 115 | SolrDocument d = docs.get(i); 116 | // System.out.println(d); 117 | matches.add(new LocationWrapperSolrDoc(d)); 118 | // toponyms.add(t); 119 | // //System.out.println(t.getName()); 120 | } 121 | 122 | } catch (Exception e) { 123 | e.printStackTrace(); 124 | } 125 | 126 | return matches; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/annie/GATEExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | 6 | 7 | package edu.psu.ist.vaccine.geotxt.annie; 8 | 9 | import gate.Gate; 10 | import gate.util.GateException; 11 | import java.io.File; 12 | import java.io.IOException; 13 | import org.apache.log4j.Level; 14 | import org.apache.log4j.Logger; 15 | 16 | /** 17 | * 18 | * @author ajaiswal 19 | */ 20 | public class GATEExtractor { 21 | 22 | private static GATEExtractor instance = null; 23 | private static Gate gate = null; 24 | private static Logger logger = Logger.getRootLogger(); 25 | 26 | /** 27 | * Add parameter gatehome to initial the Gate 28 | */ 29 | protected GATEExtractor(String gatehome) throws GateException, IOException{ 30 | logger.info("Initializing GATE"); 31 | logger.setLevel(Level.FATAL); 32 | logger.setLevel(Level.INFO); 33 | //logger.setLevel(Level.DEBUG); 34 | //System.setProperty("gate.home", "/r2/opt/gate6"); 35 | System.setProperty("gate.home", gatehome); //TODO: 36 | // System.setProperty("gate.home", "/opt/gate6"); 37 | //System.setProperty("gate.home", "C:/gate6"); 38 | gate = new Gate(); 39 | gate.init(); 40 | logger.info("Loading ANNIE Plugin"); 41 | File gateHome = gate.getGateHome(); 42 | File pluginsHome = new File(gateHome, "plugins"); 43 | gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "ANNIE").toURI().toURL()); 44 | //gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "Tagger_MetaMap").toURL()); 45 | //gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "Tagger_Chemistry").toURL()); 46 | //gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "Tagger_Abner").toURL()); 47 | gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "Tools").toURI().toURL()); 48 | //gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "Gazetteer_LKB").toURL()); 49 | //gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "LingPipe").toURL()); 50 | logger.info("Done Initializing GATE"); 51 | } 52 | 53 | public static Gate getGateInstance(){ 54 | 
return gate; 55 | } 56 | 57 | public static GATEExtractor getInstance(String gatehome) throws GateException, IOException{ 58 | if(instance==null){ 59 | logger.info("Creating Gate Loader"); 60 | instance = new GATEExtractor(gatehome); 61 | } 62 | logger.info("Returning Gate Loader Instance"); 63 | return instance; 64 | } 65 | 66 | public static void destroy(){ 67 | gate = null; 68 | instance = null; 69 | } 70 | } 71 | 72 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/batchprocessing/GeoTxtBatch.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.batchprocessing; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.IOException; 5 | import java.io.StringReader; 6 | import java.io.StringWriter; 7 | import java.net.URISyntaxException; 8 | import java.util.ArrayList; 9 | import java.util.Collections; 10 | import java.util.Comparator; 11 | import java.util.Iterator; 12 | import java.util.concurrent.ForkJoinPool; 13 | import java.util.stream.Stream; 14 | 15 | import org.apache.commons.csv.CSVFormat; 16 | import org.apache.commons.csv.CSVPrinter; 17 | import org.apache.commons.csv.CSVRecord; 18 | import org.apache.commons.csv.QuoteMode; 19 | import org.json.simple.JSONArray; 20 | import org.json.simple.JSONObject; 21 | import org.json.simple.JSONValue; 22 | 23 | import com.fasterxml.jackson.databind.JsonNode; 24 | 25 | import edu.psu.ist.vaccine.geotxt.api.GeoTxtApi; 26 | import play.libs.Json; 27 | import play.mvc.Results; 28 | 29 | public class GeoTxtBatch { 30 | 31 | private GeoTxtApi geoTxt; 32 | private int max = 10; 33 | 34 | public GeoTxtBatch(GeoTxtApi geoTxt) { 35 | this.geoTxt = geoTxt; 36 | } 37 | 38 | public ArrayList batchProcess(String dt, ArrayList items) { 39 | ArrayList ret = new ArrayList<>(); 40 | items.stream().parallel().forEach(s -> ret.add(runGeoTxt(s))); 41 | return ret; 42 | } 43 | // 44 | // private 
JSONObject runGeoTxt(JsonNode obj) { 45 | // String m = (String) obj.get("m"); 46 | // String q = (String) obj.get("q"); 47 | // if ((m.equals("stanford") || m.equals("gate")) || (m.equals("stanfordh") || m.equals("gateh")) 48 | // || (m.equals("stanfords") || m.equals("gates") || m.equals("none"))) { 49 | // try { 50 | // String r = geoTxt.geoCodeToGeoJson(q, m, true, 100, true, true); 51 | // obj.put("r", JSONValue.parse(r)); 52 | // } catch (IllegalArgumentException e) { 53 | // e.printStackTrace(); 54 | // } catch (URISyntaxException e) { 55 | // e.printStackTrace(); 56 | // } catch (IOException e) { 57 | // e.printStackTrace(); 58 | // } 59 | // } else { 60 | // obj.put("r", "Wrong Method"); 61 | // } 62 | // return obj; 63 | // } 64 | 65 | private String[] runGeoTxt(String[] item) { 66 | String m = item[1]; 67 | String q = item[2]; 68 | if ((m.equals("stanford") || m.equals("gate")) || (m.equals("stanfordh") || m.equals("gateh")) 69 | || (m.equals("stanfords") || m.equals("gates") || m.equals("none"))) { 70 | try { 71 | String r = geoTxt.geoCodeToGeoJson(q, m, true, 100, true, true); 72 | item[3] = r; 73 | } catch (IllegalArgumentException e) { 74 | e.printStackTrace(); 75 | } catch (URISyntaxException e) { 76 | e.printStackTrace(); 77 | } catch (IOException e) { 78 | e.printStackTrace(); 79 | } 80 | } else { 81 | item[3] = "Wrong Method"; 82 | } 83 | return item; 84 | } 85 | 86 | public void batchJSON(JSONArray arr) { 87 | for (int i = 0; i < arr.size(); i++) { 88 | JSONObject obj = (JSONObject) arr.get(i); 89 | String m = (String) obj.get("m"); 90 | String q = (String) obj.get("q"); 91 | if ((m.equals("stanford") || m.equals("gate")) || (m.equals("stanfordh") || m.equals("gateh")) 92 | || (m.equals("stanfords") || m.equals("gates") || m.equals("none"))) { 93 | try { 94 | String r = geoTxt.geoCodeToGeoJson(q, m, true, 100, true, true); 95 | obj.put("r", JSONValue.parse(r)); 96 | } catch (IllegalArgumentException e) { 97 | e.printStackTrace(); 98 | } catch 
(URISyntaxException e) { 99 | e.printStackTrace(); 100 | } catch (IOException e) { 101 | e.printStackTrace(); 102 | } 103 | } else { 104 | obj.put("r", "Wrong Method"); 105 | } 106 | } 107 | } 108 | 109 | } 110 | 111 | class ItemCompare implements Comparator { 112 | 113 | @Override 114 | public int compare(String[] o1, String[] o2) { 115 | // write comparison logic here like below , it's just a sample 116 | return (new Integer(o1[0])).compareTo(new Integer(o2[0])); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/Place.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark; 2 | 3 | /** 4 | * Class for representing a location in a problem instance including where in the 5 | * text message the location is mentioned in the text and, if available, coordinates and geonamesID. 6 | * @author jow 7 | * 8 | */ 9 | public class Place { 10 | 11 | /** 12 | * name of place at it appears in the text 13 | */ 14 | protected String nameInText; 15 | 16 | /** 17 | * index at which the first character of the place name appears in the text 18 | */ 19 | protected int startIndex; 20 | 21 | /** 22 | * index at which the last character of the place name appears in the text 23 | */ 24 | protected int endIndex; 25 | 26 | /** 27 | * name under which the place is listed in geonames 28 | */ 29 | protected String geonamesName; 30 | 31 | /** 32 | * geonames ID of the place; "" if ID is unknown 33 | */ 34 | protected String geonamesId = ""; 35 | 36 | /** 37 | * longitude of the location of the place 38 | */ 39 | protected double lon; 40 | 41 | /** 42 | * latitude of the location of the place 43 | */ 44 | protected double lat; 45 | 46 | 47 | /** 48 | * constructor to generate and initialize all instance variables of a new place 49 | * 50 | * @param nameInText 51 | * @param startIndex 52 | * @param endIndex 53 | * @param lat 
54 | * @param lon 55 | * @param geonamesName 56 | * @param geonamesId 57 | */ 58 | public Place(String nameInText, int startIndex, int endIndex, 59 | double lat, double lon, String geonamesName, 60 | String geonamesId) { 61 | 62 | this.nameInText = nameInText; 63 | this.startIndex = startIndex; 64 | this.endIndex = endIndex; 65 | this.lon = lon; 66 | this.lat = lat; 67 | this.geonamesName = geonamesName; 68 | this.geonamesId = geonamesId; 69 | } 70 | 71 | public String getNameInText() { 72 | return nameInText; 73 | } 74 | 75 | public void setNameInText(String nameInText) { 76 | this.nameInText = nameInText; 77 | } 78 | 79 | public int getStartIndex() { 80 | return startIndex; 81 | } 82 | 83 | public void setStartIndex(int startIndex) { 84 | this.startIndex = startIndex; 85 | } 86 | 87 | public int getEndIndex() { 88 | return endIndex; 89 | } 90 | 91 | public void setEndIndex(int endIndex) { 92 | this.endIndex = endIndex; 93 | } 94 | 95 | public String getGeonamesName() { 96 | return geonamesName; 97 | } 98 | 99 | public void setGeonamesName(String geonamesName) { 100 | this.geonamesName = geonamesName; 101 | } 102 | 103 | public String getGeonamesId() { 104 | return geonamesId; 105 | } 106 | 107 | public void setGeonamesId(String geonamesId) { 108 | this.geonamesId = geonamesId; 109 | } 110 | 111 | public double getLon() { 112 | return lon; 113 | } 114 | 115 | public void setLon(double lon) { 116 | this.lon = lon; 117 | } 118 | 119 | public double getLat() { 120 | return lat; 121 | } 122 | 123 | public void setLat(double lat) { 124 | this.lat = lat; 125 | } 126 | 127 | public String toString() { 128 | return "Place " + this.getNameInText() + "[" + this.lat + "," + this.lon + ";" + this.getGeonamesName() + "," + this.getGeonamesId() + "]"; 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/ProblemInstance.java: 
--------------------------------------------------------------------------------
package edu.psu.ist.vaccine.geotxt.benchmark;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.StringTokenizer;


/**
 * Class for storing a problem instance.
 * @author jow
 *
 */
public class ProblemInstance {

    /** prefix of a line describing one place */
    private static final String LOCATION_PREFIX = "location:\t";

    /** prefix of the line on which the text starts */
    private static final String TEXT_PREFIX = "text:\t";

    /**
     * short name of the problem instance
     */
    protected String name;

    /**
     * list of places appearing in the text of this problem instance
     */
    protected List<Place> places = new ArrayList<Place>();


    /**
     * text of this problem instance
     */
    protected String text = "";


    /**
     * Constructs a new instance by reading and parsing the description from is.
     * Expected layout: the instance name on the first line, then zero or more
     * tab-separated "location:" lines, then the text introduced by "text:" and
     * running to the end of the input.
     * This constructor raises an exception if the input does not follow that layout.
     * The stream is read as UTF-8 and is left open for the caller to close.
     *
     * @param is stream holding the instance description
     */
    public ProblemInstance(InputStream is) {
        // Deliberately not closed: closing the Scanner would close the caller's stream.
        Scanner scanner = new Scanner(is, "UTF-8");
        name = scanner.nextLine();

        // read all locations into instances of Place
        String line = scanner.nextLine();
        while (line.startsWith(LOCATION_PREFIX)) {
            places.add(parsePlace(line.substring(LOCATION_PREFIX.length())));
            line = scanner.nextLine();
        }

        if (line.length() < TEXT_PREFIX.length()) {
            throw new IllegalArgumentException("expected a line starting with \"text:\" but found: " + line);
        }
        StringBuilder body = new StringBuilder(line.substring(TEXT_PREFIX.length()));

        // Consume the remaining lines. The former loop tested hasNext() without
        // ever reading, so it never terminated on multi-line text.
        // NOTE(review): line breaks are kept as '\n' so that character offsets
        // stay aligned with the file - confirm against the instance format.
        while (scanner.hasNextLine()) {
            body.append('\n').append(scanner.nextLine());
        }
        text = body.toString();
        System.out.println("done");
    }

    /**
     * Parses the tab-separated fields of a location line: name, start index,
     * end index, latitude, longitude, geonames name, geonames id. The two
     * geonames fields may be absent and then default to "".
     */
    private static Place parsePlace(String fields) {
        StringTokenizer st = new StringTokenizer(fields, "\t");
        String nameInText = st.nextToken();
        int startIndex = Integer.parseInt(st.nextToken());
        int endIndex = Integer.parseInt(st.nextToken());
        double lat = Double.parseDouble(st.nextToken());
        double lon = Double.parseDouble(st.nextToken());
        String geonamesName = st.hasMoreTokens() ? st.nextToken() : "";
        String geonamesId = st.hasMoreTokens() ? st.nextToken() : "";
        return new Place(nameInText, startIndex, endIndex, lat, lon, geonamesName, geonamesId);
    }

    /** Multi-line dump: the name, one "Location:" line per place, then the text. */
    public String toString() {
        StringBuilder sb = new StringBuilder("ProblemInstance " + name + "\n");
        for (Place p : places) {
            sb.append("Location: ").append(p.toString()).append("\n");
        }
        sb.append("text: ").append(text).append("\n");
        return sb.toString();
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public List<Place> getPlaces() {
        return places;
    }

    public void setPlaces(List<Place> places) {
        this.places = places;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }
}
--------------------------------------------------------------------------------
/app/edu/psu/ist/vaccine/geotxt/benchmark/TestResult.java:
--------------------------------------------------------------------------------
package edu.psu.ist.vaccine.geotxt.benchmark;

import edu.psu.ist.vaccine.geotxt.entities.Location;



/**
 * Class for storing the results of applying a NER / geocoding approach to
 * a problem instance.
 * @author jow
 *
 */
public class TestResult {

    /** number of places found */
    protected int placesFound = 0;

    /** number of places in the text that have been identified correctly */
    protected int placesIdentifiedCorrectly = 0;

    /** number of places in the text that have not been recognized */
    protected int placesMissed = 0;

    /** states whether place has been identified correctly for each place in instance */
    protected boolean[] identificationStatus;

    /** corresponding locations identified for each place in the instance */
    protected Location[] locations;

    /** computation time required */
    protected double cTime = 0.0;

    /**
     * Creates an empty result with one status slot and one location slot per place.
     *
     * @param size number of places in the problem instance
     */
    public TestResult(int size) {
        identificationStatus = new boolean[size];
        locations = new Location[size];
    }

    public boolean[] getIdentificationStatus() {
        return identificationStatus;
    }

    public void setIdentificationStatus(boolean[] identificationStatus) {
        this.identificationStatus = identificationStatus;
    }

    public double getcTime() {
        return cTime;
    }

    public void setcTime(double cTime) {
        this.cTime = cTime;
    }

    /** @return number of places identified correctly (backed by placesIdentifiedCorrectly) */
    public int getLocationsIdentifiedCorrectly() {
        return placesIdentifiedCorrectly;
    }

    public void setLocationsIdentifiedCorrectly(int locationsIdentifiedCorrectly) {
        this.placesIdentifiedCorrectly = locationsIdentifiedCorrectly;
    }

    /** @return number of places that were not recognized (backed by placesMissed) */
    public int getLocationsMissed() {
        return placesMissed;
    }

    public void setLocationsMissed(int locationsMissed) {
        this.placesMissed = locationsMissed;
    }

    public Location[] getLocations() {
        return locations;
    }

    public void setLocations(Location[] locations) {
        this.locations = locations;
    }

    public int getPlacesFound() {
        return placesFound;
    }

    public void setPlacesFound(int placesFound) {
        this.placesFound = placesFound;
    }

}
--------------------------------------------------------------------------------
/app/edu/psu/ist/vaccine/geotxt/benchmark/instancereading/DirectoryInstanceReader.java:
--------------------------------------------------------------------------------
package edu.psu.ist.vaccine.geotxt.benchmark.instancereading;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.regex.Pattern;

import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance;


/**
 * Class for reading problem instances from a set that is stored as separate files
 * (one per instance) contained in a directory. All files containing problem
 * instances need to have the extension .inst
 * @author jow
 *
 */
public class DirectoryInstanceReader implements InstanceReader {

    /**
     * points at the current instance that will be returned by getNextInstance()
     */
    protected int count = 0;

    /**
     * list of all files containing problem instances
     */
    protected ArrayList<File> instanceFiles = new ArrayList<File>();

    /**
     * Creates a new reader for the set of problem instances stored in
     * the directory dir. When dir cannot be listed (it does not exist or is not
     * a directory) the reader is simply empty.
     * @param dir path of the directory holding the .inst files
     */
    public DirectoryInstanceReader(String dir) {
        File[] files = new File(dir).listFiles();
        if (files == null) {
            // listFiles() returns null rather than an empty array on failure.
            System.out.println("WARNING: could not list directory " + dir);
            return;
        }

        Pattern filePattern = Pattern.compile(".*[.]inst$");
        for (File candidate : files) {
            if (filePattern.matcher(candidate.getName()).matches()) {
                instanceFiles.add(candidate);
            }
        }
    }

    /**
     * Reads the next instance file and advances the cursor.
     *
     * @return the parsed instance, or null when the file could not be read or parsed
     */
    @Override
    public ProblemInstance getNextInstance() {

        System.out.println("Trying to read instance file " + count + " " + instanceFiles.get(count).getAbsolutePath());

        ProblemInstance inst = null;

        // try-with-resources closes the file even when parsing throws.
        try (InputStream is = new FileInputStream(instanceFiles.get(count).getAbsolutePath())) {
            inst = new ProblemInstance(is);
        } catch (Exception e) {
            System.out.println("file operation failed, could not read file");
        }

        count++;
        return inst;
    }

    @Override
    public int getNumOfInstances() {
        return instanceFiles.size();
    }

    @Override
    public boolean hasMoreInstance() {
        return count < instanceFiles.size();
    }
}
80 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/instancereading/InstanceReader.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark.instancereading; 2 | 3 | import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance; 4 | 5 | /** 6 | * Interface for defining classes provide access to a set of problem instances, e.g. 7 | * by reading it from a file, directory, or database. 8 | * 9 | * @author jow 10 | * 11 | */ 12 | public interface InstanceReader { 13 | 14 | /** 15 | * returns the next instance in the set of problem instances 16 | * @return next instance in the set of instances 17 | */ 18 | public ProblemInstance getNextInstance(); 19 | 20 | /** 21 | * returns the number of instances in the problem instance set 22 | * @return number of instance in the instance set 23 | */ 24 | public int getNumOfInstances(); 25 | 26 | /** 27 | * returns whether there are still instances in the set that have not been accessed 28 | * @return 29 | */ 30 | public boolean hasMoreInstance(); 31 | } 32 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/instancereading/ZipInstanceReader.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark.instancereading; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.InputStream; 7 | import java.util.ArrayList; 8 | import java.util.regex.Pattern; 9 | import java.util.zip.ZipEntry; 10 | import java.util.zip.ZipInputStream; 11 | 12 | import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance; 13 | 14 | 15 | /** 16 | * Class for reading problem instances from a zip file. All files containing problem 17 | * instances need to have the extension .inst. 
18 | * @author jow 19 | * 20 | */ 21 | public class ZipInstanceReader implements InstanceReader { 22 | 23 | /** 24 | * points at the current instance that will be returned by getNextInstance() 25 | */ 26 | protected int count = 0; 27 | 28 | /** 29 | * list of all files containing problem instances 30 | */ 31 | protected ArrayList instanceFiles = new ArrayList(); 32 | 33 | 34 | /** 35 | * stream for zip file from which instances will be read 36 | */ 37 | protected ZipInputStream zis; 38 | 39 | /** 40 | * creates a new instance for the set of problem instances stored in 41 | * the zip file pathToZipFile 42 | * @param pathToZipFile 43 | */ 44 | public ZipInstanceReader(String pathToZipFile) { 45 | Pattern filePattern = Pattern.compile(".*[.]inst$"); 46 | 47 | try { 48 | zis = new ZipInputStream(new FileInputStream(pathToZipFile)); 49 | 50 | ZipEntry entry; 51 | while((entry = zis.getNextEntry())!=null) { 52 | if (filePattern.matcher(entry.getName()).matches()) { 53 | instanceFiles.add(entry); 54 | } 55 | } 56 | 57 | zis.close(); 58 | 59 | zis = new ZipInputStream(new FileInputStream(pathToZipFile)); 60 | 61 | 62 | } catch (Exception e) { 63 | System.out.println("file operation failed while reading file: "+e); 64 | e.printStackTrace(); 65 | System.exit(1); 66 | } 67 | } 68 | 69 | @Override 70 | public ProblemInstance getNextInstance() { 71 | 72 | System.out.println("Trying to read instance " + count + " " + instanceFiles.get(count).getName()); 73 | ProblemInstance inst = null; 74 | 75 | try { 76 | ZipEntry ze = zis.getNextEntry(); 77 | while (!ze.getName().equals(this.instanceFiles.get(count).getName())) { 78 | ze = zis.getNextEntry(); 79 | } 80 | 81 | System.out.println("zip entry: "+ze.getName()); 82 | 83 | StringBuilder sb = new StringBuilder(); 84 | for (int c = zis.read(); c != -1; c = zis.read()) { 85 | sb.append((char)c); 86 | } 87 | 88 | inst = new ProblemInstance(new ByteArrayInputStream(sb.toString().getBytes("UTF-8"))); 89 | 90 | } catch (Exception e) { 91 | 
System.out.println("file operation failed, could not read entry: "+e); 92 | e.printStackTrace(); 93 | } 94 | 95 | count++; 96 | return inst; 97 | } 98 | 99 | @Override 100 | public int getNumOfInstances() { 101 | return instanceFiles.size(); 102 | } 103 | 104 | @Override 105 | public boolean hasMoreInstance() { 106 | return count < instanceFiles.size(); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/tester/GateTester.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark.tester; 2 | 3 | import java.lang.management.ManagementFactory; 4 | import java.lang.management.ThreadMXBean; 5 | 6 | import edu.psu.ist.vaccine.geotxt.benchmark.Place; 7 | import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance; 8 | import edu.psu.ist.vaccine.geotxt.benchmark.TestResult; 9 | import edu.psu.ist.vaccine.geotxt.entities.Location; 10 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 11 | import edu.psu.ist.vaccine.geotxt.ner.GateNer; 12 | 13 | /** 14 | * Wrapper class to use Gate in benchmarks 15 | * @author jow 16 | * 17 | */ 18 | public class GateTester implements Tester { 19 | 20 | GateNer gate; 21 | 22 | public GateTester(String pathToGate) { 23 | gate = new GateNer(pathToGate); 24 | } 25 | 26 | @Override 27 | public TestResult run(ProblemInstance p) { 28 | TestResult result = new TestResult(p.getPlaces().size()); 29 | 30 | ThreadMXBean threadMX = ManagementFactory.getThreadMXBean(); 31 | long startS = threadMX.getCurrentThreadUserTime(); 32 | 33 | NamedEntities doc = gate.tagAlltoDoc(p.getText()); 34 | 35 | long endS = threadMX.getCurrentThreadUserTime(); 36 | result.setcTime((endS - startS) / 1000000.0); 37 | 38 | System.out.println(doc); 39 | 40 | result.setPlacesFound(doc.locs.size()); 41 | 42 | 43 | // compare all locations identified 44 | 45 | boolean found = false; 46 | 47 | int count = 0; 
48 | for (Place pl : p.getPlaces()) { 49 | found = false; 50 | 51 | for (Location l : doc.locs) { 52 | if (l.getPositions().contains(pl.getStartIndex())) { 53 | if (pl.getNameInText().equalsIgnoreCase(l.getName())) { 54 | result.setLocationsIdentifiedCorrectly(result.getLocationsIdentifiedCorrectly()+1); 55 | result.getIdentificationStatus()[count] = true; 56 | result.getLocations()[count] = l; 57 | } 58 | found = true; 59 | break; 60 | } 61 | } 62 | 63 | if (!found) result.setLocationsMissed(result.getLocationsMissed()+1); 64 | count++; 65 | } 66 | 67 | return result; 68 | } 69 | 70 | @Override 71 | public boolean supportsDistanceComparison() { 72 | return false; 73 | } 74 | 75 | @Override 76 | public boolean supportsLocationRecognition() { 77 | return true; 78 | } 79 | 80 | @Override 81 | public boolean supportsGeonamesIDs() { 82 | return false; 83 | } 84 | 85 | @Override 86 | public String getName() { 87 | return "Gate-NER"; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/tester/GenericAnalyzerTester.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark.tester; 2 | 3 | import java.lang.management.ManagementFactory; 4 | import java.lang.management.ThreadMXBean; 5 | import java.util.HashMap; 6 | 7 | import edu.psu.ist.vaccine.geotxt.benchmark.Place; 8 | import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance; 9 | import edu.psu.ist.vaccine.geotxt.benchmark.TestResult; 10 | import edu.psu.ist.vaccine.geotxt.entities.Location; 11 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 12 | import edu.psu.ist.vaccine.geotxt.ner.NerEngines; 13 | import edu.psu.ist.vaccine.geotxt.utils.Analyzer; 14 | 15 | /** 16 | * Wrapper class to run benchmark with any Analyzer class 17 | * @author jow 18 | * 19 | */ 20 | public class GenericAnalyzerTester implements Tester { 21 | 22 | protected String 
name; 23 | protected Analyzer analyzer; 24 | protected boolean supportsDistanceComparison; 25 | protected boolean supportsLocationRecognition; 26 | protected boolean supportsGeonamesIDs; 27 | 28 | 29 | public GenericAnalyzerTester(Analyzer analyzer, String name, boolean supportsDistanceComparison, 30 | boolean supportsLocationRecognition, boolean supportsGeonamesIDs) { 31 | this.analyzer = analyzer; 32 | this.name = name; 33 | this.supportsDistanceComparison = supportsDistanceComparison; 34 | this.supportsLocationRecognition = supportsLocationRecognition; 35 | this.supportsGeonamesIDs = supportsGeonamesIDs; 36 | } 37 | 38 | @Override 39 | public TestResult run(ProblemInstance p) { 40 | TestResult result = new TestResult(p.getPlaces().size()); 41 | 42 | 43 | ThreadMXBean threadMX = ManagementFactory.getThreadMXBean(); 44 | long startS = threadMX.getCurrentThreadUserTime(); 45 | 46 | NamedEntities doc = null; 47 | 48 | try { 49 | doc = analyzer.analyze(p.getText(), NerEngines.STANFORD, new HashMap()); 50 | } catch (Exception e) { 51 | e.printStackTrace(); 52 | System.exit(1); 53 | } 54 | 55 | long endS = threadMX.getCurrentThreadUserTime(); 56 | result.setcTime((endS - startS) / 1000000.0); 57 | 58 | result.setPlacesFound(doc.locs.size()); 59 | 60 | System.out.println("doc: "+doc); 61 | 62 | // compare all locations identified 63 | 64 | boolean found = false; 65 | 66 | int count = 0; 67 | for (Place pl : p.getPlaces()) { 68 | found = false; 69 | 70 | for (Location l : doc.locs) { 71 | if (l.getPositions().contains(pl.getStartIndex())) { 72 | if (pl.getNameInText().equalsIgnoreCase(l.getName())) { 73 | result.setLocationsIdentifiedCorrectly(result.getLocationsIdentifiedCorrectly()+1); 74 | result.getIdentificationStatus()[count] = true; 75 | result.getLocations()[count] = l; 76 | } 77 | found = true; 78 | break; 79 | } 80 | } 81 | 82 | if (!found) result.setLocationsMissed(result.getLocationsMissed()+1); 83 | count++; 84 | } 85 | 86 | return result; 87 | } 88 | 89 | 
@Override 90 | public boolean supportsDistanceComparison() { 91 | return supportsDistanceComparison; 92 | } 93 | 94 | @Override 95 | public boolean supportsLocationRecognition() { 96 | return supportsLocationRecognition; 97 | } 98 | 99 | @Override 100 | public boolean supportsGeonamesIDs() { 101 | return supportsGeonamesIDs; 102 | } 103 | 104 | @Override 105 | public String getName() { 106 | return name; 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/tester/StanfordTester.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark.tester; 2 | 3 | import java.lang.management.ManagementFactory; 4 | import java.lang.management.ThreadMXBean; 5 | 6 | import edu.psu.ist.vaccine.geotxt.benchmark.Place; 7 | import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance; 8 | import edu.psu.ist.vaccine.geotxt.benchmark.TestResult; 9 | import edu.psu.ist.vaccine.geotxt.entities.Location; 10 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 11 | import edu.psu.ist.vaccine.geotxt.ner.StanfordNer; 12 | 13 | /** 14 | * Wrapper class to use Stanford NER in benchmarks 15 | * @author jow 16 | * 17 | */ 18 | public class StanfordTester implements Tester { 19 | 20 | StanfordNer st; 21 | 22 | public StanfordTester(String pathToStanfordClassifier) { 23 | st = new StanfordNer(pathToStanfordClassifier); 24 | } 25 | 26 | @Override 27 | public TestResult run(ProblemInstance p) { 28 | TestResult result = new TestResult(p.getPlaces().size()); 29 | 30 | System.out.println("Stanford says..."); 31 | 32 | ThreadMXBean threadMX = ManagementFactory.getThreadMXBean(); 33 | long startS = threadMX.getCurrentThreadUserTime(); 34 | 35 | NamedEntities doc = st.tagAlltoDoc(p.getText()); 36 | 37 | long endS = threadMX.getCurrentThreadUserTime(); 38 | result.setcTime((endS - startS) / 1000000.0); 39 | 40 | 
System.out.println(doc); 41 | 42 | result.setPlacesFound(doc.locs.size()); 43 | 44 | // compare all locations identified 45 | 46 | boolean found = false; 47 | 48 | int count = 0; 49 | for (Place pl : p.getPlaces()) { 50 | found = false; 51 | 52 | for (Location l : doc.locs) { 53 | if (l.getPositions().contains(pl.getStartIndex())) { 54 | if (pl.getNameInText().equalsIgnoreCase(l.getName())) { 55 | result.setLocationsIdentifiedCorrectly(result.getLocationsIdentifiedCorrectly()+1); 56 | result.getIdentificationStatus()[count] = true; 57 | result.getLocations()[count] = l; 58 | } 59 | found = true; 60 | break; 61 | } 62 | } 63 | 64 | if (!found) result.setLocationsMissed(result.getLocationsMissed()+1); 65 | count++; 66 | } 67 | 68 | return result; 69 | } 70 | 71 | @Override 72 | public boolean supportsDistanceComparison() { 73 | return false; 74 | } 75 | 76 | @Override 77 | public boolean supportsLocationRecognition() { 78 | return true; 79 | } 80 | 81 | @Override 82 | public boolean supportsGeonamesIDs() { 83 | return false; 84 | } 85 | 86 | @Override 87 | public String getName() { 88 | return "Stanford-NER"; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/benchmark/tester/Tester.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.benchmark.tester; 2 | 3 | import edu.psu.ist.vaccine.geotxt.benchmark.ProblemInstance; 4 | import edu.psu.ist.vaccine.geotxt.benchmark.TestResult; 5 | 6 | /** 7 | * Interface for wrapper classes for applying and comparing different NER / geocoding approaches 8 | * in a benchmark 9 | * @author jow 10 | * 11 | */ 12 | public interface Tester { 13 | /** 14 | * 15 | * @param p problem instance that the approach should be applied to 16 | * @return result describing the performance of the approach for the instance 17 | */ 18 | public TestResult run(ProblemInstance p); 19 | 20 | 21 | /** 22 | 
* yields true if approach allows for a comparison of distance to the 23 | * ground truth coordinates in which case each place in the result returned by run(...) 24 | * has to contain lon and lat coordinates (pure NER approaches typically don't but 25 | * geocoding approaches do). 26 | * @return 27 | */ 28 | public boolean supportsDistanceComparison(); 29 | 30 | /** 31 | * yields true if approach allows for a comparison of locations in the 32 | * problem instance 33 | * @return 34 | */ 35 | public boolean supportsLocationRecognition(); 36 | 37 | /** 38 | * yields true if approach allows for a comparison of geonames ID in the 39 | * @return 40 | */ 41 | public boolean supportsGeonamesIDs(); 42 | 43 | public String getName(); 44 | } 45 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/entities/Hashtag.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | 6 | package edu.psu.ist.vaccine.geotxt.entities; 7 | 8 | import edu.psu.ist.vaccine.geotxt.utils.StripStrings; 9 | 10 | /** 11 | * 12 | * @author ajaiswal 13 | */ 14 | public class Hashtag { 15 | 16 | public String hashtag = ""; 17 | public String type = ""; 18 | 19 | public String getType() { 20 | return type; 21 | } 22 | 23 | public void setType(String type) { 24 | this.type = type.toLowerCase(); 25 | } 26 | 27 | public String getHashtag() { 28 | return hashtag; 29 | } 30 | 31 | public void setHashtag(String hashtag) { 32 | this.hashtag = hashtag.toLowerCase(); 33 | } 34 | 35 | public Hashtag(String tag, String type) { 36 | this.hashtag = StripStrings.strip(tag).toLowerCase(); 37 | this.type = type.toLowerCase(); 38 | } 39 | 40 | public Hashtag(String tag) { 41 | this.hashtag = StripStrings.strip(tag).toLowerCase(); 42 | this.type = "hashtag".toLowerCase(); 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "Name:" + this.hashtag + " Type:" + this.type; 48 | } 49 | 50 | @Override 51 | public boolean equals(Object tag) { 52 | if (this.hashtag.toLowerCase().equals(((Hashtag) tag).getHashtag().toLowerCase())) { 53 | return true; 54 | } 55 | return false; 56 | } 57 | 58 | @Override 59 | public int hashCode() { 60 | int hash = 7; 61 | hash = 29 * hash + (this.hashtag != null ? this.hashtag.toLowerCase().hashCode() : 0); 62 | hash = 29 * hash + (this.type != null ? this.type.hashCode() : 0); 63 | return hash; 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/entities/Location.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package edu.psu.ist.vaccine.geotxt.entities; 6 | 7 | import edu.psu.ist.vaccine.geotxt.utils.BBox; 8 | import edu.psu.ist.vaccine.geotxt.utils.LocationWrapper; 9 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 10 | import java.util.ArrayList; 11 | import java.util.Collections; 12 | import java.util.List; 13 | 14 | /** 15 | * 16 | * @author mortezakz 17 | */ 18 | public class Location { 19 | 20 | protected String name; 21 | protected String locType; 22 | protected String countryCode = null; 23 | protected String featureClass = null; 24 | protected String featureCode = null; 25 | protected String[] alternateNames = null; 26 | 27 | protected ArrayList positions = null; 28 | protected PointGeometry geometry = null; 29 | 30 | protected List candidates = new ArrayList(); 31 | protected List hierarchy = new ArrayList(); 32 | 33 | private BBox bbox = null; 34 | private String[] hLevels = null; 35 | 36 | public List getCandidates() { 37 | return candidates; 38 | } 39 | 40 | public void setCandidates(List candidates) { 41 | this.candidates = candidates; 42 | } 43 | 44 | public List getHierarchy() { 45 | return hierarchy; 46 | } 47 | 48 | public void setHierarchy(List hierarchy) { 49 | this.hierarchy = hierarchy; 50 | } 51 | 52 | public String[] getAlternateNames() { 53 | return alternateNames; 54 | } 55 | 56 | public void setAlternateNames(String[] alternateNames) { 57 | this.alternateNames = alternateNames; 58 | } 59 | 60 | public Location(String name) { 61 | // this.name = StripStrings.strip(name); 62 | this.name = name; 63 | this.positions = new ArrayList(); 64 | } 65 | 66 | public Location(String name, String locType) { 67 | // this.name = StripStrings.strip(name); 68 | this.name = name; 69 | this.locType = locType.toLowerCase(); 70 | this.positions = new ArrayList(); 71 | } 72 | 73 | public Location(String name, int position) { 74 | // this.name = StripStrings.strip(name); 75 | this.name = name; 76 | if (this.positions == null) { 77 | this.positions 
= new ArrayList(); 78 | } 79 | if (this.positions != null && !this.positions.contains(position)) { 80 | this.positions.add(position); 81 | } 82 | } 83 | 84 | public Location(String name, String locType, int position) { 85 | // this.name = StripStrings.strip(name); 86 | this.name = name; 87 | this.locType = locType.toLowerCase(); 88 | if (this.positions == null) { 89 | this.positions = new ArrayList(); 90 | } 91 | if (this.positions != null && !this.positions.contains(position)) { 92 | this.positions.add(position); 93 | } 94 | } 95 | 96 | public Location(String name, String locType, int position, PointGeometry geometry) { 97 | // this.name = StripStrings.strip(name).toLowerCase(); 98 | this.name = name; 99 | this.locType = locType.toLowerCase(); 100 | this.geometry = geometry; 101 | if (this.positions == null) { 102 | this.positions = new ArrayList(); 103 | } 104 | if (this.positions != null && !this.positions.contains(position)) { 105 | this.positions.add(position); 106 | } 107 | } 108 | 109 | public void addPosition(int position) { 110 | if (this.positions == null) { 111 | this.positions = new ArrayList(); 112 | } 113 | if (!this.positions.contains(position)) { 114 | this.positions.add(position); 115 | } 116 | Collections.sort(positions); 117 | } 118 | 119 | public ArrayList getPositions() { 120 | return positions; 121 | } 122 | 123 | public void setPositions(ArrayList positions) { 124 | this.positions = positions; 125 | } 126 | 127 | public String getName() { 128 | return name; 129 | } 130 | 131 | public void setName(String name) { 132 | this.name = name; 133 | } 134 | 135 | public String getLocType() { 136 | return locType; 137 | } 138 | 139 | public void setLocType(String locType) { 140 | this.locType = locType.toLowerCase(); 141 | } 142 | 143 | public String getCountryCode() { 144 | return countryCode; 145 | } 146 | 147 | public void setCountryCode(String countryCode) { 148 | this.countryCode = countryCode; 149 | } 150 | 151 | public String getFeatureClass() { 
152 | return featureClass; 153 | } 154 | 155 | public void setFeatureClass(String featureClass) { 156 | this.featureClass = featureClass; 157 | } 158 | 159 | public String getFeatureCode() { 160 | return featureCode; 161 | } 162 | 163 | public void setFeatureCode(String featureCode) { 164 | this.featureCode = featureCode; 165 | } 166 | 167 | public PointGeometry getGeometry() { 168 | return geometry; 169 | } 170 | 171 | public void setGeometry(PointGeometry geometry) { 172 | this.geometry = geometry; 173 | } 174 | 175 | public BBox getBBox() { 176 | return bbox; 177 | } 178 | 179 | public void setBBox(BBox bbox) { 180 | this.bbox = bbox; 181 | } 182 | 183 | public String getSelfHLevel() { 184 | String selfHLevel = ""; 185 | // TODO - We will have different data sources so how do we associate a similar hierarchy level for each. 186 | selfHLevel = featureClass; 187 | return selfHLevel; 188 | } 189 | 190 | public String[] getHLevels() { 191 | return hLevels; 192 | } 193 | 194 | public void setHLevels(String[] hLevels) { 195 | this.hLevels = hLevels; 196 | } 197 | 198 | @Override 199 | public boolean equals(Object loc1) { 200 | boolean state = false; 201 | Location loct1 = (Location) loc1; 202 | if (loct1.getName().toLowerCase().equals(this.getName().toLowerCase())) { 203 | state = true; 204 | } 205 | return state; 206 | } 207 | 208 | @Override 209 | public int hashCode() { 210 | int hash = 7; 211 | hash = 29 * hash + (this.name != null ? 
this.name.hashCode() : 0); 212 | return hash; 213 | } 214 | 215 | @Override 216 | public String toString() { 217 | if (geometry != null) { 218 | return "Name: " + this.name + " Position:" + this.positions + " Geometry: " + this.geometry.toString(); 219 | } else { 220 | return "Name: " + this.name + " Position:" + this.positions; 221 | } 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/entities/Organization.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package edu.psu.ist.vaccine.geotxt.entities; 6 | 7 | import edu.psu.ist.vaccine.geotxt.utils.PointGeometry; 8 | import java.util.ArrayList; 9 | import java.util.Collections; 10 | 11 | /** 12 | * 13 | * @author ajaiswal 14 | */ 15 | public class Organization { 16 | 17 | private String name = ""; 18 | private String orgType = ""; 19 | protected ArrayList positions = null; 20 | protected PointGeometry geometry = null; 21 | 22 | public Organization(String name, String orgType) { 23 | this.name = name; 24 | this.orgType = orgType.toLowerCase(); 25 | this.positions = new ArrayList(); 26 | } 27 | 28 | public Organization(String name, int position) { 29 | this.name = name; 30 | if (this.positions == null) { 31 | this.positions = new ArrayList(); 32 | } 33 | if (this.positions != null && !this.positions.contains(position)) { 34 | this.positions.add(position); 35 | } 36 | } 37 | 38 | public Organization(String name, String orgType, int position) { 39 | this.name = name; 40 | this.orgType = orgType.toLowerCase(); 41 | if (this.positions == null) { 42 | this.positions = new ArrayList(); 43 | } 44 | if (this.positions != null && !this.positions.contains(position)) { 45 | this.positions.add(position); 46 | } 47 | } 48 | 49 | public void addPosition(int position) { 50 | if (this.positions == 
null) { 51 | this.positions = new ArrayList(); 52 | } 53 | if (!this.positions.contains(position)) { 54 | this.positions.add(position); 55 | } 56 | Collections.sort(positions); 57 | } 58 | 59 | public ArrayList getPositions() { 60 | return positions; 61 | } 62 | 63 | public void setPositions(ArrayList positions) { 64 | this.positions = positions; 65 | } 66 | 67 | public PointGeometry getGeometry() { 68 | return geometry; 69 | } 70 | 71 | public void setGeometry(PointGeometry geometry) { 72 | this.geometry = geometry; 73 | } 74 | 75 | @Override 76 | public boolean equals(Object obj) { 77 | Organization org1 = (Organization) obj; 78 | if (org1.getName().toLowerCase().equals(this.getName().toLowerCase()) && org1.getOrgType().toLowerCase().equals(this.getOrgType().toLowerCase())) { 79 | return true; 80 | } 81 | return false; 82 | } 83 | 84 | @Override 85 | public int hashCode() { 86 | int hash = 5; 87 | hash = 59 * hash + (this.name != null ? this.name.toLowerCase().hashCode() : 0); 88 | hash = 59 * hash + (this.orgType != null ? 
this.orgType.hashCode() : 0); 89 | return hash; 90 | } 91 | 92 | @Override 93 | public String toString() { 94 | if (geometry != null) { 95 | return "Name: " + this.name + " Org Type:" + this.orgType + " Position:" + this.positions + " Geometry: " + this.geometry.toString(); 96 | } else { 97 | return "Name: " + this.name + " Org Type:" + this.orgType + " Position:" + this.positions; 98 | } 99 | } 100 | 101 | public String getName() { 102 | return name; 103 | } 104 | 105 | public void setName(String name) { 106 | this.name = name; 107 | } 108 | 109 | public String getOrgType() { 110 | return orgType; 111 | } 112 | 113 | public void setOrgType(String orgType) { 114 | this.orgType = orgType.toLowerCase(); 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/entities/OtherEntity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | 6 | package edu.psu.ist.vaccine.geotxt.entities; 7 | 8 | import edu.psu.ist.vaccine.geotxt.utils.StripStrings; 9 | 10 | /** 11 | * 12 | * @author ajaiswal 13 | */ 14 | public class OtherEntity { 15 | 16 | public OtherEntity(String name, String type) { 17 | this.name = StripStrings.strip(name); 18 | this.type = type.toLowerCase(); 19 | } 20 | 21 | private String name = ""; 22 | 23 | @Override 24 | public boolean equals(Object obj) { 25 | OtherEntity obj1 = (OtherEntity) obj; 26 | if (this.getName().toLowerCase().equals(obj1.getName().toLowerCase()) && this.getType().toLowerCase().equals(obj1.getType().toLowerCase())) 27 | return true; 28 | return false; 29 | } 30 | 31 | @Override 32 | public int hashCode() { 33 | int hash = 7; 34 | hash = 37 * hash + (this.name != null ? this.name.hashCode() : 0); 35 | hash = 37 * hash + (this.type != null ? 
this.type.hashCode() : 0); 36 | return hash; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | return "Name: " + this.name + " Type:" + this.type; 42 | } 43 | 44 | public String getName() { 45 | return name; 46 | } 47 | 48 | public void setName(String name) { 49 | this.name = name; 50 | } 51 | 52 | public String getType() { 53 | return type; 54 | } 55 | 56 | public void setType(String type) { 57 | this.type = type.toLowerCase(); 58 | } 59 | 60 | private String type = ""; 61 | 62 | } 63 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/entities/Person.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package edu.psu.ist.vaccine.geotxt.entities; 6 | 7 | import edu.psu.ist.vaccine.geotxt.utils.StripStrings; 8 | import java.util.ArrayList; 9 | import java.util.Collections; 10 | 11 | /** 12 | * 13 | * @author ajaiswal 14 | */ 15 | public class Person { 16 | 17 | protected String name = ""; 18 | protected String gender = ""; 19 | protected String kind = ""; 20 | protected ArrayList positions = null; 21 | 22 | public Person(String name, String gender, String kind) { 23 | this.name = StripStrings.strip(name); 24 | this.gender = gender.toLowerCase(); 25 | this.kind = kind.toLowerCase(); 26 | } 27 | 28 | public Person(String name, int position) { 29 | this.name = StripStrings.strip(name); 30 | if (this.positions == null) { 31 | this.positions = new ArrayList(); 32 | } 33 | if (this.positions != null && !this.positions.contains(position)) { 34 | this.positions.add(position); 35 | } 36 | } 37 | 38 | public Person(String name, String gender, String kind, int position) { 39 | this.name = StripStrings.strip(name); 40 | this.gender = gender.toLowerCase(); 41 | this.kind = kind.toLowerCase(); 42 | if (this.positions == null) { 43 | this.positions = new 
ArrayList(); 44 | } 45 | if (this.positions != null && !this.positions.contains(position)) { 46 | this.positions.add(position); 47 | } 48 | } 49 | 50 | public String getGender() { 51 | return gender; 52 | } 53 | 54 | public void setGender(String gender) { 55 | this.gender = gender.toLowerCase(); 56 | } 57 | 58 | public String getName() { 59 | return name; 60 | } 61 | 62 | public void setName(String name) { 63 | this.name = name; 64 | } 65 | 66 | public String getKind() { 67 | return kind; 68 | } 69 | 70 | public void setKind(String kind) { 71 | this.kind = kind.toLowerCase(); 72 | } 73 | 74 | public void addPosition(int position) { 75 | if (this.positions == null) { 76 | this.positions = new ArrayList(); 77 | } 78 | if (!this.positions.contains(position)) { 79 | this.positions.add(position); 80 | } 81 | Collections.sort(positions); 82 | 83 | } 84 | 85 | public ArrayList getPositions() { 86 | return positions; 87 | } 88 | 89 | public void setPositions(ArrayList positions) { 90 | this.positions = positions; 91 | } 92 | 93 | @Override 94 | public boolean equals(Object per1) { 95 | Person person1 = (Person) per1; 96 | if (person1.getName().toLowerCase().equals(this.getName().toLowerCase()) && person1.getGender().toLowerCase().equals(this.getGender().toLowerCase()) && person1.getKind().toLowerCase().equals(this.getKind().toLowerCase())) { 97 | return true; 98 | } 99 | return false; 100 | } 101 | 102 | @Override 103 | public int hashCode() { 104 | int hash = 7; 105 | hash = 89 * hash + (this.name != null ? this.name.toLowerCase().hashCode() : 0); 106 | hash = 89 * hash + (this.gender != null ? this.gender.hashCode() : 0); 107 | hash = 89 * hash + (this.kind != null ? 
this.kind.hashCode() : 0); 108 | return hash; 109 | } 110 | 111 | @Override 112 | public String toString() { 113 | return "Name: " + this.name + " Sex:" + this.gender + " Kind:" + this.kind; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/AbstractNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | 4 | public abstract class AbstractNer { 5 | 6 | public abstract String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails); 7 | 8 | public abstract NamedEntities tagAlltoDoc(String text); 9 | 10 | } 11 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/CogCompNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent; 4 | import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation; 5 | import edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder; 6 | import edu.psu.ist.vaccine.geotxt.entities.Location; 7 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 8 | import edu.psu.ist.vaccine.geotxt.entities.Person; 9 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 10 | import edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder; 11 | import edu.illinois.cs.cogcomp.core.datastructures.ViewNames; 12 | import edu.illinois.cs.cogcomp.ner.NERAnnotator; 13 | import edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer; 14 | import java.io.IOException; 15 | //now using the 3.1.25 which is CONLL + Enron, used to be 3.1.10 that only used CoNLL apparently and was faster. 
/**
 * Sets up the CogComp NER annotator for the requested view.
 *
 * A null or empty view name leaves the instance uninitialised, as before.
 * The only supported view is {@code "CONLL"}.
 *
 * @param viewName name of the NER view to load
 * @throws IllegalArgumentException if a non-empty, unsupported view name is given
 * @throws IllegalStateException    if the CoNLL model cannot be read
 */
public CogCompNer(String viewName) {

    // Strings must be compared by value: the previous ==/!= checks only
    // worked for interned literals and failed for names read at runtime.
    if (viewName == null || viewName.isEmpty()) {
        return;
    }
    if (!"CONLL".equals(viewName)) {
        // Previously this path ended in a NullPointerException on nerAn.
        throw new IllegalArgumentException("Unsupported CogComp NER view: " + viewName);
    }

    tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(splitOnHyphens));
    try {
        nerAn = new NERAnnotator(ViewNames.NER_CONLL);
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Could not Read NER_CONLL" + e.getMessage());
        // Fail with the real cause instead of dereferencing a null annotator below.
        throw new IllegalStateException("Could not read NER_CONLL", e);
    }
    nerAn.doInitialize();
}
main(String[] args) throws IOException { 83 | CogCompNer il = new CogCompNer("CONLL"); 84 | NamedEntities results = il.tagAlltoDoc( 85 | "The White House has denied a report in the New York Times saying that Iran had agreed to one-on-one negotiations over its nuclear programme with the US. The report, quoting unnamed officials, said Iran had agreed to the talks for the first time but would not hold them until after US elections on 6 November.The White House said it was prepared to meet Iran bilaterally, but that there was no plan to do so.Western states think Iran is seeking nuclear weapons, something it denies.Iran has been a key foreign policy topic in the US election campaign.President Barack Obama and Republican challenger Mitt Romney will hold their third and final campaign debate on Monday, on the subject of foreign policy."); 86 | 87 | System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false)); 88 | System.out.println(il.tagAlltoGeoJson("", false, 0, false, false)); 89 | 90 | } 91 | } -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/GateNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import java.io.IOException; 4 | import java.util.logging.Level; 5 | import java.util.logging.Logger; 6 | 7 | import edu.psu.ist.vaccine.geotxt.annie.ANNIEAdvExtractor; 8 | import edu.psu.ist.vaccine.geotxt.annie.GATEExtractor; 9 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 10 | import gate.Corpus; 11 | import gate.creole.ResourceInstantiationException; 12 | import gate.util.GateException; 13 | 14 | public class GateNer extends AbstractNer { 15 | 16 | private static ANNIEAdvExtractor annie = null; 17 | 18 | public GateNer(String gatehome) { 19 | 20 | if (annie == null) { 21 | try { 22 | GATEExtractor.getInstance(gatehome); 23 | annie = new ANNIEAdvExtractor(); 24 | annie.initAnnie(); 25 
| } catch (GateException e) { 26 | Logger.getLogger(GateNer.class.getName()).log(Level.SEVERE, "Could not load OpenNlps model files. Check the OPENNLPDIR address in the app.config file"); 27 | Logger.getLogger(GateNer.class.getName()).log(Level.SEVERE, null, e.getMessage()); 28 | e.printStackTrace(); 29 | } catch (IOException e) { 30 | Logger.getLogger(GateNer.class.getName()).log(Level.SEVERE, "Could not load OpenNlps model files. Check the OPENNLPDIR address in the app.config file"); 31 | Logger.getLogger(GateNer.class.getName()).log(Level.SEVERE, null, e.getMessage()); 32 | e.printStackTrace(); 33 | } 34 | } 35 | } 36 | 37 | // public String tagAllJsonString(String text) { 38 | // Document d = tagAlltoDoc(text); 39 | // return xstream.toXML(d); 40 | // } 41 | 42 | @Override 43 | public String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails) { 44 | NamedEntities d = tagAlltoDoc(text); 45 | return GeoJsonWriter.docToGeoJson(d, includeAlternates, maxCandidates, includeHierarchy, includeDetails); 46 | } 47 | 48 | @Override 49 | public NamedEntities tagAlltoDoc(String text) { 50 | if (text.equalsIgnoreCase("")) { 51 | return null; 52 | } 53 | NamedEntities d = new NamedEntities(); 54 | Corpus corpus = null; 55 | try { 56 | corpus = annie.processText(text); 57 | annie.setCorpus(corpus); 58 | annie.execute(); 59 | d.text = text; 60 | d.locs = annie.getLocations(corpus); 61 | d.orgs = annie.getOrganizations(corpus); 62 | d.pers = annie.getPersons(corpus); 63 | } catch (ResourceInstantiationException e) { 64 | e.printStackTrace(); 65 | } catch (GateException e) { 66 | e.printStackTrace(); 67 | } 68 | 69 | /* 70 | * I think we should find a way to change this so that the cleanup does not happen every single document. We should have something for the API that adds the documents to a batch, processes them in this batch, and them clean them up in batch. 
Right now, every single document is processed and then cleaned up one by one and that doesn't seem 71 | * efficient for an API. For the GeoTxt UI, this seems fine. It was actually AJ's post from 2011 I found about cleaning up the corpus that has seemed to cut the memory usage substantially. http://anujjaiswal.wordpress.com/2011/06/01/removing-out-of-memory-errors-in-gate/ 72 | */ 73 | 74 | annie.cleanUp(); 75 | 76 | return d; 77 | } 78 | 79 | public static void main(String args[]) { 80 | GateNer gate = new GateNer("C:\\Programs\\gate-8.4-build5748-BIN"); 81 | NamedEntities results = gate.tagAlltoDoc( 82 | "The White House has denied a report in the New York Times saying that Iran had agreed to one-on-one negotiations over its nuclear programme with the US. The report, quoting unnamed officials, said Iran had agreed to the talks for the first time but would not hold them until after US elections on 6 November.The White House said it was prepared to meet Iran bilaterally, but that there was no plan to do so.Western states think Iran is seeking nuclear weapons, something it denies.Iran has been a key foreign policy topic in the US election campaign.President Barack Obama and Republican challenger Mitt Romney will hold their third and final campaign debate on Monday, on the subject of foreign policy."); 83 | 84 | System.out.println(results.toString()); 85 | 86 | System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false)); 87 | System.out.println(gate.tagAlltoGeoJson("__", false, 0, false, false)); 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/InlineAnnotatedNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import edu.psu.ist.vaccine.geotxt.entities.Location; 4 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 5 | import 
edu.psu.ist.vaccine.geotxt.entities.Person; 6 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 7 | import java.util.regex.Matcher; 8 | import java.util.regex.Pattern; 9 | import java.io.FileNotFoundException; 10 | import java.io.IOException; 11 |

/**
 * Pseudo NER engine for text whose entities are already marked inline with
 * {@code <LOCATION>}, {@code <PERSON>} or {@code <ORGANIZATION>} element tags.
 * The tags are stripped and every tagged span is reported as an entity whose
 * position is its character offset in the tag-free text.
 */
public class InlineAnnotatedNer extends AbstractNer {

    /** Alternation of the tag names this class understands. */
    private static final String TAG_NAMES = "LOCATION|PERSON|ORGANIZATION";

    /** Opening tag, e.g. {@code <LOCATION>}; group 1 is the tag name. */
    private static final Pattern START_TAG = Pattern.compile("<(" + TAG_NAMES + ")>");

    /**
     * Closing tag, e.g. {@code </LOCATION>}; group 1 is the tag name. The
     * previous pattern was an empty string, which has no group 1, so every
     * annotated input failed with an IndexOutOfBoundsException.
     */
    private static final Pattern END_TAG = Pattern.compile("</(" + TAG_NAMES + ")>");

    public InlineAnnotatedNer() {

    }

    /**
     * Tags {@code text} and hands the result to
     * {@link GeoJsonWriter#docToGeoJson}; the four option arguments are passed
     * through unchanged.
     */
    @Override
    public String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails) {
        NamedEntities d = tagAlltoDoc(text);
        return GeoJsonWriter.docToGeoJson(d, includeAlternates, maxCandidates, includeHierarchy, includeDetails);
    }

    /**
     * Extracts the inline-annotated entities from {@code text}.
     *
     * @param text annotated input
     * @return the extracted entities, or {@code null} when {@code text} is
     *         {@code null} or empty
     */
    @Override
    public NamedEntities tagAlltoDoc(String text) {
        if (text == null || text.isEmpty()) {
            return null;
        }
        NamedEntities d = new NamedEntities();
        // NOTE(review): d.text keeps the annotated input, while the recorded
        // positions refer to the tag-free text -- confirm what consumers expect.
        d.text = text;

        String stripped = text;
        Matcher open = START_TAG.matcher(stripped);
        while (open.find()) {
            // Offset of the entity once its opening tag has been removed.
            int start = open.start();
            stripped = stripped.substring(0, start) + stripped.substring(open.end());

            // Only look for the closing tag at or after the entity start, so
            // that a stray earlier closing tag cannot yield end < start.
            Matcher close = END_TAG.matcher(stripped);
            if (close.find(start)) {
                int end = close.start();
                String tag = close.group(1);
                String name = stripped.substring(start, end);
                stripped = stripped.substring(0, end) + stripped.substring(close.end());

                if (tag.equals("LOCATION")) {
                    d.addLoc(new Location(name, start));
                } else if (tag.equals("ORGANIZATION")) {
                    d.addOrg(new Organization(name, start));
                } else if (tag.equals("PERSON")) {
                    d.addPer(new Person(name, start));
                }
            }
            // The text changed, so restart the search for the next opening tag.
            open = START_TAG.matcher(stripped);
        }
        return d;
    }

    /** Manual smoke test on a sample paragraph. */
    public static void main(String args[]) throws FileNotFoundException, IOException {
        InlineAnnotatedNer st = new InlineAnnotatedNer();
        NamedEntities results = st.tagAlltoDoc(
                "The White House has denied a report in the New York Times saying that Iran had agreed to one-on-one negotiations over its nuclear programme with the US. The report, quoting unnamed officials, said Iran had agreed to the talks for the first time but would not hold them until after US elections on 6 November.The White House said it was prepared to meet Iran bilaterally, but that there was no plan to do so.Western states think Iran is seeking nuclear weapons, something it denies.Iran has been a key foreign policy topic in the US election campaign.President Barack Obama and Republican challenger Mitt Romney will hold their third and final campaign debate on Monday, on the subject of foreign policy.");

        System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false));
        System.out.println(st.tagAlltoGeoJson("", false, 0, false, false));
    }
}
75 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/LingPipeNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.util.Set; 7 | import java.util.logging.Level; 8 | import java.util.logging.Logger; 9 | 10 | import com.aliasi.chunk.Chunk; 11 | import com.aliasi.chunk.Chunker; 12 | import com.aliasi.chunk.Chunking; 13 | import com.aliasi.util.AbstractExternalizable; 14 | 15 | import edu.psu.ist.vaccine.geotxt.entities.Location; 16 | import
edu.psu.ist.vaccine.geotxt.entities.Organization; 17 | import edu.psu.ist.vaccine.geotxt.entities.Person; 18 | import edu.psu.ist.vaccine.geotxt.utils.Config; 19 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 20 | 21 | 22 |

/**
 * NER engine backed by a serialized LingPipe {@link Chunker} model.
 */
public class LingPipeNer extends AbstractNer {

    private Chunker chunker;

    /**
     * Deserializes the chunker model from {@code modelPath}. If the model
     * cannot be loaded the failure is logged and the JVM is terminated with
     * exit status -1 (behaviour kept from the original implementation).
     *
     * @param modelPath path of the serialized LingPipe chunker
     */
    public LingPipeNer(String modelPath) {

        File modelFile = new File(modelPath);

        try {
            chunker = (Chunker) AbstractExternalizable.readObject(modelFile);
        } catch (ClassNotFoundException | IOException e) {
            // Pass the exception as the Throwable argument so the stack trace
            // is logged; the old log(Level, null, message) call treated the
            // message as a format parameter of a null message and lost it.
            Logger.getLogger(LingPipeNer.class.getName()).log(Level.SEVERE,
                    "Could not load LingPipe model file. Check the LINGPIPEMODEL path in the app.config file", e);
            System.exit(-1);
        }

    }

    /**
     * Tags {@code text} and hands the result to
     * {@link GeoJsonWriter#docToGeoJson}; the four option arguments are passed
     * through unchanged.
     */
    @Override
    public String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails) {
        NamedEntities d = tagAlltoDoc(text);
        return GeoJsonWriter.docToGeoJson(d, includeAlternates, maxCandidates, includeHierarchy, includeDetails);
    }

    /**
     * Runs the chunker over {@code text} and collects the LOCATION,
     * ORGANIZATION and PERSON chunks; chunks of any other type are ignored.
     *
     * @param text input text
     * @return the extracted entities, or {@code null} when {@code text} is
     *         {@code null} or empty
     */
    @Override
    public NamedEntities tagAlltoDoc(String text) {

        if (text == null || text.isEmpty()) {
            return null;
        }

        NamedEntities nes = new NamedEntities();
        nes.text = text;

        Chunking chunking = chunker.chunk(text);
        Set<Chunk> chunkingSet = chunking.chunkSet();

        for (Chunk chunk : chunkingSet) {

            String tag = chunk.type();
            int start = chunk.start();
            String name = text.substring(start, chunk.end());

            if (tag.equals("LOCATION")) {
                nes.addLoc(new Location(name, start));
            } else if (tag.equals("ORGANIZATION")) {
                nes.addOrg(new Organization(name, start));
            } else if (tag.equals("PERSON")) {
                nes.addPer(new Person(name, start));
            }
        }

        return nes;
    }

    /** Manual smoke test using the model path from the application config. */
    public static void main(String[] args) throws FileNotFoundException, IOException {

        Config config = new Config();
        LingPipeNer lingPipe = new LingPipeNer(config.getLingPipeDir());

        NamedEntities results = lingPipe.tagAlltoDoc(
                "The White House has denied a report in the New York Times saying that Iran had agreed to one-on-one negotiations over its nuclear programme with the US. The report, quoting unnamed officials, said Iran had agreed to the talks for the first time but would not hold them until after US elections on 6 November.The White House said it was prepared to meet Iran bilaterally, but that there was no plan to do so.Western states think Iran is seeking nuclear weapons, something it denies.Iran has been a key foreign policy topic in the US election campaign.President Barack Obama and Republican challenger Mitt Romney will hold their third and final campaign debate on Monday, on the subject of foreign policy.");

        System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false));
        System.out.println(lingPipe.tagAlltoGeoJson("", false, 0, false, false));
    }

}
92 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/MitNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import edu.mit.ll.mitie.EntityMention; 4 | import edu.mit.ll.mitie.EntityMentionVector; 5 | import edu.mit.ll.mitie.NamedEntityExtractor; 6 | import edu.mit.ll.mitie.StringVector; 7 | import edu.mit.ll.mitie.TokenIndexVector; 8 | import edu.mit.ll.mitie.global; 9 | import edu.psu.ist.vaccine.geotxt.entities.Location; 10 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 11 | import edu.psu.ist.vaccine.geotxt.entities.Person; 12 | import edu.psu.ist.vaccine.geotxt.utils.Config; 13 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 14 | 15 | import
java.io.FileNotFoundException; 16 | import java.io.IOException; 17 | 18 | public class MitNer extends AbstractNer { 19 | 20 | private NamedEntityExtractor classifier = null; 21 | 22 | public MitNer(String modelPath) { 23 | classifier = new NamedEntityExtractor(modelPath); 24 | } 25 | 26 | @Override 27 | public String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails) { 28 | NamedEntities d = tagAlltoDoc(text); 29 | return GeoJsonWriter.docToGeoJson(d, includeAlternates, maxCandidates, includeHierarchy, includeDetails); 30 | } 31 | 32 | @Override 33 | public NamedEntities tagAlltoDoc(String text) { 34 | if (text.equalsIgnoreCase("")) { 35 | return null; 36 | } 37 | 38 | NamedEntities d = new NamedEntities(); 39 | d.text = text; 40 | System.out.println(text); 41 | 42 | // StringVector words = global.tokenize(text); 43 | TokenIndexVector offsets = global.tokenizeWithOffsets(text); 44 | EntityMentionVector entities = classifier.extractEntities(offsets); 45 | 46 | for (int i = 0; i < entities.size(); ++i) { 47 | EntityMention entity = entities.get(i); 48 | int start = Math.toIntExact(offsets.get(entity.getStart()).getIndex()); 49 | int end = Math.toIntExact(offsets.get(entity.getEnd() - 1).getIndex() + offsets.get(entity.getEnd() - 1).getToken().length()); 50 | while (end > text.length()) { 51 | // seems to be a problem with MIT tokenization 52 | end--; 53 | } 54 | 55 | String name = text.substring(start, end); 56 | if (entity.getTag() == 1) { 57 | d.addLoc(new Location(name, start)); 58 | } else if (entity.getTag() == 2) { 59 | d.addOrg(new Organization(name, start)); 60 | } else if (entity.getTag() == 0) { 61 | d.addPer(new Person(name, start)); 62 | } 63 | } 64 | return d; 65 | } 66 | 67 | public static void main(String args[]) throws FileNotFoundException, IOException { 68 | Config config = new Config(); 69 | MitNer mit = new MitNer(config.getMit_dir()); 70 | NamedEntities results = 
mit.tagAlltoDoc("RT @nytimeshealth: Nigeria’s Ebola success was in part due to the existence of an emergency command center paid for by the Iran…"); 71 | System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false)); 72 | System.out.println(mit.tagAlltoGeoJson("", false, 0, false, false)); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/NamedEntities.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import com.ning.http.client.providers.grizzly.Utils; 8 | 9 | import edu.psu.ist.vaccine.geotxt.entities.Location; 10 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 11 | import edu.psu.ist.vaccine.geotxt.entities.Person; 12 | import edu.psu.ist.vaccine.geotxt.utils.HashtagProcessor; 13 |

/**
 * Result of running a NER engine over one text: the text itself plus the
 * locations, organizations and persons found in it. An entity that equals one
 * already stored is not added again; its positions are merged into the stored
 * entity instead.
 */
public class NamedEntities {
    public String text;
    public List<Location> locs = new ArrayList<Location>();
    public List<Organization> orgs = new ArrayList<Organization>();
    public List<Person> pers = new ArrayList<Person>();

    /** Adds {@code loc}, or merges its positions into an equal stored location. */
    public void addLoc(Location loc) {
        int index = this.locs.indexOf(loc);
        if (index < 0) {
            this.locs.add(loc);
            return;
        }
        Location known = this.locs.get(index);
        for (int p : loc.getPositions()) {
            known.addPosition(p);
        }
    }

    /**
     * Adds {@code per}, or merges its positions into an equal stored person.
     * A person whose name is just "@" is ignored. Previously such a person
     * fell through to the merge branch and, not being stored yet, triggered
     * {@code get(-1)} and an IndexOutOfBoundsException.
     */
    public void addPer(Person per) {
        if ("@".equals(per.getName())) {
            return;
        }
        int index = this.pers.indexOf(per);
        if (index < 0) {
            this.pers.add(per);
            return;
        }
        Person known = this.pers.get(index);
        for (int p : per.getPositions()) {
            known.addPosition(p);
        }
    }

    /** Adds {@code org}, or merges its positions into an equal stored organization. */
    public void addOrg(Organization org) {
        int index = this.orgs.indexOf(org);
        if (index < 0) {
            this.orgs.add(org);
            return;
        }
        Organization known = this.orgs.get(index);
        for (int p : org.getPositions()) {
            known.addPosition(p);
        }
    }


    /**
     * Shifts entity positions by +1 for every index reported by
     * {@code processor.getHashtagCharIndexes()} and by -1 for every index
     * reported by {@code processor.getCapitalCharIndexes()}.
     */
    public void adjustCharIndexesForHashtags(HashtagProcessor processor) {
        this.shiftChars(processor.getHashtagCharIndexes(), 1);
        this.shiftChars(processor.getCapitalCharIndexes(), -1);
    }


    /**
     * For each special character index, in order, moves every entity position
     * at or after that index by {@code charShift}.
     */
    private void shiftChars(List<Integer> specialChars, int charShift) {
        for (int specialChar : specialChars) {
            for (Location l : this.locs) {
                shiftPositions(l.getPositions(), specialChar, charShift);
            }
            for (Organization o : this.orgs) {
                shiftPositions(o.getPositions(), specialChar, charShift);
            }
            for (Person pe : this.pers) {
                shiftPositions(pe.getPositions(), specialChar, charShift);
            }
        }
    }

    /**
     * Adds {@code charShift} to every element of {@code positions} that is
     * {@code >= threshold}. Elements are addressed by index: the former
     * {@code indexOf(p)} lookup hit the first equal value, so with adjacent
     * positions such as [5, 6] and a shift of +1 the first element was
     * shifted twice ([7, 6]) instead of each once ([6, 7]).
     */
    private static void shiftPositions(List<Integer> positions, int threshold, int charShift) {
        for (int i = 0; i < positions.size(); i++) {
            int p = positions.get(i);
            if (p >= threshold) {
                positions.set(i, p + charShift);
            }
        }
    }

    @Override
    public String toString() {
        return "Original Text = " + text + ", Locations: " + locs.toString() + ", Organizations: " + orgs.toString() + ", Persons: " + pers.toString();
    }
}
91 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/NerEngines.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | public enum NerEngines { 4 | NONE, GATE, STANFORD, OPENNLP, COGCOMP, LINGPIPE, MIT, UNION, INLINE 5 | } 6 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/OpenNlpNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.util.logging.Level; 8 | import
java.util.logging.Logger; 9 | 10 | import opennlp.tools.namefind.NameFinderME; 11 | import opennlp.tools.namefind.TokenNameFinderModel; 12 | import opennlp.tools.tokenize.TokenizerME; 13 | import opennlp.tools.tokenize.TokenizerModel; 14 | import opennlp.tools.util.Span; 15 | import edu.psu.ist.vaccine.geotxt.entities.Location; 16 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 17 | import edu.psu.ist.vaccine.geotxt.entities.Person; 18 | import edu.psu.ist.vaccine.geotxt.utils.Config; 19 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 20 |

/**
 * NER engine backed by the OpenNLP tokenizer and three name finders
 * (location, organization, person).
 */
public class OpenNlpNer extends AbstractNer {

    private TokenizerME tokenizer;
    private NameFinderME locFinder;
    private NameFinderME orgFinder;
    private NameFinderME perFinder;

    /**
     * Loads {@code en-token.bin}, {@code en-ner-location.bin},
     * {@code en-ner-organization.bin} and {@code en-ner-person.bin} from
     * {@code openNlpDir}. If any model cannot be read the failure is logged
     * and the JVM is terminated with exit status -1 (behaviour kept from the
     * original implementation).
     *
     * @param openNlpDir directory holding the model files, with or without a
     *                   trailing separator
     */
    public OpenNlpNer(String openNlpDir) {

        try {
            // Loading the tokenizer model; the stream is closed once the
            // model has been read.
            try (InputStream in = openModel(openNlpDir, "en-token.bin")) {
                tokenizer = new TokenizerME(new TokenizerModel(in));
            }

            // Loading the NER models
            locFinder = loadNameFinder(openNlpDir, "en-ner-location.bin");
            orgFinder = loadNameFinder(openNlpDir, "en-ner-organization.bin");
            perFinder = loadNameFinder(openNlpDir, "en-ner-person.bin");

        } catch (IOException e) {
            // Pass the exception as the Throwable argument so the stack trace
            // is logged together with the hint.
            Logger.getLogger(OpenNlpNer.class.getName()).log(Level.SEVERE,
                    "Could not load OpenNlps model files. Check the OPENNLPDIR address in the app.config file", e);
            System.exit(-1);
        }

    }

    /**
     * Opens a model file inside {@code dir}. Joining through File means the
     * directory no longer needs to end with a path separator.
     */
    private static InputStream openModel(String dir, String fileName) throws IOException {
        return new FileInputStream(new java.io.File(dir, fileName));
    }

    /** Reads one name finder model and closes its stream afterwards. */
    private static NameFinderME loadNameFinder(String dir, String fileName) throws IOException {
        try (InputStream in = openModel(dir, fileName)) {
            return new NameFinderME(new TokenNameFinderModel(in));
        }
    }

    /**
     * Tags {@code text} and hands the result to
     * {@link GeoJsonWriter#docToGeoJson}; the four option arguments are passed
     * through unchanged.
     */
    @Override
    public String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails) {
        NamedEntities d = tagAlltoDoc(text);
        return GeoJsonWriter.docToGeoJson(d, includeAlternates, maxCandidates, includeHierarchy, includeDetails);
    }

    /**
     * Tokenizes {@code text}, runs the three name finders and records each
     * found name with the character offset of its first token.
     *
     * @return the extracted entities, or {@code null} when {@code text} is
     *         {@code null} or empty
     */
    @Override
    public NamedEntities tagAlltoDoc(String text) {
        if (text == null || text.isEmpty()) {
            return null;
        }
        NamedEntities nes = new NamedEntities();
        nes.text = text;

        Span tokenSpans[] = tokenizer.tokenizePos(text);
        String tokenStrings[] = tokenizer.tokenize(text);

        try {
            // Finding the names in the sentence
            for (Span s : locFinder.find(tokenStrings)) {
                nes.addLoc(new Location(coveredText(text, tokenSpans, s), tokenSpans[s.getStart()].getStart()));
            }

            for (Span s : orgFinder.find(tokenStrings)) {
                nes.addOrg(new Organization(coveredText(text, tokenSpans, s), tokenSpans[s.getStart()].getStart()));
            }

            for (Span s : perFinder.find(tokenStrings)) {
                nes.addPer(new Person(coveredText(text, tokenSpans, s), tokenSpans[s.getStart()].getStart()));
            }
        } finally {
            // The finders keep adaptive data from previous find() calls; reset
            // it so that one input does not influence the next one.
            locFinder.clearAdaptiveData();
            orgFinder.clearAdaptiveData();
            perFinder.clearAdaptiveData();
        }

        return nes;
    }

    /**
     * Returns the text covered by a name span. A name span is a range of
     * token indexes [start, end), so the name runs from the first character
     * of its first token to the last character of its last token. Previously
     * only the first token was used, cutting multi-token names short.
     */
    private static String coveredText(String text, Span[] tokenSpans, Span name) {
        int from = tokenSpans[name.getStart()].getStart();
        int to = tokenSpans[name.getEnd() - 1].getEnd();
        return text.substring(from, to);
    }
100 | 101 | public static void main(String[] args) throws FileNotFoundException, IOException { 102 | Config config = new Config(); 103 | OpenNlpNer openNlp = new OpenNlpNer(config.getOpenNlpDir()); 104 | 105 | // String sentence = "I love Brack Obama and live in Pennsylvania.
I would love to move to Ohio, especially Columbus, OH, to work at Starbucks."; 106 | 107 | NamedEntities results = openNlp.tagAlltoDoc( 108 | "The White House has denied a report in the New York Times saying that Iran had agreed to one-on-one negotiations over its nuclear programme with the US. The report, quoting unnamed officials, said Iran had agreed to the talks for the first time but would not hold them until after US elections on 6 November.The White House said it was prepared to meet Iran bilaterally, but that there was no plan to do so.Western states think Iran is seeking nuclear weapons, something it denies.Iran has been a key foreign policy topic in the US election campaign.President Barack Obama and Republican challenger Mitt Romney will hold their third and final campaign debate on Monday, on the subject of foreign policy."); 109 | 110 | System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false)); 111 | System.out.println(openNlp.tagAlltoGeoJson("", false, 0, false, false)); 112 | 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/ner/StanfordNer.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.ner; 2 | 3 | import edu.psu.ist.vaccine.geotxt.entities.Location; 4 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 5 | import edu.psu.ist.vaccine.geotxt.entities.Person; 6 | import edu.psu.ist.vaccine.geotxt.utils.Config; 7 | import edu.psu.ist.vaccine.geotxt.utils.GeoJsonWriter; 8 | 9 | import edu.stanford.nlp.ie.crf.CRFClassifier; 10 | import edu.stanford.nlp.ling.CoreLabel; 11 | import edu.stanford.nlp.util.Triple; 12 | 13 | import java.util.List; 14 | import java.io.FileNotFoundException; 15 | import java.io.IOException; 16 | 17 | public class StanfordNer extends AbstractNer { 18 | 19 | private CRFClassifier classifier = null; 20 | 21 | public StanfordNer(String 
modelPath) { 22 | 23 | classifier = CRFClassifier.getClassifierNoExceptions(modelPath); 24 | } 25 | 26 | @Override 27 | public String tagAlltoGeoJson(String text, boolean includeAlternates, int maxCandidates, boolean includeHierarchy, boolean includeDetails) { 28 | NamedEntities d = tagAlltoDoc(text); 29 | return GeoJsonWriter.docToGeoJson(d, includeAlternates, maxCandidates, includeHierarchy, includeDetails); 30 | } 31 | 32 | @Override 33 | public NamedEntities tagAlltoDoc(String text) { 34 | if (text.equalsIgnoreCase("")) { 35 | return null; 36 | } 37 | NamedEntities d = new NamedEntities(); 38 | d.text = text; 39 | 40 | // Set tags = classifier.labels(); 41 | List> list = classifier.classifyToCharacterOffsets(text); 42 | for (Triple namedEntity : list) { 43 | String tag = namedEntity.first(); 44 | int start = namedEntity.second(); 45 | String name = text.substring(namedEntity.second(), namedEntity.third()); 46 | 47 | if (tag.equals("LOCATION")) { 48 | d.addLoc(new Location(name, start)); 49 | } else if (tag.equals("ORGANIZATION")) { 50 | d.addOrg(new Organization(name, start)); 51 | } else if (tag.equals("PERSON")) { 52 | d.addPer(new Person(name, start)); 53 | } 54 | } 55 | 56 | return d; 57 | } 58 | 59 | public static void main(String args[]) throws FileNotFoundException, IOException { 60 | Config config = new Config(); 61 | StanfordNer st = new StanfordNer(config.getStanford_ner()); 62 | NamedEntities results = st.tagAlltoDoc( 63 | "The White House has denied a report in the New York Times saying that Iran had agreed to one-on-one negotiations over its nuclear programme with the US. 
The report, quoting unnamed officials, said Iran had agreed to the talks for the first time but would not hold them until after US elections on 6 November.The White House said it was prepared to meet Iran bilaterally, but that there was no plan to do so.Western states think Iran is seeking nuclear weapons, something it denies.Iran has been a key foreign policy topic in the US election campaign.President Barack Obama and Republican challenger Mitt Romney will hold their third and final campaign debate on Monday, on the subject of foreign policy."); 64 | 65 | System.out.println(GeoJsonWriter.docToGeoJson(results, false, 0, false, false)); 66 | System.out.println(st.tagAlltoGeoJson("", false, 0, false, false)); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/test/EntityExtractionTest.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.test; 2 | 3 | import edu.psu.ist.vaccine.geotxt.annie.ANNIEAdvExtractor; 4 | import edu.psu.ist.vaccine.geotxt.annie.GATEExtractor; 5 | import edu.psu.ist.vaccine.geotxt.entities.Location; 6 | import edu.psu.ist.vaccine.geotxt.entities.Organization; 7 | import edu.psu.ist.vaccine.geotxt.entities.OtherEntity; 8 | import edu.psu.ist.vaccine.geotxt.entities.Person; 9 | import gate.Corpus; 10 | import gate.Gate; 11 | import gate.creole.ResourceInstantiationException; 12 | import gate.util.GateException; 13 | 14 | import java.io.BufferedReader; 15 | import java.io.File; 16 | import java.io.FileReader; 17 | import java.io.IOException; 18 | import java.util.ArrayList; 19 | 20 | public class EntityExtractionTest { 21 | public static ANNIEAdvExtractor annie = null; 22 | 23 | public static ArrayList input(String infile) { 24 | ArrayList ret = new ArrayList(); 25 | try { 26 | BufferedReader br = new BufferedReader(new FileReader(infile)); 27 | String line = null; 28 | while ((line = 
br.readLine()) != null) { 29 | ret.add(line); 30 | } 31 | br.close(); 32 | } catch (IOException e) { 33 | e.printStackTrace(); 34 | } 35 | return ret; 36 | } 37 | 38 | public static void processText(String text) { 39 | try { 40 | Corpus corpus = annie.processText(text); 41 | annie.setCorpus(corpus); 42 | annie.execute(); 43 | ArrayList entities = annie.getOthers(corpus); 44 | ArrayList locs = annie.getLocations(corpus); 45 | ArrayList orgs = annie.getOrganizations(corpus); 46 | ArrayList pers = annie.getPersons(corpus); 47 | if (!locs.isEmpty()) { 48 | for (Location loc : locs) { 49 | System.out.print(loc.getName() + "\t"); 50 | } 51 | } 52 | System.out.println(locs.size()); 53 | } catch (ResourceInstantiationException e) { 54 | e.printStackTrace(); 55 | } catch (GateException e) { 56 | e.printStackTrace(); 57 | } 58 | } 59 | 60 | public static void main(String args[]) { 61 | try { 62 | GATEExtractor.getInstance("C:/Users/wzh112/workspace/gate-7.1-build4485-BIN"); 63 | annie = new ANNIEAdvExtractor(); 64 | annie.initAnnie(); 65 | ArrayList tweets = input("tweets"); 66 | for (String str : tweets) { 67 | processText(str); 68 | } 69 | } catch (GateException e) { 70 | e.printStackTrace(); 71 | } catch (IOException e) { 72 | e.printStackTrace(); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/Analyzer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package edu.psu.ist.vaccine.geotxt.utils; 6 | 7 | import java.io.IOException; 8 | import java.net.URISyntaxException; 9 | import java.util.Map; 10 | import edu.psu.ist.vaccine.geotxt.ner.NamedEntities; 11 | import edu.psu.ist.vaccine.geotxt.ner.NerEngines; 12 | 13 | /** 14 | * 15 | * @author Morteza Karimzadeh 16 | */ 17 | public interface Analyzer { 18 | 19 | NamedEntities analyze(String text, NerEngines engine, Map context) throws IllegalArgumentException, URISyntaxException, URISyntaxException, IOException; 20 | 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/BBox.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | 3 | public class BBox { 4 | 5 | private double minx; 6 | private double miny; 7 | private double maxx; 8 | private double maxy; 9 | private int numPoints = 0; 10 | private double multiplier = 0.001; 11 | 12 | public BBox() { 13 | this.minx = -180; 14 | this.miny = -90; 15 | this.maxx = 180; 16 | this.maxy = 90; 17 | } 18 | 19 | public void expand(double x, double y) { 20 | if (numPoints == 0) { 21 | minx = x-(Math.abs(x)*multiplier); 22 | miny = y-(Math.abs(y)*multiplier); 23 | maxx = x+(Math.abs(x)*multiplier); 24 | maxy = y+(Math.abs(y)*multiplier); 25 | } else { 26 | if (x < minx) { 27 | minx = x; 28 | } 29 | if (x > maxx) { 30 | maxx = x; 31 | } 32 | if (y < miny) { 33 | miny = y; 34 | } 35 | if (y > maxy) { 36 | maxy = y; 37 | } 38 | } 39 | minx = minx > -180 ? minx : -180; 40 | miny = miny > -90 ? miny : -90; 41 | maxx = maxx < 180 ? maxx : 180; 42 | maxy = maxy < 90 ? 
maxy : 90; 43 | numPoints++; 44 | } 45 | 46 | public void compress(double x, double y) { 47 | if (numPoints == 0) { 48 | minx = x-(Math.abs(x)*multiplier); 49 | miny = y-(Math.abs(y)*multiplier); 50 | maxx = x+(Math.abs(x)*multiplier); 51 | maxy = y+(Math.abs(y)*multiplier); 52 | } else { 53 | if (x > minx && x < maxx) { 54 | minx = x; 55 | } 56 | if (x < maxx && x > minx) { 57 | maxx = x; 58 | } 59 | if (y > miny && y < maxy) { 60 | miny = y; 61 | } 62 | if (y < maxy && y > miny) { 63 | maxy = y; 64 | } 65 | } 66 | minx = minx > -180 ? minx : -180; 67 | miny = miny > -90 ? miny : -90; 68 | maxx = maxx < 180 ? maxx : 180; 69 | maxy = maxy < 90 ? maxy : 90; 70 | numPoints++; 71 | } 72 | 73 | public double getMinx() { 74 | return minx; 75 | } 76 | 77 | public double getMiny() { 78 | return miny; 79 | } 80 | 81 | public double getMaxx() { 82 | return maxx; 83 | } 84 | 85 | public double getMaxy() { 86 | return maxy; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/Config.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.util.Properties; 8 | 9 | /** 10 | * Factory class to provide access to configuration parameters. 11 | * 12 | * @author jow 13 | * 14 | */ 15 | public class Config { 16 | 17 | /** 18 | * Property set to get configuration parameters. 
Properties include: 19 | * gate_home - path to home folder of gate stanford_ner_classifier - path to 20 | * classifier used for stanford NER benchmark_instance_set - path to file or 21 | * folder containing the instance set for benchmarking 22 | */ 23 | private String gate_home = ""; 24 | private String stanford_ner = ""; 25 | private String opennlp_dir = ""; 26 | private String lingpipe_dir =""; 27 | private String mit_model = ""; 28 | 29 | /** 30 | * new Properties() Loads properties from file propertyFile. 31 | * 32 | * @param propertyFile 33 | * @throws FileNotFoundException 34 | * @throws IOException 35 | */ 36 | public Config() throws FileNotFoundException, IOException { 37 | // String propertyFile = ".properties"; 38 | String propertyFile = System.getProperty("user.dir") + "\\conf\\application.conf"; 39 | 40 | FileInputStream fis = new FileInputStream(propertyFile); 41 | // InputStream fis = Config.class.getResourceAsStream(propertyFile); 42 | 43 | Properties properties = new Properties(); 44 | properties.load(fis); 45 | fis.close(); 46 | 47 | // checking to see if the address has quotation marks in the beginning 48 | // or end and then remove them. 49 | // the application.conf of play requires the quotation marks if there is 50 | // a : in the addresses. 
51 | gate_home = properties.getProperty("GATEHOME").replace("\"", ""); 52 | stanford_ner = properties.getProperty("STANFORDMODEL").replace("\"", ""); 53 | opennlp_dir = properties.getProperty("OPENNLPDIR").replace("\"", ""); 54 | lingpipe_dir = properties.getProperty("LINGPIPEMODEL").replace("\"", ""); 55 | setMit_dir(properties.getProperty("MITMODEL").replace("\"", "")); 56 | 57 | } 58 | 59 | public Config(String gateAddress, String stanfordAddress, String openNlpDir, String lingPipeDir, String mitDir) { 60 | 61 | gate_home = gateAddress; 62 | stanford_ner = stanfordAddress; 63 | opennlp_dir = openNlpDir; 64 | lingpipe_dir = lingPipeDir; 65 | setMit_dir(mitDir); 66 | } 67 | 68 | public Config(boolean localAddress) { 69 | 70 | gate_home = "C:/Programs/gate-8.1-build5169-BIN"; 71 | stanford_ner = "C:/Programs/Stanford/geovista-ner-model.ser.gz"; 72 | opennlp_dir = "C:/Programs/openNlp"; 73 | lingpipe_dir = "C:/Programs/lingpipe/ne-en-news-muc6.AbstractCharLmRescoringChunker"; 74 | setMit_dir("C:/Programs/mit/ner_model.dat"); 75 | } 76 | 77 | /** 78 | * @return the gate_home 79 | */ 80 | public String getGate_home() { 81 | return gate_home; 82 | } 83 | 84 | /** 85 | * @return the stanford_ner 86 | */ 87 | public String getStanford_ner() { 88 | return stanford_ner; 89 | } 90 | 91 | /** 92 | * @return the opennlp_dir 93 | */ 94 | public String getOpenNlpDir() { 95 | return opennlp_dir; 96 | } 97 | 98 | /** 99 | * @return the LINGPIPEHOME set in app.conf 100 | */ 101 | public String getLingPipeDir() { 102 | return lingpipe_dir; 103 | } 104 | 105 | public String getMit_dir() { 106 | return mit_model; 107 | } 108 | 109 | public void setMit_dir(String mit_dir) { 110 | this.mit_model = mit_dir; 111 | } 112 | 113 | // Test to see if you get the right addresses here. 
114 | public static void main(String[] args) throws FileNotFoundException, IOException { 115 | Config config = new Config(); 116 | System.out.println(config.getGate_home()); 117 | System.out.println(config.getStanford_ner()); 118 | } 119 | 120 | 121 | } 122 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/FileWriter.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | 8 | public class FileWriter { 9 | 10 | public static void writeFile(String value, String path) throws IOException { 11 | File file = new File(path); 12 | FileOutputStream fos = new FileOutputStream(file); 13 | if (!file.exists()) { 14 | file.createNewFile(); 15 | } 16 | 17 | byte[] contentInBytes = value.getBytes(); 18 | 19 | fos.write(contentInBytes); 20 | fos.flush(); 21 | fos.close(); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/GeocodingUtils.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Comparator; 5 | import java.util.List; 6 | import java.util.PriorityQueue; 7 | 8 | import org.geonames.InsufficientStyleException; 9 | import org.geonames.Style; 10 | import org.geonames.Toponym; 11 | import org.geonames.ToponymSearchCriteria; 12 | import org.geonames.ToponymSearchResult; 13 | import org.geonames.WebService; 14 | 15 | import edu.psu.ist.vaccine.geotxt.hierarchy.MapHierarchyPlaces; 16 | 17 | public class GeocodingUtils { 18 | 19 | /** Comparator for sorting toponyms by decreasing population size **/ 20 | private static class ToponymPopulationComparator implements 
Comparator { 21 | 22 | public int compare(LocationWrapper x, LocationWrapper y) { 23 | double px = 0, py = 0; 24 | 25 | 26 | px = x.getPopulation(); 27 | 28 | py = y.getPopulation(); 29 | 30 | if (px < py) return 1; 31 | else if (px > py) return -1; 32 | 33 | return 0; 34 | } 35 | } 36 | 37 | 38 | 39 | /** Queries Geonames to get up to MAX_ROWS toponyms for name */ 40 | public static List getToponymsFromGeonames(String name, int MAX_ROWS) { 41 | 42 | ToponymSearchCriteria searchCriteria = new ToponymSearchCriteria(); 43 | searchCriteria.setMaxRows(MAX_ROWS); 44 | searchCriteria.setQ(name); 45 | searchCriteria.setStyle(Style.FULL); 46 | WebService.setUserName(MapHierarchyPlaces.username); 47 | 48 | ArrayList matches = new ArrayList(); 49 | 50 | ToponymSearchResult searchResult = null; 51 | try { 52 | searchResult = WebService.search(searchCriteria); 53 | 54 | for (int i = 0; i < searchResult.getToponyms().size(); i++) { 55 | matches.add(new LocationWrapperGeonamesToponym(searchResult.getToponyms().get(i))); 56 | } 57 | } catch(Exception e) { 58 | e.printStackTrace(); 59 | } 60 | 61 | return matches; 62 | } 63 | 64 | /** determines the NUM_BEST_MATCHES best results from those returned by geonames based on how well the names match and 65 | * population size 66 | */ 67 | public static List getBestCandidates(String name,List searchResult, int NUM_BEST_MATCHES) { 68 | 69 | /* best candidates from geonames entities whose name matches exactly */ 70 | PriorityQueue bestExactMatchingToponyms = new PriorityQueue(NUM_BEST_MATCHES, new ToponymPopulationComparator() ); 71 | 72 | /* best candidates from geonames entities who have an alternative name that matches exactly */ 73 | PriorityQueue bestExactMatchingAlternativeNamesToponyms = new PriorityQueue(NUM_BEST_MATCHES, new ToponymPopulationComparator() ); 74 | 75 | /* best candidates from geonames entities that dont fall into the first two categories */ 76 | PriorityQueue bestOtherToponyms = new PriorityQueue(NUM_BEST_MATCHES, 
new ToponymPopulationComparator() ); 77 | 78 | ArrayList best = new ArrayList(NUM_BEST_MATCHES); 79 | 80 | if (searchResult != null) { 81 | for (LocationWrapper t : searchResult) { // go through all query results 82 | 83 | if (t.getName().equalsIgnoreCase(name)) { // first category? 84 | 85 | bestExactMatchingToponyms.offer(t); 86 | 87 | } else if (bestExactMatchingToponyms.size() < NUM_BEST_MATCHES) { 88 | String[] names = null; 89 | 90 | if (t.getAlternateNames() != null) { 91 | names = t.getAlternateNames(); 92 | } 93 | 94 | 95 | boolean found = false; 96 | if (names != null) { 97 | for (String n : names) { 98 | if (n.equalsIgnoreCase(name)) { // second category? 99 | bestExactMatchingAlternativeNamesToponyms.offer(t); 100 | found = true; 101 | break; 102 | } 103 | } 104 | } 105 | 106 | if (!found && (bestExactMatchingToponyms.size() + bestExactMatchingAlternativeNamesToponyms.size() < NUM_BEST_MATCHES)) { // third category? 107 | bestOtherToponyms.offer(t); 108 | } 109 | } 110 | } 111 | 112 | // if we have multiple place names, draw from the categories get the best candidates to use in the hierarchical approach 113 | 114 | while (!bestExactMatchingToponyms.isEmpty() && best.size() < NUM_BEST_MATCHES) { 115 | best.add(bestExactMatchingToponyms.poll()); 116 | } 117 | while (!bestExactMatchingAlternativeNamesToponyms.isEmpty() && best.size() < NUM_BEST_MATCHES) { 118 | best.add(bestExactMatchingAlternativeNamesToponyms.poll()); 119 | } 120 | while (!bestOtherToponyms.isEmpty() && best.size() < NUM_BEST_MATCHES) { 121 | best.add(bestOtherToponyms.poll()); 122 | } 123 | } 124 | 125 | return best; 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/HashtagProcessor.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | import java.util.ArrayList; 3 | 4 | /** 5 | * THIS IS DIFFERENT FROM THE ONE IN 
GEOTXT. ENHANCED BASED ON THAT. Provides 6 | * auxiliary methods to pre-process a text message before feeding it into a NER 7 | * tool. 8 | * 9 | * @author jow 10 | * @author Morteza 11 | */ 12 | 13 | public class HashtagProcessor { 14 | 15 | private String hashtagRemoved; 16 | private ArrayList hashtagCharIndexes; 17 | private ArrayList capitalCharIndexes; 18 | 19 | public HashtagProcessor( ){ 20 | this.hashtagRemoved =""; 21 | this.hashtagCharIndexes = new ArrayList(); 22 | this.capitalCharIndexes = new ArrayList(); 23 | } 24 | 25 | public HashtagProcessor(String hashtagsRemoved, ArrayList hashtagCharIndexes, ArrayList capitalCharIndexes) { 26 | this.hashtagRemoved = hashtagsRemoved; 27 | this.hashtagCharIndexes = hashtagCharIndexes; 28 | this.capitalCharIndexes = capitalCharIndexes; 29 | } 30 | 31 | public String getHashtagRemoved() { 32 | return hashtagRemoved; 33 | } 34 | 35 | public ArrayList getHashtagCharIndexes() { 36 | return hashtagCharIndexes; 37 | } 38 | 39 | public ArrayList getCapitalCharIndexes() { 40 | return capitalCharIndexes; 41 | } 42 | 43 | /** 44 | * Filters out # and splits hash tags where an upper-case letter follow an 45 | * lower-case letter in a hash tag. returns the resulting string, charindex 46 | * of hashtags and charindex of capital letters. 
47 | * 48 | * @param text 49 | * message to be pre-processed 50 | * @return message with # removed and split hash tags 51 | */ 52 | public static HashtagProcessor processHashTags(String text) { 53 | 54 | int capsCount = 0; 55 | 56 | ArrayList hashtagCharIndexes = new ArrayList(); 57 | ArrayList capitalCharIndexes = new ArrayList(); 58 | 59 | boolean newWord = true; 60 | boolean hashtag = false; 61 | boolean previousIsLowerCase = false; 62 | 63 | char[] newString = new char[text.length() * 2]; 64 | 65 | int count = 0; 66 | for (int i = 0; i < text.length(); i++) { 67 | 68 | char c = text.charAt(i); 69 | 70 | if (Character.isWhitespace(c)) { // new words starts 71 | newWord = true; 72 | hashtag = false; 73 | newString[count] = c; 74 | count++; 75 | } else { 76 | if (newWord && c == '#') { // new hashtag starts here; skip # 77 | hashtag = true; 78 | hashtagCharIndexes.add(i); 79 | } else if (hashtag && previousIsLowerCase && Character.isUpperCase(c)) { // split 80 | capitalCharIndexes.add(i); 81 | capsCount++; 82 | newString[count] = ' '; 83 | newString[count + 1] = c; 84 | count += 2; 85 | } else { 86 | newString[count] = c; 87 | count++; 88 | } 89 | newWord = false; 90 | } 91 | previousIsLowerCase = Character.isLowerCase(c); 92 | } 93 | 94 | // System.out.println(capsCount); 95 | // System.out.println(hashtagCharIndexes); 96 | // System.out.println(capitalCharIndexes); 97 | 98 | HashtagProcessor processed = new HashtagProcessor(new String(newString, 0, count), hashtagCharIndexes, capitalCharIndexes); 99 | 100 | return processed; 101 | 102 | } 103 | 104 | public static final void main(String[] args) { 105 | 106 | System.out.println(">" + processHashTags("RT @shadihamid: US strikes in #Syria have badly undermined").getHashtagCharIndexes() + "<"); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/LocationWrapper.java: 
-------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | 3 | import java.util.List; 4 | 5 | public interface LocationWrapper { 6 | public Long getPopulation(); 7 | public String getName(); 8 | public String[] getAlternateNames(); 9 | public Double getLatitude(); 10 | public Double getLongitude(); 11 | public Float getScore(); 12 | public Long getGeoNameId(); 13 | public List getHierarchy(); 14 | public String getCountryCode(); 15 | public String getFeatureCode(); 16 | public String getFeatureClass(); 17 | } 18 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/LocationWrapperGeonamesToponym.java: -------------------------------------------------------------------------------- 1 | package edu.psu.ist.vaccine.geotxt.utils; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.geonames.InsufficientStyleException; 7 | import org.geonames.Toponym; 8 | 9 | public class LocationWrapperGeonamesToponym implements LocationWrapper { 10 | 11 | protected Toponym toponym; 12 | 13 | public LocationWrapperGeonamesToponym(Toponym toponym) { 14 | this.toponym = toponym; 15 | } 16 | 17 | @Override 18 | public Long getPopulation() { 19 | Long population = null; 20 | try { 21 | population = toponym.getPopulation(); 22 | } catch (InsufficientStyleException e) { 23 | } 24 | if (population == null) { 25 | population = -1L; 26 | } 27 | return population; 28 | } 29 | 30 | @Override 31 | public String getName() { 32 | return toponym.getName(); 33 | } 34 | 35 | @Override 36 | public String[] getAlternateNames() { 37 | String[] alternateName = null; 38 | try { 39 | alternateName = toponym.getAlternateNames().split("\\s*,\\s*"); 40 | } catch (InsufficientStyleException e) { 41 | } 42 | if (alternateName == null) { 43 | alternateName = new String[0]; 44 | } 45 | return alternateName; 46 | } 47 | 48 | @Override 49 | public 
Double getLatitude() { 50 | return toponym.getLatitude(); 51 | } 52 | 53 | @Override 54 | public Double getLongitude() { 55 | return toponym.getLongitude(); 56 | } 57 | 58 | @Override 59 | public Long getGeoNameId() { 60 | return (long) (toponym.getGeoNameId()); 61 | } 62 | 63 | @Override 64 | public List getHierarchy() { 65 | return this.getHierarchy(); 66 | } 67 | 68 | public void setHierarchy(List hierarchy) { 69 | this.setHierarchy(hierarchy); 70 | } 71 | 72 | @Override 73 | public Float getScore() { 74 | throw new UnsupportedOperationException(); 75 | } 76 | 77 | @Override 78 | public String getCountryCode() { 79 | return toponym.getCountryCode(); 80 | } 81 | 82 | @Override 83 | public String getFeatureCode() { 84 | return toponym.getFeatureCodeName(); 85 | 86 | } 87 | 88 | @Override 89 | public String getFeatureClass() { 90 | return toponym.getFeatureClassName(); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/PointGeometry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package edu.psu.ist.vaccine.geotxt.utils; 6 | 7 | import java.math.BigInteger; 8 | 9 | /** 10 | * this is not used as the output of Geocoder. Also, the Location class and 11 | * Organization class have a member of this class, which is populated by GeoTxt. 
12 | * 13 | * @author MortezaKarimzadeh 14 | */ 15 | public class PointGeometry { 16 | 17 | // GeoJSON should be similar to { "type": "Point", "coordinates": [100.0, 0.0] } 18 | protected String type = "Point"; 19 | protected String toponym = null; 20 | protected BigInteger geoNameId = null; 21 | protected double[] coordinates = new double[2]; 22 | 23 | public String getType() { 24 | return type; 25 | } 26 | 27 | // public void setType(String type) { 28 | // this.type = type; 29 | // } 30 | public String getToponym() { 31 | return toponym; 32 | } 33 | 34 | public void setToponym(String type) { 35 | this.toponym = type; 36 | } 37 | 38 | public BigInteger getGeoNameId() { 39 | return geoNameId; 40 | } 41 | 42 | public void setGeoNameId(BigInteger geoNameId) { 43 | this.geoNameId = geoNameId; 44 | } 45 | 46 | public double[] getCoordinates() { 47 | return coordinates; 48 | } 49 | 50 | public void setCoordinates(double[] coordinates) { 51 | this.coordinates = coordinates; 52 | } 53 | 54 | public PointGeometry(double lon, double lat, BigInteger geoNameId) { 55 | this.coordinates[0] = lon; 56 | this.coordinates[1] = lat; 57 | this.geoNameId = geoNameId; 58 | 59 | } 60 | 61 | public PointGeometry(String toponym, double lon, double lat, BigInteger geoNameId) { 62 | this(lon, lat, geoNameId); 63 | this.toponym = toponym; 64 | } 65 | 66 | @Override 67 | public String toString() { 68 | return "Point with toponym " + toponym + ", and GeoNameId of " + geoNameId + " at Lat, Long (" + coordinates[1] + " " + coordinates[0] + ")"; 69 | 70 | //"GeoLocated" + type + " -->" + " Longitude: " + coordinates[0] + ", Latitude: " + coordinates[1] + ", GeoNameId: " + geoNameId.toString(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/SortedAnnotationList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 
3 | * and open the template in the editor. 4 | */ 5 | 6 | package edu.psu.ist.vaccine.geotxt.utils; 7 | 8 | import gate.Annotation; 9 | import java.util.Vector; 10 | 11 | /** 12 | * 13 | * @author ajaiswal 14 | */ 15 | public class SortedAnnotationList extends Vector { 16 | public SortedAnnotationList() { 17 | super(); 18 | } // SortedAnnotationList 19 | 20 | public boolean addSortedExclusive(Annotation annot) { 21 | Annotation currAnot = null; 22 | 23 | // overlapping check 24 | for (int i=0; i" + processHashTags("I live in #NewYork, I like it there.") + "<"); 59 | System.out.println(">" + processHashTags("#NewYork is great.") + "<"); 60 | System.out.println(">" + processHashTags("#YaYaYa #") + "<"); 61 | System.out.println(">" + processHashTags("#NY #") + "<"); 62 | System.out.println(">" + processHashTags("#PrayForGaza") + "<"); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /app/edu/psu/ist/vaccine/geotxt/utils/TwitterStreamCollection.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This is a test class used to inspect the OutofMemory exception. It will be used to fetch tweets from twitter and feed them to GeoTxt, Gate ANNIE NER or Stanford NER to find the potential memory leaks. 
3 | */ 4 | package edu.psu.ist.vaccine.geotxt.utils; 5 | 6 | import edu.psu.ist.vaccine.geotxt.api.GeoTxtApi; 7 | import twitter4j.*; 8 | 9 | import java.io.FileWriter; 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.util.logging.Level; 13 | import java.util.logging.Logger; 14 | 15 | import twitter4j.conf.ConfigurationBuilder; 16 | 17 | /** 18 | * 19 | * @author Morteza Karimzadeh 20 | */ 21 | public class TwitterStreamCollection { 22 | 23 | static ConfigurationBuilder cb = new ConfigurationBuilder(); 24 | 25 | public static void main(String[] args) throws TwitterException, IllegalArgumentException, URISyntaxException, IOException { 26 | 27 | 28 | 29 | //get the physical address of Stanford an Gate 30 | final Config config = new Config(); 31 | 32 | //MapHierarchyPlaces.username = "siddhartha"; // put your geonames user name here 33 | 34 | //Create an instance of GeoTxt 35 | //final GeoTxtApi geoTxtApi = new GeoTxtApi(Config.properties.getProperty("gate_home"), Config.properties.getProperty("stanford_ner_classifier")); 36 | 37 | //create basic Gate Analyzer 38 | //final BasicGateAnalyzer gate = new BasicGateAnalyzer(Config.properties.getProperty("gate_home")); 39 | 40 | 41 | //Twitter Authentication (OAuth) 42 | cb.setDebugEnabled(true) 43 | .setOAuthConsumerKey("z9V9ODs2145XfgWc7gBhxQ") 44 | .setOAuthConsumerSecret("QRgRGpgNWgLATp3aNFYvC7PyUfltYxzKUpEBSD2w") 45 | .setOAuthAccessToken("904552788-A6e3LFOZxyf8grexUyOl53kkaCSOlrkne8EjENpP") 46 | .setOAuthAccessTokenSecret("X1iQKwWCEYgC7OskIR0CZXThJ5cMVLM4Un9bsNXhBI"); 47 | 48 | 49 | 50 | 51 | StatusListener listener = new StatusListener() { 52 | //create basic Stanford Analyzer 53 | // BasicStanfordAnalyzer stanford = new BasicStanfordAnalyzer(config.getStanford_ner()); 54 | // BasicGateAnalyzer gate = new BasicGateAnalyzer(config.getGate_home()); 55 | GeoTxtApi geoTxtApi = new GeoTxtApi(config.getGate_home(), config.getStanford_ner(), true, config.getOpenNlpDir(), 
config.getLingPipeDir(), config.getMit_dir()); 56 | FileWriter fw = new FileWriter("tweets.txt"); 57 | //Overriding StatusListener several abstract methods. 58 | //Use this part to process incoming tweets. You can use either GeoTxt, Stanford NER or Gate NER 59 | 60 | @Override 61 | public void onStatus(Status status) { 62 | 63 | 64 | System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText()); 65 | 66 | try { 67 | //fw.write(">>>>>" + status.getText() + "\n"); 68 | //test GeoTxt API 69 | // System.out.println(gate.gate.tagAlltoGeoJson(status.getText())); 70 | // System.out.println(geoTxtApi.geoCodeToGeoJson(status.getText(), "gate")); 71 | //System.out.println(geoTxtApi.geoCodeToGeoJson(status.getText(), "gateh", false, 0, false,false)); 72 | System.out.println(geoTxtApi.geoCodeToGeoJson(status.getText(), "cogcomp", true, 100, true,true)); 73 | // System.out.println(geoTxtApi.geoCodeToGeoJson(status.getText(), "stanford")); 74 | // System.out.println(geoTxtApi.geoCodeToGeoJson(status.getText(), "stanfordh")); 75 | // System.out.println(geoTxtApi.geoCodeToGeoJson(status.getText(), "stanfords")); 76 | } catch (IllegalArgumentException ex) { 77 | Logger.getLogger(TwitterStreamCollection.class.getName()).log(Level.SEVERE, null, ex); 78 | // } 79 | } catch (URISyntaxException ex) { 80 | Logger.getLogger(TwitterStreamCollection.class.getName()).log(Level.SEVERE, null, ex); 81 | } catch (IOException ex) { 82 | Logger.getLogger(TwitterStreamCollection.class.getName()).log(Level.SEVERE, null, ex); 83 | } 84 | 85 | 86 | //test basic Stanford NER 87 | // System.out.println(stanford.st.tagAlltoDoc(status.getText())); 88 | 89 | //test basic Gate ANNIE 90 | // System.out.println(gate.gate.tagAlltoDoc(status.getText())); 91 | 92 | } 93 | 94 | @Override 95 | public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { 96 | System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); 97 | } 98 | 99 | @Override 
100 | public void onTrackLimitationNotice(int numberOfLimitedStatuses) { 101 | System.out.println("Got track limitation notice:" + numberOfLimitedStatuses); 102 | } 103 | 104 | @Override 105 | public void onScrubGeo(long userId, long upToStatusId) { 106 | System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); 107 | } 108 | 109 | @Override 110 | public void onStallWarning(StallWarning warning) { 111 | System.out.println("Got stall warning:" + warning); 112 | } 113 | 114 | @Override 115 | public void onException(Exception ex) { 116 | ex.printStackTrace(); 117 | try { 118 | fw.close(); 119 | } catch (Exception e) { 120 | } 121 | } 122 | }; 123 | 124 | 125 | 126 | TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance(); 127 | twitterStream.addListener(listener); 128 | twitterStream.sample(); 129 | 130 | 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /app/views/addLocationUi.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String) 2 | 3 | 4 | 5 | 6 | 7 | 8 | @title 9 | 10 | 11 | 12 | 13 | 14 | 23 | 28 |
29 | 30 | 31 |
32 | See documentation. For questions, 33 | please contact Morteza Karimzadeh of the GeoVISTA 37 | Center. 38 |
39 |
40 | 41 | 62 | -------------------------------------------------------------------------------- /app/views/codingHistoryUi.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | @title 10 | 11 | 12 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /app/views/document.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String) 2 | 3 | @head(title) { 4 |
5 |
6 |

@title

7 |
8 |
9 |
10 |
11 |

GeoTxt

12 |
URL:

http://geotxt.org/v2/api/geotxt.json

13 |
Type:

GET

14 |
Parameters:
15 | 16 | 17 | 18 | 19 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
m: Choose from two NER engines and multiple methods that yield 6 different choices: use (1) "gates" (without quotation marks) for Gate and 20 | our improved ranking scheme, (2) "stanfords" for Stanford NER and our improved ranking scheme, or (3) "gate" or (4) "stanford" 21 | for default GeoNames ranking scheme with each NER engine, or (5) "gateh" or (6) "stanfordh" to enable place name disambiguation. 22 | PLEASE NOTE that "stanfords" and "gates" are recommended methods, while "gateh" and "stanfordh" enable context-based disambiguation. 23 | Use "gate" and "stanford" only if the other four methods do not produce desired results. These two latter methods are not recommended. 24 | 25 |
q: A UTF-8, URL-encoded search query of 3,900 characters maximum, including operators.
33 |
Return:
34 |

35 | The API will return a GeoJSON FeatureCollection. The NER engines will extract three types of entities: locations, 36 | organizations and persons. Locations will be Geo-located using GeoNames. Please refer to the 37 | example output. 38 |

39 |
Example Request
40 |
http://geotxt.org/v2/api/geotxt.json?m=stanfords&q=Edwin Ernesto Rivera Gracias was in El Salvador and after voluntarily agreeing to return to the United States to face charges he was flown to Denver on Wednesday, according to the FBI. He surrendered to Salvadoran authorities and FBI agents on Tuesday, said FBI spokesman Dave Joly.
41 |
42 |
 43 | 						
 44 | {
 45 |     "type": "FeatureCollection",
 46 |     "features": [
 47 |         {
 48 |             "type": "Feature",
 49 |             "properties": {
 50 |                 "type": "location",				//	this is a location
 51 |                 "name": "el salvador",				//	name of the location as appears in the input text
 52 |                 "toponym": "El Salvador",			//	GeoNames official name for the geo-located place
 53 |                 "geoNameId": 3585968,				//	Universal GeoNameId of the geo-located place
 54 |                 "locationType": null,				//	type of location 
 55 |                 "positions": [					//	position(s) of the entity in original query
 56 |                     36
 57 |                 ]
 58 |             },
 59 |             "geometry": {					//	geometry info of the location
 60 |                 "type": "Point",
 61 |                 "coordinates": [
 62 |                     -88.91667,
 63 |                     13.83333
 64 |                 ]
 65 |             }
 66 |         },
 67 |         {
 68 |             "type": "Feature",
 69 |             "properties": {
 70 |                 "type": "location",
 71 |                 "name": "united states",
 72 |                 "toponym": "United States",
 73 |                 "geoNameId": 6252001,
 74 |                 "locationType": null,
 75 |                 "positions": [
 76 |                     96
 77 |                 ]
 78 |             },
 79 |             "geometry": {
 80 |                 "type": "Point",
 81 |                 "coordinates": [
 82 |                     -98.5,
 83 |                     39.76
 84 |                 ]
 85 |             }
 86 |         },
 87 |         {
 88 |             "type": "Feature",
 89 |             "properties": {
 90 |                 "type": "location",
 91 |                 "name": "denver",
 92 |                 "toponym": "Denver",
 93 |                 "geoNameId": 5419384,
 94 |                 "locationType": null,
 95 |                 "positions": [
 96 |                     142
 97 |                 ]
 98 |             },
 99 |             "geometry": {
100 |                 "type": "Point",
101 |                 "coordinates": [
102 |                     -104.9847,
103 |                     39.73915
104 |                 ]
105 |             }
106 |         },
107 |         {
108 |             "type": "Feature",
109 |             "properties": {
110 |                 "type": "organization",				//	this is an organization
111 |                 "name": "fbi",					//	name of the organization
112 |                 "toponym": null,				//	GeoName official name not found 
113 |                 "geoNameId": null,				//	GeoName ID not found 
114 |                 "organizationType": null,			// 	type of the organization 
115 |                 "positions": [					//	positions of the entity in original query
116 |                     180,
117 |                     230,
118 |                     258
119 |                 ]
120 |             },
121 |             "geometry": null
122 |         },
123 |         {
124 |             "type": "Feature",
125 |             "properties": {
126 |                 "type": "person",					//	this is a person
127 |                 "name": "edwin ernesto rivera gracias",			//	name of the person
128 |                 "personType": null,					//	type of the person
129 |                 "gender": null,						//	gender of the person
130 |                 "kind": null,						//	kind of the person
131 |                 "positions": [						// 	position of the entity in original query
132 |                     0
133 |                 ]
134 |             },
135 |             "geometry": null
136 |         },
137 |         {
138 |             "type": "Feature",
139 |             "properties": {
140 |                 "type": "person",
141 |                 "name": "dave joly",
142 |                 "personType": null,
143 |                 "gender": null,
144 |                 "kind": null,
145 |                 "positions": [
146 |                     272
147 |                 ]
148 |             },
149 |             "geometry": null
150 |         }
151 |     ]
152 | }						
153 | 					
154 |
155 |
156 |
157 | 158 | 159 |
160 | } -------------------------------------------------------------------------------- /app/views/head.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String)(content: Html) 2 | 3 | 4 | 5 | 6 | 7 | @title 8 | 9 | 10 | 11 | 12 | 13 | 14 | @content 15 | 16 | 17 | -------------------------------------------------------------------------------- /app/views/main.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String)(content: Html) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | @title 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 | 26 | 29 | 30 |
31 |
32 |
33 | 34 |

If you use GeoTxt API in your work, or wish to cite it, please cite the following publication:

35 |

Morteza Karimzadeh, Wenyi Huang, Siddhartha Banerjee, Jan Oliver Wallgrün, Frank Hardisty, Scott Pezanowski,
36 | Prasenjit Mitra, and Alan M. MacEachren. 2013. GeoTxt: a web API to leverage place references in text. In Proceedings of
37 | the 7th Workshop on Geographic Information Retrieval (GIR '13) ACM, New York, NY, USA, 72-73.
38 | DOI=http://dx.doi.org/10.1145/2533888.2533942

39 | 40 | 47 | 48 | 49 |
@content 50 | 51 | 52 |
53 | See documentation. For questions, please contact Morteza Karimzadeh of the GeoVISTA 55 | Center. 56 |
57 |
58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /bin/application.conf: -------------------------------------------------------------------------------- 1 | # This is the main configuration file for the application. 2 | # ~~~~~ 3 | 4 | # Secret key 5 | # ~~~~~ 6 | # The secret key is used to secure cryptographics functions. 7 | # 8 | # This must be changed for production, but we recommend not changing it in this file. 9 | # 10 | # See http://www.playframework.com/documentation/latest/ApplicationSecret for more details. 11 | application.secret="7pQg;Lfgw8n]JJ:=?bQ/@?IGtm3Z]2raarOM0L/@F:r_G>MP2b_:^l?KQUG?TQyd" 12 | 13 | # The application languages 14 | # ~~~~~ 15 | application.langs="en" 16 | 17 | # Global object class 18 | # ~~~~~ 19 | # Define the Global object class for this application. 20 | # Default to Global in the root package. 21 | # application.global=Global 22 | 23 | # Router 24 | # ~~~~~ 25 | # Define the Router object to use for this application. 26 | # This router will be looked up first when the application is starting up, 27 | # so make sure this is the entry point. 28 | # Furthermore, it's assumed your route file is named properly. 29 | # So for an application router like `conf/my.application.Router`, 30 | # you may need to define a router file `my.application.routes`. 31 | # Default to Routes in the root package (and `conf/routes`) 32 | # application.router=my.application.Routes 33 | 34 | # Database configuration 35 | # ~~~~~ 36 | # You can declare as many datasources as you want. 
37 | # By convention, the default datasource is named `default` 38 | # 39 | # db.default.driver=org.h2.Driver 40 | # db.default.url="jdbc:h2:mem:play" 41 | # db.default.user=sa 42 | # db.default.password="" 43 | # 44 | # You can expose this datasource via JNDI if needed (Useful for JPA) 45 | # db.default.jndiName=DefaultDS 46 | 47 | # Evolutions 48 | # ~~~~~ 49 | # You can disable evolutions if needed 50 | # evolutionplugin=disabled 51 | 52 | # Ebean configuration 53 | # ~~~~~ 54 | # You can declare as many Ebean servers as you want. 55 | # By convention, the default server is named `default` 56 | # 57 | # ebean.default="models.*" 58 | 59 | # Logger 60 | # ~~~~~ 61 | # You can also configure logback (http://logback.qos.ch/), 62 | # by providing an application-logger.xml file in the conf directory. 63 | 64 | # Root logger: 65 | logger.root=ERROR 66 | 67 | # Logger used by the framework: 68 | logger.play=INFO 69 | 70 | # Logger provided to your application: 71 | logger.application=DEBUG 72 | 73 | 74 | #Make play parse longer texts than the default 100kb 75 | parsers.text.maxLength=10000k 76 | 77 | #maxHeaderSize = 8192 this is the default 78 | 79 | 80 | # File path 81 | #GATEHOME = /r1/geotxt/gate-8.1-build5169-BIN 82 | STANFORDMODEL = "C:/Programs/Stanford/english.all.3class.distsim.crf.ser.gz" 83 | #STANFORDMODEL = /r1/geotxt/StanfordNER/english.all.3class.caseless.distsim.crf.ser.gz 84 | #STANFORDMODEL = /r2/sp3/StanfordNER/english.all.3class.caseless.distsim.crf.ser.gz 85 | GATEHOME = "C:/Programs/gate-8.4.1-build5753-BIN" 86 | OPENNLPDIR = "C:/Programs/openNlp/" 87 | LINGPIPEMODEL = "C:/Programs/lingpipe/ne-en-news-muc6.AbstractCharLmRescoringChunker" 88 | LOCATIONFILES = /r1/geotxt/locationfiles 89 | #LOCATIONFILES = "G:/temp" 90 | 91 | 92 | # ResultCollector Service URL 93 | RESULTCOLLECTOR = "http://localhost:8080" 94 | 95 | application.context="/v2/" 96 | -------------------------------------------------------------------------------- 
/bin/views/addLocationUi.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String) 2 | 3 | 4 | 5 | 6 | 7 | 8 | @title 9 | 10 | 11 | 12 | 13 | 14 | 23 | 28 |
29 | 30 | 31 |
32 | See documentation. For questions, 33 | please contact Morteza Karimzadeh of the GeoVISTA 37 | Center. 38 |
39 |
40 | 41 | 62 | -------------------------------------------------------------------------------- /bin/views/codingHistoryUi.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String) 2 | 3 | 4 | 5 | 6 | 7 | 8 | @title 9 | 10 | 11 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 |
51 | 52 | -------------------------------------------------------------------------------- /bin/views/document.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String) 2 | 3 | @head(title) { 4 |
5 |
6 |

@title

7 |
8 |
9 |
10 |
11 |

GeoTxt

12 |
URL:

http://geotxt.org/v2/api/geotxt.json

13 |
Type:

GET

14 |
Parameters:
15 | 16 | 17 | 18 | 19 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
m: Choose from two NER engines and multiple methods that yield 6 different choices: use (1) "gates" (without quotation marks) for Gate and 20 | our improved ranking scheme, (2) "stanfords" for Stanford NER and our improved ranking scheme, or (3) "gate" or (4) "stanford" 21 | for default GeoNames ranking scheme with each NER engine, or (5) "gateh" or (6) "stanfordh" to enable place name disambiguation. 22 | PLEASE NOTE that "stanfords" and "gates" are recommended methods, while "gateh" and "stanfordh" enable context-based disambiguation. 23 | Use "gate" and "stanford" only if the other four methods do not produce desired results. These two latter methods are not recommended. 24 | 25 |
q: A UTF-8, URL-encoded search query of 3,900 characters maximum, including operators.
33 |
Return:
34 |

35 | The API will return a GeoJSON FeatureCollection. The NER engines will extract three types of entities: locations, 36 | organizations and persons. Locations will be Geo-located using GeoNames. Please refer to the 37 | example output. 38 |

39 |
Example Request
40 |
http://geotxt.org/v2/api/geotxt.json?m=stanfords&q=Edwin Ernesto Rivera Gracias was in El Salvador and after voluntarily agreeing to return to the United States to face charges he was flown to Denver on Wednesday, according to the FBI. He surrendered to Salvadoran authorities and FBI agents on Tuesday, said FBI spokesman Dave Joly.
41 |
42 |
 43 | 						
 44 | {
 45 |     "type": "FeatureCollection",
 46 |     "features": [
 47 |         {
 48 |             "type": "Feature",
 49 |             "properties": {
 50 |                 "type": "location",				//	this is a location
 51 |                 "name": "el salvador",				//	name of the location as appears in the input text
 52 |                 "toponym": "El Salvador",			//	GeoNames official name for the geo-located place
 53 |                 "geoNameId": 3585968,				//	Universal GeoNameId of the geo-located place
 54 |                 "locationType": null,				//	type of location 
 55 |                 "positions": [					//	position(s) of the entity in original query
 56 |                     36
 57 |                 ]
 58 |             },
 59 |             "geometry": {					//	geometry info of the location
 60 |                 "type": "Point",
 61 |                 "coordinates": [
 62 |                     -88.91667,
 63 |                     13.83333
 64 |                 ]
 65 |             }
 66 |         },
 67 |         {
 68 |             "type": "Feature",
 69 |             "properties": {
 70 |                 "type": "location",
 71 |                 "name": "united states",
 72 |                 "toponym": "United States",
 73 |                 "geoNameId": 6252001,
 74 |                 "locationType": null,
 75 |                 "positions": [
 76 |                     96
 77 |                 ]
 78 |             },
 79 |             "geometry": {
 80 |                 "type": "Point",
 81 |                 "coordinates": [
 82 |                     -98.5,
 83 |                     39.76
 84 |                 ]
 85 |             }
 86 |         },
 87 |         {
 88 |             "type": "Feature",
 89 |             "properties": {
 90 |                 "type": "location",
 91 |                 "name": "denver",
 92 |                 "toponym": "Denver",
 93 |                 "geoNameId": 5419384,
 94 |                 "locationType": null,
 95 |                 "positions": [
 96 |                     142
 97 |                 ]
 98 |             },
 99 |             "geometry": {
100 |                 "type": "Point",
101 |                 "coordinates": [
102 |                     -104.9847,
103 |                     39.73915
104 |                 ]
105 |             }
106 |         },
107 |         {
108 |             "type": "Feature",
109 |             "properties": {
 110 |                 "type": "organization",				//	this is an organization
111 |                 "name": "fbi",					//	name of the organization
112 |                 "toponym": null,				//	GeoName official name not found 
113 |                 "geoNameId": null,				//	GeoName ID not found 
114 |                 "organizationType": null,			// 	type of the organization 
115 |                 "positions": [					//	positions of the entity in original query
116 |                     180,
117 |                     230,
118 |                     258
119 |                 ]
120 |             },
121 |             "geometry": null
122 |         },
123 |         {
124 |             "type": "Feature",
125 |             "properties": {
126 |                 "type": "person",					//	this is a person
127 |                 "name": "edwin ernesto rivera gracias",			//	name of the person
128 |                 "personType": null,					//	type of the person
129 |                 "gender": null,						//	gender of the person
130 |                 "kind": null,						//	kind of the person
131 |                 "positions": [						// 	position of the entity in original query
132 |                     0
133 |                 ]
134 |             },
135 |             "geometry": null
136 |         },
137 |         {
138 |             "type": "Feature",
139 |             "properties": {
140 |                 "type": "person",
141 |                 "name": "dave joly",
142 |                 "personType": null,
143 |                 "gender": null,
144 |                 "kind": null,
145 |                 "positions": [
146 |                     272
147 |                 ]
148 |             },
149 |             "geometry": null
150 |         }
151 |     ]
152 | }						
153 | 					
154 |
155 |
156 |
157 | 158 | 159 |
160 | } -------------------------------------------------------------------------------- /bin/views/head.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String)(content: Html) 2 | 3 | 4 | 5 | 6 | 7 | @title 8 | 9 | 10 | 11 | 12 | 13 | 14 | @content 15 | 16 | 17 | -------------------------------------------------------------------------------- /bin/views/main.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String)(content: Html) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | @title 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 | 26 | 29 | 30 |
31 |
32 |
33 |
34 | 35 | 42 |
@content 43 | 44 | 45 |
46 | See documentation. For questions, please contact Morteza Karimzadeh of the GeoVISTA 48 | Center. 49 |
50 |
51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | name := """GeoTxtWeb""" 2 | 3 | version := "3.0-SNAPSHOT" 4 | 5 | lazy val root = (project in file(".")).enablePlugins(PlayJava) 6 | 7 | scalaVersion := "2.11.1" 8 | 9 | libraryDependencies ++= Seq( 10 | javaJdbc, 11 | javaEbean, 12 | cache, 13 | javaWs 14 | ) 15 | 16 | libraryDependencies ++= Seq( 17 | "edu.stanford.nlp" % "stanford-corenlp" % "3.9.1", 18 | "edu.illinois.cs.cogcomp" % "illinois-ner" % "4.0.3", 19 | "org.apache.opennlp" % "opennlp-tools" % "1.8.4", 20 | "uk.ac.gate" % "gate-core" % "8.4.1", 21 | "org.postgresql" % "postgresql" % "42.2.1", 22 | "org.apache.solr" % "solr-solrj" % "6.6.0", 23 | "com.googlecode.json-simple" % "json-simple" % "1.1.1", 24 | "org.twitter4j" % "twitter4j-core" % "4.0.6", 25 | "org.twitter4j" % "twitter4j-stream" % "4.0.6", 26 | "org.apache.commons" % "commons-csv" % "1.5", 27 | "de.julielab" % "aliasi-lingpipe" % "4.1.0", 28 | "commons-httpclient" % "commons-httpclient" % "3.1", 29 | "org.apache.directory.studio" % "org.apache.commons.collections" % "3.2.1", 30 | "com.bericotech" % "clavin" % "2.1.0", 31 | "edu.mit.ll" % "mitie" % "0.8" 32 | ) 33 | 34 | resolvers += "CogcompSoftware" at "http://cogcomp.cs.illinois.edu/m2repo/" -------------------------------------------------------------------------------- /conf/application.conf: -------------------------------------------------------------------------------- 1 | # This is the main configuration file for the application. 2 | # ~~~~~ 3 | 4 | # Secret key 5 | # ~~~~~ 6 | # The secret key is used to secure cryptographics functions. 7 | # 8 | # This must be changed for production, but we recommend not changing it in this file. 9 | # 10 | # See http://www.playframework.com/documentation/latest/ApplicationSecret for more details. 
11 | application.secret="7pQg;Lfgw8n]JJ:=?bQ/@?IGtm3Z]2raarOM0L/@F:r_G>MP2b_:^l?KQUG?TQyd" 12 | 13 | # The application languages 14 | # ~~~~~ 15 | application.langs="en" 16 | 17 | # Global object class 18 | # ~~~~~ 19 | # Define the Global object class for this application. 20 | # Default to Global in the root package. 21 | # application.global=Global 22 | 23 | # Router 24 | # ~~~~~ 25 | # Define the Router object to use for this application. 26 | # This router will be looked up first when the application is starting up, 27 | # so make sure this is the entry point. 28 | # Furthermore, it's assumed your route file is named properly. 29 | # So for an application router like `conf/my.application.Router`, 30 | # you may need to define a router file `my.application.routes`. 31 | # Default to Routes in the root package (and `conf/routes`) 32 | # application.router=my.application.Routes 33 | 34 | # Database configuration 35 | # ~~~~~ 36 | # You can declare as many datasources as you want. 37 | # By convention, the default datasource is named `default` 38 | # 39 | # db.default.driver=org.h2.Driver 40 | # db.default.url="jdbc:h2:mem:play" 41 | # db.default.user=sa 42 | # db.default.password="" 43 | # 44 | # You can expose this datasource via JNDI if needed (Useful for JPA) 45 | # db.default.jndiName=DefaultDS 46 | 47 | # Evolutions 48 | # ~~~~~ 49 | # You can disable evolutions if needed 50 | # evolutionplugin=disabled 51 | 52 | # Ebean configuration 53 | # ~~~~~ 54 | # You can declare as many Ebean servers as you want. 55 | # By convention, the default server is named `default` 56 | # 57 | # ebean.default="models.*" 58 | 59 | # Logger 60 | # ~~~~~ 61 | # You can also configure logback (http://logback.qos.ch/), 62 | # by providing an application-logger.xml file in the conf directory. 
63 | 64 | # Root logger: 65 | logger.root=ERROR 66 | 67 | # Logger used by the framework: 68 | logger.play=INFO 69 | 70 | # Logger provided to your application: 71 | logger.application=DEBUG 72 | 73 | 74 | #Make play parse longer texts than the default 100kb 75 | parsers.text.maxLength=10000k 76 | 77 | #maxHeaderSize = 8192 this is the default 78 | 79 | 80 | # File path 81 | #GATEHOME = /r1/geotxt/gate-8.1-build5169-BIN 82 | STANFORDMODEL = "C:/Programs/Stanford/english.all.3class.distsim.crf.ser.gz" 83 | #STANFORDMODEL = "/r1/geotxt/requiredfiles_20170811/Stanford/english.all.3class.distsim.crf.ser.gz" 84 | #STANFORDMODEL = /r1/geotxt/StanfordNER/english.all.3class.caseless.distsim.crf.ser.gz 85 | #STANFORDMODEL = /r2/sp3/StanfordNER/english.all.3class.caseless.distsim.crf.ser.gz 86 | GATEHOME = "C:/Programs/gate-8.4.1-build5753-BIN" 87 | #GATEHOME = "/r1/geotxt/requiredfiles_20170811/gate-8.4.1-build5753-BIN" 88 | OPENNLPDIR = "C:/Programs/openNlp/" 89 | #OPENNLPDIR = "/r1/geotxt/requiredfiles_20170811/openNlp/" 90 | LINGPIPEMODEL = "C:/Programs/lingpipe/ne-en-news-muc6.AbstractCharLmRescoringChunker" 91 | #LINGPIPEMODEL = "/r1/geotxt/requiredfiles_20170811/lingpipe/ne-en-news-muc6.AbstractCharLmRescoringChunker" 92 | MITMODEL = "C:/Programs/mit/ner_model.dat" 93 | LOCATIONFILES = /r1/geotxt/locationfiles 94 | #LOCATIONFILES = "G:/temp" 95 | 96 | 97 | # ResultCollector Service URL 98 | RESULTCOLLECTOR = "http://localhost:8080" 99 | 100 | application.context="/v3/" 101 | -------------------------------------------------------------------------------- /conf/routes: -------------------------------------------------------------------------------- 1 | # Routes 2 | # This file defines all application routes (Higher priority routes first) 3 | # ~~~~ 4 | 5 | # Home page 6 | GET / controllers.Application.index() 7 | GET /addLocationUi/ controllers.Application.addLocationUi() 8 | GET /geovista/ controllers.Application.geovista() 9 | 10 | #document 11 | GET /api/ 
controllers.Application.document() 12 | 13 | # CorpusBuildingUserInterface 14 | GET /corpusBuilding/ controllers.Application.corpusBuildingUi() 15 | 16 | # CodingHistoryUserInterface 17 | GET /corpusBuilding/codingHistory.html controllers.Application.codingHistoryUi() 18 | 19 | # Tasks 20 | GET /api/geotxt.json controllers.Application.geotxt(m, q) 21 | POST /api/geotxtpst.json controllers.Application.geotxtPst 22 | POST /api/geotxtBatch.json controllers.Application.geotxtBatch(d, w, z) 23 | POST /api/geotxtAddEntry.json controllers.Application.geotxtAddEntry() 24 | GET /api/extract.json controllers.Application.extract(m, q) 25 | GET /api/geocode.json controllers.Application.geocode(q) 26 | 27 | 28 | #To get tweet from DB (Expose Tweet) for the purpose of tweet CorpusBuilding 29 | GET /corpusBuilding/tweetsRandom2/exposeCorpus.json controllers.Application.exposeCorpus(geoCoder, role) 30 | 31 | #To submit GeoCoding results to DB (SubtmitGcResults Tweet) for the purpose of tweet CorpusBuilding 32 | POST /corpusBuilding/tweetsRandom2/submitGcResults.json controllers.Application.submitGcResults 33 | 34 | 35 | # Map static resources from the /public folder to the /assets URL path 36 | GET /geovistaresources/*file controllers.Assets.at(path="/public", file) 37 | 38 | -------------------------------------------------------------------------------- /lib/geonames-1.1.13.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/lib/geonames-1.1.13.jar -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | #Activator-generated Properties 2 | #Mon May 04 10:56:48 EDT 2015 3 | template.uuid=f875d6f6-609b-4be0-b365-b1c501866eeb 4 | sbt.version=0.13.5 5 | 
-------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += "Typesafe repository" at "https://repo.typesafe.com/typesafe/releases/" 2 | 3 | // The Play plugin 4 | addSbtPlugin("com.typesafe.play" % "sbt-plugin" % "2.3.8") 5 | 6 | // web plugins 7 | 8 | addSbtPlugin("com.typesafe.sbt" % "sbt-coffeescript" % "1.0.0") 9 | 10 | addSbtPlugin("com.typesafe.sbt" % "sbt-less" % "1.0.0") 11 | 12 | addSbtPlugin("com.typesafe.sbt" % "sbt-jshint" % "1.0.1") 13 | 14 | addSbtPlugin("com.typesafe.sbt" % "sbt-rjs" % "1.0.1") 15 | 16 | addSbtPlugin("com.typesafe.sbt" % "sbt-digest" % "1.0.0") 17 | 18 | addSbtPlugin("com.typesafe.sbt" % "sbt-mocha" % "1.0.0") 19 | -------------------------------------------------------------------------------- /public/images/GeoTxtLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/public/images/GeoTxtLogo.png -------------------------------------------------------------------------------- /public/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/public/images/favicon.png -------------------------------------------------------------------------------- /public/images/header30px.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/public/images/header30px.gif -------------------------------------------------------------------------------- /public/javascripts/L.Control.Zoomslider.js: -------------------------------------------------------------------------------- 1 | L.Control.Zoomslider = (function () { 2 | 3 | var Knob = 
L.Draggable.extend({ 4 | initialize: function (element, stepHeight, knobHeight) { 5 | L.Draggable.prototype.initialize.call(this, element, element); 6 | this._element = element; 7 | 8 | this._stepHeight = stepHeight; 9 | this._knobHeight = knobHeight; 10 | 11 | this.on('predrag', function () { 12 | this._newPos.x = 0; 13 | this._newPos.y = this._adjust(this._newPos.y); 14 | }, this); 15 | }, 16 | 17 | _adjust: function (y) { 18 | var value = Math.round(this._toValue(y)); 19 | value = Math.max(0, Math.min(this._maxValue, value)); 20 | return this._toY(value); 21 | }, 22 | 23 | // y = k*v + m 24 | _toY: function (value) { 25 | return this._k * value + this._m; 26 | }, 27 | // v = (y - m) / k 28 | _toValue: function (y) { 29 | return (y - this._m) / this._k; 30 | }, 31 | 32 | setSteps: function (steps) { 33 | var sliderHeight = steps * this._stepHeight; 34 | this._maxValue = steps - 1; 35 | 36 | // conversion parameters 37 | // the conversion is just a common linear function. 38 | this._k = -this._stepHeight; 39 | this._m = sliderHeight - (this._stepHeight + this._knobHeight) / 2; 40 | }, 41 | 42 | setPosition: function (y) { 43 | L.DomUtil.setPosition(this._element, 44 | L.point(0, this._adjust(y))); 45 | }, 46 | 47 | setValue: function (v) { 48 | this.setPosition(this._toY(v)); 49 | }, 50 | 51 | getValue: function () { 52 | return this._toValue(L.DomUtil.getPosition(this._element).y); 53 | } 54 | }); 55 | 56 | var Zoomslider = L.Control.extend({ 57 | options: { 58 | position: 'topleft', 59 | // Height of zoom-slider.png in px 60 | stepHeight: 6, 61 | // Height of the knob div in px (including border) 62 | knobHeight: 4, 63 | styleNS: 'leaflet-control-zoomslider' 64 | }, 65 | 66 | onAdd: function (map) { 67 | this._map = map; 68 | this._ui = this._createUI(); 69 | this._knob = new Knob(this._ui.knob, 70 | this.options.stepHeight, 71 | this.options.knobHeight); 72 | 73 | map.whenReady(this._initKnob, this) 74 | .whenReady(this._initEvents, this) 75 | 
.whenReady(this._updateSize, this) 76 | .whenReady(this._updateKnobValue, this) 77 | .whenReady(this._updateDisabled, this); 78 | return this._ui.bar; 79 | }, 80 | 81 | onRemove: function (map) { 82 | map.off('zoomlevelschange', this._updateSize, this) 83 | .off('zoomend zoomlevelschange', this._updateKnobValue, this) 84 | .off('zoomend zoomlevelschange', this._updateDisabled, this); 85 | }, 86 | 87 | _createUI: function () { 88 | var ui = {}, 89 | ns = this.options.styleNS; 90 | 91 | ui.bar = L.DomUtil.create('div', ns + ' leaflet-bar'), 92 | ui.zoomIn = this._createZoomBtn('in', 'top', ui.bar), 93 | ui.wrap = L.DomUtil.create('div', ns + '-wrap leaflet-bar-part', ui.bar), 94 | ui.zoomOut = this._createZoomBtn('out', 'bottom', ui.bar), 95 | ui.body = L.DomUtil.create('div', ns + '-body', ui.wrap), 96 | ui.knob = L.DomUtil.create('div', ns + '-knob'); 97 | 98 | L.DomEvent.disableClickPropagation(ui.bar); 99 | L.DomEvent.disableClickPropagation(ui.knob); 100 | 101 | return ui; 102 | }, 103 | _createZoomBtn: function (zoomDir, end, container) { 104 | var classDef = this.options.styleNS + '-' + zoomDir 105 | + ' leaflet-bar-part' 106 | + ' leaflet-bar-part-' + end, 107 | link = L.DomUtil.create('a', classDef, container); 108 | 109 | link.href = '#'; 110 | link.title = 'Zoom ' + zoomDir; 111 | 112 | L.DomEvent.on(link, 'click', L.DomEvent.preventDefault); 113 | 114 | return link; 115 | }, 116 | 117 | _initKnob: function () { 118 | this._knob.enable(); 119 | this._ui.body.appendChild(this._ui.knob); 120 | }, 121 | _initEvents: function (map) { 122 | this._map 123 | .on('zoomlevelschange', this._updateSize, this) 124 | .on('zoomend zoomlevelschange', this._updateKnobValue, this) 125 | .on('zoomend zoomlevelschange', this._updateDisabled, this); 126 | 127 | L.DomEvent.on(this._ui.body, 'click', this._onSliderClick, this); 128 | L.DomEvent.on(this._ui.zoomIn, 'click', this._zoomIn, this); 129 | L.DomEvent.on(this._ui.zoomOut, 'click', this._zoomOut, this); 130 | 131 | 
this._knob.on('dragend', this._updateMapZoom, this); 132 | }, 133 | 134 | _onSliderClick: function (e) { 135 | var first = (e.touches && e.touches.length === 1 ? e.touches[0] : e), 136 | y = L.DomEvent.getMousePosition(first, this._ui.body).y; 137 | 138 | this._knob.setPosition(y); 139 | this._updateMapZoom(); 140 | }, 141 | 142 | _zoomIn: function (e) { 143 | this._map.zoomIn(e.shiftKey ? 3 : 1); 144 | }, 145 | _zoomOut: function (e) { 146 | this._map.zoomOut(e.shiftKey ? 3 : 1); 147 | }, 148 | 149 | _zoomLevels: function () { 150 | var zoomLevels = this._map.getMaxZoom() - this._map.getMinZoom() + 1; 151 | return zoomLevels < Infinity ? zoomLevels : 0; 152 | }, 153 | _toZoomLevel: function (value) { 154 | return value + this._map.getMinZoom(); 155 | }, 156 | _toValue: function (zoomLevel) { 157 | return zoomLevel - this._map.getMinZoom(); 158 | }, 159 | 160 | _updateSize: function () { 161 | var steps = this._zoomLevels(); 162 | 163 | this._ui.body.style.height = this.options.stepHeight * steps + 'px'; 164 | this._knob.setSteps(steps); 165 | }, 166 | _updateMapZoom: function () { 167 | this._map.setZoom(this._toZoomLevel(this._knob.getValue())); 168 | }, 169 | _updateKnobValue: function () { 170 | this._knob.setValue(this._toValue(this._map.getZoom())); 171 | }, 172 | _updateDisabled: function () { 173 | var zoomLevel = this._map.getZoom(), 174 | className = this.options.styleNS + '-disabled'; 175 | 176 | L.DomUtil.removeClass(this._ui.zoomIn, className); 177 | L.DomUtil.removeClass(this._ui.zoomOut, className); 178 | 179 | if (zoomLevel === this._map.getMinZoom()) { 180 | L.DomUtil.addClass(this._ui.zoomOut, className); 181 | } 182 | if (zoomLevel === this._map.getMaxZoom()) { 183 | L.DomUtil.addClass(this._ui.zoomIn, className); 184 | } 185 | } 186 | }); 187 | 188 | return Zoomslider; 189 | })(); 190 | 191 | L.Map.mergeOptions({ 192 | zoomControl: false, 193 | zoomsliderControl: true 194 | }); 195 | 196 | L.Map.addInitHook(function () { 197 | //modified by 
Morteza //just adding to the "map" 198 | if (this.options.zoomsliderControl && this._container.id === 'map') { 199 | this.zoomsliderControl = new L.Control.Zoomslider(); 200 | this.addControl(this.zoomsliderControl); 201 | } 202 | }); 203 | 204 | L.control.zoomslider = function (options) { 205 | return new L.Control.Zoomslider(options); 206 | }; 207 | -------------------------------------------------------------------------------- /public/javascripts/images/marker-icon-2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/public/javascripts/images/marker-icon-2x.png -------------------------------------------------------------------------------- /public/javascripts/images/marker-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/public/javascripts/images/marker-icon.png -------------------------------------------------------------------------------- /public/javascripts/images/marker-shadow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/public/javascripts/images/marker-shadow.png -------------------------------------------------------------------------------- /public/javascripts/oms.min.js: -------------------------------------------------------------------------------- 1 | (function(){/* 2 | OverlappingMarkerSpiderfier 3 | https://github.com/jawj/OverlappingMarkerSpiderfier-Leaflet 4 | Copyright (c) 2011 - 2012 George MacKerron 5 | Released under the MIT licence: http://opensource.org/licenses/mit-license 6 | Note: The Leaflet maps API must be included *before* this code 7 | */ 8 | (function(){var 
q={}.hasOwnProperty,r=[].slice;null!=this.L&&(this.OverlappingMarkerSpiderfier=function(){function n(c,b){var a,e,g,f,d=this;this.map=c;null==b&&(b={});for(a in b)q.call(b,a)&&(e=b[a],this[a]=e);this.initMarkerArrays();this.listeners={};f=["click","zoomend"];e=0;for(g=f.length;eb)return this;a=this.markerListeners.splice(b,1)[0];c.removeEventListener("click",a);delete c._oms;this.markers.splice(b,1);return this};d.clearMarkers=function(){var c,b,a,e,g;this.unspiderfy();g=this.markers;c=a=0;for(e=g.length;aa||this.listeners[c].splice(a,1);return this};d.clearListeners=function(c){this.listeners[c]=[];return this};d.trigger=function(){var c,b,a,e,g,f;b=arguments[0];c=2<=arguments.length?r.call(arguments,1):[];b=null!=(a=this.listeners[b])?a:[];f=[];e=0;for(g=b.length;ec;a=0<=c?++f:--f)a=this.circleStartAngle+a*e,d.push(new L.Point(b.x+g*Math.cos(a),b.y+g*Math.sin(a)));return d};d.generatePtsSpiral=function(c,b){var a,e,g,f,d;g=this.spiralLengthStart;a=0;d=[];for(e=f=0;0<=c?fc;e=0<=c?++f:--f)a+=this.spiralFootSeparation/g+5E-4*e,e=new L.Point(b.x+g*Math.cos(a),b.y+g*Math.sin(a)),g+=k*this.spiralLengthFactor/a,d.push(e);return d};d.spiderListener=function(c){var b,a,e,g,f,d,h,k,l;(b=null!= 13 | c._omsData)&&this.keepSpiderfied||this.unspiderfy();if(b)return this.trigger("click",c);g=[];f=[];d=this.nearbyDistance*this.nearbyDistance;e=this.map.latLngToLayerPoint(c.getLatLng());l=this.markers;h=0;for(k=l.length;h=this.circleSpiralSwitchover?this.generatePtsSpiral(m,a).reverse():this.generatePtsCircle(m,a);a=function(){var a,b,k,m=this;k=[];a=0;for(b=d.length;a() { 21 | public void invoke(TestBrowser browser) { 22 | browser.goTo("http://localhost:3333"); 23 | assertThat(browser.pageSource()).contains("Your new application is ready."); 24 | } 25 | }); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /tweets.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/geovista/GeoTxt/4738037ec32cf380b711fd8050e5b183eee066af/tweets.txt --------------------------------------------------------------------------------