├── .gitattributes ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── BUILD.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Examples ├── .gitignore ├── Docker │ ├── Dockerfile │ ├── Dockerfile.offline │ ├── README_Docker.md │ ├── Sonarqube │ │ ├── docker-compose.yml │ │ └── my-sonar.sh │ ├── docker-compose.yml │ ├── dockerignore │ ├── dockerignore.offline │ └── settings.xml ├── MapReduce │ ├── .gitignore │ ├── README.md │ ├── build.sh │ ├── log4j.properties │ ├── log4jsupplemental.xml │ ├── pom.xml-solr4 │ ├── script │ │ └── xponents-mr.sh │ └── src │ │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── opensextant │ │ │ └── mapreduce │ │ │ ├── AbstractMapper.java │ │ │ ├── GeoTaggerMapper.java │ │ │ ├── KeywordTaggerMapper.java │ │ │ ├── Log4JUtils.java │ │ │ ├── LoggingUtilities.java │ │ │ └── XponentsTaggerDemo.java │ │ └── test │ │ └── java │ │ └── org │ │ └── apache │ │ └── solr │ │ └── core │ │ └── CoreContainer.java ├── etc │ └── tika-config.xml ├── pom.xml ├── script │ ├── Xponents.groovy │ ├── xponents-demo.bat │ └── xponents-demo.sh └── src │ ├── main │ └── java │ │ └── org │ │ └── opensextant │ │ └── examples │ │ ├── BasicGeoTemporalProcessing.java │ │ ├── ExampleMain.java │ │ ├── TaxonomicTagger.java │ │ ├── WebCrawl.java │ │ ├── XponentsGazetteerExporter.java │ │ └── XponentsGazetteerQuery.java │ └── test │ └── resources │ ├── exclusions │ └── person-name-filter.txt │ └── logback.xml ├── LICENSE ├── NOTICE ├── README.md ├── RELEASE.md ├── _config.yml ├── build.properties ├── build.xml ├── dev.env ├── doc ├── 3rd-party.md ├── Geocoder_Handbook.md ├── LuceneRevolution17-Xponents,14Sept2017.pdf ├── LuceneRevolution17-Xponents.mp4 ├── Patterns.md ├── README_Basics.md ├── README_Examples.md ├── README_REST_Docker.md ├── README_Xlayer_REST.md ├── README_filegdb_ouput_format.md ├── README_gazetteer.md ├── README_postal.md ├── XCoord.md ├── core-apidocs │ ├── allclasses-frame.html │ ├── allclasses-index.html │ ├── 
allclasses-noframe.html │ ├── allpackages-index.html │ ├── constant-values.html │ ├── deprecated-list.html │ ├── doc-files │ │ └── opensextant-manual-logo.png │ ├── element-list │ ├── help-doc.html │ ├── index-all.html │ ├── index.html │ ├── jquery-ui.overrides.css │ ├── jquery │ │ ├── external │ │ │ └── jquery │ │ │ │ └── jquery.js │ │ ├── images │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ ├── ui-bg_glass_65_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ ├── ui-icons_454545_256x240.png │ │ │ ├── ui-icons_888888_256x240.png │ │ │ └── ui-icons_cd0a0a_256x240.png │ │ ├── jquery-3.3.1.js │ │ ├── jquery-migrate-3.0.1.js │ │ ├── jquery-ui.css │ │ ├── jquery-ui.js │ │ ├── jquery-ui.min.css │ │ ├── jquery-ui.min.js │ │ ├── jquery-ui.structure.css │ │ └── jquery-ui.structure.min.css │ ├── legal │ │ ├── ADDITIONAL_LICENSE_INFO │ │ ├── ASSEMBLY_EXCEPTION │ │ ├── LICENSE │ │ ├── jquery.md │ │ └── jqueryUI.md │ ├── member-search-index.js │ ├── module-search-index.js │ ├── org │ │ └── opensextant │ │ │ ├── ConfigException.html │ │ │ ├── annotations │ │ │ ├── Annotation.html │ │ │ ├── AnnotationHelper.html │ │ │ ├── DeepEyeData.html │ │ │ ├── DeepEyeException.html │ │ │ ├── DeepEyeStore.html │ │ │ ├── Record.html │ │ │ ├── class-use │ │ │ │ ├── Annotation.html │ │ │ │ ├── AnnotationHelper.html │ │ │ │ ├── DeepEyeData.html │ │ │ │ ├── DeepEyeException.html │ │ │ │ ├── DeepEyeStore.html │ │ │ │ └── Record.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ ├── class-use │ │ │ └── ConfigException.html │ │ │ ├── data │ │ │ ├── Country.TZ.html │ │ │ ├── Country.html │ │ │ ├── DocInput.html │ │ │ ├── GeoBase.html │ │ │ ├── Geocoding.html │ │ │ ├── Language.html │ │ │ ├── LatLon.html 
│ │ │ ├── MatchSchema.html │ │ │ ├── Place.html │ │ │ ├── Taxon.html │ │ │ ├── TextInput.html │ │ │ ├── class-use │ │ │ │ ├── Country.TZ.html │ │ │ │ ├── Country.html │ │ │ │ ├── DocInput.html │ │ │ │ ├── GeoBase.html │ │ │ │ ├── Geocoding.html │ │ │ │ ├── Language.html │ │ │ │ ├── LatLon.html │ │ │ │ ├── MatchSchema.html │ │ │ │ ├── Place.html │ │ │ │ ├── Taxon.html │ │ │ │ └── TextInput.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ ├── package-use.html │ │ │ └── social │ │ │ │ ├── JSONListener.html │ │ │ │ ├── Message.html │ │ │ │ ├── MessageParseException.html │ │ │ │ ├── Tweet.Mention.html │ │ │ │ ├── Tweet.html │ │ │ │ ├── TweetLoader.html │ │ │ │ ├── TweetUtility.html │ │ │ │ ├── class-use │ │ │ │ ├── JSONListener.html │ │ │ │ ├── Message.html │ │ │ │ ├── MessageParseException.html │ │ │ │ ├── Tweet.Mention.html │ │ │ │ ├── Tweet.html │ │ │ │ ├── TweetLoader.html │ │ │ │ └── TweetUtility.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ ├── extraction │ │ │ ├── ExtractionException.html │ │ │ ├── ExtractionMetrics.html │ │ │ ├── ExtractionResult.html │ │ │ ├── Extractor.html │ │ │ ├── MatchFilter.html │ │ │ ├── MatcherUtils.html │ │ │ ├── NormalizationException.html │ │ │ ├── TextEntity.html │ │ │ ├── TextMatch.html │ │ │ ├── class-use │ │ │ │ ├── ExtractionException.html │ │ │ │ ├── ExtractionMetrics.html │ │ │ │ ├── ExtractionResult.html │ │ │ │ ├── Extractor.html │ │ │ │ ├── MatchFilter.html │ │ │ │ ├── MatcherUtils.html │ │ │ │ ├── NormalizationException.html │ │ │ │ ├── TextEntity.html │ │ │ │ └── TextMatch.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ ├── extractors │ │ │ ├── flexpat │ │ │ │ ├── AbstractFlexPat.html │ │ │ │ ├── PatternTestCase.html │ │ │ │ ├── RegexPattern.html │ │ │ │ ├── RegexPatternManager.html │ │ │ │ ├── 
TextMatchResult.html │ │ │ │ ├── class-use │ │ │ │ │ ├── AbstractFlexPat.html │ │ │ │ │ ├── PatternTestCase.html │ │ │ │ │ ├── RegexPattern.html │ │ │ │ │ ├── RegexPatternManager.html │ │ │ │ │ └── TextMatchResult.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ ├── langid │ │ │ │ ├── LangDetect.html │ │ │ │ ├── LangID.html │ │ │ │ ├── class-use │ │ │ │ │ ├── LangDetect.html │ │ │ │ │ └── LangID.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ ├── poli │ │ │ │ ├── PatternsOfLife.html │ │ │ │ ├── PoliMatch.html │ │ │ │ ├── PoliPatternManager.html │ │ │ │ ├── TestCase.html │ │ │ │ ├── class-use │ │ │ │ │ ├── PatternsOfLife.html │ │ │ │ │ ├── PoliMatch.html │ │ │ │ │ ├── PoliPatternManager.html │ │ │ │ │ └── TestCase.html │ │ │ │ ├── data │ │ │ │ │ ├── EmailAddress.html │ │ │ │ │ ├── MACAddress.html │ │ │ │ │ ├── Money.html │ │ │ │ │ ├── TelephoneNumber.html │ │ │ │ │ ├── class-use │ │ │ │ │ │ ├── EmailAddress.html │ │ │ │ │ │ ├── MACAddress.html │ │ │ │ │ │ ├── Money.html │ │ │ │ │ │ └── TelephoneNumber.html │ │ │ │ │ ├── package-frame.html │ │ │ │ │ ├── package-summary.html │ │ │ │ │ ├── package-tree.html │ │ │ │ │ └── package-use.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ ├── xcoord │ │ │ │ ├── DMSFilter.html │ │ │ │ ├── DMSOrdinate.Resolution.html │ │ │ │ ├── DMSOrdinate.html │ │ │ │ ├── GeocoordMatch.html │ │ │ │ ├── GeocoordMatchFilter.html │ │ │ │ ├── GeocoordNormalization.html │ │ │ │ ├── GeocoordPattern.html │ │ │ │ ├── GeocoordPrecision.html │ │ │ │ ├── GeocoordTestCase.html │ │ │ │ ├── Hemisphere.html │ │ │ │ ├── MGRSFilter.html │ │ │ │ ├── MGRSParser.html │ │ │ │ ├── PatternManager.html │ │ │ │ ├── PrecisionScales.html │ │ │ │ ├── UTMParser.html │ │ │ │ ├── XConstants.html │ │ │ │ ├── XCoord.html │ │ 
│ │ ├── class-use │ │ │ │ │ ├── DMSFilter.html │ │ │ │ │ ├── DMSOrdinate.Resolution.html │ │ │ │ │ ├── DMSOrdinate.html │ │ │ │ │ ├── GeocoordMatch.html │ │ │ │ │ ├── GeocoordMatchFilter.html │ │ │ │ │ ├── GeocoordNormalization.html │ │ │ │ │ ├── GeocoordPattern.html │ │ │ │ │ ├── GeocoordPrecision.html │ │ │ │ │ ├── GeocoordTestCase.html │ │ │ │ │ ├── Hemisphere.html │ │ │ │ │ ├── MGRSFilter.html │ │ │ │ │ ├── MGRSParser.html │ │ │ │ │ ├── PatternManager.html │ │ │ │ │ ├── PrecisionScales.html │ │ │ │ │ ├── UTMParser.html │ │ │ │ │ ├── XConstants.html │ │ │ │ │ └── XCoord.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ └── xtemporal │ │ │ │ ├── DateMatch.TimeResolution.html │ │ │ │ ├── DateMatch.html │ │ │ │ ├── DateNormalization.html │ │ │ │ ├── DateTimePattern.html │ │ │ │ ├── PatternManager.html │ │ │ │ ├── TestCase.html │ │ │ │ ├── XTConstants.html │ │ │ │ ├── XTemporal.html │ │ │ │ ├── class-use │ │ │ │ ├── DateMatch.TimeResolution.html │ │ │ │ ├── DateMatch.html │ │ │ │ ├── DateNormalization.html │ │ │ │ ├── DateTimePattern.html │ │ │ │ ├── PatternManager.html │ │ │ │ ├── TestCase.html │ │ │ │ ├── XTConstants.html │ │ │ │ └── XTemporal.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ ├── output │ │ │ ├── AbstractFormatter.html │ │ │ ├── AbstractGenericFormatter.html │ │ │ ├── CSVFormatter.html │ │ │ ├── CSVGenericFormatter.html │ │ │ ├── FormatterFactory.html │ │ │ ├── GDBFormatter.html │ │ │ ├── GISDataFormatter.html │ │ │ ├── GISDataModel.html │ │ │ ├── GeoCSVFormatter.html │ │ │ ├── KMLFormatter.html │ │ │ ├── MatchInterpreter.html │ │ │ ├── OpenSextantSchema.html │ │ │ ├── ResultsFormatter.html │ │ │ ├── ShapefileFormatter.html │ │ │ ├── WKTFormatter.html │ │ │ ├── class-use │ │ │ │ ├── AbstractFormatter.html │ │ │ │ ├── AbstractGenericFormatter.html │ │ │ │ ├── CSVFormatter.html │ │ │ 
│ ├── CSVGenericFormatter.html │ │ │ │ ├── FormatterFactory.html │ │ │ │ ├── GDBFormatter.html │ │ │ │ ├── GISDataFormatter.html │ │ │ │ ├── GISDataModel.html │ │ │ │ ├── GeoCSVFormatter.html │ │ │ │ ├── KMLFormatter.html │ │ │ │ ├── MatchInterpreter.html │ │ │ │ ├── OpenSextantSchema.html │ │ │ │ ├── ResultsFormatter.html │ │ │ │ ├── ShapefileFormatter.html │ │ │ │ └── WKTFormatter.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ ├── package-use.html │ │ │ ├── processing │ │ │ ├── Parameters.html │ │ │ ├── ProcessingException.html │ │ │ ├── ResultsUtility.html │ │ │ ├── RuntimeTools.html │ │ │ ├── XtractorGroup.html │ │ │ ├── class-use │ │ │ │ ├── Parameters.html │ │ │ │ ├── ProcessingException.html │ │ │ │ ├── ResultsUtility.html │ │ │ │ ├── RuntimeTools.html │ │ │ │ └── XtractorGroup.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ └── util │ │ │ ├── AnyFilenameFilter.html │ │ │ ├── FileUtility.html │ │ │ ├── GeodeticUtility.html │ │ │ ├── GeonamesUtility.html │ │ │ ├── TextUtils.html │ │ │ ├── Unimap.html │ │ │ ├── class-use │ │ │ ├── AnyFilenameFilter.html │ │ │ ├── FileUtility.html │ │ │ ├── GeodeticUtility.html │ │ │ ├── GeonamesUtility.html │ │ │ ├── TextUtils.html │ │ │ └── Unimap.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ ├── overview-frame.html │ ├── overview-summary.html │ ├── overview-tree.html │ ├── package-list │ ├── package-search-index.js │ ├── resources │ │ ├── glass.png │ │ └── x.png │ ├── script-dir │ │ ├── images │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ ├── ui-bg_glass_65_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ ├── 
ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ ├── ui-icons_454545_256x240.png │ │ │ ├── ui-icons_888888_256x240.png │ │ │ └── ui-icons_cd0a0a_256x240.png │ │ ├── jquery-3.5.1.min.js │ │ ├── jquery-3.6.1.min.js │ │ ├── jquery-ui.min.css │ │ ├── jquery-ui.min.js │ │ └── jquery-ui.structure.min.css │ ├── script.js │ ├── search.js │ ├── serialized-form.html │ ├── stylesheet.css │ ├── tag-search-index.js │ └── type-search-index.js ├── geocoding-workflow.png ├── postal-concept-01.png ├── pydoc │ ├── opensextant.FlexPat.html │ ├── opensextant.TaxCat.html │ ├── opensextant.advas_phonetics.html │ ├── opensextant.extractors.html │ ├── opensextant.extractors.poli.html │ ├── opensextant.gazetteer.html │ ├── opensextant.html │ ├── opensextant.phonetics.html │ ├── opensextant.utility.html │ └── opensextant.xlayer.html ├── sdk-apidocs │ ├── allclasses-frame.html │ ├── allclasses-index.html │ ├── allclasses-noframe.html │ ├── allpackages-index.html │ ├── constant-values.html │ ├── deprecated-list.html │ ├── doc-files │ │ └── opensextant-manual-logo.png │ ├── element-list │ ├── help-doc.html │ ├── index-all.html │ ├── index.html │ ├── jquery-ui.overrides.css │ ├── jquery │ │ ├── external │ │ │ └── jquery │ │ │ │ └── jquery.js │ │ ├── images │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ ├── ui-bg_glass_65_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ ├── ui-icons_454545_256x240.png │ │ │ ├── ui-icons_888888_256x240.png │ │ │ └── ui-icons_cd0a0a_256x240.png │ │ ├── jquery-3.3.1.js │ │ ├── jquery-migrate-3.0.1.js │ │ ├── jquery-ui.css │ │ ├── jquery-ui.js │ │ ├── jquery-ui.min.css │ │ ├── jquery-ui.min.js │ │ ├── jquery-ui.structure.css │ │ └── jquery-ui.structure.min.css │ ├── 
legal │ │ ├── ADDITIONAL_LICENSE_INFO │ │ ├── ASSEMBLY_EXCEPTION │ │ ├── LICENSE │ │ ├── jquery.md │ │ └── jqueryUI.md │ ├── member-search-index.js │ ├── module-search-index.js │ ├── org │ │ └── opensextant │ │ │ ├── extraction │ │ │ ├── SolrMatcherSupport.html │ │ │ ├── SolrTaggerRequest.html │ │ │ ├── class-use │ │ │ │ ├── SolrMatcherSupport.html │ │ │ │ └── SolrTaggerRequest.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ ├── extractors │ │ │ ├── geo │ │ │ │ ├── BoundaryObserver.html │ │ │ │ ├── CountryCount.html │ │ │ │ ├── CountryObserver.html │ │ │ │ ├── GazetteerIndexer.html │ │ │ │ ├── GazetteerMatcher.html │ │ │ │ ├── GazetteerUpdateProcessorFactory.html │ │ │ │ ├── LocationObserver.html │ │ │ │ ├── PlaceCandidate.html │ │ │ │ ├── PlaceCount.html │ │ │ │ ├── PlaceEvidence.Scope.html │ │ │ │ ├── PlaceEvidence.html │ │ │ │ ├── PlaceGeocoder.html │ │ │ │ ├── PostalGeocoder.html │ │ │ │ ├── PostalTagger.html │ │ │ │ ├── ScoredPlace.html │ │ │ │ ├── SolrGazetteer.html │ │ │ │ ├── TagFilter.html │ │ │ │ ├── class-use │ │ │ │ │ ├── BoundaryObserver.html │ │ │ │ │ ├── CountryCount.html │ │ │ │ │ ├── CountryObserver.html │ │ │ │ │ ├── GazetteerIndexer.html │ │ │ │ │ ├── GazetteerMatcher.html │ │ │ │ │ ├── GazetteerUpdateProcessorFactory.html │ │ │ │ │ ├── LocationObserver.html │ │ │ │ │ ├── PlaceCandidate.html │ │ │ │ │ ├── PlaceCount.html │ │ │ │ │ ├── PlaceEvidence.Scope.html │ │ │ │ │ ├── PlaceEvidence.html │ │ │ │ │ ├── PlaceGeocoder.html │ │ │ │ │ ├── PostalGeocoder.html │ │ │ │ │ ├── PostalTagger.html │ │ │ │ │ ├── ScoredPlace.html │ │ │ │ │ ├── SolrGazetteer.html │ │ │ │ │ └── TagFilter.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ ├── package-use.html │ │ │ │ ├── rules │ │ │ │ │ ├── ContextualOrganizationRule.html │ │ │ │ │ ├── CoordinateAssociationRule.html │ │ │ │ │ ├── CountryRule.html │ │ │ │ │ ├── 
FeatureClassMeta.html │ │ │ │ │ ├── FeatureRule.html │ │ │ │ │ ├── GeocodeRule.html │ │ │ │ │ ├── HeatMapRule.html │ │ │ │ │ ├── LocationChooserRule.html │ │ │ │ │ ├── MajorPlaceRule.html │ │ │ │ │ ├── NameCodeRule.html │ │ │ │ │ ├── NameRule.html │ │ │ │ │ ├── NonLatinNameRule.html │ │ │ │ │ ├── NonsenseFilter.html │ │ │ │ │ ├── PersonNameFilter.html │ │ │ │ │ ├── PostalCodeAssociationRule.html │ │ │ │ │ ├── PostalCodeFilter.html │ │ │ │ │ ├── PostalLocationChooser.html │ │ │ │ │ ├── ProvinceAssociationRule.html │ │ │ │ │ ├── ProvinceNameSetter.html │ │ │ │ │ ├── RuleTool.html │ │ │ │ │ ├── class-use │ │ │ │ │ │ ├── ContextualOrganizationRule.html │ │ │ │ │ │ ├── CoordinateAssociationRule.html │ │ │ │ │ │ ├── CountryRule.html │ │ │ │ │ │ ├── FeatureClassMeta.html │ │ │ │ │ │ ├── FeatureRule.html │ │ │ │ │ │ ├── GeocodeRule.html │ │ │ │ │ │ ├── HeatMapRule.html │ │ │ │ │ │ ├── LocationChooserRule.html │ │ │ │ │ │ ├── MajorPlaceRule.html │ │ │ │ │ │ ├── NameCodeRule.html │ │ │ │ │ │ ├── NameRule.html │ │ │ │ │ │ ├── NonLatinNameRule.html │ │ │ │ │ │ ├── NonsenseFilter.html │ │ │ │ │ │ ├── PersonNameFilter.html │ │ │ │ │ │ ├── PostalCodeAssociationRule.html │ │ │ │ │ │ ├── PostalCodeFilter.html │ │ │ │ │ │ ├── PostalLocationChooser.html │ │ │ │ │ │ ├── ProvinceAssociationRule.html │ │ │ │ │ │ ├── ProvinceNameSetter.html │ │ │ │ │ │ └── RuleTool.html │ │ │ │ │ ├── package-frame.html │ │ │ │ │ ├── package-summary.html │ │ │ │ │ ├── package-tree.html │ │ │ │ │ └── package-use.html │ │ │ │ └── social │ │ │ │ │ ├── GeoInference.html │ │ │ │ │ ├── GeoInferencer.html │ │ │ │ │ ├── KMLDemoWriter.html │ │ │ │ │ ├── SimpleProcessorDemo.html │ │ │ │ │ ├── SocialGeo.html │ │ │ │ │ ├── XponentGeocoder.html │ │ │ │ │ ├── XponentTextGeotagger.html │ │ │ │ │ ├── class-use │ │ │ │ │ ├── GeoInference.html │ │ │ │ │ ├── GeoInferencer.html │ │ │ │ │ ├── KMLDemoWriter.html │ │ │ │ │ ├── SimpleProcessorDemo.html │ │ │ │ │ ├── SocialGeo.html │ │ │ │ │ ├── XponentGeocoder.html │ │ │ │ │ └── 
XponentTextGeotagger.html │ │ │ │ │ ├── package-frame.html │ │ │ │ │ ├── package-summary.html │ │ │ │ │ ├── package-tree.html │ │ │ │ │ └── package-use.html │ │ │ └── xtax │ │ │ │ ├── TaxonFilter.html │ │ │ │ ├── TaxonMatch.html │ │ │ │ ├── TaxonMatcher.html │ │ │ │ ├── class-use │ │ │ │ ├── TaxonFilter.html │ │ │ │ ├── TaxonMatch.html │ │ │ │ └── TaxonMatcher.html │ │ │ │ ├── package-frame.html │ │ │ │ ├── package-summary.html │ │ │ │ ├── package-tree.html │ │ │ │ └── package-use.html │ │ │ ├── output │ │ │ ├── TaggerMatchInterpeter.html │ │ │ ├── Transforms.html │ │ │ ├── class-use │ │ │ │ ├── TaggerMatchInterpeter.html │ │ │ │ └── Transforms.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ ├── util │ │ │ ├── LuceneStopwords.html │ │ │ ├── SolrProxy.html │ │ │ ├── SolrUtil.html │ │ │ ├── class-use │ │ │ │ ├── LuceneStopwords.html │ │ │ │ ├── SolrProxy.html │ │ │ │ └── SolrUtil.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ │ │ └── xlayer │ │ │ ├── XlayerClient.html │ │ │ ├── class-use │ │ │ └── XlayerClient.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ ├── package-use.html │ │ │ └── server │ │ │ ├── TaggerResource.html │ │ │ ├── XlayerApp.html │ │ │ ├── XlayerControl.html │ │ │ ├── class-use │ │ │ ├── TaggerResource.html │ │ │ ├── XlayerApp.html │ │ │ └── XlayerControl.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ ├── package-use.html │ │ │ └── xgeo │ │ │ ├── XlayerRestlet.html │ │ │ ├── XlayerServer.html │ │ │ ├── XponentsGeotagger.html │ │ │ ├── class-use │ │ │ ├── XlayerRestlet.html │ │ │ ├── XlayerServer.html │ │ │ └── XponentsGeotagger.html │ │ │ ├── package-frame.html │ │ │ ├── package-summary.html │ │ │ ├── package-tree.html │ │ │ └── package-use.html │ ├── overview-frame.html │ ├── 
overview-summary.html │ ├── overview-tree.html │ ├── package-list │ ├── package-search-index.js │ ├── resources │ │ ├── glass.png │ │ └── x.png │ ├── script-dir │ │ ├── images │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ ├── ui-bg_glass_65_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ ├── ui-icons_454545_256x240.png │ │ │ ├── ui-icons_888888_256x240.png │ │ │ └── ui-icons_cd0a0a_256x240.png │ │ ├── jquery-3.5.1.min.js │ │ ├── jquery-3.6.1.min.js │ │ ├── jquery-ui.min.css │ │ ├── jquery-ui.min.js │ │ └── jquery-ui.structure.min.css │ ├── script.js │ ├── search.js │ ├── serialized-form.html │ ├── stylesheet.css │ ├── tag-search-index.js │ └── type-search-index.js └── xlayer-xgeo-server-example.png ├── etc ├── langdetect-profiles-v3.zip ├── logback.xml ├── logging.properties └── tika-config.xml ├── pom.xml ├── script ├── .gitignore ├── dist-docker-offline.sh ├── dist-docker.sh ├── dist.sh ├── tag-docker.sh ├── tester.sh ├── xlayer-docker.sh ├── xlayer-server.bat └── xlayer-server.sh ├── setup.sh ├── solr ├── .gitignore ├── GAZETEER_REPORT.md ├── README.md ├── VERSION ├── build-1-get-sources.sh ├── build-1-prep-admin1.sh ├── build-2-sqlite-master.sh ├── build-3-sqlite-postal.sh ├── build.sh ├── build.xml ├── etc │ ├── gazetteer │ │ ├── README_filters.md │ │ ├── Source_Schema_Notes.md │ │ ├── additions │ │ │ ├── README.md │ │ │ └── adhoc-placenames.csv │ │ ├── feature_adhoc_descriptions.csv │ │ ├── filters │ │ │ ├── continent-filter.txt │ │ │ ├── exclude-adhoc-names.txt │ │ │ ├── exclude-features.csv │ │ │ ├── include-adhoc-places.txt │ │ │ ├── non-placenames,acronym.csv │ │ │ ├── non-placenames,admin-codes.csv │ │ │ ├── non-placenames,ara.csv │ │ │ ├── non-placenames,deu.csv │ │ │ ├── non-placenames,rus,ukr.csv │ │ │ ├── 
non-placenames,spa.csv │ │ │ ├── non-placenames.csv │ │ │ ├── person-suffix-filter.txt │ │ │ └── person-title-filter.txt │ │ ├── geonames_admin1_mapping.csv │ │ ├── lang │ │ │ └── vietnamese-stopwords.txt │ │ ├── nga_2021_admin1_mapping.csv │ │ ├── nga_2022_admin1_mapping.csv │ │ └── usgs2gnis-feature-map.csv │ └── taxcat │ │ ├── README.md │ │ ├── entities-adhoc.txt │ │ ├── nationalities.csv │ │ ├── non-person-names.txt │ │ ├── stopwords-jrcnames.txt │ │ └── stopwords.txt ├── mysolr.sh ├── resources │ └── log4j.properties ├── script │ ├── assemble_person_filter.py │ ├── assemble_wfb_leaders.py │ ├── assemble_wfb_orgs.py │ ├── convert_latin1_folding.py │ ├── export_gazetteer.sh │ ├── gaz_admin_exporter.py │ ├── gaz_administrative_codes.py │ ├── gaz_country_meta.py │ ├── gaz_etl.py │ ├── gaz_exclusions.py │ ├── gaz_finalize.py │ ├── gaz_fix_country_coding.py │ ├── gaz_generate_variants.py │ ├── gaz_geonames.py │ ├── gaz_nga.py │ ├── gaz_pakistan_admin3.py │ ├── gaz_popstats.py │ ├── gaz_tool.py │ ├── gaz_usgs.py │ ├── inspector.sh │ ├── postal.py │ ├── solr7-dist-bin-solr │ ├── solr7-dist-bin-solr.cmd │ ├── taxcat_jrcnames.py │ ├── taxcat_nationalities.py │ ├── taxcat_person_names.py │ ├── taxcat_wfb.py │ ├── wordstats-collector.py │ └── wordstats.sh └── solr7 │ ├── README.txt │ ├── gazetteer │ ├── conf │ │ ├── OpenSextant-Gazetteer-ASCIIFolding.txt │ │ ├── OpenSextant-Gazetteer-Latin1Folding.txt │ │ ├── lang │ │ │ ├── contractions_ca.txt │ │ │ ├── contractions_fr.txt │ │ │ ├── contractions_ga.txt │ │ │ ├── contractions_it.txt │ │ │ ├── hyphenations_ga.txt │ │ │ ├── stemdict_nl.txt │ │ │ ├── stoptags_ja.txt │ │ │ ├── stopwords_ar.txt │ │ │ ├── stopwords_bg.txt │ │ │ ├── stopwords_ca.txt │ │ │ ├── stopwords_cz.txt │ │ │ ├── stopwords_da.txt │ │ │ ├── stopwords_de.txt │ │ │ ├── stopwords_el.txt │ │ │ ├── stopwords_en.txt │ │ │ ├── stopwords_es.txt │ │ │ ├── stopwords_eu.txt │ │ │ ├── stopwords_fa.txt │ │ │ ├── stopwords_fi.txt │ │ │ ├── stopwords_fr.txt │ │ │ ├── 
stopwords_ga.txt │ │ │ ├── stopwords_gl.txt │ │ │ ├── stopwords_hi.txt │ │ │ ├── stopwords_hu.txt │ │ │ ├── stopwords_hy.txt │ │ │ ├── stopwords_id.txt │ │ │ ├── stopwords_it.txt │ │ │ ├── stopwords_ja.txt │ │ │ ├── stopwords_lv.txt │ │ │ ├── stopwords_nl.txt │ │ │ ├── stopwords_no.txt │ │ │ ├── stopwords_pt.txt │ │ │ ├── stopwords_ro.txt │ │ │ ├── stopwords_ru.txt │ │ │ ├── stopwords_sv.txt │ │ │ ├── stopwords_th.txt │ │ │ ├── stopwords_tr.txt │ │ │ └── userdict_ja.txt │ │ ├── schema.xml │ │ └── solrconfig.xml │ └── core.properties │ ├── postal │ ├── conf │ │ ├── schema.xml │ │ └── solrconfig.xml │ └── core.properties │ ├── solr.xml │ ├── taxcat │ ├── conf │ │ ├── OpenSextant-Gazetteer-ASCIIFolding.txt │ │ ├── OpenSextant-Gazetteer-Latin1Folding.txt │ │ ├── lang │ │ │ ├── contractions_ca.txt │ │ │ ├── contractions_fr.txt │ │ │ ├── contractions_ga.txt │ │ │ ├── contractions_it.txt │ │ │ ├── hyphenations_ga.txt │ │ │ ├── stemdict_nl.txt │ │ │ ├── stoptags_ja.txt │ │ │ ├── stopwords_ar.txt │ │ │ ├── stopwords_bg.txt │ │ │ ├── stopwords_ca.txt │ │ │ ├── stopwords_cz.txt │ │ │ ├── stopwords_da.txt │ │ │ ├── stopwords_de.txt │ │ │ ├── stopwords_el.txt │ │ │ ├── stopwords_en.txt │ │ │ ├── stopwords_es.txt │ │ │ ├── stopwords_eu.txt │ │ │ ├── stopwords_fa.txt │ │ │ ├── stopwords_fi.txt │ │ │ ├── stopwords_fr.txt │ │ │ ├── stopwords_ga.txt │ │ │ ├── stopwords_gl.txt │ │ │ ├── stopwords_hi.txt │ │ │ ├── stopwords_hu.txt │ │ │ ├── stopwords_hy.txt │ │ │ ├── stopwords_id.txt │ │ │ ├── stopwords_it.txt │ │ │ ├── stopwords_ja.txt │ │ │ ├── stopwords_lv.txt │ │ │ ├── stopwords_nl.txt │ │ │ ├── stopwords_no.txt │ │ │ ├── stopwords_pt.txt │ │ │ ├── stopwords_ro.txt │ │ │ ├── stopwords_ru.txt │ │ │ ├── stopwords_sv.txt │ │ │ ├── stopwords_th.txt │ │ │ ├── stopwords_tr.txt │ │ │ └── userdict_ja.txt │ │ ├── schema.xml │ │ └── solrconfig.xml │ └── core.properties │ └── zoo.cfg ├── src ├── checkstyle-suppressions.xml ├── checkstyle.xml ├── main │ ├── java │ │ └── org │ │ │ └── 
opensextant │ │ │ ├── extraction │ │ │ ├── SolrMatcherSupport.java │ │ │ ├── SolrTaggerRequest.java │ │ │ └── TagFilter.java │ │ │ ├── extractors │ │ │ ├── geo │ │ │ │ ├── BoundaryObserver.java │ │ │ │ ├── CountryCount.java │ │ │ │ ├── CountryObserver.java │ │ │ │ ├── GazetteerMatcher.java │ │ │ │ ├── LocationObserver.java │ │ │ │ ├── PlaceCandidate.java │ │ │ │ ├── PlaceCount.java │ │ │ │ ├── PlaceEvidence.java │ │ │ │ ├── PlaceGeocoder.java │ │ │ │ ├── PostalGeocoder.java │ │ │ │ ├── PostalTagger.java │ │ │ │ ├── ScoredPlace.java │ │ │ │ ├── SolrGazetteer.java │ │ │ │ ├── rules │ │ │ │ │ ├── ContextualOrganizationRule.java │ │ │ │ │ ├── CoordinateAssociationRule.java │ │ │ │ │ ├── CountryRule.java │ │ │ │ │ ├── FeatureClassMeta.java │ │ │ │ │ ├── FeatureRule.java │ │ │ │ │ ├── GeocodeRule.java │ │ │ │ │ ├── HeatMapRule.java │ │ │ │ │ ├── LocationChooserRule.java │ │ │ │ │ ├── MajorPlaceRule.java │ │ │ │ │ ├── NameCodeRule.java │ │ │ │ │ ├── NameRule.java │ │ │ │ │ ├── NonLatinNameRule.java │ │ │ │ │ ├── NonsenseFilter.java │ │ │ │ │ ├── PersonNameFilter.java │ │ │ │ │ ├── PostalCodeAssociationRule.java │ │ │ │ │ ├── PostalCodeFilter.java │ │ │ │ │ ├── PostalLocationChooser.java │ │ │ │ │ ├── ProvinceAssociationRule.java │ │ │ │ │ ├── ProvinceNameSetter.java │ │ │ │ │ └── RuleTool.java │ │ │ │ └── social │ │ │ │ │ ├── GeoInference.java │ │ │ │ │ ├── GeoInferencer.java │ │ │ │ │ ├── KMLDemoWriter.java │ │ │ │ │ ├── SocialGeo.java │ │ │ │ │ ├── XponentGeocoder.java │ │ │ │ │ └── XponentTextGeotagger.java │ │ │ └── xtax │ │ │ │ ├── TaxonFilter.java │ │ │ │ ├── TaxonMatch.java │ │ │ │ └── TaxonMatcher.java │ │ │ ├── output │ │ │ ├── TaggerMatchInterpeter.java │ │ │ └── Transforms.java │ │ │ ├── util │ │ │ ├── LuceneStopwords.java │ │ │ ├── SolrProxy.java │ │ │ └── SolrUtil.java │ │ │ └── xlayer │ │ │ ├── XlayerClient.java │ │ │ └── server │ │ │ ├── TaggerResource.java │ │ │ ├── XlayerApp.java │ │ │ ├── XlayerControl.java │ │ │ └── xgeo │ │ │ ├── XlayerRestlet.java │ │ 
│ ├── XlayerServer.java │ │ │ └── XponentsGeotagger.java │ ├── javadoc │ │ ├── doc-files │ │ │ └── opensextant-manual-logo.png │ │ ├── org │ │ │ └── opensextant │ │ │ │ ├── extractors │ │ │ │ ├── geo │ │ │ │ │ ├── package.html │ │ │ │ │ ├── rules │ │ │ │ │ │ └── package.html │ │ │ │ │ └── social │ │ │ │ │ │ └── package.html │ │ │ │ └── xtax │ │ │ │ │ └── package.html │ │ │ │ └── package.html │ │ └── overview.html │ └── resources │ │ ├── banner.txt │ │ └── twitter │ │ ├── exclude-tweet-profile-placenames.txt │ │ └── tweet-xcoord.cfg └── test │ ├── java │ ├── GazetteerIndexer.java │ ├── PlaceGeocoderTester.java │ ├── PostalGeocoderTester.java │ ├── SocialGeoDemo.java │ ├── XTaxTester.java │ ├── XlayerClientTester.java │ └── org │ │ └── opensextant │ │ └── extractors │ │ └── test │ │ ├── TestGazMatcher.java │ │ ├── TestGazetteer.java │ │ ├── TestGazetteerConflationKey.java │ │ ├── TestJava8Maps.java │ │ ├── TestNameScoreUtils.java │ │ ├── TestPersonFilter.java │ │ ├── TestPlaceGeocoderGeoBoundaries.java │ │ ├── TestPlaceGeocoderLanguages.java │ │ ├── TestPlaceLookup.java │ │ ├── TestPlacePhonetics.java │ │ ├── TestPostalFilters.java │ │ ├── TestResourceLoading.java │ │ ├── TestReverseGeocoding.java │ │ ├── TestSolrUtils.java │ │ ├── TestStopFilters.java │ │ └── TestXTax.java │ └── resources │ ├── .gitignore │ ├── data │ ├── placename-tests-cjk.txt │ ├── placename-tests.txt │ ├── postal-addresses.json │ └── randomness.txt │ ├── filters │ ├── person-name-filter.txt │ ├── person-suffix-filter.txt │ └── person-title-filter.txt │ ├── logback.xml │ └── test-filter.txt └── test ├── test-xlayer-curl.sh ├── test-xlayer-java.sh ├── test-xlayer-python.sh └── xlayer-test-suite.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/dealing-with-line-endings 2 | 3 | # Set default behavior, in case users don't have core.autocrlf set. 
4 | * text=auto 5 | 6 | # Explicitly declare text files we want to always be normalized and converted 7 | # to native line endings on checkout. 8 | *.java text 9 | *.py text 10 | *.groovy text 11 | 12 | *.csv text 13 | *.txt text 14 | *.xml text 15 | *.htm text 16 | *.html text 17 | *.properties text 18 | *.md text 19 | *.kml text 20 | *.pom text 21 | 22 | # Windows CRLF line endings on checkout. 23 | *.sln text eol=crlf 24 | *.bat text eol=crlf 25 | 26 | # Unix LF line endings on checkout. 27 | *.sh text eol=lf 28 | 29 | # Denote all files that are truly binary and should not be modified. 30 | *.png binary 31 | *.gif binary 32 | *.tif binary 33 | *.jpg binary 34 | 35 | *.zip binary 36 | *.tar binary 37 | *.tgz binary 38 | 39 | *.pdf binary 40 | *.dll binary 41 | *.exe binary 42 | *.dbf binary 43 | *.shp binary 44 | 45 | *.rtf binary 46 | *.xls binary 47 | *.xlsx binary 48 | *.doc binary 49 | *.docx binary 50 | *.ppt binary 51 | *.pptx binary -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Help us identify bugs and squash 'em 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | What is broken? Class name, Module, etc? 12 | 13 | **To Reproduce** 14 | * Java or Python version: 15 | * Usage: 16 | * Data input: 17 | * Did you enable logging (level = `DEBUG`)? 18 | * Other notes: 19 | 20 | **Expected behavior** 21 | What should have happened? 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: How can Xponents help you? 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Type of Feature**: 11 | 12 | - [ ] Collaboration or partnership 13 | - [ ] Improvement or clarification 14 | - [ ] New Processing 15 | 16 | **Description of Feature** 17 | 18 | * new type of geospatial output? 19 | * new reference data set? 20 | * new patterns of entities? 21 | * evaluation work? 22 | * etc. Use pseudo code to describe your technical idea, if that helps. 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Misc: 2 | .DS_Store 3 | release 4 | piplib 5 | maven-repo/ 6 | dist/ 7 | 8 | # Generated data or Downloaded resources. 9 | # Do not check in gazetteer metadata from ./solr/etc/ -- It is copied to src/main/resources to prepare JAR 10 | src/main/resources/additions/ 11 | src/main/resources/filters/ 12 | src/main/resources/lang/ 13 | Core/src/main/resources/geonames.org/ 14 | Core/src/main/resources/ISO-639-2_utf-8.txt 15 | Core/etc/langdetect 16 | 17 | 18 | #Maven output 19 | target/ 20 | # TODO can we remove this?: 21 | dist/ 22 | build/ 23 | bin/ 24 | log 25 | logs 26 | export 27 | results/ 28 | build.properties 29 | 30 | #Build files 31 | #Eclipse 32 | .classpath 33 | .project 34 | .settings/ 35 | .pydevproject 36 | 37 | #IntelliJ 38 | /out/ 39 | *.iml 40 | 41 | /*.ipr 42 | /*.iws 43 | /.idea/ 44 | /Core/.idea/ 45 | /Core/*.ipr 46 | /Core/*.iws 47 | 48 | #Netbeans 49 | nb-configuration.xml 50 | nbactions*.xml 51 | nbproject/ 52 | 53 | #If files with these extensions are needed, they can be added manually. 
54 | *.jar 55 | *.war 56 | *.zip 57 | *.pyc 58 | 59 | # Solr indices: 60 | index/ 61 | *.sqlite 62 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Consider Contributing to OpenSextant 2 | ==================================== 3 | 4 | OpenSextant is our umbrella project, not a particular module. 5 | The contributors are loosely affiliated and are largely volunteers. 6 | 7 | * If you have ideas, it is best to contact the author; for Xponents that is Marc Ubaldino, mubaldino@gmail.com 8 | * If you have specific requests or bug fixes related to current APIs, please file an issue at https://github.com/OpenSextant/Xponents/issues 9 | 10 | This is a research prototype that is consistently funded, but please have some patience and willingness to do some of the work. 11 | Collaborators who have a sincere interest may be invited to join the group. 12 | 13 | -------------------------------------------------------------------------------- /Examples/.gitignore: -------------------------------------------------------------------------------- 1 | #Misc: 2 | .DS_Store 3 | release 4 | 5 | #Maven output 6 | target/ 7 | # TODO can we remove this?: 8 | build/ 9 | bin/ 10 | 11 | #Build files 12 | #Eclipse 13 | .classpath 14 | .project 15 | .settings/ 16 | 17 | #IntelliJ 18 | /out/ 19 | *.iml 20 | 21 | /*.ipr 22 | /*.iws 23 | /.idea/ 24 | 25 | #Netbeans 26 | nb-configuration.xml 27 | nbactions*.xml 28 | nbproject/ 29 | 30 | #If files with these extensions are needed, they can be added manually. 
31 | *.jar 32 | *.war 33 | *.zip 34 | -------------------------------------------------------------------------------- /Examples/Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM eclipse-temurin:18-jdk 2 | 3 | ENV XLAYER_PORT=8787 4 | ENV RELEASE_NAME=Xponents-3.5 5 | ENV JAVA_XMS=3500m 6 | ENV JAVA_XMX=3500m 7 | ENV XPONENTS=/home/opensextant/Xponents 8 | 9 | RUN apt-get update && apt-get upgrade -y 10 | RUN adduser opensextant --home /home/opensextant --disabled-password 11 | USER opensextant 12 | 13 | # -------------------------- 14 | # Copy ./Xponents-3.x/ to target $XPONENTS/ 15 | # 16 | COPY --chown=opensextant:opensextant . $XPONENTS/ 17 | 18 | 19 | WORKDIR $XPONENTS 20 | EXPOSE $XLAYER_PORT 7000 21 | 22 | ENTRYPOINT ./script/xlayer-docker.sh $XLAYER_PORT 23 | 24 | -------------------------------------------------------------------------------- /Examples/Docker/Dockerfile.offline: -------------------------------------------------------------------------------- 1 | FROM eclipse-temurin:18-jdk 2 | # SHADOWS: FROM mubaldino/opensextant:xponents-3.5 3 | # 4 | # -------------------------- 5 | ENV XLAYER_PORT=8787 6 | ENV VERSION=Xponents-3.5 7 | ENV XPONENTS=/home/opensextant/Xponents 8 | ENV JAVA_XMS=3500m 9 | ENV JAVA_XMX=3500m 10 | ENV MVN_VER=3.8.5 11 | ENV MVN=apache-maven-$MVN_VER 12 | ENV PATH=/home/opensextant/maven/bin:$PATH 13 | ENV LOCAL_REPO_NAME=maven-repo 14 | # NOTE LOCAL_REPO is a absolute path 15 | ENV LOCAL_REPO=$XPONENTS/$LOCAL_REPO_NAME 16 | 17 | RUN apt-get update && apt-get upgrade -y 18 | RUN adduser opensextant --home /home/opensextant --disabled-password 19 | USER opensextant 20 | WORKDIR /home/opensextant 21 | 22 | # -------------------------- 23 | # Copy ./Xponents-3.x/ to target 24 | # 25 | COPY --chown=opensextant:opensextant ./ $XPONENTS/ 26 | 27 | # OFFLINE 28 | #--------------------------- 29 | # Install Maven to allow for offline recompilation 30 | RUN curl -O 
https://dlcdn.apache.org/maven/maven-3/$MVN_VER/binaries/$MVN-bin.tar.gz && \ 31 | tar xzf ./$MVN-bin.tar.gz && \ 32 | mv $MVN ./maven 33 | 34 | WORKDIR $XPONENTS 35 | 36 | # Stage offline mode 37 | RUN mvn dependency:go-offline -Dmaven.repo.local=$LOCAL_REPO 38 | RUN find $LOCAL_REPO -name "*.sha1" -exec rm {} \; 39 | RUN find $LOCAL_REPO -name "*.repositories" -exec rm {} \; 40 | 41 | # Build inside Docker to confirm "offline mode" works 42 | # 1. Build fully as a test to show project is buildable inside container. 43 | RUN cd ./Core && mvn -o install -Dmaven.repo.local=$LOCAL_REPO 44 | RUN mvn -o -Dopensextant.solr=./xponents-solr/solr7 \ 45 | -Dmaven.repo.local=$LOCAL_REPO clean package dependency:copy-dependencies 46 | 47 | # 1a. Log4J cleanup 48 | RUN for log4jdir in `find $LOCAL_REPO -type d | grep log4j | grep "2.11"`; do rm -rf $log4jdir; done 49 | 50 | 51 | # 2. Copy built items and metadata from install to final runtime CLASSPATH ./lib 52 | RUN cp ./target/*jar ./lib/ 53 | 54 | #--------------------------- 55 | EXPOSE $XLAYER_PORT 7000 56 | ENTRYPOINT ./script/xlayer-docker.sh $XLAYER_PORT 57 | -------------------------------------------------------------------------------- /Examples/Docker/Sonarqube/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | sonarqube: 5 | image: sonarqube:9-community 6 | depends_on: 7 | - db 8 | environment: 9 | SONAR_JDBC_URL: jdbc:postgresql://db:5432/sonar 10 | SONAR_JDBC_USERNAME: sonar 11 | SONAR_JDBC_PASSWORD: sonar 12 | volumes: 13 | - sonarqube9_data:/opt/sonarqube/data 14 | - sonarqube9_extensions:/opt/sonarqube/extensions 15 | - sonarqube9_logs:/opt/sonarqube/logs 16 | - sonarqube9_temp:/opt/sonarqube/temp 17 | ports: 18 | # NOTE: starting to see modern net proxies using port 9000; remap as needed. 
19 | - "9900:9000" 20 | db: 21 | image: postgres:13 22 | environment: 23 | POSTGRES_USER: sonar 24 | POSTGRES_PASSWORD: sonar 25 | volumes: 26 | - postgresql_13:/var/lib/postgresql 27 | - postgresql_13_data:/var/lib/postgresql/data 28 | 29 | volumes: 30 | sonarqube9_data: 31 | sonarqube9_extensions: 32 | sonarqube9_logs: 33 | sonarqube9_temp: 34 | postgresql_13: 35 | postgresql_13_data: 36 | -------------------------------------------------------------------------------- /Examples/Docker/Sonarqube/my-sonar.sh: -------------------------------------------------------------------------------- 1 | 2 | PORT=9900 3 | echo Using token '$SONAR_TOKEN' 4 | 5 | pushd ./Xponents/Core 6 | mvn sonar:sonar \ 7 | -Dsonar.sourceEncoding=UTF-8 \ 8 | -Dsonar.projectKey=opensextant-xponents-core \ 9 | -Dsonar.host.url=http://localhost:$PORT \ 10 | -Dsonar.login=$SONAR_TOKEN \ 11 | -Dsonar.inclusions="**/*.java" 12 | 13 | popd 14 | pushd ./Xponents 15 | mvn sonar:sonar \ 16 | -Dsonar.sourceEncoding=UTF-8 \ 17 | -Dsonar.projectKey=opensextant-xponents \ 18 | -Dsonar.host.url=http://localhost:$PORT \ 19 | -Dsonar.login=$SONAR_TOKEN \ 20 | -Dsonar.inclusions="**/*.java" 21 | 22 | popd 23 | -------------------------------------------------------------------------------- /Examples/Docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | xponents: 4 | container_name: xponents 5 | image: mubaldino/opensextant:xponents-3.5 6 | environment: 7 | - JAVA_XMS=4g 8 | - JAVA_XMX=4g 9 | ports: 10 | - "8787:8787" 11 | 12 | gazetteer: 13 | container_name: gazetteer 14 | image: mubaldino/opensextant:xponents-3.5 15 | entrypoint: ./xponents-solr/solr7-dist/bin/solr start -p 7000 -s ./xponents-solr/solr7 -m 3g -q -foreground -force 16 | ports: 17 | - "7000:7000" 18 | 19 | networks: 20 | default: 21 | 22 | 23 | -------------------------------------------------------------------------------- /Examples/Docker/dockerignore: 
-------------------------------------------------------------------------------- 1 | piplib/ 2 | .idea/ 3 | .git/ 4 | target/ 5 | Core/ 6 | src/ 7 | doc/ 8 | Examples/target/ 9 | Examples/lib/ 10 | maven-repo/ 11 | 12 | 13 | -------------------------------------------------------------------------------- /Examples/Docker/dockerignore.offline: -------------------------------------------------------------------------------- 1 | piplib/ 2 | .idea/ 3 | 4 | -------------------------------------------------------------------------------- /Examples/Docker/settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | org.sonarsource.scanner.maven 6 | 7 | 8 | 9 | sonar 10 | 11 | true 12 | 13 | 14 | 15 | http://localhost:9000 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Examples/MapReduce/.gitignore: -------------------------------------------------------------------------------- 1 | dependency-reduced-pom.xml 2 | -------------------------------------------------------------------------------- /Examples/MapReduce/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MR=$(dirname "$0") 3 | MR=$(cd -P "$MR" && pwd) || exit 1 4 | # Builds the MapReduce demo and zips it with its runtime JARs into ./dist. 5 | # Every cd is checked so the relative rm/cp commands never run in the wrong directory. 6 | cd "$MR" || exit 1 7 | # Build project 8 | echo "Building Project" 9 | mvn install || exit 1 10 | cp target/xponents-mapreduce-0.1.jar xponents-mapreduce.jar 11 | 12 | echo "Packaging JARs for Solr, Xponents, Gazetteer resources..." 13 | # Collect LIBJARs 14 | # Pay special attention to JARs required to run geotaggers. 15 | # ---------------------------------- 16 | mkdir -p "$MR/libjars" 17 | rm -f "$MR"/libjars/* 18 | 19 | # RUNTIME JARS: JTS, Spatial4J, Logging 20 | cp "$MR"/../solr/lib/ext/*jar "$MR/libjars" 21 | 22 | # Primary dependencies come from Xponents Extraction POM 23 | # Xponents support JARS; Get current Xponents JARS as well as all dependencies. 
24 | cp "$MR"/../solr/solr4/lib/*jar "$MR/libjars" 25 | cd "$MR/../Extraction" || exit 1 26 | rm -f ./lib/* 27 | mvn dependency:copy-dependencies || exit 1 28 | cp lib/*jar "$MR/libjars" 29 | 30 | # Collect JAR for Gazetteer metadata and filters. 31 | cd "$MR/../solr" || exit 1 32 | ant proxy gaz-meta || exit 1 33 | cp ./solr4/lib/xponents-gazetteer-meta.jar "$MR/libjars/" 34 | 35 | cd "$MR" || exit 1 36 | # Conflict with Solr servlet API: 37 | rm -f libjars/javax.servlet-api-3.0.1.jar 38 | # GISCore not used; It supports formatting output and since we output only JSON, its not needed here. 39 | rm -f libjars/giscore*jar 40 | # Logback for now interferes with choice of Logging package 41 | rm -f libjars/logback*jar 42 | 43 | mvn dependency:copy-dependencies || exit 1 44 | for LIB in json-lib ezmorph commons-beanutils; do 45 | cp target/dependency/"$LIB"*.jar ./libjars/ 46 | done 47 | # ---------------------------------- 48 | 49 | echo "Zipping Final Distribution, in ./dist" 50 | 51 | mkdir -p ./dist 52 | rm -rf ./dist/* 53 | 54 | DATE=$(date +%Y%m%d) 55 | zip -r "dist/xponents-mr-v$DATE.zip" libjars script log* xponents*jar 56 | 57 | -------------------------------------------------------------------------------- /Examples/MapReduce/log4j.properties: -------------------------------------------------------------------------------- 1 | # Sample properties to initialise log4j 2 | log4j.rootLogger=ERROR, STDOUT 3 | 4 | # Appender config for STDOUT 5 | log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender 6 | log4j.appender.STDOUT.Threshold=ERROR 7 | log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.STDOUT.layout.ConversionPattern=%5p - %m%n 9 | 10 | log4j.org.apache.solr=ERROR 11 | log4j.org.apache.solr.client.solrj.impl.HttpClientUtil=ERROR 12 | log4j.org.apache.solr.client.solrj=ERROR 13 | 14 | -------------------------------------------------------------------------------- /Examples/MapReduce/log4jsupplemental.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Examples/MapReduce/src/main/java/org/opensextant/mapreduce/Log4JUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This software was produced for the U. S. Government 3 | * under Basic Contract No. W15P7T-13-C-A802, and is 4 | * subject to the Rights in Noncommercial Computer Software 5 | * and Noncommercial Computer Software Documentation 6 | * Clause 252.227-7014 (FEB 2012) 7 | * 8 | * Copyright (C) 2016 The MITRE Corporation. 9 | * Copyright (C) 2016 OpenSextant.org 10 | * 11 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 12 | * use this file except in compliance with the License. You may obtain a copy of 13 | * the License at 14 | * 15 | * http://www.apache.org/licenses/LICENSE-2.0 16 | * 17 | * Unless required by applicable law or agreed to in writing, software 18 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 19 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 20 | * License for the specific language governing permissions and limitations under 21 | * the License. 22 | */ 23 | package org.opensextant.mapreduce; 24 | 25 | import org.apache.log4j.xml.DOMConfigurator; 26 | 27 | import javax.xml.parsers.FactoryConfigurationError; 28 | import java.net.URL; 29 | 30 | /** 31 | * Configures Log4J logging. Only supports XML configuration because that configuration mode can be processed without 32 | * resetting the whole logging environment. 33 | *
34 | * This is in a separate class to insulate the caller against classpath errors if the host 35 | * environment doesn't include Log4J. 36 | */ 37 | public class Log4JUtils { 38 | public static void reconfigureLogging(URL log4JXMLConfigurationFile) throws FactoryConfigurationError { 39 | DOMConfigurator.configure(log4JXMLConfigurationFile); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /Examples/etc/tika-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /Examples/script/xponents-demo.bat: -------------------------------------------------------------------------------- 1 | set LANG=en_US 2 | 3 | echo "Usage -- run as .\script\xponents-demo.bat" 4 | @echo off 5 | 6 | REM Find current path to install 7 | set scriptdir=%~dp0 8 | set scriptdir=%scriptdir:~0,-1% 9 | set basedir=%scriptdir%\.. 
10 | set logconf=%scriptdir:\=/% 11 | set XP=%basedir% 12 | 13 | set SOLR_HOME=%XP%\xponents-solr\solr7 14 | REM Values must be assigned with SET; each JVM option is quoted separately so java receives distinct arguments. 15 | set logging_args="-Dlogback.configurationFile=%basedir%\etc\logback.xml" 16 | set tika_args="-Dtika.config=%basedir%\etc\tika-config.xml" 17 | set xponents_args="-Dopensextant.solr=%SOLR_HOME%" -Xmx1500m -Xms1500m 18 | 19 | java %xponents_args% %tika_args% %logging_args% -cp "%XP%\etc;%XP%\lib\*" ^ 20 | org.codehaus.groovy.tools.GroovyStarter --main groovy.ui.GroovyMain ^ 21 | "%XP%\script\Xponents.groovy" %* 22 | 23 | pause 24 | -------------------------------------------------------------------------------- /Examples/script/xponents-demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Launches script/Xponents.groovy (the Xponents demo) on the JVM through GroovyStarter. 3 | # All command-line arguments are passed to the Groovy script unchanged. 4 | # http_proxy / https_proxy, when set, are translated into JVM proxy properties. 5 | 6 | scripts=$(dirname "$0") 7 | XP=$(cd -P "$scripts/.." && pwd) || exit 1 8 | 9 | # lib/* is a JVM classpath wildcard; it stays quoted so the shell never expands it. 10 | export CLASSPATH="$XP/etc:$XP/lib/*" 11 | 12 | SOLR_HOME=$XP/xponents-solr/solr7 13 | if [ -d "$XP/solr/" ]; then 14 | # Use dir in development source tree. 15 | SOLR_HOME=$XP/solr/solr7 16 | fi 17 | 18 | proxy_args=() 19 | if [ -n "$http_proxy" ] || [ -n "$https_proxy" ]; then 20 | # Reduce scheme://[user@]host[:port][/path] to host and port; the port defaults to 80. 21 | proxy=${http_proxy:-$https_proxy} 22 | proxy=${proxy#*://} 23 | proxy=${proxy%%/*} 24 | proxy=${proxy##*@} 25 | proxyHost=${proxy%%:*} 26 | proxyPort=80 27 | case $proxy in *:*) proxyPort=${proxy##*:} ;; esac 28 | # Both the http.* and https.* JVM property families are set from the same proxy. 29 | proxy_args=("-Dhttp.proxyHost=$proxyHost" "-Dhttp.proxyPort=$proxyPort" "-Dhttps.proxyHost=$proxyHost" "-Dhttps.proxyPort=$proxyPort") 30 | fi 31 | 32 | xponents_args=("-Dopensextant.solr=$SOLR_HOME" -Xmx1500m -Xms1500m) 33 | logging_args=("-Dlogback.configurationFile=$XP/etc/logback.xml") 34 | tika_args=("-Dtika.config=$XP/etc/tika-config.xml") 35 | 36 | java "${xponents_args[@]}" "${logging_args[@]}" "${tika_args[@]}" "${proxy_args[@]}" -cp "$CLASSPATH" \ 37 | org.codehaus.groovy.tools.GroovyStarter --main groovy.ui.GroovyMain \ 38 | "$XP/script/Xponents.groovy" "$@" 39 | -------------------------------------------------------------------------------- /Examples/src/main/java/org/opensextant/examples/ExampleMain.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.examples; 2 | 3 | 
public abstract class ExampleMain { 4 | 5 | public static void print(String msg) { 6 | System.out.println(msg); 7 | } 8 | 9 | public static void print(String msg, Object... args) { 10 | System.out.println(String.format(msg, args)); 11 | } 12 | 13 | public static void error(Exception err, String... args) { 14 | System.out.println("ERROR " + err.getMessage()); 15 | System.out.println(args); 16 | 17 | System.err.println("ERROR " + err.getMessage()); 18 | err.printStackTrace(System.err); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Examples/src/test/resources/exclusions/person-name-filter.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/Examples/src/test/resources/exclusions/person-name-filter.txt -------------------------------------------------------------------------------- /Examples/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %d{yyyy-MM-dd'T'HH:mm:ss.SSS} %-5level %logger{36} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | 2 | 3 | Copyright 2013-2023 MITRE Corporation. All Rights Reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | See also ./LICENSE or http://www.apache.org/licenses for the full Apache License 18 | 19 | 20 | * ************************************************************************** 21 | * NOTICE 22 | * This software was produced for the U. S. Government under Contract No. 23 | * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer 24 | * Software and Noncommercial Computer Software Documentation Clause 25 | * 252.227-7014 (JUN 1995) 26 | * 27 | * ************************************************************************** 28 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | #remote_theme: mmistakes/minimal-mistakes-jekyll 2 | theme: jekyll-theme-modernist 3 | #theme: jekyll-theme-midnight 4 | -------------------------------------------------------------------------------- /build.properties: -------------------------------------------------------------------------------- 1 | # Please copy this to "build.properties" which will provide settings for your workspace. 2 | # 3 | # General version of SDK: 4 | xponents.version=3.7 5 | 6 | # 'solr.home' is relative to ./solr/build.xml Ant script. 
7 | solr.home=solr7 8 | solr.server.port=7000 9 | -------------------------------------------------------------------------------- /dev.env: -------------------------------------------------------------------------------- 1 | export XPONENTS=$PWD 2 | export PYTHONPATH=$XPONENTS/../piplib:$XPONENTS/solr/script 3 | export LOG4J_FORMAT_MSG_NO_LOOKUPS=true 4 | -------------------------------------------------------------------------------- /doc/3rd-party.md: -------------------------------------------------------------------------------- 1 | Third-Party Licenses 2 | ====================== 3 | 4 | Apache Solr 5 | --------------------- 6 | * FILE: `solr/solr7-dist` or `xponents-solr/solr7-dist` 7 | * LICENSE, NOTICE, etc, are contained therein 8 | 9 | 10 | Cybozu/Narconex Language-Detection ("LANGDETECT") 11 | ----------------------- 12 | * Xponents Runtime: `./etc/langdetect/` 13 | * FILE: `./etc/langdetect-profiles-v3.zip` 14 | * SOURCE: https://code.google.com/archive/p/language-detection/ 15 | * CITATION: 16 | 17 | ```json 18 | @misc{nakatani2010langdetect, title = {Language Detection Library for Java}, 19 | author = {Shuyo, Nakatani}, url = {http://code.google.com/p/language-detection/}, 20 | year = {2010} } 21 | ``` 22 | 23 | Copyrights and License 24 | 25 | Copyright (c) 2010-2014 Cybozu Labs, Inc. All rights reserved. 26 | 27 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use 28 | this file except in compliance with the License. You may obtain a copy of the License at 29 | 30 | http://www.apache.org/licenses/LICENSE-2.0 31 | 32 | Unless required by applicable law or agreed to in writing, software distributed under 33 | the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 34 | either express or implied. See the License for the specific language governing permissions 35 | and limitations under the License. 
36 | -------------------------------------------------------------------------------- /doc/LuceneRevolution17-Xponents,14Sept2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/LuceneRevolution17-Xponents,14Sept2017.pdf -------------------------------------------------------------------------------- /doc/LuceneRevolution17-Xponents.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/LuceneRevolution17-Xponents.mp4 -------------------------------------------------------------------------------- /doc/core-apidocs/doc-files/opensextant-manual-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/doc-files/opensextant-manual-logo.png -------------------------------------------------------------------------------- /doc/core-apidocs/element-list: -------------------------------------------------------------------------------- 1 | org.opensextant 2 | org.opensextant.annotations 3 | org.opensextant.data 4 | org.opensextant.data.social 5 | org.opensextant.extraction 6 | org.opensextant.extractors.flexpat 7 | org.opensextant.extractors.langid 8 | org.opensextant.extractors.poli 9 | org.opensextant.extractors.poli.data 10 | org.opensextant.extractors.xcoord 11 | org.opensextant.extractors.xtemporal 12 | org.opensextant.output 13 | org.opensextant.processing 14 | org.opensextant.util 15 | -------------------------------------------------------------------------------- /doc/core-apidocs/jquery-ui.overrides.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, 2022, Oracle and/or its affiliates. 
All rights reserved. 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 | * 5 | * This code is free software; you can redistribute it and/or modify it 6 | * under the terms of the GNU General Public License version 2 only, as 7 | * published by the Free Software Foundation. Oracle designates this 8 | * particular file as subject to the "Classpath" exception as provided 9 | * by Oracle in the LICENSE file that accompanied this code. 10 | * 11 | * This code is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 | * version 2 for more details (a copy is included in the LICENSE file that 15 | * accompanied this code). 16 | * 17 | * You should have received a copy of the GNU General Public License version 18 | * 2 along with this work; if not, write to the Free Software Foundation, 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 | * 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 | * or visit www.oracle.com if you need additional information or have any 23 | * questions. 
24 | */ 25 | 26 | .ui-state-active, 27 | .ui-widget-content .ui-state-active, 28 | .ui-widget-header .ui-state-active, 29 | a.ui-button:active, 30 | .ui-button:active, 31 | .ui-button.ui-state-active:hover { 32 | /* Overrides the color of selection used in jQuery UI */ 33 | background: #F8981D; 34 | border: 1px solid #F8981D; 35 | } 36 | -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-bg_glass_65_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-bg_glass_65_dadada_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- 
/doc/core-apidocs/jquery/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-icons_888888_256x240.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/jquery/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/jquery/jquery-ui.structure.min.css: -------------------------------------------------------------------------------- 1 | /*! jQuery UI - v1.12.1 - 2018-12-06 2 | * http://jqueryui.com 3 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 4 | 5 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 
0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;filter:Alpha(Opacity=0)}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /doc/core-apidocs/legal/ASSEMBLY_EXCEPTION: -------------------------------------------------------------------------------- 1 | 2 | OPENJDK ASSEMBLY EXCEPTION 3 | 4 | The OpenJDK source code made available by Oracle America, Inc. 
(Oracle) at 5 | openjdk.java.net ("OpenJDK Code") is distributed under the terms of the GNU 6 | General Public License version 2 7 | only ("GPL2"), with the following clarification and special exception. 8 | 9 | Linking this OpenJDK Code statically or dynamically with other code 10 | is making a combined work based on this library. Thus, the terms 11 | and conditions of GPL2 cover the whole combination. 12 | 13 | As a special exception, Oracle gives you permission to link this 14 | OpenJDK Code with certain code licensed by Oracle as indicated at 15 | http://openjdk.java.net/legal/exception-modules-2007-05-08.html 16 | ("Designated Exception Modules") to produce an executable, 17 | regardless of the license terms of the Designated Exception Modules, 18 | and to copy and distribute the resulting executable under GPL2, 19 | provided that the Designated Exception Modules continue to be 20 | governed by the licenses under which they were offered by Oracle. 21 | 22 | As such, it allows licensees and sublicensees of Oracle's GPL2 OpenJDK Code 23 | to build an executable that includes those portions of necessary code that 24 | Oracle could not provide under GPL2 (or that Oracle has provided under GPL2 25 | with the Classpath exception). If you modify or add to the OpenJDK code, 26 | that new GPL2 code may still be combined with Designated Exception Modules 27 | if the new code is made subject to this exception by its copyright holder. 28 | -------------------------------------------------------------------------------- /doc/core-apidocs/legal/jqueryUI.md: -------------------------------------------------------------------------------- 1 | ## jQuery UI v1.12.1 2 | 3 | ### jQuery UI License 4 | ``` 5 | Copyright jQuery Foundation and other contributors, https://jquery.org/ 6 | 7 | This software consists of voluntary contributions made by many 8 | individuals. 
For exact contribution history, see the revision history 9 | available at https://github.com/jquery/jquery-ui 10 | 11 | The following license applies to all parts of this software except as 12 | documented below: 13 | 14 | ==== 15 | 16 | Permission is hereby granted, free of charge, to any person obtaining 17 | a copy of this software and associated documentation files (the 18 | "Software"), to deal in the Software without restriction, including 19 | without limitation the rights to use, copy, modify, merge, publish, 20 | distribute, sublicense, and/or sell copies of the Software, and to 21 | permit persons to whom the Software is furnished to do so, subject to 22 | the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be 25 | included in all copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 31 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 32 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 34 | 35 | ==== 36 | 37 | Copyright and related rights for sample code are waived via CC0. Sample 38 | code is defined as all source code contained within the demos directory. 39 | 40 | CC0: http://creativecommons.org/publicdomain/zero/1.0/ 41 | 42 | ==== 43 | 44 | All files located in the node_modules and external directories are 45 | externally maintained libraries used by this software which have their 46 | own licenses; we recommend you read them, as their terms may differ from 47 | the terms above. 
48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /doc/core-apidocs/module-search-index.js: -------------------------------------------------------------------------------- 1 | moduleSearchIndex = [];updateSearchResults(); -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/annotations/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.annotations (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.annotations

14 |
15 |

Interfaces

16 | 19 |

Classes

20 | 26 |

Exceptions

27 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/data/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.data (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.data

14 |
15 |

Interfaces

16 | 20 |

Classes

21 | 31 |
32 | 33 | 34 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/data/social/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.data.social (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.data.social

14 |
15 |

Interfaces

16 | 19 |

Classes

20 | 26 |

Exceptions

27 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/extractors/flexpat/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.flexpat (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.flexpat

14 |
15 |

Classes

16 | 23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/extractors/langid/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.langid (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.langid

14 |
15 |

Classes

16 | 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/extractors/poli/data/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.poli.data (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.poli.data

14 |
15 |

Classes

16 | 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/extractors/poli/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.poli (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.poli

14 |
15 |

Classes

16 | 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/extractors/xtemporal/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.xtemporal (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.xtemporal

14 |
15 |

Classes

16 | 25 |

Enums

26 | 29 |
30 | 31 | 32 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant

14 |
15 |

Exceptions

16 | 19 |
20 | 21 | 22 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/processing/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.processing (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.processing

14 |
15 |

Classes

16 | 22 |

Exceptions

23 | 26 |
27 | 28 | 29 | -------------------------------------------------------------------------------- /doc/core-apidocs/org/opensextant/util/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.util (Xponents Core API) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.util

14 |
15 |

Classes

16 | 23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /doc/core-apidocs/overview-summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Xponents Core API 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 |
20 | 23 |

index.html

24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doc/core-apidocs/package-list: -------------------------------------------------------------------------------- 1 | org.opensextant 2 | org.opensextant.annotations 3 | org.opensextant.data 4 | org.opensextant.data.social 5 | org.opensextant.extraction 6 | org.opensextant.extractors.flexpat 7 | org.opensextant.extractors.langid 8 | org.opensextant.extractors.poli 9 | org.opensextant.extractors.poli.data 10 | org.opensextant.extractors.xcoord 11 | org.opensextant.extractors.xtemporal 12 | org.opensextant.output 13 | org.opensextant.processing 14 | org.opensextant.processing.progress 15 | org.opensextant.util 16 | -------------------------------------------------------------------------------- /doc/core-apidocs/package-search-index.js: -------------------------------------------------------------------------------- 1 | packageSearchIndex = [{"l":"All Packages","u":"allpackages-index.html"},{"l":"org.opensextant"},{"l":"org.opensextant.annotations"},{"l":"org.opensextant.data"},{"l":"org.opensextant.data.social"},{"l":"org.opensextant.extraction"},{"l":"org.opensextant.extractors.flexpat"},{"l":"org.opensextant.extractors.langid"},{"l":"org.opensextant.extractors.poli"},{"l":"org.opensextant.extractors.poli.data"},{"l":"org.opensextant.extractors.xcoord"},{"l":"org.opensextant.extractors.xtemporal"},{"l":"org.opensextant.output"},{"l":"org.opensextant.processing"},{"l":"org.opensextant.util"}];updateSearchResults(); -------------------------------------------------------------------------------- /doc/core-apidocs/resources/glass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/resources/glass.png -------------------------------------------------------------------------------- /doc/core-apidocs/resources/x.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/resources/x.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-bg_glass_65_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-bg_glass_65_dadada_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-bg_glass_95_fef1ec_1x400.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-icons_888888_256x240.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/core-apidocs/script-dir/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/jquery-ui.min.css: -------------------------------------------------------------------------------- 1 | /*! jQuery UI - v1.13.1 - 2022-05-12 2 | * http://jqueryui.com 3 | * Includes: core.css, autocomplete.css, menu.css 4 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 5 | 6 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 
0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;-ms-filter:"alpha(opacity=0)"}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /doc/core-apidocs/script-dir/jquery-ui.structure.min.css: -------------------------------------------------------------------------------- 1 | /*! 
jQuery UI - v1.12.1 - 2018-12-06 2 | * http://jqueryui.com 3 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 4 | 5 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;filter:Alpha(Opacity=0)}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /doc/core-apidocs/tag-search-index.js: -------------------------------------------------------------------------------- 
1 | tagSearchIndex = [{"l":"Constant Field Values","h":"","u":"constant-values.html"},{"l":"Serialized Form","h":"","u":"serialized-form.html"}];updateSearchResults(); -------------------------------------------------------------------------------- /doc/geocoding-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/geocoding-workflow.png -------------------------------------------------------------------------------- /doc/postal-concept-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/postal-concept-01.png -------------------------------------------------------------------------------- /doc/pydoc/opensextant.extractors.html: -------------------------------------------------------------------------------- 1 | 2 | Python: package opensextant.extractors 3 | 4 | 5 | 6 | 7 | 8 |
 
9 |  
opensextant.extractors
index
/Users/ubaldino/workspace/opensource/Xponents-Core/src/main/python/opensextant/extractors/__init__.py
12 |

13 |

14 | 15 | 16 | 18 | 19 | 20 |
 
17 | Package Contents
       
poli
21 |
xcoord
22 |
xtemporal
23 |
24 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/doc-files/opensextant-manual-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/doc-files/opensextant-manual-logo.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/element-list: -------------------------------------------------------------------------------- 1 | org.opensextant.extraction 2 | org.opensextant.extractors.geo 3 | org.opensextant.extractors.geo.rules 4 | org.opensextant.extractors.geo.social 5 | org.opensextant.extractors.xtax 6 | org.opensextant.output 7 | org.opensextant.util 8 | org.opensextant.xlayer 9 | org.opensextant.xlayer.server 10 | org.opensextant.xlayer.server.xgeo 11 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery-ui.overrides.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 | * 5 | * This code is free software; you can redistribute it and/or modify it 6 | * under the terms of the GNU General Public License version 2 only, as 7 | * published by the Free Software Foundation. Oracle designates this 8 | * particular file as subject to the "Classpath" exception as provided 9 | * by Oracle in the LICENSE file that accompanied this code. 10 | * 11 | * This code is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 | * version 2 for more details (a copy is included in the LICENSE file that 15 | * accompanied this code). 
16 | * 17 | * You should have received a copy of the GNU General Public License version 18 | * 2 along with this work; if not, write to the Free Software Foundation, 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 | * 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 | * or visit www.oracle.com if you need additional information or have any 23 | * questions. 24 | */ 25 | 26 | .ui-state-active, 27 | .ui-widget-content .ui-state-active, 28 | .ui-widget-header .ui-state-active, 29 | a.ui-button:active, 30 | .ui-button:active, 31 | .ui-button.ui-state-active:hover { 32 | /* Overrides the color of selection used in jQuery UI */ 33 | background: #F8981D; 34 | border: 1px solid #F8981D; 35 | } 36 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-bg_glass_65_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-bg_glass_65_dadada_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- 
/doc/sdk-apidocs/jquery/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-icons_454545_256x240.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/jquery/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/jquery/jquery-ui.structure.min.css: -------------------------------------------------------------------------------- 1 | /*! 
jQuery UI - v1.12.1 - 2018-12-06 2 | * http://jqueryui.com 3 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 4 | 5 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;filter:Alpha(Opacity=0)}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /doc/sdk-apidocs/legal/ASSEMBLY_EXCEPTION: 
-------------------------------------------------------------------------------- 1 | 2 | OPENJDK ASSEMBLY EXCEPTION 3 | 4 | The OpenJDK source code made available by Oracle America, Inc. (Oracle) at 5 | openjdk.java.net ("OpenJDK Code") is distributed under the terms of the GNU 6 | General Public License version 2 7 | only ("GPL2"), with the following clarification and special exception. 8 | 9 | Linking this OpenJDK Code statically or dynamically with other code 10 | is making a combined work based on this library. Thus, the terms 11 | and conditions of GPL2 cover the whole combination. 12 | 13 | As a special exception, Oracle gives you permission to link this 14 | OpenJDK Code with certain code licensed by Oracle as indicated at 15 | http://openjdk.java.net/legal/exception-modules-2007-05-08.html 16 | ("Designated Exception Modules") to produce an executable, 17 | regardless of the license terms of the Designated Exception Modules, 18 | and to copy and distribute the resulting executable under GPL2, 19 | provided that the Designated Exception Modules continue to be 20 | governed by the licenses under which they were offered by Oracle. 21 | 22 | As such, it allows licensees and sublicensees of Oracle's GPL2 OpenJDK Code 23 | to build an executable that includes those portions of necessary code that 24 | Oracle could not provide under GPL2 (or that Oracle has provided under GPL2 25 | with the Classpath exception). If you modify or add to the OpenJDK code, 26 | that new GPL2 code may still be combined with Designated Exception Modules 27 | if the new code is made subject to this exception by its copyright holder. 
28 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/legal/jqueryUI.md: -------------------------------------------------------------------------------- 1 | ## jQuery UI v1.12.1 2 | 3 | ### jQuery UI License 4 | ``` 5 | Copyright jQuery Foundation and other contributors, https://jquery.org/ 6 | 7 | This software consists of voluntary contributions made by many 8 | individuals. For exact contribution history, see the revision history 9 | available at https://github.com/jquery/jquery-ui 10 | 11 | The following license applies to all parts of this software except as 12 | documented below: 13 | 14 | ==== 15 | 16 | Permission is hereby granted, free of charge, to any person obtaining 17 | a copy of this software and associated documentation files (the 18 | "Software"), to deal in the Software without restriction, including 19 | without limitation the rights to use, copy, modify, merge, publish, 20 | distribute, sublicense, and/or sell copies of the Software, and to 21 | permit persons to whom the Software is furnished to do so, subject to 22 | the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be 25 | included in all copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 31 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 32 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 34 | 35 | ==== 36 | 37 | Copyright and related rights for sample code are waived via CC0. Sample 38 | code is defined as all source code contained within the demos directory. 
39 | 40 | CC0: http://creativecommons.org/publicdomain/zero/1.0/ 41 | 42 | ==== 43 | 44 | All files located in the node_modules and external directories are 45 | externally maintained libraries used by this software which have their 46 | own licenses; we recommend you read them, as their terms may differ from 47 | the terms above. 48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/module-search-index.js: -------------------------------------------------------------------------------- 1 | moduleSearchIndex = [];updateSearchResults(); -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/extraction/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extraction (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extraction 14 |

15 |
16 |

Classes

17 | 23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/extractors/geo/social/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.geo.social (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.geo.social

14 |
15 |

Classes

16 | 25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/extractors/xtax/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.extractors.xtax (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.extractors.xtax

14 |
15 |

Classes

16 | 21 |
22 | 23 | 24 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/output/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.output (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.output 14 |

15 |
16 |

Classes

17 | 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/util/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.util (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.util

15 |
16 |

Classes

17 | 23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/xlayer/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.xlayer (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.xlayer 14 |

15 |
16 |

Classes

17 | 21 |
22 | 23 | 24 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/xlayer/server/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.xlayer.server (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.xlayer.server 14 |

15 |
16 |

Classes

17 | 25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/org/opensextant/xlayer/server/xgeo/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | org.opensextant.xlayer.server.xgeo (Xponents Extraction Toolkit) 8 | 9 | 10 | 11 | 12 | 13 |

org.opensextant.xlayer.server.xgeo 14 |

15 |
16 |

Classes

17 | 25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/overview-summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Xponents Extraction Toolkit 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 |
20 | 23 |

index.html

24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/package-list: -------------------------------------------------------------------------------- 1 | org.opensextant.extraction 2 | org.opensextant.extractors.geo 3 | org.opensextant.extractors.geo.rules 4 | org.opensextant.extractors.geo.social 5 | org.opensextant.extractors.xtax 6 | org.opensextant.output 7 | org.opensextant.util 8 | org.opensextant.xlayer 9 | org.opensextant.xlayer.server 10 | org.opensextant.xlayer.server.xgeo 11 | -------------------------------------------------------------------------------- /doc/sdk-apidocs/package-search-index.js: -------------------------------------------------------------------------------- 1 | packageSearchIndex = [{"l":"All Packages","u":"allpackages-index.html"},{"l":"org.opensextant.extraction"},{"l":"org.opensextant.extractors.geo"},{"l":"org.opensextant.extractors.geo.rules"},{"l":"org.opensextant.extractors.geo.social"},{"l":"org.opensextant.extractors.xtax"},{"l":"org.opensextant.output"},{"l":"org.opensextant.util"},{"l":"org.opensextant.xlayer"},{"l":"org.opensextant.xlayer.server"},{"l":"org.opensextant.xlayer.server.xgeo"}];updateSearchResults(); -------------------------------------------------------------------------------- /doc/sdk-apidocs/resources/glass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/resources/glass.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/resources/x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/resources/x.png -------------------------------------------------------------------------------- 
/doc/sdk-apidocs/script-dir/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-bg_glass_65_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-bg_glass_65_dadada_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- 
/doc/sdk-apidocs/script-dir/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- 
/doc/sdk-apidocs/script-dir/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/sdk-apidocs/script-dir/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/jquery-ui.min.css: -------------------------------------------------------------------------------- 1 | /*! jQuery UI - v1.13.1 - 2022-05-12 2 | * http://jqueryui.com 3 | * Includes: core.css, autocomplete.css, menu.css 4 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 5 | 6 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;-ms-filter:"alpha(opacity=0)"}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu 
.ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /doc/sdk-apidocs/script-dir/jquery-ui.structure.min.css: -------------------------------------------------------------------------------- 1 | /*! jQuery UI - v1.12.1 - 2018-12-06 2 | * http://jqueryui.com 3 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 4 | 5 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;filter:Alpha(Opacity=0)}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu 
.ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /doc/sdk-apidocs/tag-search-index.js: -------------------------------------------------------------------------------- 1 | tagSearchIndex = [{"l":"Constant Field Values","h":"","u":"constant-values.html"},{"l":"Serialized Form","h":"","u":"serialized-form.html"}];updateSearchResults(); -------------------------------------------------------------------------------- /doc/xlayer-xgeo-server-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/doc/xlayer-xgeo-server-example.png -------------------------------------------------------------------------------- /etc/langdetect-profiles-v3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/etc/langdetect-profiles-v3.zip -------------------------------------------------------------------------------- /etc/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %-5level %logger{36} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 
-------------------------------------------------------------------------------- /etc/tika-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /script/.gitignore: -------------------------------------------------------------------------------- 1 | runtime.cfg 2 | -------------------------------------------------------------------------------- /script/dist-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Building Docker images - Regular and Offline (Maven+Xponents)" 4 | 5 | VERSION="3.5" 6 | script=`dirname $0;` 7 | basedir=`cd -P $script/..; echo $PWD` 8 | 9 | REL=$basedir/../dist/Xponents-$VERSION 10 | if [ ! -d $REL ] ; then 11 | echo "Distribution does not exist: $REL" 12 | echo "First build per BUILD.md" 13 | echo "Then run ./script/dist.sh " 14 | exit 1 15 | fi 16 | 17 | echo " Xponents Docker " 18 | echo "==============================================" 19 | 20 | 21 | echo "Version Number of Image" 22 | read IMG_VERSION 23 | 24 | cp $basedir/Examples/Docker/dockerignore $REL/.dockerignore 25 | cd $REL && docker build --tag opensextant:xponents-$IMG_VERSION . 
26 | 27 | -------------------------------------------------------------------------------- /script/tag-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMG_VERSION=$1 4 | IMG_TARGET=3.5 5 | REPO=mubaldino 6 | 7 | shift 8 | CMD=$1 9 | 10 | if [ -n "$IMG_VERSION" ] ; then 11 | docker tag opensextant:xponents-$IMG_VERSION $REPO/opensextant:xponents-$IMG_VERSION 12 | docker tag opensextant:xponents-$IMG_VERSION $REPO/opensextant:xponents-$IMG_TARGET 13 | 14 | docker tag opensextant:xponents-offline-$IMG_VERSION $REPO/opensextant:xponents-offline-$IMG_VERSION 15 | docker tag opensextant:xponents-offline-$IMG_VERSION $REPO/opensextant:xponents-offline-$IMG_TARGET 16 | # Final. 17 | docker tag $REPO/opensextant:xponents-$IMG_TARGET $REPO/opensextant:latest 18 | fi 19 | 20 | if [ "$CMD" = "push" ]; then 21 | docker push $REPO/opensextant:xponents-$IMG_VERSION 22 | docker push $REPO/opensextant:xponents-offline-$IMG_VERSION 23 | 24 | docker push $REPO/opensextant:xponents-$IMG_TARGET 25 | docker push $REPO/opensextant:xponents-offline-$IMG_TARGET 26 | docker push $REPO/opensextant:latest 27 | fi 28 | -------------------------------------------------------------------------------- /script/tester.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | script=`dirname $0;` 4 | basedir=`cd -P $script/..; echo $PWD` 5 | 6 | #TODO: Test as ant class? 
7 | 8 | CLASS=$1 9 | shift 10 | 11 | CLASSPATH="$basedir/etc:$basedir/target/*:$basedir/lib/*" 12 | XPONENTS_SOLR=./solr 13 | 14 | java -Dopensextant.solr=$XPONENTS_SOLR/solr7 -Xmx3g -Xms3g \ 15 | -XX:+UseParallelGC \ 16 | -Dlogback.configurationFile=$basedir/etc/logback.xml \ 17 | -classpath "$CLASSPATH" $CLASS $* 18 | -------------------------------------------------------------------------------- /script/xlayer-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | script=`dirname $0;` 4 | basedir=`cd -P $script/..; echo $PWD` 5 | 6 | XLAYER_PORT=$1 7 | 8 | cd $basedir 9 | CLASSPATH="$basedir/etc:$basedir/lib/*" 10 | XPONENTS_SOLR=${XPONENTS_SOLR:-$basedir/xponents-solr} 11 | 12 | java -Dopensextant.solr=$XPONENTS_SOLR/solr7 -Xmx${JAVA_XMX} -Xms${JAVA_XMS} \ 13 | -XX:+UseParallelGC -server \ 14 | -Dlogback.configurationFile=$basedir/etc/logback.xml \ 15 | -classpath "$CLASSPATH" org.opensextant.xlayer.server.xgeo.XlayerServer $XLAYER_PORT 16 | 17 | 18 | -------------------------------------------------------------------------------- /script/xlayer-server.bat: -------------------------------------------------------------------------------- 1 | set LANG=en_US 2 | 3 | echo "Usage -- run as .\script\xlayer.bat" 4 | echo JAVA_HOME = %JAVA_HOME% 5 | @echo off 6 | 7 | REM Find current path to install 8 | set scriptdir=%~dp0 9 | set scriptdir=%scriptdir:~0,-1% 10 | set basedir=%scriptdir%\.. 11 | set logconf=%scriptdir:\=/% 12 | set XPONENTS_SOLR=%basedir%\xponents-solr 13 | 14 | set COMMAND=%1 15 | set XLAYER_PORT=%2 16 | 17 | REM Default argument here is a port number. that is it. 18 | REM Note -- on windows if you log out, this process will die. 
19 | REM You are responsible for making a resident windows service out of it, if you like 20 | REM Alternatively, we could deploy as a Tomcat or other webapp 21 | 22 | if "%COMMAND%" == "start" ( 23 | echo "START Xponents REST" 24 | java -Dopensextant.solr="%XPONENTS_SOLR%\solr7" -Xmx4g -Xms4g ^ 25 | -XX:+UseParallelGC -server ^ 26 | -Dlogback.configurationFile="%basedir%\etc\logback.xml" ^ 27 | -classpath "%basedir%\etc;%basedir%\lib\*" ^ 28 | org.opensextant.xlayer.server.xgeo.XlayerServer %XLAYER_PORT% 29 | pause 30 | ) 31 | 32 | if "%COMMAND%" == "stop" ( 33 | echo "STOP Xponents REST" 34 | echo "Launching a browser and running this... " 35 | start "" "http://localhost:%XLAYER_PORT%/xlayer/rest/control/stop" 36 | ) 37 | 38 | 39 | -------------------------------------------------------------------------------- /script/xlayer-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | script=`dirname $0;` 4 | basedir=`cd -P $script/..; echo $PWD` 5 | 6 | CMD=$1 7 | XLAYER_PORT=$2 8 | 9 | case $CMD in 10 | 11 | 'start') 12 | echo JAVA_HOME = $JAVA_HOME 13 | echo $* 14 | cd $basedir 15 | logfile=$basedir/log/xlayer-stderr.log 16 | CLASSPATH="$basedir/etc:$basedir/lib/*" 17 | XPONENTS_SOLR=${XPONENTS_SOLR:-$basedir/xponents-solr} 18 | 19 | nohup java -Dopensextant.solr=$XPONENTS_SOLR/solr7 -Xmx4g -Xms4g \ 20 | -XX:+UseParallelGC -server \ 21 | -Dlogback.configurationFile=$basedir/etc/logback.xml \ 22 | -classpath "$CLASSPATH" org.opensextant.xlayer.server.xgeo.XlayerServer $XLAYER_PORT >$logfile 2>&1 & 23 | ;; 24 | 25 | 26 | 'stop') 27 | RESTAPI=http://localhost:$XLAYER_PORT/xlayer/rest/control/stop 28 | # Using curl, POST a JSON object to the service. 
29 | curl "$RESTAPI" 30 | ;; 31 | 32 | esac 33 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Developer bootstrap: installs the Python libraries used by the Solr/ETL 4 | # scripting into ./piplib, then assembles basic resources under ./solr. 5 | # All paths are relative to the current directory (presumably the project root). 6 | 7 | # Print a banner: a blank line, the message (all arguments), then a rule. 8 | msg(){ 9 | echo 10 | echo "$*" 11 | echo "=========================" 12 | } 13 | 14 | 15 | msg "../Xponents-Core checkout and build is required to get started" 16 | 17 | unset PYTHONPATH 18 | msg "Install Python resources" 19 | 20 | # Resolve the built opensextant sdist with a glob rather than `ls`, so that 21 | # "no match" leaves PYLIB empty; if several archives match, the last one wins. 22 | PYLIB_PATTERN='../dist/xponents-core-3.*/python/opensextant-1.5.*.tar.gz' 23 | PYLIB= 24 | for candidate in $PYLIB_PATTERN; do 25 | [ -e "$candidate" ] && PYLIB=$candidate 26 | done 27 | 28 | # Fail with a non-zero status when the archive is absent. 29 | if [ -z "$PYLIB" ]; then 30 | msg "Locate $PYLIB_PATTERN first please" 31 | exit 1 32 | fi 33 | 34 | 35 | # Install built lib with dependencies to ./piplib. First install here are 36 | # libraries used by Solr/ETL scripting: 37 | pip3 install -U --target ./piplib lxml bs4 arrow requests pyshp pycountry "$PYLIB" 38 | 39 | msg "Assemble basic JAR resources" 40 | . ./dev.env 41 | 42 | # Stop if ./solr is missing instead of running the build steps in the wrong directory. 43 | pushd ./solr || exit 1 44 | python3 ./script/assemble_person_filter.py 45 | ant gaz-meta 46 | popd 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /solr/.gitignore: -------------------------------------------------------------------------------- 1 | /tmp/ 2 | tmp 3 | /build.properties 4 | log 5 | /solr4/gazetteer/data 6 | /solr4/taxcat/data 7 | /solr4/lib 8 | /solr6/gazetteer/data 9 | /solr6/taxcat/data 10 | /solr6/lib 11 | /solr7/gazetteer/data 12 | /solr7/taxcat/data 13 | /solr7/lib 14 | /test 15 | jetty9 16 | webapps 17 | lib/ext 18 | generated-*json 19 | build 20 | download 21 | /etc/gazetteer/filters/carrot2* 22 | managed-schema 23 | /etc/gazetteer/additions/generated* 24 | /etc/gazetteer/filters/generated* 25 | /etc/gazetteer/filters/person-name-filter.txt 26 | /etc/gazetteer/filters/census 27 | /etc/taxcat/data/ 28 | /solr6-dist/ 29 | /solr7-dist/ 30 | /etc/gazetteer/ne_10m_admin_1_states_provinces* 31 | -------------------------------------------------------------------------------- /solr/VERSION: 
-------------------------------------------------------------------------------- 1 | name : Xponents Solr Index 2 | version : 3.5 3 | date : 20220324 4 | date-str: Tue Mar 24 21:18:14 EDT 2022 5 | release-tag: 2022Q2 6 | method: SQLite Master 7 | -------------------------------------------------------------------------------- /solr/build-1-get-sources.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | force=0 3 | 4 | echo "Where files EXIST -- please remove manually to refresh." 5 | 6 | echo 7 | echo 8 | echo "Download NGA Geonames - World" 9 | echo "===============================" 10 | FILE=Whole_World.7z 11 | TEST=tmp/$FILE 12 | if [ ! -f "$TEST" ]; then 13 | curl -k https://geonames.nga.mil/geonames/GNSData/fc_files/$FILE -o $TEST 14 | else 15 | echo "EXISTS $TEST" 16 | fi 17 | 18 | TARGET=./tmp/Whole_World.txt 19 | if [ -f "$TEST" -a ! -e $TARGET ]; then 20 | echo "Unpacking $TEST" 21 | # On Mac: 7zz; On Ubuntu: 7za 22 | UNZIP=7zz 23 | rm $TARGET 24 | $UNZIP e -so $TEST | grep -v "The geographic names in this database" > $TARGET 25 | echo "NGA Geonames file is at $TARGET" 26 | else 27 | echo "EXISTS $TARGET ?" 28 | fi 29 | 30 | echo 31 | echo 32 | echo "Download USGS National File" 33 | echo "===============================" 34 | USGS_PRODDATE=20210825 35 | FILE=NationalFile.zip 36 | TEST=tmp/$FILE 37 | if [ ! -f "$TEST" ]; then 38 | curl -k "https://geonames.usgs.gov/docs/stategaz/$FILE" -o $TEST 39 | else 40 | echo "EXISTS $TEST" 41 | fi 42 | 43 | TARGET=./tmp/NationalFile.txt 44 | if [ -f "$TEST" -a ! -e $TARGET ]; then 45 | echo "Unpacking $TEST" 46 | UNZIP=unzip 47 | $UNZIP -d ./tmp $TEST 48 | mv ./tmp/NationalFile_${USGS_PRODDATE}* $TARGET 49 | 50 | echo "USGS file is at $TARGET" 51 | else 52 | echo "EXISTS $TARGET ?" 
53 | fi 54 | 55 | 56 | echo 57 | echo 58 | echo "===============================" 59 | echo "Download HumData Exchange - PAK" 60 | echo "===============================" 61 | FILE=pak_adm_wfp_20220909_shp.zip 62 | TEST=tmp/$FILE 63 | 64 | if [ ! -f "$TEST" ]; then 65 | echo "Download https://data.humdata.org/dataset/cod-ab-pak" 66 | echo " grab FILE $FILE" 67 | echo " copy to ./tmp/ here" 68 | echo "READY?" 69 | read -r ans 70 | if [ "$ans" = "y" ] && [ -f "$TEST" ]; then 71 | echo "Continuing ... " 72 | # Unpacked once by the check below; unpacking here as well made unzip run twice on the same files. 73 | else 74 | echo "File not found" 75 | fi 76 | fi 77 | 78 | if [ -f "$TEST" ]; then 79 | unzip -d ./tmp/pak_adm_wfp/ "$TEST" 80 | fi 81 | 82 | -------------------------------------------------------------------------------- /solr/build-1-prep-admin1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | script=`dirname $0;` 4 | basedir=`cd -P $script/..; echo $PWD` 5 | XPONENTS=$basedir 6 | export PYTHONPATH=$XPONENTS/python:$XPONENTS/piplib 7 | export PYTHONUNBUFFERED=1 8 | 9 | date 10 | 11 | # All of this is to produce a foundational mapping of ISO to FIPS administrative boundary codes. 12 | # Unfortunately we have to do this empirically so we know this mapping is consistent with our source data 13 | # AND to know if this source data has oddities. 14 | 15 | # Damn... 
You'd need this old copy of NGA geonames c.2021: 16 | python3 ./script/gaz_nga.py ./tmp/Countries.txt --adm1 17 | 18 | # Use the latest NGA geonames: 19 | python3 ./script/gaz_nga.py ./tmp/Whole_World.txt --adm1 20 | 21 | # Pull in the Geonames.org content 22 | python3 ./script/gaz_geonames.py ./tmp/allCountries.txt --adm1 23 | 24 | # Stitch it all together: 25 | ./script/gaz_admin_exporter.py 26 | 27 | 28 | echo "Find resulting output here:" 29 | 30 | ls -l etc/gazetteer/global_admin1_mapping.json 31 | 32 | echo "Intermediate files:" 33 | ls -l etc/gazetteer/*csv 34 | 35 | date 36 | 37 | echo "Run python tests in python/tests -- specifically adapt test_gazetteer_api.py on this result" 38 | echo "When testing fine then copy up to Core/src/main/resources/" 39 | 40 | -------------------------------------------------------------------------------- /solr/build-3-sqlite-postal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Loads postal code sources into a SQLite DB. Usage: build-3-sqlite-postal.sh [data] [test] 3 | script=`dirname $0;` 4 | basedir=`cd -P $script/..; echo $PWD` 5 | XPONENTS=$basedir 6 | export PYTHONPATH=$XPONENTS/python:$XPONENTS/piplib 7 | export PYTHONUNBUFFERED=1 8 | # 'data' runs `ant postal-sources` first; 'test' loads a capped sample (--max 1000) into ./tmp/postal_test.sqlite. 9 | do_test=0 10 | do_data=0 11 | while [ "$1" != "" ]; do 12 | case "$1" in 13 | 'data') do_data=1 ;; 14 | 'test') do_test=1 ;; 15 | *) 16 | # An unrecognized word was never shifted, so this loop used to spin forever. 17 | echo "Usage: $0 [data] [test]" >&2 18 | exit 2 19 | ;; 20 | esac 21 | shift 22 | done 23 | 24 | 25 | if [ "$do_data" -eq 1 ]; then 26 | ant postal-sources 27 | fi 28 | 29 | if [ -d "./tmp/postal" ]; then 30 | echo "Ready to Roll" 31 | else 32 | echo "Some files may be missing - Please check ./tmp/postal and make sure required files are unpacked" 33 | ls -1 ./tmp/ 34 | exit 1 35 | fi 36 | 37 | if [ "$do_test" -eq 1 ] ; then 38 | DB=./tmp/postal_test.sqlite 39 | python3 ./script/postal.py ./tmp/postal/allCountries.txt 0 --db $DB --max 1000 40 | python3 ./script/postal.py ./tmp/postal/CA_full.txt 2000000 --db $DB --max 1000 --country CA 41 | python3 ./script/postal.py ./tmp/postal/NL_full.txt 3000000 --db $DB --max 1000 
--country NL 42 | python3 ./script/postal.py ./tmp/postal/GB_full.txt 4000000 --db $DB --max 1000 --country GB 43 | python3 ./script/postal.py XX 6000000 --db $DB --max 1000 --copy-admin 44 | 45 | else 46 | # PRODUCTION 47 | DB=./tmp/postal_gazetteer.sqlite 48 | # start fresh: 49 | rm -f $DB 50 | echo POSTAL/GEONAMES `date` 51 | python3 ./script/postal.py ./tmp/postal/allCountries.txt 0 --db $DB 52 | echo POSTAL/CANADA `date` 53 | python3 ./script/postal.py ./tmp/postal/CA_full.txt 2000000 --db $DB --country CA 54 | echo POSTAL/NETHERLANDS `date` 55 | python3 ./script/postal.py ./tmp/postal/NL_full.txt 3000000 --db $DB --country NL 56 | echo POSTAL/UK `date` 57 | python3 ./script/postal.py ./tmp/postal/GB_full.txt 4000000 --db $DB --country GB 58 | echo POSTAL/ADMIN CODES `date` 59 | python3 ./script/postal.py XX 6000000 --db $DB --copy-admin --optimize 60 | 61 | fi 62 | -------------------------------------------------------------------------------- /solr/etc/gazetteer/additions/README.md: -------------------------------------------------------------------------------- 1 | This additions area allows any user to add additional 2 | place name entries to the gazetteer. The concept to start is pretty simple: 3 | 4 | 1. Using the existing geonames in the gazetteer, find an entry that best lines up 5 | with the name/location variant you are adding. This works fine for adding 6 | nicknames or other common names of places. 7 | 8 | 2. Insert new rows in the `adhoc-placenames.csv` 9 | 10 | 3. Requirements: 'id' field must be an integer. 11 | - It must not collide with existing gazetteer name space of 0 .. 20,000,000 12 | Possibly choose an id (based on the Solr entry) and add 20,000,000 to it. 13 | Then for any additional items increment that value. 14 | 15 | - name_type = A or N for abbreviation or name. 16 | - name_bias = 1 to 100; a higher value suggests the name is weighted more (more likely a commonly used name) 17 | - source = "X" for Xponents 18 | 19 | 4. 
Wait until you are ready to run full `build-sqlite-master.sh test` OR attempt this 20 | with the Python routine: `python3 ./script/gaz_generate_variants.py --db test.sqlite` 21 | 22 | -------------------------------------------------------------------------------- /solr/etc/gazetteer/filters/continent-filter.txt: -------------------------------------------------------------------------------- 1 | # Names of continents. They're common enough you will want to add language-specific versions of continents 2 | arctic 3 | antarctica 4 | africa 5 | asia 6 | europe 7 | oceania 8 | north america 9 | south america 10 | # Sub-continents 11 | north africa 12 | east asia 13 | se asia 14 | southeast asia 15 | southern asia 16 | eastern europe 17 | western europe 18 | northern europe 19 | southern europe 20 | eurasia 21 | mediterranean 22 | middle east 23 | mideast 24 | mid-east 25 | americas 26 | latin america 27 | central america 28 | -------------------------------------------------------------------------------- /solr/etc/gazetteer/filters/exclude-features.csv: -------------------------------------------------------------------------------- 1 | #CLASS,CODE,NAME 2 | A,PCLI,AIA 3 | A,PCLI,albany 4 | A,PCLI,Albany 5 | A,PCLI,German 6 | A,PCLI,a man 7 | A,PCLI,israeli 8 | A,PCLI,LSO 9 | A,PCLI,BRB 10 | A,PCLI,GT 11 | A,PCLI,isr 12 | A,PCLI,yemeni 13 | A,PCLI,pakistani 14 | A,PCLI,filipino 15 | A,PCLI,widen 16 | P,PPLX,iran 17 | P,PPLX,syria 18 | P,PPL,№ 2 19 | P,PPL,№ 6 20 | P,PPL,No 2 21 | P,PPL,No 6 22 | S,REST,4th 23 | -------------------------------------------------------------------------------- /solr/etc/gazetteer/filters/include-adhoc-places.txt: -------------------------------------------------------------------------------- 1 | #countries. Somehow Israel is now a common male name; had not seen this before. 2 | israel 3 | # Random find. York is a common place as well as last name. 4 | # But as a last name or first, "New York, Abc" would cause Abc to be filtered out. 
5 | york 6 | england 7 | milan 8 | # Last names, that are ordinals -- do not filter out items based on last name, e.g., West Texas or "West, Texas" 9 | west 10 | # Syrian places 11 | mari 12 | # Swiss places 13 | geneva 14 | # Afghan places 15 | lashkar 16 | khan 17 | spin 18 | shah 19 | dara 20 | parvan 21 | khyber 22 | qom 23 | # US Cities 24 | savannah 25 | montgomery 26 | denver 27 | aurora 28 | cleveland 29 | columbus 30 | richmond 31 | austin 32 | durham 33 | modesto 34 | charlotte 35 | salinas 36 | alexandria 37 | carolina 38 | salvador 39 | washington 40 | warren 41 | randolph 42 | worthington 43 | kingston 44 | jackson 45 | holland 46 | santiago 47 | santos 48 | tijuana 49 | cruz 50 | houston 51 | compton 52 | hampton 53 | roma 54 | selma 55 | jefferson 56 | dallas 57 | paris 58 | granville 59 | mclean 60 | # US states 61 | virginia 62 | maryland 63 | georgia 64 | florida 65 | nevada 66 | homer 67 | orlando 68 | augusta 69 | # other countries: 70 | alberta 71 | india 72 | china 73 | america 74 | jordan 75 | lima 76 | georgia 77 | argentina 78 | trinidad 79 | chad 80 | sydney 81 | -------------------------------------------------------------------------------- /solr/etc/gazetteer/filters/non-placenames,ara.csv: -------------------------------------------------------------------------------- 1 | exclusion 2 | نم 3 | ىلإ 4 | نأ 5 | نع 6 | ام 7 | نيب 8 | اذه 9 | لبآ 10 | نود 11 | لمع 12 | امك 13 | نإ 14 | ال 15 | اهل 16 | ةرم 17 | ويام 18 | نب 19 | لامج 20 | دي 21 | دمو 22 | لاق -------------------------------------------------------------------------------- /solr/etc/gazetteer/filters/non-placenames,rus,ukr.csv: -------------------------------------------------------------------------------- 1 | exclusion,category 2 | полезная,adjective 3 | юг,directional 4 | privet,greeting 5 | priviet,greeting 6 | привет,greeting 7 | русский,language 8 | ага,noun 9 | белые,noun 10 | большой,noun 11 | вам,noun 12 | второй,noun 13 | главный,noun 14 | главный,noun 15 | 
движки,noun 16 | деньги,noun 17 | жабер,noun 18 | жалобы,noun 19 | защита,noun 20 | или,noun 21 | мда,noun 22 | нем,noun 23 | ниша,noun 24 | нужна,noun 25 | обзор,noun 26 | основной,noun 27 | ошибка,noun 28 | первый,noun 29 | правда,noun 30 | раздел,noun 31 | сети,noun 32 | система,noun 33 | старт,noun 34 | хорошая,noun 35 | центр,noun 36 | страна,place.general 37 | он,pronoun 38 | они,pronoun 39 | оп,pronoun 40 | але,stopword 41 | без,stopword 42 | би,stopword 43 | бо,stopword 44 | ва,stopword 45 | ви,stopword 46 | гу,stopword 47 | де,stopword 48 | до,stopword 49 | до,stopword 50 | ек,stopword 51 | за,stopword 52 | ин,stopword 53 | ит,stopword 54 | как,stopword 55 | ко,stopword 56 | мо,stopword 57 | на,stopword 58 | нан,stopword 59 | наш,stopword 60 | не,stopword 61 | но,stopword 62 | но,stopword 63 | об,stopword 64 | об,stopword 65 | од,stopword 66 | он,stopword 67 | оон,stopword 68 | от,stopword 69 | по,stopword 70 | так,stopword 71 | там,stopword 72 | те,stopword 73 | то,stopword 74 | усі,stopword 75 | эта,stopword -------------------------------------------------------------------------------- /solr/etc/gazetteer/filters/person-suffix-filter.txt: -------------------------------------------------------------------------------- 1 | phd 2 | ph.d 3 | md 4 | esq 5 | r.n 6 | rn 7 | cpa 8 | mba 9 | jr 10 | junior 11 | sr 12 | senior 13 | ii 14 | iii 15 | ret 16 | -------------------------------------------------------------------------------- /solr/etc/gazetteer/usgs2gnis-feature-map.csv: -------------------------------------------------------------------------------- 1 | USGS_FEATURE_CLASS,FEATURE_CLASS,FEATURE_CODE 2 | Airport,S,AIRP 3 | Arch,S,ARCH 4 | Area,L,AREA 5 | Arroyo,T,VAL 6 | Bar,T,BAR 7 | Basin,H,DCKB 8 | Bay,H,BAY 9 | Beach,T,BCH 10 | Bench,T,BNCH 11 | Bend,H,STMB 12 | Bridge,S,BDG 13 | Building,S,BLDG 14 | Canal,H,CNL 15 | Cape,T,CAPE 16 | Cave,S,CAVE 17 | Cemetery,S,CMTY 18 | Census,A,ADMD 19 | Channel,H,CHN 20 | Church,S,CH 21 | Civil,A,ADMD 22 | 
Cliff,T,CLF 23 | Crater,T,CRTR 24 | Crossing,T,FORD 25 | Dam,S,DAM 26 | Falls,H,FLLS 27 | Flat,H,FLTM 28 | Forest,V,FRST 29 | Gap,T,GAP 30 | Glacier,H,GLCR 31 | Gut,T,VAL 32 | Harbor,H,HBR 33 | Hospital,S,HSP 34 | Island,T,ISL 35 | Isthmus,T,ISTH 36 | Lake,H,LK 37 | Lava,T,LAVA 38 | Levee,T,LEV 39 | Locale,L,AREA 40 | Military,L,MILB 41 | Mine,S,MN 42 | Oilfield,L,OILF 43 | Park,L,PRK 44 | Pillar,T,PK 45 | Plain,T,PLN 46 | Populated Place,P,PPL 47 | Post Office,S,PO 48 | Range,T,MTS 49 | Rapids,H,RPDS 50 | Reserve,L,RES 51 | Reservoir,H,RSV 52 | Ridge,T,MTS 53 | School,S,SCH 54 | Sea,H,SEA 55 | Slope,T,SLP 56 | Spring,H,SPNG 57 | Stream,H,STM 58 | Summit,T,PK 59 | Swamp,H,SWMP 60 | Tower,S,TOWR 61 | Trail,R,TRL 62 | Tunnel,R,TNL 63 | Unknown,S,UNK 64 | Valley,T,VAL 65 | Well,H,WLL 66 | Woods,V,FRST 67 | 68 | -------------------------------------------------------------------------------- /solr/etc/taxcat/non-person-names.txt: -------------------------------------------------------------------------------- 1 | alpha 2 | art 3 | asia 4 | battle 5 | beach 6 | best 7 | black 8 | blue 9 | brain 10 | bravo 11 | camp 12 | carry 13 | case 14 | cash 15 | chase 16 | church 17 | day 18 | dawn 19 | del 20 | do 21 | dot 22 | echo 23 | else 24 | english 25 | era 26 | esta 27 | field 28 | france 29 | good 30 | green 31 | guy 32 | hall 33 | head 34 | hill 35 | hope 36 | house 37 | hunt 38 | justice 39 | key 40 | kit 41 | la 42 | law 43 | le 44 | les 45 | lo 46 | long 47 | love 48 | mac 49 | major 50 | man 51 | manual 52 | many 53 | max 54 | miles 55 | nam 56 | name 57 | ngo 58 | pace 59 | page 60 | pan 61 | park 62 | person 63 | price 64 | quick 65 | red 66 | royal 67 | salah 68 | see 69 | short 70 | small 71 | smart 72 | son 73 | soon 74 | spring 75 | star 76 | strong 77 | sun 78 | tottenham 79 | valencia 80 | una 81 | val 82 | van 83 | vic 84 | wan 85 | will 86 | young 87 | zona 88 | # week days 89 | sunday 90 | monday 91 | tuesday 92 | wednesday 93 | thursday 94 | friday 95 | 
saturday 96 | weeks 97 | # month names 98 | jan 99 | january 100 | april 101 | may 102 | june 103 | september 104 | august 105 | -------------------------------------------------------------------------------- /solr/etc/taxcat/stopwords-jrcnames.txt: -------------------------------------------------------------------------------- 1 | # random junk in JRC 2 | north 3 | south 4 | east 5 | west 6 | start 7 | end 8 | total 9 | times 10 | the sun 11 | the times 12 | news agency 13 | daily news 14 | press agency 15 | military intelligence 16 | independent 17 | international studies 18 | international trade 19 | will meet 20 | the nation 21 | facebook 22 | google 23 | internet explorer 24 | inter 25 | original post 26 | yahoo 27 | twitter 28 | youtube 29 | people 30 | they are 31 | our own 32 | just want 33 | are you 34 | all you 35 | ps 36 | pp 37 | reach 38 | armed forces 39 | canal 40 | the age 41 | read more 42 | presedential office 43 | set fire 44 | emergency 45 | nature 46 | status quo 47 | the independent 48 | gross domestic product 49 | privacy policy 50 | adobe reader 51 | guiding principles 52 | lessons learned 53 | better life 54 | secret 55 | top secret 56 | san diegó 57 | san diego 58 | san franciscó 59 | san francisco 60 | corpus christi 61 | nuevo león 62 | nuevo léon 63 | nuevo leon 64 | san pedro 65 | umm qasr 66 | gps 67 | windows 68 | operating system 69 | -------------------------------------------------------------------------------- /solr/mysolr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Use Installed Solr $SOLR_INSTALL 3 | SOLR_INSTALL=./solr7-dist 4 | SOLR_HOME=./solr7 5 | 6 | PORT=$2 7 | case "$1" in 8 | 9 | 'start') 10 | $SOLR_INSTALL/bin/solr start -p $PORT -s $SOLR_HOME -m 3g -q 11 | ;; 12 | 13 | 'stop') 14 | $SOLR_INSTALL/bin/solr stop -p $PORT 15 | # TODO: Deletion of locks forcibly may be an issue. 
16 | # For now we prefer to not be destructive -- if locks remain there is another issue. 17 | ;; 18 | *) 19 | echo "Please just start or stop the Solr server" 20 | esac 21 | -------------------------------------------------------------------------------- /solr/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Sample properties to initialise log4j 2 | log4j.rootLogger=INFO, STDOUT 3 | 4 | # Appender config for STDOUT 5 | #log 6 | log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender 7 | log4j.appender.STDOUT.Threshold=DEBUG 8 | log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.STDOUT.layout.ConversionPattern=%5p - %m%n 10 | 11 | log4j.org.apache.solr=ERROR 12 | log4j.org.apache.solr.client.solrj.impl.HttpClientUtil=ERROR 13 | log4j.org.apache.solr.client.solrj=ERROR 14 | log4j.org.apache.solr.update.processor=WARN 15 | log4j.org.opensextant.extractors.geo.GazetteerUpdateProcessorFactory=DEBUG 16 | 17 | -------------------------------------------------------------------------------- /solr/script/convert_latin1_folding.py: -------------------------------------------------------------------------------- 1 | import os 2 | from opensextant.utility import get_list 3 | 4 | uni_array = [] 5 | ascii_array = [] 6 | uni_mapping = dict() 7 | 8 | with open(os.path.join("solr7/gazetteer/conf/OpenSextant-Gazetteer-ASCIIFolding.txt"), "r", encoding="UTF-8") as fh: 9 | for line in fh: 10 | text = line.strip() 11 | if not text or text.startswith("#"): 12 | continue 13 | 14 | if "=>" not in text: 15 | continue 16 | mapping = get_list(text, delim='=>') 17 | # print(mapping[0], mapping[1]) 18 | u = mapping[0].replace('"', '') 19 | a = mapping[1].replace('"', '') 20 | uni_mapping[u] = a 21 | uni_array.append(mapping[0]) 22 | ascii_array.append(mapping[1]) 23 | 24 | # print(repr(uni_array)) 25 | # print(repr(ascii_array)) 26 | print(uni_mapping) 27 | 
-------------------------------------------------------------------------------- /solr/script/export_gazetteer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Usage: export_gazetteer.sh SQLITE_FILE 4 | # Strips a COPY of the master gazetteer for export. This is destructive: rows 5 | # flagged duplicate are deleted, and the name_bias/id_bias/search_only/duplicate 6 | # columns are dropped and re-added with their default values. 7 | 8 | echo "Operating on a Copy of master db? " 9 | read -r CHECK 10 | 11 | # Quoted so that an empty answer is a clean "no" rather than a `[` syntax error 12 | # that would let the script fall through and modify the database. 13 | if [ "$CHECK" != "y" ]; then 14 | echo "Answer y to proceed... exiting." 15 | exit 16 | fi 17 | 18 | DB=$1 19 | # Require a non-empty argument that names an existing regular file. 20 | if [ -n "$DB" ] && [ -f "$DB" ]; then 21 | # Drop odd ball INDEXES; Delete duplicates and vacuum remaining. 22 | # Print out a count of remaining. 23 | sqlite3 "$DB" << EOF 24 | delete from placenames where duplicate=1; 25 | DROP index IF EXISTS so_idx ; 26 | DROP index IF EXISTS dup_idx; 27 | 28 | ALTER TABLE "placenames" drop column "name_bias"; 29 | ALTER TABLE "placenames" drop column "id_bias"; 30 | ALTER TABLE "placenames" drop column "search_only"; 31 | ALTER TABLE "placenames" drop column "duplicate"; 32 | 33 | VACUUM; 34 | 35 | ALTER TABLE "placenames" add column "duplicate" BIT DEFAULT 0; 36 | ALTER TABLE "placenames" add column "name_bias" INTEGER DEFAULT 0; 37 | ALTER TABLE "placenames" add column "id_bias" INTEGER DEFAULT 0; 38 | ALTER TABLE "placenames" add column "search_only" BIT DEFAULT 0; 39 | 40 | select count(1) from placenames; 41 | EOF 42 | 43 | else 44 | echo "Did not find sqlite file: '$DB'" 45 | exit 1 46 | fi 47 | 48 | -------------------------------------------------------------------------------- /solr/script/gaz_etl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/solr/script/gaz_etl.py -------------------------------------------------------------------------------- /solr/script/gaz_fix_country_coding.py: -------------------------------------------------------------------------------- 1 | from opensextant import load_us_provinces 2 | from opensextant.gazetteer import get_default_db, DB 3 | 4 | 5 | class USFixer: 6 | 7 | def __init__(self, dbf, 
debug=False): 8 | """ 9 | Convert all US FIPS codes to use ISO US Postal codes. US.25 => US.MA 10 | :param dbf: 11 | :param debug: 12 | """ 13 | self.db = DB(dbf, debug=debug) 14 | 15 | def fix(self, limit=-1): 16 | load_us_provinces() 17 | distinct = set([]) 18 | count = 0 19 | from opensextant import usstates 20 | for st in usstates: 21 | count += 1 22 | # country, adm1_curr to adm1_new 23 | us_state = usstates[st] 24 | if us_state.adm1 in distinct: 25 | continue 26 | self.db.update_admin1_code("US", us_state.adm1, us_state.adm1_postalcode) 27 | distinct.add(us_state.adm1) 28 | if 0 < limit < count: 29 | print("User limit reached") 30 | break 31 | self.db.commit() 32 | self.db.close() 33 | 34 | 35 | if __name__ == "__main__": 36 | from argparse import ArgumentParser 37 | 38 | ap = ArgumentParser() 39 | ap.add_argument("country") 40 | ap.add_argument("--db", default=get_default_db()) 41 | ap.add_argument("--max", help="maximum rows to process for testing", default=-1) 42 | ap.add_argument("--debug", action="store_true", default=False) 43 | args = ap.parse_args() 44 | 45 | if args.country == "US": 46 | USFixer(args.db, debug=args.debug).fix(limit=int(args.max)) 47 | else: 48 | print("Only country needing fixin' is US.") 49 | -------------------------------------------------------------------------------- /solr/script/gaz_popstats.py: -------------------------------------------------------------------------------- 1 | from opensextant.gazetteer import get_default_db, DB 2 | 3 | 4 | class PopStats: 5 | def __init__(self, dbf): 6 | self.db = DB(dbf) 7 | 8 | def load(self): 9 | self.db.add_population_stats() 10 | 11 | 12 | if __name__ == "__main__": 13 | from argparse import ArgumentParser 14 | 15 | ap = ArgumentParser() 16 | ap.add_argument("--db", default=get_default_db()) 17 | ap.add_argument("--debug", action="store_true", default=False) 18 | 19 | args = ap.parse_args() 20 | 21 | PopStats(args.db).load() 22 | 
-------------------------------------------------------------------------------- /solr/script/inspector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Admin Code helper" 3 | 4 | if [ ! -d "$XPONENTS/solr" ] ; then 5 | echo "Please load ../dev.env and or set the XPONENTS variable to your checkout" 6 | exit 7 | fi 8 | DB=$XPONENTS/solr/tmp/master_gazetteer.sqlite 9 | countries=`sqlite3 $DB "select distinct(cc) from placenames;"` 10 | for cc in $countries ; do 11 | echo "COUNTRY=$cc" 12 | sqlite3 $DB "select adm1,count(1) as CNT from placenames where cc='$cc' group by adm1;" 13 | done 14 | -------------------------------------------------------------------------------- /solr/script/wordstats-collector.py: -------------------------------------------------------------------------------- 1 | # 2 | # WordStats app -- ingest terms from GoogleBooks NGram data sets 3 | # v0.1: load unigram counts for terms 2-30 chars. Identify most common > 10 million occurrences that are also in Gaz 4 | # 5 | if __name__ == "__main__": 6 | from argparse import ArgumentParser 7 | from opensextant.wordstats import WordStats 8 | 9 | ap = ArgumentParser() 10 | ap.add_argument("catalog") 11 | ap.add_argument("input") 12 | ap.add_argument("--db", default="./tmp/wordstats.sqlite") 13 | 14 | args = ap.parse_args() 15 | 16 | collector = WordStats(args.db, minlen=2, maxlen=30) 17 | collector.ingest(args.input, args.catalog) 18 | -------------------------------------------------------------------------------- /solr/script/wordstats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | WORDSTATS=./tmp/wordstats 4 | 5 | download(){ 6 | mkdir -p $WORDSTATS 7 | for ALPHA in {a..z} ; do 8 | # rm -rf $WORDSTATS/*.gz 9 | URL="http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-${ALPHA}.gz" 10 | wget $URL -P $WORDSTATS/ 11 | done 12 | } 13 | 14 | assemble(){ 15 
| # Catalog is GA, GB, etc. "G" for google. 16 | for ALPHA in {a..z} ; do 17 | gzfile=($WORDSTATS/*-${ALPHA}.gz) 18 | python3 ./script/wordstats-collector.py G${ALPHA} $gzfile --db ./tmp/wordstats.sqlite 19 | done 20 | } 21 | 22 | while [ -n "$1" ]; do 23 | case $1 in 24 | "download") 25 | download 26 | shift 27 | ;; 28 | "assemble") 29 | assemble 30 | shift 31 | ;; 32 | *) 33 | echo "wordstats.sh [download] [assemble]" 34 | break; 35 | ;; 36 | esac 37 | done -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/contractions_ca.txt: -------------------------------------------------------------------------------- 1 | # Set of Catalan contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | d 4 | l 5 | m 6 | n 7 | s 8 | t 9 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/contractions_fr.txt: -------------------------------------------------------------------------------- 1 | # Set of French contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | l 4 | m 5 | t 6 | qu 7 | n 8 | s 9 | j 10 | d 11 | c 12 | jusqu 13 | quoiqu 14 | lorsqu 15 | puisqu 16 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/contractions_ga.txt: -------------------------------------------------------------------------------- 1 | # Set of Irish contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | d 4 | m 5 | b 6 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/contractions_it.txt: -------------------------------------------------------------------------------- 1 | # Set of Italian contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync 
it in build.xml 3 | c 4 | l 5 | all 6 | dall 7 | dell 8 | nell 9 | sull 10 | coll 11 | pell 12 | gl 13 | agl 14 | dagl 15 | degl 16 | negl 17 | sugl 18 | un 19 | m 20 | t 21 | s 22 | v 23 | d 24 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/hyphenations_ga.txt: -------------------------------------------------------------------------------- 1 | # Set of Irish hyphenations for StopFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | h 4 | n 5 | t 6 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stemdict_nl.txt: -------------------------------------------------------------------------------- 1 | # Set of overrides for the dutch stemmer 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | fiets fiets 4 | bromfiets bromfiets 5 | ei eier 6 | kind kinder 7 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_ar.txt: -------------------------------------------------------------------------------- 1 | # This file was created by Jacques Savoy and is distributed under the BSD license. 2 | # See http://members.unine.ch/jacques.savoy/clef/index.html. 
3 | # Also see http://www.opensource.org/licenses/bsd-license.html 4 | # Cleaned on October 11, 2009 (not normalized, so use before normalization) 5 | # This means that when modifying this list, you might need to add some 6 | # redundant entries, for example containing forms with both أ and ا 7 | من 8 | ومن 9 | منها 10 | منه 11 | في 12 | وفي 13 | فيها 14 | فيه 15 | و 16 | ف 17 | ثم 18 | او 19 | أو 20 | ب 21 | بها 22 | به 23 | ا 24 | أ 25 | اى 26 | اي 27 | أي 28 | أى 29 | لا 30 | ولا 31 | الا 32 | ألا 33 | إلا 34 | لكن 35 | ما 36 | وما 37 | كما 38 | فما 39 | عن 40 | مع 41 | اذا 42 | إذا 43 | ان 44 | أن 45 | إن 46 | انها 47 | أنها 48 | إنها 49 | انه 50 | أنه 51 | إنه 52 | بان 53 | بأن 54 | فان 55 | فأن 56 | وان 57 | وأن 58 | وإن 59 | التى 60 | التي 61 | الذى 62 | الذي 63 | الذين 64 | الى 65 | الي 66 | إلى 67 | إلي 68 | على 69 | عليها 70 | عليه 71 | اما 72 | أما 73 | إما 74 | ايضا 75 | أيضا 76 | كل 77 | وكل 78 | لم 79 | ولم 80 | لن 81 | ولن 82 | هى 83 | هي 84 | هو 85 | وهى 86 | وهي 87 | وهو 88 | فهى 89 | فهي 90 | فهو 91 | انت 92 | أنت 93 | لك 94 | لها 95 | له 96 | هذه 97 | هذا 98 | تلك 99 | ذلك 100 | هناك 101 | كانت 102 | كان 103 | يكون 104 | تكون 105 | وكانت 106 | وكان 107 | غير 108 | بعض 109 | قد 110 | نحو 111 | بين 112 | بينما 113 | منذ 114 | ضمن 115 | حيث 116 | الان 117 | الآن 118 | خلال 119 | بعد 120 | قبل 121 | حتى 122 | عند 123 | عندما 124 | لدى 125 | جميع 126 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_cz.txt: -------------------------------------------------------------------------------- 1 | a 2 | s 3 | k 4 | o 5 | i 6 | u 7 | v 8 | z 9 | dnes 10 | cz 11 | tímto 12 | budeš 13 | budem 14 | byli 15 | jseš 16 | můj 17 | svým 18 | ta 19 | tomto 20 | tohle 21 | tuto 22 | tyto 23 | jej 24 | zda 25 | proč 26 | máte 27 | tato 28 | kam 29 | tohoto 30 | kdo 31 | kteří 32 | mi 33 | nám 34 | tom 35 | tomuto 36 | mít 37 | nic 38 | proto 39 | kterou 40 | byla 41 | toho 42 | protože 43 | asi 
44 | ho 45 | naši 46 | napište 47 | re 48 | což 49 | tím 50 | takže 51 | svých 52 | její 53 | svými 54 | jste 55 | aj 56 | tu 57 | tedy 58 | teto 59 | bylo 60 | kde 61 | ke 62 | pravé 63 | ji 64 | nad 65 | nejsou 66 | či 67 | pod 68 | téma 69 | mezi 70 | přes 71 | ty 72 | pak 73 | vám 74 | ani 75 | když 76 | však 77 | neg 78 | jsem 79 | tento 80 | článku 81 | články 82 | aby 83 | jsme 84 | před 85 | pta 86 | jejich 87 | byl 88 | ještě 89 | až 90 | bez 91 | také 92 | pouze 93 | první 94 | vaše 95 | která 96 | nás 97 | nový 98 | tipy 99 | pokud 100 | může 101 | strana 102 | jeho 103 | své 104 | jiné 105 | zprávy 106 | nové 107 | není 108 | vás 109 | jen 110 | podle 111 | zde 112 | už 113 | být 114 | více 115 | bude 116 | již 117 | než 118 | který 119 | by 120 | které 121 | co 122 | nebo 123 | ten 124 | tak 125 | má 126 | při 127 | od 128 | po 129 | jsou 130 | jak 131 | další 132 | ale 133 | si 134 | se 135 | ve 136 | to 137 | jako 138 | za 139 | zpět 140 | ze 141 | do 142 | pro 143 | je 144 | na 145 | atd 146 | atp 147 | jakmile 148 | přičemž 149 | já 150 | on 151 | ona 152 | ono 153 | oni 154 | ony 155 | my 156 | vy 157 | jí 158 | ji 159 | mě 160 | mne 161 | jemu 162 | tomu 163 | těm 164 | těmu 165 | němu 166 | němuž 167 | jehož 168 | jíž 169 | jelikož 170 | jež 171 | jakož 172 | načež 173 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_el.txt: -------------------------------------------------------------------------------- 1 | # Lucene Greek Stopwords list 2 | # Note: by default this file is used after GreekLowerCaseFilter, 3 | # so when modifying this file use 'σ' instead of 'ς' 4 | ο 5 | η 6 | το 7 | οι 8 | τα 9 | του 10 | τησ 11 | των 12 | τον 13 | την 14 | και 15 | κι 16 | κ 17 | ειμαι 18 | εισαι 19 | ειναι 20 | ειμαστε 21 | ειστε 22 | στο 23 | στον 24 | στη 25 | στην 26 | μα 27 | αλλα 28 | απο 29 | για 30 | προσ 31 | με 32 | σε 33 | ωσ 34 | παρα 35 | αντι 36 | κατα 37 | μετα 38 | θα 
39 | να 40 | δε 41 | δεν 42 | μη 43 | μην 44 | επι 45 | ενω 46 | εαν 47 | αν 48 | τοτε 49 | που 50 | πωσ 51 | ποιοσ 52 | ποια 53 | ποιο 54 | ποιοι 55 | ποιεσ 56 | ποιων 57 | ποιουσ 58 | αυτοσ 59 | αυτη 60 | αυτο 61 | αυτοι 62 | αυτων 63 | αυτουσ 64 | αυτεσ 65 | αυτα 66 | εκεινοσ 67 | εκεινη 68 | εκεινο 69 | εκεινοι 70 | εκεινεσ 71 | εκεινα 72 | εκεινων 73 | εκεινουσ 74 | οπωσ 75 | ομωσ 76 | ισωσ 77 | οσο 78 | οτι 79 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_en.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | # a couple of test stopwords to test that the words are really being 17 | # configured from this file: 18 | # stopworda 19 | # stopwordb 20 | 21 | # Standard english stop words taken from Lucene's StopAnalyzer 22 | a 23 | an 24 | and 25 | are 26 | as 27 | at 28 | be 29 | but 30 | by 31 | for 32 | if 33 | in 34 | into 35 | is 36 | it 37 | no 38 | not 39 | of 40 | on 41 | or 42 | such 43 | that 44 | the 45 | their 46 | then 47 | there 48 | these 49 | they 50 | this 51 | to 52 | was 53 | will 54 | with 55 | # common verbs 56 | do 57 | does 58 | has 59 | have 60 | had 61 | 62 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_eu.txt: -------------------------------------------------------------------------------- 1 | # example set of basque stopwords 2 | al 3 | anitz 4 | arabera 5 | asko 6 | baina 7 | bat 8 | batean 9 | batek 10 | bati 11 | batzuei 12 | batzuek 13 | batzuetan 14 | batzuk 15 | bera 16 | beraiek 17 | berau 18 | berauek 19 | bere 20 | berori 21 | beroriek 22 | beste 23 | bezala 24 | da 25 | dago 26 | dira 27 | ditu 28 | du 29 | dute 30 | edo 31 | egin 32 | ere 33 | eta 34 | eurak 35 | ez 36 | gainera 37 | gu 38 | gutxi 39 | guzti 40 | haiei 41 | haiek 42 | haietan 43 | hainbeste 44 | hala 45 | han 46 | handik 47 | hango 48 | hara 49 | hari 50 | hark 51 | hartan 52 | hau 53 | hauei 54 | hauek 55 | hauetan 56 | hemen 57 | hemendik 58 | hemengo 59 | hi 60 | hona 61 | honek 62 | honela 63 | honetan 64 | honi 65 | hor 66 | hori 67 | horiei 68 | horiek 69 | horietan 70 | horko 71 | horra 72 | horrek 73 | horrela 74 | horretan 75 | horri 76 | hortik 77 | hura 78 | izan 79 | ni 80 | noiz 81 | nola 82 | non 83 | nondik 84 | nongo 85 | nor 86 | nora 87 | ze 88 | zein 89 | zen 90 | zenbait 91 | zenbat 92 | zer 93 | zergatik 94 | ziren 95 | zituen 96 | zu 97 | zuek 98 | zuen 99 | zuten 100 | -------------------------------------------------------------------------------- 
/solr/solr7/gazetteer/conf/lang/stopwords_ga.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | ach 4 | ag 5 | agus 6 | an 7 | aon 8 | ar 9 | arna 10 | as 11 | b' 12 | ba 13 | beirt 14 | bhúr 15 | caoga 16 | ceathair 17 | ceathrar 18 | chomh 19 | chtó 20 | chuig 21 | chun 22 | cois 23 | céad 24 | cúig 25 | cúigear 26 | d' 27 | daichead 28 | dar 29 | de 30 | deich 31 | deichniúr 32 | den 33 | dhá 34 | do 35 | don 36 | dtí 37 | dá 38 | dár 39 | dó 40 | faoi 41 | faoin 42 | faoina 43 | faoinár 44 | fara 45 | fiche 46 | gach 47 | gan 48 | go 49 | gur 50 | haon 51 | hocht 52 | i 53 | iad 54 | idir 55 | in 56 | ina 57 | ins 58 | inár 59 | is 60 | le 61 | leis 62 | lena 63 | lenár 64 | m' 65 | mar 66 | mo 67 | mé 68 | na 69 | nach 70 | naoi 71 | naonúr 72 | ná 73 | ní 74 | níor 75 | nó 76 | nócha 77 | ocht 78 | ochtar 79 | os 80 | roimh 81 | sa 82 | seacht 83 | seachtar 84 | seachtó 85 | seasca 86 | seisear 87 | siad 88 | sibh 89 | sinn 90 | sna 91 | sé 92 | sí 93 | tar 94 | thar 95 | thú 96 | triúr 97 | trí 98 | trína 99 | trínár 100 | tríocha 101 | tú 102 | um 103 | ár 104 | é 105 | éis 106 | í 107 | ó 108 | ón 109 | óna 110 | ónár 111 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_gl.txt: -------------------------------------------------------------------------------- 1 | # Galician stopwords 2 | a 3 | aínda 4 | alí 5 | aquel 6 | aquela 7 | aquelas 8 | aqueles 9 | aquilo 10 | aquí 11 | ao 12 | aos 13 | as 14 | así 15 | á 16 | ben 17 | cando 18 | che 19 | co 20 | coa 21 | comigo 22 | con 23 | connosco 24 | contigo 25 | convosco 26 | coas 27 | cos 28 | cun 29 | cuns 30 | cunha 31 | cunhas 32 | da 33 | dalgunha 34 | dalgunhas 35 | dalgún 36 | dalgúns 37 | das 38 | de 39 | del 40 | dela 41 | delas 42 | deles 43 | desde 44 | deste 45 | do 46 | dos 47 | dun 48 | duns 49 | dunha 50 | dunhas 51 | e 52 | el 53 | ela 54 | elas 55 | eles 56 | en 57 | era
58 | eran 59 | esa 60 | esas 61 | ese 62 | eses 63 | esta 64 | estar 65 | estaba 66 | está 67 | están 68 | este 69 | estes 70 | estiven 71 | estou 72 | eu 73 | é 74 | facer 75 | foi 76 | foron 77 | fun 78 | había 79 | hai 80 | iso 81 | isto 82 | la 83 | las 84 | lle 85 | lles 86 | lo 87 | los 88 | mais 89 | me 90 | meu 91 | meus 92 | min 93 | miña 94 | miñas 95 | moi 96 | na 97 | nas 98 | neste 99 | nin 100 | no 101 | non 102 | nos 103 | nosa 104 | nosas 105 | noso 106 | nosos 107 | nós 108 | nun 109 | nunha 110 | nuns 111 | nunhas 112 | o 113 | os 114 | ou 115 | ó 116 | ós 117 | para 118 | pero 119 | pode 120 | pois 121 | pola 122 | polas 123 | polo 124 | polos 125 | por 126 | que 127 | se 128 | senón 129 | ser 130 | seu 131 | seus 132 | sexa 133 | sido 134 | sobre 135 | súa 136 | súas 137 | tamén 138 | tan 139 | te 140 | ten 141 | teñen 142 | teño 143 | ter 144 | teu 145 | teus 146 | ti 147 | tido 148 | tiña 149 | tiven 150 | túa 151 | túas 152 | un 153 | unha 154 | unhas 155 | uns 156 | vos 157 | vosa 158 | vosas 159 | voso 160 | vosos 161 | vós 162 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_hy.txt: -------------------------------------------------------------------------------- 1 | # example set of Armenian stopwords. 
2 | այդ 3 | այլ 4 | այն 5 | այս 6 | դու 7 | դուք 8 | եմ 9 | են 10 | ենք 11 | ես 12 | եք 13 | է 14 | էի 15 | էին 16 | էինք 17 | էիր 18 | էիք 19 | էր 20 | ըստ 21 | թ 22 | ի 23 | ին 24 | իսկ 25 | իր 26 | կամ 27 | համար 28 | հետ 29 | հետո 30 | մենք 31 | մեջ 32 | մի 33 | ն 34 | նա 35 | նաև 36 | նրա 37 | նրանք 38 | որ 39 | որը 40 | որոնք 41 | որպես 42 | ու 43 | ում 44 | պիտի 45 | վրա 46 | և 47 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_ja.txt: -------------------------------------------------------------------------------- 1 | あそこ 2 | あっ 3 | あの 4 | あのかた 5 | あの人 6 | あり 7 | あります 8 | ある 9 | あれ 10 | い 11 | いう 12 | います 13 | いる 14 | う 15 | うち 16 | え 17 | お 18 | および 19 | おり 20 | おります 21 | か 22 | かつて 23 | から 24 | が 25 | き 26 | ここ 27 | こちら 28 | こと 29 | この 30 | これ 31 | これら 32 | さ 33 | さらに 34 | し 35 | しかし 36 | する 37 | ず 38 | せ 39 | せる 40 | そこ 41 | そして 42 | その 43 | その他 44 | その後 45 | それ 46 | それぞれ 47 | それで 48 | た 49 | ただし 50 | たち 51 | ため 52 | たり 53 | だ 54 | だっ 55 | だれ 56 | つ 57 | て 58 | で 59 | でき 60 | できる 61 | です 62 | では 63 | でも 64 | と 65 | という 66 | といった 67 | とき 68 | ところ 69 | として 70 | とともに 71 | とも 72 | と共に 73 | どこ 74 | どの 75 | な 76 | ない 77 | なお 78 | なかっ 79 | ながら 80 | なく 81 | なっ 82 | など 83 | なに 84 | なら 85 | なり 86 | なる 87 | なん 88 | に 89 | において 90 | における 91 | について 92 | にて 93 | によって 94 | により 95 | による 96 | に対して 97 | に対する 98 | に関する 99 | の 100 | ので 101 | のみ 102 | は 103 | ば 104 | へ 105 | ほか 106 | ほとんど 107 | ほど 108 | ます 109 | また 110 | または 111 | まで 112 | も 113 | もの 114 | ものの 115 | や 116 | よう 117 | より 118 | ら 119 | られ 120 | られる 121 | れ 122 | れる 123 | を 124 | ん 125 | 何 126 | 及び 127 | 彼 128 | 彼女 129 | 我々 130 | 特に 131 | 私 132 | 私達 133 | 貴方 134 | 貴方方 -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_lv.txt: -------------------------------------------------------------------------------- 1 | # Set of Latvian stopwords from A Stemming Algorithm for 
Latvian, Karlis Kreslins 2 | # the original list of over 800 forms was refined: 3 | # pronouns, adverbs, interjections were removed 4 | # 5 | # prepositions 6 | aiz 7 | ap 8 | ar 9 | apakš 10 | ārpus 11 | augšpus 12 | bez 13 | caur 14 | dēļ 15 | gar 16 | iekš 17 | iz 18 | kopš 19 | labad 20 | lejpus 21 | līdz 22 | no 23 | otrpus 24 | pa 25 | par 26 | pār 27 | pēc 28 | pie 29 | pirms 30 | pret 31 | priekš 32 | starp 33 | šaipus 34 | uz 35 | viņpus 36 | virs 37 | virspus 38 | zem 39 | apakšpus 40 | # Conjunctions 41 | un 42 | bet 43 | jo 44 | ja 45 | ka 46 | lai 47 | tomēr 48 | tikko 49 | turpretī 50 | arī 51 | kaut 52 | gan 53 | tādēļ 54 | tā 55 | ne 56 | tikvien 57 | vien 58 | kā 59 | ir 60 | te 61 | vai 62 | kamēr 63 | # Particles 64 | ar 65 | diezin 66 | droši 67 | diemžēl 68 | nebūt 69 | ik 70 | it 71 | taču 72 | nu 73 | pat 74 | tiklab 75 | iekšpus 76 | nedz 77 | tik 78 | nevis 79 | turpretim 80 | jeb 81 | iekam 82 | iekām 83 | iekāms 84 | kolīdz 85 | līdzko 86 | tiklīdz 87 | jebšu 88 | tālab 89 | tāpēc 90 | nekā 91 | itin 92 | jā 93 | jau 94 | jel 95 | nē 96 | nezin 97 | tad 98 | tikai 99 | vis 100 | tak 101 | iekams 102 | vien 103 | # modal verbs 104 | būt 105 | biju 106 | biji 107 | bija 108 | bijām 109 | bijāt 110 | esmu 111 | esi 112 | esam 113 | esat 114 | būšu 115 | būsi 116 | būs 117 | būsim 118 | būsiet 119 | tikt 120 | tiku 121 | tiki 122 | tika 123 | tikām 124 | tikāt 125 | tieku 126 | tiec 127 | tiek 128 | tiekam 129 | tiekat 130 | tikšu 131 | tiks 132 | tiksim 133 | tiksiet 134 | tapt 135 | tapi 136 | tapāt 137 | topat 138 | tapšu 139 | tapsi 140 | taps 141 | tapsim 142 | tapsiet 143 | kļūt 144 | kļuvu 145 | kļuvi 146 | kļuva 147 | kļuvām 148 | kļuvāt 149 | kļūstu 150 | kļūsti 151 | kļūst 152 | kļūstam 153 | kļūstat 154 | kļūšu 155 | kļūsi 156 | kļūs 157 | kļūsim 158 | kļūsiet 159 | # verbs 160 | varēt 161 | varēju 162 | varējām 163 | varēšu 164 | varēsim 165 | var 166 | varēji 167 | varējāt 168 | varēsi 169 | varēsiet 170 | varat 171 | varēja 172 
| varēs 173 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/stopwords_th.txt: -------------------------------------------------------------------------------- 1 | # Thai stopwords from: 2 | # "Opinion Detection in Thai Political News Columns 3 | # Based on Subjectivity Analysis" 4 | # Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak 5 | ไว้ 6 | ไม่ 7 | ไป 8 | ได้ 9 | ให้ 10 | ใน 11 | โดย 12 | แห่ง 13 | แล้ว 14 | และ 15 | แรก 16 | แบบ 17 | แต่ 18 | เอง 19 | เห็น 20 | เลย 21 | เริ่ม 22 | เรา 23 | เมื่อ 24 | เพื่อ 25 | เพราะ 26 | เป็นการ 27 | เป็น 28 | เปิดเผย 29 | เปิด 30 | เนื่องจาก 31 | เดียวกัน 32 | เดียว 33 | เช่น 34 | เฉพาะ 35 | เคย 36 | เข้า 37 | เขา 38 | อีก 39 | อาจ 40 | อะไร 41 | ออก 42 | อย่าง 43 | อยู่ 44 | อยาก 45 | หาก 46 | หลาย 47 | หลังจาก 48 | หลัง 49 | หรือ 50 | หนึ่ง 51 | ส่วน 52 | ส่ง 53 | สุด 54 | สําหรับ 55 | ว่า 56 | วัน 57 | ลง 58 | ร่วม 59 | ราย 60 | รับ 61 | ระหว่าง 62 | รวม 63 | ยัง 64 | มี 65 | มาก 66 | มา 67 | พร้อม 68 | พบ 69 | ผ่าน 70 | ผล 71 | บาง 72 | น่า 73 | นี้ 74 | นํา 75 | นั้น 76 | นัก 77 | นอกจาก 78 | ทุก 79 | ที่สุด 80 | ที่ 81 | ทําให้ 82 | ทํา 83 | ทาง 84 | ทั้งนี้ 85 | ทั้ง 86 | ถ้า 87 | ถูก 88 | ถึง 89 | ต้อง 90 | ต่างๆ 91 | ต่าง 92 | ต่อ 93 | ตาม 94 | ตั้งแต่ 95 | ตั้ง 96 | ด้าน 97 | ด้วย 98 | ดัง 99 | ซึ่ง 100 | ช่วง 101 | จึง 102 | จาก 103 | จัด 104 | จะ 105 | คือ 106 | ความ 107 | ครั้ง 108 | คง 109 | ขึ้น 110 | ของ 111 | ขอ 112 | ขณะ 113 | ก่อน 114 | ก็ 115 | การ 116 | กับ 117 | กัน 118 | กว่า 119 | กล่าว 120 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/conf/lang/userdict_ja.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This is a sample user dictionary for Kuromoji (JapaneseTokenizer) 3 | # 4 | # Add entries to this file in order to override the statistical model in terms 5 | # of segmentation, readings and part-of-speech tags. 
Notice that entries do 6 | # not have weights since they are always used when found. This is by-design 7 | # in order to maximize ease-of-use. 8 | # 9 | # Entries are defined using the following CSV format: 10 | # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> 11 | # 12 | # Notice that a single half-width space separates tokens and readings, and 13 | # that the number of tokens and readings must match exactly. 14 | # 15 | # Also notice that the result of multiple entries with the same <text> is undefined. 16 | # 17 | # Whitespace only lines are ignored. Comments are not allowed on entry lines. 18 | # 19 | 20 | # Custom segmentation for kanji compounds 21 | 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 22 | 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 23 | 24 | # Custom segmentation for compound katakana 25 | トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 26 | ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 27 | 28 | # Custom reading for former sumo wrestler 29 | 朝青龍,朝青龍,アサショウリュウ,カスタム人名 30 | -------------------------------------------------------------------------------- /solr/solr7/gazetteer/core.properties: -------------------------------------------------------------------------------- 1 | name=gazetteer 2 | -------------------------------------------------------------------------------- /solr/solr7/postal/core.properties: -------------------------------------------------------------------------------- 1 | name=postal 2 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/contractions_ca.txt: -------------------------------------------------------------------------------- 1 | # Set of Catalan contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | d 4 | l 5 | m 6 | n 7 | s 8 | t 9 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/contractions_fr.txt: -------------------------------------------------------------------------------- 1 | # Set of French contractions for ElisionFilter 2 | #
TODO: load this as a resource from the analyzer and sync it in build.xml 3 | l 4 | m 5 | t 6 | qu 7 | n 8 | s 9 | j 10 | d 11 | c 12 | jusqu 13 | quoiqu 14 | lorsqu 15 | puisqu 16 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/contractions_ga.txt: -------------------------------------------------------------------------------- 1 | # Set of Irish contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | d 4 | m 5 | b 6 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/contractions_it.txt: -------------------------------------------------------------------------------- 1 | # Set of Italian contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | c 4 | l 5 | all 6 | dall 7 | dell 8 | nell 9 | sull 10 | coll 11 | pell 12 | gl 13 | agl 14 | dagl 15 | degl 16 | negl 17 | sugl 18 | un 19 | m 20 | t 21 | s 22 | v 23 | d 24 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/hyphenations_ga.txt: -------------------------------------------------------------------------------- 1 | # Set of Irish hyphenations for StopFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | h 4 | n 5 | t 6 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stemdict_nl.txt: -------------------------------------------------------------------------------- 1 | # Set of overrides for the dutch stemmer 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | fiets fiets 4 | bromfiets bromfiets 5 | ei eier 6 | kind kinder 7 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_ar.txt: 
-------------------------------------------------------------------------------- 1 | # This file was created by Jacques Savoy and is distributed under the BSD license. 2 | # See http://members.unine.ch/jacques.savoy/clef/index.html. 3 | # Also see http://www.opensource.org/licenses/bsd-license.html 4 | # Cleaned on October 11, 2009 (not normalized, so use before normalization) 5 | # This means that when modifying this list, you might need to add some 6 | # redundant entries, for example containing forms with both أ and ا 7 | من 8 | ومن 9 | منها 10 | منه 11 | في 12 | وفي 13 | فيها 14 | فيه 15 | و 16 | ف 17 | ثم 18 | او 19 | أو 20 | ب 21 | بها 22 | به 23 | ا 24 | أ 25 | اى 26 | اي 27 | أي 28 | أى 29 | لا 30 | ولا 31 | الا 32 | ألا 33 | إلا 34 | لكن 35 | ما 36 | وما 37 | كما 38 | فما 39 | عن 40 | مع 41 | اذا 42 | إذا 43 | ان 44 | أن 45 | إن 46 | انها 47 | أنها 48 | إنها 49 | انه 50 | أنه 51 | إنه 52 | بان 53 | بأن 54 | فان 55 | فأن 56 | وان 57 | وأن 58 | وإن 59 | التى 60 | التي 61 | الذى 62 | الذي 63 | الذين 64 | الى 65 | الي 66 | إلى 67 | إلي 68 | على 69 | عليها 70 | عليه 71 | اما 72 | أما 73 | إما 74 | ايضا 75 | أيضا 76 | كل 77 | وكل 78 | لم 79 | ولم 80 | لن 81 | ولن 82 | هى 83 | هي 84 | هو 85 | وهى 86 | وهي 87 | وهو 88 | فهى 89 | فهي 90 | فهو 91 | انت 92 | أنت 93 | لك 94 | لها 95 | له 96 | هذه 97 | هذا 98 | تلك 99 | ذلك 100 | هناك 101 | كانت 102 | كان 103 | يكون 104 | تكون 105 | وكانت 106 | وكان 107 | غير 108 | بعض 109 | قد 110 | نحو 111 | بين 112 | بينما 113 | منذ 114 | ضمن 115 | حيث 116 | الان 117 | الآن 118 | خلال 119 | بعد 120 | قبل 121 | حتى 122 | عند 123 | عندما 124 | لدى 125 | جميع 126 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_cz.txt: -------------------------------------------------------------------------------- 1 | a 2 | s 3 | k 4 | o 5 | i 6 | u 7 | v 8 | z 9 | dnes 10 | cz 11 | tímto 12 | budeš 13 | budem 14 | byli 15 | jseš 16 | můj 17 | svým 18 | ta 19 | tomto 20 | tohle 
21 | tuto 22 | tyto 23 | jej 24 | zda 25 | proč 26 | máte 27 | tato 28 | kam 29 | tohoto 30 | kdo 31 | kteří 32 | mi 33 | nám 34 | tom 35 | tomuto 36 | mít 37 | nic 38 | proto 39 | kterou 40 | byla 41 | toho 42 | protože 43 | asi 44 | ho 45 | naši 46 | napište 47 | re 48 | což 49 | tím 50 | takže 51 | svých 52 | její 53 | svými 54 | jste 55 | aj 56 | tu 57 | tedy 58 | teto 59 | bylo 60 | kde 61 | ke 62 | pravé 63 | ji 64 | nad 65 | nejsou 66 | či 67 | pod 68 | téma 69 | mezi 70 | přes 71 | ty 72 | pak 73 | vám 74 | ani 75 | když 76 | však 77 | neg 78 | jsem 79 | tento 80 | článku 81 | články 82 | aby 83 | jsme 84 | před 85 | pta 86 | jejich 87 | byl 88 | ještě 89 | až 90 | bez 91 | také 92 | pouze 93 | první 94 | vaše 95 | která 96 | nás 97 | nový 98 | tipy 99 | pokud 100 | může 101 | strana 102 | jeho 103 | své 104 | jiné 105 | zprávy 106 | nové 107 | není 108 | vás 109 | jen 110 | podle 111 | zde 112 | už 113 | být 114 | více 115 | bude 116 | již 117 | než 118 | který 119 | by 120 | které 121 | co 122 | nebo 123 | ten 124 | tak 125 | má 126 | při 127 | od 128 | po 129 | jsou 130 | jak 131 | další 132 | ale 133 | si 134 | se 135 | ve 136 | to 137 | jako 138 | za 139 | zpět 140 | ze 141 | do 142 | pro 143 | je 144 | na 145 | atd 146 | atp 147 | jakmile 148 | přičemž 149 | já 150 | on 151 | ona 152 | ono 153 | oni 154 | ony 155 | my 156 | vy 157 | jí 158 | ji 159 | mě 160 | mne 161 | jemu 162 | tomu 163 | těm 164 | těmu 165 | němu 166 | němuž 167 | jehož 168 | jíž 169 | jelikož 170 | jež 171 | jakož 172 | načež 173 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_el.txt: -------------------------------------------------------------------------------- 1 | # Lucene Greek Stopwords list 2 | # Note: by default this file is used after GreekLowerCaseFilter, 3 | # so when modifying this file use 'σ' instead of 'ς' 4 | ο 5 | η 6 | το 7 | οι 8 | τα 9 | του 10 | τησ 11 | των 12 | τον 13 | την 14 | και 15 
| κι 16 | κ 17 | ειμαι 18 | εισαι 19 | ειναι 20 | ειμαστε 21 | ειστε 22 | στο 23 | στον 24 | στη 25 | στην 26 | μα 27 | αλλα 28 | απο 29 | για 30 | προσ 31 | με 32 | σε 33 | ωσ 34 | παρα 35 | αντι 36 | κατα 37 | μετα 38 | θα 39 | να 40 | δε 41 | δεν 42 | μη 43 | μην 44 | επι 45 | ενω 46 | εαν 47 | αν 48 | τοτε 49 | που 50 | πωσ 51 | ποιοσ 52 | ποια 53 | ποιο 54 | ποιοι 55 | ποιεσ 56 | ποιων 57 | ποιουσ 58 | αυτοσ 59 | αυτη 60 | αυτο 61 | αυτοι 62 | αυτων 63 | αυτουσ 64 | αυτεσ 65 | αυτα 66 | εκεινοσ 67 | εκεινη 68 | εκεινο 69 | εκεινοι 70 | εκεινεσ 71 | εκεινα 72 | εκεινων 73 | εκεινουσ 74 | οπωσ 75 | ομωσ 76 | ισωσ 77 | οσο 78 | οτι 79 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_en.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | # a couple of test stopwords to test that the words are really being 17 | # configured from this file: 18 | # stopworda 19 | # stopwordb 20 | 21 | # Standard english stop words taken from Lucene's StopAnalyzer 22 | a 23 | an 24 | and 25 | are 26 | as 27 | at 28 | be 29 | but 30 | by 31 | for 32 | if 33 | in 34 | into 35 | is 36 | it 37 | no 38 | not 39 | of 40 | on 41 | or 42 | such 43 | that 44 | the 45 | their 46 | then 47 | there 48 | these 49 | they 50 | this 51 | to 52 | was 53 | will 54 | with 55 | # common verbs 56 | do 57 | does 58 | has 59 | have 60 | had 61 | 62 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_eu.txt: -------------------------------------------------------------------------------- 1 | # example set of basque stopwords 2 | al 3 | anitz 4 | arabera 5 | asko 6 | baina 7 | bat 8 | batean 9 | batek 10 | bati 11 | batzuei 12 | batzuek 13 | batzuetan 14 | batzuk 15 | bera 16 | beraiek 17 | berau 18 | berauek 19 | bere 20 | berori 21 | beroriek 22 | beste 23 | bezala 24 | da 25 | dago 26 | dira 27 | ditu 28 | du 29 | dute 30 | edo 31 | egin 32 | ere 33 | eta 34 | eurak 35 | ez 36 | gainera 37 | gu 38 | gutxi 39 | guzti 40 | haiei 41 | haiek 42 | haietan 43 | hainbeste 44 | hala 45 | han 46 | handik 47 | hango 48 | hara 49 | hari 50 | hark 51 | hartan 52 | hau 53 | hauei 54 | hauek 55 | hauetan 56 | hemen 57 | hemendik 58 | hemengo 59 | hi 60 | hona 61 | honek 62 | honela 63 | honetan 64 | honi 65 | hor 66 | hori 67 | horiei 68 | horiek 69 | horietan 70 | horko 71 | horra 72 | horrek 73 | horrela 74 | horretan 75 | horri 76 | hortik 77 | hura 78 | izan 79 | ni 80 | noiz 81 | nola 82 | non 83 | nondik 84 | nongo 85 | nor 86 | nora 87 | ze 88 | zein 89 | zen 90 | zenbait 91 | zenbat 92 | zer 93 | zergatik 94 | ziren 95 | zituen 96 | zu 97 | zuek 98 | zuen 99 | zuten 100 | -------------------------------------------------------------------------------- 
/solr/solr7/taxcat/conf/lang/stopwords_ga.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | ach 4 | ag 5 | agus 6 | an 7 | aon 8 | ar 9 | arna 10 | as 11 | b' 12 | ba 13 | beirt 14 | bhúr 15 | caoga 16 | ceathair 17 | ceathrar 18 | chomh 19 | chtó 20 | chuig 21 | chun 22 | cois 23 | céad 24 | cúig 25 | cúigear 26 | d' 27 | daichead 28 | dar 29 | de 30 | deich 31 | deichniúr 32 | den 33 | dhá 34 | do 35 | don 36 | dtí 37 | dá 38 | dár 39 | dó 40 | faoi 41 | faoin 42 | faoina 43 | faoinár 44 | fara 45 | fiche 46 | gach 47 | gan 48 | go 49 | gur 50 | haon 51 | hocht 52 | i 53 | iad 54 | idir 55 | in 56 | ina 57 | ins 58 | inár 59 | is 60 | le 61 | leis 62 | lena 63 | lenár 64 | m' 65 | mar 66 | mo 67 | mé 68 | na 69 | nach 70 | naoi 71 | naonúr 72 | ná 73 | ní 74 | níor 75 | nó 76 | nócha 77 | ocht 78 | ochtar 79 | os 80 | roimh 81 | sa 82 | seacht 83 | seachtar 84 | seachtó 85 | seasca 86 | seisear 87 | siad 88 | sibh 89 | sinn 90 | sna 91 | sé 92 | sí 93 | tar 94 | thar 95 | thú 96 | triúr 97 | trí 98 | trína 99 | trínár 100 | tríocha 101 | tú 102 | um 103 | ár 104 | é 105 | éis 106 | í 107 | ó 108 | ón 109 | óna 110 | ónár 111 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_gl.txt: -------------------------------------------------------------------------------- 1 | # Galician stopwords 2 | a 3 | aínda 4 | alí 5 | aquel 6 | aquela 7 | aquelas 8 | aqueles 9 | aquilo 10 | aquí 11 | ao 12 | aos 13 | as 14 | así 15 | á 16 | ben 17 | cando 18 | che 19 | co 20 | coa 21 | comigo 22 | con 23 | connosco 24 | contigo 25 | convosco 26 | coas 27 | cos 28 | cun 29 | cuns 30 | cunha 31 | cunhas 32 | da 33 | dalgunha 34 | dalgunhas 35 | dalgún 36 | dalgúns 37 | das 38 | de 39 | del 40 | dela 41 | delas 42 | deles 43 | desde 44 | deste 45 | do 46 | dos 47 | dun 48 | duns 49 | dunha 50 | dunhas 51 | e 52 | el 53 | ela 54 | elas 55 | eles 56 | en 57 | era 58 |
eran 59 | esa 60 | esas 61 | ese 62 | eses 63 | esta 64 | estar 65 | estaba 66 | está 67 | están 68 | este 69 | estes 70 | estiven 71 | estou 72 | eu 73 | é 74 | facer 75 | foi 76 | foron 77 | fun 78 | había 79 | hai 80 | iso 81 | isto 82 | la 83 | las 84 | lle 85 | lles 86 | lo 87 | los 88 | mais 89 | me 90 | meu 91 | meus 92 | min 93 | miña 94 | miñas 95 | moi 96 | na 97 | nas 98 | neste 99 | nin 100 | no 101 | non 102 | nos 103 | nosa 104 | nosas 105 | noso 106 | nosos 107 | nós 108 | nun 109 | nunha 110 | nuns 111 | nunhas 112 | o 113 | os 114 | ou 115 | ó 116 | ós 117 | para 118 | pero 119 | pode 120 | pois 121 | pola 122 | polas 123 | polo 124 | polos 125 | por 126 | que 127 | se 128 | senón 129 | ser 130 | seu 131 | seus 132 | sexa 133 | sido 134 | sobre 135 | súa 136 | súas 137 | tamén 138 | tan 139 | te 140 | ten 141 | teñen 142 | teño 143 | ter 144 | teu 145 | teus 146 | ti 147 | tido 148 | tiña 149 | tiven 150 | túa 151 | túas 152 | un 153 | unha 154 | unhas 155 | uns 156 | vos 157 | vosa 158 | vosas 159 | voso 160 | vosos 161 | vós 162 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_hy.txt: -------------------------------------------------------------------------------- 1 | # example set of Armenian stopwords. 2 | այդ 3 | այլ 4 | այն 5 | այս 6 | դու 7 | դուք 8 | եմ 9 | են 10 | ենք 11 | ես 12 | եք 13 | է 14 | էի 15 | էին 16 | էինք 17 | էիր 18 | էիք 19 | էր 20 | ըստ 21 | թ 22 | ի 23 | ին 24 | իսկ 25 | իր 26 | կամ 27 | համար 28 | հետ 29 | հետո 30 | մենք 31 | մեջ 32 | մի 33 | ն 34 | նա 35 | նաև 36 | նրա 37 | նրանք 38 | որ 39 | որը 40 | որոնք 41 | որպես 42 | ու 43 | ում 44 | պիտի 45 | վրա 46 | և 47 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_ja.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file defines a stopword set for Japanese. 
3 | # 4 | # This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. 5 | # Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 6 | # for frequency lists, etc. that can be useful for making your own set (if desired) 7 | # 8 | # Note that there is an overlap between these stopwords and the terms stopped when used 9 | # in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note 10 | # that comments are not allowed on the same line as stopwords. 11 | # 12 | # Also note that stopping is done in a case-insensitive manner. Change your StopFilter 13 | # configuration if you need case-sensitive stopping. Lastly, note that stopping is done 14 | # using the same character width as the entries in this file. Since this StopFilter is 15 | # normally done after a CJKWidthFilter in your chain, you would usually want your romaji 16 | # entries to be in half-width and your kana entries to be in full-width. 17 | # 18 | の 19 | に 20 | は 21 | を 22 | た 23 | が 24 | で 25 | て 26 | と 27 | し 28 | れ 29 | さ 30 | ある 31 | いる 32 | も 33 | する 34 | から 35 | な 36 | こと 37 | として 38 | い 39 | や 40 | れる 41 | など 42 | なっ 43 | ない 44 | この 45 | ため 46 | その 47 | あっ 48 | よう 49 | また 50 | もの 51 | という 52 | あり 53 | まで 54 | られ 55 | なる 56 | へ 57 | か 58 | だ 59 | これ 60 | によって 61 | により 62 | おり 63 | より 64 | による 65 | ず 66 | なり 67 | られる 68 | において 69 | ば 70 | なかっ 71 | なく 72 | しかし 73 | について 74 | せ 75 | だっ 76 | その後 77 | できる 78 | それ 79 | う 80 | ので 81 | なお 82 | のみ 83 | でき 84 | き 85 | つ 86 | における 87 | および 88 | いう 89 | さらに 90 | でも 91 | ら 92 | たり 93 | その他 94 | に関する 95 | たち 96 | ます 97 | ん 98 | なら 99 | に対して 100 | 特に 101 | せる 102 | 及び 103 | これら 104 | とき 105 | では 106 | にて 107 | ほか 108 | ながら 109 | うち 110 | そして 111 | とともに 112 | ただし 113 | かつて 114 | それぞれ 115 | または 116 | お 117 | ほど 118 | ものの 119 | に対する 120 | ほとんど 121 | と共に 122 | といった 123 | です 124 | とも 125 | ところ 126 | ここ 127 | ##### End of file 128 | 
-------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_lv.txt: -------------------------------------------------------------------------------- 1 | # Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins 2 | # the original list of over 800 forms was refined: 3 | # pronouns, adverbs, interjections were removed 4 | # 5 | # prepositions 6 | aiz 7 | ap 8 | ar 9 | apakš 10 | ārpus 11 | augšpus 12 | bez 13 | caur 14 | dēļ 15 | gar 16 | iekš 17 | iz 18 | kopš 19 | labad 20 | lejpus 21 | līdz 22 | no 23 | otrpus 24 | pa 25 | par 26 | pār 27 | pēc 28 | pie 29 | pirms 30 | pret 31 | priekš 32 | starp 33 | šaipus 34 | uz 35 | viņpus 36 | virs 37 | virspus 38 | zem 39 | apakšpus 40 | # Conjunctions 41 | un 42 | bet 43 | jo 44 | ja 45 | ka 46 | lai 47 | tomēr 48 | tikko 49 | turpretī 50 | arī 51 | kaut 52 | gan 53 | tādēļ 54 | tā 55 | ne 56 | tikvien 57 | vien 58 | kā 59 | ir 60 | te 61 | vai 62 | kamēr 63 | # Particles 64 | ar 65 | diezin 66 | droši 67 | diemžēl 68 | nebūt 69 | ik 70 | it 71 | taču 72 | nu 73 | pat 74 | tiklab 75 | iekšpus 76 | nedz 77 | tik 78 | nevis 79 | turpretim 80 | jeb 81 | iekam 82 | iekām 83 | iekāms 84 | kolīdz 85 | līdzko 86 | tiklīdz 87 | jebšu 88 | tālab 89 | tāpēc 90 | nekā 91 | itin 92 | jā 93 | jau 94 | jel 95 | nē 96 | nezin 97 | tad 98 | tikai 99 | vis 100 | tak 101 | iekams 102 | vien 103 | # modal verbs 104 | būt 105 | biju 106 | biji 107 | bija 108 | bijām 109 | bijāt 110 | esmu 111 | esi 112 | esam 113 | esat 114 | būšu 115 | būsi 116 | būs 117 | būsim 118 | būsiet 119 | tikt 120 | tiku 121 | tiki 122 | tika 123 | tikām 124 | tikāt 125 | tieku 126 | tiec 127 | tiek 128 | tiekam 129 | tiekat 130 | tikšu 131 | tiks 132 | tiksim 133 | tiksiet 134 | tapt 135 | tapi 136 | tapāt 137 | topat 138 | tapšu 139 | tapsi 140 | taps 141 | tapsim 142 | tapsiet 143 | kļūt 144 | kļuvu 145 | kļuvi 146 | kļuva 147 | kļuvām 148 | kļuvāt 149 | kļūstu 150 | kļūsti 151 | 
kļūst 152 | kļūstam 153 | kļūstat 154 | kļūšu 155 | kļūsi 156 | kļūs 157 | kļūsim 158 | kļūsiet 159 | # verbs 160 | varēt 161 | varēju 162 | varējām 163 | varēšu 164 | varēsim 165 | var 166 | varēji 167 | varējāt 168 | varēsi 169 | varēsiet 170 | varat 171 | varēja 172 | varēs 173 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/stopwords_th.txt: -------------------------------------------------------------------------------- 1 | # Thai stopwords from: 2 | # "Opinion Detection in Thai Political News Columns 3 | # Based on Subjectivity Analysis" 4 | # Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak 5 | ไว้ 6 | ไม่ 7 | ไป 8 | ได้ 9 | ให้ 10 | ใน 11 | โดย 12 | แห่ง 13 | แล้ว 14 | และ 15 | แรก 16 | แบบ 17 | แต่ 18 | เอง 19 | เห็น 20 | เลย 21 | เริ่ม 22 | เรา 23 | เมื่อ 24 | เพื่อ 25 | เพราะ 26 | เป็นการ 27 | เป็น 28 | เปิดเผย 29 | เปิด 30 | เนื่องจาก 31 | เดียวกัน 32 | เดียว 33 | เช่น 34 | เฉพาะ 35 | เคย 36 | เข้า 37 | เขา 38 | อีก 39 | อาจ 40 | อะไร 41 | ออก 42 | อย่าง 43 | อยู่ 44 | อยาก 45 | หาก 46 | หลาย 47 | หลังจาก 48 | หลัง 49 | หรือ 50 | หนึ่ง 51 | ส่วน 52 | ส่ง 53 | สุด 54 | สําหรับ 55 | ว่า 56 | วัน 57 | ลง 58 | ร่วม 59 | ราย 60 | รับ 61 | ระหว่าง 62 | รวม 63 | ยัง 64 | มี 65 | มาก 66 | มา 67 | พร้อม 68 | พบ 69 | ผ่าน 70 | ผล 71 | บาง 72 | น่า 73 | นี้ 74 | นํา 75 | นั้น 76 | นัก 77 | นอกจาก 78 | ทุก 79 | ที่สุด 80 | ที่ 81 | ทําให้ 82 | ทํา 83 | ทาง 84 | ทั้งนี้ 85 | ทั้ง 86 | ถ้า 87 | ถูก 88 | ถึง 89 | ต้อง 90 | ต่างๆ 91 | ต่าง 92 | ต่อ 93 | ตาม 94 | ตั้งแต่ 95 | ตั้ง 96 | ด้าน 97 | ด้วย 98 | ดัง 99 | ซึ่ง 100 | ช่วง 101 | จึง 102 | จาก 103 | จัด 104 | จะ 105 | คือ 106 | ความ 107 | ครั้ง 108 | คง 109 | ขึ้น 110 | ของ 111 | ขอ 112 | ขณะ 113 | ก่อน 114 | ก็ 115 | การ 116 | กับ 117 | กัน 118 | กว่า 119 | กล่าว 120 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/conf/lang/userdict_ja.txt: 
-------------------------------------------------------------------------------- 1 | # 2 | # This is a sample user dictionary for Kuromoji (JapaneseTokenizer) 3 | # 4 | # Add entries to this file in order to override the statistical model in terms 5 | # of segmentation, readings and part-of-speech tags. Notice that entries do 6 | # not have weights since they are always used when found. This is by-design 7 | # in order to maximize ease-of-use. 8 | # 9 | # Entries are defined using the following CSV format: 10 | # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> 11 | # 12 | # Notice that a single half-width space separates tokens and readings, and 13 | # that the number of tokens and readings must match exactly. 14 | # 15 | # Also notice that the behavior of multiple entries with the same <text> is undefined. 16 | # 17 | # Whitespace only lines are ignored. Comments are not allowed on entry lines. 18 | # 19 | 20 | # Custom segmentation for kanji compounds 21 | 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 22 | 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 23 | 24 | # Custom segmentation for compound katakana 25 | トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 26 | ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 27 | 28 | # Custom reading for former sumo wrestler 29 | 朝青龍,朝青龍,アサショウリュウ,カスタム人名 30 | -------------------------------------------------------------------------------- /solr/solr7/taxcat/core.properties: -------------------------------------------------------------------------------- 1 | name=taxcat 2 | -------------------------------------------------------------------------------- /solr/solr7/zoo.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | 10 | # the directory where the snapshot is stored. 
11 | # dataDir=/opt/zookeeper/data 12 | # NOTE: Solr defaults the dataDir to <solrHome>/zoo_data 13 | 14 | # the port at which the clients will connect 15 | # clientPort=2181 16 | # NOTE: Solr sets this based on zkRun / zkHost params 17 | 18 | # the maximum number of client connections. 19 | # increase this if you need to handle more clients 20 | #maxClientCnxns=60 21 | # 22 | # Be sure to read the maintenance section of the 23 | # administrator guide before turning on autopurge. 24 | # 25 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 26 | # 27 | # The number of snapshots to retain in dataDir 28 | #autopurge.snapRetainCount=3 29 | # Purge task interval in hours 30 | # Set to "0" to disable auto purge feature 31 | #autopurge.purgeInterval=1 32 | -------------------------------------------------------------------------------- /src/checkstyle-suppressions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 18 | 19 | 22 | 23 | 24 | 25 | 28 | 29 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extraction/SolrTaggerRequest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright 2012-2018 The MITRE Corporation. 
4 | */ 5 | package org.opensextant.extraction; 6 | 7 | import org.apache.solr.client.solrj.SolrRequest; 8 | import org.apache.solr.client.solrj.request.QueryRequest; 9 | import org.apache.solr.client.solrj.request.RequestWriter; 10 | import org.apache.solr.common.params.SolrParams; 11 | 12 | /** 13 | * @author dsmiley 14 | * @author ubaldino 15 | */ 16 | @SuppressWarnings("serial") 17 | public class SolrTaggerRequest extends QueryRequest { 18 | 19 | private final String input; 20 | 21 | public SolrTaggerRequest(SolrParams params, String text) { 22 | super(params, SolrRequest.METHOD.POST); 23 | this.input = text; 24 | } 25 | 26 | /* Fixed in Solr 7.x */ 27 | @Override 28 | public RequestWriter.ContentWriter getContentWriter(String expectedType) { 29 | return new RequestWriter.StringPayloadContentWriter(input, "text/plain; charset=UTF-8"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/BoundaryObserver.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo; 2 | 3 | import java.util.Map; 4 | 5 | import org.opensextant.data.Place; 6 | 7 | /** 8 | * Emit a boundary event when you come across a concrete reference to a 9 | * boundary, 10 | * e.g., county or state, district or prefecture. 11 | * 12 | * @author ubaldino 13 | */ 14 | public interface BoundaryObserver { 15 | 16 | /** Given the name (lower case, strip quotes), the location candidate infers an ADMIN boundary */ 17 | void boundaryLevel1InScope(String normalizedName, Place p); 18 | 19 | /** Given the name (lower case, strip quotes), the location candidate infers an ADMIN boundary */ 20 | void boundaryLevel2InScope(String normalizedName, Place p); 21 | 22 | /* 23 | * TODO: Ocean boundaries or coastal/island boundaries in scope? 24 | * Disputed territory boundaries in scope? 
25 | */ 26 | 27 | /** 28 | * Calculates totals and ratios for the discovered set of boundaries, inferred 29 | * or explicit. 30 | * 31 | * @return counts for boundary places mentioned or inferred 32 | */ 33 | Map placeMentionCount(); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/CountryCount.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo; 2 | 3 | import org.opensextant.data.Country; 4 | 5 | /** 6 | * Country metrics 7 | * 8 | * @author ubaldino 9 | */ 10 | public class CountryCount { 11 | public int count = 1; 12 | public int total = 1; 13 | private double ratio = 0; 14 | public Country country = null; 15 | 16 | public CountryCount(Country C) { 17 | country = C; 18 | } 19 | 20 | public CountryCount(String cc) { 21 | country = new Country(cc, cc); 22 | } 23 | 24 | /** 25 | * given a total number of ALL country mentions, 26 | * you can derive a ratio, e.g., text ABC is 45% about country1, 34% about 27 | * country2, etc. 28 | * Set total attribute before calling this. 29 | * 30 | * @return double 31 | */ 32 | public double getRatio() { 33 | ratio = (double) count / total; 34 | return ratio; 35 | } 36 | 37 | @Override 38 | public String toString() { 39 | return String.format("%s (%d or %03.1f pct)", country.getCountryCode(), count, 100 * getRatio()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/CountryObserver.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo; 2 | 3 | import java.util.Map; 4 | 5 | import org.opensextant.data.Country; 6 | 7 | public interface CountryObserver { 8 | /** 9 | * Use a country code to signal that a country was mentioned. 
10 | * 11 | * @param cc country code 12 | */ 13 | void countryInScope(String cc); 14 | 15 | /** 16 | * Use a country object to signal a country was mentioned or is in scope 17 | * 18 | * @param C country object 19 | */ 20 | void countryInScope(Country C); 21 | 22 | /** 23 | * Have you seen this country before? 24 | * 25 | * @param cc country code 26 | * @return true if observer saw country 27 | */ 28 | boolean countryObserved(String cc); 29 | 30 | /** 31 | * Have you seen this country before? 32 | * 33 | * @param C country object 34 | * @return true if observer saw country 35 | */ 36 | 37 | boolean countryObserved(Country C); 38 | 39 | int countryCount(); 40 | 41 | /** 42 | * Calculates totals and ratios for the discovered set of countries. 43 | * 44 | * @return map of country code : counts 45 | */ 46 | Map countryMentionCount(); 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/LocationObserver.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo; 2 | 3 | import org.opensextant.data.Geocoding; 4 | import org.opensextant.data.Place; 5 | 6 | /** 7 | * Apply this interface where application logic observes a coordinate or any 8 | * hard location reference. 9 | * 10 | * @author ubaldino 11 | */ 12 | public interface LocationObserver { 13 | /** 14 | * If a given geo is in scope, fire this event. 15 | * 16 | * @param geo 17 | */ 18 | void locationInScope(Geocoding geo); 19 | 20 | /** 21 | * The place known by the ID, p.getKey() or p.getPlaceID() 22 | * was it observed directly or indirectly in this document? 
23 | * 24 | * @param p 25 | * @return 26 | */ 27 | boolean placeObserved(Place p); 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/PlaceCount.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo; 2 | 3 | import java.util.HashSet; 4 | import java.util.Set; 5 | 6 | import org.opensextant.data.Place; 7 | 8 | /** 9 | * Place metrics. Everything other than coords, countries, and filtered-out 10 | * 11 | * @author ubaldino 12 | */ 13 | public class PlaceCount { 14 | public int count = 1; 15 | private double ratio = 0; 16 | public Place place = null; 17 | public int total = 1; 18 | public String label; 19 | public Set names = new HashSet<>(); 20 | 21 | public PlaceCount(String l) { 22 | label = l; 23 | } 24 | 25 | public void add(String nm) { 26 | names.add(nm); 27 | } 28 | 29 | public int getCount() { 30 | return names.size(); 31 | } 32 | 33 | public String getCountryCode() { 34 | if (place != null) { 35 | return place.getCountryCode(); 36 | } 37 | if (label != null) { 38 | // We'll blindly wait until this is used. 39 | return label.split("\\.")[0]; 40 | } 41 | return null; 42 | } 43 | 44 | /** 45 | * given a total number of ALL place mentions, you can derive a ratio, e.g., 46 | * text ABC is 45% about province1, 34% about province2, etc. 
47 | */ 48 | public double getRatio() { 49 | ratio = (double) count / total; 50 | return ratio; 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | if (place != null) { 56 | return String.format("%s (%d or %03.1f pct)", place.getName(), count, 100 * getRatio()); 57 | } 58 | if (label != null) { 59 | return String.format("'%s' (%d)", label, getCount()); 60 | } 61 | return "empty"; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/rules/FeatureClassMeta.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo.rules; 2 | 3 | /** 4 | * data structure to capture our assumptions about feature types. 5 | * 6 | * @author ubaldino 7 | */ 8 | public class FeatureClassMeta { 9 | public String label; 10 | /** 11 | * Absolute gazetteer count 12 | */ 13 | public int count; 14 | /** 15 | * Relative gazetteer proportion for this feature 16 | */ 17 | public double proportion; 18 | /** 19 | * Mention Weight 20 | */ 21 | public double weight; 22 | 23 | /** 24 | * Mention Weight X Relative proportion 25 | * - common features are more likely to be mentioned. - certain features we know should outweigh 26 | * others (e.g., Intermittent Streams are not often true positives.) 27 | */ 28 | public double factor; 29 | 30 | /** 31 | * number of entries; used as a denominator. 32 | */ 33 | private static final int GAZETTEER_BASE_COUNT = 25000000; 34 | 35 | public FeatureClassMeta(String l, int c, double wt) { 36 | this.label = l; 37 | this.count = c; 38 | this.weight = wt; 39 | this.proportion = (double) this.count / GAZETTEER_BASE_COUNT; 40 | 41 | // We have a number between 0 and 1 that increases with prevalence of feature 42 | // type but is adjusted on a-priori preference. 43 | this.factor = this.weight * this.proportion; 44 | } 45 | 46 | /** 47 | * A set factor bias, without considering gazetteer proportions. 
48 | * @param l 49 | * @param f 50 | */ 51 | public FeatureClassMeta(String l, double f) { 52 | label = l; 53 | factor = f; 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | return String.format("%s %f%%, factor=%f", this.label, 100 * this.proportion, this.factor); 59 | } 60 | } -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/rules/NonLatinNameRule.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo.rules; 2 | 3 | import org.opensextant.data.Place; 4 | import org.opensextant.extractors.geo.PlaceCandidate; 5 | 6 | 7 | /** 8 | * GeocodeRule called only if document is non-Latin such as C/J/K or MiddleEastern scripts. 9 | */ 10 | public class NonLatinNameRule extends GeocodeRule { 11 | 12 | @Override 13 | public boolean filterByNameOnly(PlaceCandidate name) { 14 | 15 | // Assess lesser known places if only two chars long or so: 16 | if (name.getLength() < 3 && !name.isCountry) { 17 | name.setFilteredOut(true); 18 | name.addRule("Lang.LengthHeuristic"); 19 | return true; 20 | } else 21 | // Assess general alpha to non-alpha content in the name 22 | { 23 | String lang = name.hasCJKText() ? "CJK" : name.hasMiddleEasternText() ? "AR" : "Gen"; 24 | int charRatio = name.hasCJKText() ? 3 : name.hasMiddleEasternText() ? 
5 : -1; 25 | if (charRatio > 0 && !NonsenseFilter.assessPhraseDensity(name.getText(), charRatio)) { 26 | name.addRule(String.format("Lang.%s.DensityHeuristic", lang)); 27 | name.setFilteredOut(true); 28 | return true; 29 | } 30 | } 31 | return false; 32 | } 33 | 34 | @Override 35 | public void evaluate(PlaceCandidate name, Place geo) { 36 | /* no-op */ 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/rules/ProvinceNameSetter.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo.rules; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import org.opensextant.data.Place; 7 | import org.opensextant.extractors.geo.PlaceCandidate; 8 | import org.opensextant.util.GeonamesUtility; 9 | 10 | public class ProvinceNameSetter extends GeocodeRule { 11 | 12 | private GeonamesUtility nameHelper = null; 13 | 14 | /** 15 | * Configure name helper if you want Province name resolution and other things.. 
16 | * 17 | * @throws IOException 18 | */ 19 | public ProvinceNameSetter(GeonamesUtility geonamesUtil) throws IOException { 20 | if (geonamesUtil == null) { 21 | nameHelper = new GeonamesUtility(); 22 | nameHelper.loadWorldAdmin1Metadata(); 23 | } else { 24 | nameHelper = geonamesUtil; 25 | } 26 | } 27 | 28 | protected void assignProvinceName(Place geo) { 29 | 30 | if (geo == null) { 31 | return; 32 | } 33 | if (geo.getCountryCode() == null || geo.getAdmin1() == null) { 34 | return; 35 | } 36 | 37 | Place adm1 = nameHelper.getProvince(geo.getCountryCode(), geo.getAdmin1()); 38 | if (adm1 != null) { 39 | geo.setAdmin1Name(adm1.getName()); 40 | } 41 | } 42 | 43 | /** 44 | * Apply a Province name to a chosen place 45 | */ 46 | @Override 47 | public void evaluate(List names) { 48 | for (PlaceCandidate pc : names) { 49 | if (pc.isFilteredOut()) { 50 | continue; 51 | } 52 | /* 53 | * First choice -- set the place name. 54 | */ 55 | assignProvinceName(pc.getChosenPlace()); 56 | } 57 | } 58 | 59 | @Override 60 | public void evaluate(PlaceCandidate name, Place geo) { 61 | /* no-op */ 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/rules/RuleTool.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo.rules; 2 | 3 | import org.opensextant.extractors.geo.PlaceCandidate; 4 | 5 | public class RuleTool { 6 | /** 7 | * test if candidate match has trivial evidence. 8 | * These rules -- default score, feature score, and lexical match (case-insensitive) -- 9 | * are standard trivial rules. 
10 | * 11 | * @param pc 12 | * @return 13 | */ 14 | public static boolean hasOnlyDefaultRules(PlaceCandidate pc) { 15 | int ruleCount = pc.getRules().size(); 16 | switch (ruleCount) { 17 | case 1: 18 | return pc.hasRule(PlaceCandidate.DEFAULT_SCORE); 19 | case 2: 20 | return pc.hasRule(PlaceCandidate.DEFAULT_SCORE) && pc.hasRule(FeatureRule.FEAT_RULE); 21 | case 3: 22 | return pc.hasRule(PlaceCandidate.DEFAULT_SCORE) && pc.hasRule(FeatureRule.FEAT_RULE) 23 | && pc.hasRule(NameRule.LEX2); 24 | default: 25 | return false; 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/geo/social/GeoInference.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.geo.social; 2 | 3 | import java.util.Map; 4 | 5 | import org.opensextant.data.Geocoding; 6 | 7 | /** 8 | * This is a light wrapper around TextMatch + Geocoding interfaces. 9 | * Place and GeocoordMatch objects are primary geocode payload, 10 | * but adds inferencing metadata at top level more clearly. 11 | * The intent is to allow other tools use GeoInference as an API object 12 | * that is independent of information extraction/matching technique or pipeline. 13 | * 14 | * @author ubaldino 15 | */ 16 | public class GeoInference { 17 | 18 | /** original data record that this inference is attached to */ 19 | public String recordId = null; 20 | /** 21 | * Contributor -- what app, class or module generated the inference. 22 | */ 23 | public String contributor = null; 24 | /** 25 | * a country, place or other geocoding that was inferred. 26 | */ 27 | public Geocoding geocode = null; 28 | /** 29 | * your confidence in this inference 30 | */ 31 | public int confidence = 0; 32 | /** 33 | * a short label for this type of location. E.g. device-location, 34 | * user-profile-loc, geo, etc. 
35 | */ 36 | public String inferenceName = null; 37 | /** 38 | * Any additional attributes you would like to add. Optional. 39 | */ 40 | public Map attributes = null; 41 | /** 42 | * offset bounds. Borrowed from TextMatch. 43 | */ 44 | public int start = -1; 45 | public int end = -1; 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/xtax/TaxonFilter.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.xtax; 2 | 3 | import java.io.IOException; 4 | import java.util.regex.Pattern; 5 | 6 | import org.opensextant.extraction.TagFilter; 7 | 8 | public class TaxonFilter extends TagFilter { 9 | 10 | 11 | public TaxonFilter() throws IOException { 12 | super(); 13 | } 14 | 15 | static final Pattern anyInvalidPunct = Pattern.compile("[\\p{Punct}&&[^-_.'`]]+"); 16 | 17 | public static boolean irregularPunctPatterns(final String t) { 18 | return anyInvalidPunct.matcher(t).find(); 19 | } 20 | 21 | /** 22 | * Find any reason to filter out Taxons. 23 | */ 24 | @Override 25 | public boolean filterOut(String val) { 26 | return irregularPunctPatterns(val); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/extractors/xtax/TaxonMatch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright 2012-2015 The MITRE Corporation. 4 | * 5 | */ 6 | package org.opensextant.extractors.xtax; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import org.opensextant.data.Taxon; 12 | import org.opensextant.extraction.TextMatch; 13 | import org.opensextant.util.TextUtils; 14 | 15 | /** 16 | * @author Marc C. 
Ubaldino, MITRE, ubaldino at mitre dot org 17 | */ 18 | public class TaxonMatch extends TextMatch { 19 | 20 | public TaxonMatch(int x1, int x2) { 21 | super(x1, x2); 22 | this.type = VAL_TAXON; 23 | this.producer = "XTax"; 24 | } 25 | 26 | @Override 27 | public void setText(String t) { 28 | super.setText(t); 29 | if (t != null) { 30 | this.hasDiacritics = TextUtils.hasDiacritics(t); 31 | } 32 | } 33 | 34 | public boolean isDefault(){ 35 | return VAL_TAXON.equals(type); 36 | } 37 | 38 | public boolean hasDiacritics = false; 39 | private List taxons = null; 40 | 41 | public List getTaxons() { 42 | return taxons; 43 | } 44 | 45 | public void addTaxon(Taxon t) { 46 | if (t == null) { 47 | return; 48 | } 49 | 50 | if (taxons == null) { 51 | taxons = new ArrayList<>(); 52 | } 53 | taxons.add(t); 54 | } 55 | 56 | public boolean hasTaxons() { 57 | if (taxons == null) { 58 | return false; 59 | } 60 | return !taxons.isEmpty(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/output/TaggerMatchInterpeter.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.output; 2 | 3 | import org.opensextant.data.Geocoding; 4 | import org.opensextant.extraction.TextMatch; 5 | import org.opensextant.extractors.geo.PlaceCandidate; 6 | 7 | public class TaggerMatchInterpeter implements MatchInterpreter { 8 | 9 | /** 10 | * Trivial override of the default Match Interpreter on GISDataFormatter. 11 | * See Examples for usage. 
12 | * 13 | * @param m 14 | * @return 15 | */ 16 | @Override 17 | public Geocoding getGeocoding(TextMatch m) { 18 | Geocoding geocoding = null; 19 | if (m instanceof Geocoding) { 20 | geocoding = (Geocoding) m; 21 | } else if (m instanceof PlaceCandidate) { 22 | geocoding = ((PlaceCandidate) m).getChosenPlace(); 23 | } 24 | return geocoding; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/org/opensextant/xlayer/server/xgeo/XlayerServer.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.xlayer.server.xgeo; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import org.restlet.Component; 7 | import org.restlet.Context; 8 | import org.restlet.data.Protocol; 9 | 10 | /** 11 | * @author ubaldino 12 | */ 13 | public class XlayerServer extends Component { 14 | 15 | private static final String USAGE = "Usage:\n\t\tXlayerServer "; 16 | 17 | public XlayerServer() { 18 | this(8888); 19 | } 20 | 21 | public XlayerServer(int port) { 22 | /* customize app before attaching */ 23 | Context ctx = new Context(); 24 | 25 | /* 26 | * If we had settings,... 27 | */ 28 | Map settings = new HashMap<>(); 29 | ctx.setAttributes(settings); 30 | XlayerRestlet service = new XlayerRestlet(ctx); 31 | /* 32 | * Configure ports, protocols, security 33 | */ 34 | getServers().add(Protocol.HTTP, port); 35 | 36 | /* 37 | * Configure URLs, endpoints. 
38 | */ 39 | getDefaultHost().attach("/xlayer/rest", service); 40 | this.getContext().setAttributes(settings); 41 | } 42 | 43 | /** 44 | * @param args 45 | */ 46 | public static void main(String[] args) { 47 | if (args.length < 1) { 48 | System.err.println(USAGE); 49 | System.exit(-1); 50 | } 51 | try { 52 | new XlayerServer(Integer.parseInt(args[0])).start(); 53 | } catch (Exception err) { 54 | System.err.println(USAGE); 55 | System.err.printf("ERROR: %s%n", err.getMessage()); 56 | System.exit(-1); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/javadoc/doc-files/opensextant-manual-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSextant/Xponents/77ef44bebb6a7b5cd5dd85947e5c1b52015c93d7/src/main/javadoc/doc-files/opensextant-manual-logo.png -------------------------------------------------------------------------------- /src/main/javadoc/org/opensextant/extractors/geo/rules/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

GeocodeRules

7 |

See the Geocoder Handbook,
8 | https://github.com/OpenSextant/Xponents/blob/master/doc/Geocoder_Handbook.md.
11 |

12 | 13 | 14 | -------------------------------------------------------------------------------- /src/main/javadoc/org/opensextant/extractors/xtax/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

XTax Taxonomy/Keyword Matcher

7 | 8 | 9 | -------------------------------------------------------------------------------- /src/main/javadoc/org/opensextant/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

OpenSextant Xponents Java API

7 | 8 | Xponents Java API for OpenSextant. 9 | For now a few top level projects use org.opensextant name space. This may change in the future. 10 | 11 | -------------------------------------------------------------------------------- /src/main/resources/banner.txt: -------------------------------------------------------------------------------- 1 | // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| 2 | // 3 | // _____ ____ __ __ 4 | ///\ __`\ /\ _`\ /\ \__ /\ \__ 5 | //\ \ \/\ \ _____ __ ___ \ \,\L\_\ __ __ _\ \ ,_\ __ ___ \ \ ,_\ 6 | // \ \ \ \ \ /\ '__`\ /'__`\ /' _ `\ \/_\__ \ /'__`\/\ \/'\\ \ \/ /'__`\ /' _ `\\ \ \/ 7 | // \ \ \_\ \\ \ \L\ \/\ __/ /\ \/\ \ /\ \L\ \ /\ __/\/> postalMatches = geocoder.extract(t); 26 | 27 | // Emit just postal matches here -- to not overwhelm output. 28 | // But in general the super set of matches is output. 29 | summarizeFindings(postalMatches); 30 | 31 | print(" ** Only Postal Geotags were emitted. **\n"); 32 | } 33 | 34 | 35 | public static void main(String[] args) { 36 | PostalGeocoderTester tester = null; 37 | try { 38 | tester = new PostalGeocoderTester(); 39 | tester.parseOptions(args); 40 | if (tester.params.inputFile != null && tester.params.inputFile.endsWith(".json")) { 41 | tester.tagBatch(tester.params.inputFile); 42 | } else { 43 | tester.tagText(tester.inputText); 44 | } 45 | } catch (Exception err) { 46 | err.printStackTrace(); 47 | } 48 | if (tester!=null) { 49 | tester.geocoder.cleanup(); 50 | tester.preTagger.cleanup(); 51 | } 52 | System.exit(0); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/org/opensextant/extractors/test/TestGazetteerConflationKey.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.test; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.junit.Test; 6 | 7 | public class TestGazetteerConflationKey { 8 | 
9 | @Test 10 | public void test() { 11 | String data = "USGS1496538\tLee County\t36.70545\t-83.12853\tA\tADM2\tUS\tUS\tUSA\tUS51\tUS51.0105\t\t\t\tUSGS\t1496538\t[LATIN]\tname\tF\tlee county\t2\t327078029"; 12 | String[] fields = data.split("\t"); 13 | 14 | Double lat = Double.valueOf(fields[2]); 15 | Double lon = Double.valueOf(fields[3]); 16 | String name = fields[1]; 17 | String feat = fields[5]; 18 | // name + "-" + cc +"-" + type +"-" + lat +"-" + lon; 19 | String key1 = name + "-" + fields[6] + "-" + feat + "-" + lat + "-" + lon; 20 | Long l = Long.valueOf(key1.hashCode()); 21 | l = ((Integer) key1.hashCode()).longValue(); 22 | System.out.println("Key 1 = " + key1); 23 | System.out.println("Hash1 = " + l); 24 | 25 | Object[] args = { name.replace(" ", "_"), feat, lat, lon }; 26 | String key2 = String.format("%s;%s;%2.4f;%3.4f", name.replace(" ", "_"), feat, lat, lon); 27 | System.out.println("Key 2 = " + key2); 28 | System.out.println("Hash2 = " + key2.hashCode()); 29 | String key3 = String.format("%s;%s;%2.4f;%3.4f", args); 30 | System.out.println("Key 3 = " + key3); 31 | System.out.println("Hash3 = " + key3.hashCode()); 32 | 33 | assertEquals(key2, key3); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/org/opensextant/extractors/test/TestJava8Maps.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.test; 2 | import static org.junit.Assert.assertTrue; 3 | import org.junit.Test; 4 | 5 | import java.util.HashMap; 6 | 7 | public class TestJava8Maps { 8 | 9 | /** 10 | * A quick demonstration of Java 8 practices to leverage. 
11 | */ 12 | @Test 13 | public void testHashKeys(){ 14 | HashMap m = new HashMap<>(); 15 | m.get("ABC"); 16 | m.computeIfAbsent("ABC", newVal -> Integer.valueOf(4)); 17 | assertTrue( m.containsKey("ABC")); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/test/java/org/opensextant/extractors/test/TestNameScoreUtils.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.test; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.apache.commons.text.similarity.LevenshteinDistance; 6 | import org.junit.Test; 7 | 8 | public class TestNameScoreUtils { 9 | 10 | @Test 11 | public void test() { 12 | String a = "who dat"; 13 | String b = "me too"; 14 | int editDist = LevenshteinDistance.getDefaultInstance().apply(a, b); 15 | assertEquals(6, editDist); 16 | 17 | editDist = LevenshteinDistance.getDefaultInstance().apply(a, a); 18 | assertEquals(0, editDist); 19 | String c = "who dem"; 20 | editDist = LevenshteinDistance.getDefaultInstance().apply(a, c); 21 | assertEquals(2, editDist); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/test/java/org/opensextant/extractors/test/TestPostalFilters.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.test; 2 | 3 | import org.junit.Test; 4 | import org.opensextant.extractors.geo.rules.PostalCodeFilter; 5 | import static org.junit.Assert.assertFalse; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | public class TestPostalFilters { 9 | 10 | 11 | private static final void print(String msg) { 12 | System.out.println(msg); 13 | } 14 | 15 | /** 16 | * Test detection of invalid punctuation in postal codes 17 | */ 18 | @Test 19 | public void testPunctuation() { 20 | 21 | PostalCodeFilter rule = new PostalCodeFilter(2); 22 | // Invalid 23 | 
assertTrue(rule.hasInvalidPunct("AB%66")); 24 | assertTrue(rule.hasInvalidPunct("AB(66")); 25 | // Valid 26 | assertFalse(rule.hasInvalidPunct("AB66")); 27 | assertFalse(rule.hasInvalidPunct("ab66")); 28 | assertFalse(rule.hasInvalidPunct("AB-66")); 29 | assertFalse(rule.hasInvalidPunct("AB-66")); 30 | assertFalse(rule.hasInvalidPunct(" AB 66 ")); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/org/opensextant/extractors/test/TestResourceLoading.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.test; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | 5 | import java.io.IOException; 6 | 7 | import org.apache.lucene.analysis.util.ClasspathResourceLoader; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | import org.opensextant.extractors.geo.GazetteerMatcher; 11 | import org.opensextant.util.LuceneStopwords; 12 | 13 | public class TestResourceLoading { 14 | 15 | @Before 16 | public void setUp() throws Exception { 17 | } 18 | 19 | @Test 20 | public void test() throws IOException { 21 | System.out.println( 22 | "This requires CLASSPATH set via maven or other means to include ./solr/etc/gazetteer/ or xponents-gaz-meta JAR"); 23 | java.util.Set words = LuceneStopwords.getStopwords(new ClasspathResourceLoader(GazetteerMatcher.class), 24 | "ru"); 25 | assertNotNull(words); 26 | System.out.println("Russian Stopwords count=" + words.size()); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/test/java/org/opensextant/extractors/test/TestSolrUtils.java: -------------------------------------------------------------------------------- 1 | package org.opensextant.extractors.test; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | 5 | import java.text.ParseException; 6 | import java.util.Date; 7 | 8 | import org.apache.solr.common.SolrDocument; 9 | import 
org.junit.Test; 10 | import org.opensextant.util.SolrUtil; 11 | 12 | public class TestSolrUtils { 13 | 14 | @Test 15 | public void test() throws ParseException { 16 | SolrDocument doc = new SolrDocument(); 17 | 18 | // Not much used. No dates in Solr, ... but just wanted to have this for 19 | // completeness. 20 | doc.put("someDay", "2017-01-02T04:05:06Z"); 21 | Date d = SolrUtil.getDate(doc, "someDay"); 22 | System.out.println("Date " + doc.get("someDay") + " Parses as " + d + " in current timezone"); 23 | assertNotNull(d); 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/.gitignore: -------------------------------------------------------------------------------- 1 | *.cfg 2 | -------------------------------------------------------------------------------- /src/test/resources/data/placename-tests-cjk.txt: -------------------------------------------------------------------------------- 1 | 大使飞往瑞士日内瓦,希望检查所有金条,并用同等大小的金箔包裹的巧克力棒代替。 他的狡猾行为与驻扎在雷克雅未克的保安公司同步。 2 | 3 | 大使飞往 瑞 士 日内瓦,希望 检查 所 有金条,并用 同 等 大 小 的 金 箔 包 裹 的 巧 克 力 棒 代 替。 他 的 狡 猾 行 为 与 驻 扎 在 雷 克 雅 未 克 的 保 安 公 司 同 步。 4 | 5 | 大 使 飞 往 瑞 士 日 内 瓦 , 希 望 检 查 所 有 金条 , 并 用 同 等 大 小 的 金 箔 包 裹 的 巧 克 力 棒 代 替。 他 的 狡 猾 行 为 与 驻 扎 在 雷 克 雅 未 克 的 保 安 公 司 同 步。 6 | 7 | 新华社记者高文成 年终岁尾,中国外贸顶住压力 新华 社记 者高文 成 年终岁尾,中 国 外 贸 顶 住 压 力 # 8 | 9 | # Excerpt from Xhinhua Chinese: 10 | 这是10月31日在瑞士沃韦拍摄的雀巢总部大楼。雀巢作为在全球188个国家和地区有经营的跨国公司,在全球有2000多个品牌。今年是雀巢连续第六次参加进博会。 据雀巢大中华大区董事长兼首席执行官张西强介绍,雀巢将带来奶品、糖果、咖啡和宠物食品等展品。作为进博会参展商联盟理事会成员,雀巢也期待在进博会这个高效创新的平台上继续促进行业内交流,并通过各种形式与消费者进行线上线下沟通。 新华社记者 连漪 摄 -------------------------------------------------------------------------------- /src/test/resources/data/randomness.txt: -------------------------------------------------------------------------------- 1 | Where is 56:08:45N, 117:33:12W? Is it near Lisbon or closer to Saskatchewan? 2 | 3 | Seriously, what part of Canada would you visit to see the new prime minister discus our border? 
4 | 5 | Do you think Hillary Clinton or former President Clinton have an opinion on our Northern Border? 6 | 7 | "Aliwagwag is situated in the Eastern Mindanao Biodiversity Corridor which contans one of the largest remaining 8 | blocks of tropical lowland rainforest in the Philippines. It covers an area of 10,491.33 hectares (25,924.6acres) 9 | and a buffer zone of 420.6 hectares (1,039 acres) in the hydrologically rich mountainous interior of the municipalities 10 | of Cateel and Boston in Davao Oriental as well as a portion of the municipality of Compostela in Compostela Valley." 11 | - Wikipedia entry, https://en.wikipedia.org/wiki/Aliwagwag_Protected_Landscape 12 | 13 | It is also home to the tallest trees in the Philippines, the Philippine rosewood, known locally as toog. In the 14 | waters of the upper Cateel River, a rare species of fish can be found called sawugnun by locals which is harvested as adelicacy. 15 | 16 | None of this ever happened. Certainly not near November 14, 2008, or 16DEC2020. 17 | Is it possible that November 16th, 2008 is an interesting date? 18 | A press conference is scheduled for 12/13/2015... that is Décembre 13, 2015 19 | 20 | Who does tend to the heath crisis there? is it the WHO or The Who? 21 | 22 | Wellfleet, MA 02663 has some of the best Oysters on the Atlantic 23 | -------------------------------------------------------------------------------- /src/test/resources/filters/person-name-filter.txt: -------------------------------------------------------------------------------- 1 | # a comment. 2 | # this is only a test 3 | # add person names like 4 | # marc 5 | #or a full name. 
These will be filtered out 6 | -------------------------------------------------------------------------------- /src/test/resources/filters/person-suffix-filter.txt: -------------------------------------------------------------------------------- 1 | # terms trailing a match that indicate the match is a person 2 | -------------------------------------------------------------------------------- /src/test/resources/filters/person-title-filter.txt: -------------------------------------------------------------------------------- 1 | # terms preceding a match that indicate the match is a person 2 | -------------------------------------------------------------------------------- /src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %d{yyyy-MM-dd'T'HH:mm:ss.SSS} %-5level %logger{36} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/test/resources/test-filter.txt: -------------------------------------------------------------------------------- 1 | # 2 | # A list of terms you wish to filter out. 3 | -------------------------------------------------------------------------------- /test/test-xlayer-curl.sh: -------------------------------------------------------------------------------- 1 | PORT=$1 2 | RESTAPI=http://localhost:$PORT/xlayer/rest/process 3 | # Using curl, POST a JSON object to the service. 4 | # features : LIST 5 | # text : ASCII or UTF-8 string 6 | # docid : identifier, mostly for logging purposes.
7 | # 8 | curl --data "{'docid':'SimpleTest#111', 'features':'places,coordinates,countries,persons,orgs,reverse-geocode', 'text':'Aliwagwag is situated in the Eastern Mindanao Biodiversity Corridor which contans one of the largest remaining blocks of tropical lowland rainforest in the Philippines. It covers an area of 10,491.33 hectares (25,924.6acres) and a buffer zone of 420.6 hectares (1,039 acres) in the hydrologically rich mountainous interior of the municipalities of Cateel and Boston in Davao Oriental as well as a portion of the municipality of Compostela in Compostela Valley. It is also home to the tallest trees in the Philippines, the Philippine rosewood, known locally as toog. In the waters of the upper Cateel River, a rare species of fish can be found called sawugnun by locals which is harvested as adelicacy.'}" "$RESTAPI" 9 | -------------------------------------------------------------------------------- /test/test-xlayer-java.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run the XlayerClientTester Java client against a local Xlayer REST service. 3 | # Usage: test-xlayer-java.sh PORT FILE 4 | script=$(dirname "$0") 5 | # Resolve the parent of the script directory; stop if it cannot be entered. 6 | basedir=$(cd -P "$script/.." && pwd -P) || exit 1 7 | 8 | PORT=$1 9 | URL=http://localhost:$PORT/xlayer/rest/process 10 | FILE=$2 11 | 12 | java "-Dlogback.configurationFile=$basedir/etc/logback.xml" \ 13 | -classpath "$basedir/etc:$basedir/lib/*" XlayerClientTester "$URL" "$FILE" 14 | -------------------------------------------------------------------------------- /test/test-xlayer-python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PYTHONPATH=./piplib:./python 3 | # URL is host:port or the full prefix 4 | URL=$1 5 | FILE=$2 6 | # NOTE -- test data is in xlayer.py tester 7 | python3 -m opensextant.xlayer --service-url "$URL" --debug "$FILE" 8 | --------------------------------------------------------------------------------