├── NOTICE ├── README ├── nutch-plugins ├── filter-xpath │ ├── .gitignore │ ├── assembly.xml │ ├── conf │ │ ├── nutch-site.xml │ │ ├── plugin.xml │ │ └── xpathfilter-conf.xml │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── atlantbh │ │ │ └── nutch │ │ │ └── filter │ │ │ └── xpath │ │ │ ├── FilterUtils.java │ │ │ ├── XPathHtmlParserFilter.java │ │ │ ├── XPathIndexingFilter.java │ │ │ └── config │ │ │ ├── FieldType.java │ │ │ ├── XPathFilterConfiguration.java │ │ │ ├── XPathIndexerProperties.java │ │ │ └── XPathIndexerPropertiesField.java │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── atlantbh │ │ │ └── nutch │ │ │ └── filter │ │ │ └── xpath │ │ │ ├── XPathHtmlParserFilterTest.java │ │ │ └── XPathIndexingFilterTest.java │ │ └── resources │ │ └── com │ │ └── atlantbh │ │ └── nutch │ │ └── filter │ │ └── xpath │ │ ├── example-content.html │ │ ├── example-content.xml │ │ ├── example-xpathfilter-conf.xml │ │ └── example-xpathfilter-conf2.xml ├── index-alternative │ ├── .gitignore │ ├── assembly.xml │ ├── conf │ │ ├── alternativedataflow-indexfilter-conf.xml │ │ ├── nutch-site.xml │ │ └── plugin.xml │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── atlantbh │ │ │ └── nutch │ │ │ └── index │ │ │ └── alternativedataflow │ │ │ ├── AlternativeDataFlowIndexingFilter.java │ │ │ ├── FilterUtils.java │ │ │ ├── conf │ │ │ ├── AlternativeDataFlowIndexingFilterConfiguration.java │ │ │ ├── Entry.java │ │ │ └── Field.java │ │ │ └── flow │ │ │ ├── CsvDataFlow.java │ │ │ └── DataFlow.java │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── atlantbh │ │ │ └── nutch │ │ │ └── index │ │ │ └── alternativedataflow │ │ │ ├── AlternativeDataFlowIndexingFilterTest.java │ │ │ └── flow │ │ │ └── CsvDataFlowTest.java │ │ └── resources │ │ └── com │ │ └── atlantbh │ │ └── nutch │ │ └── index │ │ └── alternativedataflow │ │ ├── example.alternativedataflow-indexfilter-conf.xml │ │ └── flow │ │ └── example.csv └── index-omit │ ├── .gitignore │ ├── assembly.xml │ ├── conf │ ├── nutch-site.xml │ ├── omit-indexfilter-conf.xml │ └── plugin.xml │ ├── pom.xml │ └── src │ ├── main │ └── java │ │ └── com │ │ └── atlantbh │ │ └── nutch │ │ └── filter │ │ └── index │ │ └── omit │ │ ├── FilterUtils.java │ │ ├── OmitIndexingFilter.java │ │ └── config │ │ ├── FilteringType.java │ │ ├── OmitIndexingFilterConfiguration.java │ │ ├── OmitIndexingFilterConfigurationEntry.java │ │ └── Target.java │ └── test │ ├── java │ └── com │ │ └── atlantbh │ │ └── nutch │ │ └── filter │ │ └── index │ │ └── omit │ │ └── OmitIndexingFilterTest.java │ └── resources │ └── com │ └── atlantbh │ └── nutch │ └── filter │ └── index │ └── omit │ └── example-omit-indexfilter-conf.xml └── nutch-scripts ├── gfpu.sh ├── index.sh └── reparse.sh /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/NOTICE -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/.gitignore -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/assembly.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/assembly.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/conf/nutch-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/conf/nutch-site.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/conf/plugin.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/conf/plugin.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/conf/xpathfilter-conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/conf/xpathfilter-conf.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/pom.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/FilterUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/FilterUtils.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/XPathHtmlParserFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/XPathHtmlParserFilter.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/XPathIndexingFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/XPathIndexingFilter.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/FieldType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/FieldType.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/XPathFilterConfiguration.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/XPathFilterConfiguration.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/XPathIndexerProperties.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/XPathIndexerProperties.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/XPathIndexerPropertiesField.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/main/java/com/atlantbh/nutch/filter/xpath/config/XPathIndexerPropertiesField.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/test/java/com/atlantbh/nutch/filter/xpath/XPathHtmlParserFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/test/java/com/atlantbh/nutch/filter/xpath/XPathHtmlParserFilterTest.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/test/java/com/atlantbh/nutch/filter/xpath/XPathIndexingFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/test/java/com/atlantbh/nutch/filter/xpath/XPathIndexingFilterTest.java -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-content.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-content.html -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-content.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-content.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-xpathfilter-conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-xpathfilter-conf.xml -------------------------------------------------------------------------------- /nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-xpathfilter-conf2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/filter-xpath/src/test/resources/com/atlantbh/nutch/filter/xpath/example-xpathfilter-conf2.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/.gitignore -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/assembly.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/assembly.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/conf/alternativedataflow-indexfilter-conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/conf/alternativedataflow-indexfilter-conf.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/conf/nutch-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/conf/nutch-site.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/conf/plugin.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/conf/plugin.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/pom.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/AlternativeDataFlowIndexingFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/AlternativeDataFlowIndexingFilter.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/FilterUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/FilterUtils.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/conf/AlternativeDataFlowIndexingFilterConfiguration.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/conf/AlternativeDataFlowIndexingFilterConfiguration.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/conf/Entry.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/conf/Entry.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/conf/Field.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/conf/Field.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/flow/CsvDataFlow.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/flow/CsvDataFlow.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/flow/DataFlow.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/main/java/com/atlantbh/nutch/index/alternativedataflow/flow/DataFlow.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/test/java/com/atlantbh/nutch/index/alternativedataflow/AlternativeDataFlowIndexingFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/test/java/com/atlantbh/nutch/index/alternativedataflow/AlternativeDataFlowIndexingFilterTest.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/test/java/com/atlantbh/nutch/index/alternativedataflow/flow/CsvDataFlowTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/test/java/com/atlantbh/nutch/index/alternativedataflow/flow/CsvDataFlowTest.java -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/test/resources/com/atlantbh/nutch/index/alternativedataflow/example.alternativedataflow-indexfilter-conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/test/resources/com/atlantbh/nutch/index/alternativedataflow/example.alternativedataflow-indexfilter-conf.xml -------------------------------------------------------------------------------- /nutch-plugins/index-alternative/src/test/resources/com/atlantbh/nutch/index/alternativedataflow/flow/example.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-alternative/src/test/resources/com/atlantbh/nutch/index/alternativedataflow/flow/example.csv -------------------------------------------------------------------------------- /nutch-plugins/index-omit/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/.gitignore -------------------------------------------------------------------------------- /nutch-plugins/index-omit/assembly.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/assembly.xml -------------------------------------------------------------------------------- /nutch-plugins/index-omit/conf/nutch-site.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/conf/nutch-site.xml -------------------------------------------------------------------------------- /nutch-plugins/index-omit/conf/omit-indexfilter-conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/conf/omit-indexfilter-conf.xml -------------------------------------------------------------------------------- /nutch-plugins/index-omit/conf/plugin.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/conf/plugin.xml -------------------------------------------------------------------------------- /nutch-plugins/index-omit/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/pom.xml -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/FilterUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/FilterUtils.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/OmitIndexingFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/OmitIndexingFilter.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/FilteringType.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/FilteringType.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/OmitIndexingFilterConfiguration.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/OmitIndexingFilterConfiguration.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/OmitIndexingFilterConfigurationEntry.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/OmitIndexingFilterConfigurationEntry.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/Target.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/main/java/com/atlantbh/nutch/filter/index/omit/config/Target.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/test/java/com/atlantbh/nutch/filter/index/omit/OmitIndexingFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/test/java/com/atlantbh/nutch/filter/index/omit/OmitIndexingFilterTest.java -------------------------------------------------------------------------------- /nutch-plugins/index-omit/src/test/resources/com/atlantbh/nutch/filter/index/omit/example-omit-indexfilter-conf.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-plugins/index-omit/src/test/resources/com/atlantbh/nutch/filter/index/omit/example-omit-indexfilter-conf.xml -------------------------------------------------------------------------------- /nutch-scripts/gfpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-scripts/gfpu.sh -------------------------------------------------------------------------------- /nutch-scripts/index.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-scripts/index.sh -------------------------------------------------------------------------------- /nutch-scripts/reparse.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ATLANTBH/nutch-plugins/HEAD/nutch-scripts/reparse.sh --------------------------------------------------------------------------------