├── .gitignore ├── README.md ├── zal-extractor-core ├── pom.xml └── src │ ├── main │ ├── java │ │ └── ir │ │ │ └── co │ │ │ └── bayan │ │ │ └── simorq │ │ │ └── zal │ │ │ └── extractor │ │ │ ├── convert │ │ │ ├── Converter.java │ │ │ ├── DateConverter.java │ │ │ ├── DateTimeConverter.java │ │ │ ├── FloatConverter.java │ │ │ ├── LongConverter.java │ │ │ ├── PerisanConvertUtils.java │ │ │ ├── PersianDateConverter.java │ │ │ └── package-info.java │ │ │ ├── core │ │ │ ├── Content.java │ │ │ ├── ExtractEngine.java │ │ │ ├── ExtractUtil.java │ │ │ └── ExtractedDoc.java │ │ │ ├── evaluation │ │ │ ├── CssContext.java │ │ │ ├── CssEvaluator.java │ │ │ ├── EvaluationContext.java │ │ │ ├── Evaluator.java │ │ │ ├── EvaluatorFactory.java │ │ │ ├── MapNamespaceContext.java │ │ │ ├── TextEvaluationContext.java │ │ │ ├── TextEvaluator.java │ │ │ ├── XPathContext.java │ │ │ ├── XPathEvaluator.java │ │ │ └── package-info.java │ │ │ ├── model │ │ │ ├── Attribute.java │ │ │ ├── Concat.java │ │ │ ├── Constant.java │ │ │ ├── Decode.java │ │ │ ├── Default.java │ │ │ ├── Document.java │ │ │ ├── Expr.java │ │ │ ├── ExtractTo.java │ │ │ ├── Extractor.java │ │ │ ├── ExtractorConfig.java │ │ │ ├── Fetch.java │ │ │ ├── FetchParameter.java │ │ │ ├── Field.java │ │ │ ├── FieldValue.java │ │ │ ├── Filter.java │ │ │ ├── First.java │ │ │ ├── ForEach.java │ │ │ ├── Fragment.java │ │ │ ├── Function.java │ │ │ ├── FunctionHolder.java │ │ │ ├── Last.java │ │ │ ├── Link.java │ │ │ ├── Map.java │ │ │ ├── MatchMode.java │ │ │ ├── Matches.java │ │ │ ├── Process.java │ │ │ ├── ProcessorDef.java │ │ │ ├── Raw.java │ │ │ ├── Replace.java │ │ │ ├── Resolve.java │ │ │ ├── Rooted.java │ │ │ ├── Size.java │ │ │ ├── Text.java │ │ │ ├── Trim.java │ │ │ ├── Truncate.java │ │ │ ├── TypeDef.java │ │ │ ├── Url.java │ │ │ └── package-info.java │ │ │ ├── process │ │ │ ├── Capitalize.java │ │ │ ├── Lower.java │ │ │ ├── Processor.java │ │ │ ├── StringProcessor.java │ │ │ └── Upper.java │ │ │ └── protocol │ │ │ ├── Config.java │ │ │ ├── DirectHttpProtocol.java │ │ │ ├── FileProtocol.java │ │ │ ├── Protocol.java │ │ │ ├── ProtocolException.java │ │ │ └── ProtocolFactory.java │ └── resources │ │ ├── extractors.xsd │ │ └── functions.xsd │ └── test │ ├── java │ └── ir │ │ └── co │ │ └── bayan │ │ └── simorq │ │ └── zal │ │ └── extractor │ │ ├── convert │ │ ├── DateConverterTest.java │ │ ├── DateTimeConverterTest.java │ │ ├── FloatConverterTest.java │ │ ├── LongConverterTest.java │ │ ├── PersianDateConverterTest.java │ │ └── TestConverter.java │ │ ├── core │ │ ├── ExtractorCssTest.java │ │ ├── ExtractorCssTestMulti.java │ │ └── ExtractorXPathTest.java │ │ ├── evaluation │ │ └── TextEvaluatorTest.java │ │ ├── model │ │ ├── ExtractorConfigurationTest.java │ │ ├── FetchTest.java │ │ └── TruncateTest.java │ │ └── protocol │ │ └── HttpProtocolTest.java │ └── resources │ ├── extractors-config-test.xml │ ├── extractors-css-test-multi.xml │ ├── extractors-css-test.xml │ ├── extractors-xpath-test.xml │ ├── extractors.xml │ ├── sample.txt │ ├── test-ns.xml │ ├── test.htm │ ├── test.xml │ └── teste.xml ├── zal-extractor-nutch ├── assembly.xml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── ir │ │ │ └── co │ │ │ └── bayan │ │ │ └── simorq │ │ │ └── zal │ │ │ └── extractor │ │ │ └── nutch │ │ │ ├── ExtractorFetchSchedule.java │ │ │ ├── ExtractorIndexingFilter.java │ │ │ ├── ExtractorParseFilter.java │ │ │ ├── ExtractorParser.java │ │ │ ├── ExtractorScoringFilter.java │ │ │ ├── ExtractorUrlFilter.java │ │ │ ├── NutchUtils.java │ │ │ ├── OPICScoringFilter.java │ │ │ └── package-info.java │ └── resources │ │ ├── continuous_crawl │ │ ├── crawl │ │ ├── parse_index │ │ └── plugin.xml │ └── test │ ├── java │ └── ir │ │ └── co │ │ └── bayan │ │ └── simorq │ │ └── zal │ │ └── extractor │ │ ├── convert │ │ └── TestConverter.java │ │ └── nutch │ │ └── ExtractorIndexingFilterTest.java │ └── resources │ └── extractors-index-test.xml ├── zal-extractor-tools ├── pom.xml └── src │ └── main │ └── java │ └── ir │ └── co │ └── bayan │ └── simorq │ └── zal │ └── extractor │ └── util │ └── UrlTester.java └── zal-parent └── pom.xml /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/README.md -------------------------------------------------------------------------------- /zal-extractor-core/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/pom.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/Converter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/Converter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/DateConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/DateConverter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/DateTimeConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/DateTimeConverter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/FloatConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/FloatConverter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/LongConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/LongConverter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/PerisanConvertUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/PerisanConvertUtils.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/PersianDateConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/PersianDateConverter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/convert/package-info.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/Content.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/Content.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/ExtractEngine.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/ExtractEngine.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/ExtractUtil.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/ExtractUtil.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/ExtractedDoc.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/core/ExtractedDoc.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/CssContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/CssContext.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/CssEvaluator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/CssEvaluator.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/EvaluationContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/EvaluationContext.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/Evaluator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/Evaluator.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/EvaluatorFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/EvaluatorFactory.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/MapNamespaceContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/MapNamespaceContext.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/TextEvaluationContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/TextEvaluationContext.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/TextEvaluator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/TextEvaluator.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/XPathContext.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/XPathContext.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/XPathEvaluator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/XPathEvaluator.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/evaluation/package-info.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Attribute.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Attribute.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Concat.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Concat.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Constant.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Constant.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Decode.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Decode.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Default.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Default.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Document.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Document.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Expr.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Expr.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ExtractTo.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ExtractTo.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Extractor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Extractor.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ExtractorConfig.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ExtractorConfig.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Fetch.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Fetch.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/FetchParameter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/FetchParameter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Field.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Field.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/FieldValue.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/FieldValue.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Filter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Filter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/First.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/First.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ForEach.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ForEach.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Fragment.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Fragment.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Function.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Function.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/FunctionHolder.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/FunctionHolder.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Last.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Last.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Link.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Link.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Map.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Map.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/MatchMode.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/MatchMode.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Matches.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Matches.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Process.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Process.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ProcessorDef.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/ProcessorDef.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Raw.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Raw.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Replace.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Replace.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Resolve.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Resolve.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Rooted.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Rooted.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Size.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Size.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Text.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Text.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Trim.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Trim.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Truncate.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Truncate.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/TypeDef.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/TypeDef.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Url.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/Url.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/model/package-info.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Capitalize.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Capitalize.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Lower.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Lower.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Processor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Processor.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/StringProcessor.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/StringProcessor.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Upper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/process/Upper.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/Config.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/Config.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/DirectHttpProtocol.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/DirectHttpProtocol.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/FileProtocol.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/FileProtocol.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/Protocol.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/Protocol.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/ProtocolException.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/ProtocolException.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/ProtocolFactory.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/java/ir/co/bayan/simorq/zal/extractor/protocol/ProtocolFactory.java -------------------------------------------------------------------------------- /zal-extractor-core/src/main/resources/extractors.xsd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/resources/extractors.xsd -------------------------------------------------------------------------------- /zal-extractor-core/src/main/resources/functions.xsd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/main/resources/functions.xsd -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/DateConverterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/DateConverterTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/DateTimeConverterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/DateTimeConverterTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/FloatConverterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/FloatConverterTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/LongConverterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/LongConverterTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/PersianDateConverterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/PersianDateConverterTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/TestConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/TestConverter.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/core/ExtractorCssTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/core/ExtractorCssTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/core/ExtractorCssTestMulti.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/core/ExtractorCssTestMulti.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/core/ExtractorXPathTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/core/ExtractorXPathTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/evaluation/TextEvaluatorTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/evaluation/TextEvaluatorTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/model/ExtractorConfigurationTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/model/ExtractorConfigurationTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/model/FetchTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/model/FetchTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/model/TruncateTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/model/TruncateTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/protocol/HttpProtocolTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/java/ir/co/bayan/simorq/zal/extractor/protocol/HttpProtocolTest.java -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/extractors-config-test.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/extractors-config-test.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/extractors-css-test-multi.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/extractors-css-test-multi.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/extractors-css-test.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/extractors-css-test.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/extractors-xpath-test.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/extractors-xpath-test.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/extractors.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/extractors.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/sample.txt: -------------------------------------------------------------------------------- 1 | l1 2 | l2 -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/test-ns.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/test-ns.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/test.htm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/test.htm -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/test.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/test.xml -------------------------------------------------------------------------------- /zal-extractor-core/src/test/resources/teste.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-core/src/test/resources/teste.xml -------------------------------------------------------------------------------- /zal-extractor-nutch/assembly.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/assembly.xml -------------------------------------------------------------------------------- /zal-extractor-nutch/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/pom.xml -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorFetchSchedule.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorFetchSchedule.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorIndexingFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorIndexingFilter.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorParseFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorParseFilter.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorParser.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorParser.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorScoringFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorScoringFilter.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorUrlFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorUrlFilter.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/NutchUtils.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/NutchUtils.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/OPICScoringFilter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/OPICScoringFilter.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/package-info.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/java/ir/co/bayan/simorq/zal/extractor/nutch/package-info.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/resources/continuous_crawl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/resources/continuous_crawl -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/resources/crawl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/resources/crawl -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/resources/parse_index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/resources/parse_index -------------------------------------------------------------------------------- /zal-extractor-nutch/src/main/resources/plugin.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/main/resources/plugin.xml -------------------------------------------------------------------------------- /zal-extractor-nutch/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/TestConverter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/test/java/ir/co/bayan/simorq/zal/extractor/convert/TestConverter.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/test/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorIndexingFilterTest.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/test/java/ir/co/bayan/simorq/zal/extractor/nutch/ExtractorIndexingFilterTest.java -------------------------------------------------------------------------------- /zal-extractor-nutch/src/test/resources/extractors-index-test.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-nutch/src/test/resources/extractors-index-test.xml -------------------------------------------------------------------------------- /zal-extractor-tools/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-tools/pom.xml -------------------------------------------------------------------------------- /zal-extractor-tools/src/main/java/ir/co/bayan/simorq/zal/extractor/util/UrlTester.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-extractor-tools/src/main/java/ir/co/bayan/simorq/zal/extractor/util/UrlTester.java -------------------------------------------------------------------------------- /zal-parent/pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BayanGroup/nutch-custom-search/HEAD/zal-parent/pom.xml --------------------------------------------------------------------------------