├── .drone.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── autoextract_spiders ├── __init__.py ├── __version__.py ├── dupe_filter.py ├── middlewares.py ├── sessions.py ├── settings.py └── spiders │ ├── __init__.py │ ├── autoextract_article.py │ ├── autoextract_jobs.py │ ├── autoextract_product.py │ ├── autoextract_spider.py │ ├── config.py │ ├── crawler_spider.py │ ├── rule.py │ └── util.py ├── requirements.txt ├── scrapinghub.yml ├── scrapy.cfg ├── scripts ├── hcfpal.py └── manager.py ├── setup.cfg ├── setup.py └── tests ├── requirements.txt └── test_spider.py /.drone.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/.drone.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/README.md -------------------------------------------------------------------------------- /autoextract_spiders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoextract_spiders/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1' 2 | -------------------------------------------------------------------------------- /autoextract_spiders/dupe_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/dupe_filter.py -------------------------------------------------------------------------------- /autoextract_spiders/middlewares.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/middlewares.py -------------------------------------------------------------------------------- /autoextract_spiders/sessions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/sessions.py -------------------------------------------------------------------------------- /autoextract_spiders/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/settings.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/__init__.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/autoextract_article.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/autoextract_article.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/autoextract_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/autoextract_jobs.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/autoextract_product.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/autoextract_product.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/autoextract_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/autoextract_spider.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/config.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/crawler_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/crawler_spider.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/rule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/rule.py -------------------------------------------------------------------------------- /autoextract_spiders/spiders/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/autoextract_spiders/spiders/util.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/requirements.txt -------------------------------------------------------------------------------- /scrapinghub.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/scrapinghub.yml -------------------------------------------------------------------------------- /scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/scrapy.cfg -------------------------------------------------------------------------------- /scripts/hcfpal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/scripts/hcfpal.py -------------------------------------------------------------------------------- /scripts/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/scripts/manager.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/setup.py -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | pytest -------------------------------------------------------------------------------- /tests/test_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/autoextract-spiders/HEAD/tests/test_spider.py --------------------------------------------------------------------------------