├── README.md ├── celerycrawler ├── __init__.py ├── couchviews │ └── db │ │ ├── page │ │ ├── by_rank │ │ │ └── map.js │ │ ├── by_url │ │ │ └── map.js │ │ └── links_to_url │ │ │ └── map.js │ │ └── robotstxt │ │ └── by_domain │ │ └── map.js ├── indexer.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── index_update.py │ │ ├── start_crawl.py │ │ └── update_couchdb.py ├── models.py ├── settings.py ├── tasks.py ├── templates │ ├── base.html │ ├── index.html │ └── results.html ├── tests.py ├── urls.py ├── utils.py ├── views.py └── wsgi.py └── manage.py /README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /celerycrawler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /celerycrawler/couchviews/db/page/by_rank/map.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/couchviews/db/page/by_rank/map.js -------------------------------------------------------------------------------- /celerycrawler/couchviews/db/page/by_url/map.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/couchviews/db/page/by_url/map.js -------------------------------------------------------------------------------- /celerycrawler/couchviews/db/page/links_to_url/map.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/couchviews/db/page/links_to_url/map.js -------------------------------------------------------------------------------- /celerycrawler/couchviews/db/robotstxt/by_domain/map.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/couchviews/db/robotstxt/by_domain/map.js -------------------------------------------------------------------------------- /celerycrawler/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/indexer.py -------------------------------------------------------------------------------- /celerycrawler/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /celerycrawler/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /celerycrawler/management/commands/index_update.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/management/commands/index_update.py -------------------------------------------------------------------------------- /celerycrawler/management/commands/start_crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/management/commands/start_crawl.py -------------------------------------------------------------------------------- /celerycrawler/management/commands/update_couchdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/management/commands/update_couchdb.py -------------------------------------------------------------------------------- /celerycrawler/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/models.py -------------------------------------------------------------------------------- /celerycrawler/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/settings.py -------------------------------------------------------------------------------- /celerycrawler/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/tasks.py -------------------------------------------------------------------------------- /celerycrawler/templates/base.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/templates/base.html -------------------------------------------------------------------------------- /celerycrawler/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/templates/index.html -------------------------------------------------------------------------------- /celerycrawler/templates/results.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/templates/results.html -------------------------------------------------------------------------------- /celerycrawler/tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/tests.py -------------------------------------------------------------------------------- /celerycrawler/urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/urls.py -------------------------------------------------------------------------------- /celerycrawler/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/utils.py -------------------------------------------------------------------------------- /celerycrawler/views.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/views.py -------------------------------------------------------------------------------- /celerycrawler/wsgi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/celerycrawler/wsgi.py -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewjw/celery-crawler/HEAD/manage.py --------------------------------------------------------------------------------