├── .gitignore ├── LICENSE ├── README.md ├── collectors ├── basic.py ├── fake.py ├── headless_chromium.py └── headless_firefox.py ├── crawler.py ├── data ├── 1.html ├── 10.html ├── 11.html ├── 12.html ├── 13.html ├── 14.html ├── 15.html ├── 16.html ├── 17.html ├── 18.html ├── 19.html ├── 2.html ├── 20.html ├── 21.html ├── 22.html ├── 23.html ├── 24.html ├── 25.html ├── 26.html ├── 27.html ├── 28.html ├── 29.html ├── 3.html ├── 30.html ├── 31.html ├── 32.html ├── 33.html ├── 34.html ├── 35.html ├── 36.html ├── 37.html ├── 38.html ├── 39.html ├── 4.html ├── 40.html ├── 41.html ├── 42.html ├── 43.html ├── 44.html ├── 45.html ├── 46.html ├── 47.html ├── 48.html ├── 5.html ├── 6.html ├── 7.html ├── 8.html └── 9.html ├── headers.py ├── main.py ├── parserlist.py ├── parsers ├── defaults.py ├── quotestoscrape.py └── scrapemelive.py ├── proxies.py ├── repo.py ├── tasks.py ├── test.js └── test_proxys.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/README.md -------------------------------------------------------------------------------- /collectors/basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/collectors/basic.py -------------------------------------------------------------------------------- /collectors/fake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/collectors/fake.py -------------------------------------------------------------------------------- /collectors/headless_chromium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/collectors/headless_chromium.py -------------------------------------------------------------------------------- /collectors/headless_firefox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/collectors/headless_firefox.py -------------------------------------------------------------------------------- /crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/crawler.py -------------------------------------------------------------------------------- /data/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/1.html -------------------------------------------------------------------------------- /data/10.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/10.html -------------------------------------------------------------------------------- /data/11.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/11.html -------------------------------------------------------------------------------- /data/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/12.html -------------------------------------------------------------------------------- /data/13.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/13.html -------------------------------------------------------------------------------- /data/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/14.html -------------------------------------------------------------------------------- /data/15.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/15.html -------------------------------------------------------------------------------- /data/16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/16.html -------------------------------------------------------------------------------- /data/17.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/17.html -------------------------------------------------------------------------------- /data/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/18.html -------------------------------------------------------------------------------- /data/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/19.html -------------------------------------------------------------------------------- /data/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/2.html -------------------------------------------------------------------------------- /data/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/20.html -------------------------------------------------------------------------------- /data/21.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/21.html -------------------------------------------------------------------------------- /data/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/22.html -------------------------------------------------------------------------------- /data/23.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/23.html -------------------------------------------------------------------------------- /data/24.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/24.html -------------------------------------------------------------------------------- /data/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/25.html -------------------------------------------------------------------------------- /data/26.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/26.html -------------------------------------------------------------------------------- /data/27.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/27.html -------------------------------------------------------------------------------- /data/28.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/28.html -------------------------------------------------------------------------------- /data/29.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/29.html -------------------------------------------------------------------------------- /data/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/3.html -------------------------------------------------------------------------------- /data/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/30.html -------------------------------------------------------------------------------- /data/31.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/31.html -------------------------------------------------------------------------------- /data/32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/32.html -------------------------------------------------------------------------------- /data/33.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/33.html -------------------------------------------------------------------------------- /data/34.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/34.html -------------------------------------------------------------------------------- /data/35.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/35.html -------------------------------------------------------------------------------- /data/36.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/36.html -------------------------------------------------------------------------------- /data/37.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/37.html -------------------------------------------------------------------------------- /data/38.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/38.html -------------------------------------------------------------------------------- /data/39.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/39.html -------------------------------------------------------------------------------- /data/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/4.html -------------------------------------------------------------------------------- /data/40.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/40.html -------------------------------------------------------------------------------- /data/41.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/41.html -------------------------------------------------------------------------------- /data/42.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/42.html -------------------------------------------------------------------------------- /data/43.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/43.html -------------------------------------------------------------------------------- /data/44.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/44.html -------------------------------------------------------------------------------- /data/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/45.html -------------------------------------------------------------------------------- /data/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/46.html -------------------------------------------------------------------------------- /data/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/47.html -------------------------------------------------------------------------------- /data/48.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/48.html -------------------------------------------------------------------------------- /data/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/5.html -------------------------------------------------------------------------------- /data/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/6.html -------------------------------------------------------------------------------- /data/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/7.html -------------------------------------------------------------------------------- /data/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/8.html -------------------------------------------------------------------------------- /data/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/data/9.html -------------------------------------------------------------------------------- /headers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/headers.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/main.py -------------------------------------------------------------------------------- /parserlist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/parserlist.py -------------------------------------------------------------------------------- /parsers/defaults.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/parsers/defaults.py -------------------------------------------------------------------------------- /parsers/quotestoscrape.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/parsers/quotestoscrape.py -------------------------------------------------------------------------------- /parsers/scrapemelive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/parsers/scrapemelive.py -------------------------------------------------------------------------------- /proxies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/proxies.py -------------------------------------------------------------------------------- /repo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/repo.py -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/tasks.py -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/test.js -------------------------------------------------------------------------------- /test_proxys.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenRows/scaling-to-distributed-crawling/HEAD/test_proxys.py --------------------------------------------------------------------------------