├── .gitignore ├── README.md ├── alexa ├── alexa │ ├── __init__.py │ ├── cn.json │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ ├── spiders │ │ ├── __init__.py │ │ └── alexa_spider.py │ └── universal.json ├── read_from_json.ipynb └── scrapy.cfg ├── alexa_topsites ├── alexa_topsites │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── amazonbook ├── amazonbook │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── clean.sh ├── delay.sh ├── dianping ├── dianping │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── dmoz ├── dmoz │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── doubanbook ├── doubanbook │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── douban_spider.py ├── sample.jpg └── scrapy.cfg ├── doubanmovie ├── doubanmovie │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── douyu ├── douyu │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── general_spider ├── general_spider │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── BasicSpiderConfig.py │ │ ├── __init__.py │ │ ├── run.sh │ │ ├── scrapy_examples.py │ │ ├── spider.py │ │ └── v2ex.py └── scrapy.cfg ├── github_trending ├── github_trending │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── googlescholar ├── README.md ├── googlescholar │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── hacker_news ├── hacker_news │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── hrtencent ├── hrtencent │ ├── __init__.py │ ├── data_utf8.json │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── hrtencent_spider.py └── scrapy.cfg ├── linkedin ├── README.md ├── doc │ └── db-scheme.md └── linkedin │ ├── linkedin │ ├── Rakefile │ ├── __init__.py │ ├── agents.py │ ├── db.py │ ├── items.py │ ├── middleware.py │ ├── parser │ │ ├── HtmlParser.py │ │ ├── LinkedinParser.py │ │ └── __init__.py │ ├── pipelines.py │ ├── proxy.py │ ├── reload_proxy.py │ ├── settings.py │ └── spiders │ │ ├── LinkedinSpider.py │ │ └── __init__.py │ └── scrapy.cfg ├── misc ├── __init__.py ├── agents.py ├── log.py ├── middleware.py ├── proxy.py └── spider.py ├── pandatv ├── pandatv │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── proxylist ├── proxylist │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ ├── log │ │ ├── free-proxy-list.net │ │ └── proxy-list.org │ │ └── spider.py └── scrapy.cfg ├── qqnews ├── qqnews │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── reddit ├── reddit │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── spider.py └── scrapy.cfg ├── sinanews ├── scrapy.cfg └── sinanews │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── spider.py ├── sis ├── README.md ├── forum-230.json ├── forum-58.json ├── index.html ├── scrapy.cfg └── sis │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── sis_spider.py ├── startproject.sh ├── template ├── scrapy.cfg └── template │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── spider.py ├── tutorial ├── Books ├── Resources ├── data_utf8.json ├── scrapy.cfg └── tutorial │ ├── __init__.py │ ├── data_utf8.json │ ├── items.py │ ├── misc │ ├── __init__.py │ ├── agents.py │ ├── log.py │ ├── middleware.py │ └── proxy.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── naive_spider.py ├── underdev ├── README ├── meijutt │ ├── meijutt │ │ ├── __init__.py │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── spider.py │ └── scrapy.cfg └── twitch │ ├── README │ ├── scrapy.cfg │ └── twitch │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── spider.py ├── v2ex ├── scrapy.cfg └── v2ex │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── spider.py ├── youtube_trending ├── scrapy.cfg └── youtube_trending │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── spider.py ├── zhibo8 ├── run.sh ├── scrapy.cfg └── zhibo8 │ ├── README.md │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ ├── spiders │ ├── __init__.py │ ├── example.py │ ├── hupu_news_spider.py │ ├── zhibo8_decrypt.py │ └── zhibo8_schedule_spider.py │ ├── utils │ ├── __init__.py │ └── mysqldriver.py │ └── zhibo8 ├── zhihu ├── scrapy.cfg └── zhihu │ ├── __init__.py │ ├── items.py │ ├── pipelines.py │ ├── redis-test.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── zhihu_spider.py └── ziroom ├── scrapy.cfg └── ziroom ├── __init__.py ├── items.py ├── pipelines.py ├── settings.py └── spiders ├── __init__.py └── spider.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/README.md -------------------------------------------------------------------------------- /alexa/alexa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alexa/alexa/cn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/cn.json -------------------------------------------------------------------------------- /alexa/alexa/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/items.py -------------------------------------------------------------------------------- /alexa/alexa/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/pipelines.py -------------------------------------------------------------------------------- /alexa/alexa/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/settings.py -------------------------------------------------------------------------------- /alexa/alexa/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/spiders/__init__.py -------------------------------------------------------------------------------- /alexa/alexa/spiders/alexa_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/spiders/alexa_spider.py -------------------------------------------------------------------------------- /alexa/alexa/universal.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/alexa/universal.json -------------------------------------------------------------------------------- /alexa/read_from_json.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/read_from_json.ipynb -------------------------------------------------------------------------------- /alexa/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa/scrapy.cfg -------------------------------------------------------------------------------- /alexa_topsites/alexa_topsites/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alexa_topsites/alexa_topsites/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa_topsites/alexa_topsites/items.py -------------------------------------------------------------------------------- /alexa_topsites/alexa_topsites/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa_topsites/alexa_topsites/pipelines.py -------------------------------------------------------------------------------- /alexa_topsites/alexa_topsites/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa_topsites/alexa_topsites/settings.py -------------------------------------------------------------------------------- /alexa_topsites/alexa_topsites/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa_topsites/alexa_topsites/spiders/__init__.py -------------------------------------------------------------------------------- /alexa_topsites/alexa_topsites/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa_topsites/alexa_topsites/spiders/spider.py -------------------------------------------------------------------------------- /alexa_topsites/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/alexa_topsites/scrapy.cfg -------------------------------------------------------------------------------- /amazonbook/amazonbook/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /amazonbook/amazonbook/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/amazonbook/amazonbook/items.py -------------------------------------------------------------------------------- /amazonbook/amazonbook/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/amazonbook/amazonbook/pipelines.py -------------------------------------------------------------------------------- /amazonbook/amazonbook/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/amazonbook/amazonbook/settings.py -------------------------------------------------------------------------------- /amazonbook/amazonbook/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/amazonbook/amazonbook/spiders/__init__.py -------------------------------------------------------------------------------- /amazonbook/amazonbook/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/amazonbook/amazonbook/spiders/spider.py -------------------------------------------------------------------------------- /amazonbook/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/amazonbook/scrapy.cfg -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | find . -name '*.pyc' | xargs rm 2 | -------------------------------------------------------------------------------- /delay.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/delay.sh -------------------------------------------------------------------------------- /dianping/dianping/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dianping/dianping/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dianping/dianping/items.py -------------------------------------------------------------------------------- /dianping/dianping/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dianping/dianping/pipelines.py -------------------------------------------------------------------------------- /dianping/dianping/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dianping/dianping/settings.py -------------------------------------------------------------------------------- /dianping/dianping/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dianping/dianping/spiders/__init__.py -------------------------------------------------------------------------------- /dianping/dianping/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dianping/dianping/spiders/spider.py -------------------------------------------------------------------------------- /dianping/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dianping/scrapy.cfg -------------------------------------------------------------------------------- /dmoz/dmoz/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dmoz/dmoz/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dmoz/dmoz/items.py -------------------------------------------------------------------------------- /dmoz/dmoz/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dmoz/dmoz/pipelines.py -------------------------------------------------------------------------------- /dmoz/dmoz/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dmoz/dmoz/settings.py -------------------------------------------------------------------------------- /dmoz/dmoz/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dmoz/dmoz/spiders/__init__.py -------------------------------------------------------------------------------- /dmoz/dmoz/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dmoz/dmoz/spiders/spider.py -------------------------------------------------------------------------------- /dmoz/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/dmoz/scrapy.cfg -------------------------------------------------------------------------------- /doubanbook/doubanbook/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doubanbook/doubanbook/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/doubanbook/items.py -------------------------------------------------------------------------------- /doubanbook/doubanbook/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/doubanbook/pipelines.py -------------------------------------------------------------------------------- /doubanbook/doubanbook/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/doubanbook/settings.py -------------------------------------------------------------------------------- /doubanbook/doubanbook/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/doubanbook/spiders/__init__.py -------------------------------------------------------------------------------- /doubanbook/doubanbook/spiders/douban_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/doubanbook/spiders/douban_spider.py -------------------------------------------------------------------------------- /doubanbook/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/sample.jpg -------------------------------------------------------------------------------- /doubanbook/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanbook/scrapy.cfg -------------------------------------------------------------------------------- /doubanmovie/doubanmovie/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doubanmovie/doubanmovie/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanmovie/doubanmovie/items.py -------------------------------------------------------------------------------- /doubanmovie/doubanmovie/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanmovie/doubanmovie/pipelines.py -------------------------------------------------------------------------------- /doubanmovie/doubanmovie/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanmovie/doubanmovie/settings.py -------------------------------------------------------------------------------- /doubanmovie/doubanmovie/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanmovie/doubanmovie/spiders/__init__.py -------------------------------------------------------------------------------- /doubanmovie/doubanmovie/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanmovie/doubanmovie/spiders/spider.py -------------------------------------------------------------------------------- /doubanmovie/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/doubanmovie/scrapy.cfg -------------------------------------------------------------------------------- /douyu/douyu/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /douyu/douyu/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/douyu/douyu/items.py -------------------------------------------------------------------------------- /douyu/douyu/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/douyu/douyu/pipelines.py -------------------------------------------------------------------------------- /douyu/douyu/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/douyu/douyu/settings.py -------------------------------------------------------------------------------- /douyu/douyu/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/douyu/douyu/spiders/__init__.py -------------------------------------------------------------------------------- /douyu/douyu/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/douyu/douyu/spiders/spider.py -------------------------------------------------------------------------------- /douyu/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/douyu/scrapy.cfg -------------------------------------------------------------------------------- /general_spider/general_spider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /general_spider/general_spider/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/items.py -------------------------------------------------------------------------------- /general_spider/general_spider/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/pipelines.py -------------------------------------------------------------------------------- /general_spider/general_spider/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/settings.py -------------------------------------------------------------------------------- /general_spider/general_spider/spiders/BasicSpiderConfig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/spiders/BasicSpiderConfig.py -------------------------------------------------------------------------------- /general_spider/general_spider/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/spiders/__init__.py -------------------------------------------------------------------------------- /general_spider/general_spider/spiders/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/spiders/run.sh -------------------------------------------------------------------------------- /general_spider/general_spider/spiders/scrapy_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/spiders/scrapy_examples.py -------------------------------------------------------------------------------- /general_spider/general_spider/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/spiders/spider.py -------------------------------------------------------------------------------- /general_spider/general_spider/spiders/v2ex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/general_spider/spiders/v2ex.py -------------------------------------------------------------------------------- /general_spider/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/general_spider/scrapy.cfg -------------------------------------------------------------------------------- /github_trending/github_trending/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /github_trending/github_trending/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/github_trending/github_trending/items.py -------------------------------------------------------------------------------- /github_trending/github_trending/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/github_trending/github_trending/pipelines.py -------------------------------------------------------------------------------- /github_trending/github_trending/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/github_trending/github_trending/settings.py -------------------------------------------------------------------------------- /github_trending/github_trending/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/github_trending/github_trending/spiders/__init__.py -------------------------------------------------------------------------------- /github_trending/github_trending/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/github_trending/github_trending/spiders/spider.py -------------------------------------------------------------------------------- /github_trending/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/github_trending/scrapy.cfg -------------------------------------------------------------------------------- /googlescholar/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/README.md -------------------------------------------------------------------------------- /googlescholar/googlescholar/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /googlescholar/googlescholar/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/googlescholar/items.py -------------------------------------------------------------------------------- /googlescholar/googlescholar/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/googlescholar/pipelines.py -------------------------------------------------------------------------------- /googlescholar/googlescholar/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/googlescholar/settings.py -------------------------------------------------------------------------------- /googlescholar/googlescholar/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/googlescholar/spiders/__init__.py -------------------------------------------------------------------------------- /googlescholar/googlescholar/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/googlescholar/spiders/spider.py -------------------------------------------------------------------------------- /googlescholar/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/googlescholar/scrapy.cfg -------------------------------------------------------------------------------- /hacker_news/hacker_news/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hacker_news/hacker_news/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hacker_news/hacker_news/items.py -------------------------------------------------------------------------------- /hacker_news/hacker_news/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hacker_news/hacker_news/pipelines.py -------------------------------------------------------------------------------- /hacker_news/hacker_news/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hacker_news/hacker_news/settings.py -------------------------------------------------------------------------------- /hacker_news/hacker_news/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hacker_news/hacker_news/spiders/__init__.py -------------------------------------------------------------------------------- /hacker_news/hacker_news/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hacker_news/hacker_news/spiders/spider.py -------------------------------------------------------------------------------- /hacker_news/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hacker_news/scrapy.cfg -------------------------------------------------------------------------------- /hrtencent/hrtencent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hrtencent/hrtencent/data_utf8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/hrtencent/data_utf8.json -------------------------------------------------------------------------------- /hrtencent/hrtencent/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/hrtencent/items.py -------------------------------------------------------------------------------- /hrtencent/hrtencent/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/hrtencent/pipelines.py -------------------------------------------------------------------------------- /hrtencent/hrtencent/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/hrtencent/settings.py -------------------------------------------------------------------------------- /hrtencent/hrtencent/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/hrtencent/spiders/__init__.py -------------------------------------------------------------------------------- /hrtencent/hrtencent/spiders/hrtencent_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/hrtencent/spiders/hrtencent_spider.py -------------------------------------------------------------------------------- /hrtencent/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/hrtencent/scrapy.cfg -------------------------------------------------------------------------------- /linkedin/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/README.md -------------------------------------------------------------------------------- /linkedin/doc/db-scheme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/doc/db-scheme.md -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/Rakefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/Rakefile -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/agents.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/db.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/db.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/items.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/middleware.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/middleware.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/parser/HtmlParser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/parser/HtmlParser.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/parser/LinkedinParser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/parser/LinkedinParser.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/parser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/pipelines.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/proxy.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/reload_proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/reload_proxy.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/settings.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/spiders/LinkedinSpider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/spiders/LinkedinSpider.py -------------------------------------------------------------------------------- /linkedin/linkedin/linkedin/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/linkedin/spiders/__init__.py -------------------------------------------------------------------------------- /linkedin/linkedin/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/linkedin/linkedin/scrapy.cfg -------------------------------------------------------------------------------- /misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /misc/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/misc/agents.py -------------------------------------------------------------------------------- /misc/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/misc/log.py -------------------------------------------------------------------------------- /misc/middleware.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/misc/middleware.py -------------------------------------------------------------------------------- /misc/proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/misc/proxy.py -------------------------------------------------------------------------------- /misc/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/misc/spider.py -------------------------------------------------------------------------------- /pandatv/pandatv/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pandatv/pandatv/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/pandatv/pandatv/items.py -------------------------------------------------------------------------------- /pandatv/pandatv/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/pandatv/pandatv/pipelines.py -------------------------------------------------------------------------------- /pandatv/pandatv/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/pandatv/pandatv/settings.py -------------------------------------------------------------------------------- /pandatv/pandatv/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/pandatv/pandatv/spiders/__init__.py -------------------------------------------------------------------------------- /pandatv/pandatv/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/pandatv/pandatv/spiders/spider.py -------------------------------------------------------------------------------- /pandatv/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/pandatv/scrapy.cfg -------------------------------------------------------------------------------- /proxylist/proxylist/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxylist/proxylist/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/items.py -------------------------------------------------------------------------------- /proxylist/proxylist/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/pipelines.py -------------------------------------------------------------------------------- /proxylist/proxylist/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/settings.py -------------------------------------------------------------------------------- /proxylist/proxylist/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/spiders/__init__.py -------------------------------------------------------------------------------- /proxylist/proxylist/spiders/log/free-proxy-list.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/spiders/log/free-proxy-list.net -------------------------------------------------------------------------------- /proxylist/proxylist/spiders/log/proxy-list.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/spiders/log/proxy-list.org -------------------------------------------------------------------------------- /proxylist/proxylist/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/proxylist/spiders/spider.py -------------------------------------------------------------------------------- /proxylist/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/proxylist/scrapy.cfg -------------------------------------------------------------------------------- /qqnews/qqnews/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qqnews/qqnews/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/qqnews/qqnews/items.py -------------------------------------------------------------------------------- /qqnews/qqnews/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/qqnews/qqnews/pipelines.py -------------------------------------------------------------------------------- /qqnews/qqnews/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/qqnews/qqnews/settings.py -------------------------------------------------------------------------------- /qqnews/qqnews/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/qqnews/qqnews/spiders/__init__.py -------------------------------------------------------------------------------- /qqnews/qqnews/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/qqnews/qqnews/spiders/spider.py -------------------------------------------------------------------------------- /qqnews/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/qqnews/scrapy.cfg -------------------------------------------------------------------------------- /reddit/reddit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /reddit/reddit/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/reddit/reddit/items.py -------------------------------------------------------------------------------- /reddit/reddit/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/reddit/reddit/pipelines.py -------------------------------------------------------------------------------- /reddit/reddit/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/reddit/reddit/settings.py -------------------------------------------------------------------------------- /reddit/reddit/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/reddit/reddit/spiders/__init__.py -------------------------------------------------------------------------------- /reddit/reddit/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/reddit/reddit/spiders/spider.py -------------------------------------------------------------------------------- /reddit/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/reddit/scrapy.cfg -------------------------------------------------------------------------------- /sinanews/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sinanews/scrapy.cfg -------------------------------------------------------------------------------- /sinanews/sinanews/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sinanews/sinanews/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sinanews/sinanews/items.py -------------------------------------------------------------------------------- /sinanews/sinanews/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sinanews/sinanews/pipelines.py -------------------------------------------------------------------------------- /sinanews/sinanews/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sinanews/sinanews/settings.py -------------------------------------------------------------------------------- /sinanews/sinanews/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sinanews/sinanews/spiders/__init__.py -------------------------------------------------------------------------------- /sinanews/sinanews/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sinanews/sinanews/spiders/spider.py -------------------------------------------------------------------------------- /sis/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/README.md -------------------------------------------------------------------------------- /sis/forum-230.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/forum-230.json -------------------------------------------------------------------------------- /sis/forum-58.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/forum-58.json -------------------------------------------------------------------------------- /sis/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/index.html -------------------------------------------------------------------------------- /sis/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/scrapy.cfg -------------------------------------------------------------------------------- /sis/sis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sis/sis/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/sis/items.py -------------------------------------------------------------------------------- /sis/sis/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/sis/pipelines.py -------------------------------------------------------------------------------- /sis/sis/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/sis/settings.py -------------------------------------------------------------------------------- /sis/sis/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/sis/spiders/__init__.py -------------------------------------------------------------------------------- /sis/sis/spiders/sis_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/sis/sis/spiders/sis_spider.py -------------------------------------------------------------------------------- /startproject.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/startproject.sh -------------------------------------------------------------------------------- /template/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/template/scrapy.cfg -------------------------------------------------------------------------------- /template/template/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /template/template/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/template/template/items.py -------------------------------------------------------------------------------- /template/template/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/template/template/pipelines.py -------------------------------------------------------------------------------- /template/template/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/template/template/settings.py -------------------------------------------------------------------------------- /template/template/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/template/template/spiders/__init__.py -------------------------------------------------------------------------------- /template/template/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/template/template/spiders/spider.py -------------------------------------------------------------------------------- /tutorial/Books: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/Books -------------------------------------------------------------------------------- /tutorial/Resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/Resources -------------------------------------------------------------------------------- /tutorial/data_utf8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/data_utf8.json -------------------------------------------------------------------------------- /tutorial/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/scrapy.cfg -------------------------------------------------------------------------------- /tutorial/tutorial/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tutorial/tutorial/data_utf8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/data_utf8.json -------------------------------------------------------------------------------- /tutorial/tutorial/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/items.py -------------------------------------------------------------------------------- /tutorial/tutorial/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tutorial/tutorial/misc/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/misc/agents.py -------------------------------------------------------------------------------- /tutorial/tutorial/misc/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/misc/log.py -------------------------------------------------------------------------------- /tutorial/tutorial/misc/middleware.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/misc/middleware.py -------------------------------------------------------------------------------- /tutorial/tutorial/misc/proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/misc/proxy.py -------------------------------------------------------------------------------- /tutorial/tutorial/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/pipelines.py -------------------------------------------------------------------------------- /tutorial/tutorial/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/settings.py -------------------------------------------------------------------------------- /tutorial/tutorial/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/spiders/__init__.py -------------------------------------------------------------------------------- /tutorial/tutorial/spiders/naive_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/tutorial/tutorial/spiders/naive_spider.py -------------------------------------------------------------------------------- /underdev/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/README -------------------------------------------------------------------------------- /underdev/meijutt/meijutt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /underdev/meijutt/meijutt/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/meijutt/meijutt/items.py -------------------------------------------------------------------------------- /underdev/meijutt/meijutt/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/meijutt/meijutt/pipelines.py -------------------------------------------------------------------------------- /underdev/meijutt/meijutt/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/meijutt/meijutt/settings.py -------------------------------------------------------------------------------- /underdev/meijutt/meijutt/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/meijutt/meijutt/spiders/__init__.py -------------------------------------------------------------------------------- /underdev/meijutt/meijutt/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/meijutt/meijutt/spiders/spider.py -------------------------------------------------------------------------------- /underdev/meijutt/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/meijutt/scrapy.cfg -------------------------------------------------------------------------------- /underdev/twitch/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/README -------------------------------------------------------------------------------- /underdev/twitch/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/scrapy.cfg -------------------------------------------------------------------------------- /underdev/twitch/twitch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /underdev/twitch/twitch/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/twitch/items.py -------------------------------------------------------------------------------- /underdev/twitch/twitch/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/twitch/pipelines.py -------------------------------------------------------------------------------- /underdev/twitch/twitch/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/twitch/settings.py -------------------------------------------------------------------------------- /underdev/twitch/twitch/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/twitch/spiders/__init__.py -------------------------------------------------------------------------------- /underdev/twitch/twitch/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/underdev/twitch/twitch/spiders/spider.py -------------------------------------------------------------------------------- /v2ex/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/v2ex/scrapy.cfg -------------------------------------------------------------------------------- /v2ex/v2ex/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2ex/v2ex/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/v2ex/v2ex/items.py -------------------------------------------------------------------------------- /v2ex/v2ex/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/v2ex/v2ex/pipelines.py -------------------------------------------------------------------------------- /v2ex/v2ex/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/v2ex/v2ex/settings.py -------------------------------------------------------------------------------- /v2ex/v2ex/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/v2ex/v2ex/spiders/__init__.py -------------------------------------------------------------------------------- /v2ex/v2ex/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/v2ex/v2ex/spiders/spider.py -------------------------------------------------------------------------------- /youtube_trending/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/youtube_trending/scrapy.cfg -------------------------------------------------------------------------------- /youtube_trending/youtube_trending/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /youtube_trending/youtube_trending/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/youtube_trending/youtube_trending/items.py -------------------------------------------------------------------------------- /youtube_trending/youtube_trending/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/youtube_trending/youtube_trending/pipelines.py -------------------------------------------------------------------------------- /youtube_trending/youtube_trending/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/youtube_trending/youtube_trending/settings.py -------------------------------------------------------------------------------- /youtube_trending/youtube_trending/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/youtube_trending/youtube_trending/spiders/__init__.py -------------------------------------------------------------------------------- /youtube_trending/youtube_trending/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/youtube_trending/youtube_trending/spiders/spider.py -------------------------------------------------------------------------------- /zhibo8/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/run.sh -------------------------------------------------------------------------------- /zhibo8/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/scrapy.cfg -------------------------------------------------------------------------------- /zhibo8/zhibo8/README.md: -------------------------------------------------------------------------------- 1 | 抓取虎扑新闻和直播吧赛程 2 | 基于scrapy[http://scrapy.org/] 3 | -------------------------------------------------------------------------------- /zhibo8/zhibo8/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /zhibo8/zhibo8/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/items.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/pipelines.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/settings.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/spiders/__init__.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/spiders/example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/spiders/example.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/spiders/hupu_news_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/spiders/hupu_news_spider.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/spiders/zhibo8_decrypt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/spiders/zhibo8_decrypt.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/spiders/zhibo8_schedule_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/spiders/zhibo8_schedule_spider.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /zhibo8/zhibo8/utils/mysqldriver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/utils/mysqldriver.py -------------------------------------------------------------------------------- /zhibo8/zhibo8/zhibo8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhibo8/zhibo8/zhibo8 -------------------------------------------------------------------------------- /zhihu/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/scrapy.cfg -------------------------------------------------------------------------------- /zhihu/zhihu/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /zhihu/zhihu/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/zhihu/items.py -------------------------------------------------------------------------------- /zhihu/zhihu/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/zhihu/pipelines.py -------------------------------------------------------------------------------- /zhihu/zhihu/redis-test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/zhihu/redis-test.py -------------------------------------------------------------------------------- /zhihu/zhihu/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/zhihu/settings.py -------------------------------------------------------------------------------- /zhihu/zhihu/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/zhihu/spiders/__init__.py -------------------------------------------------------------------------------- /zhihu/zhihu/spiders/zhihu_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/zhihu/zhihu/spiders/zhihu_spider.py -------------------------------------------------------------------------------- /ziroom/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/ziroom/scrapy.cfg -------------------------------------------------------------------------------- /ziroom/ziroom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ziroom/ziroom/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/ziroom/ziroom/items.py -------------------------------------------------------------------------------- /ziroom/ziroom/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/ziroom/ziroom/pipelines.py -------------------------------------------------------------------------------- /ziroom/ziroom/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/ziroom/ziroom/settings.py -------------------------------------------------------------------------------- /ziroom/ziroom/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/ziroom/ziroom/spiders/__init__.py -------------------------------------------------------------------------------- /ziroom/ziroom/spiders/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekan/scrapy-examples/HEAD/ziroom/ziroom/spiders/spider.py --------------------------------------------------------------------------------