├── .gitignore ├── LICENSE ├── README.md ├── appendix-b ├── .DS_Store ├── blog_spider │ ├── blog_spider │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── blog_spider.py │ └── scrapy.cfg ├── books_spider_v1 │ ├── books_spider │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── books_spider.py │ └── scrapy.cfg └── books_spider_v2 │ ├── .DS_Store │ ├── books_spider │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── books_spider.py │ └── scrapy.cfg ├── ch1 ├── connect.py └── try_connect.py ├── ch2 ├── find.py ├── navigation.py └── regex.py ├── ch3 ├── dcard.py ├── google_stock.py ├── news.py ├── ptt_gossiping.py └── yahoo_movie.py ├── ch4 ├── dcard_api.py ├── google_places.py ├── imdb.py └── ptt_gossiping_ip.py ├── ch5 ├── db.sqlite ├── ezprice.csv ├── ezprice_csv.py ├── ptt_beauty.py └── sqlite.py ├── ch6 ├── do_xml.py ├── example.xml └── non_utf.py ├── ch7 ├── bot_house.py ├── change_ip.py ├── kingstone.py ├── taqm_epa.py ├── thrsc.py └── yelp_login.py ├── ch8 ├── finanance │ ├── data.sqlite3 │ ├── kd.py │ └── twse_db.py ├── notice │ └── notice.py └── sentiment │ ├── dict.txt.big │ ├── id_to_body.json │ ├── mov_neg.csv │ ├── mov_pos.csv │ ├── ptt_movie_body.py │ ├── ptt_movie_title.py │ └── review_classifier.py └── update.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/README.md -------------------------------------------------------------------------------- /appendix-b/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/.DS_Store -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/blog_spider/items.py -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/middlewares.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/blog_spider/middlewares.py -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/blog_spider/pipelines.py -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/blog_spider/settings.py -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/blog_spider/spiders/__init__.py -------------------------------------------------------------------------------- /appendix-b/blog_spider/blog_spider/spiders/blog_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/blog_spider/spiders/blog_spider.py -------------------------------------------------------------------------------- /appendix-b/blog_spider/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/blog_spider/scrapy.cfg -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/books_spider/items.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/middlewares.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/books_spider/middlewares.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/books_spider/pipelines.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/books_spider/settings.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/books_spider/spiders/__init__.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/books_spider/spiders/books_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/books_spider/spiders/books_spider.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v1/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v1/scrapy.cfg -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/.DS_Store -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/books_spider/items.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/middlewares.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/books_spider/middlewares.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/books_spider/pipelines.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/books_spider/settings.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/books_spider/spiders/__init__.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/books_spider/spiders/books_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/books_spider/spiders/books_spider.py -------------------------------------------------------------------------------- /appendix-b/books_spider_v2/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/appendix-b/books_spider_v2/scrapy.cfg -------------------------------------------------------------------------------- /ch1/connect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch1/connect.py -------------------------------------------------------------------------------- /ch1/try_connect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch1/try_connect.py -------------------------------------------------------------------------------- /ch2/find.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch2/find.py -------------------------------------------------------------------------------- /ch2/navigation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch2/navigation.py -------------------------------------------------------------------------------- /ch2/regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch2/regex.py -------------------------------------------------------------------------------- /ch3/dcard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch3/dcard.py -------------------------------------------------------------------------------- /ch3/google_stock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch3/google_stock.py -------------------------------------------------------------------------------- /ch3/news.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch3/news.py -------------------------------------------------------------------------------- /ch3/ptt_gossiping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch3/ptt_gossiping.py -------------------------------------------------------------------------------- /ch3/yahoo_movie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch3/yahoo_movie.py -------------------------------------------------------------------------------- /ch4/dcard_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch4/dcard_api.py -------------------------------------------------------------------------------- /ch4/google_places.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch4/google_places.py -------------------------------------------------------------------------------- /ch4/imdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch4/imdb.py -------------------------------------------------------------------------------- /ch4/ptt_gossiping_ip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch4/ptt_gossiping_ip.py -------------------------------------------------------------------------------- /ch5/db.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch5/db.sqlite -------------------------------------------------------------------------------- /ch5/ezprice.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch5/ezprice.csv -------------------------------------------------------------------------------- /ch5/ezprice_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch5/ezprice_csv.py -------------------------------------------------------------------------------- /ch5/ptt_beauty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch5/ptt_beauty.py -------------------------------------------------------------------------------- /ch5/sqlite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch5/sqlite.py -------------------------------------------------------------------------------- /ch6/do_xml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch6/do_xml.py -------------------------------------------------------------------------------- /ch6/example.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch6/example.xml -------------------------------------------------------------------------------- /ch6/non_utf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch6/non_utf.py -------------------------------------------------------------------------------- /ch7/bot_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch7/bot_house.py -------------------------------------------------------------------------------- /ch7/change_ip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch7/change_ip.py -------------------------------------------------------------------------------- /ch7/kingstone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch7/kingstone.py -------------------------------------------------------------------------------- /ch7/taqm_epa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch7/taqm_epa.py -------------------------------------------------------------------------------- /ch7/thrsc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch7/thrsc.py -------------------------------------------------------------------------------- /ch7/yelp_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch7/yelp_login.py -------------------------------------------------------------------------------- /ch8/finanance/data.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/finanance/data.sqlite3 -------------------------------------------------------------------------------- /ch8/finanance/kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/finanance/kd.py -------------------------------------------------------------------------------- /ch8/finanance/twse_db.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/finanance/twse_db.py -------------------------------------------------------------------------------- /ch8/notice/notice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/notice/notice.py -------------------------------------------------------------------------------- /ch8/sentiment/dict.txt.big: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/dict.txt.big -------------------------------------------------------------------------------- /ch8/sentiment/id_to_body.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/id_to_body.json -------------------------------------------------------------------------------- /ch8/sentiment/mov_neg.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/mov_neg.csv -------------------------------------------------------------------------------- /ch8/sentiment/mov_pos.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/mov_pos.csv -------------------------------------------------------------------------------- /ch8/sentiment/ptt_movie_body.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/ptt_movie_body.py -------------------------------------------------------------------------------- /ch8/sentiment/ptt_movie_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/ptt_movie_title.py -------------------------------------------------------------------------------- /ch8/sentiment/review_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/ch8/sentiment/review_classifier.py -------------------------------------------------------------------------------- /update.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwlin/py-scraping-analysis-book/HEAD/update.md --------------------------------------------------------------------------------