├── .gitignore ├── .ipynb_checkpoints ├── Untitled-checkpoint.ipynb └── analysis-checkpoint.ipynb ├── README.md ├── analysis.ipynb ├── analysis.py ├── cache ├── captcha.png ├── captcha2.png ├── http_list.txt └── https_list.txt ├── crawler.py ├── db.py ├── encrypt.js ├── output ├── qu_wordcloud_n.png ├── qu_wordcloud_nr.png └── qu_wordcloud_ns.png ├── proxy ├── client │ ├── __init__.py │ └── py_cli.py ├── config │ ├── __init__.py │ ├── rules.py │ └── settings.py └── utils │ ├── __init__.py │ ├── functools.py │ ├── get_ip.py │ └── redis_util.py ├── res ├── canger02_W03.ttf ├── stopwords_zhihu.txt └── 刘看山.png ├── zhihu_captcha ├── __init__.py ├── checkpoint │ ├── checkpoint │ ├── ocr-model-22001.data-00000-of-00001 │ ├── ocr-model-22001.index │ └── ocr-model-22001.meta ├── orcmodel.py ├── utils.py └── zhihu_captcha.py ├── zhihu_login.py ├── zhihu_spider.py └── zhihudata.sql /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/.gitignore -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/.ipynb_checkpoints/Untitled-checkpoint.ipynb -------------------------------------------------------------------------------- /.ipynb_checkpoints/analysis-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/.ipynb_checkpoints/analysis-checkpoint.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/README.md -------------------------------------------------------------------------------- /analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/analysis.ipynb -------------------------------------------------------------------------------- /analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/analysis.py -------------------------------------------------------------------------------- /cache/captcha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/cache/captcha.png -------------------------------------------------------------------------------- /cache/captcha2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/cache/captcha2.png -------------------------------------------------------------------------------- /cache/http_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/cache/http_list.txt -------------------------------------------------------------------------------- /cache/https_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/cache/https_list.txt -------------------------------------------------------------------------------- /crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/crawler.py -------------------------------------------------------------------------------- /db.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/db.py -------------------------------------------------------------------------------- /encrypt.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/encrypt.js -------------------------------------------------------------------------------- /output/qu_wordcloud_n.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/output/qu_wordcloud_n.png -------------------------------------------------------------------------------- /output/qu_wordcloud_nr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/output/qu_wordcloud_nr.png -------------------------------------------------------------------------------- /output/qu_wordcloud_ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/output/qu_wordcloud_ns.png -------------------------------------------------------------------------------- /proxy/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/client/__init__.py -------------------------------------------------------------------------------- /proxy/client/py_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/client/py_cli.py -------------------------------------------------------------------------------- /proxy/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy/config/rules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/config/rules.py -------------------------------------------------------------------------------- /proxy/config/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/config/settings.py -------------------------------------------------------------------------------- /proxy/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/utils/__init__.py -------------------------------------------------------------------------------- /proxy/utils/functools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/utils/functools.py -------------------------------------------------------------------------------- /proxy/utils/get_ip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/utils/get_ip.py -------------------------------------------------------------------------------- /proxy/utils/redis_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/proxy/utils/redis_util.py -------------------------------------------------------------------------------- /res/canger02_W03.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/res/canger02_W03.ttf -------------------------------------------------------------------------------- /res/stopwords_zhihu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/res/stopwords_zhihu.txt -------------------------------------------------------------------------------- /res/刘看山.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/res/刘看山.png -------------------------------------------------------------------------------- /zhihu_captcha/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/__init__.py -------------------------------------------------------------------------------- /zhihu_captcha/checkpoint/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/checkpoint/checkpoint -------------------------------------------------------------------------------- /zhihu_captcha/checkpoint/ocr-model-22001.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/checkpoint/ocr-model-22001.data-00000-of-00001 -------------------------------------------------------------------------------- /zhihu_captcha/checkpoint/ocr-model-22001.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/checkpoint/ocr-model-22001.index -------------------------------------------------------------------------------- /zhihu_captcha/checkpoint/ocr-model-22001.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/checkpoint/ocr-model-22001.meta -------------------------------------------------------------------------------- /zhihu_captcha/orcmodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/orcmodel.py -------------------------------------------------------------------------------- /zhihu_captcha/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/utils.py -------------------------------------------------------------------------------- /zhihu_captcha/zhihu_captcha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_captcha/zhihu_captcha.py -------------------------------------------------------------------------------- /zhihu_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_login.py -------------------------------------------------------------------------------- /zhihu_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihu_spider.py -------------------------------------------------------------------------------- /zhihudata.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MogicianXD/ZhihuCrawler/HEAD/zhihudata.sql --------------------------------------------------------------------------------