├── .gitignore
├── README.md
├── baiduTranslate
    └── translater.py
├── bdbaike
    ├── baike_spider
    │   ├── __init__.py
    │   ├── html_downloader.py
    │   ├── html_outputer.py
    │   ├── html_parser.py
    │   └── url_manager.py
    └── spider_main.py
├── doubanBook
    ├── README.md
    ├── bookCrawler2
    │   ├── books.txt
    │   ├── crawler.py
    │   └── tagList.txt
    ├── bookCrawler3
    │   ├── README.md
    │   ├── bookSearch
    │   │   ├── bookSearch.py
    │   │   └── config.ini
    │   ├── crawler.py
    │   ├── initTable.sql
    │   └── test
    │   │   ├── bookinfotest.py
    │   │   └── multiThreading.py
    ├── bookSpiderXPath.py
    ├── book_list.txt
    └── book_list_spider.py
├── getWebpage
    ├── c1.py
    └── test.html
├── jenkinsJob
    ├── run.py
    └── utils.py
├── poem
    ├── README.md
    ├── config.py
    ├── crawler.py
    ├── models
    │   ├── __init__.py
    │   ├── author.py
    │   ├── poem.py
    │   └── poem_list.py
    └── utils
    │   └── http_util.py
├── proxyInfo
    └── proxyInfo2.py
├── qiushibaike
    ├── log.txt
    └── main.py
├── srtpInfo
    └── srtpSpider.py
└── tieba
    ├── content.txt
    └── tiebaspider.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | **/output*
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/README.md


--------------------------------------------------------------------------------
/baiduTranslate/translater.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/baiduTranslate/translater.py


--------------------------------------------------------------------------------
/bdbaike/baike_spider/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bdbaike/baike_spider/html_downloader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/bdbaike/baike_spider/html_downloader.py


--------------------------------------------------------------------------------
/bdbaike/baike_spider/html_outputer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/bdbaike/baike_spider/html_outputer.py


--------------------------------------------------------------------------------
/bdbaike/baike_spider/html_parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/bdbaike/baike_spider/html_parser.py


--------------------------------------------------------------------------------
/bdbaike/baike_spider/url_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/bdbaike/baike_spider/url_manager.py


--------------------------------------------------------------------------------
/bdbaike/spider_main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/bdbaike/spider_main.py


--------------------------------------------------------------------------------
/doubanBook/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/README.md


--------------------------------------------------------------------------------
/doubanBook/bookCrawler2/books.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler2/books.txt


--------------------------------------------------------------------------------
/doubanBook/bookCrawler2/crawler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler2/crawler.py


--------------------------------------------------------------------------------
/doubanBook/bookCrawler2/tagList.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler2/tagList.txt


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/README.md


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/bookSearch/bookSearch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/bookSearch/bookSearch.py


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/bookSearch/config.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/bookSearch/config.ini


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/crawler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/crawler.py


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/initTable.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/initTable.sql


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/test/bookinfotest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/test/bookinfotest.py


--------------------------------------------------------------------------------
/doubanBook/bookCrawler3/test/multiThreading.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookCrawler3/test/multiThreading.py


--------------------------------------------------------------------------------
/doubanBook/bookSpiderXPath.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/bookSpiderXPath.py


--------------------------------------------------------------------------------
/doubanBook/book_list.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/book_list.txt


--------------------------------------------------------------------------------
/doubanBook/book_list_spider.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/doubanBook/book_list_spider.py


--------------------------------------------------------------------------------
/getWebpage/c1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/getWebpage/c1.py


--------------------------------------------------------------------------------
/getWebpage/test.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/getWebpage/test.html


--------------------------------------------------------------------------------
/jenkinsJob/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/jenkinsJob/run.py


--------------------------------------------------------------------------------
/jenkinsJob/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/jenkinsJob/utils.py


--------------------------------------------------------------------------------
/poem/README.md:
--------------------------------------------------------------------------------
1 | 从百度汉语中，爬取某个作者的所有诗。
2 | 
3 | 使用方法：
4 |   - 修改 config.py 中的用户配置部分
5 |   - 运行 crawler.py
6 | 


--------------------------------------------------------------------------------
/poem/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/poem/config.py


--------------------------------------------------------------------------------
/poem/crawler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/poem/crawler.py


--------------------------------------------------------------------------------
/poem/models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | 模型类都放这里
3 | """
4 | 
5 | from models.poem_list import PoemList
6 | 


--------------------------------------------------------------------------------
/poem/models/author.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/poem/models/author.py


--------------------------------------------------------------------------------
/poem/models/poem.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/poem/models/poem.py


--------------------------------------------------------------------------------
/poem/models/poem_list.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/poem/models/poem_list.py


--------------------------------------------------------------------------------
/poem/utils/http_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/poem/utils/http_util.py


--------------------------------------------------------------------------------
/proxyInfo/proxyInfo2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/proxyInfo/proxyInfo2.py


--------------------------------------------------------------------------------
/qiushibaike/log.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/qiushibaike/log.txt


--------------------------------------------------------------------------------
/qiushibaike/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/qiushibaike/main.py


--------------------------------------------------------------------------------
/srtpInfo/srtpSpider.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/srtpInfo/srtpSpider.py


--------------------------------------------------------------------------------
/tieba/content.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/tieba/content.txt


--------------------------------------------------------------------------------
/tieba/tiebaspider.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plough/myCrawler/HEAD/tieba/tiebaspider.py


--------------------------------------------------------------------------------