├── .idea ├── encodings.xml ├── misc.xml ├── modules.xml ├── python-web-crawlers.iml ├── vcs.xml └── workspace.xml ├── Bonus_spider ├── GIrl │ ├── .DS_Store │ ├── GIrl │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ └── settings.cpython-35.pyc │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ └── find_girl.cpython-35.pyc │ │ │ └── find_girl.py │ └── scrapy.cfg ├── Girl_image │ ├── Image_spider.py │ ├── Url_list.py │ └── test.py ├── Girl_spider │ ├── .DS_Store │ ├── Find_Url.py │ ├── Page_list.py │ ├── Page_parsing.py │ ├── __pycache__ │ │ ├── Page_list.cpython-35.pyc │ │ ├── Page_parsing.cpython-35.pyc │ │ └── channel_list.cpython-35.pyc │ ├── channel_list.py │ ├── count.py │ ├── girl_url.py │ ├── js.py │ ├── main.py │ ├── print.py │ └── stringID.py ├── girlspider │ ├── Email.py │ ├── __init__.py │ ├── crawl(2).py │ ├── crawl(3).py │ ├── crawl(4).py │ ├── crawl.py │ ├── download.py │ ├── downloadimage.py │ ├── downloadjpg.py │ ├── fatherlabel.py │ ├── find_message.py │ ├── find_only_jpg.py │ ├── find_url.py │ ├── findgirlurl.py │ ├── findmessageUrl.py │ ├── findurllabel.py │ ├── girl.py │ ├── myfile │ ├── page.txt │ ├── realurl.txt │ ├── refindurl.py │ └── start_crawl].py └── prettylegs │ └── find_imageurl.py ├── Code_crack ├── CNN_code │ ├── .DS_Store │ └── cnn.py ├── Captcha_code │ ├── 1.jpeg │ ├── 2.jpg │ ├── 3.jpg │ ├── captcha_work.py │ └── test_captcha.py ├── Example_image │ ├── Create_image │ │ └── create_code.py │ ├── Image │ │ ├── output.txt │ │ ├── simple3615.jpg │ │ └── simple5376.png │ └── TTF │ │ └── Georgia.ttf ├── Image_code │ ├── 1.jpg │ ├── 1.png │ ├── CNN_code.py │ ├── output.txt │ ├── simple_image_code.py │ └── vercode.gif ├── PIL_Code │ ├── 1.jpg │ └── PIL_work.py ├── Sliding_verfication_code │ └── test_selenium.py ├── Web_code_example │ └── zhiwang │ │ ├── CheckCode.jpg │ │ └── Zhiwang_code.py ├── selenium_test │ └── test_selenium.py └── slither_code │ ├── bg.jpg │ ├── fullbg.jpg │ └── slither.py ├── Fuctional_web_spider ├── Clear data │ ├── Clear data(1).py │ └── clear data(2).py ├── DownloadImage │ ├── .DS_Store │ ├── DownloadImage │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ └── settings.cpython-35.pyc │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── Downloadimage.py │ │ │ ├── __init__.py │ │ │ └── __pycache__ │ │ │ ├── Downloadimage.cpython-35.pyc │ │ │ └── __init__.cpython-35.pyc │ └── scrapy.cfg ├── IP │ ├── IP.py │ └── IP_proxy.py ├── Image_download │ ├── .DS_Store │ ├── Download.py │ ├── Image │ │ ├── 0.jpg │ │ ├── 1.jpg │ │ ├── 10.jpg │ │ ├── 11.jpg │ │ ├── 12.jpg │ │ ├── 13.jpg │ │ ├── 14.jpg │ │ ├── 15.jpg │ │ ├── 16.jpg │ │ ├── 17.jpg │ │ ├── 18.jpg │ │ ├── 19.jpg │ │ ├── 2.jpg │ │ ├── 20.jpg │ │ ├── 21.jpg │ │ ├── 22.jpg │ │ ├── 23.jpg │ │ ├── 3.jpg │ │ ├── 4.jpg │ │ ├── 5.jpg │ │ ├── 6.jpg │ │ ├── 7.jpg │ │ ├── 8.jpg │ │ └── 9.jpg │ └── download(2).py ├── Regular Email │ ├── HTML_CSV.py │ ├── Save_data_CSV.py │ ├── URL Download(2).py │ ├── URL Download.py │ ├── editors.csv │ └── test.csv └── images │ ├── image(2).py │ ├── image(3).py │ ├── image(4).py │ ├── image(5).py │ └── image.py ├── Mainstrea_login ├── .DS_Store ├── 12306_login │ └── login.py ├── 163mail_login │ └── login.py ├── GITHUB_log │ └── login.py ├── Login_interface │ ├── .DS_Store │ ├── 163_data.py │ ├── @163Email.py │ ├── CSDN_login.py │ ├── Find_captchaUrl.py │ ├── Landing simulation interface.py │ ├── Sina_weibo │ │ ├── weibo.py │ │ └── weibo_login.py │ ├── __init__.py │ ├── captcha.jpg │ ├── codekey.py │ ├── douban_login(2).py │ ├── douban_login.py │ ├── ghostdriver.log │ ├── log.py │ ├── login_163Email.py │ ├── school.py │ ├── school_logon.py │ ├── weibo_login.py │ ├── weibo_login2.py │ ├── zhihu │ │ ├── zhihu_login(2).py │ │ └── zhihu_login.py │ ├── zhihu_login(2).py │ └── zhihu_login.py ├── Login_spider │ ├── zhihu(test).py │ └── zhihu_login.py ├── School_login │ ├── ghostdriver.log │ ├── login(1).py │ ├── login.py │ ├── next.py │ └── 验证码.py ├── meican_login │ └── meican_login.py └── sina_weibo_login │ └── login.py ├── Mainstream_web_spider ├── 58 │ ├── .DS_Store │ ├── 58_BEJing │ │ ├── __init__.py │ │ ├── channel_extact.py │ │ ├── counts.py │ │ ├── main.py │ │ └── pages_parsing.py │ └── 58_spider │ │ ├── __init__.py │ │ ├── channel_extract.py │ │ ├── counts.py │ │ ├── main.py │ │ └── page_parsing.py ├── .DS_Store ├── Baidu │ ├── .DS_Store │ ├── Baidu_Spider │ │ ├── Tieba_spider.py │ │ ├── __init__.py │ │ ├── wenku_spider.py │ │ └── 【原创】银寒演义(战争东方奇幻武侠).txt │ ├── __init__.py │ └── baidu_musci │ │ ├── __init__.py │ │ └── baidu.py ├── Douban │ ├── .DS_Store │ ├── Douban_book │ │ ├── book_name.py │ │ ├── douban_book.py │ │ └── page.py │ ├── Douban_spider(simple) │ │ ├── 1-getPageMedia.py │ │ ├── Douban_Spider.py │ │ ├── crewl_URL.py │ │ ├── doubanURL(test).py │ │ ├── douban_movie(image).py │ │ └── spider.py │ ├── __init__.py │ ├── doubanSpider │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── doubanSpider │ │ │ ├── __init__.py │ │ │ ├── items.py │ │ │ ├── pipelines.py │ │ │ ├── settings.py │ │ │ └── spiders │ │ │ │ └── __init__.py │ │ ├── movie │ │ │ ├── .DS_Store │ │ │ ├── __init__.py │ │ │ ├── items.py │ │ │ ├── misc │ │ │ │ ├── __init__.py │ │ │ │ ├── bloomfilter.py │ │ │ │ ├── helper.py │ │ │ │ ├── middlewares.py │ │ │ │ └── store.py │ │ │ ├── pipelines.py │ │ │ ├── scrapy.cfg │ │ │ ├── settings.py │ │ │ └── spiders │ │ │ │ ├── __init__.py │ │ │ │ └── movie.py │ │ └── scrapy.cfg │ └── doubanmovie │ │ ├── __init__.py │ │ ├── doubanmovie │ │ ├── __init__.py │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── douspider.py │ │ └── scrapy.cfg ├── Game_spider │ └── GameUrl_spider.py ├── Ganji │ ├── Ganji_spider │ │ ├── Page_parsing.py │ │ ├── __init__.py │ │ ├── channel_extract.py │ │ ├── counts.py │ │ ├── main.py │ │ └── try.py │ └── __init__.py ├── JD │ ├── JD_spider │ │ ├── __init__.py │ │ ├── ghostdriver.log │ │ └── page_message.py │ └── __init__.py ├── README.md ├── Sina │ ├── Sina_spider │ │ ├── Browser camouflage.py │ │ ├── Find_Url │ │ ├── __init__.py │ │ ├── find_sina_imge.py │ │ ├── output.txt │ │ ├── sina_URL.py │ │ ├── sina_URL_find.py │ │ ├── sina_download.py │ │ ├── sina_downloadImg.py │ │ ├── sina_image_download.py │ │ ├── sina_news_h1.py │ │ ├── sina_news_img_spider.py │ │ └── sina_news_spider.py │ ├── Sina_weibo │ │ ├── __init__.py │ │ └── weibo_LOGIN.py │ ├── __init__.py │ └── news │ │ ├── Books │ │ ├── Resources │ │ ├── __init__.py │ │ ├── news │ │ ├── __init__.py │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── news.py │ │ ├── scrapy.cfg │ │ └── www.sina.com.cn ├── Spider_zhihu │ ├── Find_url_message.py │ ├── URL_spider.py │ ├── find_URL.py │ ├── find_question_url(alter).py │ └── find_question_url.py ├── Taobao │ ├── TM_spider │ │ ├── TM.py │ │ ├── __init__.py │ │ ├── chantomjs.py │ │ ├── main.py │ │ ├── route_heanders.py │ │ └── route_ip.py │ ├── __init__.py │ └── taobao_Spider │ │ ├── Channel_extract.py │ │ ├── Description.py │ │ ├── __init__.py │ │ └── ghostdriver.log ├── Zhihu │ ├── __init__.py │ └── zhihu_spider │ │ ├── Find_zhihu_question.py │ │ ├── __init__.py │ │ ├── captcha.jpg │ │ ├── cookies.txt │ │ ├── zhihuLogin.py │ │ ├── zhihulogin(2).py │ │ ├── zhihuquestion_spider.py │ │ └── zhihuquestionspider(simple).py └── maoyan │ └── maoyan_movie.py ├── Other_web_spider ├── GNW_Spider │ ├── gnw.py │ ├── text.pkl │ └── text.txt ├── Phantomjs │ ├── Phantomjs_DEMO(2).py │ ├── Phantomjs_DEMO(3).py │ ├── Phantomjs_DEMO(4).py │ ├── Phantomjs_DEMO.py │ ├── Phantomjs_GPS.py │ ├── Phantomjs_base.py │ ├── Phantomjs_classname_loaction.py │ ├── Phantomjs_click.py │ ├── Phantomjs_css_location.py │ ├── Phantomjs_example.py │ ├── Phantomjs_id_location.py │ ├── Phantomjs_keyboard_operation.py │ ├── Phantomjs_linktext_location.py │ ├── Phantomjs_name_location.py │ ├── Phantomjs_tagname_location.py │ ├── Phantomjs_xpath_location.py │ ├── Print_page.py │ ├── click,doubleclick.py │ ├── explore_action.py │ ├── explore_max.py │ ├── explore_weight_hign.py │ ├── for in.py │ ├── ghostdriver.log │ ├── js.html │ └── jsaction.py ├── Spider_Thread │ ├── Thread_base.py │ ├── Thread_common_suse.py │ ├── Thread_example(2).py │ ├── Thread_example(3).py │ ├── Thread_example.py │ ├── Thread_first.py │ ├── Thread_math.py │ └── mtsleep.py ├── novel_Spider │ ├── List_book_spider.py │ ├── Novel_txt.py │ ├── URL_read.py │ ├── txt_spider.py │ └── url.txt └── recognition_imger │ ├── .DS_Store │ ├── Drink-shill_spider │ ├── Shii.py │ ├── Spider.py │ ├── jiutuo_spider.py │ ├── name.py │ └── shillspider.py │ ├── From_url_imgae_message.py │ ├── Tesseract_image.py │ ├── ghostdriver.log │ ├── output.txt │ ├── text1.tif │ ├── text_2.jpg │ ├── text_2_clean.png │ └── textoutput.txt ├── README.md ├── Scrapy_spider ├── Download URL │ ├── 13.py │ ├── Download URL(midified).py │ ├── Download URL(simple).py │ └── sqlite_python integration.py └── scrapy_example │ ├── MySpider(2).py │ ├── Myscrapy.py │ ├── sina_news.py │ ├── stackoverflow_spider.py │ ├── start_urls.py │ └── top-stackoverflow-questions.json ├── Spider_base ├── ajax_spider.py ├── favicon.ico ├── re_work │ ├── re_test.py │ ├── re_work.py │ └── resule.txt └── spider_base.py ├── Spider_data ├── csv_data.py ├── data.csv ├── data.json ├── explore.txt ├── json_data.py └── txt_data.py └── Spider_frame ├── Gavent_Spider └── gavent_spider.py ├── Process_Spider └── process_spider.py └── Thread_Spider ├── lock_thread.py ├── queue_thread.py ├── test_spider.py ├── test_thread.py └── thread_spider.py /.idea/encodings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/.idea/encodings.xml -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/.idea/misc.xml -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/.idea/modules.xml -------------------------------------------------------------------------------- /.idea/python-web-crawlers.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/.idea/python-web-crawlers.iml -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/.idea/vcs.xml -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/.idea/workspace.xml -------------------------------------------------------------------------------- /Bonus_spider/GIrl/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/.DS_Store -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/.DS_Store -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/__pycache__/settings.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/__pycache__/settings.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/items.py -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/pipelines.py -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/settings.py -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/spiders/__init__.py -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/spiders/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/spiders/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/spiders/__pycache__/find_girl.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/spiders/__pycache__/find_girl.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/GIrl/GIrl/spiders/find_girl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/GIrl/spiders/find_girl.py -------------------------------------------------------------------------------- /Bonus_spider/GIrl/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/GIrl/scrapy.cfg -------------------------------------------------------------------------------- /Bonus_spider/Girl_image/Image_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_image/Image_spider.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_image/Url_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_image/Url_list.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_image/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_image/test.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/.DS_Store -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/Find_Url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/Find_Url.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/Page_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/Page_list.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/Page_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/Page_parsing.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/__pycache__/Page_list.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/__pycache__/Page_list.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/__pycache__/Page_parsing.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/__pycache__/Page_parsing.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/__pycache__/channel_list.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/__pycache__/channel_list.cpython-35.pyc -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/channel_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/channel_list.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/count.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/girl_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/girl_url.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/js.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/js.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/main.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/print.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/print.py -------------------------------------------------------------------------------- /Bonus_spider/Girl_spider/stringID.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/Girl_spider/stringID.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/Email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/Email.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Bonus_spider/girlspider/crawl(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/crawl(2).py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/crawl(3).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/crawl(3).py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/crawl(4).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/crawl(4).py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/crawl.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/download.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/downloadimage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/downloadimage.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/downloadjpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/downloadjpg.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/fatherlabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/fatherlabel.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/find_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/find_message.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/find_only_jpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/find_only_jpg.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/find_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/find_url.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/findgirlurl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/findgirlurl.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/findmessageUrl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/findmessageUrl.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/findurllabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/findurllabel.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/girl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/girl.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/myfile: -------------------------------------------------------------------------------- 1 | http://www.mmkao.net/wcgz/11.html -------------------------------------------------------------------------------- /Bonus_spider/girlspider/page.txt: -------------------------------------------------------------------------------- 1 | 2629_7.html -------------------------------------------------------------------------------- /Bonus_spider/girlspider/realurl.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Bonus_spider/girlspider/refindurl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/refindurl.py -------------------------------------------------------------------------------- /Bonus_spider/girlspider/start_crawl].py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/girlspider/start_crawl].py -------------------------------------------------------------------------------- /Bonus_spider/prettylegs/find_imageurl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Bonus_spider/prettylegs/find_imageurl.py -------------------------------------------------------------------------------- /Code_crack/CNN_code/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/CNN_code/.DS_Store -------------------------------------------------------------------------------- /Code_crack/CNN_code/cnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/CNN_code/cnn.py -------------------------------------------------------------------------------- /Code_crack/Captcha_code/1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Captcha_code/1.jpeg -------------------------------------------------------------------------------- /Code_crack/Captcha_code/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Captcha_code/2.jpg -------------------------------------------------------------------------------- /Code_crack/Captcha_code/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Captcha_code/3.jpg -------------------------------------------------------------------------------- /Code_crack/Captcha_code/captcha_work.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Captcha_code/captcha_work.py -------------------------------------------------------------------------------- /Code_crack/Captcha_code/test_captcha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Captcha_code/test_captcha.py -------------------------------------------------------------------------------- /Code_crack/Example_image/Create_image/create_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Example_image/Create_image/create_code.py -------------------------------------------------------------------------------- /Code_crack/Example_image/Image/output.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code_crack/Example_image/Image/simple3615.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Example_image/Image/simple3615.jpg -------------------------------------------------------------------------------- /Code_crack/Example_image/Image/simple5376.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Example_image/Image/simple5376.png -------------------------------------------------------------------------------- /Code_crack/Example_image/TTF/Georgia.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Example_image/TTF/Georgia.ttf -------------------------------------------------------------------------------- /Code_crack/Image_code/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Image_code/1.jpg -------------------------------------------------------------------------------- /Code_crack/Image_code/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Image_code/1.png -------------------------------------------------------------------------------- /Code_crack/Image_code/CNN_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Image_code/CNN_code.py -------------------------------------------------------------------------------- /Code_crack/Image_code/output.txt: -------------------------------------------------------------------------------- 1 | NKWII 2 | 3 | -------------------------------------------------------------------------------- /Code_crack/Image_code/simple_image_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Image_code/simple_image_code.py -------------------------------------------------------------------------------- /Code_crack/Image_code/vercode.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Image_code/vercode.gif -------------------------------------------------------------------------------- /Code_crack/PIL_Code/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/PIL_Code/1.jpg -------------------------------------------------------------------------------- /Code_crack/PIL_Code/PIL_work.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/PIL_Code/PIL_work.py -------------------------------------------------------------------------------- /Code_crack/Sliding_verfication_code/test_selenium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Sliding_verfication_code/test_selenium.py -------------------------------------------------------------------------------- /Code_crack/Web_code_example/zhiwang/CheckCode.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Web_code_example/zhiwang/CheckCode.jpg -------------------------------------------------------------------------------- /Code_crack/Web_code_example/zhiwang/Zhiwang_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/Web_code_example/zhiwang/Zhiwang_code.py -------------------------------------------------------------------------------- /Code_crack/selenium_test/test_selenium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/selenium_test/test_selenium.py -------------------------------------------------------------------------------- /Code_crack/slither_code/bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/slither_code/bg.jpg -------------------------------------------------------------------------------- /Code_crack/slither_code/fullbg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/slither_code/fullbg.jpg -------------------------------------------------------------------------------- /Code_crack/slither_code/slither.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Code_crack/slither_code/slither.py -------------------------------------------------------------------------------- /Fuctional_web_spider/Clear data/Clear data(1).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Clear data/Clear data(1).py -------------------------------------------------------------------------------- /Fuctional_web_spider/Clear data/clear data(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Clear data/clear data(2).py -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/.DS_Store -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/.DS_Store -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/__pycache__/settings.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/__pycache__/settings.cpython-35.pyc -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/items.py -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/pipelines.py -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/settings.py -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/spiders/Downloadimage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/spiders/Downloadimage.py -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/spiders/__init__.py -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/spiders/__pycache__/Downloadimage.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/spiders/__pycache__/Downloadimage.cpython-35.pyc -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/DownloadImage/spiders/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/DownloadImage/spiders/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /Fuctional_web_spider/DownloadImage/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/DownloadImage/scrapy.cfg -------------------------------------------------------------------------------- /Fuctional_web_spider/IP/IP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/IP/IP.py -------------------------------------------------------------------------------- /Fuctional_web_spider/IP/IP_proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/IP/IP_proxy.py -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/.DS_Store -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Download.py -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/0.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/1.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/10.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/11.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/12.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/13.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/14.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/15.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/16.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/17.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/18.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/19.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/2.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/20.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/21.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/22.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/23.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/3.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/4.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/5.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/6.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/7.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/8.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/Image/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/Image/9.jpg -------------------------------------------------------------------------------- /Fuctional_web_spider/Image_download/download(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Image_download/download(2).py -------------------------------------------------------------------------------- /Fuctional_web_spider/Regular Email/HTML_CSV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Regular Email/HTML_CSV.py -------------------------------------------------------------------------------- /Fuctional_web_spider/Regular Email/Save_data_CSV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Regular Email/Save_data_CSV.py -------------------------------------------------------------------------------- /Fuctional_web_spider/Regular Email/URL Download(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Regular Email/URL Download(2).py -------------------------------------------------------------------------------- /Fuctional_web_spider/Regular Email/URL Download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Regular Email/URL Download.py -------------------------------------------------------------------------------- /Fuctional_web_spider/Regular Email/editors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Regular Email/editors.csv -------------------------------------------------------------------------------- /Fuctional_web_spider/Regular Email/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/Regular Email/test.csv -------------------------------------------------------------------------------- /Fuctional_web_spider/images/image(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/images/image(2).py -------------------------------------------------------------------------------- /Fuctional_web_spider/images/image(3).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/images/image(3).py -------------------------------------------------------------------------------- /Fuctional_web_spider/images/image(4).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/images/image(4).py -------------------------------------------------------------------------------- /Fuctional_web_spider/images/image(5).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/images/image(5).py -------------------------------------------------------------------------------- /Fuctional_web_spider/images/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Fuctional_web_spider/images/image.py -------------------------------------------------------------------------------- /Mainstrea_login/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/.DS_Store -------------------------------------------------------------------------------- /Mainstrea_login/12306_login/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/12306_login/login.py -------------------------------------------------------------------------------- /Mainstrea_login/163mail_login/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/163mail_login/login.py -------------------------------------------------------------------------------- /Mainstrea_login/GITHUB_log/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/GITHUB_log/login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/.DS_Store -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/163_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/163_data.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/@163Email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/@163Email.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/CSDN_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/CSDN_login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/Find_captchaUrl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/Find_captchaUrl.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/Landing simulation interface.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/Sina_weibo/weibo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/Sina_weibo/weibo.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/Sina_weibo/weibo_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/Sina_weibo/weibo_login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/captcha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/captcha.jpg -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/codekey.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/codekey.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/douban_login(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/douban_login(2).py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/douban_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/douban_login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/ghostdriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/ghostdriver.log -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/log.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/login_163Email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/login_163Email.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/school.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/school_logon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/school_logon.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/weibo_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/weibo_login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/weibo_login2.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/zhihu/zhihu_login(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/zhihu/zhihu_login(2).py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/zhihu/zhihu_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/zhihu/zhihu_login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/zhihu_login(2).py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstrea_login/Login_interface/zhihu_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_interface/zhihu_login.py -------------------------------------------------------------------------------- /Mainstrea_login/Login_spider/zhihu(test).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_spider/zhihu(test).py -------------------------------------------------------------------------------- /Mainstrea_login/Login_spider/zhihu_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/Login_spider/zhihu_login.py -------------------------------------------------------------------------------- /Mainstrea_login/School_login/ghostdriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/School_login/ghostdriver.log -------------------------------------------------------------------------------- /Mainstrea_login/School_login/login(1).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/School_login/login(1).py -------------------------------------------------------------------------------- /Mainstrea_login/School_login/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/School_login/login.py -------------------------------------------------------------------------------- /Mainstrea_login/School_login/next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/School_login/next.py -------------------------------------------------------------------------------- /Mainstrea_login/School_login/验证码.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/School_login/验证码.py -------------------------------------------------------------------------------- /Mainstrea_login/meican_login/meican_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/meican_login/meican_login.py -------------------------------------------------------------------------------- /Mainstrea_login/sina_weibo_login/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstrea_login/sina_weibo_login/login.py -------------------------------------------------------------------------------- /Mainstream_web_spider/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/.DS_Store -------------------------------------------------------------------------------- /Mainstream_web_spider/58/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/.DS_Store -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_BEJing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_BEJing/channel_extact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_BEJing/channel_extact.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_BEJing/counts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_BEJing/counts.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_BEJing/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_BEJing/main.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_BEJing/pages_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_BEJing/pages_parsing.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_spider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_spider/channel_extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_spider/channel_extract.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_spider/counts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_spider/counts.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_spider/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_spider/main.py -------------------------------------------------------------------------------- /Mainstream_web_spider/58/58_spider/page_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/58/58_spider/page_parsing.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Baidu/.DS_Store -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/Baidu_Spider/Tieba_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Baidu/Baidu_Spider/Tieba_spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/Baidu_Spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/Baidu_Spider/wenku_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Baidu/Baidu_Spider/wenku_spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/Baidu_Spider/【原创】银寒演义(战争东方奇幻武侠).txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Baidu/Baidu_Spider/【原创】银寒演义(战争东方奇幻武侠).txt -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/baidu_musci/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Baidu/baidu_musci/baidu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Baidu/baidu_musci/baidu.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/.DS_Store -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_book/book_name.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_book/book_name.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_book/douban_book.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_book/douban_book.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_book/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_book/page.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_spider(simple)/1-getPageMedia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_spider(simple)/1-getPageMedia.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_spider(simple)/Douban_Spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_spider(simple)/Douban_Spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_spider(simple)/crewl_URL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_spider(simple)/crewl_URL.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_spider(simple)/doubanURL(test).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_spider(simple)/doubanURL(test).py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_spider(simple)/douban_movie(image).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_spider(simple)/douban_movie(image).py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/Douban_spider(simple)/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/Douban_spider(simple)/spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/.DS_Store -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/doubanSpider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/doubanSpider/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/doubanSpider/items.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/doubanSpider/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/doubanSpider/pipelines.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/doubanSpider/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/doubanSpider/settings.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/doubanSpider/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/doubanSpider/spiders/__init__.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/.DS_Store -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/items.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/misc/bloomfilter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/misc/bloomfilter.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/misc/helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/misc/helper.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/misc/middlewares.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/misc/middlewares.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/misc/store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/misc/store.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/pipelines.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/scrapy.cfg -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/settings.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/movie/spiders/movie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/movie/spiders/movie.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanSpider/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanSpider/scrapy.cfg -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/doubanmovie/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/doubanmovie/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanmovie/doubanmovie/items.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/doubanmovie/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanmovie/doubanmovie/pipelines.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/doubanmovie/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanmovie/doubanmovie/settings.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/doubanmovie/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanmovie/doubanmovie/spiders/__init__.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/doubanmovie/spiders/douspider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanmovie/doubanmovie/spiders/douspider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Douban/doubanmovie/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Douban/doubanmovie/scrapy.cfg -------------------------------------------------------------------------------- /Mainstream_web_spider/Game_spider/GameUrl_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Game_spider/GameUrl_spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/Ganji_spider/Page_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Ganji/Ganji_spider/Page_parsing.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/Ganji_spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/Ganji_spider/channel_extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Ganji/Ganji_spider/channel_extract.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/Ganji_spider/counts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Ganji/Ganji_spider/counts.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/Ganji_spider/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Ganji/Ganji_spider/main.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/Ganji_spider/try.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Ganji/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/JD/JD_spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/JD/JD_spider/ghostdriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/JD/JD_spider/ghostdriver.log -------------------------------------------------------------------------------- /Mainstream_web_spider/JD/JD_spider/page_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/JD/JD_spider/page_message.py -------------------------------------------------------------------------------- /Mainstream_web_spider/JD/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/README.md: -------------------------------------------------------------------------------- 1 | # **网站列表** 2 | 3 | 1. 58同城 4 | 2. 知乎 5 | 3. 京东 6 | 4. 新浪(微博/新闻) 7 | 5. 淘宝 8 | 6. WIKI 9 | 7. 知网 10 | 8. 豆瓣(movie/book/小组) 11 | 9. 百度(贴吧/图片) 12 | 10. 赶集 13 | 11. 拉勾 14 | 12. B站 15 | 13. 斗鱼 16 | 14. 虎牙 -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/Browser camouflage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/Browser camouflage.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/Find_Url: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/Find_Url -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/find_sina_imge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/find_sina_imge.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/output.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/output.txt -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_URL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_URL.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_URL_find.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_URL_find.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_download.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_downloadImg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_downloadImg.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_image_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_image_download.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_news_h1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_news_h1.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_news_img_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_news_img_spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_spider/sina_news_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_spider/sina_news_spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_weibo/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/Sina_weibo/weibo_LOGIN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/Sina_weibo/weibo_LOGIN.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/Books: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/Books -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/Resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/Resources -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/news/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/news/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/news/items.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/news/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/news/pipelines.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/news/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/news/settings.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/news/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/news/spiders/__init__.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/news/spiders/news.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/news/spiders/news.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/scrapy.cfg -------------------------------------------------------------------------------- /Mainstream_web_spider/Sina/news/www.sina.com.cn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Sina/news/www.sina.com.cn -------------------------------------------------------------------------------- /Mainstream_web_spider/Spider_zhihu/Find_url_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Spider_zhihu/Find_url_message.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Spider_zhihu/URL_spider.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Spider_zhihu/find_URL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Spider_zhihu/find_URL.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Spider_zhihu/find_question_url(alter).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Spider_zhihu/find_question_url(alter).py -------------------------------------------------------------------------------- /Mainstream_web_spider/Spider_zhihu/find_question_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Spider_zhihu/find_question_url.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/TM_spider/TM.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/TM_spider/TM.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/TM_spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/TM_spider/chantomjs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/TM_spider/chantomjs.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/TM_spider/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/TM_spider/main.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/TM_spider/route_heanders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/TM_spider/route_heanders.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/TM_spider/route_ip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/TM_spider/route_ip.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/taobao_Spider/Channel_extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/taobao_Spider/Channel_extract.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/taobao_Spider/Description.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/taobao_Spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Taobao/taobao_Spider/ghostdriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Taobao/taobao_Spider/ghostdriver.log -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/Find_zhihu_question.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/Find_zhihu_question.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | __author__ = 'lau.wenbo' -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/captcha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/captcha.jpg -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/cookies.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/cookies.txt -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/zhihuLogin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/zhihuLogin.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/zhihulogin(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/zhihulogin(2).py -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/zhihuquestion_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/zhihuquestion_spider.py -------------------------------------------------------------------------------- /Mainstream_web_spider/Zhihu/zhihu_spider/zhihuquestionspider(simple).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/Zhihu/zhihu_spider/zhihuquestionspider(simple).py -------------------------------------------------------------------------------- /Mainstream_web_spider/maoyan/maoyan_movie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Mainstream_web_spider/maoyan/maoyan_movie.py -------------------------------------------------------------------------------- /Other_web_spider/GNW_Spider/gnw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/GNW_Spider/gnw.py -------------------------------------------------------------------------------- /Other_web_spider/GNW_Spider/text.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/GNW_Spider/text.pkl -------------------------------------------------------------------------------- /Other_web_spider/GNW_Spider/text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/GNW_Spider/text.txt -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_DEMO(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_DEMO(2).py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_DEMO(3).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_DEMO(3).py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_DEMO(4).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_DEMO(4).py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_DEMO.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_DEMO.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_GPS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_GPS.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_base.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_classname_loaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_classname_loaction.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_click.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_click.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_css_location.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_css_location.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_example.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_id_location.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_id_location.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_keyboard_operation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_keyboard_operation.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_linktext_location.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_linktext_location.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_name_location.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_name_location.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_tagname_location.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_tagname_location.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Phantomjs_xpath_location.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Phantomjs_xpath_location.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/Print_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/Print_page.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/click,doubleclick.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/explore_action.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/explore_action.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/explore_max.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/explore_max.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/explore_weight_hign.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/explore_weight_hign.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/for in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/for in.py -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/ghostdriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/ghostdriver.log -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/js.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/js.html -------------------------------------------------------------------------------- /Other_web_spider/Phantomjs/jsaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Phantomjs/jsaction.py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_base.py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_common_suse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_common_suse.py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_example(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_example(2).py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_example(3).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_example(3).py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_example.py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_first.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_first.py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/Thread_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/Thread_math.py -------------------------------------------------------------------------------- /Other_web_spider/Spider_Thread/mtsleep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/Spider_Thread/mtsleep.py -------------------------------------------------------------------------------- /Other_web_spider/novel_Spider/List_book_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/novel_Spider/List_book_spider.py -------------------------------------------------------------------------------- /Other_web_spider/novel_Spider/Novel_txt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/novel_Spider/Novel_txt.py -------------------------------------------------------------------------------- /Other_web_spider/novel_Spider/URL_read.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/novel_Spider/URL_read.py -------------------------------------------------------------------------------- /Other_web_spider/novel_Spider/txt_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/novel_Spider/txt_spider.py -------------------------------------------------------------------------------- /Other_web_spider/novel_Spider/url.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/novel_Spider/url.txt -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/.DS_Store -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/Drink-shill_spider/Shii.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/Drink-shill_spider/Shii.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/Drink-shill_spider/Spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/Drink-shill_spider/Spider.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/Drink-shill_spider/jiutuo_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/Drink-shill_spider/jiutuo_spider.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/Drink-shill_spider/name.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/Drink-shill_spider/name.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/Drink-shill_spider/shillspider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/Drink-shill_spider/shillspider.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/From_url_imgae_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/From_url_imgae_message.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/Tesseract_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/Tesseract_image.py -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/ghostdriver.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/output.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/output.txt -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/text1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/text1.tif -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/text_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/text_2.jpg -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/text_2_clean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/text_2_clean.png -------------------------------------------------------------------------------- /Other_web_spider/recognition_imger/textoutput.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Other_web_spider/recognition_imger/textoutput.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/README.md -------------------------------------------------------------------------------- /Scrapy_spider/Download URL/13.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/Download URL/13.py -------------------------------------------------------------------------------- /Scrapy_spider/Download URL/Download URL(midified).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/Download URL/Download URL(midified).py -------------------------------------------------------------------------------- /Scrapy_spider/Download URL/Download URL(simple).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/Download URL/Download URL(simple).py -------------------------------------------------------------------------------- /Scrapy_spider/Download URL/sqlite_python integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/Download URL/sqlite_python integration.py -------------------------------------------------------------------------------- /Scrapy_spider/scrapy_example/MySpider(2).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/scrapy_example/MySpider(2).py -------------------------------------------------------------------------------- /Scrapy_spider/scrapy_example/Myscrapy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/scrapy_example/Myscrapy.py -------------------------------------------------------------------------------- /Scrapy_spider/scrapy_example/sina_news.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/scrapy_example/sina_news.py -------------------------------------------------------------------------------- /Scrapy_spider/scrapy_example/stackoverflow_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/scrapy_example/stackoverflow_spider.py -------------------------------------------------------------------------------- /Scrapy_spider/scrapy_example/start_urls.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | -------------------------------------------------------------------------------- /Scrapy_spider/scrapy_example/top-stackoverflow-questions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Scrapy_spider/scrapy_example/top-stackoverflow-questions.json -------------------------------------------------------------------------------- /Spider_base/ajax_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_base/ajax_spider.py -------------------------------------------------------------------------------- /Spider_base/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_base/favicon.ico -------------------------------------------------------------------------------- /Spider_base/re_work/re_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_base/re_work/re_test.py -------------------------------------------------------------------------------- /Spider_base/re_work/re_work.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_base/re_work/re_work.py -------------------------------------------------------------------------------- /Spider_base/re_work/resule.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_base/re_work/resule.txt -------------------------------------------------------------------------------- /Spider_base/spider_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_base/spider_base.py -------------------------------------------------------------------------------- /Spider_data/csv_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_data/csv_data.py -------------------------------------------------------------------------------- /Spider_data/data.csv: -------------------------------------------------------------------------------- 1 | id,name,age 2 | 10001,Mike,20 3 | -------------------------------------------------------------------------------- /Spider_data/data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_data/data.json -------------------------------------------------------------------------------- /Spider_data/explore.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_data/explore.txt -------------------------------------------------------------------------------- /Spider_data/json_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_data/json_data.py -------------------------------------------------------------------------------- /Spider_data/txt_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_data/txt_data.py -------------------------------------------------------------------------------- /Spider_frame/Gavent_Spider/gavent_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Gavent_Spider/gavent_spider.py -------------------------------------------------------------------------------- /Spider_frame/Process_Spider/process_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Process_Spider/process_spider.py -------------------------------------------------------------------------------- /Spider_frame/Thread_Spider/lock_thread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Thread_Spider/lock_thread.py -------------------------------------------------------------------------------- /Spider_frame/Thread_Spider/queue_thread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Thread_Spider/queue_thread.py -------------------------------------------------------------------------------- /Spider_frame/Thread_Spider/test_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Thread_Spider/test_spider.py -------------------------------------------------------------------------------- /Spider_frame/Thread_Spider/test_thread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Thread_Spider/test_thread.py -------------------------------------------------------------------------------- /Spider_frame/Thread_Spider/thread_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alexanderklau/python-web-crawlers/HEAD/Spider_frame/Thread_Spider/thread_spider.py --------------------------------------------------------------------------------