├── .idea ├── .gitignore ├── crawlProject.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── README.md ├── bilibili ├── .idea │ ├── bilibili.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml └── crawl.py ├── douyin ├── .idea │ ├── douyin.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── crawl.py ├── douyin.ttf ├── font_1.xml └── transNum.py ├── douyu ├── .idea │ ├── douyu.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml └── crawl.py ├── steam ├── .idea │ ├── .gitignore │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── steam.iml │ └── vcs.xml ├── __pycache__ │ ├── const.cpython-38.pyc │ └── parsers.cpython-38.pyc ├── app.py ├── const.py ├── parsers.py └── utils.py ├── taobao ├── .idea │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── taobao.iml │ └── workspace.xml ├── consoleWarning.PNG ├── crawl.py └── taobao_login_cookies.txt ├── toutiao ├── .idea │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── toutiao.iml │ ├── vcs.xml │ └── workspace.xml ├── __pycache__ │ └── m_toutiao.cpython-38.pyc └── crawl.py ├── wangyiMusic ├── .idea │ ├── .gitignore │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── wangyiMusic.iml └── crawl.py ├── weibo ├── .idea │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ ├── weibo.iml │ └── workspace.xml ├── __pycache__ │ └── login.cpython-38.pyc ├── captcha.jpg ├── crawl.py ├── login.py └── transCookie.py └── youdao ├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── vcs.xml └── youdao.iml ├── cookie.txt └── crawl.py /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /.idea/crawlProject.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/.idea/crawlProject.iml -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/.idea/misc.xml -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/.idea/modules.xml -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/.idea/vcs.xml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 针对如下网站的爬取 2 | 今日头条 3 | 2.淘宝 4 | 3.微博 5 | 4.斗鱼 6 | 5.抖音 7 | 6.哔哩哔哩 8 | 7.有道翻译 9 | 8.steam 10 | 9.网易云音乐 11 | -------------------------------------------------------------------------------- /bilibili/.idea/bilibili.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/bilibili/.idea/bilibili.iml -------------------------------------------------------------------------------- /bilibili/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/bilibili/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /bilibili/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/bilibili/.idea/misc.xml -------------------------------------------------------------------------------- /bilibili/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/bilibili/.idea/modules.xml -------------------------------------------------------------------------------- /bilibili/.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/bilibili/.idea/workspace.xml -------------------------------------------------------------------------------- /bilibili/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/bilibili/crawl.py -------------------------------------------------------------------------------- /douyin/.idea/douyin.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/.idea/douyin.iml -------------------------------------------------------------------------------- /douyin/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /douyin/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/.idea/misc.xml -------------------------------------------------------------------------------- /douyin/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/.idea/modules.xml -------------------------------------------------------------------------------- /douyin/.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/.idea/workspace.xml -------------------------------------------------------------------------------- /douyin/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/crawl.py -------------------------------------------------------------------------------- /douyin/douyin.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/douyin.ttf -------------------------------------------------------------------------------- /douyin/font_1.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/font_1.xml -------------------------------------------------------------------------------- /douyin/transNum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyin/transNum.py -------------------------------------------------------------------------------- /douyu/.idea/douyu.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyu/.idea/douyu.iml -------------------------------------------------------------------------------- /douyu/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyu/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /douyu/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyu/.idea/misc.xml -------------------------------------------------------------------------------- /douyu/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyu/.idea/modules.xml -------------------------------------------------------------------------------- /douyu/.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyu/.idea/workspace.xml -------------------------------------------------------------------------------- /douyu/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/douyu/crawl.py -------------------------------------------------------------------------------- /steam/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /steam/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /steam/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/.idea/misc.xml -------------------------------------------------------------------------------- /steam/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/.idea/modules.xml -------------------------------------------------------------------------------- /steam/.idea/steam.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/.idea/steam.iml -------------------------------------------------------------------------------- /steam/.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/.idea/vcs.xml -------------------------------------------------------------------------------- /steam/__pycache__/const.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/__pycache__/const.cpython-38.pyc -------------------------------------------------------------------------------- /steam/__pycache__/parsers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/__pycache__/parsers.cpython-38.pyc -------------------------------------------------------------------------------- /steam/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/app.py -------------------------------------------------------------------------------- /steam/const.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/const.py -------------------------------------------------------------------------------- /steam/parsers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/parsers.py -------------------------------------------------------------------------------- /steam/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/steam/utils.py -------------------------------------------------------------------------------- /taobao/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /taobao/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/.idea/misc.xml -------------------------------------------------------------------------------- /taobao/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/.idea/modules.xml -------------------------------------------------------------------------------- /taobao/.idea/taobao.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/.idea/taobao.iml -------------------------------------------------------------------------------- /taobao/.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/.idea/workspace.xml -------------------------------------------------------------------------------- /taobao/consoleWarning.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/consoleWarning.PNG -------------------------------------------------------------------------------- /taobao/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/crawl.py -------------------------------------------------------------------------------- /taobao/taobao_login_cookies.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/taobao/taobao_login_cookies.txt -------------------------------------------------------------------------------- /toutiao/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /toutiao/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/.idea/misc.xml -------------------------------------------------------------------------------- /toutiao/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/.idea/modules.xml -------------------------------------------------------------------------------- /toutiao/.idea/toutiao.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/.idea/toutiao.iml -------------------------------------------------------------------------------- /toutiao/.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/.idea/vcs.xml -------------------------------------------------------------------------------- /toutiao/.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/.idea/workspace.xml -------------------------------------------------------------------------------- /toutiao/__pycache__/m_toutiao.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/__pycache__/m_toutiao.cpython-38.pyc -------------------------------------------------------------------------------- /toutiao/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/toutiao/crawl.py -------------------------------------------------------------------------------- /wangyiMusic/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /wangyiMusic/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/wangyiMusic/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /wangyiMusic/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/wangyiMusic/.idea/misc.xml -------------------------------------------------------------------------------- /wangyiMusic/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/wangyiMusic/.idea/modules.xml -------------------------------------------------------------------------------- /wangyiMusic/.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/wangyiMusic/.idea/vcs.xml -------------------------------------------------------------------------------- /wangyiMusic/.idea/wangyiMusic.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/wangyiMusic/.idea/wangyiMusic.iml -------------------------------------------------------------------------------- /wangyiMusic/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/wangyiMusic/crawl.py -------------------------------------------------------------------------------- /weibo/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /weibo/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/.idea/misc.xml -------------------------------------------------------------------------------- /weibo/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/.idea/modules.xml -------------------------------------------------------------------------------- /weibo/.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/.idea/vcs.xml -------------------------------------------------------------------------------- /weibo/.idea/weibo.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/.idea/weibo.iml -------------------------------------------------------------------------------- /weibo/.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/.idea/workspace.xml -------------------------------------------------------------------------------- /weibo/__pycache__/login.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/__pycache__/login.cpython-38.pyc -------------------------------------------------------------------------------- /weibo/captcha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/captcha.jpg -------------------------------------------------------------------------------- /weibo/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/crawl.py -------------------------------------------------------------------------------- /weibo/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/login.py -------------------------------------------------------------------------------- /weibo/transCookie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/weibo/transCookie.py -------------------------------------------------------------------------------- /youdao/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /youdao/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /youdao/.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/.idea/misc.xml -------------------------------------------------------------------------------- /youdao/.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/.idea/modules.xml -------------------------------------------------------------------------------- /youdao/.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/.idea/vcs.xml -------------------------------------------------------------------------------- /youdao/.idea/youdao.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/.idea/youdao.iml -------------------------------------------------------------------------------- /youdao/cookie.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/cookie.txt -------------------------------------------------------------------------------- /youdao/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Germey/crawlProject/HEAD/youdao/crawl.py --------------------------------------------------------------------------------