# coding:utf-8
# Repository dump. Each section below is a separate file, stored under the
# path named in its header; every .py section is a standalone script.
#
# ├── .gitattributes
# ├── douban_pic_download.py
# └── douban_love_download.py
#
# /.gitattributes:
# --------------------------------------------------------------------------------
#     # Auto detect text files and perform LF normalization
#     * text=auto
# --------------------------------------------------------------------------------
# /douban_pic_download.py:
# --------------------------------------------------------------------------------
# coding:utf-8
# Download posters of 王祖贤 from the Douban photo search.
import requests
import json

query = '王祖贤'


def download(src, id):
    """Download one image and save it as ``./<id>.jpg``.

    Args:
        src: URL of the image to fetch.
        id: Value used as the file name; it is converted with ``str``.

    A failed request is reported on stdout and the image is skipped, so
    no file is written in that case.
    """
    path = './' + str(id) + '.jpg'
    try:
        pic = requests.get(src, timeout=10)
    except requests.exceptions.RequestException:
        # RequestException also covers the read timeout that ``timeout=10``
        # can raise, not only connection errors.
        print('图片无法下载')
        # Stop here: ``pic`` was never bound, so writing would fail.
        return

    with open(path, 'wb') as fp:
        fp.write(pic.content)


# Walk through every result page; ``start`` advances in steps of 20.
for i in range(0, 22471, 20):
    url = 'https://www.douban.com/j/search_photo?q=' + query + '&limit=20&start=' + str(i)
    html = requests.get(url).text  # body of the response as text
    # ``html`` is already decoded text. The ``encoding`` keyword of
    # ``json.loads`` was removed in Python 3.9, so it is not passed.
    response = json.loads(html)  # convert the JSON text into Python objects
    for image in response['images']:
        print(image['src'])  # show the URL of the image being downloaded
        download(image['src'], image['id'])  # download a single image

# --------------------------------------------------------------------------------
# /douban_love_download.py:
# --------------------------------------------------------------------------------
# coding:utf-8
# Download cover images of romance movies from Douban.
import requests
import json


def download(url, title):
    """Download one cover image and save it as ``./<title>.jpg``.

    Args:
        url: URL of the cover image.
        title: Movie title used as the file name. Path separators in the
            title are replaced with ``_`` so the name stays a single file
            in the current directory.

    On success the title is printed. A failed request is reported on
    stdout and the image is skipped.
    """
    # A title such as "A/B" would otherwise point into a directory that
    # does not exist and make ``open`` fail.
    safe_title = title.replace('/', '_').replace('\\', '_')
    path = './' + safe_title + '.jpg'
    try:
        # Same timeout as the poster script so a stalled server cannot
        # hang the crawl.
        pic = requests.get(url, timeout=10)
    except requests.exceptions.RequestException:
        print('图片无法下载')
        return

    with open(path, 'wb') as fp:
        fp.write(pic.content)
    print(title)


for num in range(0, 1000, 20):
    # Build the page URL. The query carries tags=电影 (percent-encoded) and
    # genres=爱情 (percent-encoded); changing the number after start= selects
    # a different page.
    url = ('https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=%E7%94%B5%E5%BD%B1&start='
           + str(num) + '&genres=%E7%88%B1%E6%83%85')
    print(url)
    html = requests.get(url).text
    # Parse the JSON text; no ``encoding`` keyword (removed in Python 3.9).
    res = json.loads(html)
    for result in res['data']:
        cover = result['cover']
        title = result['title']
        download(cover, title)
# --------------------------------------------------------------------------------