├── README.md ├── doubanreader.py ├── main.py └── template.py /README.md: -------------------------------------------------------------------------------- 1 | # DouBanReader 2 | 3 | DouBanReader是一个自动根据你的豆瓣读书标记生成读书报告的脚本。 4 | 5 | ## 运行方法 6 | 7 | ``` python 8 | python main.py 9 | ``` 10 | 11 | ## 依赖 12 | 13 | * [Requests: HTTP for Humans v2.7.0.](http://www.python-requests.org/en/latest/) 14 | 15 | ## 开发日志 16 | 17 | 项目开发记录详见:[豆瓣阅读报告生成器](http://findingsea.github.io/2015/07/20/doubanreader-notes/) -------------------------------------------------------------------------------- /doubanreader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re, json, os, urllib 3 | import requests 4 | 5 | class DBUser: 6 | 7 | name = '' 8 | id = '' 9 | uid = '' 10 | authorization_code = '' 11 | access_token = '' 12 | refresh_token = '' 13 | 14 | def __init__(self): 15 | if os.path.isfile(USER_INFO_FILE): 16 | pre_file = open(USER_INFO_FILE, 'r') 17 | else: 18 | pre_file = open(USER_INFO_FILE, 'w+r') 19 | pre_file_read = pre_file.read() 20 | pre_file.close() 21 | if pre_file_read.strip(): 22 | user_info = json.loads(pre_file_read.strip()) 23 | self.name = user_info['name'] 24 | self.id = user_info['id'] 25 | self.uid = user_info['uid'] 26 | self.authorization_code = user_info['authorization_code'] 27 | self.access_token = user_info['access_token'] 28 | self.refresh_token = user_info['refresh_token'] 29 | 30 | def save(self): 31 | pre_file = open(USER_INFO_FILE, 'w') 32 | user_info = {} 33 | user_info['name'] = self.name 34 | user_info['id'] = self.id 35 | user_info['uid'] = self.uid 36 | user_info['authorization_code'] = self.authorization_code 37 | user_info['access_token'] = self.access_token 38 | user_info['refresh_token'] = self.refresh_token 39 | pre_file.write(json.dumps(user_info)) 40 | pre_file.close() 41 | 42 | 43 | class DBRClient: 44 | 45 | user = None 46 | 47 | def __init__(self, user): 48 | self.user = user 49 | 50 | def isAuth(self): 51 | if self.user.authorization_code and self.user.access_token: 52 | return True 53 | else: 54 | return False 55 | 56 | def auth(self): 57 | 58 | params = {} 59 | params['client_id'] = CLIENT_ID 60 | params['redirect_uri'] = REDIRECT_URI 61 | params['response_type'] = 'code' 62 | params['scope'] = SCOPE 63 | 64 | print u'请访问如下地址,点击确认后,复制跳转的网址到下面:' + AUTHORIZATION_CODE_URL \ 65 | + '?' + urllib.urlencode(params) 66 | 67 | authorization_code = re.match(\ 68 | u'(.*)code=(.*)', \ 69 | raw_input(u'跳转网址:'.encode('utf-8')).strip()\ 70 | ).group(2) 71 | 72 | params = {'client_id': CLIENT_ID, 'client_secret': SECRET, \ 73 | 'redirect_uri': REDIRECT_URI, 'grant_type': 'authorization_code', \ 74 | 'code': authorization_code} 75 | res = requests.post(\ 76 | url=ACCESS_TOKEN_URL, data=params, \ 77 | headers={'Content-Type': 'application/x-www-form-urlencoded'} \ 78 | ).json() 79 | 80 | self.user.authorization_code = authorization_code 81 | self.user.access_token = res['access_token'] 82 | self.user.refresh_token = res['refresh_token'] 83 | 84 | res = requests.get(\ 85 | url=AUTH_USER_INFO_URL, \ 86 | headers={\ 87 | 'Authorization': 'Bearer ' + self.user.access_token\ 88 | }\ 89 | ).json() 90 | 91 | self.user.id = res['id'] 92 | self.user.uid = res['uid'] 93 | self.user.name = res['name'] 94 | 95 | self.user.save() 96 | 97 | def getUserBookCollections(self, year, month): 98 | if month == 0: 99 | params = {\ 100 | 'status': 'read', \ 101 | 'from': '%d-01-01T00:00:00+08:00' \ 102 | % (year), 103 | 'to': '%d-01-01T00:00:00+08:00' \ 104 | % (year + 1), 105 | 'count': 100 106 | } 107 | else: 108 | params = {\ 109 | 'status': 'read',\ 110 | 'from': '%d-%02d-01T00:00:00+08:00' \ 111 | % (year, month),\ 112 | 'to': '%d-%02d-01T00:00:00+08:00' \ 113 | % (year if month < 12 else year + 1, \ 114 | month + 1 if month < 12 else 1)\ 115 | } 116 | 117 | res = requests.get(USER_BOOK_COLLECTIONS_URL % self.user.id, \ 118 | params=params 119 | ) 120 | return res.json()['collections'] 121 | 122 | def getUserBookReview(self, book_id): 123 | review_content = '' 124 | start = 0 125 | total = 100 126 | while start < total: 127 | res = requests.get(BOOK_REVIEWS_URL % book_id, \ 128 | params={'count': 100, 'start': start}).json() 129 | review_alt = '' 130 | for review in res['reviews']: 131 | if review['author']['id'] == self.user.id: 132 | review_alt = review['alt'] 133 | else: 134 | continue 135 | if review_alt: 136 | review_content = self.getReivew(review_alt) 137 | break 138 | total = res['total'] 139 | start = start + 100 140 | return review_content 141 | 142 | def getReivew(self, url): 143 | res = requests.get(url) 144 | res_text = res.text.replace(u'\u3000', '') 145 | c = re.compile('\s+') 146 | res_text = re.sub(c, '', res_text) 147 | pattern = u'(.*)(.*?)(.*)' 148 | content = re.match(pattern, res_text).group(2) 149 | content = content.replace('
', '\n') 150 | ind = content.find('