├── README.md
├── doubanreader.py
├── main.py
└── template.py


/README.md:
--------------------------------------------------------------------------------
 1 | # DouBanReader
 2 | 
 3 | DouBanReader是一个自动根据你的豆瓣读书标记生成读书报告的脚本。
 4 | 
 5 | ## 运行方法
 6 | 
 7 | ```bash
 8 | python main.py
 9 | ```
10 | 
11 | ## 依赖
12 | 
13 | * [Requests: HTTP for Humans v2.7.0.](http://www.python-requests.org/en/latest/)
14 | 
15 | ## 开发日志
16 | 
17 | 项目开发记录详见：[豆瓣阅读报告生成器](http://findingsea.github.io/2015/07/20/doubanreader-notes/)


--------------------------------------------------------------------------------
/doubanreader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import re, json, os, urllib
  3 | import requests
  4 | 
  5 | class DBUser:
  6 | 
  7 |     name = ''
  8 |     id = ''
  9 |     uid = ''
 10 |     authorization_code = ''
 11 |     access_token = ''
 12 |     refresh_token = ''
 13 | 
 14 |     def __init__(self):
 15 |         if os.path.isfile(USER_INFO_FILE):
 16 |             pre_file = open(USER_INFO_FILE, 'r')
 17 |         else:
 18 |             pre_file = open(USER_INFO_FILE, 'w+r')
 19 |         pre_file_read = pre_file.read()
 20 |         pre_file.close()
 21 |         if pre_file_read.strip():
 22 |             user_info = json.loads(pre_file_read.strip())
 23 |             self.name = user_info['name']
 24 |             self.id = user_info['id']
 25 |             self.uid = user_info['uid']
 26 |             self.authorization_code = user_info['authorization_code']
 27 |             self.access_token = user_info['access_token']
 28 |             self.refresh_token = user_info['refresh_token']
 29 | 
 30 |     def save(self):
 31 |         pre_file = open(USER_INFO_FILE, 'w')
 32 |         user_info = {}
 33 |         user_info['name'] = self.name
 34 |         user_info['id'] = self.id
 35 |         user_info['uid'] = self.uid
 36 |         user_info['authorization_code'] = self.authorization_code
 37 |         user_info['access_token'] = self.access_token
 38 |         user_info['refresh_token'] = self.refresh_token
 39 |         pre_file.write(json.dumps(user_info))
 40 |         pre_file.close()
 41 | 
 42 | 
 43 | class DBRClient:
 44 | 
 45 |     user = None
 46 | 
 47 |     def __init__(self, user):
 48 |         self.user = user
 49 | 
 50 |     def isAuth(self):
 51 |         if self.user.authorization_code and self.user.access_token:
 52 |             return True
 53 |         else:
 54 |             return False
 55 | 
 56 |     def auth(self):
 57 | 
 58 |         params = {}
 59 |         params['client_id'] = CLIENT_ID
 60 |         params['redirect_uri'] = REDIRECT_URI
 61 |         params['response_type'] = 'code'
 62 |         params['scope'] = SCOPE
 63 | 
 64 |         print u'请访问如下地址，点击确认后，复制跳转的网址到下面：' + AUTHORIZATION_CODE_URL \
 65 |             + '?' + urllib.urlencode(params)
 66 | 
 67 |         authorization_code = re.match(\
 68 |             u'(.*)code=(.*)', \
 69 |             raw_input(u'跳转网址：'.encode('utf-8')).strip()\
 70 |         ).group(2)
 71 | 
 72 |         params = {'client_id': CLIENT_ID, 'client_secret': SECRET, \
 73 |             'redirect_uri': REDIRECT_URI, 'grant_type': 'authorization_code', \
 74 |             'code': authorization_code}
 75 |         res = requests.post(\
 76 |             url=ACCESS_TOKEN_URL, data=params, \
 77 |             headers={'Content-Type': 'application/x-www-form-urlencoded'} \
 78 |         ).json()
 79 | 
 80 |         self.user.authorization_code = authorization_code
 81 |         self.user.access_token = res['access_token']
 82 |         self.user.refresh_token = res['refresh_token']
 83 | 
 84 |         res = requests.get(\
 85 |             url=AUTH_USER_INFO_URL, \
 86 |             headers={\
 87 |                 'Authorization': 'Bearer ' + self.user.access_token\
 88 |             }\
 89 |         ).json()
 90 | 
 91 |         self.user.id = res['id']
 92 |         self.user.uid = res['uid']
 93 |         self.user.name = res['name']
 94 | 
 95 |         self.user.save()
 96 | 
 97 |     def getUserBookCollections(self, year, month):
 98 |         if month == 0:
 99 |             params = {\
100 |                 'status': 'read', \
101 |                 'from': '%d-01-01T00:00:00+08:00' \
102 |                     % (year),
103 |                 'to': '%d-01-01T00:00:00+08:00' \
104 |                     % (year + 1),
105 |                 'count': 100
106 |             }
107 |         else:
108 |             params = {\
109 |                 'status': 'read',\
110 |                 'from': '%d-%02d-01T00:00:00+08:00' \
111 |                     % (year, month),\
112 |                 'to': '%d-%02d-01T00:00:00+08:00' \
113 |                     % (year if month < 12 else year + 1, \
114 |                         month + 1 if month < 12 else 1)\
115 |             }
116 | 
117 |         res = requests.get(USER_BOOK_COLLECTIONS_URL % self.user.id, \
118 |             params=params
119 |         )
120 |         return res.json()['collections']
121 | 
122 |     def getUserBookReview(self, book_id):
123 |         review_content = ''
124 |         start = 0
125 |         total = 100
126 |         while start < total:
127 |             res = requests.get(BOOK_REVIEWS_URL % book_id, \
128 |                 params={'count': 100, 'start': start}).json()
129 |             review_alt = ''
130 |             for review in res['reviews']:
131 |                 if review['author']['id'] == self.user.id:
132 |                     review_alt = review['alt']
133 |                 else:
134 |                     continue
135 |             if review_alt:
136 |                 review_content = self.getReivew(review_alt)
137 |                 break
138 |             total = res['total']
139 |             start = start + 100
140 |         return review_content
141 | 
142 |     def getReivew(self, url):
143 |         res = requests.get(url)
144 |         res_text = res.text.replace(u'\u3000', '')
145 |         c = re.compile('\s+')
146 |         res_text = re.sub(c, '', res_text)
147 |         pattern = u'(.*)<spanproperty="v:description"class="">(.*?)</span>(.*)'
148 |         content = re.match(pattern, res_text).group(2)
149 |         content = content.replace('<br/>', '\n')
150 |         ind = content.find('<divclass')
151 |         if ind == -1:
152 |             return content
153 |         else:
154 |             return content[:ind]
155 | 
156 |     def getBookTags(self, book_id):
157 |         res = requests.get(BOOK_TAGS_URL % book_id).json()
158 |         tags_list = []
159 |         for tag in res['tags']:
160 |             tags_list.append(tag['name'])
161 |             if len(tags_list) == 10:
162 |                 break
163 |         return tags_list
164 | 
165 |     def convertToUTF8(self, content):
166 |         return content.encode('utf-8')
167 | 
# File in which DBUser stores the user's profile and tokens as JSON.
USER_INFO_FILE = 'user_info.txt'

# Douban application credentials sent during the OAuth2 flow.
# NOTE(review): the client secret is committed in source; consider loading
# it from the environment or an untracked config file instead.
CLIENT_ID = '022edc4b51cf759d068c94e1f56e60d7'
API_KEY = CLIENT_ID
SECRET = 'bfeee4fbd1b29e6c'
REDIRECT_URI = 'http://findingsea.github.io'

# Key names matching the fields DBUser saves; not referenced by the classes
# in this file.
AUTHORIZATION_CODE = 'authorization_code'
ACCESS_TOKEN = 'access_token'
REFRESH_TOKEN = 'refresh_token'
# Misspelled original name, kept so existing references keep working.
REFRESH_TOEKN = REFRESH_TOKEN
DOUBAN_USER_ID = 'id'
DOUBAN_USER_UID = 'uid'
DOUBAN_USER_NAME = 'name'

# OAuth2 endpoints and the scope requested in DBRClient.auth().
ACCESS_TOKEN_URL = 'https://www.douban.com/service/auth2/token'
AUTHORIZATION_CODE_URL = 'https://www.douban.com/service/auth2/auth'
SCOPE = 'shuo_basic_r,shuo_basic_w,douban_basic_common'

# API endpoints; each "%s" is filled with a user id or book id by DBRClient.
AUTH_USER_INFO_URL = 'https://api.douban.com/v2/user/~me'
USER_BOOK_COLLECTIONS_URL = 'https://api.douban.com/v2/book/user/%s/collections'
BOOK_REVIEWS_URL = 'https://api.douban.com/v2/book/%s/reviews'
BOOK_TAGS_URL = 'https://api.douban.com/v2/book/%s/tags'
188 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os, datetime
 3 | from doubanreader import DBRClient, DBUser
 4 | import template
 5 | 
 6 | 
 7 | user = DBUser()
 8 | client = DBRClient(user)
 9 | 
10 | if not client.isAuth():
11 |     client.auth()
12 | 
13 | print u'您已经是认证用户:)'
14 | 
15 | year = 0
16 | month = 0
17 | 
18 | while True:
19 |     try:
20 |         print u'输入格式：'
21 |         print u'1.直接输入月份生成当年该月的读书报告。'
22 |         print u'2.直接输入年份生成该年的全年读书报告。'
23 |         print u'3.输入格式为xxxx.xx生成指定年份指定月份的读书报告。\n'
24 |         s = raw_input(u'请输入您需要生成的读书报告：'.encode('utf-8'))
25 |         if s.find('.') != -1:
26 |             year = int(s.split('.')[0])
27 |             month = int(s.split('.')[1])
28 |         elif len(s) == 4:
29 |             year = int(s)
30 |         else:
31 |             month = int(s)
32 |     except Exception, e:
33 |         print u'您的输入有误！'
34 |     else:
35 |         break
36 | 
37 | if year == 0:
38 |     year = datetime.date.today().year
39 | 
40 | collections = client.getUserBookCollections(year, month)
41 | 
42 | path = str(year) + '/' + str(month)
43 | if not os.path.exists(path):
44 |     os.makedirs(path)
45 | 
46 | if month != 0:
47 |     filename = template.MARKDOWN_TEMPLATE_TITLE % {'year': year, 'month': month} + '.md'
48 | else:
49 |     filename = template.MARKDOWN_TEMPLATE_WHOLE_YEAR_TITLE % year + '.md'
50 | 
51 | mk_file = open(path + '/' + filename, 'w+r')
52 | 
53 | introduction = template.MARKDOWN_TEMPLATE_INTRODUCTION
54 | 
55 | mk_file.write(client.convertToUTF8(introduction))
56 | mk_file.write('\n\n')
57 | 
58 | print 'Number of books: %s' % len(collections)
59 | 
60 | for info in collections:
61 |     book = {}
62 |     book['id'] = info['book']['id']
63 |     book['title'] = info['book']['title']
64 |     book['alt'] = info['book']['alt']
65 |     book['image'] = info['book']['image']
66 |     if info['book']['images'].has_key('large'):
67 |         book['image'] = info['book']['images']['large']
68 |     elif info['book']['images'].has_key('medium'):
69 |         book['image'] = info['book']['images']['medium']
70 | 
71 |     if month == 0:
72 |         section_title = template.MARKDOWN_TEMPLATE_WHOLE_YEAR_SECTION_TITLE % book
73 |     else:
74 |         section_title = template.MARKDOWN_TEMPLATE_SECTION_TITLE % book
75 | 
76 |     mk_file.write(client.convertToUTF8(section_title))
77 |     if month == 0:
78 |         mk_file.write(' ' + info['updated'].split()[0])
79 |     mk_file.write('\n\n')
80 | 
81 |     if month != 0:
82 |         section_picture = template.MARKDOWN_TEMPLATE_SECTION_PICTURE % book
83 |         mk_file.write(client.convertToUTF8(section_picture))
84 |         mk_file.write('\n\n')
85 | 
86 |         mk_file.write(client.convertToUTF8(client.getUserBookReview(book['id'])))
87 |         mk_file.write('\n\n')
88 | 
89 | mk_file.close()
90 | 


--------------------------------------------------------------------------------
/template.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
# Markdown fragments used by main.py to assemble a reading report.
# Placeholders are filled with the "%" operator.

# First paragraph of every report.
MARKDOWN_TEMPLATE_INTRODUCTION = (
    u'以下内容均由'
    u'[豆瓣读书报告生成器](https://github.com/findingsea/DouBanReader)'
    u'自动生成。'
)

# Report titles (also used as the output file name): monthly, then yearly.
MARKDOWN_TEMPLATE_TITLE = u'%(year)d年%(month)d月读书报告'
MARKDOWN_TEMPLATE_WHOLE_YEAR_TITLE = u'%d年读书报告'

# Per-book section headings: linked heading for monthly reports, plain
# title for whole-year reports.
MARKDOWN_TEMPLATE_SECTION_TITLE = u'### [《%(title)s》](%(alt)s)'
MARKDOWN_TEMPLATE_WHOLE_YEAR_SECTION_TITLE = u'《%(title)s》'

# Cover image of a book.
MARKDOWN_TEMPLATE_SECTION_PICTURE = u'![《%(title)s》](%(image)s)'
9 | 


--------------------------------------------------------------------------------