├── .gitignore
├── LICENSE
├── setup.py
├── README.rst
└── backup_goodreads.py


/.gitignore:
--------------------------------------------------------------------------------
1 | goodreads_reviews.json
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2017 Alex Chan
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | 
 4 | import codecs
 5 | import os
 6 | 
 7 | from setuptools import find_packages, setup
 8 | 
 9 | 
def local_file(name):
    """Return the path of ``name``, a file living next to this script.

    The result is expressed relative to the current working directory.

    :param name: File name (or sub-path) inside this script's directory.

    """
    script_dir = os.path.dirname(__file__)
    target = os.path.join(script_dir, name)
    return os.path.relpath(target)
12 | 
13 | 
README = local_file('README.rst')

# Read the README through a context manager so the file handle is closed
# as soon as the text has been loaded, instead of being left open until
# the interpreter happens to collect it.
with codecs.open(README, encoding='utf-8') as readme_file:
    long_description = readme_file.read()
16 | 
17 | 
# Package metadata.  The project is a single top-level module, so it is
# declared explicitly through ``py_modules``: without it, setuptools
# releases that lack automatic discovery install no code at all and the
# ``backup_goodreads`` console script cannot import its entry point.
setup(
    name='backup_goodreads',
    version='1.0.0',
    description='A script for backing up reviews from your Goodreads account',
    long_description=long_description,
    url='https://github.com/alexwlchan/backup-goodreads',
    author='Alex Chan',
    author_email='alex@alexwlchan.net',
    license='MIT',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Other Audience',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    py_modules=['backup_goodreads'],
    install_requires=[
        'keyring>=10.2,<11',
        'requests>=2.13.0,<3',
    ],
    entry_points={
        # Exposes ``backup_goodreads.main`` as the ``backup_goodreads``
        # command.
        'console_scripts': [
            'backup_goodreads=backup_goodreads:main',
        ],
    },
)
49 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | backup-goodreads
 2 | ================
 3 | 
 4 | This is a Python script for backing up your reviews from Goodreads.  It mimics
 5 | the `export function <https://www.goodreads.com/help/show/5-how-do-i-import-or-export-my-books>`_
 6 | provided by Goodreads, but in script form.
 7 | 
 8 | Installation
 9 | ************
10 | 
11 | To install this script, use pip:
12 | 
13 | .. code-block:: console
14 | 
15 |    $ pip install -e git+https://github.com/alexwlchan/backup-goodreads.git#egg=backup_goodreads
16 | 
17 | or `pipsi <https://github.com/mitsuhiko/pipsi>`_:
18 | 
19 | .. code-block:: console
20 | 
21 |    $ pipsi install -e git+https://github.com/alexwlchan/backup-goodreads.git#egg=backup_goodreads
22 | 
23 | You can use Python 2.7 and Python 3.3+.
24 | 
25 | You need to set up three things before you can use the script:
26 | 
27 | 1. Make your Goodreads reviews public.  This script only uses the basic API,
28 |    not OAuth, and so private reviews can't be backed up.
29 | 2. Get your Goodreads user ID.  This is the 8-digit number in the URL of your
30 |    profile page.  For example, if your user page was
31 |    ``https://www.goodreads.com/user/show/12345678-john-smith``, then your
32 |    user ID is ``12345678``.
33 | 3. Get a `developer API key <https://www.goodreads.com/api/keys>`_ from the
34 |    Goodreads website.
35 | 
36 | Usage
37 | *****
38 | 
39 | Run the script, passing your user ID and API key as command-line flags:
40 | 
41 | .. code-block:: console
42 | 
43 |    $ backup_goodreads --user-id=12345678 --api-key=abcdefg123
44 | 
45 | This will write your Goodreads reviews to ``goodreads_reviews.json``.
46 | 
47 | To see other options, run with the ``--help`` flag:
48 | 
49 | .. code-block:: console
50 | 
51 |    $ backup_goodreads --help
52 | 
53 | License
54 | *******
55 | 
56 | This script is licensed under the MIT license.
57 | 


--------------------------------------------------------------------------------
/backup_goodreads.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- encoding: utf-8 -*-
  3 | """A script for backing up your Goodreads library.  Writes a backup to
  4 | `goodreads_reviews.json`, or you can provide an alternative path with the
  5 | ``--output`` command-line flag.
  6 | 
  7 | See the README for more details.
  8 | 
  9 | """
 10 | 
 11 | from __future__ import print_function
 12 | 
 13 | import argparse
 14 | import itertools
 15 | import json
 16 | 
 17 | try:
 18 |     from datetime import timezone
 19 | except ImportError:  # Python 2
 20 |     from time import timezone
 21 | 
 22 | from datetime import datetime
 23 | from xml.etree import ElementTree as ET
 24 | 
 25 | import keyring
 26 | import requests
 27 | 
 28 | 
 29 | def identity(x):
 30 |     """Identity function."""
 31 |     return x.text
 32 | 
 33 | 
 34 | def convert_date(element):
 35 |     """Convert a date string from the Goodreads API into an ISO-8601 UTC
 36 |     string."""
 37 |     # We may get ``None`` if the API is missing any information for a
 38 |     # particular date field -- for example, the ``date_read`` field is
 39 |     # only filled in when a book is read.
 40 |     date_str = element.text
 41 |     if date_str is None:
 42 |         return None
 43 |     else:
 44 |         # In the API responses, dates are returned in the form
 45 |         # "Mon Oct 24 12:26:31 -0700 2016"
 46 |         date_obj = datetime.strptime(date_str, '%a %b %d %H:%M:%S %z %Y')
 47 |         return str(date_obj.astimezone(timezone.utc))
 48 | 
 49 | 
 50 | def convert_rating(element):
 51 |     """Convert a rating from the Goodreads API."""
 52 |     rating = element.text
 53 | 
 54 |     # The Goodreads API returns '0' to indicate an unrated book; make
 55 |     # this a proper null type.
 56 |     if rating == '0':
 57 |         return None
 58 |     else:
 59 |         return rating
 60 | 
 61 | 
 62 | def convert_authors(element):
 63 |     """Get the names of the authors for this book."""
 64 |     # The Goodreads API returns information about authors in an <authors>
 65 |     # element on the <review> element, in the following format:
 66 |     #
 67 |     #    <authors>
 68 |     #      <author>
 69 |     #        <id>1234</id>
 70 |     #        <name>John Smith</name>
 71 |     #        <snip>...</snip>
 72 |     #      </author>
 73 |     #      <author>
 74 |     #        <id>5678</id>
 75 |     #        <name>Jane Doe</name>
 76 |     #        <snip>...</snip>
 77 |     #      </author>
 78 |     #    </authors>
 79 |     #
 80 |     return [author.find('name').text for author in element.findall('author')]
 81 | 
 82 | 
 83 | def convert_shelves(element):
 84 |     """Get the names of all the shelves that hold this book."""
 85 |     # The shelves for a review are returned in the following format:
 86 |     #
 87 |     #    <shelves>
 88 |     #      <shelf name="read" exclusive="true"/>
 89 |     #      <shelf name="fiction" exclusive="false" review_shelf_id="1234"/>
 90 |     #    </shelves>
 91 |     #
 92 |     return [shelf.attrib['name'] for shelf in element.findall('shelf')]
 93 | 
 94 | 
 95 | def convert_page_count(element):
 96 |     try:
 97 |         return int(element.text)
 98 |     except TypeError:
 99 |         return None
100 | 
101 | 
def convert_body(element):
    """Return the text of a review with surrounding whitespace removed.

    :param element: The ``<body>`` XML element.

    An element with no text at all yields an empty string, the same result
    as a body that contains only whitespace.

    """
    text = element.text
    if text is None:
        # An empty element (e.g. ``<body/>``) has no text to strip.
        return ''
    return text.strip()
104 | 
105 | 
# Converters for the direct children of a <review> element.  Each entry maps
# an API tag name to a pair (output key, converter); tags that are not listed
# here are left out of the backup.
REVIEW_TAGS = {
    'read_at': ('date_read', convert_date),
    'date_added': ('date_added', convert_date),
    'body': ('review', convert_body),
    'rating': ('my_rating', convert_rating),
    'shelves': ('bookshelves', convert_shelves),
}
114 | 
# Converters for the children of the <book> element nested inside a review.
# Each entry maps an API tag name to a pair (output key, converter); tags that
# are not listed here are left out of the backup.
BOOK_TAGS = {
    'authors': ('authors', convert_authors),
    'id': ('book_id', identity),
    'title': ('title', identity),
    'isbn': ('isbn', identity),
    'isbn13': ('isbn13', identity),
    'average_rating': ('average_rating', identity),
    'publisher': ('publisher', identity),
    'format': ('binding', identity),
    'num_pages': ('page_count', convert_page_count),
    'publication_year': ('publication_year', identity),
    'published': ('orig_year_published', identity),
}
129 | 
130 | 
def _get_data_from_goodreads_api(user_id, api_key, page_no):
    """Fetch one page of a user's reviews from the Goodreads API.

    Returns the ``requests`` response object when the API answers with
    HTTP 200, and raises an Exception for any other status code.

    :param user_id: The ID of the Goodreads user.
    :param api_key: The Goodreads API key.
    :param page_no: Which page of reviews to fetch.

    """
    # reviews.list (https://www.goodreads.com/api/index#reviews.list) gets
    # all the books on somebody's shelf.
    query = {
        'v': '2',
        'key': api_key,
        'id': user_id,
        'page': str(page_no),
    }
    response = requests.get(
        'https://www.goodreads.com/review/list.xml', params=query
    )

    if response.status_code == 200:
        return response

    raise Exception(
        'Unexpected error code from Goodreads API: %s\n'
        'Error message: %r' % (response.status_code, response.text)
    )
154 | 
155 | 
def _get_reviews_from_api(user_id, api_key):
    """Yield every <review> element for a user, fetching page after page.

    :param user_id: The ID of the Goodreads user.
    :param api_key: The Goodreads API key.

    """
    page_no = 1
    while True:
        response = _get_data_from_goodreads_api(
            user_id=user_id, api_key=api_key, page_no=page_no
        )
        root = ET.fromstring(response.text)
        reviews = root.find('reviews')

        for review in reviews:
            yield review

        # The <reviews> element carries ``end`` and ``total`` attributes;
        # once ``end`` has reached ``total`` there are no further pages.
        last_seen = int(reviews.attrib['end'])
        total = int(reviews.attrib['total'])
        if last_seen >= total:
            return

        page_no += 1
172 | 
173 | 
def get_reviews(user_id, api_key):
    """Yield one dictionary per review belonging to a Goodreads user.

    Each dictionary is built from the children of a <review> element
    (looked up in ``REVIEW_TAGS``) and from the children of its nested
    <book> element (looked up in ``BOOK_TAGS``).  Tags that appear in
    neither table are skipped.

    :param user_id: The ID of the Goodreads user.
    :param api_key: The Goodreads API key.

    """
    for review in _get_reviews_from_api(user_id, api_key=api_key):
        record = {}

        for child in review:
            # The <book> element is a container: its children are looked
            # up in the book table, while every other child of the review
            # is looked up in the review table.
            if child.tag == 'book':
                candidates = [(book_elt, BOOK_TAGS) for book_elt in child]
            else:
                candidates = [(child, REVIEW_TAGS)]

            for elt, mapping in candidates:
                entry = mapping.get(elt.tag)
                if entry is None:
                    continue
                key, converter = entry
                record[key] = converter(elt)

        yield record
196 | 
197 | 
def read_config():
    """Return the script's configuration as a dictionary.

    The dictionary has the keys ``output``, ``user_id`` and ``api_key``.
    Values come from two sources:
     1. The system keychain (``user_id`` and ``api_key`` only)
     2. Command-line arguments

    A value given on the command line wins over the keychain.  When the
    keychain has no value for a setting, the matching command-line switch
    becomes required.

    """
    # Look the credentials up in the keychain first, so we know which
    # command-line switches have to be mandatory.
    stored = {
        'user_id': keyring.get_password('goodreads', 'user_id'),
        'api_key': keyring.get_password('goodreads', 'api_key'),
    }

    parser = argparse.ArgumentParser(
        description='A script to back up reviews from Goodreads.')
    parser.add_argument(
        '--output', default='goodreads_reviews.json',
        help='output path for the backup file')
    parser.add_argument(
        '--user-id', required=stored['user_id'] is None,
        help='Goodreads user ID')
    parser.add_argument(
        '--api-key', required=stored['api_key'] is None,
        help='Goodreads API key (https://www.goodreads.com/api/keys)')

    config = vars(parser.parse_args())

    # Fall back to the keychain for anything not given on the command line.
    for name, fallback in stored.items():
        if config[name] is None:
            config[name] = fallback

    return config
235 | 
236 | 
def main():
    """Fetch the user's reviews from Goodreads and write them out as JSON."""
    cfg = read_config()

    # Download and serialise everything before touching the output file, so
    # a failure while talking to the API leaves any existing file untouched.
    backup = list(get_reviews(user_id=cfg['user_id'], api_key=cfg['api_key']))
    serialised = json.dumps(backup, indent=2, sort_keys=True)

    with open(cfg['output'], 'w', encoding='utf-8') as out_file:
        out_file.write(serialised)
244 | 
245 | 
# Run the backup only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
248 | 


--------------------------------------------------------------------------------