├── .gitignore ├── LICENSE ├── setup.py ├── README.rst └── backup_goodreads.py /.gitignore: -------------------------------------------------------------------------------- 1 | goodreads_reviews.json 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Alex Chan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import codecs 5 | import os 6 | 7 | from setuptools import find_packages, setup 8 | 9 | 10 | def local_file(name): 11 | return os.path.relpath(os.path.join(os.path.dirname(__file__), name)) 12 | 13 | 14 | README = local_file('README.rst') 15 | long_description = codecs.open(README, encoding='utf-8').read() 16 | 17 | 18 | setup( 19 | name='backup_goodreads', 20 | version='1.0.0', 21 | description='A script for backing up reviews from your Goodreads account', 22 | long_description=long_description, 23 | url='https://github.com/alexwlchan/backup-goodreads', 24 | author='Alex Chan', 25 | author_email='alex@alexwlchan.net', 26 | license='MIT', 27 | classifiers=[ 28 | 'Development Status :: 5 - Production/Stable', 29 | 'Intended Audience :: Other Audience', 30 | 'License :: OSI Approved :: MIT License', 31 | 'Programming Language :: Python :: 2', 32 | 'Programming Language :: Python :: 2.7', 33 | 'Programming Language :: Python :: 3', 34 | 'Programming Language :: Python :: 3.3', 35 | 'Programming Language :: Python :: 3.4', 36 | 'Programming Language :: Python :: 3.5', 37 | 'Programming Language :: Python :: 3.6', 38 | ], 39 | install_requires=[ 40 | 'keyring>=10.2,<11', 41 | 'requests>=2.13.0,<3', 42 | ], 43 | entry_points={ 44 | 'console_scripts': [ 45 | 'backup_goodreads=backup_goodreads:main', 46 | ], 47 | }, 48 | ) 49 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | backup-goodreads 2 | ================ 3 | 4 | This is a Python script for backing up your reviews from Goodreads. It mimics 5 | the `export function `_ 6 | provided by Goodreads, but in script form. 7 | 8 | Installation 9 | ************ 10 | 11 | To install this script, use pip: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install -e git+git://github.com/alexwlchan/backup-goodreads.git#egg=backup_gooreads 16 | 17 | or `pipsi `_: 18 | 19 | .. code-block:: console 20 | 21 | $ pipsi install -e git+git://github.com/alexwlchan/backup-goodreads.git#egg=backup_gooreads 22 | 23 | You can use Python 2.7 and Python 3.3+. 24 | 25 | You need to set up three things before you can use the script:: 26 | 27 | 1. Make your Goodreads reviews public. This script only uses the basic API, 28 | not OAuth, and so private reviews can't be backed up. 29 | 2. Get your Goodreads user ID. This is the 8-digit number in the URL of your 30 | profile page. For example, if your user page was 31 | ``https://www.goodreads.com/user/show/12345678-john-smith``, then your 32 | user ID is ``12345678``. 33 | 3. Get a `developer API key `_ from the 34 | Goodreads website. 35 | 36 | Usage 37 | ***** 38 | 39 | Run the script, passing your user ID and API key as command-line flags: 40 | 41 | .. code-block:: console 42 | 43 | $ backup_goodreads --user-id=12345678 --api-key=abcdefg123 44 | 45 | This will write your Goodreads reviews to ``goodreads_reviews.json``. 46 | 47 | To see other options, run with the ``--help`` flag: 48 | 49 | .. code-block:: console 50 | 51 | $ backup_goodreads --help 52 | 53 | License 54 | ******* 55 | 56 | This script is licensed under the MIT license. 57 | -------------------------------------------------------------------------------- /backup_goodreads.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | """A script for backing up your Goodreads library. Writes a backup to 4 | `goodreads_backup.json`, or you can provide an alternative name as the first 5 | command-line argument. 6 | 7 | See the README for more details. 8 | 9 | """ 10 | 11 | from __future__ import print_function 12 | 13 | import argparse 14 | import itertools 15 | import json 16 | 17 | try: 18 | from datetime import timezone 19 | except ImportError: # Python 2 20 | from time import timezone 21 | 22 | from datetime import datetime 23 | from xml.etree import ElementTree as ET 24 | 25 | import keyring 26 | import requests 27 | 28 | 29 | def identity(x): 30 | """Identity function.""" 31 | return x.text 32 | 33 | 34 | def convert_date(element): 35 | """Convert a date string from the Goodreads API into an ISO-8601 UTC 36 | string.""" 37 | # We may get ``None`` if the API is missing any information for a 38 | # particular date field -- for example, the ``date_read`` field is 39 | # only filled in when a book is read. 40 | date_str = element.text 41 | if date_str is None: 42 | return None 43 | else: 44 | # In the API responses, dates are returned in the form 45 | # "Mon Oct 24 12:26:31 -0700 2016" 46 | date_obj = datetime.strptime(date_str, '%a %b %d %H:%M:%S %z %Y') 47 | return str(date_obj.astimezone(timezone.utc)) 48 | 49 | 50 | def convert_rating(element): 51 | """Convert a rating from the Goodreads API.""" 52 | rating = element.text 53 | 54 | # The Goodreads API returns '0' to indicate an unrated book; make 55 | # this a proper null type. 56 | if rating == '0': 57 | return None 58 | else: 59 | return rating 60 | 61 | 62 | def convert_authors(element): 63 | """Get the names of the authors for this book.""" 64 | # The Goodreads API returns information about authors in an 65 | # element on the element, in the following format: 66 | # 67 | # 68 | # 69 | # 1234 70 | # John Smith 71 | # ... 72 | # 73 | # 74 | # 5678 75 | # Jane Doe 76 | # ... 77 | # 78 | # 79 | # 80 | return [author.find('name').text for author in element.findall('author')] 81 | 82 | 83 | def convert_shelves(element): 84 | """Get the names of all the shelves that hold this book.""" 85 | # The shelves for a review are returned in the following format: 86 | # 87 | # 88 | # 89 | # 90 | # 91 | # 92 | return [shelf.attrib['name'] for shelf in element.findall('shelf')] 93 | 94 | 95 | def convert_page_count(element): 96 | try: 97 | return int(element.text) 98 | except TypeError: 99 | return None 100 | 101 | 102 | def convert_body(element): 103 | return element.text.strip() 104 | 105 | 106 | # Map from tag name to pair (output key, converter) 107 | REVIEW_TAGS = { 108 | 'read_at': ('date_read', convert_date), 109 | 'date_added': ('date_added', convert_date), 110 | 'body': ('review', convert_body), 111 | 'rating': ('my_rating', convert_rating), 112 | 'shelves': ('bookshelves', convert_shelves) 113 | } 114 | 115 | # Map from tag name to pair (output key, converter) 116 | BOOK_TAGS = { 117 | 'authors': ('authors', convert_authors), 118 | 'id': ('book_id', identity), 119 | 'title': ('title', identity), 120 | 'isbn': ('isbn', identity), 121 | 'isbn13': ('isbn13', identity), 122 | 'average_rating': ('average_rating', identity), 123 | 'publisher': ('publisher', identity), 124 | 'format': ('binding', identity), 125 | 'num_pages': ('page_count', convert_page_count), 126 | 'publication_year': ('publication_year', identity), 127 | 'published': ('orig_year_published', identity), 128 | } 129 | 130 | 131 | def _get_data_from_goodreads_api(user_id, api_key, page_no): 132 | """Retrieve data about the reviews of a given user from the Goodreads API. 133 | Returns a Request object if successful, raises an Exception if not. 134 | 135 | :param user_id: The ID of the Goodreads user. 136 | :param api_key: The Goodreads API key. 137 | :param page_no: Which page of reviews to fetch. 138 | 139 | """ 140 | # reviews.list (https://www.goodreads.com/api/index#reviews.list) gets 141 | # all the books on somebody's shelf. 142 | req = requests.get('https://www.goodreads.com/review/list.xml', params={ 143 | 'v': '2', 144 | 'key': api_key, 145 | 'id': user_id, 146 | 'page': str(page_no), 147 | }) 148 | if req.status_code != 200: 149 | raise Exception( 150 | 'Unexpected error code from Goodreads API: %s\n' 151 | 'Error message: %r' % (req.status_code, req.text) 152 | ) 153 | return req 154 | 155 | 156 | def _get_reviews_from_api(user_id, api_key): 157 | """Generates elements from the API data. 158 | 159 | :param user_id: The ID of the Goodreads user. 160 | :param api_key: The Goodreads API key. 161 | 162 | """ 163 | for page_no in itertools.count(1): 164 | req = _get_data_from_goodreads_api( 165 | user_id=user_id, api_key=api_key, page_no=page_no 166 | ) 167 | reviews = ET.fromstring(req.text).find('reviews') 168 | for r in reviews: 169 | yield r 170 | if int(reviews.attrib['end']) >= int(reviews.attrib['total']): 171 | break 172 | 173 | 174 | def get_reviews(user_id, api_key): 175 | """Generate reviews associated with a Goodreads user as dictionaries. 176 | 177 | :param user_id: The ID of the Goodreads user. 178 | :param api_key: The Goodreads API key. 179 | 180 | """ 181 | for review in _get_reviews_from_api(user_id, api_key=api_key): 182 | data = {} 183 | 184 | def convert(element, tag_mapping): 185 | if element.tag in tag_mapping: 186 | key, converter = tag_mapping[element.tag] 187 | data[key] = converter(element) 188 | 189 | for review_elt in review: 190 | if review_elt.tag == 'book': 191 | for book_elt in review_elt: 192 | convert(book_elt, BOOK_TAGS) 193 | else: 194 | convert(review_elt, REVIEW_TAGS) 195 | yield data 196 | 197 | 198 | def read_config(): 199 | """Returns configuration for using the script. 200 | 201 | Configuration is read from one of two places: 202 | 1. The system keychain 203 | 2. Command-line arguments 204 | 205 | Command-line arguments take precedence over keychain values. If the 206 | keychain values are empty/missing, the command-line switches are required. 207 | 208 | """ 209 | # Read some initial config from the system keychain: if this doesn't 210 | # exist, then we read it from command-line arguments. 211 | user_id = keyring.get_password('goodreads', 'user_id') 212 | api_key = keyring.get_password('goodreads', 'api_key') 213 | 214 | parser = argparse.ArgumentParser( 215 | description='A script to back up reviews from Goodreads.') 216 | 217 | parser.add_argument( 218 | '--output', default='goodreads_reviews.json', 219 | help='output path for the backup file') 220 | parser.add_argument( 221 | '--user-id', required=(user_id is None), 222 | help='Goodreads user ID') 223 | parser.add_argument( 224 | '--api-key', required=(api_key is None), 225 | help='Goodreads API key (https://www.goodreads.com/api/keys)') 226 | 227 | config = vars(parser.parse_args()) 228 | 229 | if config['user_id'] is None: 230 | config['user_id'] = user_id 231 | if config['api_key'] is None: 232 | config['api_key'] = api_key 233 | 234 | return config 235 | 236 | 237 | def main(): 238 | """Parse the Goodreads API and save the reviews to disk.""" 239 | cfg = read_config() 240 | reviews = get_reviews(user_id=cfg['user_id'], api_key=cfg['api_key']) 241 | json_str = json.dumps(list(reviews), indent=2, sort_keys=True) 242 | with open(cfg['output'], 'w', encoding='utf-8') as f: 243 | f.write(json_str) 244 | 245 | 246 | if __name__ == '__main__': 247 | main() 248 | --------------------------------------------------------------------------------