├── .gitignore ├── README.md ├── google_sheets_to_sqlite ├── __init__.py ├── cli.py └── utils.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | auth.json 2 | **.egg-info/ 3 | 4 | **.egg-info/ 5 | .idea/ 6 | build/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google-Sheets-to-SQLite tool 2 | 3 | Create a SQLite database containing data from a [Google Sheets](https://www.google.com/sheets) document. 4 | 5 | Google Sheets provides a simple way to collect and work on data in a collaborative way. However, publishing data can be a hassle. 6 | 7 | This tool can download spreadsheet data and store it in a SQLite database. 8 | 9 | This lets you use SQL to analyze your data, using [Datasette](https://datasette.io/) or the SQLite command-line tool or any other SQLite database browsing software. 10 | 11 | ## Installation 12 | 13 | Install this tool using `pip` in the repository root: 14 | 15 | pip install . 16 | 17 | ## Quickstart 18 | 19 | Get your Google Developer account ready: 20 | 21 | 1. Create a Google Developer Project: https://console.cloud.google.com/projectcreate 22 | 2. OAuth Consent Screen: [click here](https://console.cloud.google.com/apis/credentials/consent) then choose External and add an app, add `https://www.googleapis.com/auth/spreadsheets.readonly` to the scopes and an account you have access to as a Test user. The Test user should have access to the documents you want to import with this tool. 23 | 3. Create OAuth 2.0 Credentials: [click here](https://console.cloud.google.com/apis/credentials). These credentials are needed below. 24 | 4. Activate the Sheets API: [click here](https://console.developers.google.com/apis/api/sheets.googleapis.com/overview) and "Enable" the Google Sheets API, without which your requests will be rejected with a 403 error code. 
25 | 26 | Authenticate with Google Sheets with the credentials you created above: 27 | 28 | google-sheets-to-sqlite authenticate --id CLIENT_ID --secret CLIENT_SECRET 29 | 30 | Now create a SQLite database with the data in "Sheet 1" of the document with spreadsheet id "sheet_id": 31 | 32 | google-sheets-to-sqlite get database.db table_name sheet_id "Sheet 1" 33 | 34 | You can explore the resulting database using [Datasette](https://datasette.io/): 35 | 36 | $ pip install datasette 37 | $ datasette database.db 38 | INFO: Started server process [24661] 39 | INFO: Uvicorn running on http://127.0.0.1:8001 40 | 41 | 42 | ## TODO 43 | 44 | - Tests 45 | - pypi package, CI? 46 | - Handle primary key from data 47 | - Handle partial arguments (missing table, missing sheet name, ...) 48 | - Handle datatypes? 49 | - Handle multiple sheets -------------------------------------------------------------------------------- /google_sheets_to_sqlite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theophilec/google-sheets-to-sqlite/24f8f722efdfcea25c1ba9e078f9c9f48171cf46/google_sheets_to_sqlite/__init__.py -------------------------------------------------------------------------------- /google_sheets_to_sqlite/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import sqlite_utils 3 | 4 | from .utils import load_tokens, _auth_challenge, _refresh_token_request, _save_auth_token, _get_data 5 | 6 | # default values 7 | DEFAULT_SPREADSHEET_ID = '1Zx7Aieu8XmXLKe5kkce9IUfWpRtiwC7InvVR6HImJHI' 8 | DEFAULT_SHEET_RANGE = 'Sheet1' # TODO : handle default first sheet, params 9 | 10 | 11 | # constants 12 | # SCOPE = 'https://www.googleapis.com/auth/spreadsheets' 13 | 14 | 15 | # Authentication flow : from client id and secret, get refresh_token and keep the three in a json file. 
# In documentation and "quickstart" highlight that this code allows you to *create* an app but not to connect to Google
# directly.


@click.group()
def cli():
    """Create a SQLite database with data stored in a Google Sheets document."""


@cli.command()
@click.option(
    "--id",
    type=str,
    help="OAuth 2.0 client id (needed when the auth file has no usable tokens).",
)
@click.option(
    "--secret",
    type=str,
    help="OAuth 2.0 client secret (needed when the auth file has no usable tokens).",
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True),
    default="auth.json",
    help="Path to auth.json token file.",
)
def authenticate(id, secret, auth):
    """Authenticate with Google and save the tokens to the auth file.

    Tokens already stored in the auth file are reused. Otherwise --id and
    --secret are used to run the interactive authorization prompt. In both
    cases a new access token is requested and the auth file is rewritten.
    """
    # NOTE: the parameter is called `id` because click derives it from the
    # `--id` option name; renaming it would change the command's interface.
    try:
        token = load_tokens(auth)
    except (KeyError, FileNotFoundError, ValueError):
        # Missing file, missing field, or unparsable JSON (json.JSONDecodeError
        # is a ValueError): fall back to the interactive flow below.
        token = None

    if token is None:
        if not id or not secret:
            raise click.ClickException(
                "Credentials not found. Please create credentials file or use --id and --secret options.")
        try:
            token = _auth_challenge(id, secret)
        except Exception as err:
            # `except Exception` instead of a bare `except`, so that Ctrl-C at
            # the prompt is not reported as a credentials error; the original
            # cause stays attached for debugging.
            raise click.ClickException("Error getting token from credentials") from err

    try:
        token["access_token"] = _refresh_token_request(
            token["client_id"], token["client_secret"], token["refresh_token"]
        )
    except KeyError as err:
        # _refresh_token_request indexes the response with "access_token";
        # an error response without that key would otherwise be a traceback.
        raise click.ClickException("Error refreshing access token") from err

    _save_auth_token(auth, token)
    click.echo("ok")
@cli.command()
@click.argument(
    "database",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
)
@click.argument(
    "table",
    type=str,
    required=False
)
@click.argument("spreadsheet_id")
@click.argument("sheet_range")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True),
    default="auth.json",
    help="Path to auth.json token file.",
)
@click.option(
    "-h",
    "--header",
    default=0,
    help="Header row index. If all sheets are imported, header row index is common."
)
def get(database, table, spreadsheet_id, sheet_range, auth, header):
    """Import SHEET_RANGE of document SPREADSHEET_ID into TABLE of DATABASE.

    The row at index --header provides the column names; every row after it
    becomes one record. DATABASE is recreated from scratch on each run.
    """
    try:
        access_token = load_tokens(auth)["access_token"]
    except (FileNotFoundError, KeyError) as err:
        # The exception has to be *raised*: merely constructing it let the
        # command carry on with access_token=None.
        raise click.ClickException("Auth token not found. Wrong path or use authenticate first.") from err

    # Download first and only then recreate the database, so a failed request
    # does not wipe an existing database file.
    contents = _get_data(access_token, spreadsheet_id, sheet_range)
    if contents is None:
        raise click.ClickException("Error when getting data...")
    # TODO: understand need to refresh

    # NOTE(review): the API appears to omit "values" for an empty range - confirm.
    values = contents.get("values", [])
    if not 0 <= header < len(values):
        raise click.ClickException(
            f"Header row {header} not found: the range contains {len(values)} row(s)."
        )
    keys = values[header]  # the header row holds the column names
    zipped = [dict(zip(keys, row)) for row in values[header + 1:]]

    db = sqlite_utils.Database(database, recreate=True)
    db[table].insert_all(zipped)
def start_auth_url(client_id, scope):
    """Build the Google OAuth 2.0 authorization URL for *client_id* and *scope*.

    Returns the URL as a string, requesting offline access and an
    authorization code sent to the ``https://localhost:1`` redirect URI.
    """
    # Imported locally: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import urlencode

    AUTH_URI = "https://accounts.google.com/o/oauth2/auth"
    query = urlencode(
        {
            "access_type": "offline",
            "client_id": client_id,
            "redirect_uri": 'https://localhost:1',
            "response_type": "code",
            "scope": scope,
        }
    )
    return f"{AUTH_URI}?{query}"


def _auth_copied_code_request(copied_code, client_id, client_secret):
    """Exchange the authorization code pasted by the user for tokens.

    Returns the decoded JSON body of the token endpoint's response.
    """
    response = httpx.post(
        "https://www.googleapis.com/oauth2/v4/token",
        data={
            "code": copied_code,
            "client_id": client_id,
            "client_secret": client_secret,
            "redirect_uri": "https://localhost:1",
            "grant_type": "authorization_code",
        },
    )
    # Decode once; the original decoded the body twice and discarded the first result.
    return response.json()


def _save_auth_token(filepath, token):
    """Write *token* as indented JSON to *filepath*, readable by the owner only."""
    path = pathlib.Path(filepath)
    # Create the file if needed and restrict its permissions *before* the
    # secrets are written, instead of tightening them afterwards.
    path.touch(mode=0o600)
    path.chmod(0o600)
    with path.open("w") as fp:
        fp.write(json.dumps(token, indent=4))


def _refresh_token_request(client_id, client_secret, refresh_token):
    """Request a new access token using *refresh_token* and return it.

    Raises:
        KeyError: if the response body contains no "access_token" entry.
    """
    response = httpx.post(
        "https://www.googleapis.com/oauth2/v4/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "refresh_token": refresh_token,
            "grant_type": "refresh_token",
        },
    )
    return response.json()["access_token"]


def _auth_challenge(client_id, client_secret):
    """Run the interactive authorization prompt and return the credentials.

    Prints the authorization URL, reads the code the user pastes back, and
    exchanges it for a refresh token.

    Returns:
        A dict with "client_id", "client_secret" and "refresh_token".

    Raises:
        KeyError: if the token response contains no "refresh_token" entry.
    """
    SCOPE = 'https://www.googleapis.com/auth/spreadsheets.readonly'
    print("Please copy & paste the following URL into a web browser and authorize the request.")
    # The URL announced above was never printed in the original, which left
    # the user with nothing to open.
    print(start_auth_url(client_id, SCOPE))
    print("Then copy & paste the code you obtain in the URL (your browser should give you an error) below:\n")
    copied_code = input("Code: ")
    response = _auth_copied_code_request(copied_code, client_id, client_secret)
    refresh_token = response["refresh_token"]
    return {'client_id': client_id, 'client_secret': client_secret, 'refresh_token': refresh_token}
def load_tokens(auth):
    """Read the token file at path *auth* and return its four credentials.

    Returns:
        A dict with "access_token", "refresh_token", "client_id" and
        "client_secret".

    Raises:
        FileNotFoundError: if *auth* does not exist.
        KeyError: if any of the four entries is missing from the file.
    """
    # `with` closes the file; the original left the handle open.
    with open(auth) as fp:
        token_info = json.load(fp)
    return {
        "access_token": token_info["access_token"],
        "refresh_token": token_info["refresh_token"],
        "client_id": token_info["client_id"],
        "client_secret": token_info["client_secret"]
    }


def _error_detail(response):
    """Return a short description of a failed API response."""
    # NOTE(review): assumes errors look like {"error": {"message": ...}};
    # anything else falls back to the HTTP status code.
    try:
        return response.json()["error"]["message"]
    except (ValueError, KeyError, TypeError):
        return f"HTTP {response.status_code}"


def _get_sheets(access_token, spreadsheet_id):
    """Return the titles of all sheets in the document *spreadsheet_id*.

    Raises:
        click.ClickException: if the API does not answer with HTTP 200.
    """
    sheets_request_url = f'https://sheets.googleapis.com/v4/spreadsheets/{spreadsheet_id}?fields=sheets.properties.title'
    response = httpx.get(
        sheets_request_url,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    if response.status_code != httpx.codes.OK:
        # The exception must be raised; constructing it alone made the
        # function silently return None.
        raise click.ClickException(f"Error when getting data: {_error_detail(response)}")
    contents = response.json()
    return [sheet['properties']['title'] for sheet in contents['sheets']]


def _get_data(access_token, spreadsheet_id, sheet_range):
    """Fetch *sheet_range* of document *spreadsheet_id* and return the decoded JSON.

    Raises:
        click.ClickException: if the API does not answer with HTTP 200.
    """
    sheets_request_url = f"https://sheets.googleapis.com/v4/spreadsheets/{spreadsheet_id}/values/{sheet_range}"
    response = httpx.get(
        sheets_request_url,
        headers={"Authorization": f"Bearer {access_token}"}
    )
    if response.status_code != httpx.codes.OK:
        # `response.json().message` raised AttributeError (the decoded body is
        # a dict), hiding the real error; the exception was also never raised.
        raise click.ClickException(f"Error when getting data: {_error_detail(response)}")
    return response.json()
"https://github.com/theophilec/google-sheets-to-sqlite/actions", 25 | # "Changelog": "https://github.com/theophilec/google-sheets-to-sqlite/releases", 26 | }, 27 | license="Apache License, Version 2.0", 28 | version=VERSION, 29 | packages=["google_sheets_to_sqlite"], 30 | entry_points=""" 31 | [console_scripts] 32 | google-sheets-to-sqlite=google_sheets_to_sqlite.cli:cli 33 | """, 34 | install_requires=["click", "httpx", "sqlite-utils"], 35 | extras_require={"test": ["pytest", "pytest-httpx", "pytest-mock", "cogapp"]}, 36 | python_requires=">=3.6", 37 | ) 38 | --------------------------------------------------------------------------------